epg/siteini.pack/Italy/mediasetpremium.it.ini

**------------------------------------------------------------------------------------------------
* @header_start
* WebGrab+Plus ini for grabbing EPG data from TvGuide websites
* @Site: mediasetpremium.it
* @MinSWversion: V1.1.1/55.26
* @Revision 2 - [31/05/2017] Mat8861 *fix channel creation
* @Revision 1 - [18/10/2015] Jan van Straaten
*   - added rating conversion, plus a one hour offset in the start times (reason unknown)
* @Revision 0 - [27/08/2015] Jan van Straaten
*   - creation
* @Remarks:
* @header_end
**------------------------------------------------------------------------------------------------

* General settings: grab at most 7 days, Italian culture info, UTF-8 pages.
* NOTE(review): the '?' after Europe/Rome looks like an uncertainty marker - confirm the timezone,
* it may be related to the one hour correction that is applied to index_start further down.
site {url=mediasetpremium.it|timezone=Europe/Rome?|maxdays=7|cultureinfo=it-IT|charset=UTF-8|titlematchfactor=40|nopageoverlaps}

* The index page is one .json file per day and per channel:
*   http://www.mediasetpremium.it/guida_tv/json/(yyyyMMdd)/(yyyyMMdd)-(channel).json
urldate.format {datestring|yyyyMMdd}
url_index {url|http://www.mediasetpremium.it/guida_tv/json/|urldate|/|urldate|-|channel|.json}
* ask the server for a compressed response
url_index.headers {customheader=Accept-Encoding=gzip,deflate}
* Show splitting is done in three steps; the order of the steps matters.
* 1. take the whole index page as one single element
index_showsplit.scrub {single||||} *copies the index page
* 2. get rid of the disturbing {} in an empty parentalControl :
*    (the split regex in step 3 is lazy and ends at the first closing brace it meets)
index_showsplit.modify {replace|{}| }
* 3. split into one element per show: everything from "id": up to the next closing brace
index_showsplit.modify {substring(type=regex)|'index_showsplit' "\{(\"id\":.+?)\}"}

* Start time: the 10 digit number in "timeStart" (presumably Unix epoch seconds - TODO confirm)
index_start.scrub {regex||\"timeStart\":(\d{10}),||}
** a very strange fact: The times in the index are one hour behind!!
* so one hour is added to every start time
index_start.modify {calculate(format=date,yyyy/MM/ddTHH:mm)|0:1:00 +}

* The stop time is deliberately not scrubbed:
*index_stop.scrub {regex||\"timeEnd\":(\d{10}),||} * there are errors in the sitedata

* Title as given in the index page
index_title.scrub {regex||\"title\":\"(.+?)\",||}
* the next returns a number, 0, 1 or 2 : no idea of the meaning of that
index_rating.scrub {regex||\"parentalControl\":(\d),||}
* convert the number into a rating label
* NOTE(review): the values converted here are 0, 1 and 3, while the remark above mentions 0, 1 or 2;
* a value of 2 is not converted by any of these lines - confirm which values the site really returns
index_rating.modify {set("0")|G}
index_rating.modify {set("1")|PG}
index_rating.modify {set("3")|R}

* Address of the show detail page: the "linkDetail" value, which must be on www.mediasetpremium.it and end in .html
index_urlshow.scrub {regex||\"linkDetail\":\"(http://www.mediasetpremium.it/.+?\.html)||}
index_urlshow.headers {customheader=Accept-Encoding=gzip,deflate}     * to speed up the downloading of the detail pages

* ----- elements taken from the show detail page -----
* title: the h1 that follows the rowText div
title.scrub {regex||<div class=\"rowText\">\s*?<h1>(.+?)</h1>||}
*subtitle.modify {addstart|'index_title'} *test only
* description: content of the textParagrafo div, with the html tags cleaned out afterwards
description.scrub {regex||<div class=\"textParagrafo\">\s*?(.+?)</div>||}
description.modify {cleanup(tags="<"">")}
* category and production year: the values labelled genere and anno
category.scrub {regex||<span><b>genere</b></span><strong>(.+?)</strong>||}
productiondate.scrub {regex||<span><b>anno</b></span><strong>(\d{4})</strong>||}
* cast taken from the description: the text between "el cast ...:" and the next full stop
* NOTE(review): the ('actor' "") condition presumably limits this to shows with an empty actor - confirm
actor.modify {substring('actor' "" type=regex)|'description' "el cast.+?:(.+?)\."} * alternative
* temp_1 = the category in upper case
temp_1.modify {addstart|'category'}
temp_1.modify {cleanup(style=upper)} * needed as delimiter for the cast in temp_2
temp_2.scrub {regex||<h2>(.+?)</h2>||} * copy of the section that can contain the credits, category , country and productiondate
* get the credits out of temp_2
* director: the text between Regia: and the following " -"
director.modify {substring(type=regex)|'temp_2' "Regia:(.+?)\s-"}
director.modify {replace|,|\|} * make multi
* actor: the text between Cast: and ". " followed by the upper case category held in temp_1
actor.modify {substring('actor' "" type=regex)|'temp_2' ".+?Cast:(.+?)\.\s'temp_1'"}
actor.modify {replace|,|\|} * make multi
**  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _
**      #####  CHANNEL FILE CREATION (only to create the xxx-channel.xml file)
**
**      The lines between the two markers below are disabled by their leading *.
**      NOTE(review): presumably they are enabled by removing that single leading * and disabled
**      again after the channel file has been created - confirm against the WebGrab+Plus manual.
**      They read the channel ids and channel labels from config.json.
**
** @auto_xml_channel_start
*url_index {url|http://www.mediasetpremium.it/guida_tv/json/config.json}
*index_site_id.scrub {multi|"channels"|"channelid":"|"|,]}]}
*index_site_channel.scrub {multi|"channels"|"label":"|"|,]}]}
*scope.range {(channellist)|end}
*index_site_id.modify {cleanup(removeduplicates=equal,100 link="index_site_channel")}
*end_scope
** @auto_xml_channel_end