epg/siteini.pack/International/schedulesdirect.org.ini

**------------------------------------------------------------------------------------------------
* @header_start
* WebGrab+Plus ini for grabbing EPG data from TvGuide websites
* @Site: schedulesdirect.org
* @MinSWversion: V1.1.1/56.6
* @Revision 3 - [15/11/2015] Jan van Straaten
*	- remove of faulty credits role solution
* @Revision 2 - [24/10/2015] Francis De Paemeleere
*   - speedup parsing (tested with V1.56.6)
* @Revision 1 - [23/10/2014] Jan van Straaten
*   - adapted site changes
* @Revision 0 - [31/08/2013] Jan van Straaten / Francis De Paemeleere
*   - creation
* @Remarks: You need a login and password for this site
* @header_end
**------------------------------------------------------------------------------------------------

** Site-level settings (timezone, maximum days, culture, charset, rating/subtitle/episode systems).
site {url=schedulesdirect.org|timezone=UTC|maxdays=16.1|cultureinfo=en-US|charset=UTF-8|titlematchfactor=90|keepindexpage|firstshow=1}
site {ratingsystem=MPAA|subtitlestype=teletext|episodesystem=onscreen}
** The listings are requested with a SOAP 'download' call; the request body is taken from
** index_variable_element, which is filled in the (urlindex) scope.
url_index {url|http://dd.schedulesdirect.org/schedulesdirect/tvlistings/xtvdService}
url_index.headers {method=SOAP}
url_index.headers {customheader=SOAPAction=urn:tvDataDelivery#download}
url_index.headers {customheader=Accept-Encoding=gzip,deflate}
*
** Account login: replace ENTER_USERNAME and ENTER_PASSWORD with your own schedulesdirect.org login.
** Do not distribute this file with real account credentials filled in.
url_index.headers {credentials=ENTER_USERNAME,ENTER_PASSWORD}
*
url_index.headers {accept=text/xml|contenttype=text/xml;charset="utf-8"}
url_index.headers {postdata='index_variable_element'}

** Builds the SOAP request body in index_variable_element:
**   startTime = 'urldate' (index_temp_1), endTime = 'urldate' + requested timespan (index_temp_3).
** index_variable_element is first used as scratch for the timespan and then cleared and reused
** for the envelope, so the order of the statements below matters.
scope.range {(urlindex)|end}
** timespan calculation to enable to add the requested timespan from the config
index_variable_element.modify {calculate(format=F1)|'config_timespan_days' 1 +} * add 1 day because config_timespan_days is 0 based
index_variable_element.modify {calculate(format=timespan,hours)} * convert to the proper timespan string required for index_temp_3
* start date of the request, formatted as yyyy-MM-dd
index_temp_1.modify {calculate(format=date,yyyy-MM-dd)|'urldate'}
* end date of the request: start date plus the timespan computed above
index_temp_3.modify {calculate(format=date,yyyy-MM-dd)|'urldate' 'index_variable_element' +}
index_variable_element.modify {clear} * clear the timespan value
* the SOAP envelope; both times are sent as midnight (T00:00:00Z) of the computed dates
index_variable_element.modify {addstart|<soapenv:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:urn="urn:tvDataDelivery"><soapenv:Header/><soapenv:Body><urn:download soapenv:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/"><startTime xsi:type="xsd:string">'index_temp_1'T00:00:00Z</startTime><endTime xsi:type="xsd:string">'index_temp_3'T00:00:00Z</endTime></urn:download></soapenv:Body></soapenv:Envelope>}
end_scope

** Splits the response: the whole <xtvd> document is captured first, then cut into separate
** programs / productionCrew / genres chunks (global_temp_1..3), and finally index_showsplit is
** replaced by the <schedule ...> tags of the requested channel only.
index_showsplit.scrub {regex||<xtvd.*?</xtvd>||} * copies the whole index page
scope.range {(splitindex)|end}
* now break up the large chunk into 3 smaller chunks, to speed up parsing
index_variable_element.modify {clear}

* The replaces below shorten the tag names inside each chunk. The shortened names
* (<p id=, <t>, <st>, <d>, <sEN>, <sT>, <crew p=, <m>, <r>, <pG p=, <g>, <c>) are the ones the
* regexes in the (indexshowdetails) scope look for, so both places must be kept in sync.
global_temp_1.modify {substring(type=regex)|'index_showsplit' "^.*?<programs>(.*?)</programs>"}  * contains all programs
* 'title>' is replaced first, which also turns 'subtitle>' into 'subt>'; the next line then shortens that to 'st>'
global_temp_1.modify {replace|title>|t>}
global_temp_1.modify {replace|subt>|st>}
global_temp_1.modify {replace|<program id=|<p id=}
global_temp_1.modify {replace|</program>|</p>}
global_temp_1.modify {replace|syndicatedEpisodeNumber>|sEN>}
global_temp_1.modify {replace|description>|d>}
global_temp_1.modify {replace|originalAirDate>|oAD>}
global_temp_1.modify {replace|showType>|sT>}
global_temp_1.modify {replace|series>|s>}

** SPEEDUP option: if you don't need production/crew/cast info, just comment out next line
global_temp_2.modify {substring(type=regex)|'index_showsplit' "^.*?<productionCrew>(.*?)</productionCrew>"}  * contains all productionCrew
* 'name>' is stripped first; presumably this turns givenname>/surname> into given>/sur>, which the next two lines shorten
global_temp_2.modify {replace|name>|>}
global_temp_2.modify {replace|given>|g>}
global_temp_2.modify {replace|sur>|s>}
global_temp_2.modify {replace|member>|m>}
global_temp_2.modify {replace|role>|r>}
* the role value 'Actor' is abbreviated to 'A'
global_temp_2.modify {replace|r>Actor</r|r>A</r}
global_temp_2.modify {replace|program=|p=}

** SPEEDUP option: if you don't need extra genre info, just comment out next line
global_temp_3.modify {substring(type=regex)|'index_showsplit' "^.*?<genres>(.*?)</genres>"}  * contains all genres
global_temp_3.modify {replace|genre>|g>}
global_temp_3.modify {replace|class>|c>}
global_temp_3.modify {replace|relevance>|r>}
global_temp_3.modify {replace|programGenre>|pG>}
global_temp_3.modify {replace|<programGenre program=|<pG p=}

index_temp_2.modify {substring(type=regex)|'index_showsplit' "^.*?<schedules>(.*?)</schedules>"}  * contains all shows we need
* keep only the <schedule ...> start tags whose station attribute equals the configured site_id
index_temp_2.modify {substring(type=regex)|(<schedule [^>]* station=\''config_site_id'\' [^>]*>)}
index_temp_2.modify {cleanup(removeduplicates=equal,100)}
* from here on every show is one <schedule ...> tag of the requested channel
index_showsplit.modify {set|'index_temp_2'}

end_scope
**
** Values scrubbed from the attributes of each <schedule ...> tag.
* start time: the HH:mm part of the time attribute (the value ends in 'Z')
index_start.scrub {regex||time='[^\']*?T(\d{2}:\d{2}):\d{2}Z'||}
* duration: the '<hours>H<minutes>' part of duration='PT..H..M'; the 'H' is replaced by ':' in the (indexshowdetails) scope
index_duration.scrub {regex||duration='PT(\d*H\d*)M||}
* program id: used in the (indexshowdetails) scope to look up the program, crew and genre data
index_temp_5.scrub {regex||program='([^\']*)||}
* video quality: any non-empty hdtv attribute value is replaced by the text HDTV
index_videoquality.scrub {regex||hdtv='([^\']*)||}
index_videoquality.modify {replace(not="")|'index_videoquality'|HDTV}

* subtitles: any non-empty closeCaptioned attribute value is replaced by the text true
index_subtitles.scrub {regex||closeCaptioned='([^\']*)||}
index_subtitles.modify {replace(not="")|'index_subtitles'|true}

** Fills the show details for each show. The program id in index_temp_5 is used to look up
** the matching entry in the programs (global_temp_1), productionCrew (global_temp_2) and
** genres (global_temp_3) chunks; index_temp_4 holds the entry currently being parsed.
** The short tag names used in the regexes are the ones produced in the (splitindex) scope.
scope.range {(indexshowdetails)|end}
index_start.modify {calculate(format=utctime)}
* the scrubbed duration looks like '<hours>H<minutes>'; turn it into '<hours>:<minutes>'
index_duration.modify {replace|H|:}

** get the programs part
index_temp_4.modify {substring(type=regex)|'global_temp_1' "^.*?<p id=\''index_temp_5'\'>(.*?)</p>"}
index_title.modify          {substring(type=regex)|'index_temp_4' "<t>([^<]*)"}
index_subtitle.modify       {substring(type=regex)|'index_temp_4' "<st>([^<]*)"}
index_description.modify    {substring(type=regex)|'index_temp_4' "<d>([^<]*)"}
index_rating.modify         {substring(type=regex)|'index_temp_4' "<mpaaRating>([^<]*)"}
index_temp_3.modify         {substring(type=regex)|'index_temp_4' "<advisory>([^<]*)"}
index_rating.modify         {addend|\|'index_temp_3'} *advisory added to rating
index_productiondate.modify {substring(type=regex)|'index_temp_4' "<year>([^<]*)"}
index_episode.modify        {substring(type=regex)|'index_temp_4' "<sEN>([^<]*)"}
* star rating: the '*' characters are the full stars, a trailing '+' is a half star; the result is written as '<value> / 4'
index_starrating.modify     {substring(type=regex)|'index_temp_4' "<starRating>(\**)[\+]*</starRating>"}  * full stars
index_temp_1.modify         {substring(type=regex)|'index_temp_4' "<starRating>.*(\+)</starRating>"}      * half star
index_starrating.modify     {calculate(not="" type=char format=F0)|#}
index_starrating.modify     {addend('index_temp_1' not="")|.5}
index_starrating.modify     {addend(not="")| / 4}
index_category.modify       {substring(type=regex)|'index_temp_4' "<sT>([^<]*)"}

* get the productionCrew part
index_temp_4.modify {substring(type=regex)|'global_temp_2' "^.*?<crew p=\''index_temp_5'\'>(.*?)</crew>"}
* role 'A' is the abbreviated 'Actor' role (see the replace in the (splitindex) scope)
index_actor.modify          {substring(type=regex)|'index_temp_4' "<m>[^<]*<r>A</r>(.*?)</m>"}
index_actor.modify          {cleanup(tags="<"">")}
*index_temp_1.modify         {substring(type=regex)|'index_temp_4' "<m>[^<]*<r>Guest Star</r>(.*?)</m>"}
*index_temp_1.modify         {cleanup(tags="<"">")}
*index_temp_1.modify         {addend(not "")| (Guest Star)}
*index_actor.modify          {addend('index_temp_1' not "")|'index_temp_1'\|}
index_actor.modify          {cleanup(removeduplicates)}
index_presenter.modify      {substring(type=regex)|'index_temp_4' "<m>[^<]*<r>Host</r>(.*?)</m>"}
index_presenter.modify      {cleanup(tags="<"">")}
index_director.modify       {substring(type=regex)|'index_temp_4' "<m>[^<]*<r>Director</r>(.*?)</m>"}
index_director.modify       {cleanup(tags="<"">")}
index_producer.modify       {substring(type=regex)|'index_temp_4' "<m>[^<]*<r>Producer</r>(.*?)</m>"}
index_producer.modify       {cleanup(tags="<"">")}
* Executive producers are collected in index_temp_1 and put in front of the plain producers.
* Scrubbing them straight into index_producer would replace the 'Producer' result obtained above.
* No role text is added to the names.
index_temp_1.modify         {substring(type=regex)|'index_temp_4' "<m>[^<]*<r>Executive Producer</r>(.*?)</m>"}
index_temp_1.modify         {cleanup(tags="<"">")}
index_producer.modify       {addstart('index_temp_1' not="")|'index_temp_1'\|}
index_producer.modify       {cleanup(removeduplicates)}
index_writer.modify         {substring(type=regex)|'index_temp_4' "<m>[^<]*<r>Writer</r>(.*?)</m>"}
index_writer.modify         {cleanup(tags="<"">")}
* get the genres part
index_temp_4.modify {substring(type=regex)|'global_temp_3' "^.*?<pG p=\''index_temp_5'\'>(.*?)</pG>"}
* the genre class values are put in front of the showType category scrubbed above
index_temp_1.modify       {substring(type=regex)|'index_temp_4' "<g>.*?<c>(.*?)</c>"}
index_category.modify     {addstart('index_temp_1' not= "")|'index_temp_1'\|}
end_scope

**  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _
**      #####  CHANNEL FILE CREATION (only to create the xxx-channel.xml file)
**
** @auto_xml_channel_start
*index_site_id.scrub  {regex||(<station .*?</station>)||}
*scope.range {(channellist)|end}
*index_site_channel.modify {substring(type=regex)|'index_site_id' "<name>(.*?)</name>"}
*index_site_id.modify {substring(type=regex)|'index_site_id' "<station id=\'([0-9]*)\'>"}
*index_site_id.modify {cleanup(removeduplicates=equal,100 link="index_site_channel")}
*end_scope
** @auto_xml_channel_end