epg/siteini.pack/USA/tv.com.ini

**------------------------------------------------------------------------------------------------
* @header_start
* WebGrab+Plus ini for grabbing EPG data from TvGuide websites
* @Site: tv.com
* @MinSWversion: 1.1.1/56.12
* @Revision 4 - [24/07/2016] Blackbear199
*   - added retry=
* @Revision 3 - [03/06/2016] Blackbear199
*   - fix - sometimes title not present on showdetails page
* @Revision 2 - [12/01/2016] Francis De Paemeleere
*   - get all the data available (previous only 3 or 4 days were grabbed)
* @Revision 1 - [05/01/2016] Jan van Straaten
*   - remove some special chars in the title (only seen on movies)
* @Revision 0 - [03/11/2015] Jan van Straaten
*   - creation
* @Remarks: directv alternative, less details
* @header_end
**------------------------------------------------------------------------------------------------

* General site settings for tv.com: timezone UTC, at most 10 days of listings,
* en-US culture, UTF-8 charset, title match factor 90, page overlaps disabled.
site {url=tv.com|timezone=UTC|maxdays=10|cultureinfo=en-US|charset=UTF-8|titlematchfactor=90|nopageoverlaps}
* Cookies are loaded from tv.com.cookies.txt; the retry element sets a retry count of 4
* together with its channel-delay, index-delay and time-out attributes.
site {loadcookie=tv.com.cookies.txt|retry=<retry channel-delay="5" index-delay="1" time-out="5">4</retry>}
* 'urldate' is produced as a yyyy-MM-dd date string; the urlindex scope further down
* converts it into the value that replaces ##TIMESTAMP##.
urldate.format {datestring|yyyy-MM-dd}
* Earlier per-station index url, disabled and kept for reference:
*url_index{url|http://www.tv.com/listings/station/|channel|}
* Active index url: ##TIMESTAMP## is a placeholder replaced before the request,
* and the channel value is appended after "station=".
url_index{url|http://www.tv.com/listings/singlestation/?start=##TIMESTAMP##&station=|channel|}

* urlindex scope: finalise the index url by filling in the ##TIMESTAMP## placeholder.
scope.range {(urlindex)|end}
* Convert 'urldate' to a unix-format date value and keep it in index_variable_element.
index_variable_element.modify {calculate(format=date,unix)|'urldate'}
* Replace the ##TIMESTAMP## placeholder of url_index with that value.
url_index.modify {replace|##TIMESTAMP##|'index_variable_element'}
end_scope

* Request the index pages with an Accept-Encoding header allowing gzip and deflate.
url_index.headers {customheader=Accept-Encoding=gzip,deflate} * to speedup the downloading of the index pages

* splitindex scope: turn the downloaded index page into one element per show.
scope.range {(splitindex)|end}
* Scrub as a single element with all separators left empty
* (presumably this captures the whole page - TODO confirm).
index_showsplit.scrub {single||||}
* Decode the JSON escaping of the scrubbed content so the regexes below see plain html.
index_showsplit.modify {cleanup(style=jsondecode)}
* Keep only the <li class="event row" ...</li> fragments; each one is a show.
index_showsplit.modify {substring(type=regex)|<li class="event row".+?</li>}
* Drop duplicate show fragments (equality comparison, parameter 100).
index_showsplit.modify {cleanup(removeduplicates=equal,100)}
end_scope

* Per-show elements taken from each index fragment.
* Start time: the 10-digit value of the data-start attribute.
index_start.scrub {regex||data-start="(\d{10})"||}
* Title: content of <div class="title">, then stripped of its <label> element
* and of any remaining html tags.
index_title.scrub {regex||<div class="title">(.+?)</div>||}
index_title.modify {remove(type=regex)|"(<label>.+?</label>)"}
index_title.modify {remove(type=regex)|"(<[^>]*>)"}
* Description: content of <div class="desc"> (may be empty).
index_description.scrub {regex||<div class="desc">(.*?)</div>||}

* Numeric part of the data-tmsid="rvp:..." attribute, used to build the detail page url.
index_temp_1.scrub {regex||data-tmsid="rvp:(\d+?)"||} * id
* Only set a detail page url when an id was found.
index_urlshow.modify {set('index_temp_1' not "")|http://www.tv.com/listings/event/?EventTmsId=rvp%3A'index_temp_1'}
* Example of a resulting detail url:
*http://www.tv.com/listings/event/?EventTmsId=rvp%3A1952005171

* Request the detail pages with an Accept-Encoding header allowing gzip and deflate.
index_urlshow.headers {customheader=Accept-Encoding=gzip,deflate}* to speedup the downloading of the detail pages

* Elements taken from the show detail page.
* Title / subtitle: content of <h1> / <h2>, with an optional wrapping <a href=...> link skipped.
title.scrub {regex||<h1>(?:<a href=.+?>)?(.+?)(?:</a>)?</h1>||}
subtitle.scrub {regex||<h2>(?:<a href=.+?>)?(.+?)(?:</a>)?</h2>||}
* Description and icon are matched with backslash-escaped quotes; the leftover
* backslashes are removed from the description afterwards.
description.scrub {regex||<div class=\\"description\\">(.*?)</div>||}
description.modify {remove|\}
showicon.scrub {regex||data-image=\\"(http://.+?\.jpg)\\"||}

* Category: the text of the <label> inside the scrubbed title, without the trailing colon.
* This must run BEFORE the title clean-up below; once the label and tags are removed
* from 'title' the regex can no longer match and category would always stay empty.
category.modify {substring(type=regex)|'title' "<label>(.+?):\s?</label>"}

* Title clean-up: drop the <label> element and any remaining html tags.
title.modify {remove(type=regex)|"(<label>.+?</label>)"}
title.modify {remove(type=regex)|"(<[^>]*>)"}
* Fall back to the index page title when the detail page gave no title.
title.modify {addstart('title' "")|'index_title'}
* Subtitle clean-up: drop any html tags.
subtitle.modify {remove(type=regex)|"(<[^>]*>)"}

**  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _
**      #####  CHANNEL FILE CREATION (only to create the xxx-channel.xml file)
**
** @auto_xml_channel_start
*scope.range {(channellist)|end}
*url_index {url|http://www.tv.com/listings/}
*index_site_channel.scrub {regex||<a href="/listings/station/\d+?".+?title="(.+?)"||}
*index_site_id.scrub {regex||<a href="/listings/station/(\d+?)"||}
*index_temp_6.scrub {regex||class="name">(.+?)(?:</a>\|</div>)||}
*** append the scraped name (index_temp_6) in parentheses to each channel entry to make it clearer
*index_temp_1.modify {set|0}
*loop {(each "index_temp_2" in 'index_site_channel')|end}
*index_temp_3.modify {substring(type=element)|'index_temp_6' 'index_temp_1' 1} * name
*index_temp_4.modify {addend|'index_temp_2' ('index_temp_3')####}
*index_temp_1.modify {calculate(format=F0)|1+}
*end_loop
*index_site_channel.modify {set|'index_temp_4'}
*index_site_channel.modify {replace|####|\|}
*index_site_channel.modify {cleanup(removeduplicates=equal,100 link="index_site_id")}
*end_scope
** @auto_xml_channel_end