epg/siteini.pack/Italy/rai.it.ini

**------------------------------------------------------------------------------------------------
* @header_start
* WebGrab+Plus ini for grabbing EPG data from TvGuide websites
* @Site: rai.it
* @MinSWversion: V1.1.1/52
* @Revision 1 - [13/04/2014] Jan van Straaten
*   various adaptations to site changes
* @Revision 0 - [11/08/2011] Jan van Straaten
*   none
* @Remarks:
*   unstructured site rai.it makes it difficult to grab all epg data
* @header_end
**------------------------------------------------------------------------------------------------

* General site settings: site url, timezone, number of days to grab, culture, charset,
* title match factor and episode system.
site {url=rai.it|timezone=UTC+01:00|maxdays=6|cultureinfo=it-IT|charset=UTF-8|titlematchfactor=90|episodesystem=onscreen}
*site {ratingsystem=IT|grabengine=|firstshow=0|firstday=0000000}
* Index page url: the static guide path followed by the channel, an underscore, the date and .html
url_index{url|http://www.rai.it/dl/portale/html/palinsesti/guidatv/static/|channel|_|urldate|.html}
* The date part of the index url is written as yyyy_MM_dd
urldate.format {datestring|yyyy_MM_dd}
*subpage.format {number||1|}
* Every show block on the index page starts at a div with class intG
index_showsplit.scrub {multi|<div class="intG">|||}
* Url of the show detail page, taken from the href of the link in the show block;
* filtered with include "ContentItem" and exclude "media"
index_urlshow {url(include="ContentItem" exclude="media")||<a href="||" |</a></span>}
*
* Index page elements, scrubbed from each show block.
* Start time comes from the span with class ora.
index_start.scrub {single|<span class="ora">||</span>|</span>}
* Title, subtitle and episode are all scrubbed from the same link text inside the span with class info.
* The text is split on " -" (and ": " for title and subtitle):
*   title    keeps the first part
*   subtitle keeps the last part, excluding parts that contain an Ep marker
*   episode  keeps the parts that contain an Ep marker
index_title.scrub {single(separator=" -"": " include=first)|<span class="info">|">|</a>|</span>}
index_subtitle.scrub {single(separator=" -"": " exclude="Ep.""ep.""Ep " include=last)|<span class="info">|">|</a>|</span>}
index_episode.scrub {single(separator=" -" include="Ep.""ep.""Ep ")|<span class="info">|">|</a>|</span>}
* Description comes from the element with class eventDescription, with tags cleaned up.
index_description.scrub {single|class="eventDescription">||</div>|</div>}
index_description.modify {cleanup(tags="<"">")}
* The text of the span with class solotesto is kept in index_temp_3, removed from the
* description and then appended to the end of it.
index_temp_3.scrub {single|<span class="solotesto">||</span>|</div>}
index_description.modify {remove|'index_temp_3'}
index_description.modify {addend|'index_temp_3'}
*
* Detail page elements. Several elements have two scrub lines with different start and end
* strings; presumably these cover different page layouts - confirm against the site.
*title.scrub {single|<title>||</title>|</title>} * this site has no consistent title in the detail page!!
* Subtitle: the h2 text inside the div with class superTitolo.
subtitle.scrub {single|<div class="superTitolo">|<h2>|</h2>|<h3>}
* Description: paragraph text from the div with class programmaArticolo or Articolo, split on
* " />" and "\">" with the parts containing img, span style, iframe or a href markup excluded.
description.scrub {single(separator=" />""\">" exclude="<img""<span style=""<iframe""<a href")|<div class="programmaArticolo">|<p>|</p>|</p>}
description.scrub {single(separator=" />""\">" exclude="<img""<span style=""<iframe""<a href")|<div class="Articolo">|<p>|</p>|</p>}
* The remaining elements are all read from the div with class programmaArticolo,
* using the Italian labels (Genere, Titolo originale, Produzione, Regia, Cast, Ideatore, Anno).
category.scrub {single|<div class="programmaArticolo">|Genere: | <br />|</p>}
titleoriginal.scrub {single|<div class="programmaArticolo">|Titolo originale: <i>|</i>}
titleoriginal.scrub {single|<div class="programmaArticolo">|<em>(|)</em>|</p>}
producer.scrub {single|<div class="programmaArticolo">|Produzione: |<}
* Director: the text after "Regia: " is split on "Cast" and only the first part is kept.
director.scrub {single(separator="Cast" include=first)|<div class="programmaArticolo">|Regia: |<}
director.scrub {single|<div class="programmaArticolo">| di<strong>|</strong>|</p>}
actor.scrub {single|<div class="programmaArticolo">|Cast: |<}
actor.scrub {single(separator=",")|<div class="programmaArticolo">| con<strong>|</strong>|</p>}
writer.scrub {single(separator=",")|<div class="programmaArticolo">|Ideatore: |<}
productiondate.scrub {single|<div class="programmaArticolo">|Anno: |<|</p>}
* NOTE(review): the next line has empty element start and end strings, so it scrubs the whole
* block up to the closing p tag; presumably the grabber picks the year out of it - confirm.
productiondate.scrub {single|<div class="programmaArticolo">|||</p>}
*
*index_description.modify {addstart("")|nessun dettaglio}
*index_description.modify {remove('index_urlshow' not "")|'index_description'}
*
* The following extracts the episode number from a title like .. Art alive Ep 42 ..
* and then removes the Ep part from the title.
* Steps: normalise the spacing after the Ep marker, compute the number into index_temp_1,
* write it to index_episode as "Ep. <number>", and finally strip every marker variant
* followed by that number from index_title.
index_title.modify {replace|ep.|ep. } * add space to ep. in case ep.52 (fails to extract the ep number without the space!)
index_title.modify {replace|Ep.|Ep. }
* collapse the double spaces that the two replacements above can introduce
index_title.modify {replace|  | }
index_title.modify {cleanup(style=name)}
* start from an empty index_temp_1 so that a value is only present when one of the calculations below runs
index_temp_1.modify {clear}
* one calculation per marker spelling; each only applies when index_title contains that marker
index_temp_1.modify {calculate('index_title' ~ "Ep " format=F0)|'index_title' 1 *} * extract the Ep number
index_temp_1.modify {calculate('index_title' ~ "ep " format=F0)|'index_title' 1 *} * extract the Ep number
index_temp_1.modify {calculate('index_title' ~ "Ep." format=F0)|'index_title' 1 *} * extract the Ep number
index_temp_1.modify {calculate('index_title' ~ "ep." format=F0)|'index_title' 1 *} * extract the Ep number
index_temp_1.modify {calculate('index_title' ~ "EP." format=F0)|'index_title' 1 *} * extract the Ep number
* only when a number was found: replace the scrubbed index_episode by "Ep. <number>"
index_episode.modify {replace('index_temp_1' not "")|'index_episode'|Ep. 'index_temp_1'}
* strip the marker plus number from the title, one line per marker spelling
index_title.modify {remove| Ep 'index_temp_1'}
index_title.modify {remove| Ep. 'index_temp_1'}
index_title.modify {remove| ep 'index_temp_1'}
index_title.modify {remove| ep. 'index_temp_1'}
index_title.modify {remove| EP. 'index_temp_1'}
*
* The scrubbed episode can contain the subtitle as well, like: Trambusto nel bosco Ep 25
* index_temp_2 holds a count based on the spaces in index_episode, index_temp_1 is
* overwritten with the number calculated from index_episode.
index_temp_2.modify {calculate(format=F0)|'index_episode' " " #} * count the words
index_temp_1.modify {calculate(format=F0)|'index_episode' 1 *} * extract the episode num
* when the count is above 3 the episode text is put in front of index_subtitle ...
index_subtitle.modify {addstart('index_temp_2' > "3")|'index_episode'}
* ... and the marker plus number is stripped from the subtitle again
index_subtitle.modify {remove| Ep 'index_temp_1'}
index_subtitle.modify {remove| Ep. 'index_temp_1'}
index_subtitle.modify {remove| ep 'index_temp_1'}
index_subtitle.modify {remove| ep. 'index_temp_1'}
* NOTE(review): this rewrite to "Ep. <number>" has no condition, unlike the earlier conditional
* replace of index_episode; confirm the result when index_temp_1 holds no number.
index_episode.modify {replace|'index_episode'|Ep. 'index_temp_1'}
*
* Remove a subtitle that merely repeats the title.
* index_temp_1 is reused here: it gets a copy of index_title with " 2a serie" removed.
index_temp_1.modify {replace|'index_temp_1'|'index_title'}
index_temp_1.modify {remove| 2a serie}
* when index_subtitle contains that shortened title, the subtitle is emptied
index_subtitle.modify {replace(~ 'index_temp_1')|'index_subtitle'|} * removes title without the 2a serie addition
* drop double quotes from the subtitle and tidy its capitalisation
index_subtitle.modify {remove|"}
index_subtitle.modify {cleanup(style=name)}
*
* Detail page post-processing.
* The title of the detail page is taken from the index page title.
title.modify {addstart|'index_title'} * no reliable title in detail page!!
* Remove from the description the labelled values that were already scrubbed into their own elements.
description.modify {remove|Titolo originale: 'titleoriginal'}
description.modify {remove|Genere: 'category'}
description.modify {remove|Produzione: 'producer'}
description.modify {remove|Regia: 'director'}
description.modify {remove|Cast: 'actor'}
description.modify {remove|Anno: 'productiondate'}
* tidy up: cleanup, collapse double spaces, drop left-over "<br" fragments
description.modify {cleanup}
description.modify {replace|  | }
description.modify {remove|<br}
* NOTE(review): applies when titleoriginal contains "Id."; only one expression is given here while
* the other replace lines in this file give two - presumably meant to set titleoriginal to the
* title, confirm the behaviour with a single expression.
titleoriginal.modify {replace(~ "Id.")|'title'}
* index_temp_1 still holds the index title without " 2a serie"; a subtitle containing it is emptied
subtitle.modify {replace(~ 'index_temp_1')|'subtitle'|} * removes title without the 2a serie addition
* turn the comma separated actor list into a list separated by the | character
actor.modify {replace|, |\|}