99 lines
5.9 KiB
INI
Executable File
99 lines
5.9 KiB
INI
Executable File
**------------------------------------------------------------------------------------------------
|
|
* @header_start
|
|
* WebGrab+Plus ini for grabbing EPG data from TvGuide websites
|
|
* @Site: rai.it
|
|
* @MinSWversion: V1.1.1/52
|
|
* @Revision 1 - [13/04/2014] Jan van Straaten
|
|
* various adaptations to site changes
|
|
* @Revision 0 - [11/08/2011] Jan van Straaten
|
|
* none
|
|
* @Remarks:
|
|
* the unstructured rai.it site makes it difficult to grab all EPG data
|
|
* @header_end
|
|
**------------------------------------------------------------------------------------------------
|
|
|
|
* Site-wide settings: url, timezone, maxdays, cultureinfo, charset, titlematchfactor and episodesystem.
site {url=rai.it|timezone=UTC+01:00|maxdays=6|cultureinfo=it-IT|charset=UTF-8|titlematchfactor=90|episodesystem=onscreen}
|
|
* Additional site settings (ratingsystem, grabengine, firstshow, firstday); this line is commented out.
*site {ratingsystem=IT|grabengine=|firstshow=0|firstday=0000000}
|
|
* Index page URL, assembled from: the fixed guidatv/static/ path, the channel, "_", the date and ".html".
url_index{url|http://www.rai.it/dl/portale/html/palinsesti/guidatv/static/|channel|_|urldate|.html}
|
|
* The urldate part of the URL above is written as yyyy_MM_dd.
urldate.format {datestring|yyyy_MM_dd}
|
|
* Subpage numbering; this line is commented out.
*subpage.format {number||1|}
|
|
* Index page scrubs (all index_ elements).
* The index page is split into shows at every <div class="intG">.
index_showsplit.scrub {multi|<div class="intG">|||}
|
|
* URL of the show's page, taken from the <a href="..."> value; it must include "ContentItem" and must not include "media".
index_urlshow {url(include="ContentItem" exclude="media")||<a href="||" |</a></span>}
|
|
*
|
|
* Start time: the text inside <span class="ora">.
index_start.scrub {single|<span class="ora">||</span>|</span>}
|
|
* Title, subtitle and episode are all read from the same link text inside <span class="info">:
*   - title    : split on " -" or ": ", first part
*   - subtitle : split on " -" or ": ", last part, excluding parts with "Ep.", "ep." or "Ep "
*   - episode  : split on " -" only, the parts that contain "Ep.", "ep." or "Ep "
index_title.scrub {single(separator=" -"": " include=first)|<span class="info">|">|</a>|</span>}
|
|
index_subtitle.scrub {single(separator=" -"": " exclude="Ep.""ep.""Ep " include=last)|<span class="info">|">|</a>|</span>}
|
|
index_episode.scrub {single(separator=" -" include="Ep.""ep.""Ep ")|<span class="info">|">|</a>|</span>}
|
|
* Description: everything after class="eventDescription"> up to </div>, with the markup between "<" and ">" cleaned up.
index_description.scrub {single|class="eventDescription">||</div>|</div>}
|
|
index_description.modify {cleanup(tags="<"">")}
|
|
* index_temp_3 holds the text of <span class="solotesto">. It is first removed from the description and then
* appended to it - presumably so that it ends up exactly once, at the end (confirm against a live page).
index_temp_3.scrub {single|<span class="solotesto">||</span>|</div>}
|
|
index_description.modify {remove|'index_temp_3'}
|
|
index_description.modify {addend|'index_temp_3'}
|
|
*
|
|
* Scrubs for the elements without the index_ prefix (the detail page, see the remark on the title below).
* Most of them read labelled fields (Genere:, Titolo originale:, Produzione:, Regia:, Cast:, Ideatore:, Anno:)
* inside <div class="programmaArticolo">. Several elements have two scrub lines, each matching a different
* markup variant - presumably alternative page layouts (TODO confirm).
*title.scrub {single|<title>||</title>|</title>} * this site has no consistent title in the detail page!!
|
|
* Subtitle: the <h2> text inside <div class="superTitolo">.
subtitle.scrub {single|<div class="superTitolo">|<h2>|</h2>|<h3>}
|
|
* Description: the <p> text of the "programmaArticolo" or "Articolo" div, split on " />" and "\">",
* excluding the parts that contain <img, <span style=, <iframe or <a href.
description.scrub {single(separator=" />""\">" exclude="<img""<span style=""<iframe""<a href")|<div class="programmaArticolo">|<p>|</p>|</p>}
|
|
description.scrub {single(separator=" />""\">" exclude="<img""<span style=""<iframe""<a href")|<div class="Articolo">|<p>|</p>|</p>}
|
|
* Category: the text between "Genere: " and " <br />".
category.scrub {single|<div class="programmaArticolo">|Genere: | <br />|</p>}
|
|
* Original title: either the <i> text after "Titolo originale: " or the text between <em>( and )</em>.
titleoriginal.scrub {single|<div class="programmaArticolo">|Titolo originale: <i>|</i>}
|
|
titleoriginal.scrub {single|<div class="programmaArticolo">|<em>(|)</em>|</p>}
|
|
* Producer: the text after "Produzione: " up to the next "<".
producer.scrub {single|<div class="programmaArticolo">|Produzione: |<}
|
|
* Director: the text after "Regia: " (first part when split on "Cast"), or the <strong> text after " di".
director.scrub {single(separator="Cast" include=first)|<div class="programmaArticolo">|Regia: |<}
|
|
director.scrub {single|<div class="programmaArticolo">| di<strong>|</strong>|</p>}
|
|
* Actors: the text after "Cast: ", or the comma separated <strong> text after " con".
actor.scrub {single|<div class="programmaArticolo">|Cast: |<}
|
|
actor.scrub {single(separator=",")|<div class="programmaArticolo">| con<strong>|</strong>|</p>}
|
|
* Writer: the comma separated text after "Ideatore: ".
writer.scrub {single(separator=",")|<div class="programmaArticolo">|Ideatore: |<}
|
|
* Production date: the text after "Anno: " up to the next "<".
productiondate.scrub {single|<div class="programmaArticolo">|Anno: |<|</p>}
|
|
* NOTE(review): the second productiondate scrub has empty element start and end markers, so it takes the block
* from <div class="programmaArticolo"> up to </p> as a whole - confirm that this is the intended fallback.
productiondate.scrub {single|<div class="programmaArticolo">|||</p>}
|
|
*
|
|
*index_description.modify {addstart("")|nessun dettaglio}
|
|
*index_description.modify {remove('index_urlshow' not "")|'index_description'}
|
|
*
|
|
* The following extracts the episode from a title like .. Art alive Ep 42 .. and removes the Ep part from it.
* Steps, in this order:
*   1. normalise the title (space after "ep." / "Ep.", name-style cleanup)
*   2. put the Ep number found in the title into index_temp_1
*   3. if a number was found, set index_episode to "Ep. <number>"
*   4. remove the " Ep <number>" variants from index_title
|
|
index_title.modify {replace|ep.|ep. } * add space to ep. in case ep.52 (fails to extract the ep number without the space!)
|
|
index_title.modify {replace|Ep.|Ep. }
|
|
* NOTE(review): as shown here the search text and the replacement are both a single space, which makes this
* line a no-op - confirm whether a double space (or another whitespace character) was meant as search text.
index_title.modify {replace| | }
|
|
index_title.modify {cleanup(style=name)}
|
|
* Start from an empty index_temp_1 so that the "not empty" condition further down only holds when a number was found.
index_temp_1.modify {clear}
|
|
* One calculate line per spelling of the Ep marker; each one only applies when index_title contains that spelling.
index_temp_1.modify {calculate('index_title' ~ "Ep " format=F0)|'index_title' 1 *} * extract the Ep number
|
|
index_temp_1.modify {calculate('index_title' ~ "ep " format=F0)|'index_title' 1 *} * extract the Ep number
|
|
index_temp_1.modify {calculate('index_title' ~ "Ep." format=F0)|'index_title' 1 *} * extract the Ep number
|
|
index_temp_1.modify {calculate('index_title' ~ "ep." format=F0)|'index_title' 1 *} * extract the Ep number
|
|
index_temp_1.modify {calculate('index_title' ~ "EP." format=F0)|'index_title' 1 *} * extract the Ep number
|
|
* Only when index_temp_1 is not empty: index_episode becomes "Ep. " followed by the number.
index_episode.modify {replace('index_temp_1' not "")|'index_episode'|Ep. 'index_temp_1'}
|
|
* Remove the Ep marker and its number from the title, one line per spelling.
index_title.modify {remove| Ep 'index_temp_1'}
|
|
index_title.modify {remove| Ep. 'index_temp_1'}
|
|
index_title.modify {remove| ep 'index_temp_1'}
|
|
index_title.modify {remove| ep. 'index_temp_1'}
|
|
index_title.modify {remove| EP. 'index_temp_1'}
|
|
*
|
|
* The episode can contain the subtitle, like: Trambusto nel bosco Ep 25
* index_temp_2 and index_temp_1 are reused here; the lines below depend on their order.
|
|
index_temp_2.modify {calculate(format=F0)|'index_episode' " " #} * count the words
|
|
index_temp_1.modify {calculate(format=F0)|'index_episode' 1 *} * extract the episode num
|
|
* When the word count is greater than 3, the episode text is put in front of the subtitle ...
index_subtitle.modify {addstart('index_temp_2' > "3")|'index_episode'}
|
|
* ... after which the Ep marker and its number are removed from the subtitle, one line per spelling.
index_subtitle.modify {remove| Ep 'index_temp_1'}
|
|
index_subtitle.modify {remove| Ep. 'index_temp_1'}
|
|
index_subtitle.modify {remove| ep 'index_temp_1'}
|
|
index_subtitle.modify {remove| ep. 'index_temp_1'}
|
|
* The whole index_episode value is replaced by "Ep. " followed by the number in index_temp_1.
index_episode.modify {replace|'index_episode'|Ep. 'index_temp_1'}
|
|
*
|
|
* From here on index_temp_1 holds the index_title without " 2a serie".
index_temp_1.modify {replace|'index_temp_1'|'index_title'}
|
|
index_temp_1.modify {remove| 2a serie}
|
|
index_subtitle.modify {replace(~ 'index_temp_1')|'index_subtitle'|} * removes title without the 2a serie addition
|
|
* Final tidy-up of the subtitle: drop double quotes, then apply the name-style cleanup.
index_subtitle.modify {remove|"}
|
|
index_subtitle.modify {cleanup(style=name)}
|
|
*
|
|
* Post-processing of the elements without the index_ prefix.
* The title is taken from index_title.
title.modify {addstart|'index_title'} * no reliable title in detail page!!
|
|
* Remove the labelled fields that have their own element from the description.
description.modify {remove|Titolo originale: 'titleoriginal'}
|
|
description.modify {remove|Genere: 'category'}
|
|
description.modify {remove|Produzione: 'producer'}
|
|
description.modify {remove|Regia: 'director'}
|
|
description.modify {remove|Cast: 'actor'}
|
|
description.modify {remove|Anno: 'productiondate'}
|
|
description.modify {cleanup}
|
|
* NOTE(review): as shown here the search text and the replacement are both a single space, which makes this
* line a no-op - confirm whether a double space (or another whitespace character) was meant as search text.
description.modify {replace| | }
|
|
* Remove leftover "<br" fragments from the description.
description.modify {remove|<br}
|
|
* Applies when titleoriginal contains "Id.".
* NOTE(review): only one argument follows the condition, while the other replace lines in this file have a
* search text and a replacement - confirm whether 'titleoriginal' was meant as search text with 'title' as replacement.
titleoriginal.modify {replace(~ "Id.")|'title'}
|
|
* index_temp_1 is set earlier in this file to the index_title without " 2a serie".
subtitle.modify {replace(~ 'index_temp_1')|'subtitle'|} * removes title without the 2a serie addition
|
|
* Replace ", " between actor names by an escaped pipe - presumably the separator for multiple values (TODO confirm).
actor.modify {replace|, |\|}
|