epg/siteini.pack/Spain/elmundo.es.mp.ini

**------------------------------------------------------------------------------------------------
* @header_start
* WebGrab+Plus ini for grabbing EPG data from TvGuide websites
* @Site: elmundo.es
* @MinSWversion: V1.1.1/54
*   none
* @Revision 11 - [22/04/2015] Francis De Paemeleere
* - sync this version to mp version
* - make description work again
* @Revision 10 - [26/05/2013] Jan van Straaten
*	- removed categories like cine from title and index_title
* @Revision 9 - [13/05/2013] Mr Spock
*   - added missing * in header
* @Revision 8 - [11/03/2013] Francis De Paemeleere
*   - added showicon & urlchannellogo
* @Revision 7 - [01/11/2012] MrSpock, use REX to do the MP formatting
*   added date force, needs 1.0.9
* @Revision 6 - [16/10/2011] Jan van Straaten/MrSpock
* - added date force, needs 1.0.9
* @Remarks:
*   none
* @header_end
**------------------------------------------------------------------------------------------------

* Site-wide settings: site name, timezone, number of days to grab, culture, page charset and title match factor.
site {url=elmundo.es|timezone=Europe/Madrid|maxdays=4.1|cultureinfo=es-ES|charset=ISO-8859-1|titlematchfactor=1}
* Episode numbers are written in the onscreen system (the operations further down prefix the number with 'E').
site {episodesystem=onscreen}
* One index file per channel: the URL is built from the channel id only and carries no date part.
url_index{url|http://estaticos.elmundo.es/elmundo/television/guiatv/js_parrilla/|channel|.js} * 4 days without urldate
* channel example: http://estaticos.elmundo.es/elmundo/television/guiatv/js_parrilla/3.js (La 1)
urldate.format {daycounter|0}
* Index scrubbing. The index is a JavaScript file: every show starts at 'new Programa("' and the
* index elements below pick their value by position after splitting the show text on '", "'.
index_showsplit.scrub {multi|new Programa("|||= new Canal}
* Channel logo: third piece of the text after 'new Canal("' when split on '"'; the site root is prepended when a value was found.
index_urlchannellogo.scrub {single(separator="\"" include=3)|new Canal("|||;}
index_urlchannellogo.modify {addstart('index_urlchannellogo' not= "")|http://www.elmundo.es/}
* URL of the show detail page, built from the fichas_programas path found in the show text.
index_urlshow {url|http://www.elmundo.es/elmundo/television/fichas_programas|/elmundo/television/fichas_programas||"));|"));}
* target example: http://www.elmundo.es/elmundo/television/fichas_programas/programa.html?p=139510&e=14793182-496
*
index_date.scrub {single(force separator="\", \"" include=3)||||"));} * scrubs the date (piece 3) and forces it as start date
index_start.scrub {single(separator="\", \"" include=8)||||"));} * only the last scrub-string part is given: "));  start is piece 8 after the split
index_title.scrub {single(separator="\", \"" include=1)||||"));}
index_temp_2.scrub {single(separator="\", \"" include=2)||||"));} * Subtitle; kept outside index_subtitle for better control
index_temp_4.scrub {single(separator="\", \"" include=5)||||"));} * category candidate, coded as a number (piece 5)
* Turn ': ' inside index_title into the element separator, so the parts before and after it become separate title elements.
index_title.modify {replace|: |\|}
*
* Detail page (show page) scrubbing:
* title: text of the <h3> inside <div class="cabecera2">; only the part after the last ': ' is kept.
title.scrub {single(separator=": " include=last)|<div class="cabecera2">|<h3>|</h3>|</h3>}
titleoriginal.scrub {single(lang=xx)|<strong>Título original:|</strong>|</li>|</li>}
subtitle.scrub {single|<div class="evento">|<h3>|</h3>|</h3>}
description.scrub {single|<div class="evento">|<p>|</p>|</div>} * first description, specific to the episode
temp_1.scrub {single|<div class="descripcion">|<p>|</p>|</p>} * general description; appended later if different from the first description
* Credits: each one is the text following its <strong>label:</strong>; lists are split on ', '.
director.scrub {single|<strong>Director:|</strong>|</li>|</li>}
actor.scrub {single(separator=", ")|<strong>Intérpretes:|</strong>|</li>|</li>}
* producer is labelled either 'Productora:' or 'Producción:' on the page, hence several scrub lines.
producer.scrub {single(separator=", ")|<strong>Productora:|</strong>|</li>|</li>}
producer.scrub {single(separator=", ")|<strong>Producción:|</strong>|</li>|</li>}
producer.scrub {single(separator=", ")|<strong>Producción:|</strong>|<li>|</ul>} * sometimes the information is placed outside the <li>
* NOTE(review): the search text below looks like a mangled '&#38;' (numeric entity for '&') - confirm against the page source.
producer.modify {replace|\"#38;|&amp;}	* the site writes this entity incorrectly
producer.modify {cleanup(null)}	* if Producción was misplaced, some HTML code can be included
writer.scrub {single(separator=", ")|<strong>Guión:|</strong>|</li>|</li>}
composer.scrub {single(separator=", ")|<strong>Música:|</strong>|</li>|</li>}
category.scrub {single|<div class="genero">||</div>|</div>}
productiondate.scrub {single|<div class="fichatecnica">|<li>|<strong>|<strong>} * the text holds country and year, but only the year is extracted
showicon.scrub {single|<div class="foto">|src="|"|</div>}
*
* operations:
* Unescape \' and \" in index_title (quotes arrive backslash-escaped from the index file):
index_title.modify {replace|\\'|\'}	* Kid\'s Club -> Kid's Club
index_title.modify {replace|\\"|\"}
***
index_title.modify {addend('index_temp_2' not "")|\|'index_temp_2'} * add the index subtitle as an extra index_title element to facilitate title comparison
index_date.modify {calculate(format=utcdate)} * convert from unix date format to dd/MM/yyyy and correct from utc to timezone
***
*
* Append the general description (temp_1) unless description already contains it:
description.modify {addend(not ~ 'temp_1')|'temp_1'}
description.modify {replace(null)|\'\'|\"} * the site uses '' instead of "
* Drop the boilerplate sentence 'Emisión de una película. ' (Spanish for 'Broadcast of a film.'):
description.modify {remove|Emisión de una película. }
*
title.modify {addstart('title' = "")|'index_title'}	* if title is empty, copy index_title
titleoriginal.modify {remove(= 'title')|'titleoriginal'} * removes titleoriginal if it is the same as title
*
* If subtitle has an episode number, copy it to episode.
* Order matters: episode must be computed first because the two remove lines use its value.
episode.modify {calculate('subtitle' ~ "episodio" format=F0)|'subtitle' 1 *} * if the word episodio is in subtitle, extract the number (subtitle times 1, no decimals)
subtitle.modify {remove (~ "episodio")|: Episodio 'episode'}	* remove the episode info, including a leading ': ', from subtitle
subtitle.modify {remove (~ "episodio")|Episodio 'episode'}	* same, for a subtitle where no ': ' precedes the episode info
episode.modify {remove("0")|0} * if the number is 0, remove it
episode.modify {addstart(not "")|E}	* adds an 'E' in front of the episode number
*
* If title = index_temp_2 (the index subtitle) and subtitle is empty, then copy index_title into subtitle.
* Usually, index_title is the name of a movie program, and title is the name of the movie.
* temp_1 is reused as scratch space here; its scrubbed value (general description) has already been consumed.
temp_1.modify {clear}
temp_1.modify {addstart('title' = 'index_temp_2')|'index_title'}
subtitle.modify {addstart("")|'temp_1'}
*
* If index_title = title and subtitle is empty, then copy index_temp_2 (the index subtitle) into subtitle.
* Usually, title is the name of a sport program, and the index subtitle is the specific name of the match, race...
temp_1.modify {clear}
temp_1.modify {addstart('title' = 'index_title')|'index_temp_2'}
subtitle.modify {addstart("")|'temp_1'}
*
* If subtitle = title, then remove subtitle
subtitle.modify {remove('title')|'title'}
*
* If titleoriginal = title, then remove titleoriginal
* (the same check also appears earlier in this file, right after title is filled from index_title)
titleoriginal.modify {remove('title')|'titleoriginal'}
*
* Lookup table converting the code scrubbed from position 5 of the index (index_temp_4) into a category.
* The matching name is put in front of any category scrubbed from the detail page, as a separate element.
category.modify {addstart('index_temp_4' "1")|Series\|} * series
category.modify {addstart('index_temp_4' "20")|Informativos\|} * news
category.modify {addstart('index_temp_4' "31")|Magacín\|} * magazine show
category.modify {addstart('index_temp_4' "90")|Deportes\|} * sports
category.modify {addstart('index_temp_4' "104")|Cine\|} * film
category.modify {addstart('index_temp_4' "INT_1")|Corazón\|} * celebrity gossip
category.modify {addstart('index_temp_4' "INT_2")|Concursos\|} * game shows
*
*

********************
* operations for MP:
* uses rex.config.xml for putting all together into description:
*  <desc>{'subtitle'. }{'episode'. }{'productiondate'. }{'starrating' }{['rating']. }{'category(, )'. }{\n· Int.: 'actor(, )'. }{\n· Dirección: 'director(, )'. }{\n'description' }{\n· Presenta: 'presenter(, )'. }{\n· Guión: 'writer(, )'. }{\n· Música: 'composer(, )'. }{\n· Producción: 'producer(, )'.}</desc>
*  <star-rating></star-rating>

* Country: the fichatecnica entry holds a list of countries followed by the year; the last part is excluded.
* NOTE(review): if the program runs all scrubs before the operations, the clear below would wipe the
* value scrubbed on the next line and the country would never be added - confirm the execution order.
temp_3.modify {clear}
temp_3.scrub {single(exclude=last)|<div class="fichatecnica">|<li>|<strong>|<strong>}

* starrating is used only as a carrier to pass this info to rex (see the 'starrating' slot in the <desc> template above).
* Both lines prepend, so when both values exist the result reads: country. (original title)
starrating.modify {addstart('titleoriginal' not "")|('titleoriginal')}
starrating.modify {addstart('temp_3' not "")|'temp_3'. }	* Country
* end

**  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _
**      #####  CHANNEL FILE CREATION (only to create the xxx-channel.xml file)
**
** @auto_xml_channel_start
*scope.range {(channellist)|end}
*url_index{url|http://www.elmundo.es/elmundo/television/guiatv/}
*index_site_channel.scrub {multi|<span id="nombre_canal|">|</span>|</div>}
*index_site_id.scrub {multi|<span id="nombre_canal|_|">|</div>}
*end_scope
** @auto_xml_channel_end