159 lines
8.1 KiB
INI
Executable File
159 lines
8.1 KiB
INI
Executable File
**------------------------------------------------------------------------------------------------
|
|
* @header_start
|
|
* WebGrab+Plus ini for grabbing EPG data from TvGuide websites
|
|
* @Site: nz.entertainment.yahoo.com
|
|
* @MinSWversion: V1.1.1/49
|
|
* none
|
|
* @Revision 2 - [08/05/2013] Jan van Straaten
|
|
* general overhaul, yahoo search result filtered with select
|
|
* @Revision 1 - [19/07/2012] Jan van Straaten
|
|
* timeformat temp_4 'globalized', showdate in index_variable and forwards loop instead of backwards
|
|
* @Remarks:
|
|
* none
|
|
* @header_end
|
|
**------------------------------------------------------------------------------------------------
|
|
|
|
* General site settings: site address, fixed UTC+13:00 timezone offset, maxdays, culture and charset,
* titlematchfactor and the rating system label.
* NOTE(review): the timezone is a fixed offset, confirm it is still right outside daylight saving time.
site {url=nz.entertainment.yahoo.com|timezone=UTC+13:00|maxdays=3.1|cultureinfo=en-NZ|charset=UTF-8|titlematchfactor=90|ratingsystem=NZ}
|
|
* Second settings line: cookie file to load for yahoo.com, and firstshow=1.
site {loadcookie=nz.entertainment.cookie.txt,yahoo.com|firstshow=1}
|
|
* Index page url: fixed base address, then the channel placeholder, then a closing slash.
* NOTE(review): the channel placeholder is presumably replaced by the site_id of the channel, confirm.
url_index{url|http://nz.entertainment.yahoo.com/tv-guide/channel/|channel|/}
|
|
* example of a resulting index url; one such page holds 3 days:
*http://nz.entertainment.yahoo.com/tv-guide/channel/2/ = 3 days
|
|
* urldate is a plain day counter with value 0; url_index above contains no date part.
urldate.format {daycounter|0}
|
|
* one show block runs from <li class="genre up to </li>
index_showsplit.scrub {multi|<li class="genre|||</li>}
|
|
* link to the show detail page: the href found inside <h4>, prefixed with the site address
index_urlshow {url()|http://nz.entertainment.yahoo.com|<h4>|<a href="|">|</a>} * returns a search result with all shows with title
|
|
*
|
|
* date header of a day, taken from the <h3 id="..."> heading
index_date.scrub {multi(force)|<h3 id="|">|<span class="back">|</h3>}
|
|
* start and stop are scrubbed from the same <span class="stamp"> text, split on " - ":
* start includes only the first part, stop excludes the first part
index_start.scrub {single(separator=" - " include=first)|<span class="stamp">||</span>|</span>}
|
|
index_stop.scrub {single(separator=" - " exclude=first)|<span class="stamp">||</span>|</span>}
|
|
* title is the link text inside <h4>
index_title.scrub {single|<h4>|>|</a>|</h4>}
|
|
* category is the text between "-" and " item" at the start of the show block
* NOTE(review): presumably the remainder of the class attribute opened in index_showsplit, confirm against the page source.
index_category.scrub {single|-|| item|">}
|
|
* rating is the text between ( and ) inside the <dd> element
index_rating.scrub {single|<dd>| (|)|</dd>}
|
|
*
|
|
*title.scrub {single|} * always the same as index_title
|
|
* the detail page is a search result, we scrub it in temp_1
|
|
* blocks run from class="day"> to </li></ul></div>, filtered with includeblock on index_variable_element
temp_1.scrub {multi(includeblock='index_variable_element')|class="day">|||</li></ul></div>} *
|
|
*
|
|
* operations:
|
|
*
|
|
* Scope indexshowdetails: builds two helper values for each show of the index page,
* index_variable_element (the show date as text) and index_temp_1 (the xmltv_id).
* Both are used by the select operations in the showdetails scope.
scope.range {(indexshowdetails)|end}
|
|
* the detail page is a search result that lists all shows with that title for a number of days and all channels
|
|
* the following filters the scrub of this search in temp_1 by means of includeblock
|
|
* first we compose a variable element with the date and the xmltv_id
|
|
*index_variable_element.modify {remove|'index_variable_element'} * clear
|
|
* start from an empty element
index_variable_element.modify {clear}
|
|
index_variable_element.modify {calculate(format=date,dddd d MMM)|'showdate'} * the date as in the start of the search result of the show detail page
|
|
* append the marker "&" and the configured xmltv_id, then put a double quote in front
index_variable_element.modify {addend|"&"'config_xmltv_id'"}
|
|
index_variable_element.modify {addstart()|"}
|
|
*
|
|
* next we extract the xmltv_id in a temp for use in the select
|
|
* character index of the & marker inside index_variable_element
* NOTE(review): assumes -@ is the index-of operator of calculate, confirm in the SiteIni manual.
index_temp_1.modify {calculate(type=char format=F0)|'index_variable_element' "&" -@}
|
|
* move 2 characters further, past the marker
index_temp_1.modify {calculate(format=F0)|2 +}
|
|
* keep everything from that index to the end
index_temp_1.modify {substring(type=char)|'index_variable_element' 'index_temp_1'}
|
|
index_temp_1.modify {remove()|"} * is xmltv_id
|
|
index_variable_element.modify {remove|"&"'config_xmltv_id'} * only date
|
|
end_scope.range
|
|
*
|
|
* Scope showdetails: picks the one entry of the detail page (a search result) that matches
* date, xmltv_id and start time of the current show, then derives description, videoaspect,
* videoquality, productiondate, director, actor, presenter and category from it.
* temp_1 and temp_2 are reused as scratch elements, so the order of the lines matters.
scope.range {(showdetails)|end}
|
|
* the start time in the proper format:
|
|
temp_4.modify {calculate(format=time,h:mmtt)|'index_start'}
|
|
temp_4.modify {cleanup(style=lower)} *am/pm is lowercase
|
|
temp_4.modify {remove()|.} * the time format in the search result is 12:35pm and not 12:35p.m.
|
|
*
|
|
* the following extracts the search result that contains the right date, xmltv_id and starttime
|
|
* NOTE(review): temp_1 also has a top level scrub that uses includeblock; confirm which of the two is effective.
temp_1.scrub {single()||||} * copy the detail show page in temp_1
|
|
temp_1.modify {replace|class="day">|\|} * split in shows
|
|
temp_1.modify {replace()|<div class="bd-rhs">|\|} * split non shows
|
|
temp_1.modify {select|'index_variable_element' ~} * select contains date
|
|
temp_1.modify {select()|'index_temp_1' ~~} * select contains xmltv_id
|
|
temp_1.modify {select()|'temp_4' ~~} * select contains start time
|
|
temp_1.modify {substring(type=element)|0 1} * take the first
|
|
*
|
|
* get rid of the non description data:
|
|
temp_2.modify {calculate(type=char format=F0)|'temp_1' "<p class="abstract">" -@} * index of the start of the description
|
|
* 20 is the length of the <p class="abstract"> tag, so temp_2 ends up just behind it
temp_2.modify {calculate(format=F0)|20 +}
|
|
* description is temp_1 from that index to the end
description.modify {substring(type=char)|'temp_1' 'temp_2'}
|
|
* strip what sits between < and >
description.modify {cleanup(tags="<"">")}
|
|
*
|
|
* extract other elements from description:
|
|
* set videoaspect to WS when the description contains (WS), and videoquality to HD when it contains HD
videoaspect.modify {addstart('description' ~ "(WS\)")|WS}
|
|
videoquality.modify {addstart('description' ~ "HD")|HD}
|
|
* then remove both markers from the description
* NOTE(review): the HD remove has no word boundary, so it presumably also hits HD inside longer words, confirm.
description.modify {remove|(WS)}
|
|
description.modify {remove|HD}
|
|
*
|
|
* productiondate
|
|
* let the engine find the production date in the description, then remove it from the text
productiondate.modify {calculate(format=productiondate)|'description'}
|
|
description.modify {remove|. 'productiondate'} *if at the end
|
|
description.modify {remove|'productiondate':} *if at the beginning
|
|
*
|
|
* director
|
|
* temp_1 = index of the word Director in the description
temp_1.modify {calculate(type=char format=F0)|'description' "Director" -@}
|
|
* when temp_1 is not -1: add 9, that is the 8 letters of Director plus one more character
* NOTE(review): -1 presumably means the word was not found, confirm.
temp_1.modify {calculate(not "-1" format=F0)|9 +}
|
|
* director is the description from that index to the end, only when temp_1 is not -1
director.modify {substring('temp_1' not "-1" type=char)|'description' 'temp_1'}
|
|
* remove the director text from the description, with and without a colon behind the keyword
description.modify {remove(separator=", ")|Director 'director'}
|
|
description.modify {remove(separator=", ")|Director: 'director'}
|
|
* actors
|
|
* same pattern as director: index of Starring, plus 9 (8 letters and one more character)
temp_1.modify {calculate(type=char format=F0)|'description' "Starring" -@}
|
|
temp_1.modify {calculate(not "-1" format=F0)|9 +}
|
|
actor.modify {substring('temp_1' not "-1" type=char)|'description' 'temp_1'}
|
|
* drop the dots from the actor text
actor.modify {remove|.}
|
|
actor.modify {cleanup} *removes trailing spaces
|
|
actor.modify {replace|, |\|} * convert to multivalue
|
|
* remove the actor text from the description, with and without a colon behind the keyword
description.modify {remove(separator=", ")|Starring 'actor'}
|
|
description.modify {remove(separator=", ")|Starring: 'actor'}
|
|
* presenters
|
|
* same pattern again: index of Voices Of, plus 10 (9 characters and one more)
temp_1.modify {calculate(type=char format=F0)|'description' "Voices Of" -@}
|
|
temp_1.modify {calculate(not "-1" format=F0)|10 +}
|
|
presenter.modify {substring('temp_1' not "-1" type=char)|'description' 'temp_1'}
|
|
* drop the dots from the presenter text
presenter.modify {remove|.}
|
|
presenter.modify {cleanup} *removes trailing spaces
|
|
presenter.modify {replace|, |\|} * convert to multivalue
|
|
* remove the presenter text from the description, with and without a colon behind the keyword
description.modify {remove(separator=", ")|Voices Of 'presenter'}
|
|
description.modify {remove(separator=", ")|Voices Of: 'presenter'}
|
|
* category
|
|
* romantic comedy is tested first and removed from the description straight away
* NOTE(review): presumably so that the comedy test below does not match it a second time, confirm.
category.modify {addstart('description' ~ "romantic comedy")|Romantic Comedy}
|
|
description.modify {remove|'category'}
|
|
* one category value is added for every keyword that the description contains
category.modify {addstart('description' ~ "comedy")|Comedy}
|
|
category.modify {addstart('description' ~ "action")|Action}
|
|
category.modify {addstart('description' ~ "featurette")|Featurette}
|
|
category.modify {addstart('description' ~ "drama")|Drama}
|
|
category.modify {addstart('description' ~ "crime")|Crime}
|
|
category.modify {addstart('description' ~ "thriller")|Thriller}
|
|
category.modify {addstart('description' ~ "horror")|Horror}
|
|
category.modify {addstart('description' ~ "western")|Western}
|
|
category.modify {addstart('description' ~ "animated")|Animated}
|
|
category.modify {addstart('description' ~ "documentary")|Documentary}
|
|
* remove the category values found above from the description
description.modify {remove|'category'}
|
|
*description.modify {cleanup}
|
|
* fallback text when the description is empty at this point
description.modify {addstart("")|No details.}
|
|
end_scope
|
|
*
|
|
* These two lines sit outside the scopes above.
* lowercase the category values added in the showdetails scope
category.modify {cleanup(style=lower)} *same style as index_category
|
|
* title has no scrub of its own, it is filled from index_title
title.modify {addstart|'index_title'} * title is always the same as index_title
|
|
*
|
|
** _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|
|
** ##### CHANNEL FILE CREATION (only to create the xxx-channel.xml file)
|
|
**
|
|
** @auto_xml_channel_start
|
|
*scope.range {(channellist)|end}
|
|
*url_index {url|http://nz.entertainment.yahoo.com/tv-guide/personalise/}
|
|
*index_temp_1.scrub {single||||}
|
|
*index_site_id.scrub {multi||||}
|
|
*index_temp_1.modify {replace|Select your channels|\|}
|
|
*index_temp_1.modify {replace|<div class="buttons">|\|}
|
|
*index_temp_1.modify {substring(type=element)|1} * not the first block
|
|
*index_temp_1.modify {remove(type=element)|-1} * not the last
|
|
*index_temp_1.modify {remove|</label><select id="tv-form-channels" name="venue_id" multiple>}
|
|
*index_temp_1.modify {remove()| selected}
|
|
*index_temp_1.modify {remove()|selected}
|
|
*index_temp_1.modify {remove()|</select>}
|
|
*index_temp_1.modify {remove()|</div>}
|
|
*index_temp_1.modify {remove()|<option value="}
|
|
*index_temp_1.modify {remove()|</option>}
|
|
*index_temp_1.modify {replace()|\n|\|}
|
|
*index_temp_1.modify {replace()|">|\|}
|
|
*index_site_id.modify {clear}
|
|
*index_site_id.modify {substring(type=element)|'index_temp_1' 0 1/2}
|
|
*index_site_channel.modify {substring(type=element)|'index_temp_1' 1 1/2}
|
|
*index_site_channel.modify {remove|amp;}
|
|
*end_scope
|
|
** @auto_xml_channel_end
|