72 lines
5.1 KiB
INI
Executable File
72 lines
5.1 KiB
INI
Executable File
**------------------------------------------------------------------------------------------------
|
|
* @header_start
|
|
* WebGrab+Plus ini for grabbing EPG data from the Finnish Yle Areena. Works for all free-to-air channels.
|
|
* @Site: areena.yle.fi
|
|
* @MinSWversion: 1.56.12
|
|
* @Revision 1 - [16/07/2017]
|
|
* - creation
|
|
* @Remarks:
|
|
* @header_end
|
|
**------------------------------------------------------------------------------------------------
|
|
*Site settings: Europe/Helsinki time zone, up to 28 days of data, fi-FI culture, UTF-8 pages, xmltv-ns episode numbering, MEKU ratings
site {url=areena.yle.fi|timezone=Europe/Helsinki|maxdays=28|cultureinfo=fi-FI|charset=UTF-8|titlematchfactor=90|episodesystem=xmltv-ns|ratingsystem=MEKU|keepindexpage}
|
|
*Index page url: the tv guide page with the date (urldate) appended to the t= parameter
url_index{url|http://areena.yle.fi/tv/opas?t=|urldate|}
|
|
*Request headers sent when fetching the index page
url_index.headers {accept=text/html,application/xhtml+xml,application/xml|contenttype=text/html}
|
|
*Extra Accept-Encoding header (gzip,deflate)
url_index.headers {customheader=Accept-Encoding=gzip,deflate}
|
|
*urldate is written as yyyy-MM-dd
urldate.format {datestring|yyyy-MM-dd}
|
|
*Showsplit: isolate the schedule of the configured channel and split it into separate programs
|
|
index_variable_element.modify {addstart('index_variable_element' = "")|"'config_site_id'"} *Store the site id wrapped in double quotes so it can be matched against the aria-label attribute below
|
|
index_showsplit.scrub {regex||aria-label='index_variable_element'></div>(.+?)<li class="schedule-card__mobile-more">||} *Get all programs for the correct channel (everything between the channel's aria-label and the "mobile-more" list item)
|
|
index_showsplit.modify {replace(type=regex)|(?<=<\/li>[\s\n]+)<li class|\|<li class} *Split the data into separate programs by inserting the element separator before each <li class that follows a closing </li>
|
|
*Start and stop times
|
|
index_start.scrub {regex||<time itemprop="startDate" datetime="([^"]+)">||} *Start time: datetime attribute of the startDate <time> tag
|
|
index_stop.scrub {regex||<time itemprop="endDate" datetime="([^"]+)">||} *Stop time: datetime attribute of the endDate <time> tag
|
|
*Title, rating and category (rating and category are derived from the scrubbed title, so the order of the lines below matters)
|
|
index_title.scrub {regex||<span class="schedule-card__title">[\s\n]*<span itemprop="name">(.+?)</span>||} *Raw title: the itemprop="name" span inside the schedule-card title
|
|
index_rating.modify {substring(type=regex)|'index_title' \((S?\d{0,2})\)$} *Get rating from program title (trailing parenthesised value: optional S plus up to two digits)
|
|
index_title.modify {remove(type=regex)| \(S?\d{0,2}\)} *Remove rating from program title
|
|
index_title.modify {remove(type=regex)| \d+\. kausi$} *Remove season number from program title (Sub,Ava,MTV3)
|
|
index_category.modify {addend('index_title' ~ "Elokuva: ")|Elokuva} *Mark movies based on title
|
|
index_category.modify {addend('index_title' ~ "kino")|Elokuva} *Mark movies based on title
|
|
index_category.modify {addend('index_title' ~ "Kotikatsomo: ")|Elokuva} *Mark movies based on title
|
|
index_category.modify {addend('index_title' ~ "Leffa: ")|Elokuva} *Mark movies based on title
|
|
index_title.modify {remove(type=regex)|^(?i)([\w-]*)(elokuva\|jännäri\|kino\|komedia\|kotikatsomo\|leffa\|perjantai\|putki\|trilleri)(: )} *Remove nonsense from the start of program names (case-insensitive "<word>: " prefixes; done after the category checks above, which look for these prefixes)
|
|
*Description: scrub the text, tidy punctuation and whitespace, and normalise season/episode wording to "Kausi N. Jakso N/M" / "N. kausi. Jakso N/M"
|
|
index_description.scrub {regex||<span itemprop="description">(.*?)</span>||} *Raw description: contents of the itemprop="description" span
|
|
index_description.modify {remove(type=regex)|(?<=[\?!])\.} *Remove dot after ? or !
|
|
index_description.modify {remove(type=regex)|\s+$} *Remove trailing whitespace
|
|
index_description.modify {replace(type=regex)|\s{2,}| } *Collapse runs of two or more whitespace characters into a single space
|
|
index_description.modify {replace(type=regex)|(?<=\d+\. )tuotantokausi|kausi} * Tuotantokausi -> kausi
|
|
index_description.modify {replace(type=regex)|(?:Kausi \d+([,.\s]+\|[,.\s]+[Oo]sa \|[,.\s]+jakso ))(?:\d+)|. Jakso } *Kausi 1, 1/10 or Kausi 1, osa 1/10 or Kausi 1, jakso 1/10 -> Kausi 1. Jakso 1/10.
|
|
index_description.modify {replace(type=regex)|(?<=\d+\. kausi)([,\s]+\|, osa )(?=\d+)|. Jakso } *1. kausi, 1/10 -> 1. kausi. Jakso 1/10.
|
|
index_description.modify {addend('index_description' = "")|'index_title'} *Add title to description if description is empty
|
|
*Showicon: build an image url on the Yle image CDN from the program's link path
|
|
index_showicon.scrub {regex||<a href="/([^"]+)" class="schedule-card__link"||} *Areena url for YLE programs (href path of the schedule-card link, without the leading slash)
|
|
index_showicon.modify {addstart('index_showicon' not = "")|http://a5.images.cdn.yle.fi/image/upload/w_400,h_225,c_thumb/v42/13-} *Set size for icon with w_xxx and h_xxx
|
|
index_showicon.modify {addend('index_showicon' not = "")|.jpg} *Append the .jpg extension (only when a link was found)
|
|
*Season and episode numbers from description (index_temp_1 = season, index_temp_2 = episode)
|
|
index_temp_1.modify {substring(type=regex)|'index_description' (\d+\. kausi\|\d+\. kauden\|Kausi \d+)} *Find season mentions: "N. kausi", "N. kauden" or "Kausi N"
|
|
index_temp_1.modify {substring(type=element)|0 1} *Keep only first match
|
|
index_temp_1.modify {substring(type=regex)|\d+} *Keep only the digits (season number)
|
|
index_temp_2.modify {substring(type=regex)|'index_description' (Jakso \d+\|Osa \d+\|\d+\. jakso\|^\d+\/\d+\.)} *Find episode mentions: "Jakso N", "Osa N", "N. jakso" or a leading "N/M."
|
|
index_temp_2.modify {substring(type=element)|0 1} *Keep only first match
|
|
index_temp_2.modify {remove(type=regex)|\/.+} *Drop everything from the slash onwards (the "/M" part of N/M)
|
|
index_temp_2.modify {substring(type=regex)|\d+} *Keep only the digits (episode number)
|
|
*Episode xmltv_ns: combine the season (index_temp_1) and episode (index_temp_2) numbers into index_episode
|
|
index_temp_1.modify {calculate(format=F0 'index_temp_1' not = "")|1 -} *NOTE(review): presumably subtracts 1 from the season number (xmltv_ns counts from zero) - confirm
|
|
index_temp_2.modify {calculate(format=F0 'index_temp_2' not = "")|1 -} *NOTE(review): presumably subtracts 1 from the episode number (xmltv_ns counts from zero) - confirm
|
|
index_episode.modify {addend('index_temp_1' not = "")|'index_temp_1'.} *Season part followed by a dot, only when a season was found
|
|
index_episode.modify {addend('index_temp_2' not = "")|.'index_temp_2'.} *Episode part wrapped in dots, only when an episode was found
|
|
index_episode.modify {replace|..|.} *Collapse the double dot produced when both season and episode parts were added
|
|
index_temp_1.modify {clear} *Reset the temporary season element
|
|
index_temp_2.modify {clear} *Reset the temporary episode element
|
|
** _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|
|
** ##### CHANNEL FILE CREATION (only to create the xxx-channel.xml file)
|
|
**
|
|
** @auto_xml_channel_start
|
|
*index_site_id.scrub {regex||<div class="channel-header__logo.+?aria-label="([^"]+)"></div>||}
|
|
*index_site_channel.modify {addend('index_site_channel' = "")|'index_site_id'}
|
|
*scope.range {(channellist)|end}
|
|
*index_site_id.modify {cleanup(removeduplicates=equal,100 link="index_site_channel")}
|
|
*end_scope
|
|
** @auto_xml_channel_end
|