76 lines
3.5 KiB
INI
Executable File
76 lines
3.5 KiB
INI
Executable File
**------------------------------------------------------------------------------------------------
|
|
* @header_start
|
|
* WebGrab+Plus ini for grabbing EPG data from TvGuide websites
|
|
* @Site: rtm.gov.my
|
|
* @MinSWversion:
|
|
* @Revision 0 - [11/08/2014] Francis De Paemeleere
|
|
* - creation
|
|
* @Remarks:
|
|
* @header_end
|
|
**------------------------------------------------------------------------------------------------
|
|
|
|
* Site definition: host, timezone, culture, charset and title match factor used by this ini.
site {url=rtm.gov.my|timezone=Asia/Kuala_Lumpur|maxdays=1|cultureinfo=ms-MY|charset=UTF-8|titlematchfactor=90|keepindexpage}
|
|
|
|
* Fixed index page address; no date or channel component is added to the url here.
url_index{url|http://live.rtm.gov.my/epg/}
|
|
url_index.headers {customheader=Accept-Encoding=gzip,deflate} * to speed up the downloading of the index pages
|
|
|
|
* No split patterns are given here: index_showsplit starts out as the full index page
* (see the (splitindex) scope below, which cuts it into the individual shows).
index_showsplit.scrub {multi||||}
|
|
|
|
* Load index_variable_element with the configured site id of the channel.
* The logo and show-split regexes further down match this value against the title="..." attribute in the page.
scope.range {(urlindex)|end}
|
|
index_variable_element.modify {clear}
|
|
index_variable_element.modify {addend|'config_site_id'}
|
|
end_scope
|
|
|
|
* Channel logo: capture the src="..." value of the tag whose title equals index_variable_element
* and which carries class="tl_location_image".
scope.range {(datelogo)|end}
|
|
index_urlchannellogo.scrub {regex||src="([^"]*)"[^>]*title="'index_variable_element'"[^>]*class="tl_location_image"||}
|
|
end_scope
|
|
|
|
* Build the list of shows for the requested channel out of the full index page.
* Steps: keep a copy of the page, narrow index_showsplit to this channel's section, split it into events,
* attach the matching details block to every event, then sort the result on data-start.
scope.range {(splitindex)|end}
|
|
index_temp_6.modify {addend|'index_showsplit'} * full index_page
|
|
* Keep only the text between this channel's tl_location_image tag (title = index_variable_element)
* and the next class="tl_location_image".
index_showsplit.modify {substring(type=regex)|'index_showsplit' "title="'index_variable_element'"[^>]*class="tl_location_image"(.*?)class="tl_location_image""}
|
|
* Reduce that section to the class="tl_event" ... </a fragments (one per event).
index_showsplit.modify {substring(type=regex)|class="tl_event"(.*?)</a}
|
|
|
|
* index_temp_1 collects the rebuilt show entries.
index_temp_1.modify {clear}
|
|
* For every event fragment (index_temp_5):
*   index_temp_2 = the numeric data-event id of the fragment
*   index_temp_4 = the tl_event_details block with the same data-event id, taken from the full page copy (index_temp_6)
* Both the lookup and the append are skipped when no data-event id was found (index_temp_2 empty).
loop {(each 'index_temp_5' in 'index_showsplit' max=3000)|end}
|
|
index_temp_4.modify {clear}
|
|
index_temp_2.modify {clear}
|
|
index_temp_2.modify {substring(type=regex)|'index_temp_5' "data-event="(\d*)""}
|
|
index_temp_4.modify {substring(""not='index_temp_2' type=regex)|'index_temp_6' "(class="tl_event_details"[^>]*data-event="'index_temp_2'".*?class="tl_description_bottom")"}
|
|
* Append: event fragment + marker + details block + entry separator ###_###.
* NOTE(review): the marker is spelled _DEATILS_ (sic); it is runtime text and nothing else in this ini refers to it.
* The </div> that follows the marker is part of the text the index_description regex below runs on.
index_temp_1.modify {addend(""not='index_temp_2')|'index_temp_5'_DEATILS_</div>'index_temp_4'###_###}
|
|
end_loop
|
|
* Replace index_showsplit by the rebuilt entries.
index_showsplit.modify {clear}
|
|
index_showsplit.modify {addstart|'index_temp_1'}
|
|
* Turn the ###_### markers into \| (presumably the separator between the individual shows - TODO confirm).
index_showsplit.modify {replace|###_###|\|}
|
|
* Drop the unfinished tag ending in class="tl_description_bottom" that the details regex leaves at the end.
index_showsplit.modify {remove(type=regex)|<[^>]*class="tl_description_bottom"$}
|
|
|
|
* now we have to sort the listing (why oh why!)
* The sort key is the data-start value scrubbed from each index_showsplit entry
* (format=time,unix: presumably read as unix time - TODO confirm).
|
|
index_showsplit.modify {sort(ascending,string)}
|
|
sort_by.scrub {regex(target="index_showsplit")||data-start="([^"]*)||}
|
|
sort_by.modify {calculate(target="index_showsplit" format=time,unix)}
|
|
end_scope
|
|
|
|
* Per-show elements, scrubbed from the entries built in the (splitindex) scope.
* Start time: value of the data-start attribute.
index_start.scrub {regex||data-start="([^"]*)||}
|
|
|
|
* Title, category (text after "Genre:") and subtitle come from the details part of the entry.
index_title.scrub {regex||class="tl_description_header_title"[^>]*>([^<]*)||}
|
|
index_category.scrub {regex||<strong>Genre:[^>]*?>([^<]*)||}
|
|
index_subtitle.scrub {regex||class="tl_description_header_subtitle"[^>]*?>([^<]*)||}
|
|
* Description: whatever follows a </div> up to the end of the entry (the leading .* is greedy).
index_description.scrub {regex||^.*</div>(.*?)$||}
|
|
* Show icon: src value found after class="tl_event_image".
index_showicon.scrub {regex||class="tl_event_image".*? src="([^"]*)||}
|
|
|
|
index_description.modify {remove(type=regex)||<img[^>]*>||} * remove any images in the description
|
|
* Clean up the remaining markup in the description.
index_description.modify {cleanup(tags="<"">")}
|
|
* Strip the literal N/A from the category.
index_category.modify {remove|N/A}
|
|
* Prefix a non-empty show icon with the EPG base url.
index_showicon.modify {addstart('index_showicon'not="")|http://live.rtm.gov.my/epg/}
|
|
|
|
|
|
** _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|
|
** ##### CHANNEL FILE CREATION (only to create the xxx-channel.xml file)
|
|
**
|
|
** @auto_xml_channel_start
|
|
*index_site_channel.scrub {regex||<img [^>]*title="([^"]*)"[^>]*class="tl_location_image"[^>]*>||}
|
|
*index_site_id.scrub {regex||<img [^>]*title="([^"]*)"[^>]*class="tl_location_image"[^>]*>||}
|
|
*scope.range {(channellist)|end}
|
|
*index_site_id.modify {cleanup(removeduplicates=equal,100 link="index_site_channel")}
|
|
*end_scope
|
|
** @auto_xml_channel_end
|