120 lines
6.2 KiB
INI
Executable File
120 lines
6.2 KiB
INI
Executable File
**------------------------------------------------------------------------------------------------
|
|
* @header_start
|
|
* WebGrab+Plus ini for grabbing EPG data from TvGuide websites
|
|
* @Site: www.tm.com.my
|
|
* @MinSWversion: 1.1.1/54
|
|
* @Revision 3 - [21/05/2015] Francis De Paemeleere
|
|
* - try to reduce the memory footprint
|
|
* - speed up by using keepindexpage
|
|
* - create category pages (smaller memory footprint, but watch out for keepindexpage, see comments later on)
|
|
* @Revision 2 - [20/05/2015] Francis De Paemeleere
|
|
* - change to full page (and just keep index pages)
|
|
* - change .channels.xml generation
|
|
* @Revision 1 - [11/05/2013] Jan van Straaten
|
|
* - added season
|
|
* @Revision 0 - [30/04/2013] Jan van Straaten
|
|
* - creation
|
|
* @Remarks: Index_page grabbing very slow due to excessive data
|
|
* @header_end
|
|
**------------------------------------------------------------------------------------------------
|
|
*
|
|
* Global site settings (url, timezone, number of days, culture, charset, title matching).
* keepindexpage is the option the Revision 3 notes credit for the speed-up; it interacts with the
* category page url_index variant further down (see the remark in that section before changing it).
site {url=www.tm.com.my|timezone=UTC|maxdays=7|cultureinfo=en-GB|charset=utf-8|titlematchfactor=90|firstshow=1|nopageoverlaps|keepindexpage}
|
|
* Episode info is reported in "onscreen" form; index_episode gets an "EP " prefix in the indexshowdetails scope.
site {episodesystem=onscreen}
|
|
|
|
* Format of the date string that urldate appends to url_index (e.g. 5/21/2015; presumably .NET-style format specifiers).
urldate.format {datestring|M/dd/yyyy}
|
|
* Ask the server for a compressed response when fetching index pages.
url_index.headers {customheader=Accept-Encoding=gzip,deflate} * to speed up the downloading of the index pages
|
|
|
|
* One page version (very large memory footprint)
|
|
**>>
|
|
* Active index URL: the full livetv.aspx listing page, with the date appended through urldate.
* It contains no ##CAT## placeholder, so the replace in the urlindex scope leaves it unchanged.
url_index{url|http://www.tm.com.my/hypptv/Pages/livetv.aspx?Date=|urldate}
|
|
**<<
|
|
|
|
* Category page version
|
|
**>>
|
|
*url_index{url|https://www.tm.com.my/hypptv/Pages/live_tv_apekDev_ajax.aspx?Cat=##CAT##&Date=|urldate}
|
|
* replace the category in the url
|
|
* Remark: if you use this category page version, keep in mind that keepindexpage is enabled!
|
|
* so either:
|
|
* - remove keepindexpage -> very slow download for all channels
|
|
* or:
|
|
* - make a copy of this .ini for every category you need
|
|
**<<
|
|
|
|
* urlindex scope: put the category part of the channel's site_id into the index URL.
* The site_id is expected to look like ##Cat=<category>##ID=<channel id> (the format built by the
* commented-out channel file creation section at the end of this file).
scope.range {(urlindex)|end}
|
|
* start from an empty helper element
index_variable_element.modify {clear}
|
|
* copy the configured site_id into it
index_variable_element.modify {addstart|'config_site_id'}
|
|
* keep only what follows "##Cat=" up to the next '#'
index_variable_element.modify {substring(type=regex)|##Cat=([^#]*)}
|
|
* substitute the ##CAT## placeholder of the category page url_index with that value
url_index.modify {replace|##CAT##|'index_variable_element'} * will do nothing in "One page version"
|
|
end_scope
|
|
|
|
|
|
* datelogo scope: reload the helper element with the channel id part of the site_id.
* After this scope index_variable_element holds the value following "##ID=", which the
* index_showsplit.scrub below uses to select the <ul id="times<ID>"> list of this channel.
scope.range {(datelogo)|end}
|
|
* drop the category value left over from the urlindex scope
index_variable_element.modify {clear}
|
|
* copy the configured site_id into it again
index_variable_element.modify {addstart|'config_site_id'}
|
|
* keep only what follows "##ID=" up to the next '#'
index_variable_element.modify {substring(type=regex)|##ID=([^#]*)}
|
|
end_scope
|
|
*
|
|
* Split the index page into one element per show, taken from the <ul id="times<ID>"> list of the
* current channel (<ID> comes from index_variable_element, set in the datelogo scope).
* Earlier variant that used </li> as the element end string (disabled):
*index_showsplit.scrub {multi|<ul id="times'index_variable_element'"|$StartTimeHidden"|</li>|</ul>}
|
|
* Active variant: same start strings, element end string left empty, block ends at </ul>.
index_showsplit.scrub {multi|<ul id="times'index_variable_element'"|$StartTimeHidden"||</ul>}
|
|
* Regex alternative that grabs the whole <ul> block (disabled):
*index_showsplit.scrub {regex||<ul id="times'index_variable_element'".*?</ul>||}
|
|
scope.range {(splitindex)|end}
|
|
* remove the overlaps on the indexpages
|
|
* but first remove any data that differs between copies of the same show
* NOTE(review): the rules below assume remove(type=regex) deletes only the captured group when the
* pattern contains one, and the whole match otherwise - confirm against the WebGrab+Plus manual.
|
|
* id attributes: the part inside the quotes up to the last underscore
index_showsplit.modify {remove(type=regex)|id=\"([^\"]*)_}
|
|
* everything from id="_lindon" through "hypptv_schedule_timeslot_popup_container"
index_showsplit.modify {remove(type=regex)|id=\"_lindon\".*?\"hypptv_schedule_timeslot_popup_container\"}
|
|
* name attribute values
index_showsplit.modify {remove(type=regex)|name=\"([^\"]*)}
|
|
* style attribute values
index_showsplit.modify {remove(type=regex)|style=\"([^\"]*)\"}
|
|
* the opening <div ...> tag carrying id="_TitleSlot"
index_showsplit.modify {remove(type=regex)|<div[^>]*id=\"_TitleSlot\"[^>]*>}
|
|
|
|
* the </li> and whatever follows it, after the popup description
index_showsplit.modify {remove(type=regex)|^.*class="hypptv_schedule_timeslot_popup_description".*(</li>.*)}
|
|
* with the differing parts stripped, identical show elements can be dropped as duplicates
index_showsplit.modify {cleanup(removeduplicates)}
|
|
end_scope
|
|
**
|
|
* indexshowdetails scope: extract the programme fields from each show element.
scope.range {(indexshowdetails)|end}
|
|
* start time: value attribute of the ..._StartTimeHidden field, then converted with calculate(format=time)
index_start.scrub {regex||_StartTimeHidden[^>]*value=\"([^\"]*)\"||}
|
|
index_start.modify {calculate(format=time)}
|
|
* stop time: same handling for the ..._EndTimeHidden field
index_stop.scrub {regex||_EndTimeHidden[^>]*value=\"([^\"]*)\"||}
|
|
index_stop.modify {calculate(format=time)}
|
|
|
|
* title and description: text content of the popup title / popup description elements
index_title.scrub {regex||^.*class="hypptv_schedule_timeslot_popup_title"[^>]*>([^<]*)||}
|
|
index_description.scrub {regex||^.*class="hypptv_schedule_timeslot_popup_description"[^>]*>([^<]*)||}
|
|
|
|
* episode number: the digits following "Ep" (any case, optional dot and spaces)
index_episode.scrub {regex||[Ee][Pp]\s*\.?\s*(\d+)||}
|
|
index_subtitle.scrub {regex||[Ee][Pp]\s*\.?\s*\d+\s*- (.*?) -||} * subtitle is the part between - just after the episode number
|
|
* cast: text after "Lakonan:" or "Cast:" up to the last full stop; \| is an escaped pipe so the
* regex alternation is not read as an argument separator
index_actor.scrub {regex||(?:Lakonan\|Cast)\s*:(.*)\.||}
|
|
|
|
|
|
|
|
* Clean the description. The rules run from the most specific pattern to the least specific one,
* so keep this order.
index_description.modify {remove(type=regex)|([Ee][Pp]\s*\.?\s*\d+\s*- .*? -)} * remove episode info and subtitle
|
|
index_description.modify {remove(type=regex)|([Ee][Pp]\s*\.?\s*\d+\s*-\s*)} * remove episode info followed by a dash
|
|
index_description.modify {remove(type=regex)|([Ee][Pp]\s*\.?\s*\d+)} * remove bare episode info
|
|
index_description.modify {remove(type=regex)|((?:Lakonan\|Cast)\s*:.*\.)} * remove cast from description
|
|
* turn the comma separated cast list into pipe separated values (presumably one actor per value - confirm)
index_actor.modify {replace|,|\|}
|
|
* prefix the episode number with "EP " only when one was found
index_episode.modify {addstart(not="")|EP }
|
|
end_scope
|
|
*
|
|
** _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|
|
** ##### CHANNEL FILE CREATION (only to create the xxx-channel.xml file)
|
|
**
|
|
** @auto_xml_channel_start
|
|
*url_index{url|http://www.tm.com.my/hypptv/Pages/livetv.aspx?Cat=1}
|
|
*scope.range {(channellist)|end}
|
|
*index_site_id.scrub {multi|<div class="hypptv_scheduleselection_dropdown_channel"|<a id="ctl02_clusterLink||</ul> <!-- dropdown list -->}
|
|
*index_temp_1.modify {calculate(type=element format=F0)|'index_site_id' #}
|
|
*index_temp_4.modify {clear}
|
|
*index_temp_4.modify {addend|'index_site_id'}
|
|
*index_site_id.modify {clear}
|
|
*loop {(each 'index_temp_6' in 'index_temp_4' max=100)|end}
|
|
*index_temp_3.modify {substring(type=regex)|'index_temp_6' "^(\d*)"} * the category numbers
|
|
*index_temp_6.modify {replace|<div class="hypptv_scheduleselection_dropdown_channel_content_description_heading">|<div class="hypptv_scheduleselection_dropdown_channel_content_description_category">'index_temp_3'</div>}
|
|
*index_site_id.modify {addend|####'index_temp_6'}
|
|
*end_loop
|
|
*index_site_id.modify {replace|hypptv_scheduleselection_dropdown_channel_content_description|hsdccd}
|
|
*index_site_channel.modify {substring(type=regex)|'index_site_id' "<div class="hsdccd_category">[^<]*</div>.*?<div class="hsdccd_number">[^<]*</div>.*?<div class="hsdccd_title">([^<]*)</div>"}
|
|
*index_site_id.modify {substring(type=regex)|'index_site_id' "(<div class="hsdccd_category">[^<]*</div>.*?<div class="hsdccd_number">[^<]*)</div>.*?<div class="hsdccd_title">[^<]*</div>"}
|
|
*index_site_id.modify {remove(type=regex)|</div>(.*?)<}
|
|
*index_site_id.modify {replace|<div class="hsdccd_category">|##Cat=}
|
|
*index_site_id.modify {replace|<div class="hsdccd_number">|##ID=}
|
|
*index_site_id.modify {remove(type=regex)|<[^>]*>}
|
|
*end_scope
|
|
** @auto_xml_channel_end
|