Initial Commit

This commit is contained in:
2023-09-10 21:48:48 +02:00
commit 227cca7d31
791 changed files with 165200 additions and 0 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
{"result":[{"name":"dtt31","key":"31","data":"<div id=\"tabledtt31_20231115\" class=\"tableWrap clearfix timetable\" data-type=\"tv\">\r\n <div class=\"timeFill\"><\/div>\r\n<\/div>\r\n\r\n"},{"name":"dtt32","key":"32","data":"<div id=\"tabledtt32_20231115\" class=\"tableWrap clearfix timetable\" data-type=\"tv\">\r\n <div class=\"timeFill\"><\/div>\r\n<\/div>\r\n\r\n"},{"name":"dtt33","key":"33","data":"<div id=\"tabledtt33_20231115\" class=\"tableWrap clearfix timetable\" data-type=\"tv\">\r\n <div class=\"timeFill\"><\/div>\r\n<\/div>\r\n\r\n"},{"name":"dtt34","key":"34","data":"<div id=\"tabledtt34_20231115\" class=\"tableWrap clearfix timetable\" data-type=\"tv\">\r\n <div class=\"timeFill\"><\/div>\r\n<\/div>\r\n\r\n"},{"name":"radio1","key":"radio1","data":"<div id=\"tableradio1_20231115\" class=\"tableWrap clearfix timetable\" data-type=\"radio\">\r\n <div class=\"timeFill\"><\/div>\r\n<\/div>\r\n"},{"name":"radio2","key":"radio2","data":"<div id=\"tableradio2_20231115\" class=\"tableWrap clearfix timetable\" data-type=\"radio\">\r\n <div class=\"timeFill\"><\/div>\r\n<\/div>\r\n"},{"name":"radio3","key":"radio3","data":"<div id=\"tableradio3_20231115\" class=\"tableWrap clearfix timetable\" data-type=\"radio\">\r\n <div class=\"timeFill\"><\/div>\r\n<\/div>\r\n"},{"name":"radio4","key":"radio4","data":"<div id=\"tableradio4_20231115\" class=\"tableWrap clearfix timetable\" data-type=\"radio\">\r\n <div class=\"timeFill\"><\/div>\r\n<\/div>\r\n"},{"name":"radio5","key":"radio5","data":"<div id=\"tableradio5_20231115\" class=\"tableWrap clearfix timetable\" data-type=\"radio\">\r\n <div class=\"timeFill\"><\/div>\r\n<\/div>\r\n"},{"name":"radio6","key":"radio6","data":"<div id=\"tableradio6_20231115\" class=\"tableWrap clearfix timetable\" data-type=\"radio\">\r\n <div class=\"timeFill\"><\/div>\r\n<\/div>\r\n"},{"name":"radiocnrhk","key":"radiocnrhk","data":"<div id=\"tableradiocnrhk_20231115\" class=\"tableWrap clearfix timetable\" data-type=\"radio\">\r\n 
<div class=\"timeFill\"><\/div>\r\n<\/div>\r\n"},{"name":"radiocmgrgb","key":"radiocmgrgb","data":"<div id=\"tableradiocmgrgb_20231115\" class=\"tableWrap clearfix timetable\" data-type=\"radio\">\r\n <div class=\"timeFill\"><\/div>\r\n<\/div>\r\n"},{"name":"pth","key":"pth","data":"<div id=\"tablepth_20231115\" class=\"tableWrap clearfix timetable\" data-type=\"radio\">\r\n <div class=\"timeFill\"><\/div>\r\n<\/div>\r\n"},{"name":"dab31","key":"dab31","data":"<div id=\"tabledab31_20231115\" class=\"tableWrap clearfix timetable\" data-type=\"radio\">\r\n <div class=\"timeFill\"><\/div>\r\n<\/div>\r\n"},{"name":"dab33","key":"dab33","data":"<div id=\"tabledab33_20231115\" class=\"tableWrap clearfix timetable\" data-type=\"radio\">\r\n <div class=\"timeFill\"><\/div>\r\n<\/div>\r\n"},{"name":"dab35","key":"dab35","data":"<div id=\"tabledab35_20231115\" class=\"tableWrap clearfix timetable\" data-type=\"radio\">\r\n <div class=\"timeFill\"><\/div>\r\n<\/div>\r\n"},{"name":"web+","key":"WEB+","data":"<div id=\"tableweb+_20231115\" class=\"tableWrap clearfix timetable\" data-type=\"radio\">\r\n <div class=\"timeFill\"><\/div>\r\n<\/div>\r\n"}]}

View File

@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<site site="rthk.hk">
<channels>
<channel lang="en" xmltv_id="RTHKTV31.hk" site_id="31">TV 31</channel>
<channel lang="en" xmltv_id="RTHKTV32.hk" site_id="32">TV 32</channel>
<channel lang="en" xmltv_id="RTHKTV33.hk" site_id="33">TV 33</channel>
<channel lang="en" xmltv_id="RTHKTV34.hk" site_id="34">TV 34</channel>
<channel lang="zh" xmltv_id="RTHKTV31.hk" site_id="31">TV 31</channel>
<channel lang="zh" xmltv_id="RTHKTV32.hk" site_id="32">TV 32</channel>
<channel lang="zh" xmltv_id="RTHKTV33.hk" site_id="33">TV 33</channel>
<channel lang="zh" xmltv_id="RTHKTV34.hk" site_id="34">TV 34</channel>
</channels>
</site>

View File

@@ -0,0 +1,90 @@
const axios = require('axios')
const dayjs = require('dayjs')
const cheerio = require('cheerio')
const utc = require('dayjs/plugin/utc')
const timezone = require('dayjs/plugin/timezone')
const customParseFormat = require('dayjs/plugin/customParseFormat')
dayjs.extend(utc)
dayjs.extend(timezone)
dayjs.extend(customParseFormat)
module.exports = {
site: 'rthk.hk',
days: 2,
request: {
headers({ channel }) {
return {
Cookie: `lang=${channel.lang}`
}
},
cache: {
ttl: 60 * 60 * 1000 // 1h
}
},
url: function ({ date }) {
return `https://www.rthk.hk/timetable/main_timetable/${date.format('YYYYMMDD')}`
},
parser({ content, channel, date }) {
const programs = []
const items = parseItems(content, channel)
for (let item of items) {
const $item = cheerio.load(item)
programs.push({
title: parseTitle($item),
sub_title: parseSubTitle($item),
categories: parseCategories($item),
icon: parseIcon($item),
start: parseStart($item, date),
stop: parseStop($item, date)
})
}
return programs
}
}
/**
 * Reads the `p` data attribute of the entry's `.single-wrap` element.
 *
 * @param {Function} $item - cheerio context loaded with a single timetable entry
 * @returns {*} whatever `.data('p')` yields (undefined when the attribute is absent)
 */
function parseIcon($item) {
  const wrapper = $item('.single-wrap')
  return wrapper.data('p')
}
/**
 * Extracts the category list from the `cate` data attribute of the entry's
 * `.single-wrap` element.
 *
 * The attribute is expected to look like `|first||second|`: the whole value is
 * wrapped in single pipes and individual categories are separated by `||`.
 *
 * @param {Function} $item - cheerio context loaded with a single timetable entry
 * @returns {string[]} the non-empty categories, or an empty array when the
 *   attribute is missing, is not a string or is not pipe-wrapped
 */
function parseCategories($item) {
  const raw = $item('.single-wrap').data('cate')
  // cheerio's .data() converts values that look like numbers, booleans or JSON,
  // so the result is not guaranteed to be a string; calling .match() on such a
  // value would throw.
  if (typeof raw !== 'string') return []

  const wrapped = raw.match(/^\|(.*)\|$/)
  if (!wrapped) return []

  return wrapped[1].split('||').filter(Boolean)
}
/**
 * Reads the `title` attribute of the entry's `.showTit` element.
 *
 * @param {Function} $item - cheerio context loaded with a single timetable entry
 * @returns {string|undefined} the attribute value, or undefined when absent
 */
function parseTitle($item) {
  const titleNode = $item('.showTit')
  return titleNode.attr('title')
}
/**
 * Reads the `title` attribute of the entry's `.showEpi` element.
 *
 * @param {Function} $item - cheerio context loaded with a single timetable entry
 * @returns {string|undefined} the attribute value, or undefined when absent
 */
function parseSubTitle($item) {
  const episodeNode = $item('.showEpi')
  return episodeNode.attr('title')
}
/**
 * Builds the start time of an entry from the leading `HH:mm-` part of its
 * `.timeRow` text, combined with the requested day and interpreted in the
 * Asia/Hong_Kong time zone.
 *
 * @param {Function} $item - cheerio context loaded with a single timetable entry
 * @param {Object} date - dayjs object for the day being parsed
 * @returns {Object|null} a dayjs object, or null when the text has no leading time
 */
function parseStart($item, date) {
  const range = $item('.timeRow').text().trim()
  const found = /^(\d+):(\d+)-/.exec(range)
  if (found === null) return null

  const hours = found[1]
  const minutes = found[2]
  const day = date.format('YYYY-MM-DD')

  return dayjs.tz(`${day} ${hours}:${minutes}`, 'YYYY-MM-DD HH:mm', 'Asia/Hong_Kong')
}
/**
 * Builds the stop time of an entry from the trailing `-HH:mm` part of its
 * `.timeRow` text, interpreted in the Asia/Hong_Kong time zone.
 *
 * The text only carries clock times, so a slot that crosses midnight
 * (e.g. `23:30-00:30`) would otherwise end before it starts. When the stop
 * clock time is earlier than the start clock time, the stop is placed on the
 * day after `date`.
 *
 * @param {Function} $item - cheerio context loaded with a single timetable entry
 * @param {Object} date - dayjs object for the day being parsed
 * @returns {Object|null} a dayjs object, or null when the text has no trailing time
 */
function parseStop($item, date) {
  const timeRow = $item('.timeRow').text().trim()
  const [, stopHH, stopMM] = timeRow.match(/-(\d+):(\d+)$/) || []
  if (!stopHH || !stopMM) return null

  const toTime = (day, HH, mm) =>
    dayjs.tz(`${day.format('YYYY-MM-DD')} ${HH}:${mm}`, 'YYYY-MM-DD HH:mm', 'Asia/Hong_Kong')

  const stop = toTime(date, stopHH, stopMM)

  // Without a readable start time there is nothing to compare against, so the
  // stop stays on the requested day.
  const [, startHH, startMM] = timeRow.match(/^(\d+):(\d+)-/) || []
  if (!startHH || !startMM) return stop

  const start = toTime(date, startHH, startMM)

  // Re-parse on the following calendar day rather than adding 24h to `stop`,
  // so the result is still built from a wall-clock time in the target zone.
  return stop.isBefore(start) ? toTime(date.add(1, 'day'), stopHH, stopMM) : stop
}
/**
 * Extracts the timetable entries of one channel from the JSON response.
 *
 * The response is expected to be `{ result: [{ key, data }, ...] }`, where
 * `data` is an HTML fragment. The entry whose `key` equals the channel's
 * `site_id` is selected and its `.showWrap` elements are returned.
 *
 * @param {string|Buffer} content - raw response body
 * @param {Object} channel - channel definition; only `site_id` is read
 * @returns {Array} the `.showWrap` elements, or an empty array when the body
 *   is not valid JSON, has no `result` array, or has no data for the channel
 */
function parseItems(content, channel) {
  let data
  try {
    data = JSON.parse(content)
  } catch (err) {
    // An empty or non-JSON body is treated the same as a response without
    // programmes instead of aborting the whole parse.
    return []
  }
  if (!data || !Array.isArray(data.result)) return []

  // Compare as strings so a numeric key still matches a string site_id.
  const siteId = String(channel.site_id)
  const channelData = data.result.find(entry => entry && String(entry.key) === siteId)
  if (!channelData || !channelData.data) return []

  const $ = cheerio.load(channelData.data)

  return $('.showWrap').toArray()
}

View File

@@ -0,0 +1,83 @@
// npx epg-grabber --config=sites/rthk.hk/rthk.hk.config.js --channels=sites/rthk.hk/rthk.hk.channels.xml --output=guide.xml --days=2
const { parser, url, request } = require('./rthk.hk.config.js')
const fs = require('fs')
const path = require('path')
const axios = require('axios')
const dayjs = require('dayjs')
const utc = require('dayjs/plugin/utc')
const customParseFormat = require('dayjs/plugin/customParseFormat')
dayjs.extend(customParseFormat)
dayjs.extend(utc)
jest.mock('axios')
// Day requested from the site in every test below.
const date = dayjs.utc('2022-12-02', 'YYYY-MM-DD').startOf('d')

// Chinese-language channel fixture.
const channel = {
  site_id: '31',
  xmltv_id: 'RTHKTV31.hk',
  lang: 'zh'
}

// Same channel requested in English.
const channelEN = { ...channel, lang: 'en' }

// Returns a copy of a parsed programme with start/stop serialised via toJSON(),
// so they can be compared against plain strings without mutating parser output.
const serializeTimes = program => ({
  ...program,
  start: program.start.toJSON(),
  stop: program.stop.toJSON()
})

it('can generate valid url', () => {
  expect(url({ date })).toBe('https://www.rthk.hk/timetable/main_timetable/20221202')
})

it('can generate valid request headers', () => {
  expect(request.headers({ channel })).toMatchObject({
    Cookie: 'lang=zh'
  })
})

it('can generate valid request headers for English version', () => {
  expect(request.headers({ channel: channelEN })).toMatchObject({
    Cookie: 'lang=en'
  })
})

it('can parse response', () => {
  const content = fs.readFileSync(path.resolve(__dirname, '__data__/content_zh.json'))
  const results = parser({ content, channel, date }).map(serializeTimes)

  expect(results[0]).toMatchObject({
    start: '2022-12-01T16:00:00.000Z',
    stop: '2022-12-01T17:00:00.000Z',
    title: '問天',
    sub_title: '第十四集',
    categories: ['戲劇'],
    icon: 'https://www.rthk.hk/assets/images/rthk/dtt31/thegreataerospace/10239_1920_s.jpg'
  })
})

it('can parse response in English', () => {
  const content = fs.readFileSync(path.resolve(__dirname, '__data__/content_en.json'))
  // Use the English channel fixture so the inputs match the scenario under test.
  const results = parser({ content, channel: channelEN, date }).map(serializeTimes)

  expect(results[0]).toMatchObject({
    start: '2022-12-01T16:00:00.000Z',
    stop: '2022-12-01T17:00:00.000Z',
    title: 'The Great Aerospace',
    sub_title: 'Episode 14',
    categories: ['戲劇'],
    icon: 'https://www.rthk.hk/assets/images/rthk/dtt31/thegreataerospace/10239_1920_s.jpg'
  })
})

it('can handle empty guide', () => {
  const content = fs.readFileSync(path.resolve(__dirname, '__data__/no_content.json'))
  const results = parser({ date, channel, content })

  expect(results).toMatchObject([])
})