Initial Commit

This commit is contained in:
2023-09-10 21:48:48 +02:00
commit 227cca7d31
791 changed files with 165200 additions and 0 deletions

Binary file not shown.

View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Channel list for the rtb.gov.bn grabber.
  Each <channel> pairs an xmltv_id with the site_id that the site config
  interpolates into the programme-guide PDF file name.
-->
<site site="rtb.gov.bn">
<channels>
<channel lang="ms" xmltv_id="RTBAneka.bn" site_id="Aneka">RTB Aneka</channel>
<channel lang="ms" xmltv_id="RTBPerdana.bn" site_id="Perdana">RTB Perdana</channel>
<channel lang="ms" xmltv_id="RTBSukmaindera.bn" site_id="Sukmaindera">RTB Sukmaindera</channel>
</channels>
</site>

View File

@@ -0,0 +1,71 @@
const pdf = require('pdf-parse')
const dayjs = require('dayjs')
const utc = require('dayjs/plugin/utc')
const timezone = require('dayjs/plugin/timezone')
const customParseFormat = require('dayjs/plugin/customParseFormat')
dayjs.extend(utc)
dayjs.extend(timezone)
dayjs.extend(customParseFormat)
module.exports = {
skip: true, // INFO: guide is not available on the site
site: 'rtb.gov.bn',
days: 2,
url: function ({ channel, date }) {
const [position] = channel.site_id.split('#')
return encodeURI(
`http://www.rtb.gov.bn/PublishingImages/SitePages/Programme Guide/${
channel.site_id
} ${date.format('DD MMMM YYYY')}.pdf`
)
},
parser: async function ({ buffer, date }) {
let programs = []
const items = await parseItems(buffer)
items.forEach(item => {
const prev = programs[programs.length - 1]
let start = parseStart(item, date)
if (prev) {
if (start.isBefore(prev.start)) {
start = start.add(1, 'd')
date = date.add(1, 'd')
}
prev.stop = start
}
const stop = start.add(1, 'h')
programs.push({
title: item.title,
start,
stop
})
})
return programs
}
}
/**
 * Resolves the start moment of a schedule entry.
 *
 * The calendar day comes from `date`, the clock time from `item.time`, and the
 * combination is interpreted as local time in the Asia/Brunei zone.
 *
 * @param {{ time: string }} item - Schedule entry carrying an `HH:mm` time.
 * @param {object} date - Object exposing a dayjs-style `format()`.
 * @returns {object} The value returned by `dayjs.tz` for that date and time.
 */
function parseStart(item, date) {
  const DAY_FORMAT = 'YYYY-MM-DD'
  const { time } = item
  const day = date.format(DAY_FORMAT)

  return dayjs.tz(`${day} ${time}`, `${DAY_FORMAT} HH:mm`, 'Asia/Brunei')
}
/**
 * Extracts schedule entries from a programme-guide PDF.
 *
 * Only text lines that begin with an `HH:mm` time are kept. Whatever follows
 * the time, after any amount of whitespace, becomes the title (an empty string
 * when the line holds nothing but the time).
 *
 * @param {Buffer} [buffer] - PDF data handed to `pdf` (pdf-parse).
 * @returns {Promise<Array<{ time: string, title: string }>>} Entries in
 *   document order, or an empty array when the data cannot be parsed.
 */
async function parseItems(buffer) {
  // Deliberately best effort: when pdf-parse rejects (missing buffer, payload
  // that is not a PDF) the guide is treated as empty instead of failing.
  const data = await pdf(buffer).catch(() => null)
  if (!data) return []

  const items = []
  for (const line of data.text.split('\n')) {
    // A single pattern both selects and splits the line, so every kept entry
    // has a usable time even if the separator is a tab, repeated, or absent.
    const match = line.trim().match(/^(\d{2}:\d{2})\s*(.*)/)
    if (match) {
      items.push({ time: match[1], title: match[2] })
    }
  }

  return items
}

View File

@@ -0,0 +1,96 @@
// npx epg-grabber --config=sites/rtb.gov.bn/rtb.gov.bn.config.js --channels=sites/rtb.gov.bn/rtb.gov.bn.channels.xml --output=guide.xml --days=2
const { parser, url } = require('./rtb.gov.bn.config.js')
const path = require('path')
const fs = require('fs')
const dayjs = require('dayjs')
const utc = require('dayjs/plugin/utc')
const customParseFormat = require('dayjs/plugin/customParseFormat')
dayjs.extend(customParseFormat)
dayjs.extend(utc)
// Guide date shared by all tests: start of day (UTC) on 2021-11-11.
const date = dayjs.utc('2021-11-11', 'YYYY-MM-DD').startOf('d')
// Channel fixture; its site_id is expected to appear in the generated PDF URL.
const channel = {
site_id: 'Sukmaindera',
xmltv_id: 'RTBSukmaindera.bn'
}
// url() must return the percent-encoded address of the channel's PDF for the day.
it('can generate valid url', () => {
  const expectedUrl =
    'http://www.rtb.gov.bn/PublishingImages/SitePages/Programme%20Guide/Sukmaindera%2011%20November%202021.pdf'
  const actualUrl = url({ channel, date })

  expect(actualUrl).toBe(expectedUrl)
})
// Parses the Sukmaindera fixture and checks the first and last programmes.
it('can parse Sukmaindera 11 November 2021.pdf', async () => {
  // No options are passed on purpose: readFileSync then returns a raw Buffer,
  // which is the binary PDF data the parser needs.
  const buffer = fs.readFileSync(
    path.resolve(__dirname, '__data__/Sukmaindera 11 November 2021.pdf')
  )

  const programs = await parser({ buffer, date })
  // Serialise start/stop on copies so they can be compared with ISO strings
  // without mutating the objects returned by the parser.
  const results = programs.map(p => ({
    ...p,
    start: p.start.toJSON(),
    stop: p.stop.toJSON()
  }))

  expect(results.length).toBe(47)
  expect(results[0]).toMatchObject({
    start: '2021-11-10T22:00:00.000Z',
    stop: '2021-11-10T22:05:00.000Z',
    title: 'NATIONAL ANTHEM'
  })
  // The final entry has no successor, so it keeps the default one-hour length.
  expect(results[46]).toMatchObject({
    start: '2021-11-11T21:30:00.000Z',
    stop: '2021-11-11T22:30:00.000Z',
    title: 'BACAAN SURAH YASSIN'
  })
})
// Parses the Aneka fixture and spot-checks one programme in the middle of the day.
it('can parse Aneka 11 November 2021.pdf', async () => {
  // No options are passed on purpose: readFileSync then returns a raw Buffer,
  // which is the binary PDF data the parser needs.
  const buffer = fs.readFileSync(path.resolve(__dirname, '__data__/Aneka 11 November 2021.pdf'))

  const programs = await parser({ buffer, date })
  // Serialise start/stop on copies so they can be compared with ISO strings
  // without mutating the objects returned by the parser.
  const results = programs.map(p => ({
    ...p,
    start: p.start.toJSON(),
    stop: p.stop.toJSON()
  }))

  expect(results.length).toBe(26)
  expect(results[4]).toMatchObject({
    start: '2021-11-11T03:00:00.000Z',
    stop: '2021-11-11T04:05:00.000Z',
    title: 'DRAMA TURKI:'
  })
})
// When the site answers with an HTML redirect page instead of a PDF, no
// buffer is supplied and the parser is expected to produce no programmes.
it('can handle empty guide', async () => {
  const content = `<html><head><title>Object moved</title></head><body>
<h2>Object moved to <a href="/en/_layouts/OGP/ErrorPage.aspx">here</a>.</h2>
</body></html>
`

  const result = await parser({ date, channel, content })

  expect(result).toMatchObject([])
})