const cheerio = require('cheerio') const axios = require('axios') const dayjs = require('dayjs') const utc = require('dayjs/plugin/utc') const timezone = require('dayjs/plugin/timezone') const customParseFormat = require('dayjs/plugin/customParseFormat') dayjs.extend(utc) dayjs.extend(timezone) dayjs.extend(customParseFormat) module.exports = { site: 'tvhebdo.com', days: 2, url: function ({ channel, date }) { return `https://www.tvhebdo.com/horaire-tele/${channel.site_id}/date/${date.format( 'YYYY-MM-DD' )}` }, parser: function ({ content, date }) { let programs = [] const items = parseItems(content) items.forEach(item => { const prev = programs[programs.length - 1] const $item = cheerio.load(item) let start = parseStart($item, date) if (prev) { if (start.isBefore(prev.start)) { start = start.add(1, 'd') } prev.stop = start } let stop = start.add(30, 'm') programs.push({ title: parseTitle($item), start, stop }) }) return programs }, async channels() { let items = [] const offsets = [ 0, 20, 40, 60, 80, 100, 120, 140, 160, 180, 200, 220, 240, 260, 280, 300, 320, 340, 360 ] for (let offset of offsets) { const url = `https://www.tvhebdo.com/horaire/gr/offset/${offset}/gr_id/0/date/2022-05-11/time/12:00:00` console.log(url) const html = await axios .get(url, { headers: { Cookie: 'distributeur=8004264; __utmz=222163677.1652094266.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _gcl_au=1.1.656635701.1652094273; tvh=3c2kaml9u14m83v91bg4dqgaf3; __utmc=222163677; IR_gbd=tvhebdo.com; IR_MPI=cf76b363-cf87-11ec-93f5-13daf79f8f76%7C1652367602625; __utma=222163677.2064368965.1652094266.1652281202.1652281479.3; __utmt=1; IR_MPS=1652284935955%7C1652284314367; _uetsid=0d8e2e60d13b11ec850db551304ae9e7; _uetvid=80456fa0b26e11ec9bf94951ce79b5f8; __utmb=222163677.19.9.1652284953979; __atuvc=30%7C19; __atuvs=627bdb98682bc242006' } }) .then(r => r.data) .catch(console.error) const $ = cheerio.load(html) const rows = $('table.gr_row').toArray() items = items.concat(rows) } console.log(`Found ${items.length} channels`) return items.map(item => { const $item = cheerio.load(item) const name = $item('.gr_row_head > div > a.gr_row_head_logo.link_to_station > img').attr( 'alt' ) const url = $item('.gr_row_head > div > div.gr_row_head_poste > a').attr('href') const [_, site_id] = url.match(/horaire-tele\/(.*)/) || [null, null] return { lang: 'fr', site_id, name } }) } } function parseTitle($item) { return $item('.titre').first().text().trim() } function parseStart($item, date) { const time = $item('.heure').text() return dayjs.tz(`${date.format('YYYY-MM-DD')} ${time}`, 'YYYY-MM-DD HH:mm', 'America/Toronto') } function parseItems(content) { const $ = cheerio.load(content) return $( '#main_container > div.liste_container > table > tbody > tr[class^=liste_row_style_]' ).toArray() }