From f0b94f3d2a5a733f2572615be77655b37052dace Mon Sep 17 00:00:00 2001 From: Ke Xu Date: Sat, 9 Nov 2024 20:21:06 +0800 Subject: [PATCH 1/8] Add RSS for Tongji SEM --- lib/routes/tongji/sem/_utils.ts | 69 +++++++++++++++++++++++++++++++++ lib/routes/tongji/sem/notice.ts | 40 +++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 lib/routes/tongji/sem/_utils.ts create mode 100644 lib/routes/tongji/sem/notice.ts diff --git a/lib/routes/tongji/sem/_utils.ts b/lib/routes/tongji/sem/_utils.ts new file mode 100644 index 00000000000000..9c12acfa874311 --- /dev/null +++ b/lib/routes/tongji/sem/_utils.ts @@ -0,0 +1,69 @@ +import got from '@/utils/got'; +import { load } from 'cheerio'; +import { parseDate } from '@/utils/parse-date'; + +export async function getNotifByPage(pageNumber: number) { + const pageUrl: string = `https://sem.tongji.edu.cn/semch/category/frontpage/notice/page/${pageNumber}`; + + try { + const response = await got.get(pageUrl, { + headers: { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.101.76 Safari/537.36', + }, + }); + + const html = response.body; + const $ = load(html); + + const notifListElements = $('#page-wrap > div.maim_pages > div > div.leftmain_page > div > ul > li'); + + return notifListElements.toArray().map((Element) => { + const aTagFirst = $(Element).find('a.bt'); + const aTagSecond = $(Element).find('a.time'); + + const title = aTagFirst.attr('title'); + const href = aTagFirst.attr('href'); + const time = aTagSecond.text().trim(); + + return { + title, + link: href, + pubDate: parseDate(time, 'YYYY-MM-DD'), + }; + }); + } catch { + // console.error(error); + } + return []; +} + +export async function getLastPageNumber() { + try { + const response = await got.get('https://sem.tongji.edu.cn/semch/category/frontpage/notice', { + headers: { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.101.76 Safari/537.36', + }, + }); + const html = response.body; + const $ = load(html); + + const lastPageElement = $('#page-wrap > div.maim_pages > div > div.leftmain_page > div > div > a.extend'); + const lastPageUrl: string | undefined = lastPageElement.attr('href'); + + // console.log(lastPageUrl); + + if (lastPageUrl) { + const lastPageNumber = lastPageUrl.match(/page\/(\d+)/)?.[1]; + if (lastPageNumber) { + // console.log(`Last page number: ${lastPageNumber}`); + return Number.parseInt(lastPageNumber); + } else { + // console.error('Failed to extract last page number.'); + } + } + } catch { + // console.error(error); + } + + return -1; +} diff --git a/lib/routes/tongji/sem/notice.ts b/lib/routes/tongji/sem/notice.ts new file mode 100644 index 00000000000000..56643740258a48 --- /dev/null +++ b/lib/routes/tongji/sem/notice.ts @@ -0,0 +1,40 @@ +// Warning: The author still knows nothing about javascript! +import { Route } from '@/types'; + +export const route: Route = { + path: '/sem', + categories: ['university'], + example: '/tongji/sem', + parameters: {}, + features: { + requireConfig: false, + requirePuppeteer: false, + antiCrawler: false, + supportBT: false, + supportPodcast: false, + supportScihub: false, + }, + name: '经济与管理学院通知', + maintainers: ['sitdownkevin'], + url: 'https://sem.tongji.edu.cn/semch/category/frontpage/notice', + handler, + description: ``, +}; + +import { getNotifByPage } from './_utils'; + +async function handler() { + const promises = []; + for (let i = 1; i <= 20; i++) { + promises.push(getNotifByPage(i)); + } + + const results = await Promise.all(promises); + + // feed the data + return { + title: '同济大学经济与管理学院', + link: 'https://bksy.tongji.edu.cn/30359/list.htm', + item: results.flat(), + }; +} From 163c5c6430ccba9a42808ae56ad954025653b4ed Mon Sep 17 00:00:00 2001 From: Ke Xu Date: Sat, 9 Nov 2024 20:42:44 +0800 Subject: [PATCH 2/8] Update notice.ts --- lib/routes/tongji/sem/notice.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/routes/tongji/sem/notice.ts b/lib/routes/tongji/sem/notice.ts index 56643740258a48..5f6a9733f21f02 100644 --- a/lib/routes/tongji/sem/notice.ts +++ b/lib/routes/tongji/sem/notice.ts @@ -31,7 +31,7 @@ async function handler() { const results = await Promise.all(promises); - // feed the data + // feed the data to rss return { title: '同济大学经济与管理学院', link: 'https://bksy.tongji.edu.cn/30359/list.htm', From 81c8086b08f6ad38b83898a9fefda6ab8ff040e2 Mon Sep 17 00:00:00 2001 From: Ke Xu Date: Sat, 9 Nov 2024 21:10:42 +0800 Subject: [PATCH 3/8] Update _utils.ts --- lib/routes/tongji/sem/_utils.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/routes/tongji/sem/_utils.ts b/lib/routes/tongji/sem/_utils.ts index 9c12acfa874311..b019d5643eb799 100644 --- a/lib/routes/tongji/sem/_utils.ts +++ b/lib/routes/tongji/sem/_utils.ts @@ -2,13 +2,15 @@ import got from '@/utils/got'; import { load } from 'cheerio'; import { parseDate } from '@/utils/parse-date'; +import { config } from '@/config'; + export async function getNotifByPage(pageNumber: number) { const pageUrl: string = `https://sem.tongji.edu.cn/semch/category/frontpage/notice/page/${pageNumber}`; try { const response = await got.get(pageUrl, { headers: { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.101.76 Safari/537.36', + 'User-Agent': config.ua, }, }); @@ -41,7 +43,7 @@ export async function getLastPageNumber() { try { const response = await got.get('https://sem.tongji.edu.cn/semch/category/frontpage/notice', { headers: { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.101.76 Safari/537.36', + 'User-Agent': config.ua, }, }); const html = response.body; From 0c99a82b65f14b2f7c58a0c2f09d218a85df9024 Mon Sep 17 00:00:00 2001 From: Ke Xu Date: Sat, 9 Nov 2024 21:14:56 +0800 Subject: [PATCH 4/8] Update notice.ts Fix url --- lib/routes/tongji/sem/notice.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/routes/tongji/sem/notice.ts b/lib/routes/tongji/sem/notice.ts index 5f6a9733f21f02..fabda50cb557c3 100644 --- a/lib/routes/tongji/sem/notice.ts +++ b/lib/routes/tongji/sem/notice.ts @@ -16,7 +16,7 @@ export const route: Route = { }, name: '经济与管理学院通知', maintainers: ['sitdownkevin'], - url: 'https://sem.tongji.edu.cn/semch/category/frontpage/notice', + url: 'sem.tongji.edu.cn/semch/category/frontpage/notice', handler, description: ``, }; From 13ee1a1d4c07231dea72d02eb56b3d40b033deb3 Mon Sep 17 00:00:00 2001 From: Ke Xu Date: Sat, 9 Nov 2024 21:21:17 +0800 Subject: [PATCH 5/8] Update notice.ts Fetch data from the first page only. --- lib/routes/tongji/sem/notice.ts | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/lib/routes/tongji/sem/notice.ts b/lib/routes/tongji/sem/notice.ts index fabda50cb557c3..edb301e058d574 100644 --- a/lib/routes/tongji/sem/notice.ts +++ b/lib/routes/tongji/sem/notice.ts @@ -24,17 +24,12 @@ export const route: Route = { import { getNotifByPage } from './_utils'; async function handler() { - const promises = []; - for (let i = 1; i <= 20; i++) { - promises.push(getNotifByPage(i)); - } - - const results = await Promise.all(promises); + const results = await getNotifByPage(1); // feed the data to rss return { title: '同济大学经济与管理学院', link: 'https://bksy.tongji.edu.cn/30359/list.htm', - item: results.flat(), + item: results, }; } From faf4690400287585f16b8ce1508017d7b9f7eaaa Mon Sep 17 00:00:00 2001 From: Ke Xu Date: Sat, 9 Nov 2024 22:44:46 +0800 Subject: [PATCH 6/8] Update notice.ts --- lib/routes/tongji/sem/notice.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/routes/tongji/sem/notice.ts b/lib/routes/tongji/sem/notice.ts index edb301e058d574..673f3b81d21344 100644 --- a/lib/routes/tongji/sem/notice.ts +++ b/lib/routes/tongji/sem/notice.ts @@ -29,7 +29,7 @@ async function handler() { // feed the data to rss return { title: '同济大学经济与管理学院', - link: 'https://bksy.tongji.edu.cn/30359/list.htm', + link: 'https://sem.tongji.edu.cn/semch/category/frontpage/notice', item: results, }; } From 5dfce0d725a4808053c1dc6cef7c43cc4c643000 Mon Sep 17 00:00:00 2001 From: Ke Xu Date: Sun, 10 Nov 2024 08:43:00 +0800 Subject: [PATCH 7/8] Use the redirected URL instead --- lib/routes/tongji/sem/_utils.ts | 35 ++------------------------------- lib/routes/tongji/sem/notice.ts | 2 +- 2 files changed, 3 insertions(+), 34 deletions(-) diff --git a/lib/routes/tongji/sem/_utils.ts b/lib/routes/tongji/sem/_utils.ts index b019d5643eb799..7be21e22237e59 100644 --- a/lib/routes/tongji/sem/_utils.ts +++ b/lib/routes/tongji/sem/_utils.ts @@ -4,8 +4,8 @@ import { parseDate } from '@/utils/parse-date'; import { config } from '@/config'; -export async function getNotifByPage(pageNumber: number) { - const pageUrl: string = `https://sem.tongji.edu.cn/semch/category/frontpage/notice/page/${pageNumber}`; +export async function getNotifByPage() { + const pageUrl: string = `https://sem.tongji.edu.cn/semch/category/frontpage/notice`; try { const response = await got.get(pageUrl, { @@ -38,34 +38,3 @@ export async function getNotifByPage(pageNumber: number) { } return []; } - -export async function getLastPageNumber() { - try { - const response = await got.get('https://sem.tongji.edu.cn/semch/category/frontpage/notice', { - headers: { - 'User-Agent': config.ua, - }, - }); - const html = response.body; - const $ = load(html); - - const lastPageElement = $('#page-wrap > div.maim_pages > div > div.leftmain_page > div > div > a.extend'); - const lastPageUrl: string | undefined = lastPageElement.attr('href'); - - // console.log(lastPageUrl); - - if (lastPageUrl) { - const lastPageNumber = lastPageUrl.match(/page\/(\d+)/)?.[1]; - if (lastPageNumber) { - // console.log(`Last page number: ${lastPageNumber}`); - return Number.parseInt(lastPageNumber); - } else { - // console.error('Failed to extract last page number.'); - } - } - } catch { - // console.error(error); - } - - return -1; -} diff --git a/lib/routes/tongji/sem/notice.ts b/lib/routes/tongji/sem/notice.ts index 673f3b81d21344..a15d74a47202b2 100644 --- a/lib/routes/tongji/sem/notice.ts +++ b/lib/routes/tongji/sem/notice.ts @@ -24,7 +24,7 @@ export const route: Route = { import { getNotifByPage } from './_utils'; async function handler() { - const results = await getNotifByPage(1); + const results = await getNotifByPage(); // feed the data to rss return { From 651ee48e3c6b943e836492538f8145bd366c9bc0 Mon Sep 17 00:00:00 2001 From: Ke Xu Date: Sun, 10 Nov 2024 08:45:53 +0800 Subject: [PATCH 8/8] Update code --- lib/routes/tongji/sem/_utils.ts | 1 - lib/routes/tongji/sem/notice.ts | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/routes/tongji/sem/_utils.ts b/lib/routes/tongji/sem/_utils.ts index 7be21e22237e59..2a45f9c2ce628a 100644 --- a/lib/routes/tongji/sem/_utils.ts +++ b/lib/routes/tongji/sem/_utils.ts @@ -1,7 +1,6 @@ import got from '@/utils/got'; import { load } from 'cheerio'; import { parseDate } from '@/utils/parse-date'; - import { config } from '@/config'; export async function getNotifByPage() { diff --git a/lib/routes/tongji/sem/notice.ts b/lib/routes/tongji/sem/notice.ts index a15d74a47202b2..55651dad99c78f 100644 --- a/lib/routes/tongji/sem/notice.ts +++ b/lib/routes/tongji/sem/notice.ts @@ -1,5 +1,6 @@ // Warning: The author still knows nothing about javascript! import { Route } from '@/types'; +import { getNotifByPage } from './_utils'; export const route: Route = { path: '/sem', @@ -21,8 +22,6 @@ export const route: Route = { description: ``, }; -import { getNotifByPage } from './_utils'; - async function handler() { const results = await getNotifByPage();