Skip to content

Commit

Permalink
fix(route/google): missing title, author and content
Browse files (browse the repository at this point in the history)
  • Loading branch information
pseudoyu committed Nov 2, 2024
1 parent 797470c commit c285165
Showing 1 changed file with 24 additions and 6 deletions.
30 changes: 24 additions & 6 deletions lib/routes/google/news.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { getCurrentPath } from '@/utils/helpers';
const __dirname = getCurrentPath(import.meta.url);

import cache from '@/utils/cache';
import got from '@/utils/got';
import ofetch from '@/utils/ofetch';
import { load } from 'cheerio';
import { parseDate } from '@/utils/parse-date';
import { art } from '@/utils/render';
Expand All @@ -25,7 +25,7 @@ export const route: Route = {
supportScihub: false,
},
name: 'News',
maintainers: ['zoenglinghou'],
maintainers: ['zoenglinghou', 'pseudoyu'],
handler,
};

Expand All @@ -34,7 +34,7 @@ async function handler(ctx) {
const locale = ctx.req.param('locale');

const categoryUrls = await cache.tryGet(`google:news:${locale}`, async () => {
const { data: front_data } = await got(`${baseUrl}/?${locale}`);
const front_data = await ofetch(`${baseUrl}/?${locale}`);

const $ = load(front_data);
return [
Expand All @@ -61,22 +61,40 @@ async function handler(ctx) {
});
const categoryUrl = categoryUrls.find((item) => item.category === category).url;

const { data } = await got(categoryUrl);
const data = await ofetch(categoryUrl);
const $ = load(data);

const list = [...$('.UwIKyb'), ...$('.IBr9hb'), ...$('.IFHyqb')]; // 3 rows of news, 3-rows-wide news, single row news

const items = list.map((item) => {
item = $(item);
const title = item.find('h4').text();

const title = item.find('.gPFEn').text();

const authorText = item.find('.bInasb span').text();
const authors = authorText
? authorText
.replace(/^By\s+/i, '') // Handle 'By' case-insensitively
.replaceAll(/\s+\([^)]*\)/g, '') // Remove parenthetical info like (She/Her)
.split(/,|\s+&\s+|\s+and\s+/) // Split on comma, &, and 'and'
.map((author) => author.trim())
.filter((author) => {
// Filter out empty strings and common suffixes
if (!author) {return false;}
const suffixes = ['et al', 'et al.'];
return !suffixes.some((suffix) => author.toLowerCase().endsWith(suffix));
})
.map((author) => ({ name: author }))
: [];

return {
title,
description: art(path.join(__dirname, 'templates/news.art'), {
img: item.find('img.Quavad').attr('src'),
brief: title,
}),
pubDate: parseDate(item.find('time').attr('datetime')),
author: item.find('.oovtQ').text(),
author: authors,
link: new URL(item.find('a.WwrzSb').first().attr('href'), baseUrl).href,
};
});
Expand Down

0 comments on commit c285165

Please sign in to comment.