fix(route/google): missing title, author and content

DIYgod · Nov 2, 2024 · c285165 · c285165
1 parent 797470c
commit c285165
Showing 1 changed file with 24 additions and 6 deletions.
diff --git a/lib/routes/google/news.ts b/lib/routes/google/news.ts
@@ -3,7 +3,7 @@ import { getCurrentPath } from '@/utils/helpers';
 const __dirname = getCurrentPath(import.meta.url);
 
 import cache from '@/utils/cache';
-import got from '@/utils/got';
+import ofetch from '@/utils/ofetch';
 import { load } from 'cheerio';
 import { parseDate } from '@/utils/parse-date';
 import { art } from '@/utils/render';
@@ -25,7 +25,7 @@ export const route: Route = {
         supportScihub: false,
     },
     name: 'News',
-    maintainers: ['zoenglinghou'],
+    maintainers: ['zoenglinghou', 'pseudoyu'],
     handler,
 };
 
@@ -34,7 +34,7 @@ async function handler(ctx) {
     const locale = ctx.req.param('locale');
 
     const categoryUrls = await cache.tryGet(`google:news:${locale}`, async () => {
-        const { data: front_data } = await got(`${baseUrl}/?${locale}`);
+        const front_data = await ofetch(`${baseUrl}/?${locale}`);
 
         const $ = load(front_data);
         return [
@@ -61,22 +61,40 @@ async function handler(ctx) {
     });
     const categoryUrl = categoryUrls.find((item) => item.category === category).url;
 
-    const { data } = await got(categoryUrl);
+    const data = await ofetch(categoryUrl);
     const $ = load(data);
 
     const list = [...$('.UwIKyb'), ...$('.IBr9hb'), ...$('.IFHyqb')]; // 3 rows of news, 3-rows-wide news, single row news
 
     const items = list.map((item) => {
         item = $(item);
-        const title = item.find('h4').text();
+
+        const title = item.find('.gPFEn').text();
+
+        const authorText = item.find('.bInasb span').text();
+        const authors = authorText
+            ? authorText
+                  .replace(/^By\s+/i, '') // Handle 'By' case-insensitively
+                  .replaceAll(/\s+\([^)]*\)/g, '') // Remove parenthetical info like (She/Her)
+                  .split(/,|\s+&\s+|\s+and\s+/) // Split on comma, &, and 'and'
+                  .map((author) => author.trim())
+                  .filter((author) => {
+                      // Filter out empty strings and common suffixes
+                      if (!author) {return false;}
+                      const suffixes = ['et al', 'et al.'];
+                      return !suffixes.some((suffix) => author.toLowerCase().endsWith(suffix));
+                  })
+                  .map((author) => ({ name: author }))
+            : [];
+
         return {
             title,
             description: art(path.join(__dirname, 'templates/news.art'), {
                 img: item.find('img.Quavad').attr('src'),
                 brief: title,
             }),
             pubDate: parseDate(item.find('time').attr('datetime')),
-            author: item.find('.oovtQ').text(),
+            author: authors,
             link: new URL(item.find('a.WwrzSb').first().attr('href'), baseUrl).href,
         };
     });