diff --git a/lib/routes/afr/latest.ts b/lib/routes/afr/latest.ts
new file mode 100644
index 00000000000000..b656755ceccfc8
--- /dev/null
+++ b/lib/routes/afr/latest.ts
@@ -0,0 +1,82 @@
+import type { Route } from '@/types';
+import type { Context } from 'hono';
+
+import cache from '@/utils/cache';
+import ofetch from '@/utils/ofetch';
+import { parseDate } from '@/utils/parse-date';
+import { assetsConnectionByCriteriaQuery } from './query';
+import { getItem } from './utils';
+
+/** Number of stories requested when `limit` is missing or not a positive integer. */
+const DEFAULT_LIMIT = 10;
+
+export const route: Route = {
+    path: '/latest',
+    categories: ['traditional-media'],
+    example: '/afr/latest',
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+    },
+    radar: [
+        {
+            source: ['www.afr.com/latest', 'www.afr.com/'],
+        },
+    ],
+    name: 'Latest',
+    maintainers: ['TonyRL'],
+    handler,
+    url: 'www.afr.com/latest',
+};
+
+/**
+ * Builds the "Latest" feed: requests the newest assets from the AFR GraphQL API,
+ * maps every edge to a feed item and expands it via `getItem` behind `cache.tryGet`.
+ *
+ * @param ctx - Request context; the optional `limit` query parameter sets how many assets are requested.
+ * @returns Feed metadata together with the expanded items.
+ */
+async function handler(ctx: Context) {
+    const requestedLimit = Number.parseInt(ctx.req.query('limit') ?? '', 10);
+    // Fall back to the default instead of forwarding NaN or a non-positive count to the API.
+    const limit = Number.isInteger(requestedLimit) && requestedLimit > 0 ? requestedLimit : DEFAULT_LIMIT;
+
+    const response = await ofetch('https://api.afr.com/graphql', {
+        query: {
+            query: assetsConnectionByCriteriaQuery,
+            operationName: 'assetsConnectionByCriteria',
+            variables: {
+                brand: 'afr',
+                first: limit,
+                render: 'web',
+                types: ['article', 'bespoke', 'featureArticle', 'liveArticle', 'video'],
+                after: '',
+            },
+        },
+    });
+
+    const list = response.data.assetsConnectionByCriteria.edges.map(({ node }) => ({
+        title: node.asset.headlines.headline,
+        description: node.asset.about,
+        link: `https://www.afr.com${node.urls.published.afr.path}`,
+        pubDate: parseDate(node.dates.firstPublished),
+        updated: parseDate(node.dates.modified),
+        author: node.asset.byline,
+        category: [node.tags.primary.displayName, ...node.tags.secondary.map((tag) => tag.displayName)],
+        image: node.featuredImages && `https://static.ffx.io/images/${node.featuredImages.landscape16x9.data.id}`,
+    }));
+
+    const items = await Promise.all(list.map((item) => cache.tryGet(item.link, () => getItem(item))));
+
+    return {
+        title: 'Latest | The Australian Financial Review | AFR',
+        description: 'The latest news, events, analysis and opinion from The Australian Financial Review',
+        image: 'https://www.afr.com/apple-touch-icon-1024x1024.png',
+        link: 'https://www.afr.com/latest',
+        item: items,
+    };
+}
diff --git a/lib/routes/afr/namespace.ts b/lib/routes/afr/namespace.ts
new file mode 100644
index 00000000000000..d6fd9b647e2165
--- /dev/null
+++ b/lib/routes/afr/namespace.ts
@@ -0,0 +1,7 @@
+import type { Namespace } from '@/types';
+
+export const namespace: Namespace = {
+    name: 'The Australian Financial Review',
+    url: 'afr.com',
+    lang: 'en',
+};
diff --git a/lib/routes/afr/navigation.ts b/lib/routes/afr/navigation.ts
new file mode 100644
index 00000000000000..cbe7421a296b16
--- /dev/null
+++ b/lib/routes/afr/navigation.ts
@@ -0,0 +1,90 @@
+import type { Route } from '@/types';
+import type { Context } from 'hono';
+
+import cache from '@/utils/cache';
+import ofetch from '@/utils/ofetch';
+import { parseDate } from '@/utils/parse-date';
+import { pageByNavigationPathQuery } from './query';
+import { getItem } from './utils';
+
+/** Number of stories requested when `limit` is missing or not a positive integer. */
+const DEFAULT_LIMIT = 10;
+
+export const route: Route = {
+    path: '/navigation/:path{.+}',
+    categories: ['traditional-media'],
+    example: '/afr/navigation/markets',
+    parameters: {
+        path: 'Navigation path, can be found in the URL of the page',
+    },
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+    },
+    radar: [
+        {
+            source: ['www.afr.com/path*'],
+        },
+    ],
+    name: 'Navigation',
+    maintainers: ['TonyRL'],
+    handler,
+    url: 'www.afr.com',
+};
+
+/**
+ * Builds the feed for one navigation path: requests the page's latest stories from the
+ * AFR GraphQL API, maps every edge to a feed item and expands it via `getItem` behind `cache.tryGet`.
+ *
+ * @param ctx - Request context; supplies the `path` route parameter and the optional `limit` query parameter.
+ * @returns Feed metadata (title and description come from the page's `seo` block) together with the expanded items.
+ */
+async function handler(ctx: Context) {
+    const { path } = ctx.req.param();
+    const requestedLimit = Number.parseInt(ctx.req.query('limit') ?? '', 10);
+    // Fall back to the default instead of forwarding NaN or a non-positive count to the API.
+    const limit = Number.isInteger(requestedLimit) && requestedLimit > 0 ? requestedLimit : DEFAULT_LIMIT;
+
+    const response = await ofetch('https://api.afr.com/api/content-audience/afr/graphql', {
+        query: {
+            query: pageByNavigationPathQuery,
+            operationName: 'pageByNavigationPath',
+            variables: {
+                input: { brandKey: 'afr', navigationPath: `/${path}`, renderName: 'web' },
+                firstStories: limit,
+                afterStories: '',
+            },
+        },
+    });
+
+    const list = response.data.pageByNavigationPath.page.latestStoriesConnection.edges.map(({ node }) => ({
+        title: node.headlines.headline,
+        description: node.overview.about,
+        link: `https://www.afr.com${node.urls.canonical.path}`,
+        pubDate: parseDate(node.dates.firstPublished),
+        // `pageByNavigationPathQuery` selects only `firstPublished` and `published` under `dates`
+        // (there is no `modified`), so `published` is the value available here.
+        // NOTE(review): `published` is presumably the most recent publication time - confirm.
+        updated: parseDate(node.dates.published),
+        author: node.byline
+            .filter((byline) => byline.type === 'AUTHOR')
+            .map((byline) => byline.author.name)
+            .join(', '),
+        category: [node.tags.primary.displayName, ...node.tags.secondary.map((tag) => tag.displayName)],
+        image: node.images && `https://static.ffx.io/images/${node.images.landscape16x9.mediaId}`,
+    }));
+
+    const items = await Promise.all(list.map((item) => cache.tryGet(item.link, () => getItem(item))));
+
+    return {
+        title: response.data.pageByNavigationPath.page.seo.title,
+        description: response.data.pageByNavigationPath.page.seo.description,
+        image: 'https://www.afr.com/apple-touch-icon-1024x1024.png',
+        link: `https://www.afr.com/${path}`,
+        item: items,
+    };
+}
diff --git a/lib/routes/afr/query.ts b/lib/routes/afr/query.ts
new file mode 100644
index 00000000000000..a596f8fc4f70dd
--- /dev/null
+++ b/lib/routes/afr/query.ts
@@ -0,0 +1,351 @@
+/** GraphQL document for the `pageByNavigationPath` operation, sent by the navigation route. */
+export const pageByNavigationPathQuery = `query pageByNavigationPath(
+    $input: PageByNavigationPathInput!
+    $firstStories: Int
+    $afterStories: Cursor
+  ) {
+    pageByNavigationPath(input: $input) {
+      error {
+        message
+        type {
+          class
+          ... on ErrorTypeInvalidRequest {
+            fields {
+              field
+              message
+            }
+          }
+        }
+      }
+      page {
+        ads {
+          suppress
+        }
+        description
+        id
+        latestStoriesConnection(first: $firstStories, after: $afterStories) {
+          edges {
+            node {
+              byline {
+                ...AssetBylineFragment
+              }
+              headlines {
+                headline
+              }
+              ads {
+                sponsor {
+                  name
+                }
+              }
+              overview {
+                about
+                label
+              }
+              type
+              dates {
+                firstPublished
+                published
+              }
+              id
+              publicId
+              images {
+                ...AssetImagesFragmentAudience
+              }
+              tags {
+                primary {
+                  ...TagFragmentAudience
+                }
+                secondary {
+                  ...TagFragmentAudience
+                }
+              }
+              urls {
+                ...AssetUrlsAudienceFragment
+              }
+            }
+          }
+          pageInfo {
+            endCursor
+            hasNextPage
+          }
+        }
+        name
+        seo {
+          canonical {
+            brand {
+              key
+            }
+          }
+          description
+          title
+        }
+        social {
+          image {
+            height
+            url
+            width
+          }
+        }
+      }
+      redirect
+    }
+  }
+  fragment AssetBylineFragment on AssetByline {
+    type
+    ... on AssetBylineAuthor {
+      author {
+        name
+        publicId
+        profile {
+          avatar
+          bio
+          body
+          canonical {
+            brand {
+              key
+            }
+          }
+          email
+          socials {
+            facebook {
+              publicId
+            }
+            twitter {
+              publicId
+            }
+          }
+          title
+        }
+      }
+    }
+    ... on AssetBylineName {
+      name
+    }
+  }
+  fragment AssetImagesFragmentAudience on ImageRenditions {
+    landscape16x9 {
+      ...ImageFragmentAudience
+    }
+    landscape3x2 {
+      ...ImageFragmentAudience
+    }
+    portrait2x3 {
+      ...ImageFragmentAudience
+    }
+    square1x1 {
+      ...ImageFragmentAudience
+    }
+  }
+  fragment ImageFragmentAudience on ImageRendition {
+    altText
+    animated
+    caption
+    credit
+    crop {
+      offsetX
+      offsetY
+      width
+      zoom
+    }
+    mediaId
+    mimeType
+    source
+    type
+  }
+  fragment AssetUrlsAudienceFragment on AssetURLs {
+    canonical {
+      brand {
+        key
+      }
+      path
+    }
+    external {
+      url
+    }
+    published {
+      brand {
+        key
+      }
+      path
+    }
+  }
+  fragment TagFragmentAudience on Tag {
+    company {
+      exchangeCode
+      stockCode
+    }
+    context {
+      name
+    }
+    description
+    displayName
+    externalEntities {
+      google {
+        placeId
+      }
+      wikipedia {
+        publicId
+        url
+      }
+    }
+    id
+    location {
+      latitude
+      longitude
+      postalCode
+      state
+    }
+    name
+    publicId
+    seo {
+      description
+      title
+    }
+    urls {
+      canonical {
+        brand {
+          key
+        }
+        path
+      }
+      published {
+        brand {
+          key
+        }
+        path
+      }
+    }
+  }`;
+
+/** GraphQL document for the `assetsConnectionByCriteria` operation, sent by the latest route. */
+export const assetsConnectionByCriteriaQuery = `query assetsConnectionByCriteria(
+    $after: ID
+    $brand: Brand!
+    $categories: [Int!]
+    $first: Int!
+    $render: Render!
+    $types: [AssetType!]!
+  ) {
+    assetsConnectionByCriteria(
+      after: $after
+      brand: $brand
+      categories: $categories
+      first: $first
+      render: $render
+      types: $types
+    ) {
+      edges {
+        cursor
+        node {
+          ...AssetFragment
+          sponsor {
+            name
+          }
+        }
+      }
+      error {
+        message
+        type {
+          class
+        }
+      }
+      pageInfo {
+        endCursor
+        hasNextPage
+      }
+    }
+  }
+  fragment AssetFragment on Asset {
+    asset {
+      about
+      byline
+      duration
+      headlines {
+        headline
+      }
+      live
+    }
+    assetType
+    dates {
+      firstPublished
+      modified
+      published
+    }
+    id
+    featuredImages {
+      landscape16x9 {
+        ...ImageFragment
+      }
+      landscape3x2 {
+        ...ImageFragment
+      }
+      portrait2x3 {
+        ...ImageFragment
+      }
+      square1x1 {
+        ...ImageFragment
+      }
+    }
+    label
+    tags {
+      primary: primaryTag {
+        ...AssetTag
+      }
+      secondary {
+        ...AssetTag
+      }
+    }
+    urls {
+      ...AssetURLs
+    }
+  }
+  fragment AssetTag on AssetTagDetails {
+    ...AssetTagAudience
+    shortID
+    slug
+  }
+  fragment AssetTagAudience on AssetTagDetails {
+    company {
+      exchangeCode
+      stockCode
+    }
+    context
+    displayName
+    id
+    name
+    urls {
+      canonical {
+        brand
+        path
+      }
+      published {
+        afr {
+          path
+        }
+      }
+    }
+  }
+  fragment AssetURLs on AssetURLs {
+    canonical {
+      brand
+      path
+    }
+    published {
+      afr {
+        path
+      }
+    }
+  }
+  fragment ImageFragment on Image {
+    data {
+      altText
+      aspect
+      autocrop
+      caption
+      cropWidth
+      id
+      offsetX
+      offsetY
+      zoom
+    }
+  }`;
diff --git a/lib/routes/afr/utils.ts b/lib/routes/afr/utils.ts
new file mode 100644
index 00000000000000..c055ae9e70d29a
--- /dev/null
+++ b/lib/routes/afr/utils.ts
@@ -0,0 +1,115 @@
+import * as cheerio from 'cheerio';
+import ofetch from '@/utils/ofetch';
+
+/**
+ * Fetches the page behind `item.link` and fills `item.description` from the
+ * JSON found in the page's `script#__REDUX_STATE__` element.
+ *
+ * @param item - Feed item; `link` is read and `description` may be overwritten in place.
+ * @returns The same `item` object that was passed in.
+ * @throws Error when no page content can be extracted or the asset type is not handled.
+ */
+export const getItem = async (item) => {
+    const response = await ofetch(item.link);
+    const $ = cheerio.load(response);
+
+    // The script holds a `__REDUX_STATE__=...;` assignment; `:undefined` is rewritten to `:null` so JSON.parse accepts it.
+    const stateJson = $('script#__REDUX_STATE__')
+        .text()
+        .replaceAll(':undefined', ':null')
+        .match(/__REDUX_STATE__=(.*);/)?.[1];
+    const content = stateJson ? JSON.parse(stateJson).page?.content : undefined;
+    if (!content) {
+        // Fail with the offending link rather than a bare TypeError on a missing `page`.
+        throw new Error(`Unable to find article content in ${item.link}`);
+    }
+    const asset = content.asset;
+
+    switch (content.assetType) {
+        case 'liveArticle':
+            // NOTE(review): the template only separates headline and body with blank lines - confirm no heading markup is intended.
+            item.description = asset.posts.map((post) => `\n\n${post.asset.headlines.headline}\n\n${post.asset.body}`).join('');
+            break;
+
+        case 'article':
+        case 'featureArticle':
+            item.description = renderArticle(asset, item.link);
+            break;
+
+        case 'bespoke':
+        case 'video':
+            // The latest route requests these asset types too; keep the summary the caller already set.
+            break;
+
+        default:
+            throw new Error(`Unknown asset type: ${content.assetType} in ${item.link}`);
+    }
+
+    return item;
+};
+
+/**
+ * Produces the article HTML by replacing every `x-placeholder` element in `asset.body`
+ * with content built from the `asset.bodyPlaceholders` entry that shares its `id`.
+ *
+ * NOTE(review): several replacement templates below carry no markup (the `iframe` case
+ * inserts an empty string) - presumably tags were lost from this copy; confirm against upstream.
+ *
+ * @param asset - Article asset; `body` and `bodyPlaceholders` are read.
+ * @param link - Article URL, used only in the error message.
+ * @returns The serialised body.
+ * @throws Error when a placeholder type is not handled.
+ */
+const renderArticle = (asset, link: string) => {
+    const $ = cheerio.load(asset.body, null, false);
+    $('x-placeholder').each((_, el) => {
+        const $el = $(el);
+        const id = $el.attr('id');
+        if (!id) {
+            // Without an id there is nothing to look up: drop the element and continue with the next one.
+            $el.replaceWith('');
+            return;
+        }
+
+        const placeholder = asset.bodyPlaceholders[id];
+        switch (placeholder?.type) {
+            case 'callout':
+            case 'relatedStory':
+                $el.replaceWith('');
+                break;
+
+            case 'iframe':
+                $el.replaceWith(``);
+                break;
+
+            case 'image':
+                $el.replaceWith(`${placeholder.data.altText}`);
+                break;
+
+            case 'linkArticle':
+                $el.replaceWith(placeholder.data.text);
+                break;
+
+            case 'linkExternal':
+                $el.replaceWith(`${placeholder.data.text}`);
+                break;
+
+            case 'quote':
+                $el.replaceWith(placeholder.data.markup);
+                break;
+
+            case 'scribd':
+                $el.replaceWith(`View on Scribd`);
+                break;
+
+            case 'twitter':
+                $el.replaceWith(`${placeholder.data.url}`);
+                break;
+
+            default:
+                throw new Error(`Unknown placeholder type: ${placeholder?.type} in ${link}`);
+        }
+    });
+
+    return $.html();
+};