diff --git a/README.md b/README.md index 16dea42..e22089c 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ Check the return for a ```success``` flag. If success is set to true, then the u | fetchOptions | Options that are used by the Fetch API | {} | | | timeout | Request timeout for Fetch (Default is 10 seconds) | 10 | | | blacklist | Pass in an array of sites you don't want ogs to run on. | [] | | -| onlyGetOpenGraphInfo | Only fetch open graph info and don't fall back on anything else. | false | | +| onlyGetOpenGraphInfo | Only fetch open graph info and don't fall back on anything else. Also accepts an array of property names (e.g. `['image', 'title']`) for which no fallback should be used. | false | | | customMetaTags | Here you can define custom meta tags you want to scrape. | [] | | | urlValidatorSettings | Sets the options used by validator.js for testing the URL | [Here](https://github.com/jshemas/openGraphScraper/blob/master/lib/utils.ts#L4-L17) | | diff --git a/index.ts b/index.ts index 89214d8..d615ed8 100644 --- a/index.ts +++ b/index.ts @@ -11,7 +11,8 @@ import type { * for scraping Open Graph and Twitter Card info off a website. * * @param {object} options - The options used by Open Graph Scraper - * @param {boolean} [options.onlyGetOpenGraphInfo] - Only fetch open graph info and don't fall back on anything else. + * @param {boolean|string[]} [options.onlyGetOpenGraphInfo] - Only fetch open graph info and don't fall back on + * anything else. Also accepts an array of property names for which no fallback should be used. * @param {object} [options.customMetaTags] - Here you can define custom meta tags you want to scrape. 
* @param {object} [options.fetchOptions] - Sets the options used by fetch for the http requests * @param {object} [options.urlValidatorSettings] - Sets the options used by validator.js for testing the URL diff --git a/lib/extract.ts b/lib/extract.ts index 30067f8..7e3f644 100644 --- a/lib/extract.ts +++ b/lib/extract.ts @@ -89,7 +89,7 @@ export default function extractMetaTags(body: string, options: OpenGraphScraperO ogObject = mediaSetup(ogObject); - // if onlyGetOpenGraphInfo isn't set, run the open graph fallbacks - if (!options.onlyGetOpenGraphInfo) { + // run the fallbacks unless onlyGetOpenGraphInfo is true; an array only disables them for the listed properties + if (!options.onlyGetOpenGraphInfo || Array.isArray(options.onlyGetOpenGraphInfo)) { ogObject = fallback(ogObject, options, $, body); $('script').each((index, script) => { diff --git a/lib/fallback.ts b/lib/fallback.ts index 554e3cb..7163978 100644 --- a/lib/fallback.ts +++ b/lib/fallback.ts @@ -7,7 +7,9 @@ import { isImageTypeValid, isUrlValid, } from './utils'; -import type { OpenGraphScraperOptions, ImageObject, OgObjectInteral } from './types'; +import type { + OpenGraphScraperOptions, ImageObject, OgObjectInteral, OnlyGetOpenGraphInfoItem, +} from './types'; const doesElementExist = (selector:string, attribute:string, $: CheerioAPI) => ( $(selector).attr(attribute) && ($(selector).attr(attribute)?.length ??
0) > 0 @@ -23,8 +25,18 @@ const doesElementExist = (selector:string, attribute:string, $: CheerioAPI) => ( * */ export function fallback(ogObject: OgObjectInteral, options: OpenGraphScraperOptions, $: CheerioAPI, body: string) { + const shouldFallback = (key: OnlyGetOpenGraphInfoItem): boolean => { + if (!options.onlyGetOpenGraphInfo) { + return true; + } + if (options.onlyGetOpenGraphInfo === true) { + return false; + } + return !options.onlyGetOpenGraphInfo.includes(key); + }; + // title fallback - if (!ogObject.ogTitle) { + if (!ogObject.ogTitle && shouldFallback('title')) { if ($('title').text() && $('title').text().length > 0) { ogObject.ogTitle = $('title').first().text(); } else if ($('head > meta[name="title"]').attr('content') && ($('head > meta[name="title"]').attr('content')?.length ?? 0) > 0) { @@ -41,7 +53,7 @@ export function fallback(ogObject: OgObjectInteral, options: OpenGraphScraperOpt } // Get meta description tag if og description was not provided - if (!ogObject.ogDescription) { + if (!ogObject.ogDescription && shouldFallback('description')) { if (doesElementExist('head > meta[name="description"]', 'content', $)) { ogObject.ogDescription = $('head > meta[name="description"]').attr('content'); } else if (doesElementExist('head > meta[itemprop="description"]', 'content', $)) { @@ -52,7 +64,7 @@ export function fallback(ogObject: OgObjectInteral, options: OpenGraphScraperOpt } // Get all of images if there is no og:image info - if (!ogObject.ogImage) { + if (!ogObject.ogImage && shouldFallback('image')) { ogObject.ogImage = []; $('img').map((index, imageElement) => { const source: string = $(imageElement).attr('src') ?? ''; @@ -85,7 +97,7 @@ export function fallback(ogObject: OgObjectInteral, options: OpenGraphScraperOpt } // audio fallback - if (!ogObject.ogAudioURL && !ogObject.ogAudioSecureURL) { + if (!ogObject.ogAudioURL && !ogObject.ogAudioSecureURL && shouldFallback('audioUrl')) { const audioElementValue: string = $('audio').attr('src') ?? 
''; const audioSourceElementValue: string = $('audio > source').attr('src') ?? ''; if (doesElementExist('audio', 'src', $)) { @@ -108,7 +120,7 @@ export function fallback(ogObject: OgObjectInteral, options: OpenGraphScraperOpt } // locale fallback - if (!ogObject.ogLocale) { + if (!ogObject.ogLocale && shouldFallback('locale')) { if (doesElementExist('html', 'lang', $)) { ogObject.ogLocale = $('html').attr('lang'); } else if (doesElementExist('head > meta[itemprop="inLanguage"]', 'content', $)) { @@ -117,7 +129,7 @@ export function fallback(ogObject: OgObjectInteral, options: OpenGraphScraperOpt } // logo fallback - if (!ogObject.ogLogo) { + if (!ogObject.ogLogo && shouldFallback('logo')) { if (doesElementExist('meta[itemprop="logo"]', 'content', $)) { ogObject.ogLogo = $('meta[itemprop="logo"]').attr('content'); } else if (doesElementExist('img[itemprop="logo"]', 'src', $)) { @@ -126,7 +138,7 @@ export function fallback(ogObject: OgObjectInteral, options: OpenGraphScraperOpt } // url fallback - if (!ogObject.ogUrl) { + if (!ogObject.ogUrl && shouldFallback('url')) { if (doesElementExist('link[rel="canonical"]', 'href', $)) { ogObject.ogUrl = $('link[rel="canonical"]').attr('href'); } else if (doesElementExist('link[rel="alternate"][hreflang="x-default"]', 'href', $)) { @@ -135,7 +147,7 @@ export function fallback(ogObject: OgObjectInteral, options: OpenGraphScraperOpt } // date fallback - if (!ogObject.ogDate) { + if (!ogObject.ogDate && shouldFallback('date')) { if (doesElementExist('head > meta[name="date"]', 'content', $)) { ogObject.ogDate = $('head > meta[name="date"]').attr('content'); } else if (doesElementExist('[itemprop*="datemodified" i]', 'content', $)) { @@ -152,7 +164,7 @@ export function fallback(ogObject: OgObjectInteral, options: OpenGraphScraperOpt } // favicon fallback - if (!ogObject.favicon) { + if (!ogObject.favicon && shouldFallback('favicon')) { if (doesElementExist('link[rel="shortcut icon"]', 'href', $)) { ogObject.favicon = 
$('link[rel="shortcut icon"]').attr('href'); } else if (doesElementExist('link[rel="icon"]', 'href', $)) { diff --git a/lib/types.ts b/lib/types.ts index b7182ea..2c8def6 100644 --- a/lib/types.ts +++ b/lib/types.ts @@ -15,13 +15,15 @@ export interface ErrorResult { result: OgObject; } +export type OnlyGetOpenGraphInfoItem = 'image' | 'title' | 'description' | 'locale' | 'logo' | 'url' | 'favicon' | 'audioUrl' | 'date'; + /** * The options used by Open Graph Scraper * * @typeParam {string} url - URL of the site. (Required) * @typeParam {string} [html] - You can pass in an HTML string to run ogs on it. (use without options.url) * @typeParam {string[]} [blacklist] - Pass in an array of sites you don't want ogs to run on. - * @typeParam {boolean} [onlyGetOpenGraphInfo] - Only fetch open graph info and don't fall back on anything else. + * @typeParam {boolean | OnlyGetOpenGraphInfoItem[]} [onlyGetOpenGraphInfo] - Only fetch open graph info and don't fall back on anything else. Pass an array to disable the fallback only for the listed properties. * @typeParam {CustomMetaTags} [customMetaTags] - Here you can define custom meta tags you want to scrape. * @typeParam {Request} [fetchOptions] - The options passed into fetch. * @typeParam {number} [timeout] - Number of seconds before the fetch request ends. 
(default is 10 seconds) @@ -32,7 +34,7 @@ export interface OpenGraphScraperOptions { customMetaTags?: CustomMetaTags[]; fetchOptions?: RequestInit; html?: string; - onlyGetOpenGraphInfo?: boolean; + onlyGetOpenGraphInfo?: boolean | OnlyGetOpenGraphInfoItem[]; timeout?: number; url?: string; urlValidatorSettings?: ValidatorSettings; diff --git a/tests/unit/openGraphScraper.spec.ts b/tests/unit/openGraphScraper.spec.ts index 8cf9513..98101ed 100644 --- a/tests/unit/openGraphScraper.spec.ts +++ b/tests/unit/openGraphScraper.spec.ts @@ -149,6 +149,20 @@ describe('return ogs', function () { }); }); + it('when it should not fallback to image elements', function () { + mockAgent.get('http://www.test.com') + .intercept({ path: '/' }) + .reply(200, basicHTML); + + return ogs({ url: 'www.test.com', onlyGetOpenGraphInfo: ['image'] }) + .then(function (data) { + expect(data.result.success).to.be.eql(true); + expect(data.result.ogImage).to.be.eql(undefined); + expect(data.html).to.be.eql(basicHTML); + expect(data.response).to.be.a('response'); + }); + }); + it('when meta description exist while og description does not', function () { mockAgent.get('http://www.test.com') .intercept({ path: '/' })