From 0f9fb80d8b36bbdd1c1b4da99dc6f2f424389421 Mon Sep 17 00:00:00 2001 From: Hatton Date: Tue, 26 Jul 2022 14:00:40 -0600 Subject: [PATCH] feat: localizable images through urls in captions --- README.md | 2 + src/CustomTranformers.ts | 2 +- src/NotionImage.spec.ts | 134 +++++++++++++++++++++++++++++++++++++-- src/NotionImage.ts | 95 ++++++++++++++++++++------- 4 files changed, 203 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index f2570bb..d2937c8 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,8 @@ notion-pull-mdx makes some attempt to keep the right order of things, but there Localize your files in Crowdin (or whatever) based on the markdown files, not in Notion. For how to do this with Docusaurus, see [Docusaurus i18n](https://docusaurus.io/docs/i18n/crowdin). +You may also need to localize screenshots. Crowdin can also handle localizing assets, but this library currently supports a different approach. If you place, for example, `fr https://imgur.com/1234.png` in the caption of a screenshot in Notion, `notion-pull-mdx` will fetch that image and save it locally with the same name as the primary screenshot, but with the upper-cased language code (here "-FR") appended. So you'd get, for example, `static/img/9876.png` and `static/img/9876-FR.png`. To get the French version to show, you'd need to add that "-FR" to the markdown link when you localize the page's text in Crowdin. If there is a way, this modification of the markdown may be made automatic in the future so that you automatically get the right image version. 
+ # Automated builds with Github Actions Here is a working Github Action script to copy and customize: https://github.com/BloomBooks/bloom-docs/blob/master/.github/workflows/release.yml diff --git a/src/CustomTranformers.ts b/src/CustomTranformers.ts index 65df35f..109df37 100644 --- a/src/CustomTranformers.ts +++ b/src/CustomTranformers.ts @@ -51,7 +51,7 @@ async function notionColumnToMarkdown( notionClient: Client, block: ListBlockChildrenResponseResult ): Promise { - console.log(JSON.stringify(block)); + //console.log(JSON.stringify(block)); const { id, has_children } = block as any; // "any" because the notion api type system is complex with a union that don't know how to help TS to cope with if (!has_children) return ""; diff --git a/src/NotionImage.spec.ts b/src/NotionImage.spec.ts index d6f32cb..723b427 100644 --- a/src/NotionImage.spec.ts +++ b/src/NotionImage.spec.ts @@ -1,14 +1,134 @@ import { parseImageBlock } from "./NotionImage"; +const kPrimaryImageUrl = + "https://s3.us-west-2.amazonaws.com/primaryImage.png?Blah=foo"; + /* eslint-disable @typescript-eslint/require-await */ -describe("image caption", () => { - it("should find a caption", async () => { - const img = parseImageBlock(kImageBlock); - expect(img.url).toBe("https://someimage.png"); - }); +test("finds primary image url", async () => { + const img = parseImageBlock(kImageBlockWithTwoLocalizedImages); + expect(img.primaryUrl).toBe(kPrimaryImageUrl); +}); + +test("primary caption content after image links are removed", async () => { + const img = parseImageBlock( + kImageBlockWithTwoLocalizedImagesWrappedWithActualCaptionText + ); + expect(img.caption).toBe("Caption before images.\nCaption after images."); }); -const kImageBlock = { +test("gets localized image links", async () => { + const img = parseImageBlock( + kImageBlockWithTwoLocalizedImagesWrappedWithActualCaptionText + ); + expect(img.localizedUrls.length).toBe(2); + expect(img.localizedUrls[0].iso632Code).toBe("FR"); + 
expect(img.localizedUrls[1].iso632Code).toBe("ES"); + expect(img.localizedUrls[0].url).toBe("https://i.imgur.com/pYmE7OJ.png"); + expect(img.localizedUrls[1].url).toBe("https://i.imgur.com/8paSZ0i.png"); +}); + +const kImageBlockWithTwoLocalizedImagesWrappedWithActualCaptionText = { + object: "block", + id: "20b821b4-7c5b-41dc-8e30-92c23c125580", + parent: { type: "page_id", page_id: "9dd05134-0401-47f6-b159-1e6b76b9aad3" }, + created_time: "2022-07-25T23:05:00.000Z", + last_edited_time: "2022-07-26T15:31:00.000Z", + created_by: { object: "user", id: "11fb7f16-0560-4aee-ab88-ed75a850cfc4" }, + last_edited_by: { + object: "user", + id: "11fb7f16-0560-4aee-ab88-ed75a850cfc4", + }, + has_children: false, + archived: false, + type: "image", + image: { + caption: [ + { + type: "text", + text: { content: "Caption before images. fr-", link: null }, + annotations: { + bold: false, + italic: false, + strikethrough: false, + underline: false, + code: false, + color: "default", + }, + plain_text: "Caption before images.\nfr ", + href: null, + }, + { + type: "text", + text: { + content: "https://i.imgur.com/pYmE7OJ.png", + link: { url: "https://i.imgur.com/pYmE7OJ.png" }, + }, + annotations: { + bold: false, + italic: false, + strikethrough: false, + underline: false, + code: false, + color: "default", + }, + plain_text: "https://i.imgur.com/pYmE7OJ.png", + href: "https://i.imgur.com/pYmE7OJ.png", + }, + { + type: "text", + text: { content: " es-", link: null }, + annotations: { + bold: false, + italic: false, + strikethrough: false, + underline: false, + code: false, + color: "default", + }, + plain_text: "\nES ", + href: null, + }, + { + type: "text", + text: { + content: "https://i.imgur.com/8paSZ0i.png", + link: { url: "https://i.imgur.com/8paSZ0i.png" }, + }, + annotations: { + bold: false, + italic: false, + strikethrough: false, + underline: false, + code: false, + color: "default", + }, + plain_text: "https://i.imgur.com/8paSZ0i.png", + href: 
"https://i.imgur.com/8paSZ0i.png", + }, + { + type: "text", + text: { content: "\nCaption after images", link: null }, + annotations: { + bold: false, + italic: false, + strikethrough: false, + underline: false, + code: false, + color: "default", + }, + plain_text: "\nCaption after images.", + href: null, + }, + ], + type: "file", + file: { + url: kPrimaryImageUrl, + expiry_time: "2022-07-26T16:35:44.029Z", + }, + }, +}; + +const kImageBlockWithTwoLocalizedImages = { object: "block", id: "20b821b4-7c5b-41dc-8e30-92c23c125580", parent: { @@ -95,7 +215,7 @@ const kImageBlock = { ], type: "file", file: { - url: "https://someimage.png", + url: kPrimaryImageUrl, expiry_time: "2022-07-26T00:19:09.096Z", }, }, diff --git a/src/NotionImage.ts b/src/NotionImage.ts index 3226668..faad48f 100644 --- a/src/NotionImage.ts +++ b/src/NotionImage.ts @@ -23,10 +23,10 @@ export async function initImageHandling( } async function saveImage( - url: string, + imageSet: ImageSet, imageFolderPath: string ): Promise { - const response = await fetch(url); + const response = await fetch(imageSet.primaryUrl); const arrayBuffer = await response.arrayBuffer(); const buffer = Buffer.from(arrayBuffer); const fileType = await FileType.fromBuffer(buffer); @@ -35,29 +35,55 @@ async function saveImage( // Images that are stored by notion come to us with a complex url that changes over time, so we pick out the UUID that doesn't change. 
Example: // https://s3.us-west-2.amazonaws.com/secure.notion-static.com/d1058f46-4d2f-4292-8388-4ad393383439/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20220516%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20220516T233630Z&X-Amz-Expires=3600&X-Amz-Signature=f215704094fcc884d37073b0b108cf6d1c9da9b7d57a898da38bc30c30b4c4b5&X-Amz-SignedHeaders=host&x-id=GetObject - let thingToHash = url; - const m = /.*secure\.notion-static\.com\/(.*)\//gm.exec(url); + let thingToHash = imageSet.primaryUrl; + const m = /.*secure\.notion-static\.com\/(.*)\//gm.exec( + imageSet.primaryUrl + ); if (m && m.length > 1) { thingToHash = m[1]; } const hash = hashOfString(thingToHash); const outputFileName = `${hash}.${fileType.ext}`; - const path = imageFolderPath + "/" + outputFileName; - imageWasSeen(path); - if (!fs.pathExistsSync(path)) { - // // I think that this ok that this is writing async as we continue - console.log("Adding image " + path); - fs.createWriteStream(path).write(buffer); + const primaryFilePath = writeImageIfNew( + imageFolderPath, + outputFileName, + buffer + ); + + // if there are localized images, save them too, using the same + // name as the primary but with their language code attached + for (const localizedImage of imageSet.localizedUrls) { + const outputFileName = `${hash}-${localizedImage.iso632Code}.${fileType.ext}`; + console.log("Saving localized image to " + outputFileName); + const response = await fetch(localizedImage.url); + const arrayBuffer = await response.arrayBuffer(); + const buffer = Buffer.from(arrayBuffer); + writeImageIfNew(imageFolderPath, outputFileName, buffer); } - return outputFileName; + + return primaryFilePath; } else { console.error( - `Something wrong with the filetype extension on the blob we got from ${url}` + `Something wrong with the filetype extension on the blob we got from ${imageSet.primaryUrl}` ); return "error"; } } +function writeImageIfNew( + 
imageFolderPath: string, + outputFileName: string, + buffer: Buffer +) { + const path = imageFolderPath + "/" + outputFileName; + imageWasSeen(path); + if (!fs.pathExistsSync(path)) { + console.log("Adding image " + path); + fs.createWriteStream(path).write(buffer); // async but we're not waiting + } + return outputFileName; +} + function hashOfString(s: string) { let hash = 0; for (let i = 0; i < s.length; ++i) @@ -66,33 +92,58 @@ function hashOfString(s: string) { return Math.abs(hash); } -type LocalizableImageWithCaption = { - url: string; +// we parse a notion image and its caption into what we need, which includes any urls to localized versions of the image that may be embedded in the caption +type ImageSet = { + primaryUrl: string; caption?: string; localizedUrls: Array<{ iso632Code: string; url: string }>; }; -export function parseImageBlock(b: any): LocalizableImageWithCaption { - const img: LocalizableImageWithCaption = { - url: "", +export function parseImageBlock(b: any): ImageSet { + const imageSet: ImageSet = { + primaryUrl: "", + caption: "", localizedUrls: [], }; if ("file" in b.image) { - img.url = b.image.file.url; // image saved on notion (actually AWS) + imageSet.primaryUrl = b.image.file.url; // image saved on notion (actually AWS) } else { - img.url = b.image.external.url; // image still pointing somewhere else. I've see this happen when copying a Google Doc into Notion. Notion kep pointing at the google doc. + imageSet.primaryUrl = b.image.external.url; // image still pointing somewhere else. I've see this happen when copying a Google Doc into Notion. Notion kep pointing at the google doc. 
} - return img; + const mergedCaption: string = b.image.caption + // eslint-disable-next-line @typescript-eslint/no-unsafe-return + .map((c: any) => c.plain_text) + .join(""); + const lines = mergedCaption.split("\n"); + + // Example: + // Caption before images.\nfr https://i.imgur.com/pYmE7OJ.png\nES https://i.imgur.com/8paSZ0i.png\nCaption after images + + lines.forEach(l => { + const match = /\s*(..)\s*(https:\/\/.*)/.exec(l); + if (match) { + imageSet.localizedUrls.push({ + iso632Code: match[1].toUpperCase(), + url: match[2], + }); + } else { + imageSet.caption += l + "\n"; + } + }); + imageSet.caption = imageSet.caption?.trim(); + //console.log(JSON.stringify(imageSet, null, 2)); + + return imageSet; } // Download the image if we don't have it, give it a good name, and // change the src to point to our copy of the image. export async function processImageBlock(b: any): Promise { + //console.log(JSON.stringify(b)); const img = parseImageBlock(b); - const newPath = - imagePrefix + "/" + (await saveImage(img.url, imageOutputPath)); + const newPath = imagePrefix + "/" + (await saveImage(img, imageOutputPath)); // change the src to point to our copy of the image if ("file" in b.image) {