Skip to content

Commit

Permalink
feat: localizable images through urls in captions
Browse files Browse the repository at this point in the history
  • Loading branch information
hatton committed Jul 26, 2022
1 parent 5ceb7f2 commit 0f9fb80
Show file tree
Hide file tree
Showing 4 changed files with 203 additions and 30 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ notion-pull-mdx makes some attempt to keep the right order of things, but there

Localize your files in Crowdin (or whatever) based on the markdown files, not in Notion. For how to do this with Docusaurus, see [Docusaurus i18n](https://docusaurus.io/docs/i18n/crowdin).

You may also need to localize screenshots. Crowdin can handle localizing assets, but this library currently supports a different approach. If you place, for example, `fr https://imgur.com/1234.png` in the caption of a screenshot in Notion, `notion-pull-mdx` will fetch that image and save it locally with the same name as the primary screenshot, but with the upper-cased language code appended (here, "-FR"). So you'd get, for example, `static\img\9876.png` and `static\img\9876-FR.png`. To get the French version to show, you need to add that "-FR" to the markdown image link when you localize the page's text in Crowdin. In the future, this change to the markdown could perhaps be made automatic, so that each language gets the right image version without manual edits.

# Automated builds with Github Actions

Here is a working Github Action script to copy and customize: https://github.com/BloomBooks/bloom-docs/blob/master/.github/workflows/release.yml
2 changes: 1 addition & 1 deletion src/CustomTranformers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ async function notionColumnToMarkdown(
notionClient: Client,
block: ListBlockChildrenResponseResult
): Promise<string> {
console.log(JSON.stringify(block));
//console.log(JSON.stringify(block));
const { id, has_children } = block as any; // "any" because the notion api type system is complex with a union that don't know how to help TS to cope with

if (!has_children) return "";
Expand Down
134 changes: 127 additions & 7 deletions src/NotionImage.spec.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,134 @@
import { parseImageBlock } from "./NotionImage";

// Stand-in for the url of the primary (non-localized) image. The fixtures in
// this file use it as `image.file.url`, and the tests expect it back as `primaryUrl`.
const kPrimaryImageUrl =
"https://s3.us-west-2.amazonaws.com/primaryImage.png?Blah=foo";

/* eslint-disable @typescript-eslint/require-await */
describe("image caption", () => {
it("should find a caption", async () => {
const img = parseImageBlock(kImageBlock);
expect(img.url).toBe("https://someimage.png");
});
test("finds primary image url", async () => {
const img = parseImageBlock(kImageBlockWithTwoLocalizedImages);
expect(img.primaryUrl).toBe(kPrimaryImageUrl);
});

test("primary caption content after image links are removed", async () => {
const img = parseImageBlock(
kImageBlockWithTwoLocalizedImagesWrappedWithActualCaptionText
);
expect(img.caption).toBe("Caption before images.\nCaption after images.");
});

const kImageBlock = {
test("gets localized image links", async () => {
const img = parseImageBlock(
kImageBlockWithTwoLocalizedImagesWrappedWithActualCaptionText
);
expect(img.localizedUrls.length).toBe(2);
expect(img.localizedUrls[0].iso632Code).toBe("FR");
expect(img.localizedUrls[1].iso632Code).toBe("ES");
expect(img.localizedUrls[0].url).toBe("https://i.imgur.com/pYmE7OJ.png");
expect(img.localizedUrls[1].url).toBe("https://i.imgur.com/8paSZ0i.png");
});

// Fixture: a Notion image block whose caption mixes ordinary caption text with
// two localized-image lines (a language code followed by an image url).
// Joining the `plain_text` of the five caption segments yields:
//   Caption before images.\nfr https://i.imgur.com/pYmE7OJ.png\nES https://i.imgur.com/8paSZ0i.png\nCaption after images.
// NOTE(review): in three segments `text.content` does not match `plain_text`
// (e.g. "fr-" vs "\nfr ", and a missing trailing period). The expectations in
// this spec correspond to the `plain_text` values — confirm nothing reads
// `text.content` before relying on it.
const kImageBlockWithTwoLocalizedImagesWrappedWithActualCaptionText = {
object: "block",
id: "20b821b4-7c5b-41dc-8e30-92c23c125580",
parent: { type: "page_id", page_id: "9dd05134-0401-47f6-b159-1e6b76b9aad3" },
created_time: "2022-07-25T23:05:00.000Z",
last_edited_time: "2022-07-26T15:31:00.000Z",
created_by: { object: "user", id: "11fb7f16-0560-4aee-ab88-ed75a850cfc4" },
last_edited_by: {
object: "user",
id: "11fb7f16-0560-4aee-ab88-ed75a850cfc4",
},
has_children: false,
archived: false,
type: "image",
image: {
caption: [
{
type: "text",
text: { content: "Caption before images. fr-", link: null },
annotations: {
bold: false,
italic: false,
strikethrough: false,
underline: false,
code: false,
color: "default",
},
plain_text: "Caption before images.\nfr ",
href: null,
},
{
type: "text",
text: {
content: "https://i.imgur.com/pYmE7OJ.png",
link: { url: "https://i.imgur.com/pYmE7OJ.png" },
},
annotations: {
bold: false,
italic: false,
strikethrough: false,
underline: false,
code: false,
color: "default",
},
plain_text: "https://i.imgur.com/pYmE7OJ.png",
href: "https://i.imgur.com/pYmE7OJ.png",
},
{
type: "text",
text: { content: " es-", link: null },
annotations: {
bold: false,
italic: false,
strikethrough: false,
underline: false,
code: false,
color: "default",
},
plain_text: "\nES ",
href: null,
},
{
type: "text",
text: {
content: "https://i.imgur.com/8paSZ0i.png",
link: { url: "https://i.imgur.com/8paSZ0i.png" },
},
annotations: {
bold: false,
italic: false,
strikethrough: false,
underline: false,
code: false,
color: "default",
},
plain_text: "https://i.imgur.com/8paSZ0i.png",
href: "https://i.imgur.com/8paSZ0i.png",
},
{
type: "text",
text: { content: "\nCaption after images", link: null },
annotations: {
bold: false,
italic: false,
strikethrough: false,
underline: false,
code: false,
color: "default",
},
plain_text: "\nCaption after images.",
href: null,
},
],
type: "file",
file: {
// the primary image; the localized variants live only in the caption above
url: kPrimaryImageUrl,
expiry_time: "2022-07-26T16:35:44.029Z",
},
},
};

const kImageBlockWithTwoLocalizedImages = {
object: "block",
id: "20b821b4-7c5b-41dc-8e30-92c23c125580",
parent: {
Expand Down Expand Up @@ -95,7 +215,7 @@ const kImageBlock = {
],
type: "file",
file: {
url: "https://someimage.png",
url: kPrimaryImageUrl,
expiry_time: "2022-07-26T00:19:09.096Z",
},
},
Expand Down
95 changes: 73 additions & 22 deletions src/NotionImage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ export async function initImageHandling(
}

async function saveImage(
url: string,
imageSet: ImageSet,
imageFolderPath: string
): Promise<string> {
const response = await fetch(url);
const response = await fetch(imageSet.primaryUrl);
const arrayBuffer = await response.arrayBuffer();
const buffer = Buffer.from(arrayBuffer);
const fileType = await FileType.fromBuffer(buffer);
Expand All @@ -35,29 +35,55 @@ async function saveImage(
// Images that are stored by notion come to us with a complex url that changes over time, so we pick out the UUID that doesn't change. Example:
// https://s3.us-west-2.amazonaws.com/secure.notion-static.com/d1058f46-4d2f-4292-8388-4ad393383439/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20220516%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20220516T233630Z&X-Amz-Expires=3600&X-Amz-Signature=f215704094fcc884d37073b0b108cf6d1c9da9b7d57a898da38bc30c30b4c4b5&X-Amz-SignedHeaders=host&x-id=GetObject

let thingToHash = url;
const m = /.*secure\.notion-static\.com\/(.*)\//gm.exec(url);
let thingToHash = imageSet.primaryUrl;
const m = /.*secure\.notion-static\.com\/(.*)\//gm.exec(
imageSet.primaryUrl
);
if (m && m.length > 1) {
thingToHash = m[1];
}

const hash = hashOfString(thingToHash);
const outputFileName = `${hash}.${fileType.ext}`;
const path = imageFolderPath + "/" + outputFileName;
imageWasSeen(path);
if (!fs.pathExistsSync(path)) {
// // I think that this ok that this is writing async as we continue
console.log("Adding image " + path);
fs.createWriteStream(path).write(buffer);
const primaryFilePath = writeImageIfNew(
imageFolderPath,
outputFileName,
buffer
);

// if there are localized images, save them too, using the same
// name as the primary but with their language code attached
for (const localizedImage of imageSet.localizedUrls) {
const outputFileName = `${hash}-${localizedImage.iso632Code}.${fileType.ext}`;
console.log("Saving localized image to " + outputFileName);
const response = await fetch(localizedImage.url);
const arrayBuffer = await response.arrayBuffer();
const buffer = Buffer.from(arrayBuffer);
writeImageIfNew(imageFolderPath, outputFileName, buffer);
}
return outputFileName;

return primaryFilePath;
} else {
console.error(
`Something wrong with the filetype extension on the blob we got from ${url}`
`Something wrong with the filetype extension on the blob we got from ${imageSet.primaryUrl}`
);
return "error";
}
}
/**
 * Writes an image buffer into the image folder unless a file with that name
 * is already there.
 *
 * The target path is passed to `imageWasSeen` on every call, whether or not
 * anything ends up being written.
 *
 * @param imageFolderPath - folder that receives the image
 * @param outputFileName - file name (including extension) to use in that folder
 * @param buffer - raw image bytes to write
 * @returns `outputFileName` unchanged (the bare file name, not the full path)
 */
function writeImageIfNew(
  imageFolderPath: string,
  outputFileName: string,
  buffer: Buffer
): string {
  const path = imageFolderPath + "/" + outputFileName;
  imageWasSeen(path);
  if (!fs.pathExistsSync(path)) {
    console.log("Adding image " + path);
    // Still asynchronous and not awaited. end() is used instead of a bare
    // write() so that the stream is finished and closed after the buffer is
    // flushed; write() alone leaves the stream (and its file descriptor) open.
    fs.createWriteStream(path).end(buffer);
  }
  return outputFileName;
}

function hashOfString(s: string) {
let hash = 0;
for (let i = 0; i < s.length; ++i)
Expand All @@ -66,33 +92,58 @@ function hashOfString(s: string) {
return Math.abs(hash);
}

type LocalizableImageWithCaption = {
url: string;
// we parse a notion image and its caption into what we need, which includes any urls to localized versions of the image that may be embedded in the caption
type ImageSet = {
primaryUrl: string;
caption?: string;
localizedUrls: Array<{ iso632Code: string; url: string }>;
};
export function parseImageBlock(b: any): LocalizableImageWithCaption {
const img: LocalizableImageWithCaption = {
url: "",
export function parseImageBlock(b: any): ImageSet {
const imageSet: ImageSet = {
primaryUrl: "",
caption: "",
localizedUrls: [],
};

if ("file" in b.image) {
img.url = b.image.file.url; // image saved on notion (actually AWS)
imageSet.primaryUrl = b.image.file.url; // image saved on notion (actually AWS)
} else {
img.url = b.image.external.url; // image still pointing somewhere else. I've see this happen when copying a Google Doc into Notion. Notion kep pointing at the google doc.
imageSet.primaryUrl = b.image.external.url; // image still pointing somewhere else. I've see this happen when copying a Google Doc into Notion. Notion kep pointing at the google doc.
}

return img;
const mergedCaption: string = b.image.caption
// eslint-disable-next-line @typescript-eslint/no-unsafe-return
.map((c: any) => c.plain_text)
.join("");
const lines = mergedCaption.split("\n");

// Example:
// Caption before images.\nfr https://i.imgur.com/pYmE7OJ.png\nES https://i.imgur.com/8paSZ0i.png\nCaption after images

lines.forEach(l => {
const match = /\s*(..)\s*(https:\/\/.*)/.exec(l);
if (match) {
imageSet.localizedUrls.push({
iso632Code: match[1].toUpperCase(),
url: match[2],
});
} else {
imageSet.caption += l + "\n";
}
});
imageSet.caption = imageSet.caption?.trim();
//console.log(JSON.stringify(imageSet, null, 2));

return imageSet;
}

// Download the image if we don't have it, give it a good name, and
// change the src to point to our copy of the image.
export async function processImageBlock(b: any): Promise<void> {
//console.log(JSON.stringify(b));
const img = parseImageBlock(b);

const newPath =
imagePrefix + "/" + (await saveImage(img.url, imageOutputPath));
const newPath = imagePrefix + "/" + (await saveImage(img, imageOutputPath));

// change the src to point to our copy of the image
if ("file" in b.image) {
Expand Down

0 comments on commit 0f9fb80

Please sign in to comment.