Skip to content

Commit

Permalink
fix: enhancing PDF feature
Browse files Browse the repository at this point in the history
  • Loading branch information
AhmadMuj committed Apr 11, 2024
1 parent 4520eb6 commit beeaf5a
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 33 deletions.
46 changes: 18 additions & 28 deletions apps/workers/openaiWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,32 +77,22 @@ export class OpenAiWorker {
}
}

function promptFactory(type: "text" | "web" | "pdf" | "image") {
const typeContent = {
text: "User Note",
web: "HTML page",
pdf: "PDF file",
image: "Image",
};
return `I'm building a read-it-later app and I need your help with automatic tagging.
${
type === "web" || type === "pdf" || type === "text"
? `You are currently analyzing the content of a ${typeContent[type]}, please analyze the content after the sentence "CONTENT START HERE:"`
: `Please analyze the attached image`
}
Suggest relevant tags that describe its key themes, topics, and main ideas.
const IMAGE_PROMPT_BASE = `
I'm building a read-it-later app and I need your help with automatic tagging.
Please analyze the attached image and suggest relevant tags that describe its key themes, topics, and main ideas.
Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres. The tags language must be ${serverConfig.inference.inferredTagLang}.
If the tag is not generic enough, don't include it. Aim for 5-8 tags.
If there are no good tags, don't emit any.
If the tag is not generic enough, don't include it. Aim for 10-15 tags. If there are no good tags, don't emit any. You must respond in valid JSON
with the key "tags" and the value is list of tags. Don't wrap the response in a markdown code.`;

const TEXT_PROMPT_BASE = `
I'm building a read-it-later app and I need your help with automatic tagging.
Please analyze the text after the sentence "CONTENT START HERE:" and suggest relevant tags that describe its key themes, topics, and main ideas.
Aim for a variety of tags, including broad categories, specific keywords, and potential sub-genres. The tags language must be ${serverConfig.inference.inferredTagLang}. If it's a famous website
you may also include a tag for the website. If the tag is not generic enough, don't include it. Aim for 3-5 tags. If there are no good tags, don't emit any.
The content can include text for cookie consent and privacy policy, ignore those while tagging.
You must respond in JSON with the key "tags" and the value is list of tags.
In addition to the tags key, you should include a description key which includes a text that describes the content of the ${typeContent[type]}.
Don't wrap the response in a markdown code.`;
}

const TEXT_PROMPT = promptFactory("text");
const WEB_PROMPT = promptFactory("web");
const IMAGE_PROMPT = promptFactory("image");
const PDF_PROMPT = promptFactory("pdf");
CONTENT START HERE:
`;

function buildPrompt(
bookmark: NonNullable<Awaited<ReturnType<typeof fetchBookmark>>>,
Expand All @@ -119,7 +109,7 @@ function buildPrompt(
content = truncateContent(content);
}
return `
${WEB_PROMPT}
${TEXT_PROMPT_BASE}
URL: ${bookmark.link.url}
Title: ${bookmark.link.title ?? ""}
Description: ${bookmark.link.description ?? ""}
Expand All @@ -131,7 +121,7 @@ Content: ${content ?? ""}
const content = truncateContent(bookmark.text.text ?? "");
// TODO: Ensure that the content doesn't exceed OpenAI's context length
return `
${TEXT_PROMPT}
${TEXT_PROMPT_BASE}
${content}
`;
}
Expand Down Expand Up @@ -167,7 +157,7 @@ async function inferTagsFromImage(
}
const base64 = asset.toString("base64");
return inferenceClient.inferFromImage(
IMAGE_PROMPT,
IMAGE_PROMPT_BASE,
metadata.contentType,
base64,
);
Expand Down Expand Up @@ -202,7 +192,7 @@ async function inferTagsFromPDF(
})
.where(eq(bookmarkAssets.id, bookmark.id));

const prompt = `${PDF_PROMPT}
const prompt = `${TEXT_PROMPT_BASE}
Content: ${truncateContent(pdfParse.text)}
`;
return inferenceClient.inferFromText(prompt);
Expand Down
1 change: 0 additions & 1 deletion apps/workers/searchWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ async function runIndex(
...(bookmark.asset
? {
content: bookmark.asset.content,
info: bookmark.asset.info,
metadata: bookmark.asset.metadata,
}
: undefined),
Expand Down
4 changes: 1 addition & 3 deletions apps/workers/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,14 @@ export function withTimeout<T, Ret>(

export async function readPDFText(buffer: Buffer): Promise<{
text: string;
metadata: Record<string, string | Record<string, string>>;
metadata: Record<string, string>;
}> {
return new Promise((resolve, reject) => {
// The raw-text flag must be passed as a number (1); reference: https://github.com/modesty/pdf2json/issues/76#issuecomment-236569265
const pdfParser = new PDFParser(null, 1);
pdfParser.on("pdfParser_dataError", reject);
pdfParser.on("pdfParser_dataReady", (pdfData) => {
console.log(pdfParser);
// eslint-disable-next-line
console.log((pdfParser as any).getRawTextContent());
resolve({
// The type isn't set correctly; reference: https://github.com/modesty/pdf2json/issues/327
// eslint-disable-next-line
Expand Down
2 changes: 1 addition & 1 deletion packages/shared/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export const zBookmarkIdxSchema = z.object({
title: z.string().nullish(),
description: z.string().nullish(),
content: z.string().nullish(),
metadata: z.record(z.string()).nullish(),
metadata: z.string().nullish(),
fileName: z.string().nullish(),
createdAt: z.string().nullish(),
note: z.string().nullish(),
Expand Down

0 comments on commit beeaf5a

Please sign in to comment.