feat: implement core structure #8

Merged
merged 33 commits into from
Jan 16, 2024
Commits (33 commits)
836309a
feat: implement core structure
risu729 Jan 9, 2024
4cfe311
chore(bun.lockb): update lockfile
risu729 Jan 9, 2024
c6b6feb
feat: download and split a video file
risu729 Jan 10, 2024
733f5cb
feat: transcribe an interview using whisper
risu729 Jan 10, 2024
63b97aa
fix: add logging
risu729 Jan 10, 2024
b5087f2
fix(src/transcribe.ts): skip splitting if unnecessary
risu729 Jan 10, 2024
279bc9f
Merge remote-tracking branch 'origin/main' into 6-implement-core-feature
risu729 Jan 12, 2024
32ad7af
feat: proofread the transcribed text
risu729 Jan 15, 2024
6d30f44
style(src/ai.ts): run biome
risu729 Jan 15, 2024
76a4fd4
Merge remote-tracking branch 'origin/main' into 6-implement-core-feature
risu729 Jan 15, 2024
921c1de
feat: upload result files to Google Drive
risu729 Jan 15, 2024
3a208f2
feat: reply file urls to command
risu729 Jan 15, 2024
2be81a4
fix(src/main.ts): remove activity since the bot is never online
risu729 Jan 15, 2024
f81b8e0
style: run biome
risu729 Jan 15, 2024
ad971f2
fix(src/ai.ts): define the max file size for Whisper API as const
risu729 Jan 15, 2024
088407a
fix(src/ai.ts): fix error message
risu729 Jan 15, 2024
6adabec
fix(src/commands.ts): fix type assumption
risu729 Jan 15, 2024
3b22458
feat(src/transcribe.ts): remove temp directory after completion
risu729 Jan 15, 2024
92a2971
fix(src/transcribe.ts): log file uploads
risu729 Jan 15, 2024
6811f28
fix(src/commands.ts): improve bot response
risu729 Jan 15, 2024
aa4564b
style(src/commands.ts): run biome
risu729 Jan 15, 2024
02b2dea
style(src/gdrive.ts): use {} instead of undefined for spread
risu729 Jan 15, 2024
1ee9ddc
fix(src/gdrive.ts): upload text instead of stream if the file is conv…
risu729 Jan 15, 2024
4f12e62
fix(src/transcribe.ts): remove duplicated logs
risu729 Jan 15, 2024
45a3a7b
fix(src/gdrive.ts): upload files with filename
risu729 Jan 15, 2024
63427c9
fix(src/gdrive.ts): specify mimeType if converting
risu729 Jan 15, 2024
c2240fd
fix(src/gdrive.ts): remove extension if converting
risu729 Jan 15, 2024
72dca31
fix(src/commands.ts): inline fields of command response
risu729 Jan 15, 2024
55757af
fix: use UUID as filename of temp files to avoid a Whisper error
risu729 Jan 15, 2024
79ecbf0
fix(src/transcribe.ts): use unique-string instead of UUID for filename
risu729 Jan 16, 2024
512f937
style: run biome
risu729 Jan 16, 2024
fd854b7
fix(src/gdrive.ts): fix filename to upload
risu729 Jan 16, 2024
d77c658
Merge remote-tracking branch 'origin/main' into 6-implement-core-feature
risu729 Jan 16, 2024
5 changes: 4 additions & 1 deletion biome.json
@@ -13,7 +13,10 @@
   },
   "linter": {
     "rules": {
-      "all": true
+      "all": true,
+      "nursery": {
+        "noNodejsModules": "off"
+      }
     }
   },
   "json": {
Binary file modified bun.lockb
Binary file not shown.
4 changes: 4 additions & 0 deletions cspell.config.cjs
@@ -21,5 +21,9 @@ module.exports = {
     "knip",
     "commitlint",
     "automerge",
+    "openai",
+    "consola",
+    "gdrive",
+    "ffprobe",
   ],
 };
2 changes: 2 additions & 0 deletions knip.config.ts
@@ -2,10 +2,12 @@ import type { KnipConfig } from "knip";
 
 const config: KnipConfig = {
   ignoreDependencies: [
+    "bun",
     // @commitlint/cli cannot be detected because its binary is named "commitlint"
     // ref: https://knip.dev/guides/handling-issues/#example
     "@commitlint/cli",
   ],
+  ignoreBinaries: ["screen"],
 };
 
 // biome-ignore lint/style/noDefaultExport:
15 changes: 15 additions & 0 deletions package.json
@@ -3,6 +3,8 @@
   "name": "interview-transcriber",
   "private": true,
   "scripts": {
+    "start": "bun src/main.ts",
+    "start:screen": "screen -DRS transcriber bun start",
     "commit": "git-cz",
     "check": "npm-run-all check:*",
     "check:biome": "biome check --apply-unsafe .",
@@ -12,6 +14,17 @@
     "ignore-sync": "ignore-sync .",
     "prepare": "husky install"
   },
+  "dependencies": {
+    "@google/generative-ai": "0.1.3",
+    "@googleapis/drive": "8.5.0",
+    "consola": "3.2.3",
+    "csv-parse": "5.5.3",
+    "discord.js": "14.14.1",
+    "fluent-ffmpeg": "2.1.2",
+    "mime": "4.0.1",
+    "openai": "4.24.1",
+    "unique-string": "3.0.0"
+  },
   "devDependencies": {
     "@biomejs/biome": "1.5.2",
     "@commitlint/cli": "18.4.4",
@@ -21,6 +34,8 @@
     "@cspell/cspell-types": "8.3.2",
     "@tsconfig/bun": "1.0.1",
     "@tsconfig/strictest": "2.0.2",
+    "@types/fluent-ffmpeg": "2.1.24",
+    "@types/node": "20.10.8",
     "bun-types": "1.0.22",
     "commitizen": "4.3.0",
     "cspell": "8.3.2",
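For orientation, the new runtime dependencies map onto the features in the commit list above: discord.js for the bot and its command replies, fluent-ffmpeg (typed via @types/fluent-ffmpeg) for splitting the downloaded video, openai and @google/generative-ai for the Whisper transcription and GPT-4/Gemini proofreading in src/ai.ts, @googleapis/drive for uploading the result files, consola for logging, and unique-string for temporary filenames.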
Empty file removed src/.gitkeep
Empty file.
115 changes: 115 additions & 0 deletions src/ai.ts
@@ -0,0 +1,115 @@
import { createReadStream } from "node:fs";
import { GoogleGenerativeAI } from "@google/generative-ai";
import { env } from "bun";
import openAi from "openai";
import { SupportedLanguages } from "./transcribe";

/**
 * OpenAI API client.
 */
export const openaiClient = new openAi({
  apiKey: env.OPENAI_API_KEY,
});

/**
 * Maximum file size for Whisper API.
 * @see https://platform.openai.com/docs/api-reference/speech-to-text
 */
export const whisperMaxFileSize = 25 * 1000 * 1000;

/**
 * Gemini API client.
 */
export const geminiClient = new GoogleGenerativeAI(env.GEMINI_API_KEY);

/**
 * Transcribe an audio file.
 * @param audioFilePath Path to the audio file
 * @param language Language of the audio file
 * @returns Transcribed text segments
 */
export const transcribeAudioFile = async (
  audioFilePath: string,
  language: SupportedLanguages,
): Promise<string[]> => {
  const response = (await openaiClient.audio.transcriptions.create({
    file: createReadStream(audioFilePath),
    model: "whisper-1",
    language,
    prompt:
      language === "en"
        ? "Hello. This is an interview, and you transcribe it."
        : "こんにちは。これはインタビューの録音で、文字起こしをします。",
    // biome-ignore lint/style/useNamingConvention: library's naming convention
    response_format: "verbose_json",
  })) as openAi.Audio.Transcriptions.Transcription & {
    segments: {
      text: string;
    }[];
  }; // cast since the library doesn't support verbose_json

  return response.segments.map((segment) => segment.text);
};

/**
 * Proofread a transcription.
 * @param transcription Transcription to proofread
 * @param language Language of the transcription
 * @param model AI model to use
 * @param prompt System prompt to use
 * @returns Proofread transcription
 */
export const proofreadTranscription = async <M extends "gpt-4" | "gemini-pro">(
  transcription: string,
  language: SupportedLanguages,
  model: M,
): Promise<{ model: M; prompt: string; response: string }> => {
  const systemPrompt = `You are a web media proofreader.
The text ${model === "gpt-4" ? "entered by the user" : "below"} is a transcription of the interview.
Follow the guide below and improve it.
- Remove redundant or repeating expressions.
- Remove fillers.
- Correct grammar errors.
- Replace unnatural or difficult wordings.
- Shorten sentences.
The output style should be the style of an interview, like \`interviewer: \` or \`interviewee\`.
${
  language === "en"
    ? "The response must not include markdown syntax."
    : "The response must be in Japanese without markdown syntax."
}`;

  let result = "";
  if (model === "gpt-4") {
    const response = await openaiClient.chat.completions.create({
      messages: [
        {
          role: "system",
          content: systemPrompt,
        },
        {
          role: "user",
          content: transcription,
        },
      ],
      model,
    });
    result = response.choices[0]?.message.content ?? "";
  } else {
    const response = await geminiClient
      .getGenerativeModel({
        model,
      })
      .generateContent(`${systemPrompt}\n\n---\n\n${transcription}`);
    result = response.response.text();
  }
  if (!result) {
    throw new Error("The response is empty.");
  }

  return {
    model,
    prompt: systemPrompt,
    response: result,
  };
};
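For readers skimming the diff, here is a minimal sketch of how these exports might be called from the rest of the bot. It is illustrative only and not part of this PR: the audio path is made up, SupportedLanguages from src/transcribe.ts is assumed to resolve to "en" | "ja" based on the prompts above, and OPENAI_API_KEY / GEMINI_API_KEY are assumed to be set.

// Hypothetical caller, not part of this PR.
import { proofreadTranscription, transcribeAudioFile } from "./ai";

// Whisper returns the transcription as an array of segment strings.
const segments = await transcribeAudioFile("./tmp/interview.m4a", "ja");
const transcription = segments.join("\n");

// Proofreading returns the model, system prompt, and response together,
// so all three can be uploaded alongside the raw transcription.
const proofread = await proofreadTranscription(transcription, "ja", "gemini-pro");
console.log(proofread.response);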