Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create outetts JS library for text-to-speech in the browser, Node.js, Deno, Bun, etc. #42

Merged
merged 28 commits into from
Dec 8, 2024
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Node
node_modules
coverage
33 changes: 33 additions & 0 deletions examples/v2/node-tts/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@

import { HFModelConfig_v1, InterfaceHF } from "outetts";

// Describe which checkpoint to load and how to run it.
const hf_config = new HFModelConfig_v1({
  model_path: "onnx-community/OuteTTS-0.2-500M",
  // Languages supported in v0.2: en, zh, ja, ko
  language: "en",
  // Supported dtypes: fp32, q8, q4
  dtype: "fp32",
});

// Build the text-to-speech interface for model version 0.2.
const tts = await InterfaceHF({ model_version: "0.2", cfg: hf_config });

// Show which default speakers are available.
tts.print_default_speakers();

// Pick one of the default speakers.
const speaker_profile = tts.load_default_speaker("male_1");

// Generation settings.
const generation_options = {
  text: "Speech synthesis is the artificial production of human speech.",
  // Lower temperature values may result in a more stable tone
  temperature: 0.1,
  repetition_penalty: 1.1,
  max_length: 4096,

  // Optional: a speaker profile keeps voice characteristics consistent.
  // Without one, the model will generate a voice with random characteristics.
  speaker: speaker_profile,
};

// Synthesize the speech.
const speech = await tts.generate(generation_options);

// Write the synthesized speech to a file.
speech.save("output.wav");
28 changes: 28 additions & 0 deletions examples/v2/node-tts/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions examples/v2/node-tts/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"name": "node-tts",
"version": "1.0.0",
"main": "index.js",
"type": "module",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "OuteAI",
"license": "Apache-2.0",
"description": "",
"dependencies": {
"outetts": "file:../../.."
}
}
3 changes: 3 additions & 0 deletions outetts.js/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Version identifier exported by this module.
export const __version__ = "0.2.1";

// Public entry points, re-exported from the interface module.
export { InterfaceHF, HFModelConfig_v1 } from "./interface.js";
96 changes: 96 additions & 0 deletions outetts.js/interface.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import { InterfaceHF as _InterfaceHF_v1, HFModelConfig as HFModelConfig_v1 } from "./version/v1/interface.js";

/**
 * Registry of supported model versions, keyed by version string.
 *
 * Each entry records the default tokenizer repository, the available model
 * sizes, reference links, the supported language codes, the interface class
 * used to load the model, and the maximum sequence length for that version.
 *
 * Note: `Object.freeze` is shallow, so only the top-level registry is frozen.
 */
const MODEL_CONFIGS = Object.freeze({
  // TODO: Add support for 0.1 model
  // "0.1": {
  //   tokenizer: "onnx-community/OuteTTS-0.1-350M",
  //   sizes: ["350M"],
  //   links: ["https://huggingface.co/onnx-community/OuteTTS-0.1-350M"],
  //   languages: ["en"],
  //   hf_interface: _InterfaceHF_v1,
  //   max_seq_length: 4096,
  // },
  "0.2": {
    tokenizer: "onnx-community/OuteTTS-0.2-500M",
    sizes: ["500M"],
    links: ["https://huggingface.co/onnx-community/OuteTTS-0.2-500M"],
    languages: ["en", "ja", "ko", "zh"],
    hf_interface: _InterfaceHF_v1,
    max_seq_length: 4096,
  },
});

/**
 * Print a human-readable summary of every entry in MODEL_CONFIGS to the
 * console: version, supported languages, model sizes, format, tokenizer and
 * links, with each entry framed by a 50-character dashed rule.
 */
function display_available_models() {
  const rule = "-".repeat(50);

  console.log("\n=== Available OuteTTS Models ===\n");

  Object.entries(MODEL_CONFIGS).forEach(([version, { languages, sizes, tokenizer, links }]) => {
    const report = [
      rule,
      `Version: ${version}`,
      `Supported Languages: ${languages.join(", ")}`,
      `Model Sizes: ${sizes.join(", ")}`,
      "Available Formats: HF",
      `Tokenizer: ${tokenizer}`,
      `Links: ${links.join(", ")}`,
      rule + "\n",
    ];
    // One console.log call per line, so the output matches line for line.
    report.forEach((line) => console.log(line));
  });
}

/**
 * Retrieve the configuration for a given model version.
 *
 * @param {keyof typeof MODEL_CONFIGS} version Version identifier for the model.
 * @returns {typeof MODEL_CONFIGS[keyof typeof MODEL_CONFIGS]} The registry entry for `version`.
 * @throws {Error} If `version` is not one of the registry's own keys.
 */
function get_model_config(version) {
  // Use an own-property check: the `in` operator also matches inherited keys
  // such as "toString" or "constructor", which would bypass validation and
  // return a member of Object.prototype instead of a model configuration.
  if (!Object.hasOwn(MODEL_CONFIGS, version)) {
    throw new Error(
      `Unsupported model version '${version}'. Supported versions are: ${Object.keys(MODEL_CONFIGS)}`,
    );
  }
  return MODEL_CONFIGS[version];
}

/**
 * Validate a requested maximum sequence length against the model's limit.
 *
 * @param {number} max_seq_length Requested maximum sequence length. Any falsy
 *   value (e.g. `undefined` or `0`) is treated as "not specified".
 * @param {number} model_max_seq_length Largest sequence length the model supports.
 * @returns {void}
 * @throws {Error} If `max_seq_length` is falsy, or if it is greater than
 *   `model_max_seq_length`.
 */
function check_max_length(max_seq_length, model_max_seq_length) {
  const is_missing = !max_seq_length;
  if (is_missing) {
    throw new Error("max_seq_length must be specified.");
  }

  const within_limit = !(max_seq_length > model_max_seq_length);
  if (within_limit) {
    return;
  }

  throw new Error(
    `Requested max_seq_length (${max_seq_length}) exceeds the maximum supported length (${model_max_seq_length}).`,
  );
}

/**
 * Arguments accepted by {@link InterfaceHF}.
 *
 * @typedef {Object} InterfaceConstructorArgs
 * @property {string} model_version Version identifier of the model to load.
 * @property {HFModelConfig_v1} cfg Configuration object containing parameters.
 * @property {string} cfg.tokenizer_path Path to the tokenizer. When falsy, the
 *   default tokenizer of the selected model version is used.
 * @property {string} cfg.language Language to be used.
 * @property {number} cfg.max_seq_length Maximum sequence length.
 */

/**
 * Creates and returns a Hugging Face model interface for OuteTTS.
 *
 * Note that `cfg` is modified in place: `cfg.tokenizer_path` receives the
 * version's default tokenizer when it is not set, and `cfg.languages` is set
 * to the list of languages supported by the selected model version.
 *
 * @param {InterfaceConstructorArgs} inputs
 * @returns {Promise<_InterfaceHF_v1>} An instance of the interface based on the specified version.
 * @throws {Error} If the model version is unsupported, if the specified
 *   language is not supported by the model version, or if `cfg.max_seq_length`
 *   is missing or exceeds the limit of the model version.
 */
export async function InterfaceHF({ model_version, cfg }) {
  const {
    tokenizer: default_tokenizer,
    languages: supported_languages,
    hf_interface,
    max_seq_length: model_max_seq_length,
  } = get_model_config(model_version);

  // Fall back to the version's default tokenizer when none was provided.
  cfg.tokenizer_path = cfg.tokenizer_path || default_tokenizer;

  const is_language_supported = supported_languages.includes(cfg.language);
  if (!is_language_supported) {
    throw new Error(
      `Language '${cfg.language}' is not supported by model version '${model_version}'. Supported languages are: ${supported_languages}`,
    );
  }
  cfg.languages = supported_languages;

  check_max_length(cfg.max_seq_length, model_max_seq_length);

  const loaded_interface = await hf_interface.load(cfg);
  return loaded_interface;
}

// Re-export the v1 configuration class imported at the top of this module.
export { HFModelConfig_v1 };
19 changes: 19 additions & 0 deletions outetts.js/version/v1/audio_codec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { Tensor } from "@huggingface/transformers";

/**
 * Wraps a wavtokenizer callable and turns audio codes into a waveform.
 */
export class AudioCodec {
  /**
   * @param {Function} wavtokenizer Callable invoked as `wavtokenizer({ codes })`;
   *   it must resolve to an object exposing a `waveform` property.
   */
  constructor(wavtokenizer) {
    this.wavtokenizer = wavtokenizer;
    // NOTE(review): presumably the sample rate in Hz — confirm against the model.
    this.sr = 24000;
  }

  /**
   * Decode a list of audio codes into a waveform.
   * @param {bigint[]} codes
   * @returns {Promise<Tensor>} The generated waveform.
   */
  async decode(codes) {
    // Pack the codes into a single-row int64 tensor: one row, one column per code.
    const shape = [1, codes.length];
    const codes_tensor = new Tensor("int64", codes, shape);

    const outputs = await this.wavtokenizer({ codes: codes_tensor });
    return outputs.waveform;
  }
}
43 changes: 43 additions & 0 deletions outetts.js/version/v1/default_speakers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import en_female_1 from "./default_speakers/en_female_1.json" assert { type: "json" };
import en_female_2 from "./default_speakers/en_female_2.json" assert { type: "json" };
import en_male_1 from "./default_speakers/en_male_1.json" assert { type: "json" };
import en_male_2 from "./default_speakers/en_male_2.json" assert { type: "json" };
import en_male_3 from "./default_speakers/en_male_3.json" assert { type: "json" };
import en_male_4 from "./default_speakers/en_male_4.json" assert { type: "json" };
import ja_female_1 from "./default_speakers/ja_female_1.json" assert { type: "json" };
import ja_female_2 from "./default_speakers/ja_female_2.json" assert { type: "json" };
import ja_female_3 from "./default_speakers/ja_female_3.json" assert { type: "json" };
import ja_male_1 from "./default_speakers/ja_male_1.json" assert { type: "json" };
import ko_female_1 from "./default_speakers/ko_female_1.json" assert { type: "json" };
import ko_female_2 from "./default_speakers/ko_female_2.json" assert { type: "json" };
import ko_male_1 from "./default_speakers/ko_male_1.json" assert { type: "json" };
import ko_male_2 from "./default_speakers/ko_male_2.json" assert { type: "json" };
import zh_female_1 from "./default_speakers/zh_female_1.json" assert { type: "json" };
import zh_male_1 from "./default_speakers/zh_male_1.json" assert { type: "json" };

xenova marked this conversation as resolved.
Show resolved Hide resolved
/**
 * Default speaker profiles, grouped first by language code and then by
 * speaker name. Each value is the JSON profile imported at the top of this
 * module.
 */
export default {
  // English
  en: {
    male_1: en_male_1,
    male_2: en_male_2,
    male_3: en_male_3,
    male_4: en_male_4,
    female_1: en_female_1,
    female_2: en_female_2,
  },
  // Japanese
  ja: {
    male_1: ja_male_1,
    female_1: ja_female_1,
    female_2: ja_female_2,
    female_3: ja_female_3,
  },
  // Korean
  ko: {
    male_1: ko_male_1,
    male_2: ko_male_2,
    female_1: ko_female_1,
    female_2: ko_female_2,
  },
  // Chinese
  zh: {
    male_1: zh_male_1,
    female_1: zh_female_1,
  },
};
Loading