diff --git a/app/client/api.ts b/app/client/api.ts index 01f13339..62e85be1 100644 --- a/app/client/api.ts +++ b/app/client/api.ts @@ -43,7 +43,7 @@ export interface LLMUsage { total: number; } -export interface LLMModel { +export interface ModelRecord { name: string; display_name: string; provider?: string; @@ -52,6 +52,10 @@ export interface LLMModel { context_length?: string; is_default?: boolean; family?: string; + vram_required_MB?: number; + buffer_size_required_bytes?: number; + low_resource_required?: boolean; + required_features?: string[]; } export abstract class LLMApi { diff --git a/app/constant.ts b/app/constant.ts index af5ffa59..cc8bbc1d 100644 --- a/app/constant.ts +++ b/app/constant.ts @@ -1,4 +1,4 @@ -import { LLMModel } from "./client/api"; +import { ModelRecord } from "./client/api"; export const OWNER = "mlc-ai"; export const REPO = "web-llm-chat"; @@ -64,91 +64,157 @@ Latex inline format: \\(x^2\\) Latex block format: $$e=mc^2$$ `; -export const DEFAULT_MODELS: LLMModel[] = [ +export const DEFAULT_MODELS: ModelRecord[] = [ + // Llama-3 { - name: "Llama-3-8B-Instruct-q4f32_1-MLC", - display_name: "Llama 3", + name: "Llama-3-8B-Instruct-q4f32_1-MLC-1k", + display_name: "Llama", provider: "Meta", size: "8B", quantization: "q4f32_1", + context_length: "1k", is_default: true, family: "Llama 3", + vram_required_MB: 5295.7, + low_resource_required: true, }, { - name: "Llama-3-8B-Instruct-q4f16_1-MLC", - display_name: "Llama 3", + name: "Llama-3-8B-Instruct-q4f16_1-MLC-1k", + display_name: "Llama", provider: "Meta", size: "8B", quantization: "q4f16_1", + context_length: "1k", family: "Llama 3", + vram_required_MB: 4598.34, + low_resource_required: true, }, { - name: "Llama-3-8B-Instruct-q4f32_1-MLC-1k", - display_name: "Llama 3", + name: "Llama-3-8B-Instruct-q4f32_1-MLC", + display_name: "Llama", provider: "Meta", size: "8B", quantization: "q4f32_1", - context_length: "1k", family: "Llama 3", + vram_required_MB: 6101.01, + low_resource_required: 
false, }, { - name: "Llama-3-8B-Instruct-q4f16_1-MLC-1k", - display_name: "Llama 3", + name: "Llama-3-8B-Instruct-q4f16_1-MLC", + display_name: "Llama", provider: "Meta", size: "8B", quantization: "q4f16_1", - context_length: "1k", family: "Llama 3", + vram_required_MB: 5001.0, + low_resource_required: false, }, { name: "Llama-3-70B-Instruct-q3f16_1-MLC", - display_name: "Llama 3", + display_name: "Llama", provider: "Meta", size: "70B", quantization: "q3f16_1", family: "Llama 3", + vram_required_MB: 31153.13, + low_resource_required: false, }, + // Phi3-mini-instruct + { + name: "Phi-3-mini-4k-instruct-q4f16_1-MLC", + display_name: "Phi", + provider: "Microsoft", + quantization: "q4f16_1", + family: "Phi 3 Mini", + vram_required_MB: 3672.07, + low_resource_required: false, + }, + { + name: "Phi-3-mini-4k-instruct-q4f32_1-MLC", + display_name: "Phi", + provider: "Microsoft", + quantization: "q4f32_1", + family: "Phi 3 Mini", + vram_required_MB: 5483.12, + low_resource_required: false, + }, + { + name: "Phi-3-mini-4k-instruct-q4f16_1-MLC-1k", + display_name: "Phi", + provider: "Microsoft", + quantization: "q4f16_1", + context_length: "1k", + family: "Phi 3 Mini", + vram_required_MB: 2520.07, + low_resource_required: true, + }, + { + name: "Phi-3-mini-4k-instruct-q4f32_1-MLC-1k", + display_name: "Phi", + provider: "Microsoft", + quantization: "q4f32_1", + context_length: "1k", + family: "Phi 3 Mini", + vram_required_MB: 3179.12, + low_resource_required: true, + }, + // Llama-2 { name: "Llama-2-7b-chat-hf-q4f32_1-MLC-1k", - display_name: "Llama 2", + display_name: "Llama", provider: "Meta", size: "7B", quantization: "q4f32_1", context_length: "1k", family: "Llama 2", + vram_required_MB: 5284.01, + low_resource_required: false, }, { name: "Llama-2-7b-chat-hf-q4f16_1-MLC-1k", - display_name: "Llama 2", + display_name: "Llama", provider: "Meta", size: "7B", quantization: "q4f16_1", context_length: "1k", family: "Llama 2", + vram_required_MB: 4618.52, + low_resource_required: false, +
required_features: ["shader-f16"], }, { name: "Llama-2-7b-chat-hf-q4f32_1-MLC", - display_name: "Llama 2", + display_name: "Llama", provider: "Meta", size: "7B", quantization: "q4f32_1", family: "Llama 2", + vram_required_MB: 9109.03, + low_resource_required: false, }, { name: "Llama-2-7b-chat-hf-q4f16_1-MLC", - display_name: "Llama 2", + display_name: "Llama", provider: "Meta", size: "7B", + quantization: "q4f16_1", family: "Llama 2", + vram_required_MB: 6749.02, + low_resource_required: false, + required_features: ["shader-f16"], }, { name: "Llama-2-13b-chat-hf-q4f16_1-MLC", - display_name: "Llama 2", + display_name: "Llama", provider: "Meta", size: "13B", quantization: "q4f16_1", family: "Llama 2", + vram_required_MB: 11814.09, + low_resource_required: false, + required_features: ["shader-f16"], }, + // Mistral variants { name: "WizardMath-7B-V1.1-q4f16_1-MLC", display_name: "WizardMath", @@ -156,6 +222,9 @@ export const DEFAULT_MODELS: LLMModel[] = [ size: "7B", quantization: "q4f16_1", family: "WizardMath", + vram_required_MB: 6079.02, + low_resource_required: false, + required_features: ["shader-f16"], }, { name: "Mistral-7B-Instruct-v0.2-q4f16_1-MLC", @@ -164,31 +233,65 @@ export const DEFAULT_MODELS: LLMModel[] = [ size: "7B", quantization: "q4f16_1", family: "Mistral", + vram_required_MB: 6079.02, + low_resource_required: false, + required_features: ["shader-f16"], }, { name: "OpenHermes-2.5-Mistral-7B-q4f16_1-MLC", - display_name: "OpenHermes 2.5", + display_name: "OpenHermes", provider: "NousResearch", size: "7B", quantization: "q4f16_1", family: "Hermes", + vram_required_MB: 6079.02, + low_resource_required: false, + required_features: ["shader-f16"], }, { name: "NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC", - display_name: "NeuralHermes 2.5", + display_name: "NeuralHermes", provider: "Maxime Labonne", size: "7B", quantization: "q4f16_1", family: "Hermes", + vram_required_MB: 6079.02, + low_resource_required: false, + required_features: ["shader-f16"], + }, 
+ // Hermes-2 Pro + { + name: "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC", + display_name: "Hermes", + provider: "NousResearch", + size: "8B", + quantization: "q4f16_1", + family: "Hermes", + vram_required_MB: 4976.13, + low_resource_required: false, + }, + { + name: "Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC", + display_name: "Hermes", + provider: "NousResearch", + size: "8B", + quantization: "q4f32_1", + family: "Hermes", + vram_required_MB: 6051.27, + low_resource_required: false, }, { name: "Hermes-2-Pro-Mistral-7B-q4f16_1-MLC", - display_name: "Hermes 2 Pro", + display_name: "Hermes", provider: "NousResearch", size: "7B", quantization: "q4f16_1", family: "Hermes", + vram_required_MB: 4033.28, + low_resource_required: false, + required_features: ["shader-f16"], }, + // Gemma-2B { name: "gemma-2b-it-q4f16_1-MLC", display_name: "Gemma", @@ -196,6 +299,10 @@ export const DEFAULT_MODELS: LLMModel[] = [ size: "2B", quantization: "q4f16_1", family: "Gemma", + vram_required_MB: 1476.52, + low_resource_required: false, + buffer_size_required_bytes: 262144000, + required_features: ["shader-f16"], }, { name: "gemma-2b-it-q4f32_1-MLC", @@ -204,6 +311,9 @@ export const DEFAULT_MODELS: LLMModel[] = [ size: "2B", quantization: "q4f32_1", family: "Gemma", + vram_required_MB: 1750.66, + low_resource_required: false, + buffer_size_required_bytes: 262144000, }, { name: "gemma-2b-it-q4f16_1-MLC-1k", @@ -213,6 +323,10 @@ export const DEFAULT_MODELS: LLMModel[] = [ quantization: "q4f16_1", context_length: "1k", family: "Gemma", + vram_required_MB: 1476.52, + low_resource_required: true, + buffer_size_required_bytes: 262144000, + required_features: ["shader-f16"], }, { name: "gemma-2b-it-q4f32_1-MLC-1k", @@ -222,75 +336,97 @@ export const DEFAULT_MODELS: LLMModel[] = [ quantization: "q4f32_1", context_length: "1k", family: "Gemma", + vram_required_MB: 1750.66, + low_resource_required: true, + buffer_size_required_bytes: 262144000, }, + // Qwen-1.5-1.8B { name: "Qwen1.5-1.8B-Chat-q4f16_1-MLC", - 
display_name: "Qwen 1.5", + display_name: "Qwen", provider: "Alibaba", size: "1.8B", quantization: "q4f16_1", family: "Qwen 1.5", + vram_required_MB: 2404.94, + low_resource_required: false, }, { name: "Qwen1.5-1.8B-Chat-q4f32_1-MLC", - display_name: "Qwen 1.5", + display_name: "Qwen", provider: "Alibaba", size: "1.8B", quantization: "q4f32_1", family: "Qwen 1.5", + vram_required_MB: 3313.63, + low_resource_required: false, }, { name: "Qwen1.5-1.8B-Chat-q4f16_1-MLC-1k", - display_name: "Qwen 1.5", + display_name: "Qwen", provider: "Alibaba", size: "1.8B", quantization: "q4f16_1", context_length: "1k", family: "Qwen 1.5", + vram_required_MB: 1828.94, + low_resource_required: true, }, { name: "Qwen1.5-1.8B-Chat-q4f32_1-MLC-1k", - display_name: "Qwen 1.5", + display_name: "Qwen", provider: "Alibaba", size: "1.8B", quantization: "q4f32_1", context_length: "1k", family: "Qwen 1.5", + vram_required_MB: 2161.63, + low_resource_required: true, }, + // StableLM-zephyr-1.6B { name: "stablelm-2-zephyr-1_6b-q4f16_1-MLC", - display_name: "StableLM 2 Zephyr", + display_name: "Zephyr", provider: "Hugging Face H4", size: "1.6B", quantization: "q4f16_1", family: "StableLM 2", + vram_required_MB: 2087.66, + low_resource_required: false, }, { name: "stablelm-2-zephyr-1_6b-q4f32_1-MLC", - display_name: "StableLM 2 Zephyr", + display_name: "Zephyr", provider: "Hugging Face H4", size: "1.6B", quantization: "q4f32_1", family: "StableLM 2", + vram_required_MB: 2999.33, + low_resource_required: false, }, { name: "stablelm-2-zephyr-1_6b-q4f16_1-MLC-1k", - display_name: "StableLM 2 Zephyr", + display_name: "Zephyr", provider: "Hugging Face H4", size: "1.6B", quantization: "q4f16_1", context_length: "1k", family: "StableLM 2", + vram_required_MB: 1511.66, + low_resource_required: true, }, { name: "stablelm-2-zephyr-1_6b-q4f32_1-MLC-1k", - display_name: "StableLM 2 Zephyr", + display_name: "Zephyr", provider: "Hugging Face H4", size: "1.6B", quantization: "q4f32_1", context_length: "1k", 
family: "StableLM 2", + vram_required_MB: 1847.33, + low_resource_required: true, }, + // RedPajama { name: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", display_name: "RedPajama", @@ -298,6 +434,9 @@ export const DEFAULT_MODELS: LLMModel[] = [ size: "3B", quantization: "q4f16_1", family: "RedPajama", + vram_required_MB: 2972.09, + low_resource_required: false, + required_features: ["shader-f16"], }, { name: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC", @@ -306,6 +445,8 @@ export const DEFAULT_MODELS: LLMModel[] = [ size: "3B", quantization: "q4f32_1", family: "RedPajama", + vram_required_MB: 3928.09, + low_resource_required: false, }, { name: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC-1k", @@ -315,6 +456,9 @@ export const DEFAULT_MODELS: LLMModel[] = [ quantization: "q4f16_1", context_length: "1k", family: "RedPajama", + vram_required_MB: 2041.09, + low_resource_required: true, + required_features: ["shader-f16"], }, { name: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC-1k", @@ -324,82 +468,135 @@ export const DEFAULT_MODELS: LLMModel[] = [ quantization: "q4f32_1", context_length: "1k", family: "RedPajama", + vram_required_MB: 2558.09, + low_resource_required: true, }, + // Phi-2 { name: "phi-2-q4f16_1-MLC", - display_name: "Phi 2", + display_name: "Phi", provider: "Microsoft", quantization: "q4f16_1", family: "Phi 2", + vram_required_MB: 3053.97, + low_resource_required: false, + required_features: ["shader-f16"], }, { name: "phi-2-q4f32_1-MLC", - display_name: "Phi 2", + display_name: "Phi", provider: "Microsoft", quantization: "q4f32_1", family: "Phi 2", + vram_required_MB: 4032.48, + low_resource_required: false, }, { name: "phi-2-q4f16_1-MLC-1k", - display_name: "Phi 2", + display_name: "Phi", provider: "Microsoft", quantization: "q4f16_1", context_length: "1k", family: "Phi 2", + vram_required_MB: 2131.97, + low_resource_required: true, + required_features: ["shader-f16"], }, { name: "phi-2-q4f32_1-MLC-1k", - display_name: "Phi 2", + display_name: "Phi", provider: 
"Microsoft", quantization: "q4f32_1", context_length: "1k", family: "Phi 2", + vram_required_MB: 2740.48, + low_resource_required: true, }, + // Phi-1.5 { name: "phi-1_5-q4f16_1-MLC", - display_name: "Phi 1.5", + display_name: "Phi", provider: "Microsoft", quantization: "q4f16_1", family: "Phi 1.5", + vram_required_MB: 1210.09, + low_resource_required: true, + required_features: ["shader-f16"], }, { name: "phi-1_5-q4f32_1-MLC", - display_name: "Phi 1.5", + display_name: "Phi", - provider: "Meta", + provider: "Microsoft", quantization: "q4f32_1", family: "Phi 1.5", + vram_required_MB: 1682.09, + low_resource_required: true, }, { name: "phi-1_5-q4f16_1-MLC-1k", - display_name: "Phi 1.5", + display_name: "Phi", provider: "Microsoft", quantization: "q4f16_1", context_length: "1k", family: "Phi 1.5", + vram_required_MB: 1210.09, + low_resource_required: true, + required_features: ["shader-f16"], }, { name: "phi-1_5-q4f32_1-MLC-1k", - display_name: "Phi 1.5", + display_name: "Phi", provider: "Microsoft", quantization: "q4f32_1", context_length: "1k", family: "Phi 1.5", + vram_required_MB: 1682.09, + low_resource_required: true, }, + // TinyLlama { name: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC", - display_name: "TinyLlama 1.1B", + display_name: "TinyLlama", provider: "Zhang Peiyuan", size: "1.1B", quantization: "q4f16_1", family: "TinyLlama", + vram_required_MB: 697.24, + low_resource_required: true, + required_features: ["shader-f16"], }, { name: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC", - display_name: "TinyLlama 1.1B", + display_name: "TinyLlama", + provider: "Zhang Peiyuan", + size: "1.1B", + quantization: "q4f32_1", + family: "TinyLlama", + vram_required_MB: 839.98, + low_resource_required: true, + }, + { + name: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k", + display_name: "TinyLlama", + provider: "Zhang Peiyuan", + size: "1.1B", + quantization: "q4f16_1", + context_length: "1k", + family: "TinyLlama", + vram_required_MB: 675.24, + low_resource_required: true, + required_features: ["shader-f16"], +
}, + { + name: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC-1k", + display_name: "TinyLlama", provider: "Zhang Peiyuan", size: "1.1B", quantization: "q4f32_1", + context_length: "1k", family: "TinyLlama", + vram_required_MB: 795.98, + low_resource_required: true, }, ]; diff --git a/app/store/config.ts b/app/store/config.ts index 5e42d679..d9453228 100644 --- a/app/store/config.ts +++ b/app/store/config.ts @@ -1,5 +1,5 @@ import { LogLevel } from "@neet-nestor/web-llm"; -import { LLMModel } from "../client/api"; +import { ModelRecord } from "../client/api"; import { DEFAULT_INPUT_TEMPLATE, DEFAULT_MODELS, @@ -48,7 +48,7 @@ export const DEFAULT_CONFIG = { cacheType: "cache" as CacheType, logLevel: "INFO" as LogLevel, customModels: "", - models: DEFAULT_MODELS as any as LLMModel[], + models: DEFAULT_MODELS as any as ModelRecord[], modelConfig: { model: DEFAULT_MODELS[0].name, @@ -110,13 +110,13 @@ export const useAppConfig = createPersistStore( set(() => ({ ...DEFAULT_CONFIG })); }, - mergeModels(newModels: LLMModel[]) { + mergeModels(newModels: ModelRecord[]) { if (!newModels || newModels.length === 0) { return; } const oldModels = get().models; - const modelMap: Record<string, LLMModel> = {}; + const modelMap: Record<string, ModelRecord> = {}; for (const model of oldModels) { modelMap[model.name] = model; @@ -147,12 +147,12 @@ export const useAppConfig = createPersistStore( }), { name: StoreKey.Config, - version: 0.31, + version: 0.32, migrate: (persistedState, version) => { if (version < 0.3) { return { ...(persistedState as ChatConfig), - models: DEFAULT_MODELS as any as LLMModel[], + models: DEFAULT_MODELS as any as ModelRecord[], logLevel: "WARN", modelConfig: { @@ -176,6 +176,25 @@ export const useAppConfig = createPersistStore( ...{ logLevel: "INFO" }, }; } + if (version === 0.31) { + return { + ...(persistedState as ChatConfig), + models: DEFAULT_MODELS as any as ModelRecord[], + modelConfig: { + model: DEFAULT_MODELS[0].name, + temperature: 1.0, + top_p: 1, + max_tokens: 4000, + presence_penalty: 0, +
frequency_penalty: 0, + sendMemory: true, + historyMessageCount: 4, + compressMessageLengthThreshold: 1000, + enableInjectSystemPrompts: false, + template: DEFAULT_INPUT_TEMPLATE, + }, + }; + } return persistedState as any; }, diff --git a/app/utils/hooks.ts b/app/utils/hooks.ts index 1a5bd3b1..11cbe8f2 100644 --- a/app/utils/hooks.ts +++ b/app/utils/hooks.ts @@ -1,9 +1,9 @@ import { useMemo } from "react"; import { useAppConfig } from "../store"; import { collectModels } from "./model"; -import { LLMModel } from "../client/api"; +import { ModelRecord } from "../client/api"; -export function useAllModels(): LLMModel[] { +export function useAllModels(): ModelRecord[] { const configStore = useAppConfig(); const models = useMemo(() => { return collectModels(configStore.models, configStore.customModels); diff --git a/app/utils/model.ts b/app/utils/model.ts index 7f8656c2..05a2da66 100644 --- a/app/utils/model.ts +++ b/app/utils/model.ts @@ -1,7 +1,7 @@ -import { LLMModel } from "../client/api"; +import { ModelRecord } from "../client/api"; export function collectModelTable( - models: readonly LLMModel[], + models: readonly ModelRecord[], customModels: string, ) { const modelTable: Record< @@ -9,7 +9,7 @@ export function collectModelTable( { name: string; display_name: string; - provider?: LLMModel["provider"]; // Marked as optional + provider?: ModelRecord["provider"]; // Marked as optional isDefault?: boolean; } > = {}; @@ -46,7 +46,7 @@ export function collectModelTable( * Generate full model table. 
*/ export function collectModels( - models: readonly LLMModel[], + models: readonly ModelRecord[], customModels: string, ) { const modelTable = collectModelTable(models, customModels); diff --git a/package.json b/package.json index cc59a4fa..2cca469d 100644 --- a/package.json +++ b/package.json @@ -17,7 +17,7 @@ "dependencies": { "@fortaine/fetch-event-source": "^3.0.6", "@hello-pangea/dnd": "^16.5.0", - "@neet-nestor/web-llm": "^0.2.54", + "@neet-nestor/web-llm": "^0.2.55", "@serwist/next": "^9.0.2", "@svgr/webpack": "^6.5.1", "emoji-picker-react": "^4.9.2", diff --git a/yarn.lock b/yarn.lock index 93d44499..bd2fdb78 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2089,10 +2089,10 @@ "@jridgewell/resolve-uri" "^3.1.0" "@jridgewell/sourcemap-codec" "^1.4.14" -"@neet-nestor/web-llm@^0.2.54": - version "0.2.54" - resolved "https://registry.yarnpkg.com/@neet-nestor/web-llm/-/web-llm-0.2.54.tgz#a1626ebd8faeab21cc16d08fb6af3d8b1d4df569" - integrity sha512-XTGjZKVlKtVMxgbAeQSlXwhG+EK++rEBeCVXbM26SL7kcYnSzwKyCkFUDIXprTjWLJ+JRXNx5FaxWQZIIi16yw== +"@neet-nestor/web-llm@^0.2.55": + version "0.2.55" + resolved "https://registry.yarnpkg.com/@neet-nestor/web-llm/-/web-llm-0.2.55.tgz#56a23faaaa43d1b5a4f63e6b0a54d85e66ebb5a3" + integrity sha512-J/qKFT8fpVs+Q1z3YJwTZLbWNjZtMZs7A9pUL9ogioI6bYQA/roxKyYkjTtio6x6n63leE+4E+LYOC6bb6OUWw== dependencies: loglevel "^1.9.1"