diff --git a/app/constant.ts b/app/constant.ts index cc8bbc1d..a9dfe48a 100644 --- a/app/constant.ts +++ b/app/constant.ts @@ -65,7 +65,7 @@ Latex block format: $$e=mc^2$$ `; export const DEFAULT_MODELS: ModelRecord[] = [ - // Llama-3 + // Llama-3 8B { name: "Llama-3-8B-Instruct-q4f32_1-MLC-1k", display_name: "Llama", @@ -73,7 +73,6 @@ export const DEFAULT_MODELS: ModelRecord[] = [ size: "8B", quantization: "q4f32_1", context_length: "1k", - is_default: true, family: "Llama 3", vram_required_MB: 5295.7, low_resource_required: true, @@ -95,6 +94,7 @@ export const DEFAULT_MODELS: ModelRecord[] = [ provider: "Meta", size: "8B", quantization: "q4f32_1", + context_length: "4k", family: "Llama 3", vram_required_MB: 6101.01, low_resource_required: false, @@ -105,19 +105,45 @@ export const DEFAULT_MODELS: ModelRecord[] = [ provider: "Meta", size: "8B", quantization: "q4f16_1", + context_length: "4k", family: "Llama 3", vram_required_MB: 5001.0, low_resource_required: false, }, + // Hermes-2 Pro { - name: "Llama-3-70B-Instruct-q3f16_1-MLC", - display_name: "Llama", - provider: "Meta", - size: "70B", - quantization: "q3f16_1", - family: "Llama 3", - vram_required_MB: 31153.13, + name: "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC", + display_name: "Hermes", + provider: "NousResearch", + size: "8B", + quantization: "q4f16_1", + context_length: "4k", + family: "Hermes 2 Pro", + vram_required_MB: 4976.13, + low_resource_required: false, + }, + { + name: "Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC", + display_name: "Hermes", + provider: "NousResearch", + size: "8B", + quantization: "q4f32_1", + context_length: "4k", + family: "Hermes 2 Pro", + vram_required_MB: 6051.27, + low_resource_required: false, + }, + { + name: "Hermes-2-Pro-Mistral-7B-q4f16_1-MLC", + display_name: "Hermes", + provider: "NousResearch", + size: "7B", + quantization: "q4f16_1", + context_length: "4k", + family: "Hermes 2 Pro", + vram_required_MB: 4033.28, low_resource_required: false, + required_features: ["shader-f16"], }, // Phi3-mini-instruct { @@ -125,6 +151,7 @@ export const DEFAULT_MODELS: ModelRecord[] = [ display_name: "Phi", provider: "MLC", quantization: "q4f16_1", + context_length: "4k", family: "Phi 3 Mini", vram_required_MB: 3672.07, low_resource_required: false, @@ -134,6 +161,7 @@ export const DEFAULT_MODELS: ModelRecord[] = [ display_name: "Phi", provider: "MLC", quantization: "q4f32_1", + context_length: "4k", family: "Phi 3 Mini", vram_required_MB: 5483.12, low_resource_required: false, @@ -158,73 +186,29 @@ export const DEFAULT_MODELS: ModelRecord[] = [ vram_required_MB: 3179.12, low_resource_required: true, }, - // Llama-2 - { - name: "Llama-2-7b-chat-hf-q4f32_1-MLC-1k", - display_name: "Llama", - provider: "Meta", - size: "7B", - quantization: "q4f32_1", - context_length: "1k", - family: "Llama 2", - vram_required_MB: 5284.01, - low_resource_required: false, - }, + // Mistral variants { - name: "Llama-2-7b-chat-hf-q4f16_1-MLC-1k", - display_name: "Llama", - provider: "Meta", + name: "Mistral-7B-Instruct-v0.3-q4f16_1-MLC", + display_name: "Mistral", + provider: "Mistral AI", size: "7B", quantization: "q4f16_1", - context_length: "1k", - family: "Llama 2", - vram_required_MB: 4618.52, + context_length: "4k", + family: "Mistral", + vram_required_MB: 4573.39, low_resource_required: false, required_features: ["shader-f16"], }, { - name: "Llama-2-7b-chat-hf-q4f32_1-MLC", - display_name: "Llama", - provider: "Meta", + name: "Mistral-7B-Instruct-v0.3-q4f32_1-MLC", + display_name: "Mistral", + provider: "Mistral AI", size: "7B", quantization: "q4f32_1", - family: "Llama 2", - vram_required_MB: 9109.03, - low_resource_required: false, - }, - { - name: "Llama-2-7b-chat-hf-q4f16_1-MLC", - display_name: "Llama", - provider: "Meta", - size: "7B", - quantization: "q4f16_1", - family: "Llama 2", - vram_required_MB: 6749.02, - low_resource_required: false, - required_features: ["shader-f16"], - }, - { - name: "Llama-2-13b-chat-hf-q4f16_1-MLC", - display_name: "Llama", - provider: "Meta", - size: "13B", - quantization: "q4f16_1", - family: "Llama 2", - vram_required_MB: 11814.09, - low_resource_required: false, - required_features: ["shader-f16"], - }, - // Mistral variants - { - name: "WizardMath-7B-V1.1-q4f16_1-MLC", - display_name: "WizardMath", - provider: "WizardLM", - size: "7B", - quantization: "q4f16_1", - family: "WizardMath", - vram_required_MB: 6079.02, + context_length: "4k", + family: "Mistral", + vram_required_MB: 5619.27, low_resource_required: false, - required_features: ["shader-f16"], }, { name: "Mistral-7B-Instruct-v0.2-q4f16_1-MLC", @@ -232,8 +216,9 @@ export const DEFAULT_MODELS: ModelRecord[] = [ provider: "Mistral AI", size: "7B", quantization: "q4f16_1", + context_length: "4k", family: "Mistral", - vram_required_MB: 6079.02, + vram_required_MB: 4573.39, low_resource_required: false, required_features: ["shader-f16"], }, @@ -243,8 +228,9 @@ export const DEFAULT_MODELS: ModelRecord[] = [ provider: "NousResearch", size: "7B", quantization: "q4f16_1", + context_length: "4k", family: "Hermes", - vram_required_MB: 6079.02, + vram_required_MB: 4573.39, low_resource_required: false, required_features: ["shader-f16"], }, @@ -254,42 +240,90 @@ export const DEFAULT_MODELS: ModelRecord[] = [ provider: "Maxime Labonne", size: "7B", quantization: "q4f16_1", + context_length: "4k", family: "Hermes", - vram_required_MB: 6079.02, + vram_required_MB: 4573.39, low_resource_required: false, required_features: ["shader-f16"], }, - // Hermes-2 Pro { - name: "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC", - display_name: "Hermes", - provider: "NousResearch", - size: "8B", + name: "WizardMath-7B-V1.1-q4f16_1-MLC", + display_name: "WizardMath", + provider: "WizardLM", + size: "7B", quantization: "q4f16_1", - family: "Hermes", - vram_required_MB: 4976.13, + context_length: "4k", + family: "WizardMath", + vram_required_MB: 4573.39, low_resource_required: false, + required_features: ["shader-f16"], }, + // Qwen-2 { - name: "Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC", - display_name: "Hermes", - provider: "NousResearch", - size: "8B", + name: "Qwen2-0.5B-Instruct-q0f16-MLC", + display_name: "Qwen", + provider: "Alibaba", + size: "0.5B", + quantization: "q0f16", + context_length: "4k", + family: "Qwen 2", + vram_required_MB: 1624.12, + low_resource_required: true, + }, + { + name: "Qwen2-0.5B-Instruct-q0f32-MLC", + display_name: "Qwen", + provider: "Alibaba", + size: "0.5B", + quantization: "q0f32", + context_length: "4k", + family: "Qwen 2", + vram_required_MB: 2654.75, + low_resource_required: true, + }, + { + name: "Qwen2-1.5B-Instruct-q4f16_1-MLC", + display_name: "Qwen", + provider: "Alibaba", + size: "1.5B", + quantization: "q4f16_1", + context_length: "4k", + family: "Qwen 2", + vram_required_MB: 1629.75, + low_resource_required: true, + }, + { + name: "Qwen2-1.5B-Instruct-q4f32_1-MLC", + display_name: "Qwen", + provider: "Alibaba", + size: "1.5B", quantization: "q4f32_1", - family: "Hermes", - vram_required_MB: 6051.27, - low_resource_required: false, + context_length: "4k", + family: "Qwen 2", + vram_required_MB: 1888.97, + low_resource_required: true, }, { - name: "Hermes-2-Pro-Mistral-7B-q4f16_1-MLC", - display_name: "Hermes", - provider: "NousResearch", + name: "Qwen2-7B-Instruct-q4f16_1-MLC", + display_name: "Qwen", + provider: "Alibaba", size: "7B", quantization: "q4f16_1", - family: "Hermes", - vram_required_MB: 4033.28, + context_length: "4k", + family: "Qwen 2", + vram_required_MB: 5106.67, + low_resource_required: false, + }, + { + name: "Qwen2-7B-Instruct-q4f32_1-MLC", + display_name: "Qwen", + provider: "Alibaba", + size: "7B", + quantization: "q4f32_1", + context_length: "4k", + family: "Qwen 2", + vram_required_MB: 5900.09, low_resource_required: false, - required_features: ["shader-f16"], }, // Gemma-2B { @@ -298,6 +332,7 @@ export const DEFAULT_MODELS: ModelRecord[] = [ provider: "Google", size: "2B", quantization: "q4f16_1", + context_length: "4k", family: "Gemma", vram_required_MB: 1476.52, low_resource_required: false, @@ -310,6 +345,7 @@ export const DEFAULT_MODELS: ModelRecord[] = [ provider: "Google", size: "2B", quantization: "q4f32_1", + context_length: "4k", family: "Gemma", vram_required_MB: 1750.66, low_resource_required: false, @@ -340,74 +376,33 @@ export const DEFAULT_MODELS: ModelRecord[] = [ low_resource_required: true, buffer_size_required_bytes: 262144000, }, - // Qwen-1.5-1.8B - { - name: "Qwen1.5-1.8B-Chat-q4f16_1-MLC", - display_name: "Qwen", - provider: "Alibaba", - size: "1.8B", - quantization: "q4f16_1", - family: "Qwen 1.5", - vram_required_MB: 2404.94, - low_resource_required: false, - }, - { - name: "Qwen1.5-1.8B-Chat-q4f32_1-MLC", - display_name: "Qwen", - provider: "Alibaba", - size: "1.8B", - quantization: "q4f32_1", - family: "Qwen 1.5", - vram_required_MB: 3313.63, - low_resource_required: false, - }, - { - name: "Qwen1.5-1.8B-Chat-q4f16_1-MLC-1k", - display_name: "Qwen", - provider: "Alibaba", - size: "1.8B", - quantization: "q4f16_1", - context_length: "1k", - family: "Qwen 1.5", - vram_required_MB: 1828.94, - low_resource_required: true, - }, - { - name: "Qwen1.5-1.8B-Chat-q4f32_1-MLC-1k", - display_name: "Qwen", - provider: "Alibaba", - size: "1.8B", - quantization: "q4f32_1", - context_length: "1k", - family: "Qwen 1.5", - vram_required_MB: 2161.63, - low_resource_required: true, - }, // StableLM-zephyr-1.6B { name: "stablelm-2-zephyr-1_6b-q4f16_1-MLC", - display_name: "Zephyr", - provider: "Hugging Face H4", + display_name: "StableLM", + provider: "Hugging Face", size: "1.6B", quantization: "q4f16_1", + context_length: "4k", family: "StableLM 2", vram_required_MB: 2087.66, low_resource_required: false, }, { name: "stablelm-2-zephyr-1_6b-q4f32_1-MLC", - display_name: "Zephyr", - provider: "Hugging Face H4", + display_name: "StableLM", + provider: "Hugging Face", size: "1.6B", quantization: "q4f32_1", + context_length: "4k", family: "StableLM 2", vram_required_MB: 2999.33, low_resource_required: false, }, { name: "stablelm-2-zephyr-1_6b-q4f16_1-MLC-1k", - display_name: "Zephyr", - provider: "Hugging Face H4", + display_name: "StableLM", + provider: "Hugging Face", size: "1.6B", quantization: "q4f16_1", context_length: "1k", @@ -417,8 +412,8 @@ export const DEFAULT_MODELS: ModelRecord[] = [ }, { name: "stablelm-2-zephyr-1_6b-q4f32_1-MLC-1k", - display_name: "Zephyr", - provider: "Hugging Face H4", + display_name: "StableLM", + provider: "Hugging Face", size: "1.6B", quantization: "q4f32_1", context_length: "1k", @@ -433,6 +428,7 @@ export const DEFAULT_MODELS: ModelRecord[] = [ provider: "Together", size: "3B", quantization: "q4f16_1", + context_length: "2k", family: "RedPajama", vram_required_MB: 2972.09, low_resource_required: false, @@ -444,6 +440,7 @@ export const DEFAULT_MODELS: ModelRecord[] = [ provider: "Together", size: "3B", quantization: "q4f32_1", + context_length: "2k", family: "RedPajama", vram_required_MB: 3928.09, low_resource_required: false, @@ -471,12 +468,131 @@ export const DEFAULT_MODELS: ModelRecord[] = [ vram_required_MB: 2558.09, low_resource_required: true, }, + // TinyLlama v1.0 + { + name: "TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC", + display_name: "TinyLlama", + provider: "Zhang Peiyuan", + size: "1.1B", + quantization: "q4f16_1", + context_length: "2k", + family: "TinyLlama", + vram_required_MB: 697.24, + low_resource_required: true, + required_features: ["shader-f16"], + }, + { + name: "TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC", + display_name: "TinyLlama", + provider: "Zhang Peiyuan", + size: "1.1B", + quantization: "q4f32_1", + context_length: "2k", + family: "TinyLlama", + vram_required_MB: 839.98, + low_resource_required: true, + }, + { + name: "TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC-1k", + display_name: "TinyLlama", + provider: "Zhang Peiyuan", + size: "1.1B", + quantization: "q4f16_1", + context_length: "1k", + family: "TinyLlama", + vram_required_MB: 675.24, + low_resource_required: true, + required_features: ["shader-f16"], + }, + { + name: "TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC-1k", + display_name: "TinyLlama", + provider: "Zhang Peiyuan", + size: "1.1B", + quantization: "q4f32_1", + context_length: "1k", + family: "TinyLlama", + vram_required_MB: 795.98, + low_resource_required: true, + }, + // Llama-3 70B + { + name: "Llama-3-70B-Instruct-q3f16_1-MLC", + display_name: "Llama", + provider: "Meta", + size: "70B", + quantization: "q3f16_1", + context_length: "4k", + family: "Llama 3", + vram_required_MB: 31153.13, + low_resource_required: false, + }, + // Llama-2 + { + name: "Llama-2-7b-chat-hf-q4f32_1-MLC-1k", + display_name: "Llama", + provider: "Meta", + size: "7B", + quantization: "q4f32_1", + context_length: "1k", + family: "Llama 2", + vram_required_MB: 5284.01, + low_resource_required: false, + }, + { + name: "Llama-2-7b-chat-hf-q4f16_1-MLC-1k", + display_name: "Llama", + provider: "Meta", + size: "7B", + quantization: "q4f16_1", + context_length: "1k", + family: "Llama 2", + vram_required_MB: 4618.52, + low_resource_required: false, + required_features: ["shader-f16"], + }, + { + name: "Llama-2-7b-chat-hf-q4f32_1-MLC", + display_name: "Llama", + provider: "Meta", + size: "7B", + quantization: "q4f32_1", + context_length: "4k", + family: "Llama 2", + vram_required_MB: 9109.03, + low_resource_required: false, + }, + { + name: "Llama-2-7b-chat-hf-q4f16_1-MLC", + display_name: "Llama", + provider: "Meta", + size: "7B", + quantization: "q4f16_1", + context_length: "4k", + family: "Llama 2", + vram_required_MB: 6749.02, + low_resource_required: false, + required_features: ["shader-f16"], + }, + { + name: "Llama-2-13b-chat-hf-q4f16_1-MLC", + display_name: "Llama", + provider: "Meta", + size: "13B", + quantization: "q4f16_1", + context_length: "4k", + family: "Llama 2", + vram_required_MB: 11814.09, + low_resource_required: false, + required_features: ["shader-f16"], + }, // Phi-2 { name: "phi-2-q4f16_1-MLC", display_name: "Phi", provider: "Microsoft", quantization: "q4f16_1", + context_length: "2k", family: "Phi 2", vram_required_MB: 3053.97, low_resource_required: false, @@ -487,6 +603,7 @@ export const DEFAULT_MODELS: ModelRecord[] = [ display_name: "Phi", provider: "Microsoft", quantization: "q4f32_1", + context_length: "2k", family: "Phi 2", vram_required_MB: 4032.48, low_resource_required: false, @@ -518,6 +635,7 @@ export const DEFAULT_MODELS: ModelRecord[] = [ display_name: "Phi", provider: "Microsoft", quantization: "q4f16_1", + context_length: "2k", family: "Phi 1.5", vram_required_MB: 1210.09, low_resource_required: true, @@ -526,8 +644,9 @@ export const DEFAULT_MODELS: ModelRecord[] = [ { name: "phi-1_5-q4f32_1-MLC", display_name: "Phi", - provider: "Meta", + provider: "Microsoft", quantization: "q4f32_1", + context_length: "2k", family: "Phi 1.5", vram_required_MB: 1682.09, low_resource_required: true, @@ -553,13 +672,14 @@ export const DEFAULT_MODELS: ModelRecord[] = [ vram_required_MB: 1682.09, low_resource_required: true, }, - // TinyLlama + // TinyLlama v0.4 { name: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC", display_name: "TinyLlama", provider: "Zhang Peiyuan", size: "1.1B", quantization: "q4f16_1", + context_length: "2k", family: "TinyLlama", vram_required_MB: 697.24, low_resource_required: true, @@ -571,6 +691,7 @@ export const DEFAULT_MODELS: ModelRecord[] = [ provider: "Zhang Peiyuan", size: "1.1B", quantization: "q4f32_1", + context_length: "2k", family: "TinyLlama", vram_required_MB: 839.98, low_resource_required: true,