From e2d5342ba9cb5eefdd062317ee2afd5a9a7c4681 Mon Sep 17 00:00:00 2001
From: Judd
Date: Sun, 23 Jun 2024 10:36:10 +0800
Subject: [PATCH] add Index model

---
 README.md           |  1 +
 convert.py          | 24 ++++++++++++++++++
 docs/models.md      |  1 +
 models.cpp          | 11 +++++++++
 models/index.cpp    | 60 +++++++++++++++++++++++++++++++++++++++++++++
 scripts/models.json | 25 +++++++++++++++++++
 6 files changed, 122 insertions(+)
 create mode 100644 models/index.cpp

diff --git a/README.md b/README.md
index 6db0853..cc0fc59 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@ pure C++ implementation based on [@ggerganov](https://github.com/ggerganov)'s [g
 
 **What's New:**
 
+* 2024-06-23: Index
 * 2024-06-17: DeepSeek-Coder-V2 (Instruct & Base)
 * 2024-06-15: [Tool calling](./docs/tool_calling.md)
 * 2024-06-07: Qwen2
diff --git a/convert.py b/convert.py
index 99570ff..30b76cc 100644
--- a/convert.py
+++ b/convert.py
@@ -118,6 +118,8 @@ class ModelType(Enum):
 
     XVERSE = 0x1900
 
+    Index = 0x1a00
+
     BCE_Embedding = 0x10000100
     BCE_ReRanker = 0x10000101
     BGE_M3 = 0x10000102
@@ -3144,6 +3146,26 @@ def get_weight_names(config):
 
         return weight_names
 
+class IndexConverter(BaseConverter):
+    MODEL_TYPE = ModelType.Index
+
+    @classmethod
+    def pp(cls, config, name: str, tensor):
+        if name == 'lm_head.weight':
+            return nn.Parameter(nn.functional.normalize(tensor)) if config.norm_head else tensor
+        else:
+            return Llama3Converter.pp(config, name, tensor)
+
+    @staticmethod
+    def dump_config(f, config, ggml_type):
+        config.rope_theta = 10000.0
+
+        Llama3Converter.dump_config(f, config, ggml_type)
+
+    @staticmethod
+    def get_weight_names(config):
+        return Llama3Converter.get_weight_names(config)
+
 def convert_grok_1_base(args, vocab, ggml_type):
     def ffn_size(emb_size, widening_factor):
         _ffn_size = int(widening_factor * emb_size) * 2 // 3
@@ -3476,6 +3498,8 @@ def main():
         DeepSeekV2Converter.MODEL_TYPE = ModelType.DeepSeekV2
         print("DeelseekV2 is not fully supported yet!!!!")
         DeepSeekV2Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
+    elif arch == 'IndexForCausalLM':
+        IndexConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     else:
         raise Exception(f'unknown model_type: {arch}')
 
diff --git a/docs/models.md b/docs/models.md
index 02701e9..e473094 100644
--- a/docs/models.md
+++ b/docs/models.md
@@ -19,6 +19,7 @@
 * [x] TigerBot: [Chat-7B](https://huggingface.co/TigerResearch/tigerbot-7b-chat), [Chat-13B](https://huggingface.co/TigerResearch/tigerbot-13b-chat-v5) (`-a TigerBot`)
 * [x] CodeFuse-DeepSeek: [33B](https://huggingface.co/codefuse-ai/CodeFuse-DeepSeek-33B) (`-a CodeFuseDeepSeek`)
 * [x] MAP-Neo: [Instruct-7B](https://huggingface.co/m-a-p/neo_7b_instruct_v0.1) (`-a MAP-Neo`)
+* [x] Index: [Chat-1.9B](https://huggingface.co/IndexTeam/Index-1.9B-Chat), [Character-1.9B](https://huggingface.co/IndexTeam/Index-1.9B-Character)
 
 For other models that using `LlamaForCausalLM` architecture, for example, [aiXcoder-7B](https://huggingface.co/aiXcoder/aixcoder-7b-base), try `-a Yi`.
 
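A note on the `norm_head` handling in `IndexConverter.pp` above: `nn.functional.normalize` is called with its default arguments (p=2, dim=1), so each row of `lm_head.weight` (one row per vocabulary token) is rescaled to unit L2 norm at conversion time. A minimal PyTorch sketch, not part of the patch, with made-up toy shapes:

import torch
import torch.nn.functional as F

# Toy stand-in for lm_head.weight: one row per vocabulary token.
vocab_size, hidden_size = 8, 4   # made-up shapes for illustration
lm_head = torch.randn(vocab_size, hidden_size)

# The same call the converter makes; defaults are p=2, dim=1 (per-row L2 norm).
normalized = F.normalize(lm_head)

print(torch.linalg.vector_norm(normalized, dim=1))  # every row is ~1.0
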
diff --git a/models.cpp b/models.cpp
index 3eefe52..36b5bfa 100644
--- a/models.cpp
+++ b/models.cpp
@@ -156,6 +156,8 @@ namespace chatllm
 
         MODEL_TYPE_XVERSE = 0x1900,
 
+        MODEL_TYPE_INDEX = 0x1a00,
+
         MODEL_TYPE_BCE_Embedding = 0x10000100,
         MODEL_TYPE_BCE_ReRanker = 0x10000101,
         MODEL_TYPE_BGE_M3 = 0x10000102,
@@ -308,6 +310,8 @@ namespace chatllm
             return "StarCoder2";
         case MODEL_TYPE_XVERSE:
             return "XVERSE";
+        case MODEL_TYPE_INDEX:
+            return "Index";
         default:
             CHATLLM_THROW << "unknown model type: " << model_type;
             return "???";
@@ -1289,6 +1293,11 @@ namespace chatllm
         #include "models/xverse.cpp"
     }
 
+    namespace index
+    {
+        #include "models/index.cpp"
+    }
+
     template <class Config>
     void load_config(ModelLoader &loader, Config &config, const ModelObject::extra_args &args)
     {
@@ -1558,6 +1567,8 @@ namespace chatllm
 \
             CASE(XVERSE, xverse::dense, 1) \
 \
+            CASE(INDEX, index, 1) \
+\
             CASE(BCE_Embedding, bce::embedding, 1) \
             CASE(BCE_ReRanker, bce::ranker, 1) \
             CASE(BGE_M3, bge::embedding, 1) \
diff --git a/models/index.cpp b/models/index.cpp
new file mode 100644
index 0000000..336c906
--- /dev/null
+++ b/models/index.cpp
@@ -0,0 +1,60 @@
+typedef llama::v3::Config Config;
+
+class ChatHistoryEncoder : public BaseHistoryEncoder
+{
+public:
+    void append_sys_prompt(std::vector<int> &ids) const override;
+    void append_pair(int round_idx, const std::string &user, const std::string &ai, std::vector<int> &ids) const override;
+    void do_append_user(int round_idx, const std::string &user, std::vector<int> &ids) const override;
+};
+
+static ChatHistoryEncoder _chat_encoder;
+
+class Tokenizer : public llama::v2::Tokenizer
+{
+public:
+    Tokenizer(const Config &config)
+        : llama::v2::Tokenizer(config, &_chat_encoder)
+    {
+        sys_prompt = "";
+        reserved_0_token_id = 3;
+        reserved_1_token_id = 4;
+    }
+public:
+    int reserved_0_token_id;
+    int reserved_1_token_id;
+};
+
+class ConditionalGeneration : public llama::v3::ConditionalGeneration
+{
+public:
+    ConditionalGeneration() = default;
+    ConditionalGeneration(const Config &config)
+        : llama::v3::ConditionalGeneration(config, ModelType::MODEL_TYPE_INDEX)
+    {}
+};
+
+void ChatHistoryEncoder::append_sys_prompt(std::vector<int> &ids) const
+{
+    if (tokenizer->get_system_prompt().size() > 0)
+    {
+        ids.push_back(tokenizer->pad_token_id);
+        tokenizer->encode(tokenizer->get_system_prompt(), ids);
+    }
+
+}
+
+void ChatHistoryEncoder::append_pair(int round_idx, const std::string &user, const std::string &ai, std::vector<int> &ids) const
+{
+    Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
+    do_append_user(round_idx, user, ids);
+    tok->encode(ai, ids);
+}
+
+void ChatHistoryEncoder::do_append_user(int round_idx, const std::string &user, std::vector<int> &ids) const
+{
+    Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
+    ids.push_back(tok->reserved_0_token_id);
+    tok->encode(user, ids);
+    ids.push_back(tok->reserved_1_token_id);
+}
\ No newline at end of file
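For reference, the conversation template implemented by ChatHistoryEncoder in models/index.cpp is: the pad token followed by the system prompt (when one is set), then, for each round, `reserved_0`, the user text, `reserved_1`, and the assistant reply. A rough Python sketch of that layout, not part of the patch (ids 3 and 4 match the `Tokenizer` constructor above; the pad id of 0 and the character-level `encode()` are stand-ins for illustration only):

PAD, RESERVED_0, RESERVED_1 = 0, 3, 4   # pad id assumed; 3 and 4 match the patch

def encode(text: str) -> list[int]:
    # Placeholder for the real tokenizer: map characters to code points.
    return [ord(c) for c in text]

def build_ids(system: str, history: list[tuple[str, str]], user: str) -> list[int]:
    ids: list[int] = []
    if system:                             # append_sys_prompt
        ids.append(PAD)
        ids += encode(system)
    for u, a in history:                   # append_pair, one call per past round
        ids += [RESERVED_0] + encode(u) + [RESERVED_1] + encode(a)
    ids += [RESERVED_0] + encode(user) + [RESERVED_1]   # do_append_user
    return ids

print(build_ids("be brief", [("hi", "hello")], "how are you?"))

The trailing `reserved_1` after the last user message is what cues the model to start generating its reply.
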
diff --git a/scripts/models.json b/scripts/models.json
index 78ead59..1d1f30f 100644
--- a/scripts/models.json
+++ b/scripts/models.json
@@ -1,4 +1,29 @@
 {
+    "index": {
+        "brief": "LLM developed by Bilibili.",
+        "default": "1.9b-chat",
+        "license": "https://huggingface.co/IndexTeam/Index-1.9B-Chat/blob/main/LICENSE",
+        "variants": {
+            "1.9b-chat": {
+                "default": "q8",
+                "quantized": {
+                    "q8": {
+                        "size": 2309982912,
+                        "url": "chatllm_quantized_index/index.bin"
+                    }
+                }
+            },
+            "1.9b-character": {
+                "default": "q8",
+                "quantized": {
+                    "q8": {
+                        "size": 2309982912,
+                        "url": "chatllm_quantized_index/index-ch.bin"
+                    }
+                }
+            }
+        }
+    },
     "glm-4": {
         "brief": "GLM-4-9B is the open-source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
         "default": "9b",