Skip to content

Commit

Permalink
add index model
Browse files Browse the repository at this point in the history
  • Loading branch information
Judd committed Jun 23, 2024
1 parent e97d1bc commit e2d5342
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pure C++ implementation based on [@ggerganov](https://github.com/ggerganov)'s [g

**What's New:**

* 2024-06-23: Index
* 2024-06-17: DeepSeek-Coder-V2 (Instruct & Base)
* 2024-06-15: [Tool calling](./docs/tool_calling.md)
* 2024-06-07: Qwen2
Expand Down
24 changes: 24 additions & 0 deletions convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ class ModelType(Enum):

XVERSE = 0x1900

Index = 0x1a00

BCE_Embedding = 0x10000100
BCE_ReRanker = 0x10000101
BGE_M3 = 0x10000102
Expand Down Expand Up @@ -3144,6 +3146,26 @@ def get_weight_names(config):

return weight_names

class IndexConverter(BaseConverter):
    """Converter for Bilibili's Index models (LLaMA-3 compatible layout)."""
    MODEL_TYPE = ModelType.Index

    @classmethod
    def pp(cls, config, name: str, tensor):
        # Index optionally L2-normalizes the output head when `norm_head` is set.
        if name == 'lm_head.weight':
            # Bug fix: the previous code returned the scalar 0 when
            # `config.norm_head` was falsy, which would replace the entire
            # lm_head weight with zeros. Pass the tensor through unchanged
            # in that case instead.
            return nn.Parameter(nn.functional.normalize(tensor)) if config.norm_head else tensor
        else:
            return Llama3Converter.pp(config, name, tensor)

    @staticmethod
    def dump_config(f, config, ggml_type):
        # Force rope_theta to 10000.0 (overriding any value loaded from the
        # HF config) before delegating to the LLaMA-3 dumper.
        config.rope_theta = 10000.0

        Llama3Converter.dump_config(f, config, ggml_type)

    @staticmethod
    def get_weight_names(config):
        # Same tensor inventory as LLaMA-3.
        return Llama3Converter.get_weight_names(config)

def convert_grok_1_base(args, vocab, ggml_type):
def ffn_size(emb_size, widening_factor):
_ffn_size = int(widening_factor * emb_size) * 2 // 3
Expand Down Expand Up @@ -3476,6 +3498,8 @@ def main():
DeepSeekV2Converter.MODEL_TYPE = ModelType.DeepSeekV2
print("DeelseekV2 is not fully supported yet!!!!")
DeepSeekV2Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
elif arch == 'IndexForCausalLM':
IndexConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
else:
raise Exception(f'unknown model_type: {arch}')

Expand Down
1 change: 1 addition & 0 deletions docs/models.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
* [x] TigerBot: [Chat-7B](https://huggingface.co/TigerResearch/tigerbot-7b-chat), [Chat-13B](https://huggingface.co/TigerResearch/tigerbot-13b-chat-v5) (`-a TigerBot`)
* [x] CodeFuse-DeepSeek: [33B](https://huggingface.co/codefuse-ai/CodeFuse-DeepSeek-33B) (`-a CodeFuseDeepSeek`)
* [x] MAP-Neo: [Instruct-7B](https://huggingface.co/m-a-p/neo_7b_instruct_v0.1) (`-a MAP-Neo`)
* [x] Index: [Chat-1.9B](https://huggingface.co/IndexTeam/Index-1.9B-Chat), [Character-1.9B](https://huggingface.co/IndexTeam/Index-1.9B-Character)

For other models that using `LlamaForCausalLM` architecture, for example, [aiXcoder-7B](https://huggingface.co/aiXcoder/aixcoder-7b-base), try `-a Yi`.

Expand Down
11 changes: 11 additions & 0 deletions models.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ namespace chatllm

MODEL_TYPE_XVERSE = 0x1900,

MODEL_TYPE_INDEX = 0x1a00,

MODEL_TYPE_BCE_Embedding = 0x10000100,
MODEL_TYPE_BCE_ReRanker = 0x10000101,
MODEL_TYPE_BGE_M3 = 0x10000102,
Expand Down Expand Up @@ -308,6 +310,8 @@ namespace chatllm
return "StarCoder2";
case MODEL_TYPE_XVERSE:
return "XVERSE";
case MODEL_TYPE_INDEX:
return "Index";
default:
CHATLLM_THROW << "unknown model type: " << model_type;
return "???";
Expand Down Expand Up @@ -1289,6 +1293,11 @@ namespace chatllm
#include "models/xverse.cpp"
}

// Compile the Index model implementation into its own namespace,
// following the per-model textual-include pattern used for the
// other model families (e.g. xverse above).
namespace index
{
#include "models/index.cpp"
}

template <class Config>
void load_config(ModelLoader &loader, Config &config, const ModelObject::extra_args &args)
{
Expand Down Expand Up @@ -1558,6 +1567,8 @@ namespace chatllm
\
CASE(XVERSE, xverse::dense, 1) \
\
CASE(INDEX, index, 1) \
\
CASE(BCE_Embedding, bce::embedding, 1) \
CASE(BCE_ReRanker, bce::ranker, 1) \
CASE(BGE_M3, bge::embedding, 1) \
Expand Down
60 changes: 60 additions & 0 deletions models/index.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
typedef llama::v3::Config Config;

// Builds the Index chat token stream: an optional system-prompt section,
// then per-round user/assistant pairs. Member definitions are at the
// bottom of this file.
class ChatHistoryEncoder : public BaseHistoryEncoder
{
public:
    // Emits the system prompt (prefixed by the pad token) when one is set.
    void append_sys_prompt(std::vector<int> &ids) const override;
    // Emits one full round: the user turn followed by the AI reply tokens.
    void append_pair(int round_idx, const std::string &user, const std::string &ai, std::vector<int> &ids) const override;
    // Emits a user turn wrapped between the two reserved marker tokens.
    void do_append_user(int round_idx, const std::string &user, std::vector<int> &ids) const override;
};

static ChatHistoryEncoder _chat_encoder;

// Index tokenizer: reuses the LLaMA-2 tokenizer with the chat encoder above
// and records the two reserved control-token ids used as user-turn markers.
// NOTE(review): "resevered" looks like a typo for "reserved"; renaming the
// members would also touch the ChatHistoryEncoder definitions below.
class Tokenizer : public llama::v2::Tokenizer
{
public:
    Tokenizer(const Config &config)
        : llama::v2::Tokenizer(config, &_chat_encoder)
    {
        sys_prompt = "";          // no system prompt by default
        resevered_0_token_id = 3; // opens a user turn (see do_append_user)
        resevered_1_token_id = 4; // closes a user turn
    }
public:
    int resevered_0_token_id;
    int resevered_1_token_id;
};

// Index generation pipeline: plain LLaMA-3 behavior, tagged with the
// MODEL_TYPE_INDEX model type.
class ConditionalGeneration : public llama::v3::ConditionalGeneration
{
public:
    ConditionalGeneration() = default;
    ConditionalGeneration(const Config &config)
        : llama::v3::ConditionalGeneration(config, ModelType::MODEL_TYPE_INDEX)
    {}
};

void ChatHistoryEncoder::append_sys_prompt(std::vector<int> &ids) const
{
    // Nothing to emit when no system prompt has been configured.
    const auto &sys = tokenizer->get_system_prompt();
    if (sys.empty())
        return;

    // Pad token opens the system section, followed by the encoded prompt.
    ids.push_back(tokenizer->pad_token_id);
    tokenizer->encode(sys, ids);
}

void ChatHistoryEncoder::append_pair(int round_idx, const std::string &user, const std::string &ai, std::vector<int> &ids) const
{
Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
do_append_user(round_idx, user, ids);
tok->encode(ai, ids);
}

void ChatHistoryEncoder::do_append_user(int round_idx, const std::string &user, std::vector<int> &ids) const
{
Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
ids.push_back(tok->resevered_0_token_id);
tok->encode(user, ids);
ids.push_back(tok->resevered_1_token_id);
}
25 changes: 25 additions & 0 deletions scripts/models.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,29 @@
{
"index": {
"brief": "LLM developed by Bilibili.",
"default": "1.9b-chat",
"license": "https://huggingface.co/IndexTeam/Index-1.9B-Chat/blob/main/LICENSE",
"variants": {
"1.9b-chat": {
"default": "q8",
"quantized": {
"q8": {
"size": 2309982912,
"url": "chatllm_quantized_index/index.bin"
}
}
},
"1.9b-character": {
"default": "q8",
"quantized": {
"q8": {
"size": 2309982912,
"url": "chatllm_quantized_index/index-ch.bin"
}
}
}
}
},
"glm-4": {
"brief": "GLM-4-9B is the open-source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
"default": "9b",
Expand Down

0 comments on commit e2d5342

Please sign in to comment.