update tokenizer to bpe.

wangzhaode committed Sep 26, 2023
1 parent 412b6fa commit 7a1dbf8
Showing 23 changed files with 516 additions and 1,179,238 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.0)
 project(mnn-llm)
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
 
 option(BUILD_FOR_ANDROID "Build for android with mini memory mode." OFF)
 option(WITH_CUDA "Enable CUDA support" OFF)
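(The jump from C++11 to C++17 is presumably motivated by the new tokenizer.hpp below, which uses std::string_view, a C++17 library feature.)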
2 changes: 1 addition & 1 deletion android/app/src/main/java/com/mnn/llm/Chat.java
@@ -3,7 +3,7 @@
 import java.io.Serializable;
 
 public class Chat implements Serializable {
-    public native boolean Init(String modelDir, String tokenizerDir);
+    public native boolean Init(String modelDir);
     public native boolean Ready();
     public native float Progress();
     public native String Submit(String input);
20 changes: 3 additions & 17 deletions android/app/src/main/java/com/mnn/llm/MainActivity.java
@@ -29,7 +29,6 @@ public class MainActivity extends AppCompatActivity {
     private TextView mProcessPercent;
     // resource files
     private String mModelDir = "/data/local/tmp/model";
-    private String mTokenizerDir = "";
     private boolean mModelNeedDownload = true;
     @Override
     protected void onCreate(Bundle savedInstanceState) {
@@ -116,7 +115,6 @@ public void loadModel(View view) {
         mLoadButton.setText("模型加载中 ...");
         mProcessView.setVisibility(View.VISIBLE);
         mChat = new Chat();
-        prepareFiles();
         System.out.println("[MNN_DEBUG] is chat Ready: " + mChat.Ready());
         Handler handler = new Handler() {
             @Override
@@ -126,37 +124,25 @@ public void handleMessage(Message msg) {
             }
         };
         // copy models
-        LoadThread loadT = new LoadThread(mChat, handler, mModelDir, mTokenizerDir);
+        LoadThread loadT = new LoadThread(mChat, handler, mModelDir);
         loadT.start();
         ProgressThread progressT = new ProgressThread(mChat, mProcessHandler);
         progressT.start();
     }
-
-    public void prepareFiles() {
-        System.out.println("MNN_DEBUG: prepareFiles Start");
-        try {
-            mTokenizerDir = Common.copyAssetResource2File(this, "tokenizer");
-        } catch (Exception e) {
-            System.out.println(e.toString());
-        }
-        System.out.println("MNN_DEBUG: prepareFiles End" + mModelDir + " # " + mTokenizerDir);
-    }
 }
 
 class LoadThread extends Thread {
     private Chat mChat;
     private Handler mHandler;
     private String mModelDir;
-    private String mTokenizerDir;
-    LoadThread(Chat chat, Handler handler, String modelDir, String tokenizerDir) {
+    LoadThread(Chat chat, Handler handler, String modelDir) {
         mChat = chat;
         mHandler = handler;
         mModelDir = modelDir;
-        mTokenizerDir = tokenizerDir;
     }
     public void run() {
         super.run();
-        mChat.Init(mModelDir, mTokenizerDir);
+        mChat.Init(mModelDir);
         mHandler.sendMessage(new Message());
     }
 }
5 changes: 2 additions & 3 deletions android/app/src/main/jni/llm_mnn_jni.cpp
@@ -24,12 +24,11 @@ JNIEXPORT void JNI_OnUnload(JavaVM* vm, void* reserved) {
     __android_log_print(ANDROID_LOG_DEBUG, "MNN_DEBUG", "JNI_OnUnload");
 }
 
-JNIEXPORT jboolean JNICALL Java_com_mnn_llm_Chat_Init(JNIEnv* env, jobject thiz, jstring modelDir, jstring tokenizerDir) {
+JNIEXPORT jboolean JNICALL Java_com_mnn_llm_Chat_Init(JNIEnv* env, jobject thiz, jstring modelDir) {
     if (llm->load_progress() < 100) {
         const char* model_dir = env->GetStringUTFChars(modelDir, 0);
-        const char* token_dir = env->GetStringUTFChars(tokenizerDir, 0);
         llm = Llm::createLLM(model_dir);
-        llm->load(model_dir, token_dir);
+        llm->load(model_dir);
     }
     return JNI_TRUE;
 }
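A side note on the handler above: model_dir is obtained with GetStringUTFChars but never passed to ReleaseStringUTFChars, so the UTF buffer leaks on each Init call. A minimal sketch of the same logic with the matching release (createLLM and load both take the path by const reference and copy it, so releasing afterwards is safe):

    const char* model_dir = env->GetStringUTFChars(modelDir, 0);
    llm = Llm::createLLM(model_dir);
    llm->load(model_dir);
    env->ReleaseStringUTFChars(modelDir, model_dir);  // release the JNI-owned copy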
3 changes: 1 addition & 2 deletions demo/cli_demo.cpp
@@ -48,12 +48,11 @@ int main(int argc, const char* argv[]) {
     app.add_option("-c,--cpusize", cpusize, "cpu memory size(G), default is 8G.");
     app.add_option("-g,--gpusize", gpusize, "gpu memory size(G)");
     app.add_option("-m,--model_dir", model_dir, "model directory");
-    app.add_option("-t,--tokenizer_dir", tokenizer_dir, "tokenizer directory");
 
     CLI11_PARSE(app, argc, argv);
     std::cout << "model path is " << model_dir << std::endl;
     std::unique_ptr<Llm> llm(Llm::createLLM(model_dir));
-    llm->load(model_dir, tokenizer_dir);
+    llm->load(model_dir);
     llm->response("你好");
     return 0;
 }
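With the tokenizer file now resolved from the model directory itself, the demo takes one flag fewer. A sketch of the new invocation (binary name assumed from the source file, model path illustrative):

    ./cli_demo -m ./resource/models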
3 changes: 1 addition & 2 deletions demo/web_demo.cpp
@@ -50,13 +50,12 @@ int main(int argc, const char* argv[]) {
     app.add_option("-c,--cpusize", cpusize, "cpu memory size(G), default is 8G.");
     app.add_option("-g,--gpusize", gpusize, "gpu memory size(G)");
     app.add_option("-m,--model_dir", model_dir, "model directory");
-    app.add_option("-t,--tokenizer_dir", tokenizer_dir, "tokenizer directory");
 
     CLI11_PARSE(app, argc, argv);
 
     std::cout << "model path is " << model_dir << std::endl;
     std::unique_ptr<Llm> llm(Llm::createLLM(model_dir));
-    llm->load(model_dir, tokenizer_dir);
+    llm->load(model_dir);
 
     std::stringstream ss;
     httplib::Server svr;
16 changes: 11 additions & 5 deletions include/llm.hpp
@@ -19,18 +19,22 @@
 #include <MNN/expr/Module.hpp>
 #include <MNN/expr/MathOp.hpp>
 #include <MNN/expr/NeuralNetWorkOp.hpp>
+#include "tokenizer.hpp"
 
 using namespace MNN;
 using namespace Express;
 
 class Llm {
 public:
-    Llm() {}
+    Llm() {
+        // default tokenizer is sentencepiece
+        tokenizer_.reset(new Sentencepiece);
+    }
     static Llm* createLLM(const std::string& path);
     VARP gen_embedding(const std::vector<int>& input_ids);
-    void load(const std::string& model_dir, const std::string& tokenizer_dir);
+    void load(const std::string& model_dir);
     int forward(const std::vector<int>& input_ids);
-    std::vector<int> tokenizer_encode(std::string input_str);
+    std::vector<int> tokenizer_encode(const std::string& input_str);
     std::string decode(int id);
     std::string response(const std::string& input_str, std::ostream* os = &std::cout);
     float load_progress() { return load_progress_; }
@@ -52,14 +56,15 @@ class Llm {
     int all_seq_len_ = 0;
     int max_seq_len_ = 256;
     float load_progress_ = 0.f;
+    // tokenizer
+    std::unique_ptr<Tokenizer> tokenizer_;
 private:
     // MNN Modules
     std::shared_ptr<Executor::RuntimeManager> runtime_manager_;
     std::vector<std::shared_ptr<Module>> modules_;
     std::vector<VARP> past_key_values_;
     // model dir
-    std::string model_dir_ = "../resource/models/fp16";
-    std::string tokenizer_dir_ = "../resource/tokenizer";
+    std::string model_dir_;
     // tokenizer
     std::vector<std::string> word_decoder_;
     std::unordered_map<std::string, int> word_encoder_;
@@ -102,6 +107,7 @@ class Qwen_7b : public Llm {
         model_name_ = "Qwen_7b";
         layer_nums_ = 32;
         key_value_shape_ = {2, 1, 0, 32, 128};
+        tokenizer_.reset(new Tiktoken);
     }
 private:
     virtual std::vector<int> tokenizer(const std::string& query) override;
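Taken together, the llm.hpp changes establish the tokenizer-selection pattern: the base Llm constructor installs a Sentencepiece tokenizer as the default, and a model subclass that needs a different scheme swaps its own in from its constructor, as Qwen_7b does with Tiktoken above. A minimal sketch of the pattern for a hypothetical new model (the class name and values are illustrative, not part of this commit):

class MyModel : public Llm {
public:
    MyModel() {
        model_name_ = "MyModel";         // hypothetical model
        tokenizer_.reset(new Tiktoken);  // override the Sentencepiece default
    }
};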
87 changes: 87 additions & 0 deletions include/tokenizer.hpp
@@ -0,0 +1,87 @@
+//
+// tokenizer.hpp
+//
+// Created by MNN on 2023/09/25.
+// ZhaodeWang
+//
+
+#ifndef TOKENIZER_hpp
+#define TOKENIZER_hpp
+
+#include <vector>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <iostream>
+#include <string_view>
+
+class Tokenizer {
+public:
+    Tokenizer() = default;
+    virtual bool load(const std::string& filename) = 0;
+    virtual std::vector<int> encode(const std::string& str) = 0;
+    virtual std::string decode(int id) = 0;
+};
+
+class Sentencepiece : public Tokenizer {
+public:
+    Sentencepiece() = default;
+    virtual bool load(const std::string& filename) override;
+    virtual std::vector<int> encode(const std::string& str) override;
+    virtual std::string decode(int id) override;
+private:
+    enum ModelType {
+        UNIGRAM = 1,
+        BPE = 2,
+        WORD = 3,
+        CHAR = 4
+    };
+    enum PieceType {
+        NORMAL = 1,
+        UNKNOWN = 2,
+        CONTROL = 3,
+        USER_DEFINED = 4,
+        UNUSED = 5,
+        BYTE = 6
+    };
+    struct SentencePiece {
+        std::string piece;
+        float score;
+        PieceType type = PieceType::NORMAL;
+    };
+    using EncodeResult = std::vector<std::pair<std::string_view, int>>;
+private:
+    // model train type
+    ModelType type_ = BPE;
+    // byte fallback enabled
+    bool byte_fall_back_ = true;
+    // unknown id
+    int unk_id_ = 0;
+    // pieces from model
+    std::vector<SentencePiece> sentence_pieces_;
+    // piece -> id map for normal pieces
+    std::unordered_map<std::string, int> pieces_;
+    // piece -> id map for control, unknown, and byte pieces
+    std::unordered_map<std::string, int> reserved_id_map_;
+private:
+    float get_score(int id) const;
+    bool is_unused(int id) const;
+    bool is_control(int id) const;
+    int piece_to_id(const std::string& w) const;
+    std::string byte_to_piece(unsigned char c) const;
+    EncodeResult bpe_encode(std::string_view str, float alpha = 0.f);
+};
+
+class Tiktoken : public Tokenizer {
+public:
+    Tiktoken() = default;
+    virtual bool load(const std::string& filename) override;
+    virtual std::vector<int> encode(const std::string& str) override;
+    virtual std::string decode(int id) override;
+private:
+    std::vector<std::string> decoder_;
+    std::vector<int> tokens_;
+    std::vector<int> token_ids_;
+};
+
+#endif // TOKENIZER_hpp
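Because load, encode, and decode are pure virtual on Tokenizer, callers can hold a Tokenizer pointer and stay agnostic of the concrete scheme, exactly as Llm does with its tokenizer_ member. A minimal round-trip sketch through the interface (the tokenizer file name is illustrative, and error handling is kept to a bail-out):

#include "tokenizer.hpp"

int main() {
    // Sentencepiece is the default scheme; Tiktoken would work identically here.
    std::unique_ptr<Tokenizer> tok(new Sentencepiece);
    if (!tok->load("tokenizer.model")) {  // illustrative file name
        std::cerr << "failed to load tokenizer" << std::endl;
        return 1;
    }
    std::vector<int> ids = tok->encode("hello world");
    for (int id : ids) {
        std::cout << tok->decode(id);  // the API decodes one id at a time
    }
    std::cout << std::endl;
    return 0;
}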
