Skip to content

Commit

Permalink
add python function
Browse files Browse the repository at this point in the history
  • Loading branch information
wangzhaode committed Apr 25, 2024
1 parent d5fe08a commit 0f25187
Show file tree
Hide file tree
Showing 5 changed files with 296 additions and 46 deletions.
16 changes: 11 additions & 5 deletions include/llm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,26 +80,33 @@ class Llm {
void warmup();
std::string response(const std::string& input_str, std::ostream* os = &std::cout, const char* end_with = nullptr);
std::string response_nohistory(const std::string& input_str, std::ostream* os = &std::cout, const char* end_with = nullptr);
void generate_init();
std::string generate(const std::vector<int>& input_ids, std::ostream* os, const char* end_with);
std::vector<int> generate(const std::vector<int>& input_ids);
int forward(const std::vector<int>& input_ids);
float load_progress() { return load_progress_; }
void reset();
void print_speed();
friend class Pipeline;
public:
std::vector<int> history_;
std::string model_name_ = "";
// config
int max_new_tokens_ = 1024;
int backend_type_ = 0;
int thread_num_ = 4;
bool low_precision_ = true;
bool chatml_ = true;
// forward info
int max_seq_len_ = 1024;
int prompt_len_ = 0;
int gen_seq_len_ = 0;
int all_seq_len_ = 0;
// time
int64_t prefill_us_ = 0;
int64_t decode_us_ = 0;
protected:
void response_init();
std::string response_impl(const std::vector<int>& input_ids, std::ostream* os, const char* end_with);
VARP embedding(const std::vector<int>& input_ids);
VARP txt_embedding(const std::vector<int>& input_ids);
int forward(const std::vector<int>& input_ids);
std::vector<int> tokenizer_encode(const std::string& input_str);
std::string decode(int id);
protected:
Expand All @@ -111,7 +118,6 @@ class Llm {
int layer_nums_ = 0;
int hidden_size_ = 4096;
std::vector<int> key_value_shape_ = {};
std::string model_name_ = "";
std::string disk_embedding_file_ = "";
// gen info
float load_progress_ = 0.f;
Expand Down
198 changes: 172 additions & 26 deletions python/mnnllm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,23 @@

using namespace std;

// macros
// def_attr(NAME) expands to a getter/setter pair (PyLLM_get_NAME /
// PyLLM_set_NAME) that exposes the Llm member `NAME_` as a Python int
// attribute. NOTE(review): the setter ignores PyLong_AsLong failure
// (returns -1 with an error set for non-int values) -- confirm callers
// only assign ints; bool members (low_precision_, chatml_) rely on the
// implicit long->bool conversion.
#define def_attr(NAME) \
static PyObject* PyLLM_get_##NAME(LLM *self, void *closure) {\
    return PyLong_FromLong(self->llm->NAME##_);\
}\
static int PyLLM_set_##NAME(LLM *self, PyObject *value, void *closure) {\
    if (self->llm) {\
        self->llm->NAME##_ = PyLong_AsLong(value);\
    }\
    return 0;\
}

// register_attr(NAME) expands to the matching PyGetSetDef table entry
// for an attribute generated with def_attr(NAME).
#define register_attr(NAME) \
    {#NAME, (getter)PyLLM_get_##NAME, (setter)PyLLM_set_##NAME, "___"#NAME"__", NULL},
// end

// type convert start
inline PyObject* string2Object(const std::string& str) {
#if PY_MAJOR_VERSION == 2
return PyString_FromString(str.c_str());
Expand All @@ -19,6 +36,114 @@ inline PyObject* string2Object(const std::string& str) {
#endif
}

// Convert a std::string to a Python string object (delegates to
// string2Object, which picks the Py2/Py3 constructor).
static inline PyObject* toPyObj(string val) {
    return string2Object(val);
}

// Convert a C int to a Python integer object (new reference).
static inline PyObject* toPyObj(int val) {
    return PyLong_FromLong(val);
}

// Convert a C++ vector to a new Python list, converting each element
// with Func (defaults to the matching scalar toPyObj overload).
// Takes the vector by const reference to avoid copying it on every call,
// and uses size_t for the index to match vector::size() (the original
// compared a signed int against an unsigned size).
template <typename T, PyObject*(*Func)(T)=toPyObj>
static PyObject* toPyObj(const vector<T>& values) {
    PyObject* obj = PyList_New(values.size());
    for (size_t i = 0; i < values.size(); i++) {
        // PyList_SetItem steals the reference returned by Func.
        PyList_SetItem(obj, i, Func(values[i]));
    }
    return obj;
}

/*
static inline PyObject* toPyArray(MNN::Express::VARP var) {
auto info = var->getInfo();
auto shape = info->dim;
size_t total_length = info->size;
auto var_ptr = const_cast<void*>(var->readMap<void>());
std::vector<npy_intp> npy_dims;
for(const auto dim : shape) {
npy_dims.push_back(dim);
}
// auto data = PyArray_SimpleNewFromData(npy_dims.size(), npy_dims.data(), NPY_FLOAT, ptr);
auto ndarray = PyArray_SimpleNew(npy_dims.size(), npy_dims.data(), NPY_FLOAT);
void* npy_ptr = PyArray_DATA((PyArrayObject*)ndarray);
std::memcpy(npy_ptr, var_ptr, total_length * sizeof(float));
return (PyObject*)ndarray;
}
static inline PyObject* toPyArray(std::vector<int> vec) {
npy_intp dims[1] = { static_cast<npy_intp>(vec.size()) };
auto ndarray = PyArray_SimpleNew(1, dims, NPY_INT);
void* npy_ptr = PyArray_DATA((PyArrayObject*)ndarray);
std::memcpy(npy_ptr, vec.data(), vec.size() * sizeof(int));
return (PyObject*)ndarray;
}
*/

// True when obj is a Python integer; on Python 2 builds the legacy
// PyInt type is accepted as well.
static inline bool isInt(PyObject* obj) {
    return PyLong_Check(obj)
#if PY_MAJOR_VERSION < 3
        || PyInt_Check(obj)
#endif
    ;
}

// True when obj is a tuple or list whose elements satisfy Func.
// Only the first element is inspected; an empty sequence is accepted.
// Any other object type yields false.
template <bool (*Func)(PyObject*)>
static bool isVec(PyObject* obj) {
    if (PyTuple_Check(obj)) {
        return PyTuple_Size(obj) == 0 ? true : Func(PyTuple_GetItem(obj, 0));
    }
    if (PyList_Check(obj)) {
        return PyList_Size(obj) == 0 ? true : Func(PyList_GetItem(obj, 0));
    }
    return false;
}

// True for a single Python int, or a tuple/list whose first element is
// an int (empty sequences included).
static inline bool isInts(PyObject* obj) {
    if (isInt(obj)) {
        return true;
    }
    return isVec<isInt>(obj);
}

// Extract a Python integer as int64_t.
// NOTE(review): the overflow flag and any pending Python error are
// ignored; out-of-range values come back clamped by
// PyLong_AsLongLongAndOverflow -- confirm callers tolerate silent
// truncation before relying on this for large values.
inline int64_t unpackLong(PyObject* obj) {
    int overflow;
    long long value = PyLong_AsLongLongAndOverflow(obj, &overflow);
    return (int64_t)value;
}

static inline int toInt(PyObject* obj) {
return static_cast<int>(unpackLong(obj));
}

// Convert a Python tuple/list (or a single scalar) to vector<T>,
// converting each element with Func.
// Improvements over the original: the duplicated tuple/list branches are
// unified into one loop, and the index uses Py_ssize_t instead of
// comparing a signed int against an unsigned size_t.
template <typename T, T (*Func)(PyObject*)>
static vector<T> toVec(PyObject* obj) {
    vector<T> values;
    const bool is_tuple = PyTuple_Check(obj);
    if (is_tuple || PyList_Check(obj)) {
        const Py_ssize_t size = is_tuple ? PyTuple_Size(obj) : PyList_Size(obj);
        values.resize(size);
        for (Py_ssize_t i = 0; i < size; i++) {
            PyObject* item = is_tuple ? PyTuple_GetItem(obj, i)
                                      : PyList_GetItem(obj, i);
            values[i] = Func(item);
        }
        return values;
    }
    // Scalar fallback: wrap the single converted value.
    values.push_back(Func(obj));
    return values;
}

// Normalize a Python int or int sequence into std::vector<int>.
static inline std::vector<int> toInts(PyObject* obj) {
    if (isInt(obj)) {
        std::vector<int> single;
        single.push_back(toInt(obj));
        return single;
    }
    return toVec<int, toInt>(obj);
}
// type convert end

typedef struct {
PyObject_HEAD
Llm* llm;
Expand All @@ -30,17 +155,36 @@ static PyObject* PyLLM_new(struct _typeobject *type, PyObject *args, PyObject *k
}

// __str__ slot: report the wrapped model's name.
// The original span contained two full implementations concatenated (a
// merge/diff artifact): a sprintf-into-fixed-buffer version followed by
// unreachable statements. Keep the newer behavior (model_name_, which
// this commit made public) and drop the dead code.
static PyObject* Py_str(PyObject *self) {
    LLM* llm = (LLM*)self;
    if (!llm) {
        Py_RETURN_NONE;
    }
    return toPyObj(llm->llm->model_name_);
}

// load(model_dir): load model weights from `model_dir`. Returns None.
static PyObject* PyLLM_load(LLM *self, PyObject *args) {
    const char* model_dir = NULL;
    if (!PyArg_ParseTuple(args, "s", &model_dir)) {
        // Bug fix: the original returned Py_None here while
        // PyArg_ParseTuple's error indicator was still set, which makes
        // CPython raise SystemError ("returned a result with an error
        // set"). Return NULL to propagate the TypeError instead.
        return NULL;
    }
    self->llm->load(model_dir);
    Py_RETURN_NONE;
}

// generate(input_ids): run token generation on a list/tuple of ints and
// return the output ids as a Python list.
static PyObject* PyLLM_generate(LLM *self, PyObject *args) {
    PyObject *input_ids = nullptr;
    // Bug fix: the original condition was
    //   !PyArg_ParseTuple(...) && isInts(input_ids)
    // which (a) skips the isInts check entirely when parsing succeeds and
    // (b) calls isInts(nullptr) when parsing fails. It also returned None
    // with the parse error still set. Reject when parsing fails OR the
    // argument is not an int sequence, and return NULL so the exception
    // propagates.
    if (!PyArg_ParseTuple(args, "O", &input_ids) || !isInts(input_ids)) {
        if (!PyErr_Occurred()) {
            PyErr_SetString(PyExc_TypeError,
                            "generate expects an int or a list/tuple of ints");
        }
        return NULL;
    }
    auto output_ids = self->llm->generate(toInts(input_ids));
    return toPyObj<int, toPyObj>(output_ids);
}

static PyObject* PyLLM_response(LLM *self, PyObject *args) {
const char* query = NULL;
int stream = 0;
if (!PyArg_ParseTuple(args, "s|p", &query, &stream)) {
return NULL;
Py_RETURN_NONE;
}
LlmStreamBuffer buffer(nullptr);
std::ostream null_os(&buffer);
Expand All @@ -49,24 +193,24 @@ static PyObject* PyLLM_response(LLM *self, PyObject *args) {
}

// Method table for the LLM type. The original span carried a duplicate
// "response" entry (old + new diff lines interleaved) and the typo
// "hsitory"; keep one entry per method and fix the docstring.
static PyMethodDef PyLLM_methods[] = {
    {"load", (PyCFunction)PyLLM_load, METH_VARARGS, "load model from `dir`."},
    {"generate", (PyCFunction)PyLLM_generate, METH_VARARGS, "generate `output_ids` by `input_ids`."},
    {"response", (PyCFunction)PyLLM_response, METH_VARARGS, "response `query` without history."},
    {NULL}  /* Sentinel */
};


// Legacy accessor kept for backward compatibility: the Python attribute
// `max_gen_len` reads and writes Llm::max_seq_len_.
static PyObject* PyLLM_get_mgl(LLM *self, void *closure) {
    return PyLong_FromLong(self->llm->max_seq_len_);
}

// Setter for `max_gen_len`. NOTE(review): PyLong_AsLong failure (non-int
// value) is ignored and would store -1 -- confirm that is acceptable.
static int PyLLM_set_mgl(LLM *self, PyObject *value, void *closure) {
    if (self->llm) {
        self->llm->max_seq_len_ = (int)PyLong_AsLong(value);
    }
    return 0;
}

// Generated getter/setter pairs for the matching Llm::<name>_ members
// (see the def_attr macro above).
def_attr(backend_type)
def_attr(thread_num)
def_attr(low_precision)
def_attr(chatml)
def_attr(max_new_tokens)

// Attribute table for the LLM type: the legacy `max_gen_len` alias plus
// the attributes generated via def_attr/register_attr.
static PyGetSetDef PyLLM_getsetters[] = {
    {"max_gen_len", (getter)PyLLM_get_mgl, (setter)PyLLM_set_mgl, "___max_gen_len___", NULL},
    register_attr(backend_type)
    register_attr(thread_num)
    register_attr(low_precision)
    register_attr(chatml)
    register_attr(max_new_tokens)
    {NULL}  /* Sentinel */
};

Expand All @@ -90,7 +234,7 @@ static PyTypeObject PyLLM = {
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE, /*tp_flags*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
"LLM is mnn-llm's `Llm` python wrapper", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
Expand All @@ -111,11 +255,11 @@ static PyTypeObject PyLLM = {
PyLLM_new, /* tp_new */
};

static PyObject *py_load(PyObject *self, PyObject *args) {
static PyObject* py_create(PyObject *self, PyObject *args) {
if (!PyTuple_Size(args)) {
return NULL;
}
const char *model_dir = NULL;
const char* model_dir = NULL;
const char* model_type = "auto";
if (!PyArg_ParseTuple(args, "s|s", &model_dir, &model_type)) {
return NULL;
Expand All @@ -125,19 +269,19 @@ static PyObject *py_load(PyObject *self, PyObject *args) {
return NULL;
}
llm->llm = Llm::createLLM(model_dir, model_type);
llm->llm->load(model_dir);
// llm->llm->load(model_dir);
return (PyObject*)llm;
}

// Module-level function table. The original span (a diff artifact)
// contained a stale {"load", py_load, ...} entry followed by a premature
// {NULL, NULL} sentinel, which terminated the table and made the
// "create" entry unreachable; py_load was renamed py_create in this
// commit. Keep the single live entry and one sentinel.
static PyMethodDef Methods[] = {
    {"create", py_create, METH_VARARGS},
    {NULL, NULL}
};

// Module definition for the `cmnnllm` extension. The original span (a
// diff artifact) carried two consecutive doc-string initializers, which
// would shift `-1` and `Methods` into the wrong PyModuleDef slots; keep
// exactly one doc member.
static struct PyModuleDef mnnllmModule = {
    PyModuleDef_HEAD_INIT,
    "cmnnllm",                /* module name */
    "mnnllm cpython module.", /* module documentation, may be NULL */
    -1,                       /* per-interpreter state size; -1 keeps state in globals */
    Methods
};
Expand All @@ -147,10 +291,12 @@ static void def(PyObject* m, PyMethodDef* method) {
}

PyMODINIT_FUNC PyInit_cmnnllm(void) {
PyObject *m = PyModule_Create(&mnnllmModule);
if (PyType_Ready(&PyLLM) < 0) {
PyErr_SetString(PyExc_Exception, "init LLM: PyType_Ready PyLLM failed");
PyErr_SetString(PyExc_Exception, "init LLM: PyType_Ready PyLLM failed.");
return NULL;
}
PyObject *m = PyModule_Create(&mnnllmModule);
// _import_array();
PyModule_AddObject(m, "LLM", (PyObject *)&PyLLM);
def(m, &Methods[0]);
return m;
Expand Down
Loading

0 comments on commit 0f25187

Please sign in to comment.