diff --git a/Cargo.toml b/Cargo.toml
index 5eec5127c..024300da1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,7 +10,7 @@ members = [
 resolver = "2"

 [workspace.package]
-version = "0.1.17"
+version = "0.1.18"
 edition = "2021"
 description = "Fast and easy LLM serving."
 homepage = "https://github.com/EricLBuehler/mistral.rs"
diff --git a/README.md b/README.md
index d20419ec4..67403b95e 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,7 @@ Please submit requests for new models [here](https://github.com/EricLBuehler/mis
 - φ³ 📷 Run the Phi 3 vision model: [documentation and guide here](docs/PHI3V.md)

 [image: Mount Washington]
+[link: Credit]

 *After following installation instructions*
@@ -197,10 +198,11 @@ Please submit more benchmarks via raising an issue!
 ## Installation and Build

 1) Install required packages
-    - `openssl` (ex., `sudo apt install libssl-dev`)
-    - `pkg-config` (ex., `sudo apt install pkg-config`)
+    - `openssl` (ex. on Ubuntu, `sudo apt install libssl-dev`)
+    - `pkg-config` (ex. on Ubuntu, `sudo apt install pkg-config`)

 2) Install Rust: https://rustup.rs/
+    *Example on Ubuntu:*
     ```bash
     curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
     source $HOME/.cargo/env
diff --git a/docs/PHI3V.md b/docs/PHI3V.md
index 0d8e5628a..d93ca15c6 100644
--- a/docs/PHI3V.md
+++ b/docs/PHI3V.md
@@ -2,10 +2,17 @@
 The Phi 3 Vision Model has support in the Rust, Python, and HTTP APIs. The Phi 3 Vision Model supports ISQ for increased performance.

+The Python and HTTP APIs support sending images as:
+- URL
+- Path to a local image
+- [Base64](https://en.wikipedia.org/wiki/Base64) encoded string
+
+The Rust API takes an image from the [image](https://docs.rs/image/latest/image/index.html) crate.
+
 > Note: The Phi 3 Vision model works best with one image, although sending multiple images is supported.

 > Note: when sending multiple images, they will be resized to the minimum dimension by which all will fit without cropping.
-> Aspect ratio is not preserved.
+> Aspect ratio is not preserved in that case.

 ## HTTP server
 You can find this example [here](../examples/server/phi3v.py).
@@ -18,6 +25,7 @@ We support an OpenAI compatible HTTP API for vision models. This example demonst

 **Image:**
 [image: Mount Washington]
+[link: Credit]

 **Prompt:**
 ```
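
The three input forms documented in the PHI3V.md hunk above all travel in the same `image_url` field of a chat message; only the contents of the string differ. A minimal sketch of how each variant could be built (the helper name and the remote URL are hypothetical, and a local `picture.jpg` is assumed to exist):

```python
import base64

# Hypothetical helper: builds the three "url" string variants that the
# Python and HTTP APIs accept, per the documentation added above.
def make_image_urls(path: str = "picture.jpg") -> dict:
    with open(path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")
    return {
        "remote": "https://example.com/picture.jpg",  # fetched over HTTP
        "local": path,                                # read from disk
        "base64": encoded,                            # decoded in place
    }
```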
@@ -73,6 +81,9 @@ print(resp)
 ```

+- You can find an example of encoding the [image via base64 here](../examples/server/phi3v_base64.py).
+- You can find an example of loading a [local image here](../examples/server/phi3v_local_img.py).
+
 ---

 ## Rust
@@ -201,4 +212,7 @@ res = runner.send_chat_completion_request(
 )
 print(res.choices[0].message.content)
 print(res.usage)
-```
\ No newline at end of file
+```
+
+- You can find an example of encoding the [image via base64 here](../examples/python/phi3v_base64.py).
+- You can find an example of loading a [local image here](../examples/python/phi3v_local_img.py).
diff --git a/examples/python/cookbook.ipynb b/examples/python/cookbook.ipynb
index 214d9bb95..0a6ccd93d 100644
--- a/examples/python/cookbook.ipynb
+++ b/examples/python/cookbook.ipynb
@@ -14,7 +14,7 @@
    "outputs": [],
    "source": [
     "# First, install Rust: https://rustup.rs/\n",
-    "%pip install mistralrs-cuda"
+    "%pip install mistralrs-cuda -v"
    ]
   },
   {
diff --git a/examples/python/phi3v_base64.py b/examples/python/phi3v_base64.py
new file mode 100644
index 000000000..97bfd3f4c
--- /dev/null
+++ b/examples/python/phi3v_base64.py
@@ -0,0 +1,44 @@
+from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture
+import base64
+
+runner = Runner(
+    which=Which.VisionPlain(
+        model_id="microsoft/Phi-3-vision-128k-instruct",
+        tokenizer_json=None,
+        repeat_last_n=64,
+        arch=VisionArchitecture.Phi3V,
+    ),
+)
+
+FILENAME = "picture.jpg"
+with open(FILENAME, "rb") as image_file:
+    encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
+
+res = runner.send_chat_completion_request(
+    ChatCompletionRequest(
+        model="phi3v",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": str(encoded_string),
+                        },
+                    },
+                    {
+                        "type": "text",
+                        "text": "<|image_1|>\nWhat is shown in this image?",
+                    },
+                ],
+            }
+        ],
+        max_tokens=256,
+        presence_penalty=1.0,
+        top_p=0.1,
+        temperature=0.1,
+    )
+)
+print(res.choices[0].message.content)
+print(res.usage)
diff --git a/examples/python/phi3v_local_img.py b/examples/python/phi3v_local_img.py
new file mode 100644
index 000000000..89da913b4
--- /dev/null
+++ b/examples/python/phi3v_local_img.py
@@ -0,0 +1,42 @@
+from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture
+import base64
+
+runner = Runner(
+    which=Which.VisionPlain(
+        model_id="microsoft/Phi-3-vision-128k-instruct",
+        tokenizer_json=None,
+        repeat_last_n=64,
+        arch=VisionArchitecture.Phi3V,
+    ),
+)
+
+FILENAME = "picture.jpg"
+
+res = runner.send_chat_completion_request(
+    ChatCompletionRequest(
+        model="phi3v",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": FILENAME,
+                        },
+                    },
+                    {
+                        "type": "text",
+                        "text": "<|image_1|>\nWhat is shown in this image?",
+                    },
+                ],
+            }
+        ],
+        max_tokens=256,
+        presence_penalty=1.0,
+        top_p=0.1,
+        temperature=0.1,
+    )
+)
+print(res.choices[0].message.content)
+print(res.usage)
diff --git a/examples/server/phi3v_base64.py b/examples/server/phi3v_base64.py
new file mode 100644
index 000000000..22f0fa48e
--- /dev/null
+++ b/examples/server/phi3v_base64.py
@@ -0,0 +1,69 @@
+import requests
+import httpx
+import textwrap, json
+import base64
+
+
+def log_response(response: httpx.Response):
+    request = response.request
+    print(f"Request: {request.method} {request.url}")
+    print("  Headers:")
+    for key, value in request.headers.items():
+        if key.lower() == "authorization":
+            value = "[...]"
+        if key.lower() == "cookie":
+            value = value.split("=")[0] + "=..."
+        print(f"    {key}: {value}")
+    print("  Body:")
+    try:
+        request_body = json.loads(request.content)
+        print(textwrap.indent(json.dumps(request_body, indent=2), "    "))
+    except json.JSONDecodeError:
+        print(textwrap.indent(request.content.decode(), "    "))
+    print(f"Response: status_code={response.status_code}")
+    print("  Headers:")
+    for key, value in response.headers.items():
+        if key.lower() == "set-cookie":
+            value = value.split("=")[0] + "=..."
+        print(f"    {key}: {value}")
+
+
+BASE_URL = "http://localhost:1234/v1"
+
+# Enable this to log requests and responses
+# openai.http_client = httpx.Client(
+#     event_hooks={"request": [print], "response": [log_response]}
+# )
+
+FILENAME = "picture.jpg"
+with open(FILENAME, "rb") as image_file:
+    encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
+
+headers = {
+    "Content-Type": "application/json",
+}
+
+payload = {
+    "model": "phi3v",
+    "messages": [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": str(encoded_string),
+                    },
+                },
+                {
+                    "type": "text",
+                    "text": "<|image_1|>\nWhat is shown in this image?",
+                },
+            ],
+        }
+    ],
+    "max_tokens": 300,
+}
+
+response = requests.post(f"{BASE_URL}/chat/completions", headers=headers, json=payload)
+print(response.json())
diff --git a/examples/server/phi3v_local_img.py b/examples/server/phi3v_local_img.py
new file mode 100644
index 000000000..47051e7d3
--- /dev/null
+++ b/examples/server/phi3v_local_img.py
@@ -0,0 +1,67 @@
+import requests
+import httpx
+import textwrap, json
+import base64
+
+
+def log_response(response: httpx.Response):
+    request = response.request
+    print(f"Request: {request.method} {request.url}")
+    print("  Headers:")
+    for key, value in request.headers.items():
+        if key.lower() == "authorization":
+            value = "[...]"
+        if key.lower() == "cookie":
+            value = value.split("=")[0] + "=..."
+        print(f"    {key}: {value}")
+    print("  Body:")
+    try:
+        request_body = json.loads(request.content)
+        print(textwrap.indent(json.dumps(request_body, indent=2), "    "))
+    except json.JSONDecodeError:
+        print(textwrap.indent(request.content.decode(), "    "))
+    print(f"Response: status_code={response.status_code}")
+    print("  Headers:")
+    for key, value in response.headers.items():
+        if key.lower() == "set-cookie":
+            value = value.split("=")[0] + "=..."
+        print(f"    {key}: {value}")
+
+
+BASE_URL = "http://localhost:1234/v1"
+
+# Enable this to log requests and responses
+# openai.http_client = httpx.Client(
+#     event_hooks={"request": [print], "response": [log_response]}
+# )
+
+FILENAME = "picture.jpg"
+
+headers = {
+    "Content-Type": "application/json",
+}
+
+payload = {
+    "model": "phi3v",
+    "messages": [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": FILENAME,
+                    },
+                },
+                {
+                    "type": "text",
+                    "text": "<|image_1|>\nWhat is shown in this image?",
+                },
+            ],
+        }
+    ],
+    "max_tokens": 300,
+}
+
+response = requests.post(f"{BASE_URL}/chat/completions", headers=headers, json=payload)
+print(response.json())
diff --git a/mistralrs-bench/Cargo.toml b/mistralrs-bench/Cargo.toml
index b7f262778..029c92f53 100644
--- a/mistralrs-bench/Cargo.toml
+++ b/mistralrs-bench/Cargo.toml
@@ -17,7 +17,7 @@ candle-core.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 clap.workspace = true
-mistralrs-core = { version = "0.1.17", path = "../mistralrs-core" }
+mistralrs-core = { version = "0.1.18", path = "../mistralrs-core" }
 tracing.workspace = true
 either.workspace = true
 tokio.workspace = true
diff --git a/mistralrs-pyo3/Cargo.toml b/mistralrs-pyo3/Cargo.toml
index 6f17e9f39..3a299889e 100644
--- a/mistralrs-pyo3/Cargo.toml
+++ b/mistralrs-pyo3/Cargo.toml
@@ -17,7 +17,7 @@ doc = false

 [dependencies]
 pyo3.workspace = true
-mistralrs-core = { version = "0.1.17", path = "../mistralrs-core", features = ["pyo3_macros"] }
+mistralrs-core = { version = "0.1.18", path = "../mistralrs-core", features = ["pyo3_macros"] }
 serde.workspace = true
 serde_json.workspace = true
 candle-core.workspace = true
diff --git a/mistralrs-pyo3/Cargo_template.toml b/mistralrs-pyo3/Cargo_template.toml
index c6b3a5d6c..76626a08b 100644
--- a/mistralrs-pyo3/Cargo_template.toml
+++ b/mistralrs-pyo3/Cargo_template.toml
@@ -17,7 +17,7 @@ doc = false

 [dependencies]
 pyo3.workspace = true
-mistralrs-core = { version = "0.1.17", path = "../mistralrs-core", features=["pyo3_macros","$feature_name"] }
+mistralrs-core = { version = "0.1.18", path = "../mistralrs-core", features=["pyo3_macros","$feature_name"] }
 serde.workspace = true
 serde_json.workspace = true
 candle-core = { git = "https://github.com/EricLBuehler/candle.git", version = "0.6.0", rev = "f52e2347b6237d19ffd7af26315f543c22f9f286", features=["$feature_name"] }
@@ -27,6 +27,9 @@ intel-mkl-src = { workspace = true, optional = true }
 either.workspace = true
 futures.workspace = true
 tokio.workspace = true
+image.workspace = true
+reqwest.workspace = true
+base64.workspace = true

 [build-dependencies]
 pyo3-build-config = "0.21"
diff --git a/mistralrs-pyo3/README.md b/mistralrs-pyo3/README.md
index 5e058da37..aef3faac8 100644
--- a/mistralrs-pyo3/README.md
+++ b/mistralrs-pyo3/README.md
@@ -21,19 +21,19 @@ sudo apt install pkg-config
 - CUDA

-  `pip install mistralrs-cuda`
+  `pip install mistralrs-cuda -v`

 - Metal

-  `pip install mistralrs-metal`
+  `pip install mistralrs-metal -v`

 - Apple Accelerate

-  `pip install mistralrs-accelerate`
+  `pip install mistralrs-accelerate -v`

 - Intel MKL

-  `pip install mistralrs-mkl`
+  `pip install mistralrs-mkl -v`

 - Without accelerators

-  `pip install mistralrs`
+  `pip install mistralrs -v`

 All installations will install the `mistralrs` package. The suffix on the package installed by `pip` only controls the feature activation.
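
Since every wheel above installs the same `mistralrs` module, a quick post-install sanity check looks the same regardless of the accelerator suffix. A small sketch (assuming one of the wheels above has been installed):

```python
# Confirm the extension module is importable and exposes the entry point
# used throughout the examples in this PR.
import mistralrs

print(mistralrs.__file__)            # path of the installed extension
print(hasattr(mistralrs, "Runner"))  # expected: True
```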
diff --git a/mistralrs-pyo3/pyproject.toml b/mistralrs-pyo3/pyproject.toml
index 29b66ebb0..bf036b698 100644
--- a/mistralrs-pyo3/pyproject.toml
+++ b/mistralrs-pyo3/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"

 [project]
 name = "mistralrs"
-version = "0.1.17"
+version = "0.1.18"
 requires-python = ">=3.8"
 classifiers = [
     "Programming Language :: Rust",
@@ -18,3 +18,4 @@ dynamic = ["description"]

 [tool.maturin]
 features = ["pyo3/extension-module"]
+profile = "release"
diff --git a/mistralrs-pyo3/pyproject_template.toml b/mistralrs-pyo3/pyproject_template.toml
index 969f987d1..07f2a4920 100644
--- a/mistralrs-pyo3/pyproject_template.toml
+++ b/mistralrs-pyo3/pyproject_template.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"

 [project]
 name = "$name"
-version = "0.1.17"
+version = "0.1.18"
 requires-python = ">=3.8"
 classifiers = [
     "Programming Language :: Rust",
@@ -18,3 +18,4 @@ dynamic = ["description"]

 [tool.maturin]
 features = ["pyo3/extension-module"]
+profile = "release"
diff --git a/mistralrs-pyo3/src/lib.rs b/mistralrs-pyo3/src/lib.rs
index 4cb99e989..8fa68fedc 100644
--- a/mistralrs-pyo3/src/lib.rs
+++ b/mistralrs-pyo3/src/lib.rs
@@ -4,11 +4,12 @@ use base64::{engine::general_purpose, Engine};
 use candle_core::{quantized::GgmlDType, Result};
 use either::Either;
 use indexmap::IndexMap;
-use reqwest::StatusCode;
 use std::{
     cell::RefCell,
     collections::HashMap,
     fmt::Debug,
+    fs::{self, File},
+    io::Read,
     str::FromStr,
     sync::{Arc, Mutex},
 };
@@ -603,22 +604,29 @@ impl Runner {
         if !image_urls.is_empty() {
             let mut images = Vec::new();
             for url in image_urls {
-                let bytes = match reqwest::blocking::get(url.clone()) {
-                    Ok(http_resp) => http_resp
-                        .bytes()
-                        .map_err(|e| PyValueError::new_err(e.to_string()))?
-                        .to_vec(),
-                    Err(e) => {
-                        if e.status()
-                            .is_some_and(|code| code == StatusCode::NOT_FOUND)
-                        {
-                            general_purpose::STANDARD
-                                .decode(url)
-                                .map_err(|e| PyValueError::new_err(e.to_string()))?
-                        } else {
-                            return Err(PyValueError::new_err(e.to_string()));
+                let bytes = if url.contains("http") {
+                    // Read from http
+                    match reqwest::blocking::get(url.clone()) {
+                        Ok(http_resp) => http_resp
+                            .bytes()
+                            .map_err(|e| PyValueError::new_err(e.to_string()))?
+                            .to_vec(),
+                        Err(e) => {
+                            return Err(PyValueError::new_err(format!("{e}")))
                         }
                     }
+                } else if let Ok(mut f) = File::open(&url) {
+                    // Read from local file
+                    let metadata = fs::metadata(&url)
+                        .map_err(|e| PyValueError::new_err(e.to_string()))?;
+                    let mut buffer = vec![0; metadata.len() as usize];
+                    f.read_exact(&mut buffer)?;
+                    buffer
+                } else {
+                    // Decode with base64
+                    general_purpose::STANDARD
+                        .decode(url)
+                        .map_err(|e| PyValueError::new_err(e.to_string()))?
                 };
                 images.push(
                     image::load_from_memory(&bytes)
diff --git a/mistralrs-server/Cargo.toml b/mistralrs-server/Cargo.toml
index bb683b1a4..625fad0fd 100644
--- a/mistralrs-server/Cargo.toml
+++ b/mistralrs-server/Cargo.toml
@@ -22,7 +22,7 @@ axum = { version = "0.7.4", features = ["tokio"] }
 tower-http = { version = "0.5.1", features = ["cors"]}
 utoipa = { version = "4.2", features = ["axum_extras"] }
 utoipa-swagger-ui = { version = "7.1.0", features = ["axum"]}
-mistralrs-core = { version = "0.1.17", path = "../mistralrs-core" }
+mistralrs-core = { version = "0.1.18", path = "../mistralrs-core" }
 indexmap.workspace = true
 accelerate-src = { workspace = true, optional = true }
 intel-mkl-src = { workspace = true, optional = true }
diff --git a/mistralrs-server/src/chat_completion.rs b/mistralrs-server/src/chat_completion.rs
index cd4d4519e..fe6089c2a 100644
--- a/mistralrs-server/src/chat_completion.rs
+++ b/mistralrs-server/src/chat_completion.rs
@@ -3,6 +3,8 @@ use std::{
     collections::HashMap,
     env,
     error::Error,
+    fs::{self, File},
+    io::Read,
     ops::Deref,
     pin::Pin,
     sync::Arc,
@@ -256,15 +258,21 @@ async fn parse_request(
     if !image_urls.is_empty() {
         let mut images = Vec::new();
         for url in image_urls {
-            let bytes = match reqwest::get(url.clone()).await {
-                Ok(http_resp) => http_resp.bytes().await?.to_vec(),
-                Err(e) => {
-                    if e.status().is_some_and(|code| code == StatusCode::NOT_FOUND) {
-                        general_purpose::STANDARD.decode(url)?
-                    } else {
-                        anyhow::bail!(e)
-                    }
+            let bytes = if url.contains("http") {
+                // Read from http
+                match reqwest::get(url.clone()).await {
+                    Ok(http_resp) => http_resp.bytes().await?.to_vec(),
+                    Err(e) => anyhow::bail!(e),
                 }
+            } else if let Ok(mut f) = File::open(&url) {
+                // Read from local file
+                let metadata = fs::metadata(&url)?;
+                let mut buffer = vec![0; metadata.len() as usize];
+                f.read_exact(&mut buffer)?;
+                buffer
+            } else {
+                // Decode with base64
+                general_purpose::STANDARD.decode(url)?
             };
             images.push(image::load_from_memory(&bytes)?);
         }
diff --git a/mistralrs-server/src/main.rs b/mistralrs-server/src/main.rs
index 73a7277a9..609fd0c51 100644
--- a/mistralrs-server/src/main.rs
+++ b/mistralrs-server/src/main.rs
@@ -1,6 +1,6 @@
 use anyhow::Result;
 use axum::{
-    extract::{Json, State},
+    extract::{DefaultBodyLimit, Json, State},
     http::{self, Method},
     routing::{get, post},
     Router,
@@ -28,6 +28,10 @@ use tracing::{info, warn};
 use utoipa::{OpenApi, ToSchema};
 use utoipa_swagger_ui::SwaggerUi;

+// NOTE(EricLBuehler): Accept up to 50 MB of input
+const N_INPUT_SIZE: usize = 50;
+const MB_TO_B: usize = 1024 * 1024; // bytes per MB
+
 fn parse_token_source(s: &str) -> Result<TokenSource, String> {
     s.parse()
 }
@@ -223,6 +227,7 @@ fn get_router(state: Arc) -> Router {
         .route("/", get(health))
         .route("/activate_adapters", post(activate_adapters))
         .route("/re_isq", post(re_isq))
+        .layer(DefaultBodyLimit::max(N_INPUT_SIZE * MB_TO_B))
         .with_state(state)
 }
diff --git a/mistralrs/Cargo.toml b/mistralrs/Cargo.toml
index 74462594a..af1c2baf1 100644
--- a/mistralrs/Cargo.toml
+++ b/mistralrs/Cargo.toml
@@ -12,7 +12,7 @@ license.workspace = true
 homepage.workspace = true

 [dependencies]
-mistralrs-core = { version = "0.1.17", path = "../mistralrs-core" }
+mistralrs-core = { version = "0.1.18", path = "../mistralrs-core" }
 anyhow.workspace = true
 tokio.workspace = true
 candle-core.workspace = true
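
The `parse_request` change above resolves an image string in a fixed order: try HTTP, then a local file, then fall back to base64 decoding. A rough Python mirror of that order (an illustrative sketch, not the actual Rust implementation), together with the arithmetic behind the new 50 MB body limit:

```python
import base64
import os

import requests


def load_image_bytes(url: str) -> bytes:
    """Sketch of the server's resolution order: HTTP, local file, base64."""
    if "http" in url:  # same substring check the Rust code uses
        return requests.get(url).content  # read from http
    if os.path.isfile(url):  # read from a local file
        with open(url, "rb") as f:
            return f.read()
    return base64.b64decode(url)  # decode with base64


# Base64 encodes 3 raw bytes as 4 ASCII bytes, so under the new 50 MB
# DefaultBodyLimit roughly 50 * 3 / 4 = 37.5 MB of raw image data fits
# in a single request (ignoring the JSON envelope around it).
```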