diff --git a/Cargo.toml b/Cargo.toml
index 5eec5127c..024300da1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,7 +10,7 @@ members = [
resolver = "2"
[workspace.package]
-version = "0.1.17"
+version = "0.1.18"
edition = "2021"
description = "Fast and easy LLM serving."
homepage = "https://github.com/EricLBuehler/mistral.rs"
diff --git a/README.md b/README.md
index d20419ec4..67403b95e 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,7 @@ Please submit requests for new models [here](https://github.com/EricLBuehler/mis
- φ³ 📷 Run the Phi 3 vision model: [documentation and guide here](docs/PHI3V.md)
+
*After following installation instructions*
@@ -197,10 +198,11 @@ Please submit more benchmarks via raising an issue!
## Installation and Build
1) Install required packages
- - `openssl` (ex., `sudo apt install libssl-dev`)
- - `pkg-config` (ex., `sudo apt install pkg-config`)
+ - `openssl` (ex. on Ubuntu, `sudo apt install libssl-dev`)
+ - `pkg-config` (ex. on Ubuntu, `sudo apt install pkg-config`)
2) Install Rust: https://rustup.rs/
+ *Example on Ubuntu:*
```bash
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
source $HOME/.cargo/env
diff --git a/docs/PHI3V.md b/docs/PHI3V.md
index 0d8e5628a..d93ca15c6 100644
--- a/docs/PHI3V.md
+++ b/docs/PHI3V.md
@@ -2,10 +2,17 @@
The Phi 3 Vision Model has support in the Rust, Python, and HTTP APIs. The Phi 3 Vision Model supports ISQ for increased performance.
+The Python and HTTP APIs support sending images as:
+- URL
+- Path to a local image
+- [Base64](https://en.wikipedia.org/wiki/Base64) encoded string
+
+The Rust API takes an image from the [image](https://docs.rs/image/latest/image/index.html) crate.
+
> Note: The Phi 3 Vision model works best with one image although it is supported to send multiple images.
> Note: when sending multiple images, they will be resized to the minimum dimension by which all will fit without cropping.
-> Aspect ratio is not preserved.
+> Aspect ratio is not preserved in that case.
## HTTP server
You can find this example [here](../examples/server/phi3v.py).
@@ -18,6 +25,7 @@ We support an OpenAI compatible HTTP API for vision models. This example demonst
**Image:**
+
**Prompt:**
```
@@ -73,6 +81,9 @@ print(resp)
```
+- You can find an example of encoding the [image via base64 here](../examples/server/phi3v_base64.py).
+- You can find an example of loading an [image from a local file here](../examples/server/phi3v_local_img.py).
+
---
## Rust
@@ -201,4 +212,7 @@ res = runner.send_chat_completion_request(
)
print(res.choices[0].message.content)
print(res.usage)
-```
\ No newline at end of file
+```
+
+- You can find an example of encoding the [image via base64 here](../examples/python/phi3v_base64.py).
+- You can find an example of loading an [image from a local file here](../examples/python/phi3v_local_img.py).
diff --git a/examples/python/cookbook.ipynb b/examples/python/cookbook.ipynb
index 214d9bb95..0a6ccd93d 100644
--- a/examples/python/cookbook.ipynb
+++ b/examples/python/cookbook.ipynb
@@ -14,7 +14,7 @@
"outputs": [],
"source": [
"# First, install Rust: https://rustup.rs/\n",
- "%pip install mistralrs-cuda"
+ "%pip install mistralrs-cuda -v"
]
},
{
diff --git a/examples/python/phi3v_base64.py b/examples/python/phi3v_base64.py
new file mode 100644
index 000000000..97bfd3f4c
--- /dev/null
+++ b/examples/python/phi3v_base64.py
@@ -0,0 +1,44 @@
+from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture
+import base64
+
+runner = Runner(
+ which=Which.VisionPlain(
+ model_id="microsoft/Phi-3-vision-128k-instruct",
+ tokenizer_json=None,
+ repeat_last_n=64,
+ arch=VisionArchitecture.Phi3V,
+ ),
+)
+
+FILENAME = "picture.jpg"
+with open(FILENAME, "rb") as image_file:
+ encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
+
+res = runner.send_chat_completion_request(
+ ChatCompletionRequest(
+ model="phi3v",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": str(encoded_string),
+ },
+ },
+ {
+ "type": "text",
+ "text": "<|image_1|>\nWhat is shown in this image?",
+ },
+ ],
+ }
+ ],
+ max_tokens=256,
+ presence_penalty=1.0,
+ top_p=0.1,
+ temperature=0.1,
+ )
+)
+print(res.choices[0].message.content)
+print(res.usage)
diff --git a/examples/python/phi3v_local_img.py b/examples/python/phi3v_local_img.py
new file mode 100644
index 000000000..89da913b4
--- /dev/null
+++ b/examples/python/phi3v_local_img.py
@@ -0,0 +1,42 @@
+from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture
+import base64
+
+runner = Runner(
+ which=Which.VisionPlain(
+ model_id="microsoft/Phi-3-vision-128k-instruct",
+ tokenizer_json=None,
+ repeat_last_n=64,
+ arch=VisionArchitecture.Phi3V,
+ ),
+)
+
+FILENAME = "picture.jpg"
+
+res = runner.send_chat_completion_request(
+ ChatCompletionRequest(
+ model="phi3v",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": FILENAME,
+ },
+ },
+ {
+ "type": "text",
+ "text": "<|image_1|>\nWhat is shown in this image?",
+ },
+ ],
+ }
+ ],
+ max_tokens=256,
+ presence_penalty=1.0,
+ top_p=0.1,
+ temperature=0.1,
+ )
+)
+print(res.choices[0].message.content)
+print(res.usage)
diff --git a/examples/server/phi3v_base64.py b/examples/server/phi3v_base64.py
new file mode 100644
index 000000000..22f0fa48e
--- /dev/null
+++ b/examples/server/phi3v_base64.py
@@ -0,0 +1,69 @@
+import requests
+import httpx
+import textwrap, json
+import base64
+
+
+def log_response(response: httpx.Response):
+ request = response.request
+ print(f"Request: {request.method} {request.url}")
+ print(" Headers:")
+ for key, value in request.headers.items():
+ if key.lower() == "authorization":
+ value = "[...]"
+ if key.lower() == "cookie":
+ value = value.split("=")[0] + "=..."
+ print(f" {key}: {value}")
+ print(" Body:")
+ try:
+ request_body = json.loads(request.content)
+ print(textwrap.indent(json.dumps(request_body, indent=2), " "))
+ except json.JSONDecodeError:
+ print(textwrap.indent(request.content.decode(), " "))
+ print(f"Response: status_code={response.status_code}")
+ print(" Headers:")
+ for key, value in response.headers.items():
+ if key.lower() == "set-cookie":
+ value = value.split("=")[0] + "=..."
+ print(f" {key}: {value}")
+
+
+BASE_URL = "http://localhost:1234/v1"
+
+# Enable this to log requests and responses
+# openai.http_client = httpx.Client(
+# event_hooks={"request": [print], "response": [log_response]}
+# )
+
+FILENAME = "picture.jpg"
+with open(FILENAME, "rb") as image_file:
+ encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
+
+headers = {
+ "Content-Type": "application/json",
+}
+
+payload = {
+ "model": "phi3v",
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": str(encoded_string),
+ },
+ },
+ {
+ "type": "text",
+ "text": "<|image_1|>\nWhat is shown in this image?",
+ },
+ ],
+ }
+ ],
+ "max_tokens": 300,
+}
+
+response = requests.post(f"{BASE_URL}/chat/completions", headers=headers, json=payload)
+print(response.json())
diff --git a/examples/server/phi3v_local_img.py b/examples/server/phi3v_local_img.py
new file mode 100644
index 000000000..47051e7d3
--- /dev/null
+++ b/examples/server/phi3v_local_img.py
@@ -0,0 +1,67 @@
+import requests
+import httpx
+import textwrap, json
+import base64
+
+
+def log_response(response: httpx.Response):
+ request = response.request
+ print(f"Request: {request.method} {request.url}")
+ print(" Headers:")
+ for key, value in request.headers.items():
+ if key.lower() == "authorization":
+ value = "[...]"
+ if key.lower() == "cookie":
+ value = value.split("=")[0] + "=..."
+ print(f" {key}: {value}")
+ print(" Body:")
+ try:
+ request_body = json.loads(request.content)
+ print(textwrap.indent(json.dumps(request_body, indent=2), " "))
+ except json.JSONDecodeError:
+ print(textwrap.indent(request.content.decode(), " "))
+ print(f"Response: status_code={response.status_code}")
+ print(" Headers:")
+ for key, value in response.headers.items():
+ if key.lower() == "set-cookie":
+ value = value.split("=")[0] + "=..."
+ print(f" {key}: {value}")
+
+
+BASE_URL = "http://localhost:1234/v1"
+
+# Enable this to log requests and responses
+# openai.http_client = httpx.Client(
+# event_hooks={"request": [print], "response": [log_response]}
+# )
+
+FILENAME = "picture.jpg"
+
+headers = {
+ "Content-Type": "application/json",
+}
+
+payload = {
+ "model": "phi3v",
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": FILENAME,
+ },
+ },
+ {
+ "type": "text",
+ "text": "<|image_1|>\nWhat is shown in this image?",
+ },
+ ],
+ }
+ ],
+ "max_tokens": 300,
+}
+
+response = requests.post(f"{BASE_URL}/chat/completions", headers=headers, json=payload)
+print(response.json())
diff --git a/mistralrs-bench/Cargo.toml b/mistralrs-bench/Cargo.toml
index b7f262778..029c92f53 100644
--- a/mistralrs-bench/Cargo.toml
+++ b/mistralrs-bench/Cargo.toml
@@ -17,7 +17,7 @@ candle-core.workspace = true
serde.workspace = true
serde_json.workspace = true
clap.workspace = true
-mistralrs-core = { version = "0.1.17", path = "../mistralrs-core" }
+mistralrs-core = { version = "0.1.18", path = "../mistralrs-core" }
tracing.workspace = true
either.workspace = true
tokio.workspace = true
diff --git a/mistralrs-pyo3/Cargo.toml b/mistralrs-pyo3/Cargo.toml
index 6f17e9f39..3a299889e 100644
--- a/mistralrs-pyo3/Cargo.toml
+++ b/mistralrs-pyo3/Cargo.toml
@@ -17,7 +17,7 @@ doc = false
[dependencies]
pyo3.workspace = true
-mistralrs-core = { version = "0.1.17", path = "../mistralrs-core", features = ["pyo3_macros"] }
+mistralrs-core = { version = "0.1.18", path = "../mistralrs-core", features = ["pyo3_macros"] }
serde.workspace = true
serde_json.workspace = true
candle-core.workspace = true
diff --git a/mistralrs-pyo3/Cargo_template.toml b/mistralrs-pyo3/Cargo_template.toml
index c6b3a5d6c..76626a08b 100644
--- a/mistralrs-pyo3/Cargo_template.toml
+++ b/mistralrs-pyo3/Cargo_template.toml
@@ -17,7 +17,7 @@ doc = false
[dependencies]
pyo3.workspace = true
-mistralrs-core = { version = "0.1.17", path = "../mistralrs-core", features=["pyo3_macros","$feature_name"] }
+mistralrs-core = { version = "0.1.18", path = "../mistralrs-core", features=["pyo3_macros","$feature_name"] }
serde.workspace = true
serde_json.workspace = true
candle-core = { git = "https://github.com/EricLBuehler/candle.git", version = "0.6.0", rev = "f52e2347b6237d19ffd7af26315f543c22f9f286", features=["$feature_name"] }
@@ -27,6 +27,9 @@ intel-mkl-src = { workspace = true, optional = true }
either.workspace = true
futures.workspace = true
tokio.workspace = true
+image.workspace = true
+reqwest.workspace = true
+base64.workspace = true
[build-dependencies]
pyo3-build-config = "0.21"
diff --git a/mistralrs-pyo3/README.md b/mistralrs-pyo3/README.md
index 5e058da37..aef3faac8 100644
--- a/mistralrs-pyo3/README.md
+++ b/mistralrs-pyo3/README.md
@@ -21,19 +21,19 @@ sudo apt install pkg-config
- CUDA
- `pip install mistralrs-cuda`
+ `pip install mistralrs-cuda -v`
- Metal
- `pip install mistralrs-metal`
+ `pip install mistralrs-metal -v`
- Apple Accelerate
- `pip install mistralrs-accelerate`
+ `pip install mistralrs-accelerate -v`
- Intel MKL
- `pip install mistralrs-mkl`
+ `pip install mistralrs-mkl -v`
- Without accelerators
- `pip install mistralrs`
+ `pip install mistralrs -v`
All installations will install the `mistralrs` package. The suffix on the package installed by `pip` only controls the feature activation.
diff --git a/mistralrs-pyo3/pyproject.toml b/mistralrs-pyo3/pyproject.toml
index 29b66ebb0..bf036b698 100644
--- a/mistralrs-pyo3/pyproject.toml
+++ b/mistralrs-pyo3/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
[project]
name = "mistralrs"
-version = "0.1.17"
+version = "0.1.18"
requires-python = ">=3.8"
classifiers = [
"Programming Language :: Rust",
@@ -18,3 +18,4 @@ dynamic = ["description"]
[tool.maturin]
features = ["pyo3/extension-module"]
+profile = "release"
diff --git a/mistralrs-pyo3/pyproject_template.toml b/mistralrs-pyo3/pyproject_template.toml
index 969f987d1..07f2a4920 100644
--- a/mistralrs-pyo3/pyproject_template.toml
+++ b/mistralrs-pyo3/pyproject_template.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
[project]
name = "$name"
-version = "0.1.17"
+version = "0.1.18"
requires-python = ">=3.8"
classifiers = [
"Programming Language :: Rust",
@@ -18,3 +18,4 @@ dynamic = ["description"]
[tool.maturin]
features = ["pyo3/extension-module"]
+profile = "release"
diff --git a/mistralrs-pyo3/src/lib.rs b/mistralrs-pyo3/src/lib.rs
index 4cb99e989..8fa68fedc 100644
--- a/mistralrs-pyo3/src/lib.rs
+++ b/mistralrs-pyo3/src/lib.rs
@@ -4,11 +4,12 @@ use base64::{engine::general_purpose, Engine};
use candle_core::{quantized::GgmlDType, Result};
use either::Either;
use indexmap::IndexMap;
-use reqwest::StatusCode;
use std::{
cell::RefCell,
collections::HashMap,
fmt::Debug,
+ fs,
+ io::Read,
str::FromStr,
sync::{Arc, Mutex},
};
@@ -603,22 +604,29 @@ impl Runner {
if !image_urls.is_empty() {
let mut images = Vec::new();
for url in image_urls {
- let bytes = match reqwest::blocking::get(url.clone()) {
- Ok(http_resp) => http_resp
- .bytes()
- .map_err(|e| PyValueError::new_err(e.to_string()))?
- .to_vec(),
- Err(e) => {
- if e.status()
- .is_some_and(|code| code == StatusCode::NOT_FOUND)
- {
- general_purpose::STANDARD
- .decode(url)
- .map_err(|e| PyValueError::new_err(e.to_string()))?
- } else {
- return Err(PyValueError::new_err(e.to_string()));
+ let bytes = if url.contains("http") {
+ // Read from http
+ match reqwest::blocking::get(url.clone()) {
+ Ok(http_resp) => http_resp
+ .bytes()
+ .map_err(|e| PyValueError::new_err(e.to_string()))?
+ .to_vec(),
+ Err(e) => {
+ return Err(PyValueError::new_err(format!("{e}")))
}
}
+ } else if let Ok(mut f) = File::open(&url) {
+ // Read from local file
+ let metadata = fs::metadata(&url)
+ .map_err(|e| PyValueError::new_err(e.to_string()))?;
+ let mut buffer = vec![0; metadata.len() as usize];
+ f.read_exact(&mut buffer)?;
+ buffer
+ } else {
+ // Decode with base64
+ general_purpose::STANDARD
+ .decode(url)
+ .map_err(|e| PyValueError::new_err(e.to_string()))?
};
images.push(
image::load_from_memory(&bytes)
diff --git a/mistralrs-server/Cargo.toml b/mistralrs-server/Cargo.toml
index bb683b1a4..625fad0fd 100644
--- a/mistralrs-server/Cargo.toml
+++ b/mistralrs-server/Cargo.toml
@@ -22,7 +22,7 @@ axum = { version = "0.7.4", features = ["tokio"] }
tower-http = { version = "0.5.1", features = ["cors"]}
utoipa = { version = "4.2", features = ["axum_extras"] }
utoipa-swagger-ui = { version = "7.1.0", features = ["axum"]}
-mistralrs-core = { version = "0.1.17", path = "../mistralrs-core" }
+mistralrs-core = { version = "0.1.18", path = "../mistralrs-core" }
indexmap.workspace = true
accelerate-src = { workspace = true, optional = true }
intel-mkl-src = { workspace = true, optional = true }
diff --git a/mistralrs-server/src/chat_completion.rs b/mistralrs-server/src/chat_completion.rs
index cd4d4519e..fe6089c2a 100644
--- a/mistralrs-server/src/chat_completion.rs
+++ b/mistralrs-server/src/chat_completion.rs
@@ -3,6 +3,8 @@ use std::{
collections::HashMap,
env,
error::Error,
+ fs::{self, File},
+ io::Read,
ops::Deref,
pin::Pin,
sync::Arc,
@@ -256,15 +258,21 @@ async fn parse_request(
if !image_urls.is_empty() {
let mut images = Vec::new();
for url in image_urls {
- let bytes = match reqwest::get(url.clone()).await {
- Ok(http_resp) => http_resp.bytes().await?.to_vec(),
- Err(e) => {
- if e.status().is_some_and(|code| code == StatusCode::NOT_FOUND) {
- general_purpose::STANDARD.decode(url)?
- } else {
- anyhow::bail!(e)
- }
+ let bytes = if url.contains("http") {
+ // Read from http
+ match reqwest::get(url.clone()).await {
+ Ok(http_resp) => http_resp.bytes().await?.to_vec(),
+ Err(e) => anyhow::bail!(e),
}
+ } else if let Ok(mut f) = File::open(&url) {
+ // Read from local file
+ let metadata = fs::metadata(&url)?;
+ let mut buffer = vec![0; metadata.len() as usize];
+ f.read_exact(&mut buffer)?;
+ buffer
+ } else {
+ // Decode with base64
+ general_purpose::STANDARD.decode(url)?
};
images.push(image::load_from_memory(&bytes)?);
}
diff --git a/mistralrs-server/src/main.rs b/mistralrs-server/src/main.rs
index 73a7277a9..609fd0c51 100644
--- a/mistralrs-server/src/main.rs
+++ b/mistralrs-server/src/main.rs
@@ -1,6 +1,6 @@
use anyhow::Result;
use axum::{
- extract::{Json, State},
+ extract::{DefaultBodyLimit, Json, State},
http::{self, Method},
routing::{get, post},
Router,
@@ -28,6 +28,10 @@ use tracing::{info, warn};
use utoipa::{OpenApi, ToSchema};
use utoipa_swagger_ui::SwaggerUi;
+// NOTE(EricLBuehler): Accept up to 50mb input
+const N_INPUT_SIZE: usize = 50;
+const MB_TO_B: usize = 1024 * 1024; // number of bytes in a megabyte
+
fn parse_token_source(s: &str) -> Result {
s.parse()
}
@@ -223,6 +227,7 @@ fn get_router(state: Arc) -> Router {
.route("/", get(health))
.route("/activate_adapters", post(activate_adapters))
.route("/re_isq", post(re_isq))
+ .layer(DefaultBodyLimit::max(N_INPUT_SIZE * MB_TO_B))
.with_state(state)
}
diff --git a/mistralrs/Cargo.toml b/mistralrs/Cargo.toml
index 74462594a..af1c2baf1 100644
--- a/mistralrs/Cargo.toml
+++ b/mistralrs/Cargo.toml
@@ -12,7 +12,7 @@ license.workspace = true
homepage.workspace = true
[dependencies]
-mistralrs-core = { version = "0.1.17", path = "../mistralrs-core" }
+mistralrs-core = { version = "0.1.18", path = "../mistralrs-core" }
anyhow.workspace = true
tokio.workspace = true
candle-core.workspace = true