diff --git a/Cargo.toml b/Cargo.toml
index 5eec5127c..024300da1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,7 +10,7 @@ members = [
 resolver = "2"

 [workspace.package]
-version = "0.1.17"
+version = "0.1.18"
 edition = "2021"
 description = "Fast and easy LLM serving."
 homepage = "https://github.com/EricLBuehler/mistral.rs"
diff --git a/README.md b/README.md
index d20419ec4..67403b95e 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,7 @@ Please submit requests for new models [here](https://github.com/EricLBuehler/mis
 - φ³ 📷 Run the Phi 3 vision model: [documentation and guide here](docs/PHI3V.md)

 [image: Mount Washington]
+[link: Credit]

 *After following installation instructions*
@@ -197,10 +198,11 @@ Please submit more benchmarks via raising an issue!
 ## Installation and Build

 1) Install required packages
-    - `openssl` (ex., `sudo apt install libssl-dev`)
-    - `pkg-config` (ex., `sudo apt install pkg-config`)
+    - `openssl` (ex. on Ubuntu, `sudo apt install libssl-dev`)
+    - `pkg-config` (ex. on Ubuntu, `sudo apt install pkg-config`)

 2) Install Rust: https://rustup.rs/
+    *Example on Ubuntu:*
     ```bash
     curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
     source $HOME/.cargo/env
diff --git a/docs/PHI3V.md b/docs/PHI3V.md
index 0d8e5628a..d93ca15c6 100644
--- a/docs/PHI3V.md
+++ b/docs/PHI3V.md
@@ -2,10 +2,17 @@
 The Phi 3 Vision Model has support in the Rust, Python, and HTTP APIs. The Phi 3 Vision Model supports ISQ for increased performance.

+The Python and HTTP APIs support sending images as:
+- URL
+- Path to a local image
+- [Base64](https://en.wikipedia.org/wiki/Base64) encoded string
+
+The Rust API takes an image from the [image](https://docs.rs/image/latest/image/index.html) crate.
+
 > Note: The Phi 3 Vision model works best with one image, although sending multiple images is supported.

 > Note: when sending multiple images, they will be resized to the minimum dimension by which all will fit without cropping.
-> Aspect ratio is not preserved.
+> Aspect ratio is not preserved in that case.

 ## HTTP server
 You can find this example [here](../examples/server/phi3v.py).
@@ -18,6 +25,7 @@ We support an OpenAI compatible HTTP API for vision models. This example demonst

 **Image:**
 [image: Mount Washington]
+[link: Credit]

 **Prompt:**
 ```
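
The three input forms documented in the PHI3V.md hunk above all travel in the same `image_url` field of a chat message; only the contents of the string differ. A minimal sketch of how each variant could be built (the helper name and the remote URL are hypothetical, and a local `picture.jpg` is assumed to exist):

```python
import base64

# Hypothetical helper: builds the three "url" string variants that the
# Python and HTTP APIs accept, per the documentation added above.
def make_image_urls(path: str = "picture.jpg") -> dict:
    with open(path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")
    return {
        "remote": "https://example.com/picture.jpg",  # fetched over HTTP
        "local": path,                                # read from disk
        "base64": encoded,                            # decoded in place
    }
```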
@@ -73,6 +81,9 @@ print(resp)
 ```

+- You can find an example of encoding the [image via base64 here](../examples/server/phi3v_base64.py).
+- You can find an example of loading a [local image here](../examples/server/phi3v_local_img.py).
+
 ---

 ## Rust
@@ -201,4 +212,7 @@ res = runner.send_chat_completion_request(
 )
 print(res.choices[0].message.content)
 print(res.usage)
-```
\ No newline at end of file
+```
+
+- You can find an example of encoding the [image via base64 here](../examples/python/phi3v_base64.py).
+- You can find an example of loading a [local image here](../examples/python/phi3v_local_img.py).
diff --git a/examples/python/cookbook.ipynb b/examples/python/cookbook.ipynb
index 214d9bb95..0a6ccd93d 100644
--- a/examples/python/cookbook.ipynb
+++ b/examples/python/cookbook.ipynb
@@ -14,7 +14,7 @@
    "outputs": [],
    "source": [
     "# First, install Rust: https://rustup.rs/\n",
-    "%pip install mistralrs-cuda"
+    "%pip install mistralrs-cuda -v"
    ]
   },
   {
diff --git a/examples/python/phi3v_base64.py b/examples/python/phi3v_base64.py
new file mode 100644
index 000000000..97bfd3f4c
--- /dev/null
+++ b/examples/python/phi3v_base64.py
@@ -0,0 +1,44 @@
+from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture
+import base64
+
+runner = Runner(
+    which=Which.VisionPlain(
+        model_id="microsoft/Phi-3-vision-128k-instruct",
+        tokenizer_json=None,
+        repeat_last_n=64,
+        arch=VisionArchitecture.Phi3V,
+    ),
+)
+
+FILENAME = "picture.jpg"
+with open(FILENAME, "rb") as image_file:
+    encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
+
+res = runner.send_chat_completion_request(
+    ChatCompletionRequest(
+        model="phi3v",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": str(encoded_string),
+                        },
+                    },
+                    {
+                        "type": "text",
+                        "text": "<|image_1|>\nWhat is shown in this image?",
+                    },
+                ],
+            }
+        ],
+        max_tokens=256,
+        presence_penalty=1.0,
+        top_p=0.1,
+        temperature=0.1,
+    )
+)
+print(res.choices[0].message.content)
+print(res.usage)
diff --git a/examples/python/phi3v_local_img.py b/examples/python/phi3v_local_img.py
new file mode 100644
index 000000000..89da913b4
--- /dev/null
+++ b/examples/python/phi3v_local_img.py
@@ -0,0 +1,42 @@
+from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture
+import base64
+
+runner = Runner(
+    which=Which.VisionPlain(
+        model_id="microsoft/Phi-3-vision-128k-instruct",
+        tokenizer_json=None,
+        repeat_last_n=64,
+        arch=VisionArchitecture.Phi3V,
+    ),
+)
+
+FILENAME = "picture.jpg"
+
+res = runner.send_chat_completion_request(
+    ChatCompletionRequest(
+        model="phi3v",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": FILENAME,
+                        },
+                    },
+                    {
+                        "type": "text",
+                        "text": "<|image_1|>\nWhat is shown in this image?",
+                    },
+                ],
+            }
+        ],
+        max_tokens=256,
+        presence_penalty=1.0,
+        top_p=0.1,
+        temperature=0.1,
+    )
+)
+print(res.choices[0].message.content)
+print(res.usage)
diff --git a/examples/server/phi3v_base64.py b/examples/server/phi3v_base64.py
new file mode 100644
index 000000000..22f0fa48e
--- /dev/null
+++ b/examples/server/phi3v_base64.py
@@ -0,0 +1,69 @@
+import requests
+import httpx
+import textwrap, json
+import base64
+
+
+def log_response(response: httpx.Response):
+    request = response.request
+    print(f"Request: {request.method} {request.url}")
+    print("  Headers:")
+    for key, value in request.headers.items():
+        if key.lower() == "authorization":
+            value = "[...]"
+        if key.lower() == "cookie":
+            value = value.split("=")[0] + "=..."
+        print(f"    {key}: {value}")
+    print("  Body:")
+    try:
+        request_body = json.loads(request.content)
+        print(textwrap.indent(json.dumps(request_body, indent=2), "    "))
+    except json.JSONDecodeError:
+        print(textwrap.indent(request.content.decode(), "    "))
+    print(f"Response: status_code={response.status_code}")
+    print("  Headers:")
+    for key, value in response.headers.items():
+        if key.lower() == "set-cookie":
+            value = value.split("=")[0] + "=..."
+        print(f"    {key}: {value}")
+
+
+BASE_URL = "http://localhost:1234/v1"
+
+# Enable this to log requests and responses
+# openai.http_client = httpx.Client(
+#     event_hooks={"request": [print], "response": [log_response]}
+# )
+
+FILENAME = "picture.jpg"
+with open(FILENAME, "rb") as image_file:
+    encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
+
+headers = {
+    "Content-Type": "application/json",
+}
+
+payload = {
+    "model": "phi3v",
+    "messages": [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": str(encoded_string),
+                    },
+                },
+                {
+                    "type": "text",
+                    "text": "<|image_1|>\nWhat is shown in this image?",
+                },
+            ],
+        }
+    ],
+    "max_tokens": 300,
+}
+
+response = requests.post(f"{BASE_URL}/chat/completions", headers=headers, json=payload)
+print(response.json())
diff --git a/examples/server/phi3v_local_img.py b/examples/server/phi3v_local_img.py
new file mode 100644
index 000000000..47051e7d3
--- /dev/null
+++ b/examples/server/phi3v_local_img.py
@@ -0,0 +1,67 @@
+import requests
+import httpx
+import textwrap, json
+import base64
+
+
+def log_response(response: httpx.Response):
+    request = response.request
+    print(f"Request: {request.method} {request.url}")
+    print("  Headers:")
+    for key, value in request.headers.items():
+        if key.lower() == "authorization":
+            value = "[...]"
+        if key.lower() == "cookie":
+            value = value.split("=")[0] + "=..."
+        print(f"    {key}: {value}")
+    print("  Body:")
+    try:
+        request_body = json.loads(request.content)
+        print(textwrap.indent(json.dumps(request_body, indent=2), "    "))
+    except json.JSONDecodeError:
+        print(textwrap.indent(request.content.decode(), "    "))
+    print(f"Response: status_code={response.status_code}")
+    print("  Headers:")
+    for key, value in response.headers.items():
+        if key.lower() == "set-cookie":
+            value = value.split("=")[0] + "=..."
+        print(f"    {key}: {value}")
+
+
+BASE_URL = "http://localhost:1234/v1"
+
+# Enable this to log requests and responses
+# openai.http_client = httpx.Client(
+#     event_hooks={"request": [print], "response": [log_response]}
+# )
+
+FILENAME = "picture.jpg"
+
+headers = {
+    "Content-Type": "application/json",
+}
+
+payload = {
+    "model": "phi3v",
+    "messages": [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": FILENAME,
+                    },
+                },
+                {
+                    "type": "text",
+                    "text": "<|image_1|>\nWhat is shown in this image?",
+                },
+            ],
+        }
+    ],
+    "max_tokens": 300,
+}
+
+response = requests.post(f"{BASE_URL}/chat/completions", headers=headers, json=payload)
+print(response.json())
diff --git a/mistralrs-bench/Cargo.toml b/mistralrs-bench/Cargo.toml
index b7f262778..029c92f53 100644
--- a/mistralrs-bench/Cargo.toml
+++ b/mistralrs-bench/Cargo.toml
@@ -17,7 +17,7 @@ candle-core.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 clap.workspace = true
-mistralrs-core = { version = "0.1.17", path = "../mistralrs-core" }
+mistralrs-core = { version = "0.1.18", path = "../mistralrs-core" }
 tracing.workspace = true
 either.workspace = true
 tokio.workspace = true
diff --git a/mistralrs-pyo3/Cargo.toml b/mistralrs-pyo3/Cargo.toml
index 6f17e9f39..3a299889e 100644
--- a/mistralrs-pyo3/Cargo.toml
+++ b/mistralrs-pyo3/Cargo.toml
@@ -17,7 +17,7 @@ doc = false

 [dependencies]
 pyo3.workspace = true
-mistralrs-core = { version = "0.1.17", path = "../mistralrs-core", features = ["pyo3_macros"] }
+mistralrs-core = { version = "0.1.18", path = "../mistralrs-core", features = ["pyo3_macros"] }
 serde.workspace = true
 serde_json.workspace = true
 candle-core.workspace = true
diff --git a/mistralrs-pyo3/Cargo_template.toml b/mistralrs-pyo3/Cargo_template.toml
index c6b3a5d6c..76626a08b 100644
--- a/mistralrs-pyo3/Cargo_template.toml
+++ b/mistralrs-pyo3/Cargo_template.toml
@@ -17,7 +17,7 @@ doc = false

 [dependencies]
 pyo3.workspace = true
-mistralrs-core = { version = "0.1.17", path = "../mistralrs-core", features=["pyo3_macros","$feature_name"] }
+mistralrs-core = { version = "0.1.18", path = "../mistralrs-core", features=["pyo3_macros","$feature_name"] }
 serde.workspace = true
 serde_json.workspace = true
 candle-core = { git = "https://github.com/EricLBuehler/candle.git", version = "0.6.0", rev = "f52e2347b6237d19ffd7af26315f543c22f9f286", features=["$feature_name"] }
@@ -27,6 +27,9 @@ intel-mkl-src = { workspace = true, optional = true }
 either.workspace = true
 futures.workspace = true
 tokio.workspace = true
+image.workspace = true
+reqwest.workspace = true
+base64.workspace = true

 [build-dependencies]
 pyo3-build-config = "0.21"
diff --git a/mistralrs-pyo3/README.md b/mistralrs-pyo3/README.md
index 5e058da37..aef3faac8 100644
--- a/mistralrs-pyo3/README.md
+++ b/mistralrs-pyo3/README.md
@@ -21,19 +21,19 @@ sudo apt install pkg-config
 - CUDA

-  `pip install mistralrs-cuda`
+  `pip install mistralrs-cuda -v`

 - Metal

-  `pip install mistralrs-metal`
+  `pip install mistralrs-metal -v`

 - Apple Accelerate

-  `pip install mistralrs-accelerate`
+  `pip install mistralrs-accelerate -v`

 - Intel MKL

-  `pip install mistralrs-mkl`
+  `pip install mistralrs-mkl -v`

 - Without accelerators

-  `pip install mistralrs`
+  `pip install mistralrs -v`

 All installations will install the `mistralrs` package. The suffix on the package installed by `pip` only controls the feature activation.
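
Since every wheel above installs the same `mistralrs` module, a quick post-install sanity check looks the same regardless of the accelerator suffix. A small sketch (assuming one of the wheels above has been installed):

```python
# Confirm the extension module is importable and exposes the entry point
# used throughout the examples in this PR.
import mistralrs

print(mistralrs.__file__)            # path of the installed extension
print(hasattr(mistralrs, "Runner"))  # expected: True
```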
diff --git a/mistralrs-pyo3/pyproject.toml b/mistralrs-pyo3/pyproject.toml
index 29b66ebb0..bf036b698 100644
--- a/mistralrs-pyo3/pyproject.toml
+++ b/mistralrs-pyo3/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"

 [project]
 name = "mistralrs"
-version = "0.1.17"
+version = "0.1.18"
 requires-python = ">=3.8"
 classifiers = [
     "Programming Language :: Rust",
@@ -18,3 +18,4 @@ dynamic = ["description"]

 [tool.maturin]
 features = ["pyo3/extension-module"]
+profile = "release"
diff --git a/mistralrs-pyo3/pyproject_template.toml b/mistralrs-pyo3/pyproject_template.toml
index 969f987d1..07f2a4920 100644
--- a/mistralrs-pyo3/pyproject_template.toml
+++ b/mistralrs-pyo3/pyproject_template.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"

 [project]
 name = "$name"
-version = "0.1.17"
+version = "0.1.18"
 requires-python = ">=3.8"
 classifiers = [
     "Programming Language :: Rust",
@@ -18,3 +18,4 @@ dynamic = ["description"]

 [tool.maturin]
 features = ["pyo3/extension-module"]
+profile = "release"
diff --git a/mistralrs-pyo3/src/lib.rs b/mistralrs-pyo3/src/lib.rs
index 4cb99e989..8fa68fedc 100644
--- a/mistralrs-pyo3/src/lib.rs
+++ b/mistralrs-pyo3/src/lib.rs
@@ -4,11 +4,12 @@ use base64::{engine::general_purpose, Engine};
 use candle_core::{quantized::GgmlDType, Result};
 use either::Either;
 use indexmap::IndexMap;
-use reqwest::StatusCode;
 use std::{
     cell::RefCell,
     collections::HashMap,
     fmt::Debug,
+    fs::{self, File},
+    io::Read,
     str::FromStr,
     sync::{Arc, Mutex},
 };
@@ -603,22 +604,29 @@ impl Runner {
         if !image_urls.is_empty() {
             let mut images = Vec::new();
             for url in image_urls {
-                let bytes = match reqwest::blocking::get(url.clone()) {
-                    Ok(http_resp) => http_resp
-                        .bytes()
-                        .map_err(|e| PyValueError::new_err(e.to_string()))?
-                        .to_vec(),
-                    Err(e) => {
-                        if e.status()
-                            .is_some_and(|code| code == StatusCode::NOT_FOUND)
-                        {
-                            general_purpose::STANDARD
-                                .decode(url)
-                                .map_err(|e| PyValueError::new_err(e.to_string()))?
-                        } else {
-                            return Err(PyValueError::new_err(e.to_string()));
+                let bytes = if url.contains("http") {
+                    // Read from http
+                    match reqwest::blocking::get(url.clone()) {
+                        Ok(http_resp) => http_resp
+                            .bytes()
+                            .map_err(|e| PyValueError::new_err(e.to_string()))?
+                            .to_vec(),
+                        Err(e) => {
+                            return Err(PyValueError::new_err(format!("{e}")))
                         }
                     }
+                } else if let Ok(mut f) = File::open(&url) {
+                    // Read from local file
+                    let metadata = fs::metadata(&url)
+                        .map_err(|e| PyValueError::new_err(e.to_string()))?;
+                    let mut buffer = vec![0; metadata.len() as usize];
+                    f.read_exact(&mut buffer)?;
+                    buffer
+                } else {
+                    // Decode with base64
+                    general_purpose::STANDARD
+                        .decode(url)
+                        .map_err(|e| PyValueError::new_err(e.to_string()))?
                 };
                 images.push(
                     image::load_from_memory(&bytes)
diff --git a/mistralrs-server/Cargo.toml b/mistralrs-server/Cargo.toml
index bb683b1a4..625fad0fd 100644
--- a/mistralrs-server/Cargo.toml
+++ b/mistralrs-server/Cargo.toml
@@ -22,7 +22,7 @@ axum = { version = "0.7.4", features = ["tokio"] }
 tower-http = { version = "0.5.1", features = ["cors"]}
 utoipa = { version = "4.2", features = ["axum_extras"] }
 utoipa-swagger-ui = { version = "7.1.0", features = ["axum"]}
-mistralrs-core = { version = "0.1.17", path = "../mistralrs-core" }
+mistralrs-core = { version = "0.1.18", path = "../mistralrs-core" }
 indexmap.workspace = true
 accelerate-src = { workspace = true, optional = true }
 intel-mkl-src = { workspace = true, optional = true }
diff --git a/mistralrs-server/src/chat_completion.rs b/mistralrs-server/src/chat_completion.rs
index cd4d4519e..fe6089c2a 100644
--- a/mistralrs-server/src/chat_completion.rs
+++ b/mistralrs-server/src/chat_completion.rs
@@ -3,6 +3,8 @@ use std::{
     collections::HashMap,
     env,
     error::Error,
+    fs::{self, File},
+    io::Read,
     ops::Deref,
     pin::Pin,
     sync::Arc,
@@ -256,15 +258,21 @@ async fn parse_request(
     if !image_urls.is_empty() {
         let mut images = Vec::new();
         for url in image_urls {
-            let bytes = match reqwest::get(url.clone()).await {
-                Ok(http_resp) => http_resp.bytes().await?.to_vec(),
-                Err(e) => {
-                    if e.status().is_some_and(|code| code == StatusCode::NOT_FOUND) {
-                        general_purpose::STANDARD.decode(url)?
-                    } else {
-                        anyhow::bail!(e)
-                    }
+            let bytes = if url.contains("http") {
+                // Read from http
+                match reqwest::get(url.clone()).await {
+                    Ok(http_resp) => http_resp.bytes().await?.to_vec(),
+                    Err(e) => anyhow::bail!(e),
                 }
+            } else if let Ok(mut f) = File::open(&url) {
+                // Read from local file
+                let metadata = fs::metadata(&url)?;
+                let mut buffer = vec![0; metadata.len() as usize];
+                f.read_exact(&mut buffer)?;
+                buffer
+            } else {
+                // Decode with base64
+                general_purpose::STANDARD.decode(url)?
             };
             images.push(image::load_from_memory(&bytes)?);
         }
diff --git a/mistralrs-server/src/main.rs b/mistralrs-server/src/main.rs
index 73a7277a9..609fd0c51 100644
--- a/mistralrs-server/src/main.rs
+++ b/mistralrs-server/src/main.rs
@@ -1,6 +1,6 @@
 use anyhow::Result;
 use axum::{
-    extract::{Json, State},
+    extract::{DefaultBodyLimit, Json, State},
     http::{self, Method},
     routing::{get, post},
     Router,
@@ -28,6 +28,10 @@ use tracing::{info, warn};
 use utoipa::{OpenApi, ToSchema};
 use utoipa_swagger_ui::SwaggerUi;

+// NOTE(EricLBuehler): Accept up to 50 MB of input
+const N_INPUT_SIZE: usize = 50;
+const MB_TO_B: usize = 1024 * 1024; // bytes per MB
+
 fn parse_token_source(s: &str) -> Result<TokenSource, String> {
     s.parse()
 }
@@ -223,6 +227,7 @@ fn get_router(state: Arc) -> Router {
         .route("/", get(health))
         .route("/activate_adapters", post(activate_adapters))
         .route("/re_isq", post(re_isq))
+        .layer(DefaultBodyLimit::max(N_INPUT_SIZE * MB_TO_B))
         .with_state(state)
 }
diff --git a/mistralrs/Cargo.toml b/mistralrs/Cargo.toml
index 74462594a..af1c2baf1 100644
--- a/mistralrs/Cargo.toml
+++ b/mistralrs/Cargo.toml
@@ -12,7 +12,7 @@ license.workspace = true
 homepage.workspace = true

 [dependencies]
-mistralrs-core = { version = "0.1.17", path = "../mistralrs-core" }
+mistralrs-core = { version = "0.1.18", path = "../mistralrs-core" }
 anyhow.workspace = true
 tokio.workspace = true
 candle-core.workspace = true
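
The `parse_request` change above resolves an image string in a fixed order: try HTTP, then a local file, then fall back to base64 decoding. A rough Python mirror of that order (an illustrative sketch, not the actual Rust implementation), together with the arithmetic behind the new 50 MB body limit:

```python
import base64
import os

import requests


def load_image_bytes(url: str) -> bytes:
    """Sketch of the server's resolution order: HTTP, local file, base64."""
    if "http" in url:  # same substring check the Rust code uses
        return requests.get(url).content  # read from http
    if os.path.isfile(url):  # read from a local file
        with open(url, "rb") as f:
            return f.read()
    return base64.b64decode(url)  # decode with base64


# Base64 encodes 3 raw bytes as 4 ASCII bytes, so under the new 50 MB
# DefaultBodyLimit roughly 50 * 3 / 4 = 37.5 MB of raw image data fits
# in a single request (ignoring the JSON envelope around it).
```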