microsoft · fionabos · Jul 2, 2024 · Jul 3, 2024 · Jul 11, 2024 · Jul 15, 2024
diff --git a/.gitignore b/.gitignore
@@ -437,3 +437,5 @@ config.json
 .ipynb_aml_checkpoints
 *.ipynb.amltmp
 
+## Android Studio IDE files
+.idea/
diff --git a/mobile/examples/phi-3-vision/android/android/.gitignore b/mobile/examples/phi-3-vision/android/android/.gitignore
@@ -0,0 +1,10 @@
+*.iml
+.gradle
+.idea
+.DS_Store
+build
+/captures
+.externalNativeBuild
+.cxx
+local.properties
+
diff --git a/mobile/examples/phi-3-vision/android/android/README.md b/mobile/examples/phi-3-vision/android/android/README.md
@@ -0,0 +1,136 @@
+# Local Chatbot on Android with Phi-3 Vision, ONNX Runtime Mobile and ONNX Runtime Generate() API
+
+## Overview
+
+This is a basic [Phi-3 Vision](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct-onnx-cpu) Android example application using [ONNX Runtime mobile](https://onnxruntime.ai/docs/tutorials/mobile/) and [ONNX Runtime Generate() API](https://github.com/microsoft/onnxruntime-genai) with support for efficiently running generative AI models. This tutorial will walk you through how to download and run the Phi-3 Vision App on your own mobile device and help you incorporate Phi-3 Vision into your own mobile developments. 
+
+### Capabilities
+[Phi-3 Vision](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct-onnx-cpu) is a multimodal model incorporating imaging into [Phi-3's](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) language input capabilities. This expands Phi-3's usages to include Optical Character Recognition (OCR), image captioning, table parsing, and more. 
+
+## Important Features
+
+### Java API
+This app uses the [generate() Java API's](https://github.com/microsoft/onnxruntime-genai/tree/main/src/java/src/main/java/ai/onnxruntime/genai) GenAIException, Generator, GeneratorParams, Images, Model, MultiModalProcessor, NamedTensors, and TokenizerStream classes ([documentation](https://onnxruntime.ai/docs/genai/api/java.html)). The [generate() C API](https://onnxruntime.ai/docs/genai/api/c.html), [generate() C# API](https://onnxruntime.ai/docs/genai/api/csharp.html), and [generate() Python API](https://onnxruntime.ai/docs/genai/api/python.html) are also available.
+
+### Model Downloads
+This app downloads the [Phi-3 Vision](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct-onnx-cpu) model through Hugging Face. To use a different model, change the path links to refer to your chosen model.
+```java
+final String baseUrl = "https://huggingface.co/microsoft/Phi-3-vision-128k-instruct-onnx-cpu/resolve/main/cpu-int4-rtn-block-32-acc-level-4/";
+List<String> files = Arrays.asList(
+    "genai_config.json",
+    "phi-3-v-128k-instruct-text-embedding.onnx",
+    "phi-3-v-128k-instruct-text-embedding.onnx.data",
+    "phi-3-v-128k-instruct-text.onnx",
+    "phi-3-v-128k-instruct-text.onnx.data",
+    "phi-3-v-128k-instruct-vision.onnx",
+    "phi-3-v-128k-instruct-vision.onnx.data",
+    "processor_config.json",
+    "special_tokens_map.json",
+    "tokenizer.json",
+    "tokenizer_config.json");
+```
+These model files only need to be downloaded once. When you rebuild and rerun the app, the download is skipped because all files already exist.
+```java
+if (urlFilePairs.isEmpty()) {
+    // Display a message using Toast
+    Toast.makeText(this, "All files already exist. Skipping download.", Toast.LENGTH_SHORT).show();
+    Log.d(TAG, "All files already exist. Skipping download.");
+    model = new Model(getFilesDir().getPath());
+    multiModalProcessor = new MultiModalProcessor(model);
+    return;
+}
+```
+### Crash Prevention
+Downloading the packages for the app on your mobile device takes ~15-30 minutes depending on which device you are using. The progress bar indicates what percent of the downloads are completed. 
+```java
+public void onProgress(long lastBytesRead, long bytesRead, long bytesTotal) {
+    long lastPctDone = 100 * lastBytesRead / bytesTotal;
+    long pctDone = 100 * bytesRead / bytesTotal;
+    if (pctDone > lastPctDone) {
+        Log.d(TAG, "Downloading files: " + pctDone + "%");
+        runOnUiThread(() -> {
+            progressText.setText("Downloading: " + pctDone + "%");
+        });
+    }
+}
+```
+Because the app is initialized when downloads start, the 'send' button for prompts is disabled until downloads are complete to prevent crashing.
+```java
+if (model == null) {
+    // If the model has not been loaded yet, display a toast message and return.
+    Toast.makeText(MainActivity.this, "Model not loaded yet, please wait...", Toast.LENGTH_SHORT).show();
+    return;
+}
+```
+### Multimodal Processor
+Since we are using Phi-3 Vision, we refer to the [MultiModalProcessor Class](https://github.com/microsoft/onnxruntime-genai/blob/main/src/java/src/main/java/ai/onnxruntime/genai/MultiModalProcessor.java) to include imaging as well as text input. In an application with no imaging, you can use the [Tokenizer Class](https://github.com/microsoft/onnxruntime-genai/blob/main/src/java/src/main/java/ai/onnxruntime/genai/Tokenizer.java) instead.
+
+### Prompt Template
+On its own, this model's answers can be very long. To format the AI assistant's answers, you can adjust the prompt template. 
+```java
+String promptQuestion = "<|user|>\n";
+if (inputImage != null) {
+    promptQuestion += "<|image_1|>\n";
+}
+promptQuestion += userMsgEdt.getText().toString() + "<system>You are a helpful AI assistant. Answer in two paragraphs or less<|end|>\n<|assistant|>\n";
+final String promptQuestion_formatted = promptQuestion;
+
+Log.i("GenAI: prompt question", promptQuestion_formatted);
+```
+You can also include [parameters](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct-onnx-cpu/blob/main/cpu-int4-rtn-block-32-acc-level-4/genai_config.json) such as a max_length or length_penalty to your liking. 
+```java
+generatorParams.setSearchOption("length_penalty", 1000);
+generatorParams.setSearchOption("max_length", 500);
+```
+NOTE: Including a max_length will cut off the assistant's answer once reaching the maximum number of tokens rather than formatting a complete response.
+
+
+### Requirements
+- [Android Studio](https://developer.android.com/studio) Giraffe | 2022.3.1 or later (installed on Mac/Windows/Linux)
+- Android SDK 29+
+- Android NDK r22+
+- An Android device or an Android Emulator
+
+## Build And Run
+
+### Step 1: Clone the ONNX runtime mobile examples source code
+
+Clone this repository to get the sample application. 
+
+`git clone git@github.com:microsoft/onnxruntime-inference-examples.git`
+
+### [Optional] Step 2: Prepare the model
+
+The current setup supports downloading the Phi-3 Vision model directly from the Hugging Face repo to the Android device folder. However, it takes time since the model data is >2.5 GB.
+
+You can also download [**Phi-3-Vision**](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct-onnx-cpu/tree/main)
+and manually copy to the android device file directory following the below instructions:
+
+#### Steps for manual copying models to android device directory:
+From Android Studio:
+  - create (if necessary) and run your emulator/device
+    - make sure it has at least 8GB of internal storage
+  - debug/run the app so it's deployed to the device and creates its `files` directory
+    - expected to be `/data/data/ai.onnxruntime.genai.vision.demo/files`
+      - this is the path returned by `getFilesDir()`
+  - Open Device Explorer in Android Studio
+  - Navigate to `/data/data/ai.onnxruntime.genai.vision.demo/files`
+    - adjust as needed if the value returned by getFilesDir() differs for your emulator or device
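+  - copy all files from the `cpu-int4-rtn-block-32-acc-level-4` folder of the [Phi-3 Vision](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct-onnx-cpu/tree/main) model directly into the `files` directory (the app loads the model from the path returned by `getFilesDir()`)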
+
+### Step 3: Connect Android Device and Run the app
+  Connect your Android Device to your computer or select the Android Emulator in Android Studio Device manager.
+
+  Then select `Run -> Run app` and this will prompt the app to be built and installed on your device or emulator.
+
+  Now you can try giving some sample prompt questions and test the chatbot android app by clicking the ">" action button.
+
+## Screenshots
+Here are some example screenshots of the app.
+
+<img width=20% src="images/Local_LLM_1.jpg" alt="App Screenshot 1" />
+
+<img width=20% src="images/Local_LLM_2.jpg" alt="App Screenshot 2" />
+
+<img width=20% src="images/Local_LLM_3.jpg" alt="App Screenshot 3" />
+
diff --git a/mobile/examples/phi-3-vision/android/android/app/.gitignore b/mobile/examples/phi-3-vision/android/android/app/.gitignore
@@ -0,0 +1 @@
+/build
diff --git a/mobile/examples/phi-3-vision/android/android/app/build.gradle.kts b/mobile/examples/phi-3-vision/android/android/app/build.gradle.kts
@@ -0,0 +1,58 @@
+// Gradle plugins applied to this module; only the Android application plugin is used.
+plugins {
+    id("com.android.application")
+}
+
+// Android build configuration for the Phi-3 Vision demo application module.
+android {
+    namespace = "ai.onnxruntime.genai.vision.demo"
+    compileSdk = 33
+
+    defaultConfig {
+        applicationId = "ai.onnxruntime.genai.vision.demo"
+        // NOTE(review): the README lists "Android SDK 29+" as a requirement while minSdk is 27;
+        // confirm which value is intended.
+        minSdk = 27
+        targetSdk = 33
+        versionCode = 1
+        versionName = "1.0"
+
+        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
+
+        ndk {
+            // Native libraries are limited to these two 64-bit ABIs.
+            //noinspection ChromeOsAbiSupport
+            abiFilters += listOf("arm64-v8a", "x86_64")
+        }
+    }
+
+    buildTypes {
+        release {
+            // Minification is turned off for release builds; the ProGuard files are still
+            // listed so they are in place if minification is enabled later.
+            isMinifyEnabled = false
+            proguardFiles(
+                getDefaultProguardFile("proguard-android-optimize.txt"),
+                "proguard-rules.pro"
+            )
+        }
+    }
+
+    // Java sources are compiled at language level 8.
+    compileOptions {
+        sourceCompatibility = JavaVersion.VERSION_1_8
+        targetCompatibility = JavaVersion.VERSION_1_8
+    }
+
+    buildFeatures {
+        viewBinding = true
+    }
+}
+
+// Libraries required by the app module.
+dependencies {
+
+    implementation("androidx.appcompat:appcompat:1.6.1")
+    implementation("com.google.android.material:material:1.9.0")
+    implementation("androidx.constraintlayout:constraintlayout:2.1.4")
+    testImplementation("junit:junit:4.13.2")
+    androidTestImplementation("androidx.test.ext:junit:1.1.5")
+    androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1")
+
+    // ONNX Runtime with GenAI
+    implementation("com.microsoft.onnxruntime:onnxruntime-android:latest.release")
+    // The GenAI AAR is referenced by a path relative to this module (app/libs) so the
+    // project builds on any machine, not only where a local onnxruntime-genai build exists.
+    implementation(files("libs/onnxruntime-genai-android-0.4.1-dev.aar"))
+}
diff --git a/...le/examples/phi-3-vision/android/android/app/libs/onnxruntime-genai-android-0.4.0-dev.aar b/...le/examples/phi-3-vision/android/android/app/libs/onnxruntime-genai-android-0.4.0-dev.aar
diff --git a/...xamples/phi-3-vision/android/android/app/libs/onnxruntime-genai-android-0.4.1-dev-old.aar b/...xamples/phi-3-vision/android/android/app/libs/onnxruntime-genai-android-0.4.1-dev-old.aar
diff --git a/...le/examples/phi-3-vision/android/android/app/libs/onnxruntime-genai-android-0.4.1-dev.aar b/...le/examples/phi-3-vision/android/android/app/libs/onnxruntime-genai-android-0.4.1-dev.aar
diff --git a/mobile/examples/phi-3-vision/android/android/app/proguard-rules.pro b/mobile/examples/phi-3-vision/android/android/app/proguard-rules.pro
@@ -0,0 +1,21 @@
+# Add project specific ProGuard rules here.
+# You can control the set of applied configuration files using the
+# proguardFiles setting in build.gradle.
+#
+# For more details, see
+#   http://developer.android.com/guide/developing/tools/proguard.html
+
+# If your project uses WebView with JS, uncomment the following
+# and specify the fully qualified class name to the JavaScript interface
+# class:
+#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
+#   public *;
+#}
+
+# Uncomment this to preserve the line number information for
+# debugging stack traces.
+#-keepattributes SourceFile,LineNumberTable
+
+# If you keep the line number information, uncomment this to
+# hide the original source file name.
+#-renamesourcefileattribute SourceFile
diff --git a/...id/app/src/androidTest/java/ai/onnxruntime/genai/vision/demo/ExampleInstrumentedTest.java b/...id/app/src/androidTest/java/ai/onnxruntime/genai/vision/demo/ExampleInstrumentedTest.java
@@ -0,0 +1,26 @@
+package ai.onnxruntime.genai.vision.demo;
+
+import android.content.Context;
+
+import androidx.test.platform.app.InstrumentationRegistry;
+import androidx.test.ext.junit.runners.AndroidJUnit4;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+
+import static org.junit.Assert.*;
+
+/**
+ * Instrumentation test that runs on an Android device and checks the package name
+ * reported by the context of the app under test.
+ *
+ * @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
+ */
+@RunWith(AndroidJUnit4.class)
+public class ExampleInstrumentedTest {
+    @Test
+    public void useAppContext() {
+        // Read the package name from the target app's context.
+        final String packageName =
+                InstrumentationRegistry.getInstrumentation().getTargetContext().getPackageName();
+        assertEquals("ai.onnxruntime.genai.vision.demo", packageName);
+    }
+}
diff --git a/mobile/examples/phi-3-vision/android/android/app/src/main/AndroidManifest.xml b/mobile/examples/phi-3-vision/android/android/app/src/main/AndroidManifest.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Manifest for the demo app: declares its permissions and one launcher activity. -->
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+    xmlns:tools="http://schemas.android.com/tools">
+
+    <!-- Network access; the README describes downloading the model files from Hugging Face. -->
+    <uses-permission android:name="android.permission.INTERNET" />
+    <!-- NOTE(review): the README describes storing model files under getFilesDir();
+         confirm whether this external storage permission is actually needed. -->
+    <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
+
+    <!-- NOTE(review): allowBackup is enabled; confirm the referenced backup rules
+         exclude the downloaded model files if they should not be backed up. -->
+    <application
+        android:allowBackup="true"
+        android:dataExtractionRules="@xml/data_extraction_rules"
+        android:fullBackupContent="@xml/backup_rules"
+        android:icon="@mipmap/ic_launcher"
+        android:label="@string/app_name"
+        android:roundIcon="@mipmap/ic_launcher_round"
+        android:supportsRtl="true"
+        android:theme="@style/Theme.ORTGenAIDemo"
+        tools:targetApi="31">
+        <!-- MainActivity is exported and registered as the MAIN/LAUNCHER entry point. -->
+        <activity
+            android:name=".MainActivity"
+            android:exported="true">
+            <intent-filter>
+                <action android:name="android.intent.action.MAIN" />
+
+                <category android:name="android.intent.category.LAUNCHER" />
+            </intent-filter>
+        </activity>
+    </application>
+
+</manifest>
diff --git a/...le/examples/phi-3-vision/android/android/app/src/main/ic_launcher-playstore.png b/...le/examples/phi-3-vision/android/android/app/src/main/ic_launcher-playstore.png
diff --git a/...vision/android/android/app/src/main/java/ai/onnxruntime/genai/vision/demo/GenAIImage.java b/...vision/android/android/app/src/main/java/ai/onnxruntime/genai/vision/demo/GenAIImage.java
@@ -0,0 +1,60 @@
+package ai.onnxruntime.genai.vision.demo;
+
+import android.content.Context;
+import android.graphics.Bitmap;
+import android.graphics.BitmapFactory;
+import android.net.Uri;
+
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import ai.onnxruntime.genai.GenAIException;
+import ai.onnxruntime.genai.Images;
+
+/**
+ * Holds a user-selected image in the two forms the app uses: an ONNX Runtime GenAI
+ * {@link Images} object and an Android {@link Bitmap}.
+ *
+ * <p>The image behind the given URI is decoded (optionally downsampled), re-encoded as a PNG
+ * file named {@code multimodalinput.png} in the app's files directory, and both members are
+ * then created from that file.
+ */
+public class GenAIImage {
+    Images images = null;
+    Bitmap bitmap = null;
+
+    /**
+     * Decodes the image at {@code uri}, writes it as a PNG into the app's files directory and
+     * creates the {@link Images} and {@link Bitmap} members from that file.
+     *
+     * @param context   used to resolve the URI and to locate the files directory
+     * @param uri       content URI of the image to load
+     * @param maxWidth  width limit used to pick the downsampling factor
+     * @param maxHeight height limit used to pick the downsampling factor
+     * @throws IOException    if the image cannot be opened, decoded or written
+     * @throws GenAIException if the {@link Images} object cannot be created
+     */
+    GenAIImage(Context context, Uri uri, final int maxWidth, final int maxHeight) throws IOException, GenAIException {
+        Bitmap bmp = decodeUri(context, uri, maxWidth, maxHeight);
+        if (bmp == null) {
+            // decodeStream reports an undecodable image by returning null.
+            throw new IOException("Unable to decode image from " + uri);
+        }
+        String filename = context.getFilesDir() + "/multimodalinput.png";
+        // Close the file before it is read back below.
+        try (FileOutputStream out = new FileOutputStream(filename)) {
+            // PNG is a lossless format, so the quality argument (100) is ignored.
+            if (!bmp.compress(Bitmap.CompressFormat.PNG, 100, out)) {
+                throw new IOException("Unable to write image to " + filename);
+            }
+        }
+        images = new Images(filename);
+        bitmap = BitmapFactory.decodeFile(filename);
+    }
+
+    /**
+     * Same as the four-argument constructor with both limits set to 100000.
+     *
+     * @throws IOException    if the image cannot be opened, decoded or written
+     * @throws GenAIException if the {@link Images} object cannot be created
+     */
+    GenAIImage(Context context, Uri uri) throws IOException, GenAIException {
+        this(context, uri, 100000, 100000);
+    }
+
+    /** Returns the {@link Images} object created from the saved PNG file. */
+    public Images getImages() {
+        return images;
+    }
+
+    /** Returns the {@link Bitmap} decoded from the saved PNG file. */
+    public Bitmap getBitmap() { return bitmap; }
+
+    /**
+     * Decodes the image at {@code uri}, downsampling by a power of two chosen from the limits.
+     *
+     * <p>The dimensions are halved only while the halved size would still exceed a limit, so
+     * the decoded bitmap can be larger than {@code maxWidth} x {@code maxHeight} (by less
+     * than a factor of two per dimension).
+     *
+     * @return the decoded bitmap, or {@code null} if the stream could not be decoded
+     * @throws IOException if the URI cannot be opened or a stream cannot be closed
+     */
+    private static Bitmap decodeUri(Context c, Uri uri, final int maxWidth, final int maxHeight)
+            throws IOException {
+        // First pass: read only the image bounds.
+        BitmapFactory.Options bounds = new BitmapFactory.Options();
+        bounds.inJustDecodeBounds = true;
+        try (InputStream in = c.getContentResolver().openInputStream(uri)) {
+            BitmapFactory.decodeStream(in, null, bounds);
+        }
+
+        int width = bounds.outWidth;
+        int height = bounds.outHeight;
+        int scale = 1;
+
+        while (width / 2 > maxWidth || height / 2 > maxHeight) {
+            width /= 2;
+            height /= 2;
+            scale *= 2;
+        }
+
+        // Second pass: decode the pixels with the chosen sample size.
+        BitmapFactory.Options decodeOptions = new BitmapFactory.Options();
+        decodeOptions.inSampleSize = scale;
+        try (InputStream in = c.getContentResolver().openInputStream(uri)) {
+            return BitmapFactory.decodeStream(in, null, decodeOptions);
+        }
+    }
+}