From b99986250a98f1d1505cd2adc5763aec5fa93912 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Tue, 5 Nov 2024 10:18:36 -0800 Subject: [PATCH 1/6] storing commits locally for docs-improvement --- examples/agents/comprehensive-start.md | 111 +++++++++++++++++++ examples/agents/inference-fewshot.py | 46 ++++++++ examples/agents/inference-local-cloud.py | 46 ++++++++ examples/agents/inference-loop-history.py | 37 +++++++ examples/agents/inference-loop.py | 32 ++++++ examples/agents/inference-streaming.py | 36 +++++++ examples/agents/inference.py | 19 ++++ examples/agents/inflation.py | 21 ++-- examples/agents/multi_turn.py | 8 +- examples/agents/pdf-rag.ipynb | 126 ++++++++++++++++++++++ examples/agents/quickstart.md | 99 +++++++++++++++++ 11 files changed, 569 insertions(+), 12 deletions(-) create mode 100644 examples/agents/comprehensive-start.md create mode 100644 examples/agents/inference-fewshot.py create mode 100644 examples/agents/inference-local-cloud.py create mode 100644 examples/agents/inference-loop-history.py create mode 100644 examples/agents/inference-loop.py create mode 100644 examples/agents/inference-streaming.py create mode 100644 examples/agents/inference.py create mode 100644 examples/agents/pdf-rag.ipynb create mode 100644 examples/agents/quickstart.md diff --git a/examples/agents/comprehensive-start.md b/examples/agents/comprehensive-start.md new file mode 100644 index 00000000..604c8756 --- /dev/null +++ b/examples/agents/comprehensive-start.md @@ -0,0 +1,111 @@ + +# Getting Started with Llama Stack + +This guide will walk you through the steps to set up an end-to-end workflow with Llama Stack. It focuses on building a Llama Stack distribution and starting up a Llama Stack server. See our [documentation](../README.md) for more on Llama Stack's capabilities, or visit [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) for example apps. + +## Installation + +The `llama` CLI tool helps you manage the Llama toolchain & agentic systems. After installing the `llama-stack` package, the `llama` command should be available in your path. + +You can install this repository in two ways: + +1. **Install as a package**: + Install directly from [PyPI](https://pypi.org/project/llama-stack/) with: + ```bash + pip install llama-stack + ``` + +2. **Install from source**: + Follow these steps to install from the source code: + ```bash + mkdir -p ~/local + cd ~/local + git clone git@github.com:meta-llama/llama-stack.git + + conda create -n stack python=3.10 + conda activate stack + + cd llama-stack + $CONDA_PREFIX/bin/pip install -e . + ``` + +Refer to the [CLI Reference](./cli_reference.md) for details on Llama CLI commands. + +## Starting Up Llama Stack Server + +There are two ways to start the Llama Stack server: + +1. **Using Docker**: + We provide a pre-built Docker image of Llama Stack, available in the [distributions](../distributions/) folder. + + > **Note:** For GPU inference, set environment variables to specify the local directory with your model checkpoints and enable GPU inference. 
+ ```bash + export LLAMA_CHECKPOINT_DIR=~/.llama + ``` + Download Llama models with: + ``` + llama download --model-id Llama3.1-8B-Instruct + ``` + Start a Docker container with: + ```bash + cd llama-stack/distributions/meta-reference-gpu + docker run -it -p 5000:5000 -v ~/.llama:/root/.llama -v ./run.yaml:/root/my-run.yaml --gpus=all distribution-meta-reference-gpu --yaml_config /root/my-run.yaml + ``` + + **Tip:** For remote providers, use `docker compose up` with scripts in the [distributions folder](../distributions/). + +2. **Build->Configure->Run via Conda**: + For development, build a LlamaStack distribution from scratch. + + **`llama stack build`** + Enter build information interactively: + ```bash + llama stack build + ``` + + **`llama stack configure`** + Run `llama stack configure ` using the name from the build step. + ```bash + llama stack configure my-local-stack + ``` + + **`llama stack run`** + Start the server with: + ```bash + llama stack run my-local-stack + ``` + +## Testing with Client + +After setup, test the server with a client: +```bash +cd /path/to/llama-stack +conda activate + +python -m llama_stack.apis.inference.client localhost 5000 +``` + +You can also send a POST request: +```bash +curl http://localhost:5000/inference/chat_completion \ +-H "Content-Type: application/json" \ +-d '{ + "model": "Llama3.1-8B-Instruct", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Write me a 2-sentence poem about the moon"} + ], + "sampling_params": {"temperature": 0.7, "seed": 42, "max_tokens": 512} +}' +``` + +For testing safety, run: +```bash +python -m llama_stack.apis.safety.client localhost 5000 +``` + +Check our client SDKs for various languages: [Python](https://github.com/meta-llama/llama-stack-client-python), [Node](https://github.com/meta-llama/llama-stack-client-node), [Swift](https://github.com/meta-llama/llama-stack-client-swift), and [Kotlin](https://github.com/meta-llama/llama-stack-client-kotlin). + +## Advanced Guides + +For more on custom Llama Stack distributions, refer to our [Building a Llama Stack Distribution](./building_distro.md) guide. 
diff --git a/examples/agents/inference-fewshot.py b/examples/agents/inference-fewshot.py new file mode 100644 index 00000000..e90b87c5 --- /dev/null +++ b/examples/agents/inference-fewshot.py @@ -0,0 +1,46 @@ +from llama_stack_client import LlamaStackClient +from llama_stack_client.types import CompletionMessage, UserMessage +from termcolor import cprint + +# Initialize the LlamaStackClient with the base URL for inference endpoint +client = LlamaStackClient(base_url="http://localhost:5000") + +# Invoke chat_completion with the few-shot example set +response = client.inference.chat_completion( + messages=[ + UserMessage(content="Have shorter, spear-shaped ears.", role="user"), + CompletionMessage( + content="That's Alpaca!", + role="assistant", + stop_reason="end_of_message", + tool_calls=[], + ), + UserMessage( + content="Known for their calm nature and used as pack animals in mountainous regions.", + role="user", + ), + CompletionMessage( + content="That's Llama!", + role="assistant", + stop_reason="end_of_message", + tool_calls=[], + ), + UserMessage( + content="Has a straight, slender neck and is smaller in size compared to its relative.", + role="user", + ), + CompletionMessage( + content="That's Alpaca!", + role="assistant", + stop_reason="end_of_message", + tool_calls=[], + ), + UserMessage( + content="Generally taller and more robust, commonly seen as guard animals.", + role="user", + ), + ], + model="Llama3.2-11B-Vision-Instruct", +) + +cprint(f"> Response: {response.completion_message.content}", "cyan") diff --git a/examples/agents/inference-local-cloud.py b/examples/agents/inference-local-cloud.py new file mode 100644 index 00000000..a1a727f6 --- /dev/null +++ b/examples/agents/inference-local-cloud.py @@ -0,0 +1,46 @@ +import asyncio + +import httpx +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.inference.event_logger import EventLogger +from llama_stack_client.types import UserMessage +from termcolor import cprint + +local_client = LlamaStackClient(base_url="http://localhost:5000") +cloud_client = LlamaStackClient(base_url="http://localhost:5001") + + +async def select_client() -> LlamaStackClient: + try: + async with httpx.AsyncClient() as http_client: + response = await http_client.get(f"{local_client.base_url}/health") + if response.status_code == 200: + cprint("Using local client.", "yellow") + return local_client + except httpx.RequestError: + pass + cprint("Local client unavailable. 
Switching to cloud client.", "yellow") + return cloud_client + + +async def get_llama_response(stream: bool = True): + client = await select_client() + message = UserMessage( + content="hello world, write me a 2 sentence poem about the moon", role="user" + ) + cprint(f"User> {message.content}", "green") + + response = client.inference.chat_completion( + messages=[message], + model="Llama3.2-11B-Vision-Instruct", + stream=stream, + ) + + if not stream: + cprint(f"> Response: {response}", "cyan") + else: + async for log in EventLogger().log(response): + log.print() + + +asyncio.run(get_llama_response()) diff --git a/examples/agents/inference-loop-history.py b/examples/agents/inference-loop-history.py new file mode 100644 index 00000000..5dc61fc5 --- /dev/null +++ b/examples/agents/inference-loop-history.py @@ -0,0 +1,37 @@ +import asyncio + +from llama_stack_client import LlamaStackClient +from llama_stack_client.types import UserMessage +from termcolor import cprint + +client = LlamaStackClient( + base_url="http://localhost:5000", +) + + +async def chat_loop(): + conversation_history = [] + + while True: + user_input = input("User> ") + if user_input.lower() in ["exit", "quit", "bye"]: + cprint("Ending conversation. Goodbye!", "yellow") + break + + user_message = UserMessage(content=user_input, role="user") + conversation_history.append(user_message) + + response = client.inference.chat_completion( + messages=conversation_history, + model="Llama3.2-11B-Vision-Instruct", + ) + + cprint(f"> Response: {response.completion_message.content}", "cyan") + + assistant_message = UserMessage( + content=response.completion_message.content, role="user" + ) + conversation_history.append(assistant_message) + + +asyncio.run(chat_loop()) diff --git a/examples/agents/inference-loop.py b/examples/agents/inference-loop.py new file mode 100644 index 00000000..031f22d5 --- /dev/null +++ b/examples/agents/inference-loop.py @@ -0,0 +1,32 @@ +import asyncio + +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.inference.event_logger import EventLogger +from llama_stack_client.types import UserMessage +from termcolor import cprint + +client = LlamaStackClient( + base_url="http://localhost:5000", +) + + +async def chat_loop(): + while True: + + user_input = input("User> ") + + if user_input.lower() in ["exit", "quit", "bye"]: + cprint("Ending conversation. 
Goodbye!", "yellow") + break + + message = UserMessage(content=user_input, role="user") + + response = client.inference.chat_completion( + messages=[message], + model="Llama3.2-11B-Vision-Instruct", + ) + + cprint(f"> Response: {response.completion_message.content}", "cyan") + + +asyncio.run(chat_loop()) diff --git a/examples/agents/inference-streaming.py b/examples/agents/inference-streaming.py new file mode 100644 index 00000000..85afbb4a --- /dev/null +++ b/examples/agents/inference-streaming.py @@ -0,0 +1,36 @@ +import asyncio + +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.inference.event_logger import EventLogger +from llama_stack_client.types import UserMessage +from termcolor import cprint + + +async def run_main(stream: bool = True): + client = LlamaStackClient( + base_url=f"http://localhost:5000", + ) + + message = UserMessage( + content="hello world, write me a 2 sentence poem about the moon", role="user" + ) + print(f"User>{message.content}", "green") + + response = client.inference.chat_completion( + messages=[message], + model="Llama3.2-11B-Vision-Instruct", + stream=stream, + ) + + if not stream: + cprint(f"> Response: {response}", "cyan") + else: + async for log in EventLogger().log(response): + log.print() + + models_response = client.models.list() + print(models_response) + + +if __name__ == "__main__": + asyncio.run(run_main()) diff --git a/examples/agents/inference.py b/examples/agents/inference.py new file mode 100644 index 00000000..ad781d40 --- /dev/null +++ b/examples/agents/inference.py @@ -0,0 +1,19 @@ +from llama_stack_client import LlamaStackClient +from llama_stack_client.types import SystemMessage, UserMessage + +client = LlamaStackClient( + base_url="http://localhost:5000", +) + +response = client.inference.chat_completion( + messages=[ + SystemMessage(content="pretend you are a llama", role="system"), + UserMessage( + content="hello world, write me a 2 sentence poem about the moon", + role="user", + ), + ], + model="Llama3.2-11B-Vision-Instruct", +) + +print(response.completion_message.content) diff --git a/examples/agents/inflation.py b/examples/agents/inflation.py index 635fd95f..e2af7a8a 100644 --- a/examples/agents/inflation.py +++ b/examples/agents/inflation.py @@ -14,10 +14,9 @@ from llama_stack_client.types import Attachment, SamplingParams, UserMessage from llama_stack_client.types.agent_create_params import * # noqa: F403 from common.client_utils import * # noqa: F403 +from examples.agents.multi_turn import execute_turns, prompt_to_turn from termcolor import cprint -from .multi_turn import execute_turns, prompt_to_turn - async def run_main(host: str, port: int, disable_safety: bool = False): api_keys = load_api_keys_from_env() @@ -40,15 +39,15 @@ async def run_main(host: str, port: int, disable_safety: bool = False): agent_config=agent_config, custom_tools=[], turn_inputs=[ - prompt_to_turn( - "Here is a csv, can you describe it ?", - attachments=[ - Attachment( - content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", - mime_type="text/csv", - ), - ], - ), + # prompt_to_turn( + # "Here is a csv, can you describe it ?", + # attachments=[ + # Attachment( + # content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", + # mime_type="text/csv", + # ), + # ], + # ), prompt_to_turn("Which year ended with the highest inflation ?"), prompt_to_turn( "What macro economic situations that led to such high inflation in that 
period?" diff --git a/examples/agents/multi_turn.py b/examples/agents/multi_turn.py index 8b20352a..e96c6ea7 100644 --- a/examples/agents/multi_turn.py +++ b/examples/agents/multi_turn.py @@ -10,10 +10,16 @@ import sys from typing import List, Optional -from pydantic import BaseModel + +# from llama_stack_client.lib.agents.agents import Attachment +# from llama_stack.apis.agents import AgentConfig +# from llama_stack.tools.custom.datatypes import CustomTool from common.client_utils import * # noqa: F403 + from llama_stack_client.lib.agents.event_logger import EventLogger +from llama_stack_client.types import Attachment, UserMessage +from pydantic import BaseModel from termcolor import cprint diff --git a/examples/agents/pdf-rag.ipynb b/examples/agents/pdf-rag.ipynb new file mode 100644 index 00000000..62d53429 --- /dev/null +++ b/examples/agents/pdf-rag.ipynb @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install required libraries if not already installed\n", + "# !pip install pdf2image\n", + "# !pip install pdfminer\n", + "# !pip install tqdm\n", + "# !pip install llama_stack_client\n", + "\n", + "# Required imports\n", + "from pdf2image import convert_from_path\n", + "from pdfminer.high_level import extract_text\n", + "from io import BytesIO\n", + "import base64\n", + "import os\n", + "import concurrent.futures\n", + "from tqdm import tqdm\n", + "import json\n", + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.types import SystemMessage, UserMessage\n", + "\n", + "# Function to convert PDF to images\n", + "def convert_doc_to_images(pdf_path):\n", + " try:\n", + " images = convert_from_path(pdf_path)\n", + " return images\n", + " except Exception as e:\n", + " print(f\"Error converting PDF to images: {e}\")\n", + " return []\n", + "\n", + "# Function to extract text from PDF\n", + "def extract_text_from_pdf(pdf_path):\n", + " try:\n", + " text = extract_text(pdf_path)\n", + " return text\n", + " except Exception as e:\n", + " print(f\"Error extracting text from PDF: {e}\")\n", + " return \"\"\n", + "\n", + "# Function to convert image to base64 for LlamaStack analysis\n", + "def get_img_uri(img):\n", + " buffer = BytesIO()\n", + " img.save(buffer, format=\"jpeg\")\n", + " base64_image = base64.b64encode(buffer.getvalue()).decode(\"utf-8\")\n", + " data_uri = f\"data:image/jpeg;base64,{base64_image}\"\n", + " return data_uri\n", + "\n", + "# LlamaStack inference function\n", + "def analyze_image_llama_stack(img_uri, client):\n", + " system_prompt = '''\n", + " You will be provided with an image of a pdf page or a slide. 
Your goal is to describe the content of the image in detail.\n", + " Do not mention the format of the image or page numbers, but focus on explaining the contents as if you are presenting it to a technical audience.\n", + " '''\n", + "\n", + " response = client.inference.chat_completion(\n", + " messages=[\n", + " SystemMessage(content=system_prompt, role=\"system\"),\n", + " UserMessage(\n", + " content=f\"Here is the image: {img_uri}\",\n", + " role=\"user\",\n", + " ),\n", + " ],\n", + " model=\"Llama3.2-11B-Vision-Instruct\",\n", + " )\n", + "\n", + " return response.completion_message.content\n", + "\n", + "# Function to process a single PDF and analyze its pages\n", + "def process_pdf(pdf_path, client):\n", + " doc = {\"filename\": os.path.basename(pdf_path)}\n", + "\n", + " # Extract text\n", + " doc['text'] = extract_text_from_pdf(pdf_path)\n", + "\n", + " # Convert to images\n", + " images = convert_doc_to_images(pdf_path)\n", + "\n", + " # Analyze images with LlamaStack\n", + " if images:\n", + " pages_description = []\n", + " with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:\n", + " futures = [executor.submit(analyze_image_llama_stack, get_img_uri(img), client) for img in images[1:]] # Skipping first page if desired\n", + "\n", + " with tqdm(total=len(images) - 1) as pbar:\n", + " for _ in concurrent.futures.as_completed(futures):\n", + " pbar.update(1)\n", + "\n", + " for f in futures:\n", + " result = f.result()\n", + " pages_description.append(result)\n", + "\n", + " doc['pages_description'] = pages_description\n", + "\n", + " return doc\n", + "\n", + "# Initialize LlamaStack client\n", + "client = LlamaStackClient(base_url=\"http://localhost:5000\") # Replace with your actual LlamaStack base URL\n", + "\n", + "# Example usage with your PDF file path\n", + "pdf_file_path = \"/mnt/data/your_pdf_file.pdf\" # Adjust path as needed\n", + "\n", + "# Process the PDF and get the result\n", + "doc_data = process_pdf(pdf_file_path, client)\n", + "\n", + "# Saving result to JSON for later use\n", + "output_path = \"/mnt/data/processed_pdf_data.json\"\n", + "with open(output_path, 'w') as f:\n", + " json.dump(doc_data, f)\n", + "\n", + "print(f\"Processed PDF data saved to {output_path}\")\n" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/agents/quickstart.md b/examples/agents/quickstart.md new file mode 100644 index 00000000..465e2be2 --- /dev/null +++ b/examples/agents/quickstart.md @@ -0,0 +1,99 @@ + +# Quickstart + +This guide will walk you through the steps to set up an end-to-end workflow with Llama Stack. It focuses on building a Llama Stack distribution and starting up a Llama Stack server. See our [documentation](../README.md) for more on Llama Stack's capabilities, or visit [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) for example apps. + + +## 0. Prerequsite +Feel free to skip this step if you already have the prerequsite installed. + +1. conda (steps to install) +2. + + +## 1. Installation + +The `llama` CLI tool helps you manage the Llama toolchain & agentic systems. After installing the `llama-stack` package, the `llama` command should be available in your path. + +**Install as a package**: + Install directly from [PyPI](https://pypi.org/project/llama-stack/) with: + ```bash + pip install llama-stack + ``` + +## 2. 
Download Llama models: + + + ``` + llama download --model-id Llama3.1-8B-Instruct + ``` + You will have to follow the instructions in the cli to complete the download, get a instant license here: URL to license. + +## 3. Build->Configure->Run via Conda: + For development, build a LlamaStack distribution from scratch. + + **`llama stack build`** + Enter build information interactively: + ```bash + llama stack build + ``` + + **`llama stack configure`** + Run `llama stack configure ` using the name from the build step. + ```bash + llama stack configure my-local-stack + ``` + + **`llama stack run`** + Start the server with: + ```bash + llama stack run my-local-stack + ``` + +## 4. Testing with Client + +After setup, test the server with a POST request: +```bash +curl http://localhost:5000/inference/chat_completion \ +-H "Content-Type: application/json" \ +-d '{ + "model": "Llama3.1-8B-Instruct", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Write me a 2-sentence poem about the moon"} + ], + "sampling_params": {"temperature": 0.7, "seed": 42, "max_tokens": 512} +}' +``` + + +## 5. Inference + +After setup, test the server with a POST request: +```bash +curl http://localhost:5000/inference/chat_completion \ +-H "Content-Type: application/json" \ +-d '{ + "model": "Llama3.1-8B-Instruct", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Write me a 2-sentence poem about the moon"} + ], + "sampling_params": {"temperature": 0.7, "seed": 42, "max_tokens": 512} +}' +``` + + + +Check our client SDKs for various languages: [Python](https://github.com/meta-llama/llama-stack-client-python), [Node](https://github.com/meta-llama/llama-stack-client-node), [Swift](https://github.com/meta-llama/llama-stack-client-swift), and [Kotlin](https://github.com/meta-llama/llama-stack-client-kotlin). + +## Advanced Guides + +For more on custom Llama Stack distributions, refer to our [Building a Llama Stack Distribution](./building_distro.md) guide. + + +## Next Steps: +check out + +1. +2. 
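
The quickstart added above drives the server with `curl`; the same chat completion can also be issued through the Python SDK, following the pattern of the `inference.py` example added earlier in this patch. A minimal sketch, assuming the model name and port from the curl example above:

```python
from llama_stack_client import LlamaStackClient
from llama_stack_client.types import SystemMessage, UserMessage

# Point the client at the locally running Llama Stack server.
client = LlamaStackClient(base_url="http://localhost:5000")

# Mirror the curl request: a system prompt plus a single user turn.
response = client.inference.chat_completion(
    messages=[
        SystemMessage(content="You are a helpful assistant.", role="system"),
        UserMessage(content="Write me a 2-sentence poem about the moon", role="user"),
    ],
    model="Llama3.1-8B-Instruct",
)

print(response.completion_message.content)
```
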
From b2699b2720ffb2f291bccf0833d8e30f9c518b5c Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 11 Nov 2024 18:18:22 -0800 Subject: [PATCH 2/6] fix register and also message typedict in inference --- examples/inference/client.py | 15 +++++++++++++-- examples/inference/client_with_vision.py | 19 +++++++++++++------ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/examples/inference/client.py b/examples/inference/client.py index 54d7aca4..39a028ab 100644 --- a/examples/inference/client.py +++ b/examples/inference/client.py @@ -19,9 +19,20 @@ async def run_main(host: str, port: int, stream: bool = True): base_url=f"http://{host}:{port}", ) - message = UserMessage( - content="hello world, write me a 2 sentence poem about the moon", role="user" + client.models.register( + model={ + "identifier": "Llama3.1-8B-Instruct", + "llama_model": "Llama3.1-8B-Instruct", + "provider_id": "meta-reference-0", + "metadata": {}, + } ) + + message = { + "role": "user", + "content": "hello world, write me a 2 sentence poem about the moon", + } + cprint(f"User>{message.content}", "green") response = client.inference.chat_completion( messages=[message], diff --git a/examples/inference/client_with_vision.py b/examples/inference/client_with_vision.py index 4b6ac097..544ca367 100644 --- a/examples/inference/client_with_vision.py +++ b/examples/inference/client_with_vision.py @@ -32,13 +32,20 @@ async def run_main(host: str, port: int, stream: bool = True): data_url = f"data:{mime_type};base64,{encoded_string}" - message = UserMessage( - role="user", - content=[ - {"image": {"uri": data_url}}, - "Describe what is in this image.", - ], + client.models.register( + model={ + "identifier": "Llama3.2-11B-Vision-Instruct", + "llama_model": "Llama3.2-11B-Vision-Instruct", + "provider_id": "meta-reference-0", + "metadata": {}, + } ) + + message = { + "role": "user", + "content": [{"image": {"uri": data_url}}, "Describe what is in this image."], + } + cprint(f"User>{message.content}", "green") response = client.inference.chat_completion( messages=[message], From fb633ee1aadc4bb021121d2772bce3e12d929b0b Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 11 Nov 2024 18:36:37 -0800 Subject: [PATCH 3/6] Delete examples/agents/quickstart.md --- examples/agents/quickstart.md | 99 ----------------------------------- 1 file changed, 99 deletions(-) delete mode 100644 examples/agents/quickstart.md diff --git a/examples/agents/quickstart.md b/examples/agents/quickstart.md deleted file mode 100644 index 465e2be2..00000000 --- a/examples/agents/quickstart.md +++ /dev/null @@ -1,99 +0,0 @@ - -# Quickstart - -This guide will walk you through the steps to set up an end-to-end workflow with Llama Stack. It focuses on building a Llama Stack distribution and starting up a Llama Stack server. See our [documentation](../README.md) for more on Llama Stack's capabilities, or visit [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) for example apps. - - -## 0. Prerequsite -Feel free to skip this step if you already have the prerequsite installed. - -1. conda (steps to install) -2. - - -## 1. Installation - -The `llama` CLI tool helps you manage the Llama toolchain & agentic systems. After installing the `llama-stack` package, the `llama` command should be available in your path. - -**Install as a package**: - Install directly from [PyPI](https://pypi.org/project/llama-stack/) with: - ```bash - pip install llama-stack - ``` - -## 2. 
Download Llama models: - - - ``` - llama download --model-id Llama3.1-8B-Instruct - ``` - You will have to follow the instructions in the cli to complete the download, get a instant license here: URL to license. - -## 3. Build->Configure->Run via Conda: - For development, build a LlamaStack distribution from scratch. - - **`llama stack build`** - Enter build information interactively: - ```bash - llama stack build - ``` - - **`llama stack configure`** - Run `llama stack configure ` using the name from the build step. - ```bash - llama stack configure my-local-stack - ``` - - **`llama stack run`** - Start the server with: - ```bash - llama stack run my-local-stack - ``` - -## 4. Testing with Client - -After setup, test the server with a POST request: -```bash -curl http://localhost:5000/inference/chat_completion \ --H "Content-Type: application/json" \ --d '{ - "model": "Llama3.1-8B-Instruct", - "messages": [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Write me a 2-sentence poem about the moon"} - ], - "sampling_params": {"temperature": 0.7, "seed": 42, "max_tokens": 512} -}' -``` - - -## 5. Inference - -After setup, test the server with a POST request: -```bash -curl http://localhost:5000/inference/chat_completion \ --H "Content-Type: application/json" \ --d '{ - "model": "Llama3.1-8B-Instruct", - "messages": [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Write me a 2-sentence poem about the moon"} - ], - "sampling_params": {"temperature": 0.7, "seed": 42, "max_tokens": 512} -}' -``` - - - -Check our client SDKs for various languages: [Python](https://github.com/meta-llama/llama-stack-client-python), [Node](https://github.com/meta-llama/llama-stack-client-node), [Swift](https://github.com/meta-llama/llama-stack-client-swift), and [Kotlin](https://github.com/meta-llama/llama-stack-client-kotlin). - -## Advanced Guides - -For more on custom Llama Stack distributions, refer to our [Building a Llama Stack Distribution](./building_distro.md) guide. - - -## Next Steps: -check out - -1. -2. 
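
For reference, PATCH 2/6 above changes the inference clients to register the model with the server before requesting a completion and to pass messages as plain dicts. Pulled out of the diff context, that flow looks roughly like the sketch below; the base URL, model identifier, and `provider_id` are taken from the diff and are assumptions for any other deployment.

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")

# Register the model before inference, as PATCH 2/6 does in examples/inference/client.py.
client.models.register(
    model={
        "identifier": "Llama3.1-8B-Instruct",
        "llama_model": "Llama3.1-8B-Instruct",
        "provider_id": "meta-reference-0",
        "metadata": {},
    }
)

# After the patch, messages are plain dicts rather than UserMessage objects,
# so fields are read by key (message["content"]) instead of attribute access.
message = {
    "role": "user",
    "content": "hello world, write me a 2 sentence poem about the moon",
}

response = client.inference.chat_completion(
    messages=[message],
    model="Llama3.1-8B-Instruct",
)

print(response.completion_message.content)
```
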
From cb8d4cbb8efa11bbed8aae32d05304ec32a9014a Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 11 Nov 2024 18:41:53 -0800 Subject: [PATCH 4/6] removed unnecessary files --- examples/agents/comprehensive-start.md | 111 ------------------- examples/agents/inference-fewshot.py | 46 -------- examples/agents/inference-local-cloud.py | 46 -------- examples/agents/inference-loop-history.py | 37 ------- examples/agents/inference-loop.py | 32 ------ examples/agents/inference-streaming.py | 36 ------- examples/agents/inference.py | 19 ---- examples/agents/inflation.py | 112 +++++++++++-------- examples/agents/multi_turn.py | 62 ----------- examples/agents/pdf-rag.ipynb | 126 ---------------------- 10 files changed, 70 insertions(+), 557 deletions(-) delete mode 100644 examples/agents/comprehensive-start.md delete mode 100644 examples/agents/inference-fewshot.py delete mode 100644 examples/agents/inference-local-cloud.py delete mode 100644 examples/agents/inference-loop-history.py delete mode 100644 examples/agents/inference-loop.py delete mode 100644 examples/agents/inference-streaming.py delete mode 100644 examples/agents/inference.py delete mode 100644 examples/agents/multi_turn.py delete mode 100644 examples/agents/pdf-rag.ipynb diff --git a/examples/agents/comprehensive-start.md b/examples/agents/comprehensive-start.md deleted file mode 100644 index 604c8756..00000000 --- a/examples/agents/comprehensive-start.md +++ /dev/null @@ -1,111 +0,0 @@ - -# Getting Started with Llama Stack - -This guide will walk you through the steps to set up an end-to-end workflow with Llama Stack. It focuses on building a Llama Stack distribution and starting up a Llama Stack server. See our [documentation](../README.md) for more on Llama Stack's capabilities, or visit [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) for example apps. - -## Installation - -The `llama` CLI tool helps you manage the Llama toolchain & agentic systems. After installing the `llama-stack` package, the `llama` command should be available in your path. - -You can install this repository in two ways: - -1. **Install as a package**: - Install directly from [PyPI](https://pypi.org/project/llama-stack/) with: - ```bash - pip install llama-stack - ``` - -2. **Install from source**: - Follow these steps to install from the source code: - ```bash - mkdir -p ~/local - cd ~/local - git clone git@github.com:meta-llama/llama-stack.git - - conda create -n stack python=3.10 - conda activate stack - - cd llama-stack - $CONDA_PREFIX/bin/pip install -e . - ``` - -Refer to the [CLI Reference](./cli_reference.md) for details on Llama CLI commands. - -## Starting Up Llama Stack Server - -There are two ways to start the Llama Stack server: - -1. **Using Docker**: - We provide a pre-built Docker image of Llama Stack, available in the [distributions](../distributions/) folder. - - > **Note:** For GPU inference, set environment variables to specify the local directory with your model checkpoints and enable GPU inference. 
- ```bash - export LLAMA_CHECKPOINT_DIR=~/.llama - ``` - Download Llama models with: - ``` - llama download --model-id Llama3.1-8B-Instruct - ``` - Start a Docker container with: - ```bash - cd llama-stack/distributions/meta-reference-gpu - docker run -it -p 5000:5000 -v ~/.llama:/root/.llama -v ./run.yaml:/root/my-run.yaml --gpus=all distribution-meta-reference-gpu --yaml_config /root/my-run.yaml - ``` - - **Tip:** For remote providers, use `docker compose up` with scripts in the [distributions folder](../distributions/). - -2. **Build->Configure->Run via Conda**: - For development, build a LlamaStack distribution from scratch. - - **`llama stack build`** - Enter build information interactively: - ```bash - llama stack build - ``` - - **`llama stack configure`** - Run `llama stack configure ` using the name from the build step. - ```bash - llama stack configure my-local-stack - ``` - - **`llama stack run`** - Start the server with: - ```bash - llama stack run my-local-stack - ``` - -## Testing with Client - -After setup, test the server with a client: -```bash -cd /path/to/llama-stack -conda activate - -python -m llama_stack.apis.inference.client localhost 5000 -``` - -You can also send a POST request: -```bash -curl http://localhost:5000/inference/chat_completion \ --H "Content-Type: application/json" \ --d '{ - "model": "Llama3.1-8B-Instruct", - "messages": [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Write me a 2-sentence poem about the moon"} - ], - "sampling_params": {"temperature": 0.7, "seed": 42, "max_tokens": 512} -}' -``` - -For testing safety, run: -```bash -python -m llama_stack.apis.safety.client localhost 5000 -``` - -Check our client SDKs for various languages: [Python](https://github.com/meta-llama/llama-stack-client-python), [Node](https://github.com/meta-llama/llama-stack-client-node), [Swift](https://github.com/meta-llama/llama-stack-client-swift), and [Kotlin](https://github.com/meta-llama/llama-stack-client-kotlin). - -## Advanced Guides - -For more on custom Llama Stack distributions, refer to our [Building a Llama Stack Distribution](./building_distro.md) guide. 
diff --git a/examples/agents/inference-fewshot.py b/examples/agents/inference-fewshot.py deleted file mode 100644 index e90b87c5..00000000 --- a/examples/agents/inference-fewshot.py +++ /dev/null @@ -1,46 +0,0 @@ -from llama_stack_client import LlamaStackClient -from llama_stack_client.types import CompletionMessage, UserMessage -from termcolor import cprint - -# Initialize the LlamaStackClient with the base URL for inference endpoint -client = LlamaStackClient(base_url="http://localhost:5000") - -# Invoke chat_completion with the few-shot example set -response = client.inference.chat_completion( - messages=[ - UserMessage(content="Have shorter, spear-shaped ears.", role="user"), - CompletionMessage( - content="That's Alpaca!", - role="assistant", - stop_reason="end_of_message", - tool_calls=[], - ), - UserMessage( - content="Known for their calm nature and used as pack animals in mountainous regions.", - role="user", - ), - CompletionMessage( - content="That's Llama!", - role="assistant", - stop_reason="end_of_message", - tool_calls=[], - ), - UserMessage( - content="Has a straight, slender neck and is smaller in size compared to its relative.", - role="user", - ), - CompletionMessage( - content="That's Alpaca!", - role="assistant", - stop_reason="end_of_message", - tool_calls=[], - ), - UserMessage( - content="Generally taller and more robust, commonly seen as guard animals.", - role="user", - ), - ], - model="Llama3.2-11B-Vision-Instruct", -) - -cprint(f"> Response: {response.completion_message.content}", "cyan") diff --git a/examples/agents/inference-local-cloud.py b/examples/agents/inference-local-cloud.py deleted file mode 100644 index a1a727f6..00000000 --- a/examples/agents/inference-local-cloud.py +++ /dev/null @@ -1,46 +0,0 @@ -import asyncio - -import httpx -from llama_stack_client import LlamaStackClient -from llama_stack_client.lib.inference.event_logger import EventLogger -from llama_stack_client.types import UserMessage -from termcolor import cprint - -local_client = LlamaStackClient(base_url="http://localhost:5000") -cloud_client = LlamaStackClient(base_url="http://localhost:5001") - - -async def select_client() -> LlamaStackClient: - try: - async with httpx.AsyncClient() as http_client: - response = await http_client.get(f"{local_client.base_url}/health") - if response.status_code == 200: - cprint("Using local client.", "yellow") - return local_client - except httpx.RequestError: - pass - cprint("Local client unavailable. 
Switching to cloud client.", "yellow") - return cloud_client - - -async def get_llama_response(stream: bool = True): - client = await select_client() - message = UserMessage( - content="hello world, write me a 2 sentence poem about the moon", role="user" - ) - cprint(f"User> {message.content}", "green") - - response = client.inference.chat_completion( - messages=[message], - model="Llama3.2-11B-Vision-Instruct", - stream=stream, - ) - - if not stream: - cprint(f"> Response: {response}", "cyan") - else: - async for log in EventLogger().log(response): - log.print() - - -asyncio.run(get_llama_response()) diff --git a/examples/agents/inference-loop-history.py b/examples/agents/inference-loop-history.py deleted file mode 100644 index 5dc61fc5..00000000 --- a/examples/agents/inference-loop-history.py +++ /dev/null @@ -1,37 +0,0 @@ -import asyncio - -from llama_stack_client import LlamaStackClient -from llama_stack_client.types import UserMessage -from termcolor import cprint - -client = LlamaStackClient( - base_url="http://localhost:5000", -) - - -async def chat_loop(): - conversation_history = [] - - while True: - user_input = input("User> ") - if user_input.lower() in ["exit", "quit", "bye"]: - cprint("Ending conversation. Goodbye!", "yellow") - break - - user_message = UserMessage(content=user_input, role="user") - conversation_history.append(user_message) - - response = client.inference.chat_completion( - messages=conversation_history, - model="Llama3.2-11B-Vision-Instruct", - ) - - cprint(f"> Response: {response.completion_message.content}", "cyan") - - assistant_message = UserMessage( - content=response.completion_message.content, role="user" - ) - conversation_history.append(assistant_message) - - -asyncio.run(chat_loop()) diff --git a/examples/agents/inference-loop.py b/examples/agents/inference-loop.py deleted file mode 100644 index 031f22d5..00000000 --- a/examples/agents/inference-loop.py +++ /dev/null @@ -1,32 +0,0 @@ -import asyncio - -from llama_stack_client import LlamaStackClient -from llama_stack_client.lib.inference.event_logger import EventLogger -from llama_stack_client.types import UserMessage -from termcolor import cprint - -client = LlamaStackClient( - base_url="http://localhost:5000", -) - - -async def chat_loop(): - while True: - - user_input = input("User> ") - - if user_input.lower() in ["exit", "quit", "bye"]: - cprint("Ending conversation. 
Goodbye!", "yellow") - break - - message = UserMessage(content=user_input, role="user") - - response = client.inference.chat_completion( - messages=[message], - model="Llama3.2-11B-Vision-Instruct", - ) - - cprint(f"> Response: {response.completion_message.content}", "cyan") - - -asyncio.run(chat_loop()) diff --git a/examples/agents/inference-streaming.py b/examples/agents/inference-streaming.py deleted file mode 100644 index 85afbb4a..00000000 --- a/examples/agents/inference-streaming.py +++ /dev/null @@ -1,36 +0,0 @@ -import asyncio - -from llama_stack_client import LlamaStackClient -from llama_stack_client.lib.inference.event_logger import EventLogger -from llama_stack_client.types import UserMessage -from termcolor import cprint - - -async def run_main(stream: bool = True): - client = LlamaStackClient( - base_url=f"http://localhost:5000", - ) - - message = UserMessage( - content="hello world, write me a 2 sentence poem about the moon", role="user" - ) - print(f"User>{message.content}", "green") - - response = client.inference.chat_completion( - messages=[message], - model="Llama3.2-11B-Vision-Instruct", - stream=stream, - ) - - if not stream: - cprint(f"> Response: {response}", "cyan") - else: - async for log in EventLogger().log(response): - log.print() - - models_response = client.models.list() - print(models_response) - - -if __name__ == "__main__": - asyncio.run(run_main()) diff --git a/examples/agents/inference.py b/examples/agents/inference.py deleted file mode 100644 index ad781d40..00000000 --- a/examples/agents/inference.py +++ /dev/null @@ -1,19 +0,0 @@ -from llama_stack_client import LlamaStackClient -from llama_stack_client.types import SystemMessage, UserMessage - -client = LlamaStackClient( - base_url="http://localhost:5000", -) - -response = client.inference.chat_completion( - messages=[ - SystemMessage(content="pretend you are a llama", role="system"), - UserMessage( - content="hello world, write me a 2 sentence poem about the moon", - role="user", - ), - ], - model="Llama3.2-11B-Vision-Instruct", -) - -print(response.completion_message.content) diff --git a/examples/agents/inflation.py b/examples/agents/inflation.py index e2af7a8a..abe845d1 100644 --- a/examples/agents/inflation.py +++ b/examples/agents/inflation.py @@ -4,60 +4,88 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -# Copyright (c) Meta Platforms, Inc. and affiliates. -# This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement. 
- import asyncio +import os import fire -from llama_stack_client.types import Attachment, SamplingParams, UserMessage -from llama_stack_client.types.agent_create_params import * # noqa: F403 -from common.client_utils import * # noqa: F403 -from examples.agents.multi_turn import execute_turns, prompt_to_turn -from termcolor import cprint +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.agents.agent import Agent +from llama_stack_client.lib.agents.event_logger import EventLogger +from llama_stack_client.types import Attachment +from llama_stack_client.types.agent_create_params import AgentConfig async def run_main(host: str, port: int, disable_safety: bool = False): - api_keys = load_api_keys_from_env() - tool_definitions = [ - search_tool_defn(api_keys), - # Adding code_interpreter enables file analysis - AgentConfigToolCodeInterpreterToolDefinition(type="code_interpreter"), - ] - - agent_config = await make_agent_config_with_custom_tools( - disable_safety=disable_safety, - tool_config=QuickToolConfig( - tool_definitions=tool_definitions, - custom_tools=[], - attachment_behavior="code_interpreter", - ), + client = LlamaStackClient( + base_url=f"http://{host}:{port}", ) - await execute_turns( - agent_config=agent_config, - custom_tools=[], - turn_inputs=[ - # prompt_to_turn( - # "Here is a csv, can you describe it ?", - # attachments=[ - # Attachment( - # content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", - # mime_type="text/csv", - # ), - # ], - # ), - prompt_to_turn("Which year ended with the highest inflation ?"), - prompt_to_turn( - "What macro economic situations that led to such high inflation in that period?" - ), - prompt_to_turn("Plot average yearly inflation as a time series"), + input_shields = [] if disable_safety else ["llama_guard"] + output_shields = [] if disable_safety else ["llama_guard"] + + agent_config = AgentConfig( + model="Llama3.1-8B-Instruct", + instructions="You are a helpful assistant", + sampling_params={ + "strategy": "greedy", + "temperature": 1.0, + "top_p": 0.9, + }, + tools=[ + { + "type": "brave_search", + "engine": "brave", + "api_key": os.getenv("BRAVE_SEARCH_API_KEY"), + }, + { + "type": "code_interpreter", + }, ], - host=host, - port=port, + tool_choice="required", + tool_prompt_format="json", + input_shields=input_shields, + output_shields=output_shields, + enable_session_persistence=False, ) + agent = Agent(client, agent_config) + session_id = agent.create_session("test-session") + print(f"Created session_id={session_id} for Agent({agent.agent_id})") + + user_prompts = [ + ( + "Here is a csv, can you describe it ?", + [ + Attachment( + content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", + mime_type="test/csv", + ) + ], + ), + ("Which year ended with the highest inflation ?", None), + ( + "What macro economic situations that led to such high inflation in that period?", + None, + ), + ("Plot average yearly inflation as a time series", None), + ] + + for prompt in user_prompts: + response = agent.create_turn( + messages=[ + { + "role": "user", + "content": prompt[0], + } + ], + attachments=prompt[1], + session_id=session_id, + ) + + async for log in EventLogger().log(response): + log.print() + def main(host: str, port: int, disable_safety: bool = False): asyncio.run(run_main(host, port, disable_safety)) diff --git a/examples/agents/multi_turn.py b/examples/agents/multi_turn.py deleted file mode 100644 index 
e96c6ea7..00000000 --- a/examples/agents/multi_turn.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement. -import os -import sys -from typing import List, Optional - - -# from llama_stack_client.lib.agents.agents import Attachment -# from llama_stack.apis.agents import AgentConfig -# from llama_stack.tools.custom.datatypes import CustomTool - -from common.client_utils import * # noqa: F403 - -from llama_stack_client.lib.agents.event_logger import EventLogger -from llama_stack_client.types import Attachment, UserMessage -from pydantic import BaseModel -from termcolor import cprint - - -class UserTurnInput(BaseModel): - message: UserMessage - attachments: Optional[List[Attachment]] = None - - -def prompt_to_turn( - content: str, attachments: Optional[List[Attachment]] = None -) -> UserTurnInput: - return UserTurnInput( - message=UserMessage(content=content, role="user"), attachments=attachments - ) - - -async def execute_turns( - agent_config: AgentConfig, - custom_tools: List[CustomTool], - turn_inputs: List[UserTurnInput], - host: str = "localhost", - port: int = 5000, -): - agent = await get_agent_with_custom_tools( - host=host, - port=port, - agent_config=agent_config, - custom_tools=custom_tools, - ) - while len(turn_inputs) > 0: - turn = turn_inputs.pop(0) - - iterator = agent.execute_turn( - [turn.message], - turn.attachments, - ) - cprint(f"User> {turn.message.content}", color="white", attrs=["bold"]) - async for log in EventLogger().log(iterator): - if log is not None: - log.print() diff --git a/examples/agents/pdf-rag.ipynb b/examples/agents/pdf-rag.ipynb deleted file mode 100644 index 62d53429..00000000 --- a/examples/agents/pdf-rag.ipynb +++ /dev/null @@ -1,126 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Install required libraries if not already installed\n", - "# !pip install pdf2image\n", - "# !pip install pdfminer\n", - "# !pip install tqdm\n", - "# !pip install llama_stack_client\n", - "\n", - "# Required imports\n", - "from pdf2image import convert_from_path\n", - "from pdfminer.high_level import extract_text\n", - "from io import BytesIO\n", - "import base64\n", - "import os\n", - "import concurrent.futures\n", - "from tqdm import tqdm\n", - "import json\n", - "from llama_stack_client import LlamaStackClient\n", - "from llama_stack_client.types import SystemMessage, UserMessage\n", - "\n", - "# Function to convert PDF to images\n", - "def convert_doc_to_images(pdf_path):\n", - " try:\n", - " images = convert_from_path(pdf_path)\n", - " return images\n", - " except Exception as e:\n", - " print(f\"Error converting PDF to images: {e}\")\n", - " return []\n", - "\n", - "# Function to extract text from PDF\n", - "def extract_text_from_pdf(pdf_path):\n", - " try:\n", - " text = extract_text(pdf_path)\n", - " return text\n", - " except Exception as e:\n", - " print(f\"Error extracting text from PDF: {e}\")\n", - " return \"\"\n", - "\n", - "# Function to convert image to base64 for LlamaStack analysis\n", - "def get_img_uri(img):\n", - " buffer = BytesIO()\n", - " img.save(buffer, format=\"jpeg\")\n", - " base64_image = 
base64.b64encode(buffer.getvalue()).decode(\"utf-8\")\n", - " data_uri = f\"data:image/jpeg;base64,{base64_image}\"\n", - " return data_uri\n", - "\n", - "# LlamaStack inference function\n", - "def analyze_image_llama_stack(img_uri, client):\n", - " system_prompt = '''\n", - " You will be provided with an image of a pdf page or a slide. Your goal is to describe the content of the image in detail.\n", - " Do not mention the format of the image or page numbers, but focus on explaining the contents as if you are presenting it to a technical audience.\n", - " '''\n", - "\n", - " response = client.inference.chat_completion(\n", - " messages=[\n", - " SystemMessage(content=system_prompt, role=\"system\"),\n", - " UserMessage(\n", - " content=f\"Here is the image: {img_uri}\",\n", - " role=\"user\",\n", - " ),\n", - " ],\n", - " model=\"Llama3.2-11B-Vision-Instruct\",\n", - " )\n", - "\n", - " return response.completion_message.content\n", - "\n", - "# Function to process a single PDF and analyze its pages\n", - "def process_pdf(pdf_path, client):\n", - " doc = {\"filename\": os.path.basename(pdf_path)}\n", - "\n", - " # Extract text\n", - " doc['text'] = extract_text_from_pdf(pdf_path)\n", - "\n", - " # Convert to images\n", - " images = convert_doc_to_images(pdf_path)\n", - "\n", - " # Analyze images with LlamaStack\n", - " if images:\n", - " pages_description = []\n", - " with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:\n", - " futures = [executor.submit(analyze_image_llama_stack, get_img_uri(img), client) for img in images[1:]] # Skipping first page if desired\n", - "\n", - " with tqdm(total=len(images) - 1) as pbar:\n", - " for _ in concurrent.futures.as_completed(futures):\n", - " pbar.update(1)\n", - "\n", - " for f in futures:\n", - " result = f.result()\n", - " pages_description.append(result)\n", - "\n", - " doc['pages_description'] = pages_description\n", - "\n", - " return doc\n", - "\n", - "# Initialize LlamaStack client\n", - "client = LlamaStackClient(base_url=\"http://localhost:5000\") # Replace with your actual LlamaStack base URL\n", - "\n", - "# Example usage with your PDF file path\n", - "pdf_file_path = \"/mnt/data/your_pdf_file.pdf\" # Adjust path as needed\n", - "\n", - "# Process the PDF and get the result\n", - "doc_data = process_pdf(pdf_file_path, client)\n", - "\n", - "# Saving result to JSON for later use\n", - "output_path = \"/mnt/data/processed_pdf_data.json\"\n", - "with open(output_path, 'w') as f:\n", - " json.dump(doc_data, f)\n", - "\n", - "print(f\"Processed PDF data saved to {output_path}\")\n" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 8a03e4b9f30ade24e486445e6a47b7427a687324 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 11 Nov 2024 18:44:45 -0800 Subject: [PATCH 5/6] revert changes --- examples/agents/inflation.py | 112 +++++++++++++---------------------- 1 file changed, 42 insertions(+), 70 deletions(-) diff --git a/examples/agents/inflation.py b/examples/agents/inflation.py index abe845d1..e2af7a8a 100644 --- a/examples/agents/inflation.py +++ b/examples/agents/inflation.py @@ -4,88 +4,60 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +# Copyright (c) Meta Platforms, Inc. and affiliates. +# This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement. 
+ import asyncio -import os import fire -from llama_stack_client import LlamaStackClient -from llama_stack_client.lib.agents.agent import Agent -from llama_stack_client.lib.agents.event_logger import EventLogger -from llama_stack_client.types import Attachment -from llama_stack_client.types.agent_create_params import AgentConfig +from llama_stack_client.types import Attachment, SamplingParams, UserMessage +from llama_stack_client.types.agent_create_params import * # noqa: F403 +from common.client_utils import * # noqa: F403 +from examples.agents.multi_turn import execute_turns, prompt_to_turn +from termcolor import cprint async def run_main(host: str, port: int, disable_safety: bool = False): - client = LlamaStackClient( - base_url=f"http://{host}:{port}", - ) + api_keys = load_api_keys_from_env() + tool_definitions = [ + search_tool_defn(api_keys), + # Adding code_interpreter enables file analysis + AgentConfigToolCodeInterpreterToolDefinition(type="code_interpreter"), + ] - input_shields = [] if disable_safety else ["llama_guard"] - output_shields = [] if disable_safety else ["llama_guard"] + agent_config = await make_agent_config_with_custom_tools( + disable_safety=disable_safety, + tool_config=QuickToolConfig( + tool_definitions=tool_definitions, + custom_tools=[], + attachment_behavior="code_interpreter", + ), + ) - agent_config = AgentConfig( - model="Llama3.1-8B-Instruct", - instructions="You are a helpful assistant", - sampling_params={ - "strategy": "greedy", - "temperature": 1.0, - "top_p": 0.9, - }, - tools=[ - { - "type": "brave_search", - "engine": "brave", - "api_key": os.getenv("BRAVE_SEARCH_API_KEY"), - }, - { - "type": "code_interpreter", - }, + await execute_turns( + agent_config=agent_config, + custom_tools=[], + turn_inputs=[ + # prompt_to_turn( + # "Here is a csv, can you describe it ?", + # attachments=[ + # Attachment( + # content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", + # mime_type="text/csv", + # ), + # ], + # ), + prompt_to_turn("Which year ended with the highest inflation ?"), + prompt_to_turn( + "What macro economic situations that led to such high inflation in that period?" 
+ ), + prompt_to_turn("Plot average yearly inflation as a time series"), ], - tool_choice="required", - tool_prompt_format="json", - input_shields=input_shields, - output_shields=output_shields, - enable_session_persistence=False, + host=host, + port=port, ) - agent = Agent(client, agent_config) - session_id = agent.create_session("test-session") - print(f"Created session_id={session_id} for Agent({agent.agent_id})") - - user_prompts = [ - ( - "Here is a csv, can you describe it ?", - [ - Attachment( - content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", - mime_type="test/csv", - ) - ], - ), - ("Which year ended with the highest inflation ?", None), - ( - "What macro economic situations that led to such high inflation in that period?", - None, - ), - ("Plot average yearly inflation as a time series", None), - ] - - for prompt in user_prompts: - response = agent.create_turn( - messages=[ - { - "role": "user", - "content": prompt[0], - } - ], - attachments=prompt[1], - session_id=session_id, - ) - - async for log in EventLogger().log(response): - log.print() - def main(host: str, port: int, disable_safety: bool = False): asyncio.run(run_main(host, port, disable_safety)) From 811e1176f8ce06b24c5624312bc504f968f197c1 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 11 Nov 2024 18:45:26 -0800 Subject: [PATCH 6/6] revert changes --- examples/agents/inflation.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/agents/inflation.py b/examples/agents/inflation.py index e2af7a8a..bdcfcc96 100644 --- a/examples/agents/inflation.py +++ b/examples/agents/inflation.py @@ -39,15 +39,15 @@ async def run_main(host: str, port: int, disable_safety: bool = False): agent_config=agent_config, custom_tools=[], turn_inputs=[ - # prompt_to_turn( - # "Here is a csv, can you describe it ?", - # attachments=[ - # Attachment( - # content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", - # mime_type="text/csv", - # ), - # ], - # ), + prompt_to_turn( + "Here is a csv, can you describe it ?", + attachments=[ + Attachment( + content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", + mime_type="text/csv", + ), + ], + ), prompt_to_turn("Which year ended with the highest inflation ?"), prompt_to_turn( "What macro economic situations that led to such high inflation in that period?"