From b99986250a98f1d1505cd2adc5763aec5fa93912 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Tue, 5 Nov 2024 10:18:36 -0800 Subject: [PATCH 1/6] storing commits locally for docs-improvement --- examples/agents/comprehensive-start.md | 111 +++++++++++++++++++ examples/agents/inference-fewshot.py | 46 ++++++++ examples/agents/inference-local-cloud.py | 46 ++++++++ examples/agents/inference-loop-history.py | 37 +++++++ examples/agents/inference-loop.py | 32 ++++++ examples/agents/inference-streaming.py | 36 +++++++ examples/agents/inference.py | 19 ++++ examples/agents/inflation.py | 21 ++-- examples/agents/multi_turn.py | 8 +- examples/agents/pdf-rag.ipynb | 126 ++++++++++++++++++++++ examples/agents/quickstart.md | 99 +++++++++++++++++ 11 files changed, 569 insertions(+), 12 deletions(-) create mode 100644 examples/agents/comprehensive-start.md create mode 100644 examples/agents/inference-fewshot.py create mode 100644 examples/agents/inference-local-cloud.py create mode 100644 examples/agents/inference-loop-history.py create mode 100644 examples/agents/inference-loop.py create mode 100644 examples/agents/inference-streaming.py create mode 100644 examples/agents/inference.py create mode 100644 examples/agents/pdf-rag.ipynb create mode 100644 examples/agents/quickstart.md diff --git a/examples/agents/comprehensive-start.md b/examples/agents/comprehensive-start.md new file mode 100644 index 00000000..604c8756 --- /dev/null +++ b/examples/agents/comprehensive-start.md @@ -0,0 +1,111 @@ + +# Getting Started with Llama Stack + +This guide will walk you through the steps to set up an end-to-end workflow with Llama Stack. It focuses on building a Llama Stack distribution and starting up a Llama Stack server. See our [documentation](../README.md) for more on Llama Stack's capabilities, or visit [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) for example apps. + +## Installation + +The `llama` CLI tool helps you manage the Llama toolchain & agentic systems. After installing the `llama-stack` package, the `llama` command should be available in your path. + +You can install this repository in two ways: + +1. **Install as a package**: + Install directly from [PyPI](https://pypi.org/project/llama-stack/) with: + ```bash + pip install llama-stack + ``` + +2. **Install from source**: + Follow these steps to install from the source code: + ```bash + mkdir -p ~/local + cd ~/local + git clone git@github.com:meta-llama/llama-stack.git + + conda create -n stack python=3.10 + conda activate stack + + cd llama-stack + $CONDA_PREFIX/bin/pip install -e . + ``` + +Refer to the [CLI Reference](./cli_reference.md) for details on Llama CLI commands. + +## Starting Up Llama Stack Server + +There are two ways to start the Llama Stack server: + +1. **Using Docker**: + We provide a pre-built Docker image of Llama Stack, available in the [distributions](../distributions/) folder. + + > **Note:** For GPU inference, set environment variables to specify the local directory with your model checkpoints and enable GPU inference. 
+ ```bash + export LLAMA_CHECKPOINT_DIR=~/.llama + ``` + Download Llama models with: + ``` + llama download --model-id Llama3.1-8B-Instruct + ``` + Start a Docker container with: + ```bash + cd llama-stack/distributions/meta-reference-gpu + docker run -it -p 5000:5000 -v ~/.llama:/root/.llama -v ./run.yaml:/root/my-run.yaml --gpus=all distribution-meta-reference-gpu --yaml_config /root/my-run.yaml + ``` + + **Tip:** For remote providers, use `docker compose up` with scripts in the [distributions folder](../distributions/). + +2. **Build->Configure->Run via Conda**: + For development, build a LlamaStack distribution from scratch. + + **`llama stack build`** + Enter build information interactively: + ```bash + llama stack build + ``` + + **`llama stack configure`** + Run `llama stack configure ` using the name from the build step. + ```bash + llama stack configure my-local-stack + ``` + + **`llama stack run`** + Start the server with: + ```bash + llama stack run my-local-stack + ``` + +## Testing with Client + +After setup, test the server with a client: +```bash +cd /path/to/llama-stack +conda activate + +python -m llama_stack.apis.inference.client localhost 5000 +``` + +You can also send a POST request: +```bash +curl http://localhost:5000/inference/chat_completion \ +-H "Content-Type: application/json" \ +-d '{ + "model": "Llama3.1-8B-Instruct", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Write me a 2-sentence poem about the moon"} + ], + "sampling_params": {"temperature": 0.7, "seed": 42, "max_tokens": 512} +}' +``` + +For testing safety, run: +```bash +python -m llama_stack.apis.safety.client localhost 5000 +``` + +Check our client SDKs for various languages: [Python](https://github.com/meta-llama/llama-stack-client-python), [Node](https://github.com/meta-llama/llama-stack-client-node), [Swift](https://github.com/meta-llama/llama-stack-client-swift), and [Kotlin](https://github.com/meta-llama/llama-stack-client-kotlin). + +## Advanced Guides + +For more on custom Llama Stack distributions, refer to our [Building a Llama Stack Distribution](./building_distro.md) guide. 
diff --git a/examples/agents/inference-fewshot.py b/examples/agents/inference-fewshot.py new file mode 100644 index 00000000..e90b87c5 --- /dev/null +++ b/examples/agents/inference-fewshot.py @@ -0,0 +1,46 @@ +from llama_stack_client import LlamaStackClient +from llama_stack_client.types import CompletionMessage, UserMessage +from termcolor import cprint + +# Initialize the LlamaStackClient with the base URL for inference endpoint +client = LlamaStackClient(base_url="http://localhost:5000") + +# Invoke chat_completion with the few-shot example set +response = client.inference.chat_completion( + messages=[ + UserMessage(content="Have shorter, spear-shaped ears.", role="user"), + CompletionMessage( + content="That's Alpaca!", + role="assistant", + stop_reason="end_of_message", + tool_calls=[], + ), + UserMessage( + content="Known for their calm nature and used as pack animals in mountainous regions.", + role="user", + ), + CompletionMessage( + content="That's Llama!", + role="assistant", + stop_reason="end_of_message", + tool_calls=[], + ), + UserMessage( + content="Has a straight, slender neck and is smaller in size compared to its relative.", + role="user", + ), + CompletionMessage( + content="That's Alpaca!", + role="assistant", + stop_reason="end_of_message", + tool_calls=[], + ), + UserMessage( + content="Generally taller and more robust, commonly seen as guard animals.", + role="user", + ), + ], + model="Llama3.2-11B-Vision-Instruct", +) + +cprint(f"> Response: {response.completion_message.content}", "cyan") diff --git a/examples/agents/inference-local-cloud.py b/examples/agents/inference-local-cloud.py new file mode 100644 index 00000000..a1a727f6 --- /dev/null +++ b/examples/agents/inference-local-cloud.py @@ -0,0 +1,46 @@ +import asyncio + +import httpx +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.inference.event_logger import EventLogger +from llama_stack_client.types import UserMessage +from termcolor import cprint + +local_client = LlamaStackClient(base_url="http://localhost:5000") +cloud_client = LlamaStackClient(base_url="http://localhost:5001") + + +async def select_client() -> LlamaStackClient: + try: + async with httpx.AsyncClient() as http_client: + response = await http_client.get(f"{local_client.base_url}/health") + if response.status_code == 200: + cprint("Using local client.", "yellow") + return local_client + except httpx.RequestError: + pass + cprint("Local client unavailable. 
Switching to cloud client.", "yellow") + return cloud_client + + +async def get_llama_response(stream: bool = True): + client = await select_client() + message = UserMessage( + content="hello world, write me a 2 sentence poem about the moon", role="user" + ) + cprint(f"User> {message.content}", "green") + + response = client.inference.chat_completion( + messages=[message], + model="Llama3.2-11B-Vision-Instruct", + stream=stream, + ) + + if not stream: + cprint(f"> Response: {response}", "cyan") + else: + async for log in EventLogger().log(response): + log.print() + + +asyncio.run(get_llama_response()) diff --git a/examples/agents/inference-loop-history.py b/examples/agents/inference-loop-history.py new file mode 100644 index 00000000..5dc61fc5 --- /dev/null +++ b/examples/agents/inference-loop-history.py @@ -0,0 +1,37 @@ +import asyncio + +from llama_stack_client import LlamaStackClient +from llama_stack_client.types import UserMessage +from termcolor import cprint + +client = LlamaStackClient( + base_url="http://localhost:5000", +) + + +async def chat_loop(): + conversation_history = [] + + while True: + user_input = input("User> ") + if user_input.lower() in ["exit", "quit", "bye"]: + cprint("Ending conversation. Goodbye!", "yellow") + break + + user_message = UserMessage(content=user_input, role="user") + conversation_history.append(user_message) + + response = client.inference.chat_completion( + messages=conversation_history, + model="Llama3.2-11B-Vision-Instruct", + ) + + cprint(f"> Response: {response.completion_message.content}", "cyan") + + assistant_message = UserMessage( + content=response.completion_message.content, role="user" + ) + conversation_history.append(assistant_message) + + +asyncio.run(chat_loop()) diff --git a/examples/agents/inference-loop.py b/examples/agents/inference-loop.py new file mode 100644 index 00000000..031f22d5 --- /dev/null +++ b/examples/agents/inference-loop.py @@ -0,0 +1,32 @@ +import asyncio + +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.inference.event_logger import EventLogger +from llama_stack_client.types import UserMessage +from termcolor import cprint + +client = LlamaStackClient( + base_url="http://localhost:5000", +) + + +async def chat_loop(): + while True: + + user_input = input("User> ") + + if user_input.lower() in ["exit", "quit", "bye"]: + cprint("Ending conversation. 
Goodbye!", "yellow") + break + + message = UserMessage(content=user_input, role="user") + + response = client.inference.chat_completion( + messages=[message], + model="Llama3.2-11B-Vision-Instruct", + ) + + cprint(f"> Response: {response.completion_message.content}", "cyan") + + +asyncio.run(chat_loop()) diff --git a/examples/agents/inference-streaming.py b/examples/agents/inference-streaming.py new file mode 100644 index 00000000..85afbb4a --- /dev/null +++ b/examples/agents/inference-streaming.py @@ -0,0 +1,36 @@ +import asyncio + +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.inference.event_logger import EventLogger +from llama_stack_client.types import UserMessage +from termcolor import cprint + + +async def run_main(stream: bool = True): + client = LlamaStackClient( + base_url=f"http://localhost:5000", + ) + + message = UserMessage( + content="hello world, write me a 2 sentence poem about the moon", role="user" + ) + print(f"User>{message.content}", "green") + + response = client.inference.chat_completion( + messages=[message], + model="Llama3.2-11B-Vision-Instruct", + stream=stream, + ) + + if not stream: + cprint(f"> Response: {response}", "cyan") + else: + async for log in EventLogger().log(response): + log.print() + + models_response = client.models.list() + print(models_response) + + +if __name__ == "__main__": + asyncio.run(run_main()) diff --git a/examples/agents/inference.py b/examples/agents/inference.py new file mode 100644 index 00000000..ad781d40 --- /dev/null +++ b/examples/agents/inference.py @@ -0,0 +1,19 @@ +from llama_stack_client import LlamaStackClient +from llama_stack_client.types import SystemMessage, UserMessage + +client = LlamaStackClient( + base_url="http://localhost:5000", +) + +response = client.inference.chat_completion( + messages=[ + SystemMessage(content="pretend you are a llama", role="system"), + UserMessage( + content="hello world, write me a 2 sentence poem about the moon", + role="user", + ), + ], + model="Llama3.2-11B-Vision-Instruct", +) + +print(response.completion_message.content) diff --git a/examples/agents/inflation.py b/examples/agents/inflation.py index 635fd95f..e2af7a8a 100644 --- a/examples/agents/inflation.py +++ b/examples/agents/inflation.py @@ -14,10 +14,9 @@ from llama_stack_client.types import Attachment, SamplingParams, UserMessage from llama_stack_client.types.agent_create_params import * # noqa: F403 from common.client_utils import * # noqa: F403 +from examples.agents.multi_turn import execute_turns, prompt_to_turn from termcolor import cprint -from .multi_turn import execute_turns, prompt_to_turn - async def run_main(host: str, port: int, disable_safety: bool = False): api_keys = load_api_keys_from_env() @@ -40,15 +39,15 @@ async def run_main(host: str, port: int, disable_safety: bool = False): agent_config=agent_config, custom_tools=[], turn_inputs=[ - prompt_to_turn( - "Here is a csv, can you describe it ?", - attachments=[ - Attachment( - content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", - mime_type="text/csv", - ), - ], - ), + # prompt_to_turn( + # "Here is a csv, can you describe it ?", + # attachments=[ + # Attachment( + # content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", + # mime_type="text/csv", + # ), + # ], + # ), prompt_to_turn("Which year ended with the highest inflation ?"), prompt_to_turn( "What macro economic situations that led to such high inflation in that 
period?" diff --git a/examples/agents/multi_turn.py b/examples/agents/multi_turn.py index 8b20352a..e96c6ea7 100644 --- a/examples/agents/multi_turn.py +++ b/examples/agents/multi_turn.py @@ -10,10 +10,16 @@ import sys from typing import List, Optional -from pydantic import BaseModel + +# from llama_stack_client.lib.agents.agents import Attachment +# from llama_stack.apis.agents import AgentConfig +# from llama_stack.tools.custom.datatypes import CustomTool from common.client_utils import * # noqa: F403 + from llama_stack_client.lib.agents.event_logger import EventLogger +from llama_stack_client.types import Attachment, UserMessage +from pydantic import BaseModel from termcolor import cprint diff --git a/examples/agents/pdf-rag.ipynb b/examples/agents/pdf-rag.ipynb new file mode 100644 index 00000000..62d53429 --- /dev/null +++ b/examples/agents/pdf-rag.ipynb @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install required libraries if not already installed\n", + "# !pip install pdf2image\n", + "# !pip install pdfminer\n", + "# !pip install tqdm\n", + "# !pip install llama_stack_client\n", + "\n", + "# Required imports\n", + "from pdf2image import convert_from_path\n", + "from pdfminer.high_level import extract_text\n", + "from io import BytesIO\n", + "import base64\n", + "import os\n", + "import concurrent.futures\n", + "from tqdm import tqdm\n", + "import json\n", + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.types import SystemMessage, UserMessage\n", + "\n", + "# Function to convert PDF to images\n", + "def convert_doc_to_images(pdf_path):\n", + " try:\n", + " images = convert_from_path(pdf_path)\n", + " return images\n", + " except Exception as e:\n", + " print(f\"Error converting PDF to images: {e}\")\n", + " return []\n", + "\n", + "# Function to extract text from PDF\n", + "def extract_text_from_pdf(pdf_path):\n", + " try:\n", + " text = extract_text(pdf_path)\n", + " return text\n", + " except Exception as e:\n", + " print(f\"Error extracting text from PDF: {e}\")\n", + " return \"\"\n", + "\n", + "# Function to convert image to base64 for LlamaStack analysis\n", + "def get_img_uri(img):\n", + " buffer = BytesIO()\n", + " img.save(buffer, format=\"jpeg\")\n", + " base64_image = base64.b64encode(buffer.getvalue()).decode(\"utf-8\")\n", + " data_uri = f\"data:image/jpeg;base64,{base64_image}\"\n", + " return data_uri\n", + "\n", + "# LlamaStack inference function\n", + "def analyze_image_llama_stack(img_uri, client):\n", + " system_prompt = '''\n", + " You will be provided with an image of a pdf page or a slide. 
Your goal is to describe the content of the image in detail.\n", + " Do not mention the format of the image or page numbers, but focus on explaining the contents as if you are presenting it to a technical audience.\n", + " '''\n", + "\n", + " response = client.inference.chat_completion(\n", + " messages=[\n", + " SystemMessage(content=system_prompt, role=\"system\"),\n", + " UserMessage(\n", + " content=f\"Here is the image: {img_uri}\",\n", + " role=\"user\",\n", + " ),\n", + " ],\n", + " model=\"Llama3.2-11B-Vision-Instruct\",\n", + " )\n", + "\n", + " return response.completion_message.content\n", + "\n", + "# Function to process a single PDF and analyze its pages\n", + "def process_pdf(pdf_path, client):\n", + " doc = {\"filename\": os.path.basename(pdf_path)}\n", + "\n", + " # Extract text\n", + " doc['text'] = extract_text_from_pdf(pdf_path)\n", + "\n", + " # Convert to images\n", + " images = convert_doc_to_images(pdf_path)\n", + "\n", + " # Analyze images with LlamaStack\n", + " if images:\n", + " pages_description = []\n", + " with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:\n", + " futures = [executor.submit(analyze_image_llama_stack, get_img_uri(img), client) for img in images[1:]] # Skipping first page if desired\n", + "\n", + " with tqdm(total=len(images) - 1) as pbar:\n", + " for _ in concurrent.futures.as_completed(futures):\n", + " pbar.update(1)\n", + "\n", + " for f in futures:\n", + " result = f.result()\n", + " pages_description.append(result)\n", + "\n", + " doc['pages_description'] = pages_description\n", + "\n", + " return doc\n", + "\n", + "# Initialize LlamaStack client\n", + "client = LlamaStackClient(base_url=\"http://localhost:5000\") # Replace with your actual LlamaStack base URL\n", + "\n", + "# Example usage with your PDF file path\n", + "pdf_file_path = \"/mnt/data/your_pdf_file.pdf\" # Adjust path as needed\n", + "\n", + "# Process the PDF and get the result\n", + "doc_data = process_pdf(pdf_file_path, client)\n", + "\n", + "# Saving result to JSON for later use\n", + "output_path = \"/mnt/data/processed_pdf_data.json\"\n", + "with open(output_path, 'w') as f:\n", + " json.dump(doc_data, f)\n", + "\n", + "print(f\"Processed PDF data saved to {output_path}\")\n" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/agents/quickstart.md b/examples/agents/quickstart.md new file mode 100644 index 00000000..465e2be2 --- /dev/null +++ b/examples/agents/quickstart.md @@ -0,0 +1,99 @@ + +# Quickstart + +This guide will walk you through the steps to set up an end-to-end workflow with Llama Stack. It focuses on building a Llama Stack distribution and starting up a Llama Stack server. See our [documentation](../README.md) for more on Llama Stack's capabilities, or visit [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) for example apps. + + +## 0. Prerequsite +Feel free to skip this step if you already have the prerequsite installed. + +1. conda (steps to install) +2. + + +## 1. Installation + +The `llama` CLI tool helps you manage the Llama toolchain & agentic systems. After installing the `llama-stack` package, the `llama` command should be available in your path. + +**Install as a package**: + Install directly from [PyPI](https://pypi.org/project/llama-stack/) with: + ```bash + pip install llama-stack + ``` + +## 2. 
Download Llama models: + + + ``` + llama download --model-id Llama3.1-8B-Instruct + ``` + You will have to follow the instructions in the cli to complete the download, get a instant license here: URL to license. + +## 3. Build->Configure->Run via Conda: + For development, build a LlamaStack distribution from scratch. + + **`llama stack build`** + Enter build information interactively: + ```bash + llama stack build + ``` + + **`llama stack configure`** + Run `llama stack configure ` using the name from the build step. + ```bash + llama stack configure my-local-stack + ``` + + **`llama stack run`** + Start the server with: + ```bash + llama stack run my-local-stack + ``` + +## 4. Testing with Client + +After setup, test the server with a POST request: +```bash +curl http://localhost:5000/inference/chat_completion \ +-H "Content-Type: application/json" \ +-d '{ + "model": "Llama3.1-8B-Instruct", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Write me a 2-sentence poem about the moon"} + ], + "sampling_params": {"temperature": 0.7, "seed": 42, "max_tokens": 512} +}' +``` + + +## 5. Inference + +After setup, test the server with a POST request: +```bash +curl http://localhost:5000/inference/chat_completion \ +-H "Content-Type: application/json" \ +-d '{ + "model": "Llama3.1-8B-Instruct", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Write me a 2-sentence poem about the moon"} + ], + "sampling_params": {"temperature": 0.7, "seed": 42, "max_tokens": 512} +}' +``` + + + +Check our client SDKs for various languages: [Python](https://github.com/meta-llama/llama-stack-client-python), [Node](https://github.com/meta-llama/llama-stack-client-node), [Swift](https://github.com/meta-llama/llama-stack-client-swift), and [Kotlin](https://github.com/meta-llama/llama-stack-client-kotlin). + +## Advanced Guides + +For more on custom Llama Stack distributions, refer to our [Building a Llama Stack Distribution](./building_distro.md) guide. + + +## Next Steps: +check out + +1. +2. 
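
The quickstart added above drives the server with `curl`; the same chat completion can also be issued through the Python SDK, following the pattern of the `inference.py` example added earlier in this patch. A minimal sketch, assuming the model name and port from the curl example above:

```python
from llama_stack_client import LlamaStackClient
from llama_stack_client.types import SystemMessage, UserMessage

# Point the client at the locally running Llama Stack server.
client = LlamaStackClient(base_url="http://localhost:5000")

# Mirror the curl request: a system prompt plus a single user turn.
response = client.inference.chat_completion(
    messages=[
        SystemMessage(content="You are a helpful assistant.", role="system"),
        UserMessage(content="Write me a 2-sentence poem about the moon", role="user"),
    ],
    model="Llama3.1-8B-Instruct",
)

print(response.completion_message.content)
```
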
From b2699b2720ffb2f291bccf0833d8e30f9c518b5c Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 11 Nov 2024 18:18:22 -0800 Subject: [PATCH 2/6] fix register and also message typedict in inference --- examples/inference/client.py | 15 +++++++++++++-- examples/inference/client_with_vision.py | 19 +++++++++++++------ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/examples/inference/client.py b/examples/inference/client.py index 54d7aca4..39a028ab 100644 --- a/examples/inference/client.py +++ b/examples/inference/client.py @@ -19,9 +19,20 @@ async def run_main(host: str, port: int, stream: bool = True): base_url=f"http://{host}:{port}", ) - message = UserMessage( - content="hello world, write me a 2 sentence poem about the moon", role="user" + client.models.register( + model={ + "identifier": "Llama3.1-8B-Instruct", + "llama_model": "Llama3.1-8B-Instruct", + "provider_id": "meta-reference-0", + "metadata": {}, + } ) + + message = { + "role": "user", + "content": "hello world, write me a 2 sentence poem about the moon", + } + cprint(f"User>{message.content}", "green") response = client.inference.chat_completion( messages=[message], diff --git a/examples/inference/client_with_vision.py b/examples/inference/client_with_vision.py index 4b6ac097..544ca367 100644 --- a/examples/inference/client_with_vision.py +++ b/examples/inference/client_with_vision.py @@ -32,13 +32,20 @@ async def run_main(host: str, port: int, stream: bool = True): data_url = f"data:{mime_type};base64,{encoded_string}" - message = UserMessage( - role="user", - content=[ - {"image": {"uri": data_url}}, - "Describe what is in this image.", - ], + client.models.register( + model={ + "identifier": "Llama3.2-11B-Vision-Instruct", + "llama_model": "Llama3.2-11B-Vision-Instruct", + "provider_id": "meta-reference-0", + "metadata": {}, + } ) + + message = { + "role": "user", + "content": [{"image": {"uri": data_url}}, "Describe what is in this image."], + } + cprint(f"User>{message.content}", "green") response = client.inference.chat_completion( messages=[message], From fb633ee1aadc4bb021121d2772bce3e12d929b0b Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 11 Nov 2024 18:36:37 -0800 Subject: [PATCH 3/6] Delete examples/agents/quickstart.md --- examples/agents/quickstart.md | 99 ----------------------------------- 1 file changed, 99 deletions(-) delete mode 100644 examples/agents/quickstart.md diff --git a/examples/agents/quickstart.md b/examples/agents/quickstart.md deleted file mode 100644 index 465e2be2..00000000 --- a/examples/agents/quickstart.md +++ /dev/null @@ -1,99 +0,0 @@ - -# Quickstart - -This guide will walk you through the steps to set up an end-to-end workflow with Llama Stack. It focuses on building a Llama Stack distribution and starting up a Llama Stack server. See our [documentation](../README.md) for more on Llama Stack's capabilities, or visit [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) for example apps. - - -## 0. Prerequsite -Feel free to skip this step if you already have the prerequsite installed. - -1. conda (steps to install) -2. - - -## 1. Installation - -The `llama` CLI tool helps you manage the Llama toolchain & agentic systems. After installing the `llama-stack` package, the `llama` command should be available in your path. - -**Install as a package**: - Install directly from [PyPI](https://pypi.org/project/llama-stack/) with: - ```bash - pip install llama-stack - ``` - -## 2. 
Download Llama models: - - - ``` - llama download --model-id Llama3.1-8B-Instruct - ``` - You will have to follow the instructions in the cli to complete the download, get a instant license here: URL to license. - -## 3. Build->Configure->Run via Conda: - For development, build a LlamaStack distribution from scratch. - - **`llama stack build`** - Enter build information interactively: - ```bash - llama stack build - ``` - - **`llama stack configure`** - Run `llama stack configure ` using the name from the build step. - ```bash - llama stack configure my-local-stack - ``` - - **`llama stack run`** - Start the server with: - ```bash - llama stack run my-local-stack - ``` - -## 4. Testing with Client - -After setup, test the server with a POST request: -```bash -curl http://localhost:5000/inference/chat_completion \ --H "Content-Type: application/json" \ --d '{ - "model": "Llama3.1-8B-Instruct", - "messages": [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Write me a 2-sentence poem about the moon"} - ], - "sampling_params": {"temperature": 0.7, "seed": 42, "max_tokens": 512} -}' -``` - - -## 5. Inference - -After setup, test the server with a POST request: -```bash -curl http://localhost:5000/inference/chat_completion \ --H "Content-Type: application/json" \ --d '{ - "model": "Llama3.1-8B-Instruct", - "messages": [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Write me a 2-sentence poem about the moon"} - ], - "sampling_params": {"temperature": 0.7, "seed": 42, "max_tokens": 512} -}' -``` - - - -Check our client SDKs for various languages: [Python](https://github.com/meta-llama/llama-stack-client-python), [Node](https://github.com/meta-llama/llama-stack-client-node), [Swift](https://github.com/meta-llama/llama-stack-client-swift), and [Kotlin](https://github.com/meta-llama/llama-stack-client-kotlin). - -## Advanced Guides - -For more on custom Llama Stack distributions, refer to our [Building a Llama Stack Distribution](./building_distro.md) guide. - - -## Next Steps: -check out - -1. -2. 
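
For reference, PATCH 2/6 above changes the inference clients to register the model with the server before requesting a completion and to pass messages as plain dicts. Pulled out of the diff context, that flow looks roughly like the sketch below; the base URL, model identifier, and `provider_id` are taken from the diff and are assumptions for any other deployment.

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")

# Register the model before inference, as PATCH 2/6 does in examples/inference/client.py.
client.models.register(
    model={
        "identifier": "Llama3.1-8B-Instruct",
        "llama_model": "Llama3.1-8B-Instruct",
        "provider_id": "meta-reference-0",
        "metadata": {},
    }
)

# After the patch, messages are plain dicts rather than UserMessage objects,
# so fields are read by key (message["content"]) instead of attribute access.
message = {
    "role": "user",
    "content": "hello world, write me a 2 sentence poem about the moon",
}

response = client.inference.chat_completion(
    messages=[message],
    model="Llama3.1-8B-Instruct",
)

print(response.completion_message.content)
```
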
From cb8d4cbb8efa11bbed8aae32d05304ec32a9014a Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 11 Nov 2024 18:41:53 -0800 Subject: [PATCH 4/6] removed unnecessary files --- examples/agents/comprehensive-start.md | 111 ------------------- examples/agents/inference-fewshot.py | 46 -------- examples/agents/inference-local-cloud.py | 46 -------- examples/agents/inference-loop-history.py | 37 ------- examples/agents/inference-loop.py | 32 ------ examples/agents/inference-streaming.py | 36 ------- examples/agents/inference.py | 19 ---- examples/agents/inflation.py | 112 +++++++++++-------- examples/agents/multi_turn.py | 62 ----------- examples/agents/pdf-rag.ipynb | 126 ---------------------- 10 files changed, 70 insertions(+), 557 deletions(-) delete mode 100644 examples/agents/comprehensive-start.md delete mode 100644 examples/agents/inference-fewshot.py delete mode 100644 examples/agents/inference-local-cloud.py delete mode 100644 examples/agents/inference-loop-history.py delete mode 100644 examples/agents/inference-loop.py delete mode 100644 examples/agents/inference-streaming.py delete mode 100644 examples/agents/inference.py delete mode 100644 examples/agents/multi_turn.py delete mode 100644 examples/agents/pdf-rag.ipynb diff --git a/examples/agents/comprehensive-start.md b/examples/agents/comprehensive-start.md deleted file mode 100644 index 604c8756..00000000 --- a/examples/agents/comprehensive-start.md +++ /dev/null @@ -1,111 +0,0 @@ - -# Getting Started with Llama Stack - -This guide will walk you through the steps to set up an end-to-end workflow with Llama Stack. It focuses on building a Llama Stack distribution and starting up a Llama Stack server. See our [documentation](../README.md) for more on Llama Stack's capabilities, or visit [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) for example apps. - -## Installation - -The `llama` CLI tool helps you manage the Llama toolchain & agentic systems. After installing the `llama-stack` package, the `llama` command should be available in your path. - -You can install this repository in two ways: - -1. **Install as a package**: - Install directly from [PyPI](https://pypi.org/project/llama-stack/) with: - ```bash - pip install llama-stack - ``` - -2. **Install from source**: - Follow these steps to install from the source code: - ```bash - mkdir -p ~/local - cd ~/local - git clone git@github.com:meta-llama/llama-stack.git - - conda create -n stack python=3.10 - conda activate stack - - cd llama-stack - $CONDA_PREFIX/bin/pip install -e . - ``` - -Refer to the [CLI Reference](./cli_reference.md) for details on Llama CLI commands. - -## Starting Up Llama Stack Server - -There are two ways to start the Llama Stack server: - -1. **Using Docker**: - We provide a pre-built Docker image of Llama Stack, available in the [distributions](../distributions/) folder. - - > **Note:** For GPU inference, set environment variables to specify the local directory with your model checkpoints and enable GPU inference. 
- ```bash - export LLAMA_CHECKPOINT_DIR=~/.llama - ``` - Download Llama models with: - ``` - llama download --model-id Llama3.1-8B-Instruct - ``` - Start a Docker container with: - ```bash - cd llama-stack/distributions/meta-reference-gpu - docker run -it -p 5000:5000 -v ~/.llama:/root/.llama -v ./run.yaml:/root/my-run.yaml --gpus=all distribution-meta-reference-gpu --yaml_config /root/my-run.yaml - ``` - - **Tip:** For remote providers, use `docker compose up` with scripts in the [distributions folder](../distributions/). - -2. **Build->Configure->Run via Conda**: - For development, build a LlamaStack distribution from scratch. - - **`llama stack build`** - Enter build information interactively: - ```bash - llama stack build - ``` - - **`llama stack configure`** - Run `llama stack configure ` using the name from the build step. - ```bash - llama stack configure my-local-stack - ``` - - **`llama stack run`** - Start the server with: - ```bash - llama stack run my-local-stack - ``` - -## Testing with Client - -After setup, test the server with a client: -```bash -cd /path/to/llama-stack -conda activate - -python -m llama_stack.apis.inference.client localhost 5000 -``` - -You can also send a POST request: -```bash -curl http://localhost:5000/inference/chat_completion \ --H "Content-Type: application/json" \ --d '{ - "model": "Llama3.1-8B-Instruct", - "messages": [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Write me a 2-sentence poem about the moon"} - ], - "sampling_params": {"temperature": 0.7, "seed": 42, "max_tokens": 512} -}' -``` - -For testing safety, run: -```bash -python -m llama_stack.apis.safety.client localhost 5000 -``` - -Check our client SDKs for various languages: [Python](https://github.com/meta-llama/llama-stack-client-python), [Node](https://github.com/meta-llama/llama-stack-client-node), [Swift](https://github.com/meta-llama/llama-stack-client-swift), and [Kotlin](https://github.com/meta-llama/llama-stack-client-kotlin). - -## Advanced Guides - -For more on custom Llama Stack distributions, refer to our [Building a Llama Stack Distribution](./building_distro.md) guide. 
diff --git a/examples/agents/inference-fewshot.py b/examples/agents/inference-fewshot.py deleted file mode 100644 index e90b87c5..00000000 --- a/examples/agents/inference-fewshot.py +++ /dev/null @@ -1,46 +0,0 @@ -from llama_stack_client import LlamaStackClient -from llama_stack_client.types import CompletionMessage, UserMessage -from termcolor import cprint - -# Initialize the LlamaStackClient with the base URL for inference endpoint -client = LlamaStackClient(base_url="http://localhost:5000") - -# Invoke chat_completion with the few-shot example set -response = client.inference.chat_completion( - messages=[ - UserMessage(content="Have shorter, spear-shaped ears.", role="user"), - CompletionMessage( - content="That's Alpaca!", - role="assistant", - stop_reason="end_of_message", - tool_calls=[], - ), - UserMessage( - content="Known for their calm nature and used as pack animals in mountainous regions.", - role="user", - ), - CompletionMessage( - content="That's Llama!", - role="assistant", - stop_reason="end_of_message", - tool_calls=[], - ), - UserMessage( - content="Has a straight, slender neck and is smaller in size compared to its relative.", - role="user", - ), - CompletionMessage( - content="That's Alpaca!", - role="assistant", - stop_reason="end_of_message", - tool_calls=[], - ), - UserMessage( - content="Generally taller and more robust, commonly seen as guard animals.", - role="user", - ), - ], - model="Llama3.2-11B-Vision-Instruct", -) - -cprint(f"> Response: {response.completion_message.content}", "cyan") diff --git a/examples/agents/inference-local-cloud.py b/examples/agents/inference-local-cloud.py deleted file mode 100644 index a1a727f6..00000000 --- a/examples/agents/inference-local-cloud.py +++ /dev/null @@ -1,46 +0,0 @@ -import asyncio - -import httpx -from llama_stack_client import LlamaStackClient -from llama_stack_client.lib.inference.event_logger import EventLogger -from llama_stack_client.types import UserMessage -from termcolor import cprint - -local_client = LlamaStackClient(base_url="http://localhost:5000") -cloud_client = LlamaStackClient(base_url="http://localhost:5001") - - -async def select_client() -> LlamaStackClient: - try: - async with httpx.AsyncClient() as http_client: - response = await http_client.get(f"{local_client.base_url}/health") - if response.status_code == 200: - cprint("Using local client.", "yellow") - return local_client - except httpx.RequestError: - pass - cprint("Local client unavailable. 
Switching to cloud client.", "yellow") - return cloud_client - - -async def get_llama_response(stream: bool = True): - client = await select_client() - message = UserMessage( - content="hello world, write me a 2 sentence poem about the moon", role="user" - ) - cprint(f"User> {message.content}", "green") - - response = client.inference.chat_completion( - messages=[message], - model="Llama3.2-11B-Vision-Instruct", - stream=stream, - ) - - if not stream: - cprint(f"> Response: {response}", "cyan") - else: - async for log in EventLogger().log(response): - log.print() - - -asyncio.run(get_llama_response()) diff --git a/examples/agents/inference-loop-history.py b/examples/agents/inference-loop-history.py deleted file mode 100644 index 5dc61fc5..00000000 --- a/examples/agents/inference-loop-history.py +++ /dev/null @@ -1,37 +0,0 @@ -import asyncio - -from llama_stack_client import LlamaStackClient -from llama_stack_client.types import UserMessage -from termcolor import cprint - -client = LlamaStackClient( - base_url="http://localhost:5000", -) - - -async def chat_loop(): - conversation_history = [] - - while True: - user_input = input("User> ") - if user_input.lower() in ["exit", "quit", "bye"]: - cprint("Ending conversation. Goodbye!", "yellow") - break - - user_message = UserMessage(content=user_input, role="user") - conversation_history.append(user_message) - - response = client.inference.chat_completion( - messages=conversation_history, - model="Llama3.2-11B-Vision-Instruct", - ) - - cprint(f"> Response: {response.completion_message.content}", "cyan") - - assistant_message = UserMessage( - content=response.completion_message.content, role="user" - ) - conversation_history.append(assistant_message) - - -asyncio.run(chat_loop()) diff --git a/examples/agents/inference-loop.py b/examples/agents/inference-loop.py deleted file mode 100644 index 031f22d5..00000000 --- a/examples/agents/inference-loop.py +++ /dev/null @@ -1,32 +0,0 @@ -import asyncio - -from llama_stack_client import LlamaStackClient -from llama_stack_client.lib.inference.event_logger import EventLogger -from llama_stack_client.types import UserMessage -from termcolor import cprint - -client = LlamaStackClient( - base_url="http://localhost:5000", -) - - -async def chat_loop(): - while True: - - user_input = input("User> ") - - if user_input.lower() in ["exit", "quit", "bye"]: - cprint("Ending conversation. 
Goodbye!", "yellow") - break - - message = UserMessage(content=user_input, role="user") - - response = client.inference.chat_completion( - messages=[message], - model="Llama3.2-11B-Vision-Instruct", - ) - - cprint(f"> Response: {response.completion_message.content}", "cyan") - - -asyncio.run(chat_loop()) diff --git a/examples/agents/inference-streaming.py b/examples/agents/inference-streaming.py deleted file mode 100644 index 85afbb4a..00000000 --- a/examples/agents/inference-streaming.py +++ /dev/null @@ -1,36 +0,0 @@ -import asyncio - -from llama_stack_client import LlamaStackClient -from llama_stack_client.lib.inference.event_logger import EventLogger -from llama_stack_client.types import UserMessage -from termcolor import cprint - - -async def run_main(stream: bool = True): - client = LlamaStackClient( - base_url=f"http://localhost:5000", - ) - - message = UserMessage( - content="hello world, write me a 2 sentence poem about the moon", role="user" - ) - print(f"User>{message.content}", "green") - - response = client.inference.chat_completion( - messages=[message], - model="Llama3.2-11B-Vision-Instruct", - stream=stream, - ) - - if not stream: - cprint(f"> Response: {response}", "cyan") - else: - async for log in EventLogger().log(response): - log.print() - - models_response = client.models.list() - print(models_response) - - -if __name__ == "__main__": - asyncio.run(run_main()) diff --git a/examples/agents/inference.py b/examples/agents/inference.py deleted file mode 100644 index ad781d40..00000000 --- a/examples/agents/inference.py +++ /dev/null @@ -1,19 +0,0 @@ -from llama_stack_client import LlamaStackClient -from llama_stack_client.types import SystemMessage, UserMessage - -client = LlamaStackClient( - base_url="http://localhost:5000", -) - -response = client.inference.chat_completion( - messages=[ - SystemMessage(content="pretend you are a llama", role="system"), - UserMessage( - content="hello world, write me a 2 sentence poem about the moon", - role="user", - ), - ], - model="Llama3.2-11B-Vision-Instruct", -) - -print(response.completion_message.content) diff --git a/examples/agents/inflation.py b/examples/agents/inflation.py index e2af7a8a..abe845d1 100644 --- a/examples/agents/inflation.py +++ b/examples/agents/inflation.py @@ -4,60 +4,88 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -# Copyright (c) Meta Platforms, Inc. and affiliates. -# This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement. 
- import asyncio +import os import fire -from llama_stack_client.types import Attachment, SamplingParams, UserMessage -from llama_stack_client.types.agent_create_params import * # noqa: F403 -from common.client_utils import * # noqa: F403 -from examples.agents.multi_turn import execute_turns, prompt_to_turn -from termcolor import cprint +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.agents.agent import Agent +from llama_stack_client.lib.agents.event_logger import EventLogger +from llama_stack_client.types import Attachment +from llama_stack_client.types.agent_create_params import AgentConfig async def run_main(host: str, port: int, disable_safety: bool = False): - api_keys = load_api_keys_from_env() - tool_definitions = [ - search_tool_defn(api_keys), - # Adding code_interpreter enables file analysis - AgentConfigToolCodeInterpreterToolDefinition(type="code_interpreter"), - ] - - agent_config = await make_agent_config_with_custom_tools( - disable_safety=disable_safety, - tool_config=QuickToolConfig( - tool_definitions=tool_definitions, - custom_tools=[], - attachment_behavior="code_interpreter", - ), + client = LlamaStackClient( + base_url=f"http://{host}:{port}", ) - await execute_turns( - agent_config=agent_config, - custom_tools=[], - turn_inputs=[ - # prompt_to_turn( - # "Here is a csv, can you describe it ?", - # attachments=[ - # Attachment( - # content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", - # mime_type="text/csv", - # ), - # ], - # ), - prompt_to_turn("Which year ended with the highest inflation ?"), - prompt_to_turn( - "What macro economic situations that led to such high inflation in that period?" - ), - prompt_to_turn("Plot average yearly inflation as a time series"), + input_shields = [] if disable_safety else ["llama_guard"] + output_shields = [] if disable_safety else ["llama_guard"] + + agent_config = AgentConfig( + model="Llama3.1-8B-Instruct", + instructions="You are a helpful assistant", + sampling_params={ + "strategy": "greedy", + "temperature": 1.0, + "top_p": 0.9, + }, + tools=[ + { + "type": "brave_search", + "engine": "brave", + "api_key": os.getenv("BRAVE_SEARCH_API_KEY"), + }, + { + "type": "code_interpreter", + }, ], - host=host, - port=port, + tool_choice="required", + tool_prompt_format="json", + input_shields=input_shields, + output_shields=output_shields, + enable_session_persistence=False, ) + agent = Agent(client, agent_config) + session_id = agent.create_session("test-session") + print(f"Created session_id={session_id} for Agent({agent.agent_id})") + + user_prompts = [ + ( + "Here is a csv, can you describe it ?", + [ + Attachment( + content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", + mime_type="test/csv", + ) + ], + ), + ("Which year ended with the highest inflation ?", None), + ( + "What macro economic situations that led to such high inflation in that period?", + None, + ), + ("Plot average yearly inflation as a time series", None), + ] + + for prompt in user_prompts: + response = agent.create_turn( + messages=[ + { + "role": "user", + "content": prompt[0], + } + ], + attachments=prompt[1], + session_id=session_id, + ) + + async for log in EventLogger().log(response): + log.print() + def main(host: str, port: int, disable_safety: bool = False): asyncio.run(run_main(host, port, disable_safety)) diff --git a/examples/agents/multi_turn.py b/examples/agents/multi_turn.py deleted file mode 100644 index 
e96c6ea7..00000000 --- a/examples/agents/multi_turn.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement. -import os -import sys -from typing import List, Optional - - -# from llama_stack_client.lib.agents.agents import Attachment -# from llama_stack.apis.agents import AgentConfig -# from llama_stack.tools.custom.datatypes import CustomTool - -from common.client_utils import * # noqa: F403 - -from llama_stack_client.lib.agents.event_logger import EventLogger -from llama_stack_client.types import Attachment, UserMessage -from pydantic import BaseModel -from termcolor import cprint - - -class UserTurnInput(BaseModel): - message: UserMessage - attachments: Optional[List[Attachment]] = None - - -def prompt_to_turn( - content: str, attachments: Optional[List[Attachment]] = None -) -> UserTurnInput: - return UserTurnInput( - message=UserMessage(content=content, role="user"), attachments=attachments - ) - - -async def execute_turns( - agent_config: AgentConfig, - custom_tools: List[CustomTool], - turn_inputs: List[UserTurnInput], - host: str = "localhost", - port: int = 5000, -): - agent = await get_agent_with_custom_tools( - host=host, - port=port, - agent_config=agent_config, - custom_tools=custom_tools, - ) - while len(turn_inputs) > 0: - turn = turn_inputs.pop(0) - - iterator = agent.execute_turn( - [turn.message], - turn.attachments, - ) - cprint(f"User> {turn.message.content}", color="white", attrs=["bold"]) - async for log in EventLogger().log(iterator): - if log is not None: - log.print() diff --git a/examples/agents/pdf-rag.ipynb b/examples/agents/pdf-rag.ipynb deleted file mode 100644 index 62d53429..00000000 --- a/examples/agents/pdf-rag.ipynb +++ /dev/null @@ -1,126 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Install required libraries if not already installed\n", - "# !pip install pdf2image\n", - "# !pip install pdfminer\n", - "# !pip install tqdm\n", - "# !pip install llama_stack_client\n", - "\n", - "# Required imports\n", - "from pdf2image import convert_from_path\n", - "from pdfminer.high_level import extract_text\n", - "from io import BytesIO\n", - "import base64\n", - "import os\n", - "import concurrent.futures\n", - "from tqdm import tqdm\n", - "import json\n", - "from llama_stack_client import LlamaStackClient\n", - "from llama_stack_client.types import SystemMessage, UserMessage\n", - "\n", - "# Function to convert PDF to images\n", - "def convert_doc_to_images(pdf_path):\n", - " try:\n", - " images = convert_from_path(pdf_path)\n", - " return images\n", - " except Exception as e:\n", - " print(f\"Error converting PDF to images: {e}\")\n", - " return []\n", - "\n", - "# Function to extract text from PDF\n", - "def extract_text_from_pdf(pdf_path):\n", - " try:\n", - " text = extract_text(pdf_path)\n", - " return text\n", - " except Exception as e:\n", - " print(f\"Error extracting text from PDF: {e}\")\n", - " return \"\"\n", - "\n", - "# Function to convert image to base64 for LlamaStack analysis\n", - "def get_img_uri(img):\n", - " buffer = BytesIO()\n", - " img.save(buffer, format=\"jpeg\")\n", - " base64_image = 
base64.b64encode(buffer.getvalue()).decode(\"utf-8\")\n", - " data_uri = f\"data:image/jpeg;base64,{base64_image}\"\n", - " return data_uri\n", - "\n", - "# LlamaStack inference function\n", - "def analyze_image_llama_stack(img_uri, client):\n", - " system_prompt = '''\n", - " You will be provided with an image of a pdf page or a slide. Your goal is to describe the content of the image in detail.\n", - " Do not mention the format of the image or page numbers, but focus on explaining the contents as if you are presenting it to a technical audience.\n", - " '''\n", - "\n", - " response = client.inference.chat_completion(\n", - " messages=[\n", - " SystemMessage(content=system_prompt, role=\"system\"),\n", - " UserMessage(\n", - " content=f\"Here is the image: {img_uri}\",\n", - " role=\"user\",\n", - " ),\n", - " ],\n", - " model=\"Llama3.2-11B-Vision-Instruct\",\n", - " )\n", - "\n", - " return response.completion_message.content\n", - "\n", - "# Function to process a single PDF and analyze its pages\n", - "def process_pdf(pdf_path, client):\n", - " doc = {\"filename\": os.path.basename(pdf_path)}\n", - "\n", - " # Extract text\n", - " doc['text'] = extract_text_from_pdf(pdf_path)\n", - "\n", - " # Convert to images\n", - " images = convert_doc_to_images(pdf_path)\n", - "\n", - " # Analyze images with LlamaStack\n", - " if images:\n", - " pages_description = []\n", - " with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:\n", - " futures = [executor.submit(analyze_image_llama_stack, get_img_uri(img), client) for img in images[1:]] # Skipping first page if desired\n", - "\n", - " with tqdm(total=len(images) - 1) as pbar:\n", - " for _ in concurrent.futures.as_completed(futures):\n", - " pbar.update(1)\n", - "\n", - " for f in futures:\n", - " result = f.result()\n", - " pages_description.append(result)\n", - "\n", - " doc['pages_description'] = pages_description\n", - "\n", - " return doc\n", - "\n", - "# Initialize LlamaStack client\n", - "client = LlamaStackClient(base_url=\"http://localhost:5000\") # Replace with your actual LlamaStack base URL\n", - "\n", - "# Example usage with your PDF file path\n", - "pdf_file_path = \"/mnt/data/your_pdf_file.pdf\" # Adjust path as needed\n", - "\n", - "# Process the PDF and get the result\n", - "doc_data = process_pdf(pdf_file_path, client)\n", - "\n", - "# Saving result to JSON for later use\n", - "output_path = \"/mnt/data/processed_pdf_data.json\"\n", - "with open(output_path, 'w') as f:\n", - " json.dump(doc_data, f)\n", - "\n", - "print(f\"Processed PDF data saved to {output_path}\")\n" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 8a03e4b9f30ade24e486445e6a47b7427a687324 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 11 Nov 2024 18:44:45 -0800 Subject: [PATCH 5/6] revert changes --- examples/agents/inflation.py | 112 +++++++++++++---------------------- 1 file changed, 42 insertions(+), 70 deletions(-) diff --git a/examples/agents/inflation.py b/examples/agents/inflation.py index abe845d1..e2af7a8a 100644 --- a/examples/agents/inflation.py +++ b/examples/agents/inflation.py @@ -4,88 +4,60 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +# Copyright (c) Meta Platforms, Inc. and affiliates. +# This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement. 
+ import asyncio -import os import fire -from llama_stack_client import LlamaStackClient -from llama_stack_client.lib.agents.agent import Agent -from llama_stack_client.lib.agents.event_logger import EventLogger -from llama_stack_client.types import Attachment -from llama_stack_client.types.agent_create_params import AgentConfig +from llama_stack_client.types import Attachment, SamplingParams, UserMessage +from llama_stack_client.types.agent_create_params import * # noqa: F403 +from common.client_utils import * # noqa: F403 +from examples.agents.multi_turn import execute_turns, prompt_to_turn +from termcolor import cprint async def run_main(host: str, port: int, disable_safety: bool = False): - client = LlamaStackClient( - base_url=f"http://{host}:{port}", - ) + api_keys = load_api_keys_from_env() + tool_definitions = [ + search_tool_defn(api_keys), + # Adding code_interpreter enables file analysis + AgentConfigToolCodeInterpreterToolDefinition(type="code_interpreter"), + ] - input_shields = [] if disable_safety else ["llama_guard"] - output_shields = [] if disable_safety else ["llama_guard"] + agent_config = await make_agent_config_with_custom_tools( + disable_safety=disable_safety, + tool_config=QuickToolConfig( + tool_definitions=tool_definitions, + custom_tools=[], + attachment_behavior="code_interpreter", + ), + ) - agent_config = AgentConfig( - model="Llama3.1-8B-Instruct", - instructions="You are a helpful assistant", - sampling_params={ - "strategy": "greedy", - "temperature": 1.0, - "top_p": 0.9, - }, - tools=[ - { - "type": "brave_search", - "engine": "brave", - "api_key": os.getenv("BRAVE_SEARCH_API_KEY"), - }, - { - "type": "code_interpreter", - }, + await execute_turns( + agent_config=agent_config, + custom_tools=[], + turn_inputs=[ + # prompt_to_turn( + # "Here is a csv, can you describe it ?", + # attachments=[ + # Attachment( + # content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", + # mime_type="text/csv", + # ), + # ], + # ), + prompt_to_turn("Which year ended with the highest inflation ?"), + prompt_to_turn( + "What macro economic situations that led to such high inflation in that period?" 
+ ), + prompt_to_turn("Plot average yearly inflation as a time series"), ], - tool_choice="required", - tool_prompt_format="json", - input_shields=input_shields, - output_shields=output_shields, - enable_session_persistence=False, + host=host, + port=port, ) - agent = Agent(client, agent_config) - session_id = agent.create_session("test-session") - print(f"Created session_id={session_id} for Agent({agent.agent_id})") - - user_prompts = [ - ( - "Here is a csv, can you describe it ?", - [ - Attachment( - content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", - mime_type="test/csv", - ) - ], - ), - ("Which year ended with the highest inflation ?", None), - ( - "What macro economic situations that led to such high inflation in that period?", - None, - ), - ("Plot average yearly inflation as a time series", None), - ] - - for prompt in user_prompts: - response = agent.create_turn( - messages=[ - { - "role": "user", - "content": prompt[0], - } - ], - attachments=prompt[1], - session_id=session_id, - ) - - async for log in EventLogger().log(response): - log.print() - def main(host: str, port: int, disable_safety: bool = False): asyncio.run(run_main(host, port, disable_safety)) From 811e1176f8ce06b24c5624312bc504f968f197c1 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 11 Nov 2024 18:45:26 -0800 Subject: [PATCH 6/6] revert changes --- examples/agents/inflation.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/agents/inflation.py b/examples/agents/inflation.py index e2af7a8a..bdcfcc96 100644 --- a/examples/agents/inflation.py +++ b/examples/agents/inflation.py @@ -39,15 +39,15 @@ async def run_main(host: str, port: int, disable_safety: bool = False): agent_config=agent_config, custom_tools=[], turn_inputs=[ - # prompt_to_turn( - # "Here is a csv, can you describe it ?", - # attachments=[ - # Attachment( - # content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", - # mime_type="text/csv", - # ), - # ], - # ), + prompt_to_turn( + "Here is a csv, can you describe it ?", + attachments=[ + Attachment( + content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", + mime_type="text/csv", + ), + ], + ), prompt_to_turn("Which year ended with the highest inflation ?"), prompt_to_turn( "What macro economic situations that led to such high inflation in that period?"