From 5c4a5b30288f3f09e186ad73cb2fb0954da14499 Mon Sep 17 00:00:00 2001
From: sethjuarez
Date: Mon, 28 Oct 2024 21:57:53 -0700
Subject: [PATCH] added credential handling for serverless as well as proper async execution

---
 .../prompty/prompty/serverless/executor.py  | 95 ++++++++++++++++++-
 .../prompty/prompty/serverless/processor.py | 41 +++++++-
 runtime/prompty/prompty/tracer.py           | 36 ++++---
 runtime/prompty/pyproject.toml              |  2 +-
 runtime/prompty/tests/test_tracing.py       |  4 +-
 5 files changed, 148 insertions(+), 30 deletions(-)

diff --git a/runtime/prompty/prompty/serverless/executor.py b/runtime/prompty/prompty/serverless/executor.py
index c912490..58a8dc6 100644
--- a/runtime/prompty/prompty/serverless/executor.py
+++ b/runtime/prompty/prompty/serverless/executor.py
@@ -1,3 +1,4 @@
+import azure.identity
 import importlib.metadata
 from typing import Iterator
 from azure.core.credentials import AzureKeyCredential
@@ -5,6 +6,11 @@
     ChatCompletionsClient,
     EmbeddingsClient,
 )
+
+from azure.ai.inference.aio import (
+    ChatCompletionsClient as AsyncChatCompletionsClient,
+    EmbeddingsClient as AsyncEmbeddingsClient,
+)
 from azure.ai.inference.models import (
     StreamingChatCompletions,
     AsyncStreamingChatCompletions,
@@ -24,10 +30,18 @@ class ServerlessExecutor(Invoker):
 
     def __init__(self, prompty: Prompty) -> None:
         super().__init__(prompty)
 
-        # serverless configuration
         self.endpoint = self.prompty.model.configuration["endpoint"]
         self.model = self.prompty.model.configuration["model"]
-        self.key = self.prompty.model.configuration["key"]
+
+        # no key, use default credentials
+        if "key" not in self.prompty.model.configuration:
+            self.credential = azure.identity.DefaultAzureCredential(
+                exclude_shared_token_cache_credential=True
+            )
+        else:
+            self.credential = AzureKeyCredential(
+                self.prompty.model.configuration["key"]
+            )
 
         # api type
         self.api = self.prompty.model.api
@@ -64,7 +78,7 @@ def invoke(self, data: any) -> any:
 
         cargs = {
             "endpoint": self.endpoint,
-            "credential": AzureKeyCredential(self.key),
+            "credential": self.credential,
         }
 
         if self.api == "chat":
@@ -150,4 +164,77 @@ async def invoke_async(self, data: str) -> str:
         str
             The parsed data
         """
-        return self.invoke(data)
+        cargs = {
+            "endpoint": self.endpoint,
+            "credential": self.credential,
+        }
+
+        if self.api == "chat":
+            with Tracer.start("ChatCompletionsClient") as trace:
+                trace("type", "LLM")
+                trace("signature", "azure.ai.inference.aio.ChatCompletionsClient.ctor")
+                trace(
+                    "description", "Azure Unified Inference SDK Async Chat Completions Client"
+                )
+                trace("inputs", cargs)
+                client = AsyncChatCompletionsClient(
+                    user_agent=f"prompty/{VERSION}",
+                    **cargs,
+                )
+                trace("result", client)
+
+            with Tracer.start("complete") as trace:
+                trace("type", "LLM")
+                trace("signature", "azure.ai.inference.aio.ChatCompletionsClient.complete")
+                trace(
+                    "description", "Azure Unified Inference SDK Async Chat Completions Client"
+                )
+                eargs = {
+                    "model": self.model,
+                    "messages": data if isinstance(data, list) else [data],
+                    **self.prompty.model.parameters,
+                }
+                trace("inputs", eargs)
+                r = await client.complete(**eargs)
+                trace("result", r)
+
+            response = self._response(r)
+
+        elif self.api == "completion":
+            raise NotImplementedError(
+                "Serverless Completions API is not implemented yet"
+            )
+
+        elif self.api == "embedding":
+            with Tracer.start("EmbeddingsClient") as trace:
+                trace("type", "LLM")
+                trace("signature", "azure.ai.inference.aio.EmbeddingsClient.ctor")
+                trace("description", "Azure Unified Inference SDK Async Embeddings Client")
+                trace("inputs", cargs)
+                client = AsyncEmbeddingsClient(
+                    user_agent=f"prompty/{VERSION}",
+                    **cargs,
+                )
+                trace("result", client)
+
+            with Tracer.start("embed") as trace:
+                trace("type", "LLM")
+                trace("signature", "azure.ai.inference.aio.EmbeddingsClient.embed")
+                trace(
+                    "description", "Azure Unified Inference SDK Async Embeddings Client"
+                )
+                eargs = {
+                    "model": self.model,
+                    "input": data if isinstance(data, list) else [data],
+                    **self.prompty.model.parameters,
+                }
+                trace("inputs", eargs)
+                r = await client.embed(**eargs)
+                trace("result", r)
+
+            response = self._response(r)
+
+        elif self.api == "image":
+            raise NotImplementedError("Azure OpenAI Image API is not implemented yet")
+
+        return response
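Note: the credential fallback above keys off the presence of "key" in the model configuration: with a key, the executor authenticates with AzureKeyCredential; without one, it falls back to DefaultAzureCredential. A minimal standalone sketch of the same selection logic (pick_credential and the bare configuration dict are illustrative, not part of the library):

    import azure.identity
    from azure.core.credentials import AzureKeyCredential

    def pick_credential(configuration: dict):
        # No explicit key: use the ambient Azure identity (environment
        # variables, managed identity, Azure CLI login, ...).
        if "key" not in configuration:
            return azure.identity.DefaultAzureCredential(
                exclude_shared_token_cache_credential=True
            )
        # Otherwise authenticate to the serverless endpoint with its API key.
        return AzureKeyCredential(configuration["key"])

The choice happens once in __init__, and the resulting self.credential is reused by both the sync and async invoke paths.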
diff --git a/runtime/prompty/prompty/serverless/processor.py b/runtime/prompty/prompty/serverless/processor.py
index 98e1070..32c0b31 100644
--- a/runtime/prompty/prompty/serverless/processor.py
+++ b/runtime/prompty/prompty/serverless/processor.py
@@ -1,6 +1,6 @@
-from typing import Iterator
+from typing import AsyncIterator, Iterator
 from ..invoker import Invoker, InvokerFactory
-from ..core import Prompty, PromptyStream, ToolCall
+from ..core import AsyncPromptyStream, Prompty, PromptyStream, ToolCall
 
 from azure.ai.inference.models import ChatCompletions, EmbeddingsResult
 
@@ -75,4 +75,39 @@ async def invoke_async(self, data: str) -> str:
         str
             The parsed data
         """
-        return self.invoke(data)
+        if isinstance(data, ChatCompletions):
+            response = data.choices[0].message
+            # tool calls available in response
+            if response.tool_calls:
+                return [
+                    ToolCall(
+                        id=tool_call.id,
+                        name=tool_call.function.name,
+                        arguments=tool_call.function.arguments,
+                    )
+                    for tool_call in response.tool_calls
+                ]
+            else:
+                return response.content
+
+        elif isinstance(data, EmbeddingsResult):
+            if len(data.data) == 0:
+                raise ValueError("Invalid data")
+            elif len(data.data) == 1:
+                return data.data[0].embedding
+            else:
+                return [item.embedding for item in data.data]
+        elif isinstance(data, AsyncIterator):
+
+            async def generator():
+                async for chunk in data:
+                    if (
+                        len(chunk.choices) == 1
+                        and chunk.choices[0].delta.content is not None
+                    ):
+                        content = chunk.choices[0].delta.content
+                        yield content
+
+            return AsyncPromptyStream("ServerlessProcessor", generator())
+        else:
+            return data
diff --git a/runtime/prompty/prompty/tracer.py b/runtime/prompty/prompty/tracer.py
index 417cd24..8653d34 100644
--- a/runtime/prompty/prompty/tracer.py
+++ b/runtime/prompty/prompty/tracer.py
@@ -93,7 +93,9 @@ def _name(func: Callable, args):
     if core_invoker:
         name = type(args[0]).__name__
         if signature.endswith("async"):
-            signature = f"{args[0].__module__}.{args[0].__class__.__name__}.invoke_async"
+            signature = (
+                f"{args[0].__module__}.{args[0].__class__.__name__}.invoke_async"
+            )
         else:
             signature = f"{args[0].__module__}.{args[0].__class__.__name__}.invoke"
     else:
@@ -116,20 +118,19 @@ def _results(result: Any) -> dict:
 
 
 def _trace_sync(
-    func: Callable = None, *, description: str = None, itemtype: str = None
-) -> Callable:
-    description = description or ""
+    func: Callable = None, **okwargs: Any
+) -> Callable:
 
     @wraps(func)
     def wrapper(*args, **kwargs):
         name, signature = _name(func, args)
         with Tracer.start(name) as trace:
             trace("signature", signature)
-            if description and description != "":
-                trace("description", description)
-            if itemtype and itemtype != "":
-                trace("type", itemtype)
+            # support arbitrary keyword
+            # arguments for trace decorator
+            for k, v in okwargs.items():
+                trace(k, to_dict(v))
 
             inputs = _inputs(func, args, kwargs)
             trace("inputs", inputs)
@@ -161,20 +162,19 @@ def wrapper(*args, **kwargs):
 
 
 def _trace_async(
-    func: Callable = None, *, description: str = None, itemtype: str = None
-) -> Callable:
-    description = description or ""
+    func: Callable = None, **okwargs: Any
+) -> Callable:
 
     @wraps(func)
     async def wrapper(*args, **kwargs):
         name, signature = _name(func, args)
         with Tracer.start(name) as trace:
             trace("signature", signature)
-            if description and description != "":
-                trace("description", description)
-            if itemtype and itemtype != "":
-                trace("type", itemtype)
+            # support arbitrary keyword
+            # arguments for trace decorator
+            for k, v in okwargs.items():
+                trace(k, to_dict(v))
 
             inputs = _inputs(func, args, kwargs)
             trace("inputs", inputs)
@@ -204,15 +204,11 @@ async def wrapper(*args, **kwargs):
     return wrapper
 
 
-def trace(
-    func: Callable = None, *, description: str = None, itemtype: str = None
-) -> Callable:
+def trace(func: Callable = None, **kwargs: Any) -> Callable:
     if func is None:
-        return partial(trace, description=description, itemtype=itemtype)
-
+        return partial(trace, **kwargs)
     wrapped_method = _trace_async if inspect.iscoroutinefunction(func) else _trace_sync
-
-    return wrapped_method(func, description=description, itemtype=itemtype)
+    return wrapped_method(func, **kwargs)
 
 
 class PromptyTracer:
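Note: with description/itemtype replaced by **okwargs, every keyword handed to @trace is emitted verbatim as its own trace entry via trace(k, to_dict(v)). Existing description=... call sites keep working unchanged, and what used to be itemtype=... can now be passed as type=..., since keys are no longer remapped. A usage sketch (greet is illustrative, not from the codebase):

    from prompty.tracer import trace

    @trace(description="demo function", type="util", custom="anything")
    def greet(name: str) -> str:
        return f"hello {name}"

Each keyword lands in the trace next to the signature, inputs, and result entries the wrapper always records.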
diff --git a/runtime/prompty/pyproject.toml b/runtime/prompty/pyproject.toml
index 7736e64..a5a8529 100644
--- a/runtime/prompty/pyproject.toml
+++ b/runtime/prompty/pyproject.toml
@@ -19,7 +19,7 @@ dependencies = [
 [project.optional-dependencies]
 azure = ["azure-identity>=1.17.1","openai>=1.35.10"]
 openai = ["openai>=1.35.10"]
-serverless = ["azure-ai-inference>=1.0.0b3"]
+serverless = ["azure-identity>=1.17.1","azure-ai-inference>=1.0.0b3"]
 
 
 [tool.pdm]
diff --git a/runtime/prompty/tests/test_tracing.py b/runtime/prompty/tests/test_tracing.py
index c89c596..8749568 100644
--- a/runtime/prompty/tests/test_tracing.py
+++ b/runtime/prompty/tests/test_tracing.py
@@ -241,7 +241,7 @@ async def test_function_calling_async():
 # need to add trace attribute to
 # materialize stream into the function
 # trace decorator
-@trace
+@trace(streaming=True, other="test")
 def test_streaming():
     result = prompty.execute(
         "prompts/streaming.prompty",
@@ -254,7 +254,7 @@ def test_streaming():
 
 
 @pytest.mark.asyncio
-@trace
+@trace(streaming=True)
 async def test_streaming_async():
     result = await prompty.execute_async(
         "prompts/streaming.prompty",
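Note: the streaming tests now pass extra keywords (streaming=True, other="test") through @trace, which the registered tracers receive verbatim. A minimal sketch of a tracer that would surface those entries, assuming the Tracer.add registration contract prompty uses elsewhere (console_tracer and materialize are illustrative):

    import contextlib
    from prompty.tracer import Tracer, trace

    @contextlib.contextmanager
    def console_tracer(name: str):
        # Tracer.start(name) enters each registered tracer and yields a
        # callable that receives every (key, value) trace entry.
        print(f"enter: {name}")
        yield lambda key, value: print(f"  {key}: {value}")
        print(f"exit: {name}")

    Tracer.add("console", console_tracer)

    @trace(streaming=True, other="test")
    def materialize() -> str:
        return "done"

    materialize()  # prints signature, streaming, other, inputs, result entries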