Commit 56f8aa5
updated llm clients to also abide by the global caching parameter.
djl11 committed Nov 7, 2024
1 parent d5ed783 commit 56f8aa5
Showing 2 changed files with 10 additions and 14 deletions.
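
The substance of the change: the per-call `cache` argument now defaults to `None` instead of `False`, and the clients consult the global caching flag (via the newly imported `_get_caching()`) whenever the caller leaves it unset. Below is a minimal, self-contained sketch of the resulting precedence, assuming a module-level flag behind `_get_caching()`; the `get_global_caching` and `should_cache` names are illustrative stand-ins, not part of the library:

from typing import Optional

# Illustrative stand-in for the module-level flag behind unify's
# _get_caching() (imported for the first time in this commit).
_GLOBAL_CACHING = False


def get_global_caching() -> bool:
    return _GLOBAL_CACHING


def should_cache(cache: Optional[bool]) -> bool:
    # Same expression the commit adds to uni_llm.py:
    #     cache is True or _get_caching() and cache is None
    # `and` binds tighter than `or`, so it reads as:
    #     (cache is True) or (global caching on AND cache left as None)
    # i.e. explicit True always caches, explicit False never does,
    # and the default None defers to the global setting.
    return cache is True or get_global_caching() and cache is None
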
2 changes: 1 addition & 1 deletion unify/universal_api/clients/multi_llm.py
@@ -67,7 +67,7 @@ def __init__(
         stateful: bool = False,
         return_full_completion: bool = False,
         traced: bool = False,
-        cache: bool = False,
+        cache: bool = None,
         # passthrough arguments
         extra_headers: Optional[Headers] = None,
         extra_query: Optional[Query] = None,
22 changes: 9 additions & 13 deletions unify/universal_api/clients/uni_llm.py
@@ -24,7 +24,7 @@
 from openai.types.chat.completion_create_params import ResponseFormat
 from typing_extensions import Self
 from unify import BASE_URL, LOCAL_MODELS
-from ...utils._caching import _get_cache, _write_to_cache
+from ...utils._caching import _get_cache, _write_to_cache, _get_caching
 from ..clients.base import _Client
 from ..types import Prompt
 from ..utils.endpoint_metrics import Metrics
@@ -70,7 +70,7 @@ def __init__(
         stateful: bool = False,
         return_full_completion: bool = False,
         traced: bool = False,
-        cache: bool = False,
+        cache: Optional[bool] = None,
         # passthrough arguments
         extra_headers: Optional[Headers] = None,
         extra_query: Optional[Query] = None,
@@ -813,7 +813,7 @@ def _generate_non_stream(
             log_response_body=log_response_body,
         )
         chat_completion = None
-        if cache:
+        if cache is True or _get_caching() and cache is None:
             chat_completion = _get_cache(fn_name="chat.completions.create", kw=kw)
         if chat_completion is None:
             try:
@@ ... @@
                 print(f"done (thread {threading.get_ident()})")
         except openai.APIStatusError as e:
             raise Exception(e.message)
-        if cache:
+        if cache is True or _get_caching() and cache is None:
             _write_to_cache(
                 fn_name="chat.completions.create",
                 kw=kw,
@@ -1044,14 +1044,10 @@ async def _generate_non_stream(
             log_query_body=log_query_body,
             log_response_body=log_response_body,
         )
-        chat_completion = (
-            _get_cache(
-                fn_name="chat.completions.create",
-                kw=kw,
-            )
-            if cache
-            else None
-        )
+        if cache is True or _get_caching() and cache is None:
+            chat_completion = _get_cache(fn_name="chat.completions.create", kw=kw)
+        else:
+            chat_completion = None
         if chat_completion is None:
             try:
                 if endpoint in LOCAL_MODELS:
@@ ... @@
             )
         except openai.APIStatusError as e:
             raise Exception(e.message)
-        if cache:
+        if cache is True or _get_caching() and cache is None:
             _write_to_cache(
                 fn_name="chat.completions.create",
                 kw=kw,
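
Spelled out as a quick check (this assumes the `should_cache` sketch above is in scope; the truth values follow directly from the condition added in the diff):

# Global caching disabled (matches the previous default behaviour).
assert should_cache(None) is False    # default argument: nothing is cached
assert should_cache(True) is True     # explicit opt-in still caches
assert should_cache(False) is False

# Global caching enabled: clients left at cache=None now abide by it.
_GLOBAL_CACHING = True
assert should_cache(None) is True
assert should_cache(True) is True
assert should_cache(False) is False   # explicit opt-out still wins

The practical upshot is that callers who never touch `cache` now pick up whatever the global setting says, while callers who passed an explicit boolean keep their previous behaviour.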
