Merge pull request #597 from cheshire-cat-ai/develop

version 1.4.0
cheshire-cat-ai · Dec 1, 2023 · 2eaa194 · 2eaa194
2 parents e01819c + 9d06d9a
commit 2eaa194
Show file tree

Hide file tree

Showing 43 changed files with 1,513 additions and 1,041 deletions.
diff --git a/.github/workflows/tag.yml b/.github/workflows/tag.yml
@@ -15,6 +15,8 @@ permissions:
 jobs:
   tag:
     runs-on: ubuntu-latest
+    outputs:
+      tag: ${{ steps.read_toml.outputs.value }}
     steps:
       - uses: actions/checkout@v3
       - name: Read TOML
@@ -29,10 +31,12 @@ jobs:
           tag_name: "${{steps.read_toml.outputs.value}}"
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          id: publish_tag
+          run: echo "::set-output name=tag::${{ steps.read_toml.outputs.value }}"
 
   build-and-push-image:
     runs-on: ubuntu-latest
-
+    needs: tag
     steps:
       - name: Checkout repository
         uses: actions/checkout@v3
@@ -64,6 +68,7 @@ jobs:
         uses: docker/metadata-action@v5
         with:
           images: ghcr.io/${{ github.repository }}
+
 
       # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`.
       # If the build succeeds, it pushes the image to GitHub Packages.
@@ -73,9 +78,8 @@ jobs:
       - name: Build and push Docker image
         uses: docker/build-push-action@v5
         with:
-          context: core/
-          push: true
-          tags: ghcr.io/${{ github.repository }} # ${{ steps.meta.outputs.tags }}
-#          labels: ${{ steps.meta.outputs.labels }}
-          platforms: linux/amd64,linux/arm64
-          provenance: false
+         context: core/
+         push: true
+         tags: ghcr.io/${{ github.repository }}:latest , ghcr.io/${{ github.repository }}:${{ needs.tag.outputs.tag }}
+         platforms: linux/amd64,linux/arm64
+         provenance: false
diff --git a/.gitignore b/.gitignore
@@ -32,3 +32,6 @@ core/cat/plugins/*
 # tests plugin folder
 plugin_folder
 
+# cache embedder
+core/local_cache/*
+
diff --git a/README.md b/README.md
@@ -26,7 +26,7 @@
 ## Production ready AI assistant framework
 
 The Cheshire Cat is a framework to build custom AIs on top of any language model. 
-If you ever used systems like WordPress or Django to build web apps, imagine the Cat as a similar tool, but specific for AI.
+If you have ever used systems like WordPress or Django to build web apps, imagine the Cat as a similar tool, but specific for AI.
 
 ## Quickstart
 
@@ -53,10 +53,10 @@ Enjoy the Cat!
 
 ## Why use the Cat
 
-- 🌍 Supports any language model (works with OpenAI chatGPT, LLAMA2, HuggingFace models, custom)
+- 🌍 Supports any language model (works with OpenAI chatGPT, Llama2, HuggingFace models, custom)
 - 🐘 Remembers conversations and documents and uses them in conversation
 - 🚀 Extensible via plugins (AI can connect to your APIs or execute custom python code)
-- 🐋 Production Ready - 100% [dockerized](https://docs.docker.com/get-docker/)
+- 🐋 Production ready - 100% [dockerized](https://docs.docker.com/get-docker/)
 - 👩‍👧‍👦 Active [Discord community](https://discord.gg/bHX5sNFCYU) and easy to understand [docs](https://cheshire-cat-ai.github.io/docs/)
 
 We are committed to openness, privacy and creativity, we want to bring AI to the long tail. If you want to know more about our vision and values, read the [Code of Ethics](./readme/CODE-OF-ETHICS.md). 

diff --git a/core/cat/db/database.py b/core/cat/db/database.py
@@ -1,21 +1,17 @@
-from tinydb import TinyDB
 import os
+from tinydb import TinyDB
 
-#TODO can we add a verbose level for logging?
+from cat.utils import singleton
 
+@singleton
 class Database:
 
-    _instance = None
-
-    def __new__(cls):
-        if not cls._instance:
-            cls._instance = super().__new__(cls)
-            cls._instance.db = TinyDB(cls._instance.get_file_name())
-        return cls._instance.db
+    def __init__(self):
+        self.db = TinyDB(self.get_file_name())
 
     def get_file_name(self):
         tinydb_file = os.getenv("METADATA_FILE", "metadata.json")
         return tinydb_file
 
 def get_db():
-    return Database()
+    return Database().db
diff --git a/core/cat/factory/custom_embedder.py b/core/cat/factory/custom_embedder.py
@@ -65,22 +65,3 @@ def embed_query(self, text: str) -> List[float]:
         ret.raise_for_status()
         return ret.json()['data'][0]['embedding']
 
-class CustomFastembedEmbeddings(Embeddings):
-    """Use Fastembed for embedding.
-    """
-    def __init__(self, url, model,max_length) -> None:
-        self.url = url
-        output = httpx.post(f"{url}/embeddings", json={"model": model, "max_length": max_length}, follow_redirects=True, timeout=None)
-        output.raise_for_status()
-
-
-    def embed_documents(self, texts: List[str]):
-        ret = httpx.post(f"{self.url}/embeddings/document", json={"document": texts}, timeout=None)
-        ret.raise_for_status()
-        return ret.json()
-
-    def embed_query(self, text: str) -> List[float]:
-        ret = httpx.post(f"{self.url}/embeddings/prompt", json={"prompt": text}, timeout=None)
-        ret.raise_for_status()
-        return ret.json()
-
diff --git a/core/cat/factory/embedder.py b/core/cat/factory/embedder.py
@@ -1,8 +1,8 @@
 from typing import Type
 import langchain
 from pydantic import BaseModel, ConfigDict
-
-from cat.factory.custom_embedder import CustomFastembedEmbeddings, DumbEmbedder, CustomOpenAIEmbeddings
+from langchain.embeddings.fastembed import FastEmbedEmbeddings
+from cat.factory.custom_embedder import DumbEmbedder, CustomOpenAIEmbeddings
 
 
 # Base class to manage LLM configuration.
@@ -34,6 +34,7 @@ class EmbedderFakeConfig(EmbedderSettings):
         json_schema_extra = {
             "humanReadableName": "Default Embedder",
             "description": "Configuration for default embedder. It just outputs random numbers.",
+            "link": "",
         }
     )
 
@@ -46,6 +47,7 @@ class EmbedderDumbConfig(EmbedderSettings):
         json_schema_extra = {
             "humanReadableName": "Dumb Embedder",
             "description": "Configuration for default embedder. It encodes the pairs of characters",
+            "link": "",
         }
     )
 
@@ -58,6 +60,7 @@ class EmbedderLlamaCppConfig(EmbedderSettings):
         json_schema_extra = {
             "humanReadableName": "Self-hosted llama-cpp-python embedder",
             "description": "Self-hosted llama-cpp-python embedder",
+            "link": "",
         }
     )
 
@@ -71,6 +74,7 @@ class EmbedderOpenAIConfig(EmbedderSettings):
         json_schema_extra = {
             "humanReadableName": "OpenAI Embedder",
             "description": "Configuration for OpenAI embeddings",
+            "link": "https://platform.openai.com/docs/models/overview",
         }
     )
 
@@ -90,6 +94,7 @@ class EmbedderAzureOpenAIConfig(EmbedderSettings):
         json_schema_extra = {
             "humanReadableName": "Azure OpenAI Embedder",
             "description": "Configuration for Azure OpenAI embeddings",
+            "link": "https://azure.microsoft.com/en-us/products/ai-services/openai-service",
         }
     )
 
@@ -103,23 +108,26 @@ class EmbedderCohereConfig(EmbedderSettings):
         json_schema_extra = {
             "humanReadableName": "Cohere Embedder",
             "description": "Configuration for Cohere embeddings",
+            "link": "https://docs.cohere.com/docs/models",
         }
     )
 
 
-class EmbedderFastEmbedConfig(EmbedderSettings):
-    url: str
-    model: str = "intfloat/multilingual-e5-large"
-    max_length: int = 512
-
-    _pyclass: Type = CustomFastembedEmbeddings
+class EmbedderQdrantFastEmbedConfig(EmbedderSettings):
+    model_name: str = "BAAI/bge-base-en"
+    max_length: int = 512 # Unknown behavior for values > 512.
+    doc_embed_type: str = "passage" # as suggested in the fastembed documentation, "passage" is the best option for documents.
+    
+    _pyclass: Type = FastEmbedEmbeddings
 
     model_config = ConfigDict(
         json_schema_extra = {
-            "humanReadableName": "Fast Embedder",
-            "description": "Configuration for Fast embeddings",
+            "humanReadableName": "Qdrant FastEmbed (Local)",
+            "description": "Configuration for Qdrant FastEmbed",
+            "link": "https://qdrant.github.io/fastembed/",
         }
     )
+
 
 
 SUPPORTED_EMDEDDING_MODELS = [
@@ -129,7 +137,7 @@ class EmbedderFastEmbedConfig(EmbedderSettings):
     EmbedderOpenAIConfig,
     EmbedderAzureOpenAIConfig,
     EmbedderCohereConfig,
-    EmbedderFastEmbedConfig
+    EmbedderQdrantFastEmbedConfig
 ]
 
 

diff --git a/core/cat/factory/llm.py b/core/cat/factory/llm.py
@@ -39,9 +39,8 @@ class LLMDefaultConfig(LLMSettings):
             "humanReadableName": "Default Language Model",
             "description":
                 "A dumb LLM just telling that the Cat is not configured. "
-                "There will be a nice LLM here "
-                "once consumer hardware allows it.",
-            "link": ""
+                "There will be a nice LLM here once consumer hardware allows it.",
+            "link": "",
         }
     )
 
@@ -68,10 +67,8 @@ def get_llm_from_config(cls, config):
     model_config = ConfigDict(
         json_schema_extra={
             "humanReadableName": "Custom LLM",
-            "description":
-                "LLM on a custom endpoint. "
-                "See docs for examples.",
-            "link": "https://cheshirecat.ai/2023/08/19/custom-large-language-model/"
+            "description": "LLM on a custom endpoint. See docs for examples.",
+            "link": "https://cheshirecat.ai/2023/08/19/custom-large-language-model/",
         }
     )
 
@@ -90,6 +87,7 @@ class LLMLlamaCppConfig(LLMSettings):
         json_schema_extra={
             "humanReadableName": "Self-hosted llama-cpp-python",
             "description": "Self-hosted llama-cpp-python compatible LLM",
+            "link": "",
         }
     )
 
@@ -105,7 +103,7 @@ class LLMOpenAIChatConfig(LLMSettings):
         json_schema_extra={
             "humanReadableName": "OpenAI ChatGPT",
             "description": "Chat model from OpenAI",
-            "link": "https://platform.openai.com/docs/models/overview"
+            "link": "https://platform.openai.com/docs/models/overview",
         }
     )
 
@@ -120,10 +118,8 @@ class LLMOpenAIConfig(LLMSettings):
     model_config = ConfigDict(
         json_schema_extra={
             "humanReadableName": "OpenAI GPT-3",
-            "description":
-                "OpenAI GPT-3. More expensive but "
-                "also more flexible than ChatGPT.",
-            "link": "https://platform.openai.com/docs/models/overview"
+            "description": "OpenAI GPT-3. More expensive but also more flexible than ChatGPT.",
+            "link": "https://platform.openai.com/docs/models/overview",
         }
     )
 
@@ -145,7 +141,7 @@ class LLMAzureChatOpenAIConfig(LLMSettings):
         json_schema_extra={
             "humanReadableName": "Azure OpenAI Chat Models",
             "description": "Chat model from Azure OpenAI",
-            "link": "https://azure.microsoft.com/en-us/products/ai-services/openai-service"
+            "link": "https://azure.microsoft.com/en-us/products/ai-services/openai-service",
         }
     )
 
@@ -168,7 +164,7 @@ class LLMAzureOpenAIConfig(LLMSettings):
         json_schema_extra={
             "humanReadableName": "Azure OpenAI Completion models",
             "description": "Configuration for Cognitive Services Azure OpenAI",
-            "link": "https://azure.microsoft.com/en-us/products/ai-services/openai-service"
+            "link": "https://azure.microsoft.com/en-us/products/ai-services/openai-service",
         }
     )
 
@@ -183,7 +179,7 @@ class LLMCohereConfig(LLMSettings):
         json_schema_extra={
             "humanReadableName": "Cohere",
             "description": "Configuration for Cohere language model",
-            "link": "https://docs.cohere.com/docs/models"
+            "link": "https://docs.cohere.com/docs/models",
         }
     )
 
@@ -203,7 +199,7 @@ class LLMHuggingFaceTextGenInferenceConfig(LLMSettings):
         json_schema_extra={
             "humanReadableName": "HuggingFace TextGen Inference",
             "description": "Configuration for HuggingFace TextGen Inference",
-            "link": "https://huggingface.co/text-generation-inference"
+            "link": "https://huggingface.co/text-generation-inference",
         }
     )
 
@@ -218,9 +214,8 @@ class LLMHuggingFaceEndpointConfig(LLMSettings):
     model_config = ConfigDict(
         json_schema_extra={
             "humanReadableName": "HuggingFace Endpoint",
-            "description":
-                "Configuration for HuggingFace Endpoint language models",
-            "link": "https://huggingface.co/inference-endpoints"
+            "description": "Configuration for HuggingFace Endpoint language models",
+            "link": "https://huggingface.co/inference-endpoints",
         }
     )
 
@@ -239,7 +234,7 @@ class LLMOllamaConfig(LLMSettings):
         json_schema_extra={
             "humanReadableName": "Ollama",
             "description": "Configuration for Ollama",
-            "link": "https://ollama.ai/library"
+            "link": "https://ollama.ai/library",
         }
     )
 

diff --git a/core/cat/headers.py b/core/cat/headers.py
@@ -5,6 +5,8 @@
 from fastapi import Security, HTTPException
 from fastapi.security.api_key import APIKeyHeader
 
+from cat.looking_glass.stray_cat import StrayCat
+
 API_KEY = [
     key.strip() for key in os.getenv("API_KEY", "").split("|") if key.strip()
 ]
@@ -53,9 +55,13 @@ def check_api_key(request: Request, api_key: str = Security(api_key_header)) ->
         )
 
 
-def check_user_id(request: Request) -> str:
+# get or create session (StrayCat)
+def session(request: Request) -> str:
+
+    strays = request.app.state.strays
     user_id = request.headers.get("user_id")
-    if user_id:
-        return user_id
-    else:
-        return "user"
+    event_loop = request.app.state.event_loop
+
+    if user_id not in strays.keys():
+        strays[user_id] = StrayCat(user_id=user_id, event_loop=event_loop)
+    return strays[user_id]