Integration with Vector Databases #1

Open — wants to merge 5 commits into `main`. Showing changes from all commits.
16 changes: 12 additions & 4 deletions SimplerLLM/language/llm.py
@@ -1,7 +1,8 @@
import SimplerLLM.language.llm_providers.openai_llm as openai_llm
import SimplerLLM.language.llm_providers.gemini_llm as gemini_llm
import SimplerLLM.language.llm_providers.anthropic_llm as anthropic_llm
from SimplerLLM.prompts.messages_template import MessagesTemplate
import os
from dotenv import load_dotenv
from SimplerLLM.tools.vector_db import VectorDB
from SimplerLLM.language.llm_providers.openai_llm import generate_response as openai_generate_response
from SimplerLLM.language.llm_providers.openai_llm import generate_response_async as openai_generate_response_async
from enum import Enum


@@ -23,6 +24,8 @@ def __init__(
self.model_name = model_name
self.temperature = temperature
self.top_p = top_p
self.vector_db = VectorDB()

Comment on lines +27 to +28
Review of the LLM class constructor.

The constructor hard-codes a VectorDB instance. Allowing dependency injection makes the class easier to test and more flexible. With `vector_db=None` added to the constructor's parameter list, the assignment becomes:

```diff
-        self.vector_db = VectorDB()
+        self.vector_db = vector_db if vector_db is not None else VectorDB()
```

Committable suggestion — carefully review the code before committing: ensure it accurately replaces the highlighted code, has correct indentation, and is tested.

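To illustrate the dependency-injection pattern the review suggests, here is a minimal, hypothetical sketch. `FakeVectorDB` and `LLMWithInjection` are illustrative stand-ins, not the project's actual classes; in the real `LLM` class the fallback would be `VectorDB()` rather than the fake used here.

```python
class FakeVectorDB:
    """In-memory stand-in for VectorDB, useful in unit tests."""
    def __init__(self):
        self.stored = []

    def store_vectors(self, texts):
        self.stored.extend(texts)

    def query_similar(self, text):
        # Naive similarity: return stored texts sharing a word with the query.
        words = set(text.lower().split())
        return [t for t in self.stored if words & set(t.lower().split())]


class LLMWithInjection:
    """Illustrates accepting an optional vector_db dependency."""
    def __init__(self, vector_db=None):
        # In the real class this would fall back to VectorDB(); here we use
        # the fake so the sketch is self-contained.
        self.vector_db = vector_db if vector_db is not None else FakeVectorDB()

    def store_response_as_vector(self, texts):
        self.vector_db.store_vectors(texts)

    def find_similar_responses(self, text):
        return self.vector_db.query_similar(text)


fake = FakeVectorDB()
llm = LLMWithInjection(vector_db=fake)
llm.store_response_as_vector(["machine learning basics"])
print(llm.find_similar_responses("what is machine learning?"))  # → ['machine learning basics']
```

Because the dependency is injected, a test can verify storage and retrieval without touching a real Chroma database.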

@staticmethod
def create(
@@ -52,6 +55,11 @@ def prepare_params(self, model_name, temperature, top_p):
"temperature": temperature if temperature else self.temperature,
"top_p": top_p if top_p else self.top_p,
}
def store_response_as_vector(self, texts):
self.vector_db.store_vectors(texts)

def find_similar_responses(self, text):
return self.vector_db.query_similar(text)


class OpenAILLM(LLM):
27 changes: 27 additions & 0 deletions SimplerLLM/tools/vector_db.py
@@ -0,0 +1,27 @@
import os
import chromadb
from chromadb.utils import embedding_functions

class VectorDB:
def __init__(self):
persistence_directory = "./chroma_db"
self.client = chromadb.PersistentClient(path=persistence_directory)
self.embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
self.collection = self.client.get_or_create_collection(
name="responses",
embedding_function=self.embedding_function
)
Comment on lines +6 to +13
Review of the VectorDB class constructor.

The constructor initializes the PersistentClient and sets up a collection with an embedding function. The hard-coded database path (`"./chroma_db"`) could be made configurable to support different environments:

```diff
-    def __init__(self):
-        persistence_directory = "./chroma_db"
+    def __init__(self, persistence_directory="./chroma_db"):
         self.client = chromadb.PersistentClient(path=persistence_directory)
```

Committable suggestion was skipped due to low confidence.

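A small sketch of how the configurable path might be resolved. The `CHROMA_DB_PATH` environment-variable name and the helper function are assumptions for illustration, not part of the project:

```python
import os

def resolve_persistence_dir(explicit=None, default="./chroma_db"):
    """Pick the persistence directory: explicit argument > CHROMA_DB_PATH env var > default."""
    return explicit or os.environ.get("CHROMA_DB_PATH") or default

print(resolve_persistence_dir("/tmp/my_vectors"))  # → /tmp/my_vectors
```

The explicit argument keeps tests deterministic, while the environment variable lets deployments relocate the database without code changes.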

def store_vectors(self, texts):
self.collection.add(documents=texts, ids=[f"id_{i}" for i in range(len(texts))])

def query_vectors(self, query_text):
results = self.collection.query(query_texts=[query_text], n_results=5)
return results['documents'][0]

def store_response(self, text):
self.collection.add(documents=[text], ids=[f"id_{self.collection.count()}"])
Comment on lines +22 to +23
Review of the store_response method.

The method adds a single document to the collection. Using the collection's count as an ID is risky: two concurrent writers can read the same count and produce colliding IDs. A UUID is a more robust unique identifier (and `import uuid` belongs at the top of the module rather than inside the method):

```diff
     def store_response(self, text):
-        self.collection.add(documents=[text], ids=[f"id_{self.collection.count()}"])
+        self.collection.add(documents=[text], ids=[str(uuid.uuid4())])
```

Committable suggestion — carefully review the code before committing: ensure it accurately replaces the highlighted code, has correct indentation, and is tested.

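A quick sketch of the race the review warns about, and why `uuid4` avoids it. The writer variables are illustrative:

```python
import uuid

# Count-based IDs: two writers that both read count == 5 before either
# inserts would each generate "id_5" and collide.
count = 5
writer_a_id = f"id_{count}"
writer_b_id = f"id_{count}"
print(writer_a_id == writer_b_id)  # → True — collision

# uuid4 IDs are generated independently and are unique for practical purposes.
id_a = str(uuid.uuid4())
id_b = str(uuid.uuid4())
print(id_a == id_b)  # → False
```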

def query_similar(self, query_text):
return self.query_vectors(query_text)

Binary files added under `chroma_db/` (including `chroma_db/chroma.sqlite3`) along with two empty files; binary contents not shown.
67 changes: 67 additions & 0 deletions new.py
@@ -0,0 +1,67 @@
from SimplerLLM.language.llm import LLM, LLMProvider
from dotenv import load_dotenv
import os
import time

load_dotenv()

def test_vector_storage_and_retrieval():
llm = LLM(provider=LLMProvider.OPENAI, model_name="gpt-3.5-turbo")

prompts = [
"What is artificial intelligence and how does it differ from human intelligence?",
"Explain the process of machine learning and its key components.",
"Describe the architecture of deep neural networks and their layers.",
"What are the applications of natural language processing in everyday technology?",
"How does computer vision work and what are its real-world applications?",
"Explain the concept of reinforcement learning and its use in robotics.",
"What are the ethical concerns surrounding AI development and deployment?",
"How does transfer learning accelerate AI model development?",
"Describe the differences between supervised, unsupervised, and semi-supervised learning.",
"What is the role of big data in advancing AI capabilities?",
"Explain the concept of explainable AI and why it's important.",
"How do genetic algorithms work in optimization problems?",
"What are the challenges in developing artificial general intelligence (AGI)?",
"Describe the impact of AI on healthcare diagnostics and treatment.",
"How does AI contribute to autonomous vehicle technology?"
]

print("Storing responses as vectors...")
start_time = time.time()
try:
llm.store_response_as_vector(prompts)
except Exception as e:
print("Error occurred:", e)
end_time = time.time()
print(f"Responses stored successfully. Time taken: {end_time - start_time:.2f} seconds")

query_prompts = [
"What are the fundamental principles of AI?",
"How do machines learn from data?",
"Explain the inner workings of neural networks.",
"What are some practical applications of NLP?",
"How is AI changing the automotive industry?",
"What are the moral implications of using AI in decision-making?",
"How is AI transforming the healthcare sector?",
"What are the key differences between AI learning paradigms?",
"How does AI handle complex optimization problems?",
"What are the challenges in making AI systems more transparent?"
]

print("\nQuerying for similar responses:")
for query_prompt in query_prompts:
print(f"\nQuery: {query_prompt}")
start_time = time.time()
similar_responses = llm.find_similar_responses(query_prompt)
end_time = time.time()
print(f"Time taken: {end_time - start_time:.2f} seconds")
print("Similar responses:")
for i, response in enumerate(similar_responses, 1):
print(f"{i}. {response}")

Comment on lines +51 to +61
Add error handling to the querying process.

The loop for querying similar responses is clear and straightforward, but wrapping the lookup in a try/except (and skipping failed queries) would make the test more robust:

```diff
         start_time = time.time()
-        similar_responses = llm.find_similar_responses(query_prompt)
+        try:
+            similar_responses = llm.find_similar_responses(query_prompt)
+        except Exception as e:
+            print("Error occurred:", e)
+            continue
         end_time = time.time()
```

Committable suggestion — carefully review the code before committing: ensure it accurately replaces the highlighted code, has correct indentation, and is tested.
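The try/except/continue pattern proposed above can be sketched in isolation. `flaky_lookup` is a hypothetical stub standing in for `llm.find_similar_responses`:

```python
def flaky_lookup(query):
    """Stub: raises for one query to simulate a backend failure."""
    if query == "bad query":
        raise RuntimeError("backend unavailable")
    return [f"match for {query!r}"]

results = []
for query in ["good query", "bad query", "another query"]:
    try:
        similar = flaky_lookup(query)
    except Exception as e:
        print("Error occurred:", e)
        continue  # skip timing and printing for the failed query
    results.append((query, similar))

print(len(results))  # → 2 — the failing query was skipped, the rest completed
```

The `continue` matters: without it, the loop would fall through and reference a stale (or undefined) `similar_responses` after a failure.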

def main():
print("Starting vector storage and retrieval test...")
test_vector_storage_and_retrieval()

if __name__ == "__main__":
main()
3 changes: 2 additions & 1 deletion requirements.txt
@@ -11,4 +11,5 @@ python_docx==1.1.0
pytube==15.0.0
Requests==2.31.0
youtube_transcript_api==0.6.2

sentence-transformers==3.0.1
chromadb==0.5.3