diff --git a/SimplerLLM/language/llm.py b/SimplerLLM/language/llm.py index e1a9de1..1ff7aa9 100644 --- a/SimplerLLM/language/llm.py +++ b/SimplerLLM/language/llm.py @@ -1,7 +1,8 @@ -import SimplerLLM.language.llm_providers.openai_llm as openai_llm -import SimplerLLM.language.llm_providers.gemini_llm as gemini_llm -import SimplerLLM.language.llm_providers.anthropic_llm as anthropic_llm -from SimplerLLM.prompts.messages_template import MessagesTemplate +import os +from dotenv import load_dotenv +from SimplerLLM.tools.vector_db import VectorDB +from SimplerLLM.language.llm_providers.openai_llm import generate_response as openai_generate_response +from SimplerLLM.language.llm_providers.openai_llm import generate_response_async as openai_generate_response_async from enum import Enum @@ -23,6 +24,8 @@ def __init__( self.model_name = model_name self.temperature = temperature self.top_p = top_p + self.vector_db = VectorDB() + @staticmethod def create( @@ -52,6 +55,11 @@ def prepare_params(self, model_name, temperature, top_p): "temperature": temperature if temperature else self.temperature, "top_p": top_p if top_p else self.top_p, } + def store_response_as_vector(self, texts): + self.vector_db.store_vectors(texts) + + def find_similar_responses(self, text): + return self.vector_db.query_similar(text) class OpenAILLM(LLM): diff --git a/SimplerLLM/tools/vector_db.py b/SimplerLLM/tools/vector_db.py new file mode 100644 index 0000000..002323c --- /dev/null +++ b/SimplerLLM/tools/vector_db.py @@ -0,0 +1,27 @@ +import os +import chromadb +from chromadb.utils import embedding_functions + +class VectorDB: + def __init__(self): + persistence_directory = "./chroma_db" + self.client = chromadb.PersistentClient(path=persistence_directory) + self.embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2") + self.collection = self.client.get_or_create_collection( + name="responses", + embedding_function=self.embedding_function + ) + + def 
store_vectors(self, texts): + self.collection.add(documents=texts, ids=[f"id_{self.collection.count() + i}" for i in range(len(texts))]) + + def query_vectors(self, query_text): + results = self.collection.query(query_texts=[query_text], n_results=5) + return results['documents'][0] + + def store_response(self, text): + self.collection.add(documents=[text], ids=[f"id_{self.collection.count()}"]) + + def query_similar(self, query_text): + return self.query_vectors(query_text) + diff --git a/chroma_db/104d460b-aaa5-4746-969c-b131149e52a7/data_level0.bin b/chroma_db/104d460b-aaa5-4746-969c-b131149e52a7/data_level0.bin new file mode 100644 index 0000000..ea3192e Binary files /dev/null and b/chroma_db/104d460b-aaa5-4746-969c-b131149e52a7/data_level0.bin differ diff --git a/chroma_db/104d460b-aaa5-4746-969c-b131149e52a7/header.bin b/chroma_db/104d460b-aaa5-4746-969c-b131149e52a7/header.bin new file mode 100644 index 0000000..3e0932a Binary files /dev/null and b/chroma_db/104d460b-aaa5-4746-969c-b131149e52a7/header.bin differ diff --git a/chroma_db/104d460b-aaa5-4746-969c-b131149e52a7/length.bin b/chroma_db/104d460b-aaa5-4746-969c-b131149e52a7/length.bin new file mode 100644 index 0000000..ba341b3 Binary files /dev/null and b/chroma_db/104d460b-aaa5-4746-969c-b131149e52a7/length.bin differ diff --git a/chroma_db/104d460b-aaa5-4746-969c-b131149e52a7/link_lists.bin b/chroma_db/104d460b-aaa5-4746-969c-b131149e52a7/link_lists.bin new file mode 100644 index 0000000..e69de29 diff --git a/chroma_db/7f8bd9ff-1cf4-4944-81ab-e7c257a0268c/data_level0.bin b/chroma_db/7f8bd9ff-1cf4-4944-81ab-e7c257a0268c/data_level0.bin new file mode 100644 index 0000000..ae2f009 Binary files /dev/null and b/chroma_db/7f8bd9ff-1cf4-4944-81ab-e7c257a0268c/data_level0.bin differ diff --git a/chroma_db/7f8bd9ff-1cf4-4944-81ab-e7c257a0268c/header.bin b/chroma_db/7f8bd9ff-1cf4-4944-81ab-e7c257a0268c/header.bin new file mode 100644 index 0000000..d2383e3 Binary files /dev/null and
b/chroma_db/7f8bd9ff-1cf4-4944-81ab-e7c257a0268c/header.bin differ diff --git a/chroma_db/7f8bd9ff-1cf4-4944-81ab-e7c257a0268c/length.bin b/chroma_db/7f8bd9ff-1cf4-4944-81ab-e7c257a0268c/length.bin new file mode 100644 index 0000000..ba34898 Binary files /dev/null and b/chroma_db/7f8bd9ff-1cf4-4944-81ab-e7c257a0268c/length.bin differ diff --git a/chroma_db/7f8bd9ff-1cf4-4944-81ab-e7c257a0268c/link_lists.bin b/chroma_db/7f8bd9ff-1cf4-4944-81ab-e7c257a0268c/link_lists.bin new file mode 100644 index 0000000..e69de29 diff --git a/chroma_db/chroma.sqlite3 b/chroma_db/chroma.sqlite3 new file mode 100644 index 0000000..8322145 Binary files /dev/null and b/chroma_db/chroma.sqlite3 differ diff --git a/new.py b/new.py new file mode 100644 index 0000000..373a26f --- /dev/null +++ b/new.py @@ -0,0 +1,67 @@ +from SimplerLLM.language.llm import LLM, LLMProvider +from dotenv import load_dotenv +import os +import time + +load_dotenv() + +def test_vector_storage_and_retrieval(): + llm = LLM(provider=LLMProvider.OPENAI, model_name="gpt-3.5-turbo") + + prompts = [ + "What is artificial intelligence and how does it differ from human intelligence?", + "Explain the process of machine learning and its key components.", + "Describe the architecture of deep neural networks and their layers.", + "What are the applications of natural language processing in everyday technology?", + "How does computer vision work and what are its real-world applications?", + "Explain the concept of reinforcement learning and its use in robotics.", + "What are the ethical concerns surrounding AI development and deployment?", + "How does transfer learning accelerate AI model development?", + "Describe the differences between supervised, unsupervised, and semi-supervised learning.", + "What is the role of big data in advancing AI capabilities?", + "Explain the concept of explainable AI and why it's important.", + "How do genetic algorithms work in optimization problems?", + "What are the challenges in developing 
artificial general intelligence (AGI)?", + "Describe the impact of AI on healthcare diagnostics and treatment.", + "How does AI contribute to autonomous vehicle technology?" + ] + + print("Storing responses as vectors...") + start_time = time.time() + try: + llm.store_response_as_vector(prompts) + except Exception as e: + print("Error occurred:", e) + end_time = time.time() + print(f"Responses stored successfully. Time taken: {end_time - start_time:.2f} seconds") + + query_prompts = [ + "What are the fundamental principles of AI?", + "How do machines learn from data?", + "Explain the inner workings of neural networks.", + "What are some practical applications of NLP?", + "How is AI changing the automotive industry?", + "What are the moral implications of using AI in decision-making?", + "How is AI transforming the healthcare sector?", + "What are the key differences between AI learning paradigms?", + "How does AI handle complex optimization problems?", + "What are the challenges in making AI systems more transparent?" + ] + + print("\nQuerying for similar responses:") + for query_prompt in query_prompts: + print(f"\nQuery: {query_prompt}") + start_time = time.time() + similar_responses = llm.find_similar_responses(query_prompt) + end_time = time.time() + print(f"Time taken: {end_time - start_time:.2f} seconds") + print("Similar responses:") + for i, response in enumerate(similar_responses, 1): + print(f"{i}. {response}") + +def main(): + print("Starting vector storage and retrieval test...") + test_vector_storage_and_retrieval() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 293f55f..971bf43 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,4 +11,5 @@ python_docx==1.1.0 pytube==15.0.0 Requests==2.31.0 youtube_transcript_api==0.6.2 - +sentence-transformers==3.0.1 +chromadb==0.5.3