From 08417b86d81ae7fdb42cb03d1eb5a79cfbf46c1c Mon Sep 17 00:00:00 2001 From: Piero Savastano Date: Fri, 15 Sep 2023 19:15:01 +0200 Subject: [PATCH 01/10] piepeable hooks via tea_cup --- core/cat/looking_glass/cheshire_cat.py | 151 +++++++++++++++- .../mad_hatter/core_plugin/hooks/models.py | 161 ------------------ core/cat/mad_hatter/mad_hatter.py | 38 ++++- 3 files changed, 181 insertions(+), 169 deletions(-) delete mode 100644 core/cat/mad_hatter/core_plugin/hooks/models.py diff --git a/core/cat/looking_glass/cheshire_cat.py b/core/cat/looking_glass/cheshire_cat.py index accc8599..a50437d8 100644 --- a/core/cat/looking_glass/cheshire_cat.py +++ b/core/cat/looking_glass/cheshire_cat.py @@ -12,6 +12,18 @@ from cat.memory.long_term_memory import LongTermMemory from cat.looking_glass.agent_manager import AgentManager +# TODO: natural language dependencies; move to another file +import cat.factory.llm as llms +import cat.factory.embedder as embedders +from cat.db import crud +from langchain.llms import Cohere, OpenAI, OpenAIChat, AzureOpenAI, HuggingFaceTextGenInference +from langchain.chat_models import ChatOpenAI +from langchain.base_language import BaseLanguageModel +from langchain import HuggingFaceHub +from langchain.chat_models import AzureChatOpenAI +from cat.factory.custom_llm import CustomOpenAI + + MSG_TYPES = Literal["notification", "chat", "error"] # main class @@ -75,13 +87,144 @@ def load_natural_language(self): See Also -------- - get_language_model - get_language_embedder agent_prompt_prefix """ # LLM and embedder - self._llm = self.mad_hatter.execute_hook("get_language_model") - self.embedder = self.mad_hatter.execute_hook("get_language_embedder") + self._llm = self.get_language_model() + self.embedder = self.get_language_embedder() + + def get_language_model(self) -> BaseLanguageModel: + """Large Language Model (LLM) selection at bootstrap time. + + Returns + ------- + llm : BaseLanguageModel + Langchain `BaseLanguageModel` instance of the selected model. + + Notes + ----- + Bootstrapping is the process of loading the plugins, the natural language objects (e.g. the LLM), the memories, + the *Agent Manager* and the *Rabbit Hole*. + + """ + selected_llm = crud.get_setting_by_name(name="llm_selected") + + if selected_llm is None: + # return default LLM + llm = llms.LLMDefaultConfig.get_llm_from_config({}) + + else: + # get LLM factory class + selected_llm_class = selected_llm["value"]["name"] + FactoryClass = getattr(llms, selected_llm_class) + + # obtain configuration and instantiate LLM + selected_llm_config = crud.get_setting_by_name(name=selected_llm_class) + try: + llm = FactoryClass.get_llm_from_config(selected_llm_config["value"]) + except Exception as e: + import traceback + traceback.print_exc() + llm = llms.LLMDefaultConfig.get_llm_from_config({}) + + return llm + + + def get_language_embedder(self) -> embedders.EmbedderSettings: + """Hook into the embedder selection. + + Allows to modify how the Cat selects the embedder at bootstrap time. + + Bootstrapping is the process of loading the plugins, the natural language objects (e.g. the LLM), + the memories, the *Agent Manager* and the *Rabbit Hole*. + + Parameters + ---------- + cat: CheshireCat + Cheshire Cat instance. + + Returns + ------- + embedder : Embeddings + Selected embedder model. 
+ """ + # Embedding LLM + + selected_embedder = crud.get_setting_by_name(name="embedder_selected") + + if selected_embedder is not None: + # get Embedder factory class + selected_embedder_class = selected_embedder["value"]["name"] + FactoryClass = getattr(embedders, selected_embedder_class) + + # obtain configuration and instantiate Embedder + selected_embedder_config = crud.get_setting_by_name(name=selected_embedder_class) + embedder = FactoryClass.get_embedder_from_config(selected_embedder_config["value"]) + + return embedder + + # OpenAI embedder + if type(self._llm) in [OpenAI, OpenAIChat, ChatOpenAI]: + embedder = embedders.EmbedderOpenAIConfig.get_embedder_from_config( + { + "openai_api_key": self._llm.openai_api_key, + } + ) + + # Azure + elif type(self._llm) in [AzureOpenAI, AzureChatOpenAI]: + embedder = embedders.EmbedderAzureOpenAIConfig.get_embedder_from_config( + { + "openai_api_key": self._llm.openai_api_key, + "openai_api_type": "azure", + "model": "text-embedding-ada-002", + # Now the only model for embeddings is text-embedding-ada-002 + # It is also possible to use the Azure "deployment" name that is user defined + # when the model is deployed to Azure. + # "deployment": "my-text-embedding-ada-002", + "openai_api_base": self._llm.openai_api_base, + # https://learn.microsoft.com/en-us/azure/cognitive-services/openai/reference#embeddings + # current supported versions 2022-12-01,2023-03-15-preview, 2023-05-15 + # Don't mix api versions https://github.com/hwchase17/langchain/issues/4775 + "openai_api_version": "2023-05-15", + } + ) + + # Cohere + elif type(self._llm) in [Cohere]: + embedder = embedders.EmbedderCohereConfig.get_embedder_from_config( + { + "cohere_api_key": self._llm.cohere_api_key, + "model": "embed-multilingual-v2.0", + # Now the best model for embeddings is embed-multilingual-v2.0 + } + ) + + # HuggingFace + elif type(self._llm) in [HuggingFaceHub]: + embedder = embedders.EmbedderHuggingFaceHubConfig.get_embedder_from_config( + { + "huggingfacehub_api_token": self._llm.huggingfacehub_api_token, + "repo_id": "sentence-transformers/all-mpnet-base-v2", + } + ) + + # Llama-cpp-python + elif type(self._llm) in [CustomOpenAI]: + embedder = embedders.EmbedderLlamaCppConfig.get_embedder_from_config( + { + "url": self._llm.url + } + ) + + else: + # If no embedder matches vendor, and no external embedder is configured, we use the DumbEmbedder. + # `This embedder is not a model properly trained + # and this makes it not suitable to effectively embed text, + # "but it does not know this and embeds anyway".` - cit. Nicola Corbellini + embedder = embedders.EmbedderDumbConfig.get_embedder_from_config({}) + + return embedder def load_memory(self): """Load LongTerMemory and WorkingMemory.""" diff --git a/core/cat/mad_hatter/core_plugin/hooks/models.py b/core/cat/mad_hatter/core_plugin/hooks/models.py deleted file mode 100644 index a41a4e4c..00000000 --- a/core/cat/mad_hatter/core_plugin/hooks/models.py +++ /dev/null @@ -1,161 +0,0 @@ -"""Hooks to modify the Cat's language and embedding models. - -Here is a collection of methods to hook into the settings of the Large Language Model and the Embedder. 
- -""" - -import os - -import cat.factory.llm as llms -import cat.factory.embedder as embedders -from cat.db import crud -from langchain.llms import Cohere, OpenAI, OpenAIChat, AzureOpenAI, HuggingFaceTextGenInference -from langchain.chat_models import ChatOpenAI -from langchain.base_language import BaseLanguageModel -from langchain import HuggingFaceHub -from langchain.chat_models import AzureChatOpenAI -from cat.mad_hatter.decorators import hook -from cat.factory.custom_llm import CustomOpenAI - - -@hook(priority=0) -def get_language_model(cat) -> BaseLanguageModel: - """Hook into the Large Language Model (LLM) selection. - - Allows to modify how the Cat selects the LLM at bootstrap time. - - Parameters - ---------- - cat: CheshireCat - Cheshire Cat instance. - - Returns - ------- - lll : BaseLanguageModel - Langchain `BaseLanguageModel` instance of the selected model. - - Notes - ----- - Bootstrapping is the process of loading the plugins, the natural language objects (e.g. the LLM), the memories, - the *Agent Manager* and the *Rabbit Hole*. - - """ - selected_llm = crud.get_setting_by_name(name="llm_selected") - - if selected_llm is None: - # return default LLM - llm = llms.LLMDefaultConfig.get_llm_from_config({}) - - else: - # get LLM factory class - selected_llm_class = selected_llm["value"]["name"] - FactoryClass = getattr(llms, selected_llm_class) - - # obtain configuration and instantiate LLM - selected_llm_config = crud.get_setting_by_name(name=selected_llm_class) - try: - llm = FactoryClass.get_llm_from_config(selected_llm_config["value"]) - except Exception as e: - import traceback - traceback.print_exc() - llm = llms.LLMDefaultConfig.get_llm_from_config({}) - - return llm - - -@hook(priority=0) -def get_language_embedder(cat) -> embedders.EmbedderSettings: - """Hook into the embedder selection. - - Allows to modify how the Cat selects the embedder at bootstrap time. - - Bootstrapping is the process of loading the plugins, the natural language objects (e.g. the LLM), - the memories, the *Agent Manager* and the *Rabbit Hole*. - - Parameters - ---------- - cat: CheshireCat - Cheshire Cat instance. - - Returns - ------- - embedder : Embeddings - Selected embedder model. - """ - # Embedding LLM - - selected_embedder = crud.get_setting_by_name(name="embedder_selected") - - if selected_embedder is not None: - # get Embedder factory class - selected_embedder_class = selected_embedder["value"]["name"] - FactoryClass = getattr(embedders, selected_embedder_class) - - # obtain configuration and instantiate Embedder - selected_embedder_config = crud.get_setting_by_name(name=selected_embedder_class) - embedder = FactoryClass.get_embedder_from_config(selected_embedder_config["value"]) - - return embedder - - # OpenAI embedder - if type(cat._llm) in [OpenAI, OpenAIChat, ChatOpenAI]: - embedder = embedders.EmbedderOpenAIConfig.get_embedder_from_config( - { - "openai_api_key": cat._llm.openai_api_key, - } - ) - - # Azure - elif type(cat._llm) in [AzureOpenAI, AzureChatOpenAI]: - embedder = embedders.EmbedderAzureOpenAIConfig.get_embedder_from_config( - { - "openai_api_key": cat._llm.openai_api_key, - "openai_api_type": "azure", - "model": "text-embedding-ada-002", - # Now the only model for embeddings is text-embedding-ada-002 - # It is also possible to use the Azure "deployment" name that is user defined - # when the model is deployed to Azure. 
- # "deployment": "my-text-embedding-ada-002", - "openai_api_base": cat._llm.openai_api_base, - # https://learn.microsoft.com/en-us/azure/cognitive-services/openai/reference#embeddings - # current supported versions 2022-12-01,2023-03-15-preview, 2023-05-15 - # Don't mix api versions https://github.com/hwchase17/langchain/issues/4775 - "openai_api_version": "2023-05-15", - } - ) - - # Cohere - elif type(cat._llm) in [Cohere]: - embedder = embedders.EmbedderCohereConfig.get_embedder_from_config( - { - "cohere_api_key": cat._llm.cohere_api_key, - "model": "embed-multilingual-v2.0", - # Now the best model for embeddings is embed-multilingual-v2.0 - } - ) - - # HuggingFace - elif type(cat._llm) in [HuggingFaceHub]: - embedder = embedders.EmbedderHuggingFaceHubConfig.get_embedder_from_config( - { - "huggingfacehub_api_token": cat._llm.huggingfacehub_api_token, - "repo_id": "sentence-transformers/all-mpnet-base-v2", - } - ) - - # Llama-cpp-python - elif type(cat._llm) in [CustomOpenAI]: - embedder = embedders.EmbedderLlamaCppConfig.get_embedder_from_config( - { - "url": cat._llm.url - } - ) - - else: - # If no embedder matches vendor, and no external embedder is configured, we use the DumbEmbedder. - # `This embedder is not a model properly trained - # and this makes it not suitable to effectively embed text, - # "but it does not know this and embeds anyway".` - cit. Nicola Corbellini - embedder = embedders.EmbedderDumbConfig.get_embedder_from_config({}) - - return embedder diff --git a/core/cat/mad_hatter/mad_hatter.py b/core/cat/mad_hatter/mad_hatter.py index f258a8a5..4d7cd2ca 100644 --- a/core/cat/mad_hatter/mad_hatter.py +++ b/core/cat/mad_hatter/mad_hatter.py @@ -3,6 +3,7 @@ import time import shutil import os +import traceback from cat.log import log from cat.db import crud @@ -246,12 +247,41 @@ def toggle_plugin(self, plugin_id): # execute requested hook def execute_hook(self, hook_name, *args): + log.critical(hook_name) + # check if hook is supported if hook_name not in self.hooks.keys(): raise Exception(f"Hook {hook_name} not present in any plugin") - # run hooks - for h in self.hooks[hook_name]: - return h.function(*args, cat=self.ccat) - # TODO: should be run as a pipe, not return immediately + # First argument is passed to `execute_hook` is the pipeable one. + # We call it `tea_cup` as every hook called will receive it as an input, + # can add sugar, milk, or whatever, and return it for the next hook + if len(args) == 0: + tea_cup = None + else: + tea_cup = args[0] + + # run hooks + for hook in self.hooks[hook_name]: + try: + # pass tea_cup to the hooks, along other args + + # hook has no input (aside cat) + if tea_cup is None: + hook.function(cat=self.ccat) + continue + + # hook has at least one argument, and it will be piped + tea_spoon = hook.function(tea_cup, *args[1:], cat=self.ccat) + if tea_spoon is None: + log.warning(f"Hook {hook.plugin_id}::{hook.name} returned None") + else: + tea_cup = tea_spoon + except Exception as e: + log.error(f"Error in plugin {hook.plugin_id}::{hook.name}") + log.error(e) + traceback.print_exc() + + # tea_cup has passed through all hooks. 
Return final output + return tea_cup \ No newline at end of file From 46221538198372f351f36ff3b43a7c9cc0aa9e18 Mon Sep 17 00:00:00 2001 From: Piero Savastano Date: Fri, 15 Sep 2023 19:27:06 +0200 Subject: [PATCH 02/10] refactor flow hooks --- core/cat/mad_hatter/core_plugin/hooks/flow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/cat/mad_hatter/core_plugin/hooks/flow.py b/core/cat/mad_hatter/core_plugin/hooks/flow.py index cbedc7ee..fe704cbf 100644 --- a/core/cat/mad_hatter/core_plugin/hooks/flow.py +++ b/core/cat/mad_hatter/core_plugin/hooks/flow.py @@ -25,7 +25,7 @@ def before_cat_bootstrap(cat) -> None: cat : CheshireCat Cheshire Cat instance. """ - return None + pass # do nothing # Called after cat bootstrap @@ -46,7 +46,7 @@ def after_cat_bootstrap(cat) -> None: cat : CheshireCat Cheshire Cat instance. """ - return None + pass # do nothing # Called when a user message arrives. From 25108b01f954e6a0bbb441b6bcd339c95acbab4d Mon Sep 17 00:00:00 2001 From: Piero Savastano Date: Fri, 15 Sep 2023 20:00:18 +0200 Subject: [PATCH 03/10] refactor recall hooks --- core/cat/looking_glass/cheshire_cat.py | 12 ++++++------ core/cat/mad_hatter/core_plugin/hooks/flow.py | 8 +++----- core/cat/mad_hatter/mad_hatter.py | 5 ++--- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/core/cat/looking_glass/cheshire_cat.py b/core/cat/looking_glass/cheshire_cat.py index a50437d8..b1627eaf 100644 --- a/core/cat/looking_glass/cheshire_cat.py +++ b/core/cat/looking_glass/cheshire_cat.py @@ -258,15 +258,15 @@ def recall_relevant_memories_to_working_memory(self): after_cat_recalls_memories """ user_id = self.working_memory.get_user_id() - user_message = self.working_memory["user_message_json"]["text"] + recall_query = self.working_memory["user_message_json"]["text"] # We may want to search in memory - memory_query_text = self.mad_hatter.execute_hook("cat_recall_query", user_message) - log.info(f'Recall query: "{memory_query_text}"') + recall_query = self.mad_hatter.execute_hook("cat_recall_query", recall_query) + log.info(f'Recall query: "{recall_query}"') # Embed recall query - memory_query_embedding = self.embedder.embed_query(memory_query_text) - self.working_memory["memory_query"] = memory_query_text + memory_query_embedding = self.embedder.embed_query(recall_query) + self.working_memory["recall_query"] = recall_query # hook to do something before recall begins self.mad_hatter.execute_hook("before_cat_recalls_memories") @@ -312,7 +312,7 @@ def recall_relevant_memories_to_working_memory(self): self.working_memory[memory_key] = memories # hook to modify/enrich retrieved memories - self.mad_hatter.execute_hook("after_cat_recalls_memories", memory_query_text) + self.mad_hatter.execute_hook("after_cat_recalls_memories") def llm(self, prompt: str) -> str: """Generate a response using the LLM model. diff --git a/core/cat/mad_hatter/core_plugin/hooks/flow.py b/core/cat/mad_hatter/core_plugin/hooks/flow.py index fe704cbf..af6afee7 100644 --- a/core/cat/mad_hatter/core_plugin/hooks/flow.py +++ b/core/cat/mad_hatter/core_plugin/hooks/flow.py @@ -201,21 +201,19 @@ def before_cat_recalls_procedural_memories(procedural_recall_config: dict, cat) # Called just before the cat recalls memories. @hook(priority=0) -def after_cat_recalls_memories(query: str, cat) -> None: +def after_cat_recalls_memories(cat) -> None: """Hook after semantic search in memories. 
- The hook is executed just after the Cat searches for the meaningful context in both memories + The hook is executed just after the Cat searches for the meaningful context in memories and stores it in the *Working Memory*. Parameters ---------- - query : str - Query used to retrieve memories. cat : CheshireCat Cheshire Cat instance. """ - return None + pass # do nothing # What is the input to recall memories? diff --git a/core/cat/mad_hatter/mad_hatter.py b/core/cat/mad_hatter/mad_hatter.py index 4d7cd2ca..cd57e65b 100644 --- a/core/cat/mad_hatter/mad_hatter.py +++ b/core/cat/mad_hatter/mad_hatter.py @@ -274,9 +274,8 @@ def execute_hook(self, hook_name, *args): # hook has at least one argument, and it will be piped tea_spoon = hook.function(tea_cup, *args[1:], cat=self.ccat) - if tea_spoon is None: - log.warning(f"Hook {hook.plugin_id}::{hook.name} returned None") - else: + log.info(f"Hook {hook.plugin_id}::{hook.name} returned {tea_spoon}") + if tea_spoon is not None: tea_cup = tea_spoon except Exception as e: log.error(f"Error in plugin {hook.plugin_id}::{hook.name}") From 1e8c18fb913a3792be7889f30dc5331eb2bdcedd Mon Sep 17 00:00:00 2001 From: Piero Savastano Date: Fri, 15 Sep 2023 20:15:33 +0200 Subject: [PATCH 04/10] refactor memory recall hooks, 2 --- core/cat/looking_glass/cheshire_cat.py | 10 ++- core/cat/mad_hatter/core_plugin/hooks/flow.py | 90 +++++++++---------- 2 files changed, 51 insertions(+), 49 deletions(-) diff --git a/core/cat/looking_glass/cheshire_cat.py b/core/cat/looking_glass/cheshire_cat.py index b1627eaf..0abec674 100644 --- a/core/cat/looking_glass/cheshire_cat.py +++ b/core/cat/looking_glass/cheshire_cat.py @@ -251,6 +251,7 @@ def recall_relevant_memories_to_working_memory(self): See Also -------- + cat_recall_query before_cat_recalls_memories before_cat_recalls_episodic_memories before_cat_recalls_declarative_memories @@ -265,29 +266,30 @@ def recall_relevant_memories_to_working_memory(self): log.info(f'Recall query: "{recall_query}"') # Embed recall query - memory_query_embedding = self.embedder.embed_query(recall_query) + recall_query_embedding = self.embedder.embed_query(recall_query) self.working_memory["recall_query"] = recall_query # hook to do something before recall begins self.mad_hatter.execute_hook("before_cat_recalls_memories") # Setting default recall configs for each memory + # TODO: can these data structrues become instances of a RecallSettings class? default_episodic_recall_config = { - "embedding": memory_query_embedding, + "embedding": recall_query_embedding, "k": 3, "threshold": 0.7, "metadata": {"source": user_id}, } default_declarative_recall_config = { - "embedding": memory_query_embedding, + "embedding": recall_query_embedding, "k": 3, "threshold": 0.7, "metadata": None, } default_procedural_recall_config = { - "embedding": memory_query_embedding, + "embedding": recall_query_embedding, "k": 3, "threshold": 0.7, "metadata": None, diff --git a/core/cat/mad_hatter/core_plugin/hooks/flow.py b/core/cat/mad_hatter/core_plugin/hooks/flow.py index af6afee7..bdaf6bc1 100644 --- a/core/cat/mad_hatter/core_plugin/hooks/flow.py +++ b/core/cat/mad_hatter/core_plugin/hooks/flow.py @@ -93,6 +93,50 @@ def before_cat_reads_message(user_message_json: dict, cat) -> dict: return user_message_json +# What is the input to recall memories? 
+# Here you can do HyDE embedding, condense recent conversation or condition recall query on something else important to your AI +@hook(priority=0) +def cat_recall_query(user_message: str, cat) -> str: + """Hook the semantic search query. + + This hook allows to edit the user's message used as a query for context retrieval from memories. + As a result, the retrieved context can be conditioned editing the user's message. + + Parameters + ---------- + user_message : str + String with the text received from the user. + cat : CheshireCat + Cheshire Cat instance to exploit the Cat's methods. + + Returns + ------- + Edited string to be used for context retrieval in memory. The returned string is further stored in the + Working Memory at `cat.working_memory["memory_query"]`. + + Notes + ----- + For example, this hook is a suitable to perform Hypothetical Document Embedding (HyDE). + HyDE [1]_ strategy exploits the user's message to generate a hypothetical answer. This is then used to recall + the relevant context from the memory. + An official plugin is available to test this technique. + + References + ---------- + [1] Gao, L., Ma, X., Lin, J., & Callan, J. (2022). Precise Zero-Shot Dense Retrieval without Relevance Labels. + arXiv preprint arXiv:2212.10496. + + """ + # example 1: HyDE embedding + # return cat.hypothetis_chain.run(user_message) + + # example 2: Condense recent conversation + # TODO + + # here we just return the latest user message as is + return user_message + + # Called just before the cat recalls memories. @hook(priority=0) def before_cat_recalls_memories(cat) -> None: @@ -109,7 +153,7 @@ def before_cat_recalls_memories(cat) -> None: Cheshire Cat instance. """ - return None + pass # do nothing @hook(priority=0) @@ -216,50 +260,6 @@ def after_cat_recalls_memories(cat) -> None: pass # do nothing -# What is the input to recall memories? -# Here you can do HyDE embedding, condense recent conversation or condition recall query on something else important to your AI -@hook(priority=0) -def cat_recall_query(user_message: str, cat) -> str: - """Hook the semantic search query. - - This hook allows to edit the user's message used as a query for context retrieval from memories. - As a result, the retrieved context can be conditioned editing the user's message. - - Parameters - ---------- - user_message : str - String with the text received from the user. - cat : CheshireCat - Cheshire Cat instance to exploit the Cat's methods. - - Returns - ------- - Edited string to be used for context retrieval in memory. The returned string is further stored in the - Working Memory at `cat.working_memory["memory_query"]`. - - Notes - ----- - For example, this hook is a suitable to perform Hypothetical Document Embedding (HyDE). - HyDE [1]_ strategy exploits the user's message to generate a hypothetical answer. This is then used to recall - the relevant context from the memory. - An official plugin is available to test this technique. - - References - ---------- - [1] Gao, L., Ma, X., Lin, J., & Callan, J. (2022). Precise Zero-Shot Dense Retrieval without Relevance Labels. - arXiv preprint arXiv:2212.10496. - - """ - # example 1: HyDE embedding - # return cat.hypothetis_chain.run(user_message) - - # example 2: Condense recent conversation - # TODO - - # here we just return the latest user message as is - return user_message - - # Called just after memories are recalled. 
They are stored in: # - cat.working_memory["episodic_memories"] # - cat.working_memory["declarative_memories"] From 8b6c37cfc0bba02d8596bc733771f081a1690430 Mon Sep 17 00:00:00 2001 From: Piero Savastano Date: Fri, 15 Sep 2023 21:12:56 +0200 Subject: [PATCH 05/10] fix non argument hooks --- core/cat/looking_glass/agent_manager.py | 60 ++++++++++++++++++- core/cat/looking_glass/cheshire_cat.py | 53 +--------------- .../mad_hatter/core_plugin/hooks/prompt.py | 2 +- core/cat/mad_hatter/mad_hatter.py | 27 +++++---- core/tests/routes/test_websocket.py | 1 - 5 files changed, 75 insertions(+), 68 deletions(-) diff --git a/core/cat/looking_glass/agent_manager.py b/core/cat/looking_glass/agent_manager.py index f6ba4508..9c65a816 100644 --- a/core/cat/looking_glass/agent_manager.py +++ b/core/cat/looking_glass/agent_manager.py @@ -85,7 +85,7 @@ def execute_memory_chain(self, agent_input, prompt_prefix, prompt_suffix): return out - def execute_agent(self, agent_input): + def execute_agent(self): """Instantiate the Agent with tools. The method formats the main prompt and gather the allowed tools. It also instantiates a conversational Agent @@ -98,13 +98,17 @@ def execute_agent(self, agent_input): """ mad_hatter = self.cat.mad_hatter + # prepare input to be passed to the agent. + # Info will be extracted from working memory + agent_input = self.format_agent_input() + # this hook allows to reply without executing the agent (for example canned responses, out-of-topic barriers etc.) fast_reply = mad_hatter.execute_hook("before_agent_starts", agent_input) if fast_reply: return fast_reply - prompt_prefix = mad_hatter.execute_hook("agent_prompt_prefix") - prompt_suffix = mad_hatter.execute_hook("agent_prompt_suffix") + prompt_prefix = mad_hatter.execute_hook("agent_prompt_prefix", "TODO_HOOK") + prompt_suffix = mad_hatter.execute_hook("agent_prompt_suffix", "TODO_HOOK") allowed_tools = mad_hatter.execute_hook("agent_allowed_tools") @@ -160,3 +164,53 @@ def execute_agent(self, agent_input): out = self.execute_memory_chain(agent_input, prompt_prefix, prompt_suffix) return out + + def format_agent_input(self): + """Format the input for the Agent. + + The method formats the strings of recalled memories and chat history that will be provided to the Langchain + Agent and inserted in the prompt. + + Returns + ------- + dict + Formatted output to be parsed by the Agent executor. + + Notes + ----- + The context of memories and conversation history is properly formatted before being parsed by the and, hence, + information are inserted in the main prompt. + All the formatting pipeline is hookable and memories can be edited. 
+ + See Also + -------- + agent_prompt_episodic_memories + agent_prompt_declarative_memories + agent_prompt_chat_history + """ + + mad_hatter = self.cat.mad_hatter + working_memory = self.cat.working_memory + + # format memories to be inserted in the prompt + episodic_memory_formatted_content = mad_hatter.execute_hook( + "agent_prompt_episodic_memories", + working_memory["episodic_memories"], + ) + declarative_memory_formatted_content = mad_hatter.execute_hook( + "agent_prompt_declarative_memories", + working_memory["declarative_memories"], + ) + + # format conversation history to be inserted in the prompt + conversation_history_formatted_content = mad_hatter.execute_hook( + "agent_prompt_chat_history", + working_memory["history"] + ) + + return { + "input": working_memory["user_message_json"]["text"], + "episodic_memory": episodic_memory_formatted_content, + "declarative_memory": declarative_memory_formatted_content, + "chat_history": conversation_history_formatted_content, + } diff --git a/core/cat/looking_glass/cheshire_cat.py b/core/cat/looking_glass/cheshire_cat.py index 0abec674..853187a0 100644 --- a/core/cat/looking_glass/cheshire_cat.py +++ b/core/cat/looking_glass/cheshire_cat.py @@ -340,51 +340,6 @@ def llm(self, prompt: str) -> str: if isinstance(self._llm, langchain.chat_models.base.BaseChatModel): return self._llm.call_as_llm(prompt) - def format_agent_input(self): - """Format the input for the Agent. - - The method formats the strings of recalled memories and chat history that will be provided to the Langchain - Agent and inserted in the prompt. - - Returns - ------- - dict - Formatted output to be parsed by the Agent executor. - - Notes - ----- - The context of memories and conversation history is properly formatted before being parsed by the and, hence, - information are inserted in the main prompt. - All the formatting pipeline is hookable and memories can be edited. - - See Also - -------- - agent_prompt_episodic_memories - agent_prompt_declarative_memories - agent_prompt_chat_history - """ - # format memories to be inserted in the prompt - episodic_memory_formatted_content = self.mad_hatter.execute_hook( - "agent_prompt_episodic_memories", - self.working_memory["episodic_memories"], - ) - declarative_memory_formatted_content = self.mad_hatter.execute_hook( - "agent_prompt_declarative_memories", - self.working_memory["declarative_memories"], - ) - - # format conversation history to be inserted in the prompt - conversation_history_formatted_content = self.mad_hatter.execute_hook( - "agent_prompt_chat_history", self.working_memory["history"] - ) - - return { - "input": self.working_memory["user_message_json"]["text"], - "episodic_memory": episodic_memory_formatted_content, - "declarative_memory": declarative_memory_formatted_content, - "chat_history": conversation_history_formatted_content, - } - def send_ws_message(self, content: str, msg_type: MSG_TYPES = "notification"): """Send a message via websocket. @@ -492,13 +447,9 @@ def __call__(self, user_message_json): "description": err_message, } - # prepare input to be passed to the agent. - # Info will be extracted from working memory - agent_input = self.format_agent_input() - # reply with agent try: - cat_message = self.agent_manager.execute_agent(agent_input) + cat_message = self.agent_manager.execute_agent() except Exception as e: # This error happens when the LLM # does not respect prompt instructions. 
@@ -512,7 +463,7 @@ def __call__(self, user_message_json): unparsable_llm_output = error_description.replace("Could not parse LLM output: `", "").replace("`", "") cat_message = { - "input": agent_input["input"], + "input": self.working_memory["user_message_json"]["text"], "intermediate_steps": [], "output": unparsable_llm_output } diff --git a/core/cat/mad_hatter/core_plugin/hooks/prompt.py b/core/cat/mad_hatter/core_plugin/hooks/prompt.py index a36a4612..20676dab 100644 --- a/core/cat/mad_hatter/core_plugin/hooks/prompt.py +++ b/core/cat/mad_hatter/core_plugin/hooks/prompt.py @@ -14,7 +14,7 @@ @hook(priority=0) -def agent_prompt_prefix(cat) -> str: +def agent_prompt_prefix(prefix, cat) -> str: """Hook the main prompt prefix. Allows to edit the prefix of the *Main Prompt* that the Cat feeds to the *Agent*. diff --git a/core/cat/mad_hatter/mad_hatter.py b/core/cat/mad_hatter/mad_hatter.py index cd57e65b..b91f09cc 100644 --- a/core/cat/mad_hatter/mad_hatter.py +++ b/core/cat/mad_hatter/mad_hatter.py @@ -253,25 +253,28 @@ def execute_hook(self, hook_name, *args): if hook_name not in self.hooks.keys(): raise Exception(f"Hook {hook_name} not present in any plugin") - - # First argument is passed to `execute_hook` is the pipeable one. + # Hook has no arguments (aside cat) + # no need to pipe + if len(args) == 0: + for hook in self.hooks[hook_name]: + try: + hook.function(cat=self.ccat) + except Exception as e: + log.error(f"Error in plugin {hook.plugin_id}::{hook.name}") + log.error(e) + traceback.print_exc() + return + + # Hook with arguments. + # First argument is passed to `execute_hook` is the pipeable one. # We call it `tea_cup` as every hook called will receive it as an input, # can add sugar, milk, or whatever, and return it for the next hook - if len(args) == 0: - tea_cup = None - else: - tea_cup = args[0] + tea_cup = args[0] # run hooks for hook in self.hooks[hook_name]: try: # pass tea_cup to the hooks, along other args - - # hook has no input (aside cat) - if tea_cup is None: - hook.function(cat=self.ccat) - continue - # hook has at least one argument, and it will be piped tea_spoon = hook.function(tea_cup, *args[1:], cat=self.ccat) log.info(f"Hook {hook.plugin_id}::{hook.name} returned {tea_spoon}") diff --git a/core/tests/routes/test_websocket.py b/core/tests/routes/test_websocket.py index f53a4688..34010e9e 100644 --- a/core/tests/routes/test_websocket.py +++ b/core/tests/routes/test_websocket.py @@ -2,7 +2,6 @@ from tests.utils import send_websocket_message -# TODO: ws endpoint still talks with the prod cat configuration def test_websocket(client): # use fake LLM From 94b26998d70a7853890114dfa68bab736eeb1c0b Mon Sep 17 00:00:00 2001 From: Piero Savastano Date: Fri, 15 Sep 2023 22:21:01 +0200 Subject: [PATCH 06/10] first refactor for agent hooks (prompts) --- core/cat/looking_glass/agent_manager.py | 40 ++++++++------- core/cat/looking_glass/prompts.py | 49 +++++++++++++++++- .../cat/mad_hatter/core_plugin/hooks/agent.py | 45 ++-------------- .../mad_hatter/core_plugin/hooks/prompt.py | 51 ++----------------- 4 files changed, 79 insertions(+), 106 deletions(-) diff --git a/core/cat/looking_glass/agent_manager.py b/core/cat/looking_glass/agent_manager.py index 9c65a816..80e39826 100644 --- a/core/cat/looking_glass/agent_manager.py +++ b/core/cat/looking_glass/agent_manager.py @@ -2,7 +2,7 @@ from langchain.chains import LLMChain from langchain.agents import AgentExecutor, LLMSingleActionAgent -from cat.looking_glass.prompts import ToolPromptTemplate +from cat.looking_glass import 
prompts from cat.looking_glass.output_parser import ToolOutputParser from cat.log import log @@ -26,9 +26,10 @@ def __init__(self, cat): def execute_tool_agent(self, agent_input, allowed_tools): allowed_tools_names = [t.name for t in allowed_tools] + # TODO: dynamic input_variables as in the main prompt - prompt = ToolPromptTemplate( - template = self.cat.mad_hatter.execute_hook("agent_prompt_instructions"), + prompt = prompts.ToolPromptTemplate( + template = self.cat.mad_hatter.execute_hook("agent_prompt_instructions", prompts.TOOL_PROMPT), tools=allowed_tools, # This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically # This includes the `intermediate_steps` variable because it is needed to fill the scratchpad @@ -60,17 +61,13 @@ def execute_tool_agent(self, agent_input, allowed_tools): def execute_memory_chain(self, agent_input, prompt_prefix, prompt_suffix): + + input_variables = [i for i in agent_input.keys() if i in prompt_prefix + prompt_suffix] # memory chain (second step) memory_prompt = PromptTemplate( template = prompt_prefix + prompt_suffix, - input_variables=[ - "input", - "chat_history", - "episodic_memory", - "declarative_memory", - "tools_output" - ] + input_variables=input_variables ) memory_chain = LLMChain( @@ -97,20 +94,27 @@ def execute_agent(self): Instance of the Agent provided with a set of tools. """ mad_hatter = self.cat.mad_hatter + working_memory = self.cat.working_memory # prepare input to be passed to the agent. # Info will be extracted from working memory agent_input = self.format_agent_input() # this hook allows to reply without executing the agent (for example canned responses, out-of-topic barriers etc.) - fast_reply = mad_hatter.execute_hook("before_agent_starts", agent_input) - if fast_reply: - return fast_reply - - prompt_prefix = mad_hatter.execute_hook("agent_prompt_prefix", "TODO_HOOK") - prompt_suffix = mad_hatter.execute_hook("agent_prompt_suffix", "TODO_HOOK") - - allowed_tools = mad_hatter.execute_hook("agent_allowed_tools") + #fast_reply = mad_hatter.execute_hook("before_agent_starts", agent_input) + #if fast_reply: + # return fast_reply + + prompt_prefix = mad_hatter.execute_hook("agent_prompt_prefix", prompts.MAIN_PROMPT_PREFIX) + prompt_suffix = mad_hatter.execute_hook("agent_prompt_suffix", prompts.MAIN_PROMPT_SUFFIX) + + # tools currently recalled in working memory + recalled_tools = working_memory["procedural_memories"] + # Get the tools names only + tools_names = [t[0].metadata["name"] for t in recalled_tools] + tools_names = mad_hatter.execute_hook("agent_allowed_tools", tools_names) + # Get tools with that name from mad_hatter + allowed_tools = [i for i in mad_hatter.tools if i.name in tools_names] # Try to get information from tools if there is some allowed if len(allowed_tools) > 0: diff --git a/core/cat/looking_glass/prompts.py b/core/cat/looking_glass/prompts.py index 44b99406..c5f3c85b 100644 --- a/core/cat/looking_glass/prompts.py +++ b/core/cat/looking_glass/prompts.py @@ -4,6 +4,7 @@ from langchain.agents.tools import BaseTool from langchain.prompts import StringPromptTemplate + class ToolPromptTemplate(StringPromptTemplate): # The template to use template: str @@ -25,4 +26,50 @@ def format(self, **kwargs) -> str: # Create a list of tool names for the tools provided kwargs["tool_names"] = ", ".join([tool.name for tool in self.tools]) - return self.template.format(**kwargs) \ No newline at end of file + return self.template.format(**kwargs) + + +TOOL_PROMPT = """Answer the 
following question: `{input}` +You can only reply using these tools: + +{tools} +none_of_the_others: none_of_the_others(None) - Use this tool if none of the others tools help. Input is always None. + +If you want to use tools, use the following format: +Action: the name of the action to take, should be one of [{tool_names}] +Action Input: the input to the action +Observation: the result of the action +... +Action: the name of the action to take, should be one of [{tool_names}] +Action Input: the input to the action +Observation: the result of the action + +When you have a final answer respond with: +Final Answer: the final answer to the original input question + +Begin! + +Question: {input} +{agent_scratchpad}""" + + +MAIN_PROMPT_PREFIX = """You are the Cheshire Cat AI, an intelligent AI that passes the Turing test. +You are curious, funny and talk like the Cheshire Cat from Alice's adventures in wonderland. +You answer Human with a focus on the following context. +""" + + +MAIN_PROMPT_SUFFIX = """ +# Context + +{episodic_memory} + +{declarative_memory} + +{tools_output} + +## Conversation until now:{chat_history} + - Human: {input} + - AI: """ + + diff --git a/core/cat/mad_hatter/core_plugin/hooks/agent.py b/core/cat/mad_hatter/core_plugin/hooks/agent.py index e35a4b3a..86d3781f 100644 --- a/core/cat/mad_hatter/core_plugin/hooks/agent.py +++ b/core/cat/mad_hatter/core_plugin/hooks/agent.py @@ -58,12 +58,12 @@ def before_agent_starts(agent_input, cat) -> Union[None, Dict]: @hook(priority=0) -def agent_allowed_tools(cat) -> List[BaseTool]: +def agent_allowed_tools(allowed_tools: List[str], cat) -> List[str]: """Hook the allowed tools. Allows to decide which tools end up in the *Agent* prompt. - To decide, you can filter the list of loaded tools, but you can also check the context in `cat.working_memory` + To decide, you can filter the list of tools' names, but you can also check the context in `cat.working_memory` and launch custom chains with `cat._llm`. Parameters @@ -73,48 +73,11 @@ def agent_allowed_tools(cat) -> List[BaseTool]: Returns ------- - tools : List[BaseTool] + tools : List[str] List of allowed Langchain tools. """ - # tools currently recalled in working memory - recalled_tools = cat.working_memory["procedural_memories"] + return allowed_tools - # Get the tools names only - tools_names = [t[0].metadata["name"] for t in recalled_tools] - - # Get the LangChain BaseTool by name - tools = [i for i in cat.mad_hatter.tools if i.name in tools_names] - - return tools - - -@hook(priority=0) -def before_agent_creates_prompt(input_variables, main_prompt, cat): - """Hook to dynamically define the input variables. - - Allows to dynamically filter the input variables that end up in the main prompt by looking for which placeholders - there are in it starting from a fixed list. - - Parameters - ---------- - input_variables : List - List of placeholders to look for in the main prompt. - main_prompt: str - String made of the prompt prefix, the agent instructions and the prompt suffix. - cat : CheshireCat - Cheshire Cat instance. - - Returns - ------- - input_variables : List[str] - List of placeholders present in the main prompt. 
- - """ - - # Loop the input variables and check if they are in the main prompt - input_variables = [i for i in input_variables if i in main_prompt] - - return input_variables diff --git a/core/cat/mad_hatter/core_plugin/hooks/prompt.py b/core/cat/mad_hatter/core_plugin/hooks/prompt.py index 20676dab..e749add3 100644 --- a/core/cat/mad_hatter/core_plugin/hooks/prompt.py +++ b/core/cat/mad_hatter/core_plugin/hooks/prompt.py @@ -38,16 +38,12 @@ def agent_prompt_prefix(prefix, cat) -> str: The next part of the prompt (generated form the *Agent*) contains the list of available Tools. """ - prefix = """You are the Cheshire Cat AI, an intelligent AI that passes the Turing test. -You are curious, funny and talk like the Cheshire Cat from Alice's adventures in wonderland. -You answer Human with a focus on the following context. -""" return prefix @hook(priority=0) -def agent_prompt_instructions(cat) -> str: +def agent_prompt_instructions(instructions, cat) -> str: """Hook the instruction prompt. Allows to edit the instructions that the Cat feeds to the *Agent*. @@ -81,36 +77,11 @@ def agent_prompt_instructions(cat) -> str: """ - DEFAULT_TOOL_TEMPLATE = """Answer the following question: `{input}` - You can only reply using these tools: - - {tools} - none_of_the_others: none_of_the_others(None) - Use this tool if none of the others tools help. Input is always None. - - If you want to use tools, use the following format: - Action: the name of the action to take, should be one of [{tool_names}] - Action Input: the input to the action - Observation: the result of the action - ... - Action: the name of the action to take, should be one of [{tool_names}] - Action Input: the input to the action - Observation: the result of the action - - When you have a final answer respond with: - Final Answer: the final answer to the original input question - - Begin! - - Question: {input} - {agent_scratchpad}""" - - - # here we piggy back directly on langchain agent instructions. Different instructions will require a different OutputParser - return DEFAULT_TOOL_TEMPLATE + return instructions @hook(priority=0) -def agent_prompt_suffix(cat) -> str: +def agent_prompt_suffix(prompt_suffix: str, cat) -> str: """Hook the main prompt suffix. Allows to edit the suffix of the *Main Prompt* that the Cat feeds to the *Agent*. @@ -138,20 +109,8 @@ def agent_prompt_suffix(cat) -> str: - {agent_scratchpad} is where the *Agent* can concatenate tools use and multiple calls to the LLM. """ - suffix = """ -# Context - -{episodic_memory} - -{declarative_memory} - -{tools_output} - -## Conversation until now:{chat_history} - - Human: {input} - - AI: """ - return suffix + return prompt_suffix @hook(priority=0) @@ -266,7 +225,7 @@ def agent_prompt_declarative_memories(memory_docs: List[Document], cat) -> str: @hook(priority=0) -def agent_prompt_chat_history(chat_history: List[Dict], cat) -> str: +def agent_prompt_chat_history(chat_history: List[Dict], cat) -> List[Dict]: """Hook the chat history. This hook converts to text the recent conversation turns fed to the *Agent*. 
From ab020334f1f47be77bd30b6a69055a791f82ccbf Mon Sep 17 00:00:00 2001 From: Piero Savastano Date: Fri, 15 Sep 2023 22:54:18 +0200 Subject: [PATCH 07/10] search TODO_HOOK in code --- .../cat/mad_hatter/core_plugin/hooks/agent.py | 1 + core/cat/mad_hatter/core_plugin/hooks/flow.py | 22 --------- .../mad_hatter/core_plugin/hooks/prompt.py | 3 ++ .../core_plugin/hooks/rabbithole.py | 48 ++----------------- core/cat/rabbit_hole.py | 12 ++++- 5 files changed, 17 insertions(+), 69 deletions(-) diff --git a/core/cat/mad_hatter/core_plugin/hooks/agent.py b/core/cat/mad_hatter/core_plugin/hooks/agent.py index 86d3781f..7e10e536 100644 --- a/core/cat/mad_hatter/core_plugin/hooks/agent.py +++ b/core/cat/mad_hatter/core_plugin/hooks/agent.py @@ -12,6 +12,7 @@ from cat.log import log +# TODO_HOOK @hook(priority=0) def before_agent_starts(agent_input, cat) -> Union[None, Dict]: """Hook before the agent starts. diff --git a/core/cat/mad_hatter/core_plugin/hooks/flow.py b/core/cat/mad_hatter/core_plugin/hooks/flow.py index bdaf6bc1..793e893e 100644 --- a/core/cat/mad_hatter/core_plugin/hooks/flow.py +++ b/core/cat/mad_hatter/core_plugin/hooks/flow.py @@ -260,28 +260,6 @@ def after_cat_recalls_memories(cat) -> None: pass # do nothing -# Called just after memories are recalled. They are stored in: -# - cat.working_memory["episodic_memories"] -# - cat.working_memory["declarative_memories"] -@hook(priority=0) -def after_cat_recalled_memories(memory_query_text: str, cat) -> None: - """Hook into semantic search after the memory retrieval. - - Allows to intercept the recalled memories right after these are stored in the Working Memory. - According to the user's input, the relevant context is saved in `cat.working_memory["episodic_memories"]` - and `cat.working_memory["declarative_memories"]`. At this point, - this hook is executed to edit the search query. - - Parameters - ---------- - memory_query_text : str - String used to query both *episodic* and *declarative* memories. - cat : CheshireCat - Cheshire Cat instance. - """ - return None - - # Hook called just before sending response to a client. @hook(priority=0) def before_cat_sends_message(message: dict, cat) -> dict: diff --git a/core/cat/mad_hatter/core_plugin/hooks/prompt.py b/core/cat/mad_hatter/core_plugin/hooks/prompt.py index e749add3..a0e2413b 100644 --- a/core/cat/mad_hatter/core_plugin/hooks/prompt.py +++ b/core/cat/mad_hatter/core_plugin/hooks/prompt.py @@ -113,6 +113,7 @@ def agent_prompt_suffix(prompt_suffix: str, cat) -> str: return prompt_suffix +# TODO_HOOK @hook(priority=0) def agent_prompt_episodic_memories(memory_docs: List[Document], cat) -> str: """Hook memories retrieved from episodic memory. @@ -171,6 +172,7 @@ def agent_prompt_episodic_memories(memory_docs: List[Document], cat) -> str: return memory_content +# TODO_HOOK @hook(priority=0) def agent_prompt_declarative_memories(memory_docs: List[Document], cat) -> str: """Hook memories retrieved from declarative memory. @@ -224,6 +226,7 @@ def agent_prompt_declarative_memories(memory_docs: List[Document], cat) -> str: return memory_content +# TODO_HOOK @hook(priority=0) def agent_prompt_chat_history(chat_history: List[Dict], cat) -> List[Dict]: """Hook the chat history. 
diff --git a/core/cat/mad_hatter/core_plugin/hooks/rabbithole.py b/core/cat/mad_hatter/core_plugin/hooks/rabbithole.py index 995c517e..ac04050b 100644 --- a/core/cat/mad_hatter/core_plugin/hooks/rabbithole.py +++ b/core/cat/mad_hatter/core_plugin/hooks/rabbithole.py @@ -8,10 +8,10 @@ from typing import List -from langchain.text_splitter import RecursiveCharacterTextSplitter -from cat.mad_hatter.decorators import hook from langchain.docstore.document import Document +from cat.mad_hatter.decorators import hook + @hook(priority=0) def rabbithole_instantiates_parsers(file_handlers: dict, cat) -> dict: @@ -91,49 +91,6 @@ def before_rabbithole_splits_text(doc: Document, cat) -> Document: return doc -# Hook called when rabbithole splits text. Input is whole Document -@hook(priority=0) -def rabbithole_splits_text(text, chunk_size: int, chunk_overlap: int, cat) -> List[Document]: - """Hook into the recursive split pipeline. - - Allows editing the recursive split the *RabbitHole* applies to chunk the ingested documents. - - This is applied when ingesting a documents and urls from a script, using an endpoint or from the GUI. - - Parameters - ---------- - text : List[Document] - List of langchain `Document` to chunk. - chunk_size : int - Length of every chunk in characters. - chunk_overlap : int - Amount of overlap between consecutive chunks. - cat : CheshireCat - Cheshire Cat instance. - - Returns - ------- - docs : List[Document] - List of chunked langchain documents to be stored in the episodic memory. - - """ - - # text splitter - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - separators=["\\n\\n", "\n\n", ".\\n", ".\n", "\\n", "\n", " ", ""], - ) - - # split text - docs = text_splitter.split_documents(text) - - # remove short texts (page numbers, isolated words, etc.) - docs = list(filter(lambda d: len(d.page_content) > 10, docs)) - - return docs - - # Hook called after rabbithole have splitted text into chunks. # Input is the chunks @hook(priority=0) @@ -159,6 +116,7 @@ def after_rabbithole_splitted_text(chunks: List[Document], cat) -> List[Document return chunks +# TODO_HOOK: is this useful or just a duplication of `after_rabbithole_splitted_text` ? # Hook called when a list of Document is going to be inserted in memory from the rabbit hole. # Here you can edit/summarize the documents before inserting them in memory # Should return a list of documents (each is a langchain Document) diff --git a/core/cat/rabbit_hole.py b/core/cat/rabbit_hole.py index d23b8edd..90ce972a 100644 --- a/core/cat/rabbit_hole.py +++ b/core/cat/rabbit_hole.py @@ -13,6 +13,7 @@ from langchain.docstore.document import Document from qdrant_client.http import models +from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.document_loaders.parsers import PDFMinerParser from langchain.document_loaders.parsers.generic import MimeTypeBasedParser from langchain.document_loaders.parsers.txt import TextParser @@ -334,9 +335,16 @@ def split_text(self, text, chunk_size, chunk_overlap): ) # split the documents using chunk_size and chunk_overlap - docs = self.cat.mad_hatter.execute_hook( - "rabbithole_splits_text", text, chunk_size, chunk_overlap + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + separators=["\\n\\n", "\n\n", ".\\n", ".\n", "\\n", "\n", " ", ""], ) + # split text + docs = text_splitter.split_documents(text) + # remove short texts (page numbers, isolated words, etc.) 
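# A minimal sketch of the merge strategy hinted at in the TODO below: fold each
# too-short chunk into the previous one instead of dropping it. The helper name
# and the `min_length` default are illustrative assumptions; the sketch only
# presumes langchain `Document` objects with a mutable `page_content` string.
def merge_short_chunks(chunks, min_length=10):
    merged = []
    for chunk in chunks:
        if merged and len(chunk.page_content) <= min_length:
            # append the short fragment to the previous chunk
            merged[-1].page_content += " " + chunk.page_content
        else:
            merged.append(chunk)
    return merged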
+ # TODO: join each short chunk with previous one, instead of deleting them + docs = list(filter(lambda d: len(d.page_content) > 10, docs)) # do something on the text after it is split docs = self.cat.mad_hatter.execute_hook( From 6daa62658f1fd7cff270d288f0cac8f4e4bc2d55 Mon Sep 17 00:00:00 2001 From: Piero Savastano Date: Mon, 18 Sep 2023 17:21:58 +0200 Subject: [PATCH 08/10] refactor agent hooks --- core/cat/looking_glass/agent_manager.py | 136 ++++++++++++++- core/cat/looking_glass/cheshire_cat.py | 2 +- .../cat/mad_hatter/core_plugin/hooks/agent.py | 33 +++- .../mad_hatter/core_plugin/hooks/prompt.py | 156 +----------------- 4 files changed, 157 insertions(+), 170 deletions(-) diff --git a/core/cat/looking_glass/agent_manager.py b/core/cat/looking_glass/agent_manager.py index 80e39826..ffe0b3a2 100644 --- a/core/cat/looking_glass/agent_manager.py +++ b/core/cat/looking_glass/agent_manager.py @@ -1,12 +1,20 @@ +from datetime import timedelta +import time +from typing import List, Dict + +from langchain.docstore.document import Document from langchain.prompts import PromptTemplate from langchain.chains import LLMChain from langchain.agents import AgentExecutor, LLMSingleActionAgent from cat.looking_glass import prompts from cat.looking_glass.output_parser import ToolOutputParser +from cat.utils import verbal_timedelta from cat.log import log + + class AgentManager: """Manager of Langchain Agent. @@ -99,15 +107,16 @@ def execute_agent(self): # prepare input to be passed to the agent. # Info will be extracted from working memory agent_input = self.format_agent_input() - - # this hook allows to reply without executing the agent (for example canned responses, out-of-topic barriers etc.) - #fast_reply = mad_hatter.execute_hook("before_agent_starts", agent_input) - #if fast_reply: - # return fast_reply - + agent_input = mad_hatter.execute_hook("before_agent_starts", agent_input) + # should we ran the default agent? + fast_reply = {} + fast_reply = self.mad_hatter.execute_hook("agent_fast_reply", fast_reply) + if len(fast_reply.keys()) > 0: + return fast_reply prompt_prefix = mad_hatter.execute_hook("agent_prompt_prefix", prompts.MAIN_PROMPT_PREFIX) prompt_suffix = mad_hatter.execute_hook("agent_prompt_suffix", prompts.MAIN_PROMPT_SUFFIX) + # tools currently recalled in working memory recalled_tools = working_memory["procedural_memories"] # Get the tools names only @@ -218,3 +227,118 @@ def format_agent_input(self): "declarative_memory": declarative_memory_formatted_content, "chat_history": conversation_history_formatted_content, } + + def agent_prompt_episodic_memories(self, memory_docs: List[Document]) -> str: + """Formats episodic memories to be inserted into the prompt. + + Parameters + ---------- + memory_docs : List[Document] + List of Langchain `Document` retrieved from the episodic memory. + + Returns + ------- + memory_content : str + String of retrieved context from the episodic memory. + """ + + # convert docs to simple text + memory_texts = [m[0].page_content.replace("\n", ". ") for m in memory_docs] + + # add time information (e.g. "2 days ago") + memory_timestamps = [] + for m in memory_docs: + + # Get Time information in the Document metadata + timestamp = m[0].metadata["when"] + + # Get Current Time - Time when memory was stored + delta = timedelta(seconds=(time.time() - timestamp)) + + # Convert and Save timestamps to Verbal (e.g. 
"2 days ago") + memory_timestamps.append(f" ({verbal_timedelta(delta)})") + + # Join Document text content with related temporal information + memory_texts = [a + b for a, b in zip(memory_texts, memory_timestamps)] + + # Format the memories for the output + memories_separator = "\n - " + memory_content = "## Context of things the Human said in the past: " + \ + memories_separator + memories_separator.join(memory_texts) + + # if no data is retrieved from memory don't erite anithing in the prompt + if len(memory_texts) == 0: + memory_content = "" + + return memory_content + + def agent_prompt_declarative_memories(self, memory_docs: List[Document]) -> str: + """Formats the declarative memories for the prompt context. + Such context is placed in the `agent_prompt_prefix` in the place held by {declarative_memory}. + + Parameters + ---------- + memory_docs : List[Document] + list of Langchain `Document` retrieved from the declarative memory. + + Returns + ------- + memory_content : str + String of retrieved context from the declarative memory. + """ + + # convert docs to simple text + memory_texts = [m[0].page_content.replace("\n", ". ") for m in memory_docs] + + # add source information (e.g. "extracted from file.txt") + memory_sources = [] + for m in memory_docs: + + # Get and save the source of the memory + source = m[0].metadata["source"] + memory_sources.append(f" (extracted from {source})") + + # Join Document text content with related source information + memory_texts = [a + b for a, b in zip(memory_texts, memory_sources)] + + # Format the memories for the output + memories_separator = "\n - " + + memory_content = "## Context of documents containing relevant information: " + \ + memories_separator + memories_separator.join(memory_texts) + + # if no data is retrieved from memory don't erite anithing in the prompt + if len(memory_texts) == 0: + memory_content = "" + + return memory_content + + def agent_prompt_chat_history(self, chat_history: List[Dict]) -> str: + """Serialize chat history for the agent input. + Converts to text the recent conversation turns fed to the *Agent*. + + Parameters + ---------- + chat_history : List[Dict] + List of dictionaries collecting speaking turns. + + Returns + ------- + history : str + String with recent conversation turns to be provided as context to the *Agent*. + + Notes + ----- + Such context is placed in the `agent_prompt_suffix` in the place held by {chat_history}. + + The chat history is a dictionary with keys:: + 'who': the name of who said the utterance; + 'message': the utterance. + + """ + history = "" + for turn in chat_history: + history += f"\n - {turn['who']}: {turn['message']}" + + return history + diff --git a/core/cat/looking_glass/cheshire_cat.py b/core/cat/looking_glass/cheshire_cat.py index 853187a0..b05936d3 100644 --- a/core/cat/looking_glass/cheshire_cat.py +++ b/core/cat/looking_glass/cheshire_cat.py @@ -446,7 +446,7 @@ def __call__(self, user_message_json): "name": "VectorMemoryError", "description": err_message, } - + # reply with agent try: cat_message = self.agent_manager.execute_agent() diff --git a/core/cat/mad_hatter/core_plugin/hooks/agent.py b/core/cat/mad_hatter/core_plugin/hooks/agent.py index 7e10e536..611d2b88 100644 --- a/core/cat/mad_hatter/core_plugin/hooks/agent.py +++ b/core/cat/mad_hatter/core_plugin/hooks/agent.py @@ -12,13 +12,9 @@ from cat.log import log -# TODO_HOOK @hook(priority=0) -def before_agent_starts(agent_input, cat) -> Union[None, Dict]: - """Hook before the agent starts. 
- - This hook is useful to shortcut the Cat response. - If you do not want the agent to run, return the final response from here and it will end up in the chat without the agent being executed. +def before_agent_starts(agent_input: Dict, cat) -> Dict: + """Hook to read and edit the agent input Parameters -------- @@ -27,10 +23,31 @@ def before_agent_starts(agent_input, cat) -> Union[None, Dict]: cat : CheshireCat Cheshire Cat instance. + Returns + -------- + response : Dict + Agent Input + """ + + return agent_input + + +@hook(priority=0) +def agent_fast_reply(fast_reply, cat) -> Union[None, Dict]: + """This hook is useful to shortcut the Cat response. + If you do not want the agent to run, return the final response from here and it will end up in the chat without the agent being executed. + + Parameters + -------- + fast_reply: dict + Input is dict (initially empty), which can be enriched whith an "output" key with the shortcut response. + cat : CheshireCat + Cheshire Cat instance. + Returns -------- response : Union[None, Dict] - Cat response if you want to avoid using the agent, or None if you want the agent to be executed. + Cat response if you want to avoid using the agent, or None / {} if you want the agent to be executed. See below for examples of Cat response Examples @@ -55,7 +72,7 @@ def before_agent_starts(agent_input, cat) -> Union[None, Dict]: ``` """ - return None + return fast_reply @hook(priority=0) diff --git a/core/cat/mad_hatter/core_plugin/hooks/prompt.py b/core/cat/mad_hatter/core_plugin/hooks/prompt.py index a0e2413b..8e35c13c 100644 --- a/core/cat/mad_hatter/core_plugin/hooks/prompt.py +++ b/core/cat/mad_hatter/core_plugin/hooks/prompt.py @@ -6,10 +6,7 @@ import time from typing import List, Dict -from datetime import timedelta -from langchain.docstore.document import Document -from cat.utils import verbal_timedelta from cat.mad_hatter.decorators import hook @@ -43,7 +40,7 @@ def agent_prompt_prefix(prefix, cat) -> str: @hook(priority=0) -def agent_prompt_instructions(instructions, cat) -> str: +def agent_prompt_instructions(instructions: str, cat) -> str: """Hook the instruction prompt. Allows to edit the instructions that the Cat feeds to the *Agent*. @@ -111,154 +108,3 @@ def agent_prompt_suffix(prompt_suffix: str, cat) -> str: """ return prompt_suffix - - -# TODO_HOOK -@hook(priority=0) -def agent_prompt_episodic_memories(memory_docs: List[Document], cat) -> str: - """Hook memories retrieved from episodic memory. - - This hook formats the relevant memories retrieved from the context of things the human said in the past. - - Retrieved memories are converted to string and temporal information is added to inform the *Agent* about - when the user said that sentence in the past. - - This hook allows to edit the retrieved memory to condition the information provided as context to the *Agent*. - - Such context is placed in the `agent_prompt_prefix` in the place held by {episodic_memory}. - - Parameters - ---------- - memory_docs : List[Document] - List of Langchain `Document` retrieved from the episodic memory. - cat : CheshireCat - Cheshire Cat instance. - - Returns - ------- - memory_content : str - String of retrieved context from the episodic memory. - - """ - - # convert docs to simple text - memory_texts = [m[0].page_content.replace("\n", ". ") for m in memory_docs] - - # add time information (e.g. 
"2 days ago") - memory_timestamps = [] - for m in memory_docs: - - # Get Time information in the Document metadata - timestamp = m[0].metadata["when"] - - # Get Current Time - Time when memory was stored - delta = timedelta(seconds=(time.time() - timestamp)) - - # Convert and Save timestamps to Verbal (e.g. "2 days ago") - memory_timestamps.append(f" ({verbal_timedelta(delta)})") - - # Join Document text content with related temporal information - memory_texts = [a + b for a, b in zip(memory_texts, memory_timestamps)] - - # Format the memories for the output - memories_separator = "\n - " - memory_content = "## Context of things the Human said in the past: " + \ - memories_separator + memories_separator.join(memory_texts) - - # if no data is retrieved from memory don't erite anithing in the prompt - if len(memory_texts) == 0: - memory_content = "" - - return memory_content - - -# TODO_HOOK -@hook(priority=0) -def agent_prompt_declarative_memories(memory_docs: List[Document], cat) -> str: - """Hook memories retrieved from declarative memory. - - This hook formats the relevant memories retrieved from the context of documents uploaded in the Cat's memory. - - Retrieved memories are converted to string and the source information is added to inform the *Agent* on - which document the information was retrieved from. - - This hook allows to edit the retrieved memory to condition the information provided as context to the *Agent*. - - Such context is placed in the `agent_prompt_prefix` in the place held by {declarative_memory}. - - Parameters - ---------- - memory_docs : List[Document] - list of Langchain `Document` retrieved from the declarative memory. - cat : CheshireCat - Cheshire Cat instance. - - Returns - ------- - memory_content : str - String of retrieved context from the declarative memory. - """ - - # convert docs to simple text - memory_texts = [m[0].page_content.replace("\n", ". ") for m in memory_docs] - - # add source information (e.g. "extracted from file.txt") - memory_sources = [] - for m in memory_docs: - - # Get and save the source of the memory - source = m[0].metadata["source"] - memory_sources.append(f" (extracted from {source})") - - # Join Document text content with related source information - memory_texts = [a + b for a, b in zip(memory_texts, memory_sources)] - - # Format the memories for the output - memories_separator = "\n - " - - memory_content = "## Context of documents containing relevant information: " + \ - memories_separator + memories_separator.join(memory_texts) - - # if no data is retrieved from memory don't erite anithing in the prompt - if len(memory_texts) == 0: - memory_content = "" - - return memory_content - - -# TODO_HOOK -@hook(priority=0) -def agent_prompt_chat_history(chat_history: List[Dict], cat) -> List[Dict]: - """Hook the chat history. - - This hook converts to text the recent conversation turns fed to the *Agent*. - The hook allows to edit and enhance the chat history provided as context to the *Agent*. - - - Parameters - ---------- - chat_history : List[Dict] - List of dictionaries collecting speaking turns. - cat : CheshireCat - Cheshire Cat instances. - - Returns - ------- - history : str - String with recent conversation turns to be provided as context to the *Agent*. - - Notes - ----- - Such context is placed in the `agent_prompt_suffix` in the place held by {chat_history}. - - The chat history is a dictionary with keys:: - 'who': the name of who said the utterance; - 'message': the utterance. 
-
-    """
-    history = ""
-    for turn in chat_history:
-        history += f"\n - {turn['who']}: {turn['message']}"
-
-    return history
-

From d00bd8c9d1b852899b93e422778fa954676a4f31 Mon Sep 17 00:00:00 2001
From: Piero Savastano
Date: Mon, 18 Sep 2023 17:36:41 +0200
Subject: [PATCH 09/10] fix bug

---
 core/cat/looking_glass/agent_manager.py | 16 ++++++----------
 core/cat/mad_hatter/mad_hatter.py       |  6 +++---
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/core/cat/looking_glass/agent_manager.py b/core/cat/looking_glass/agent_manager.py
index ffe0b3a2..3be2d132 100644
--- a/core/cat/looking_glass/agent_manager.py
+++ b/core/cat/looking_glass/agent_manager.py
@@ -110,7 +110,7 @@ def execute_agent(self):
         agent_input = mad_hatter.execute_hook("before_agent_starts", agent_input)
         # should we ran the default agent?
         fast_reply = {}
-        fast_reply = self.mad_hatter.execute_hook("agent_fast_reply", fast_reply)
+        fast_reply = mad_hatter.execute_hook("agent_fast_reply", fast_reply)
         if len(fast_reply.keys()) > 0:
             return fast_reply
         prompt_prefix = mad_hatter.execute_hook("agent_prompt_prefix", prompts.MAIN_PROMPT_PREFIX)
@@ -202,22 +202,18 @@ def format_agent_input(self):
         agent_prompt_chat_history
         """
 
-        mad_hatter = self.cat.mad_hatter
         working_memory = self.cat.working_memory
 
         # format memories to be inserted in the prompt
-        episodic_memory_formatted_content = mad_hatter.execute_hook(
-            "agent_prompt_episodic_memories",
-            working_memory["episodic_memories"],
+        episodic_memory_formatted_content = self.agent_prompt_episodic_memories(
+            working_memory["episodic_memories"]
         )
-        declarative_memory_formatted_content = mad_hatter.execute_hook(
-            "agent_prompt_declarative_memories",
-            working_memory["declarative_memories"],
+        declarative_memory_formatted_content = self.agent_prompt_declarative_memories(
+            working_memory["declarative_memories"]
         )
 
         # format conversation history to be inserted in the prompt
-        conversation_history_formatted_content = mad_hatter.execute_hook(
-            "agent_prompt_chat_history",
+        conversation_history_formatted_content = self.agent_prompt_chat_history(
             working_memory["history"]
         )
diff --git a/core/cat/mad_hatter/mad_hatter.py b/core/cat/mad_hatter/mad_hatter.py
index b91f09cc..0480679d 100644
--- a/core/cat/mad_hatter/mad_hatter.py
+++ b/core/cat/mad_hatter/mad_hatter.py
@@ -247,8 +247,6 @@ def toggle_plugin(self, plugin_id):
 
     # execute requested hook
     def execute_hook(self, hook_name, *args):
-        log.critical(hook_name)
-
         # check if hook is supported
         if hook_name not in self.hooks.keys():
             raise Exception(f"Hook {hook_name} not present in any plugin")
@@ -258,6 +256,7 @@ def execute_hook(self, hook_name, *args):
         if len(args) == 0:
             for hook in self.hooks[hook_name]:
                 try:
+                    log.debug(f"Executing {hook.plugin_id}::{hook.name} with priority {hook.priority}")
                     hook.function(cat=self.ccat)
                 except Exception as e:
                     log.error(f"Error in plugin {hook.plugin_id}::{hook.name}")
@@ -276,8 +275,9 @@ def execute_hook(self, hook_name, *args):
             try:
                 # pass tea_cup to the hooks, along other args
                 # hook has at least one argument, and it will be piped
+                log.debug(f"Executing {hook.plugin_id}::{hook.name} with priority {hook.priority}")
                 tea_spoon = hook.function(tea_cup, *args[1:], cat=self.ccat)
-                log.info(f"Hook {hook.plugin_id}::{hook.name} returned {tea_spoon}")
+                log.debug(f"Hook {hook.plugin_id}::{hook.name} returned {tea_spoon}")
                 if tea_spoon is not None:
                     tea_cup = tea_spoon
             except Exception as e:
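With the hooks reworked in patches 08 and 09, a plugin only declares the hooks it cares about and the Cat pipes the first argument through them. A minimal sketch of a plugin using the two agent hooks touched above; the module name is hypothetical, and only the `working_memory["declarative_memories"]` key shown in `format_agent_input` is taken from the patch itself:

```python
# my_plugin/agent_shortcuts.py -- hypothetical plugin module, not part of this patch
from cat.mad_hatter.decorators import hook


@hook(priority=1)
def before_agent_starts(agent_input, cat):
    # read or edit the agent input; whatever is returned is piped
    # to the next hook in the chain and then fed to the agent
    return agent_input


@hook(priority=1)
def agent_fast_reply(fast_reply, cat):
    # returning a dict with an "output" key skips the agent entirely;
    # returning the dict unchanged (or None) lets the agent run as usual
    if len(cat.working_memory["declarative_memories"]) == 0:
        fast_reply["output"] = "Sorry, I have no memories about that."
    return fast_reply
```

Because hooks are executed in priority order, a higher-priority plugin can still enrich `fast_reply` before a lower-priority one decides whether to short-circuit the agent.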
From ca116ddebbb56b05208784d093f5cfe0034dc0fc Mon Sep 17 00:00:00 2001
From: Piero Savastano
Date: Tue, 19 Sep 2023 16:05:12 +0200
Subject: [PATCH 10/10] deepcopy pipeable arg

---
 core/cat/mad_hatter/mad_hatter.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/core/cat/mad_hatter/mad_hatter.py b/core/cat/mad_hatter/mad_hatter.py
index 0480679d..f906f374 100644
--- a/core/cat/mad_hatter/mad_hatter.py
+++ b/core/cat/mad_hatter/mad_hatter.py
@@ -4,6 +4,7 @@
 import shutil
 import os
 import traceback
+from copy import deepcopy
 
 from cat.log import log
 from cat.db import crud
@@ -268,7 +269,7 @@ def execute_hook(self, hook_name, *args):
         # First argument is passed to `execute_hook` is the pipeable one.
         # We call it `tea_cup` as every hook called will receive it as an input,
         # can add sugar, milk, or whatever, and return it for the next hook
-        tea_cup = args[0]
+        tea_cup = deepcopy(args[0])
 
         # run hooks
         for hook in self.hooks[hook_name]:
@@ -276,7 +277,11 @@ def execute_hook(self, hook_name, *args):
                 # pass tea_cup to the hooks, along other args
                 # hook has at least one argument, and it will be piped
                 log.debug(f"Executing {hook.plugin_id}::{hook.name} with priority {hook.priority}")
-                tea_spoon = hook.function(tea_cup, *args[1:], cat=self.ccat)
+                tea_spoon = hook.function(
+                    deepcopy(tea_cup),
+                    *deepcopy(args[1:]),
+                    cat=self.ccat
+                )
                 log.debug(f"Hook {hook.plugin_id}::{hook.name} returned {tea_spoon}")
                 if tea_spoon is not None:
                     tea_cup = tea_spoon
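The `deepcopy` added in this last patch protects the piped argument from hooks that mutate it in place without returning it: only values explicitly returned by a hook are carried forward. A standalone illustration of that behaviour, assuming hooks may return `None` (function names below are made up, this is not repository code):

```python
from copy import deepcopy


def run_hooks(hooks, tea_cup):
    # simplified stand-in for execute_hook's piping loop
    tea_cup = deepcopy(tea_cup)
    for hook_function in hooks:
        # each hook receives its own copy, as in the patch above
        tea_spoon = hook_function(deepcopy(tea_cup))
        if tea_spoon is not None:
            tea_cup = tea_spoon
    return tea_cup


def forgetful_hook(settings):
    settings["debug"] = True   # mutates its local copy...
    return None                # ...but returns nothing, so the change is dropped


def polite_hook(settings):
    settings["greeting"] = "Hello!"
    return settings            # explicitly returned, so the change is piped forward


print(run_hooks([forgetful_hook, polite_hook], {}))
# -> {'greeting': 'Hello!'}  (no 'debug' key: un-returned mutations no longer leak)
```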