diff --git a/core/cat/factory/custom_llm.py b/core/cat/factory/custom_llm.py
index b909968d..d1104c32 100644
--- a/core/cat/factory/custom_llm.py
+++ b/core/cat/factory/custom_llm.py
@@ -14,6 +14,10 @@ def _llm_type(self):
     def _call(self, prompt, stop=None):
         return "AI: You did not configure a Language Model. " \
                "Do it in the settings!"
+
+    async def _acall(self, prompt, stop=None):
+        return "AI: You did not configure a Language Model. " \
+               "Do it in the settings!"
 
 
 # elaborated from
diff --git a/core/cat/looking_glass/agent_manager.py b/core/cat/looking_glass/agent_manager.py
index b6810de7..4e5dcc08 100644
--- a/core/cat/looking_glass/agent_manager.py
+++ b/core/cat/looking_glass/agent_manager.py
@@ -32,7 +32,7 @@ def __init__(self, cat):
         self.cat = cat
 
-    def execute_tool_agent(self, agent_input, allowed_tools):
+    async def execute_tool_agent(self, agent_input, allowed_tools):
 
         allowed_tools_names = [t.name for t in allowed_tools]
 
         # TODO: dynamic input_variables as in the main prompt
@@ -69,11 +69,11 @@ def execute_tool_agent(self, agent_input, allowed_tools):
             verbose=True
         )
 
-        out = agent_executor(agent_input)
+        out = await agent_executor.acall(agent_input)
 
         return out
 
-    def execute_memory_chain(self, agent_input, prompt_prefix, prompt_suffix, working_memory: WorkingMemory):
+    async def execute_memory_chain(self, agent_input, prompt_prefix, prompt_suffix, working_memory: WorkingMemory):
         input_variables = [i for i in agent_input.keys() if i in prompt_prefix + prompt_suffix]
 
         # memory chain (second step)
@@ -88,13 +88,13 @@ def execute_memory_chain(self, agent_input, prompt_prefix, prompt_suffix, workin
             verbose=True
         )
 
-        out = memory_chain(agent_input, callbacks=[NewTokenHandler(self.cat, working_memory)])
+        out = await memory_chain.acall(agent_input, callbacks=[NewTokenHandler(self.cat, working_memory)])
         out["output"] = out["text"]
         del out["text"]
 
         return out
 
-    def execute_agent(self, working_memory):
+    async def execute_agent(self, working_memory):
         """Instantiate the Agent with tools.
 
         The method formats the main prompt and gather the allowed tools. It also instantiates a conversational Agent
@@ -134,7 +134,7 @@ def execute_agent(self, working_memory):
         log.debug(f"{len(allowed_tools)} allowed tools retrived.")
 
         try:
-            tools_result = self.execute_tool_agent(agent_input, allowed_tools)
+            tools_result = await self.execute_tool_agent(agent_input, allowed_tools)
 
             # If tools_result["output"] is None the LLM has used the fake tool none_of_the_others
             # so no relevant information has been obtained from the tools.
@@ -177,7 +177,7 @@ def execute_agent(self, working_memory):
 
         #Adding the tools_output key in agent input, needed by the memory chain
         agent_input["tools_output"] = ""
         # Execute the memory chain
-        out = self.execute_memory_chain(agent_input, prompt_prefix, prompt_suffix, working_memory)
+        out = await self.execute_memory_chain(agent_input, prompt_prefix, prompt_suffix, working_memory)
 
         return out
diff --git a/core/cat/looking_glass/cheshire_cat.py b/core/cat/looking_glass/cheshire_cat.py
index 7a633676..53bc6573 100644
--- a/core/cat/looking_glass/cheshire_cat.py
+++ b/core/cat/looking_glass/cheshire_cat.py
@@ -398,7 +398,7 @@ def send_ws_message(self, content: str, msg_type: MSG_TYPES = "notification", wo
             )
         )
 
-    def __call__(self, user_message_json):
+    async def __call__(self, user_message_json):
         """Call the Cat instance.
 
         This method is called on the user's message received from the client.
@@ -456,7 +456,7 @@ def __call__(self, user_message_json):
 
         # reply with agent
         try:
-            cat_message = self.agent_manager.execute_agent(user_working_memory)
+            cat_message = await self.agent_manager.execute_agent(user_working_memory)
         except Exception as e:
             # This error happens when the LLM
             # does not respect prompt instructions.
diff --git a/core/cat/routes/websocket.py b/core/cat/routes/websocket.py
index 5e2a828b..1cb0998f 100644
--- a/core/cat/routes/websocket.py
+++ b/core/cat/routes/websocket.py
@@ -62,7 +62,7 @@ async def receive_message(ccat: CheshireCat, user_id: str = "user"):
         user_message["user_id"] = user_id
 
         # Run the `ccat` object's method in a threadpool since it might be a CPU-bound operation.
-        cat_message = await run_in_threadpool(ccat, user_message)
+        cat_message = await ccat(user_message)
 
         # Send the response message back to the user.
         await manager.send_personal_message(cat_message, user_id)
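
Not part of the diff: a minimal, dependency-free sketch of the pattern the change adopts. The call chain (websocket route -> CheshireCat.__call__ -> AgentManager -> LLM) becomes coroutines awaited end to end, which is why the run_in_threadpool hop in the websocket route can be dropped. The names below (ToyAgentManager, ToyCat) are illustrative stand-ins, not the project's real classes.

import asyncio


class ToyAgentManager:
    # Stand-in for AgentManager after the conversion: execute_agent is a
    # coroutine, mirroring `await agent_executor.acall(...)` in the diff.
    async def execute_agent(self, working_memory):
        await asyncio.sleep(0)  # simulate non-blocking chain / LLM work
        return {"output": f"echo: {working_memory['user_message']}"}


class ToyCat:
    # Stand-in for CheshireCat: __call__ is now `async def`, so callers
    # await the instance directly instead of wrapping it in a threadpool.
    def __init__(self):
        self.agent_manager = ToyAgentManager()

    async def __call__(self, user_message_json):
        working_memory = {"user_message": user_message_json["text"]}
        return await self.agent_manager.execute_agent(working_memory)


async def main():
    ccat = ToyCat()
    # Mirrors the updated websocket route: `cat_message = await ccat(user_message)`
    cat_message = await ccat({"text": "hello"})
    print(cat_message)


asyncio.run(main())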