From 58f8195d0d816c892b0be81a117bf38f260ecc63 Mon Sep 17 00:00:00 2001 From: Amir Feizpour Date: Wed, 9 Oct 2024 17:05:48 -0400 Subject: [PATCH 1/2] Change event logging mechanism --- src/sherpa_ai/actions/base.py | 12 ++++++++++++ src/sherpa_ai/agents/base.py | 9 --------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/sherpa_ai/actions/base.py b/src/sherpa_ai/actions/base.py index e5526004..546f4d8b 100644 --- a/src/sherpa_ai/actions/base.py +++ b/src/sherpa_ai/actions/base.py @@ -7,6 +7,7 @@ from sherpa_ai.actions.utils.refinement import BaseRefinement from sherpa_ai.actions.utils.reranking import BaseReranking +from sherpa_ai.events import EventType if TYPE_CHECKING: from sherpa_ai.memory.belief import Belief @@ -132,6 +133,13 @@ def __call__(self, **kwargs): "are 'agent' and 'belief'" ) + # Log to the belief + if self.belief is not None: + self.belief.update_internal( + EventType.action, self.name, f"Action: {self.name} starts, Args: {filtered_kwargs}" + ) + + # Execute the action result = self.execute(**filtered_kwargs) @@ -139,6 +147,10 @@ def __call__(self, **kwargs): self.belief: Belief = self.belief if self.belief: self.belief.set(self.output_key, result) + self.belief.update_internal( + EventType.action_output, self.name, f"Action: {self.name} finishes, Observation: {result}" + ) + return result def __str__(self): diff --git a/src/sherpa_ai/agents/base.py b/src/sherpa_ai/agents/base.py index eff0210c..ea33f2d8 100644 --- a/src/sherpa_ai/agents/base.py +++ b/src/sherpa_ai/agents/base.py @@ -93,12 +93,6 @@ def run(self): f"🤖{self.name} is executing```" "``` {result.action.name}...```" ) - self.belief.update_internal( - EventType.action, - self.name, - "Action: " + result.action.name + str(result.args), - ) - try: action_output = self.act(result.action, result.args) except Exception as e: @@ -114,9 +108,6 @@ def run(self): self.verbose_logger.log(f"```Action output: {action_output}```") logger.debug(f"```Action output: {action_output}```") - self.belief.update_internal( - EventType.action_output, self.name, "Output: " + action_output - ) result = ( self.validate_output() From 841b1d85682d241a3a676c4955b99907936b8933 Mon Sep 17 00:00:00 2001 From: Boqi Chen Date: Fri, 11 Oct 2024 11:43:39 -0400 Subject: [PATCH 2/2] fix test errors --- src/sherpa_ai/agents/qa_agent.py | 1 + ...ator_test_entity_citation_succeeds_in_qa_0.jsonl | 12 ++++++++---- ...ator_test_entity_citation_succeeds_in_qa_1.jsonl | 13 ++++++++----- .../test_entity_citation_validator.py | 4 +++- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/sherpa_ai/agents/qa_agent.py b/src/sherpa_ai/agents/qa_agent.py index cea950b4..823cb17f 100644 --- a/src/sherpa_ai/agents/qa_agent.py +++ b/src/sherpa_ai/agents/qa_agent.py @@ -87,6 +87,7 @@ def create_actions(self) -> List[BaseAction]: task=self.belief.current_task.content, llm=self.llm, config=self.config, + belief=self.belief, ), ] diff --git a/src/tests/data/test_entity_citation_validator_test_entity_citation_succeeds_in_qa_0.jsonl b/src/tests/data/test_entity_citation_validator_test_entity_citation_succeeds_in_qa_0.jsonl index 26047b86..bbf75e15 100644 --- a/src/tests/data/test_entity_citation_validator_test_entity_citation_succeeds_in_qa_0.jsonl +++ b/src/tests/data/test_entity_citation_validator_test_entity_citation_succeeds_in_qa_0.jsonl @@ -1,4 +1,8 @@ -{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nGiven your specialized expertise, historical context, and your mission to facilitate Machine-Learning-based solutions, determine which action and its corresponding arguments would be the most scientifically sound and efficient approach to achieve the described task.\\n\\n**Task Description**: what is unique about Ethiopia calendar? based on USA calendar assosation. \\n\\n**Possible Actions**:\\n{\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"string\"\\n }\\n}\\n\\n**Task Context**:\\nwhat is unique about Ethiopia calendar? based on USA calendar assosation. \\n\\n\\n**History of Previous Actions**:\\n\\n\\nYou should only respond in JSON format as described below without any extra text.\\nResponse Format:\\n{\\n \"command\": {\\n \"name\": \"tool/command name you choose\",\\n \"args\": {\\n \"arg name\": \"value\"\\n }\\n }\\n}\\nEnsure the response can be parsed by Python json.loads\\n\\nIf you believe the task is complete and no further actions are necessary, respond with \"Finished\".\\n\\nFollow the described fromat strictly.\\n\\n", "agent": "human"}], "output": "{\\n \"command\": {\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"What is unique about Ethiopia calendar compared to the USA calendar?\"\\n }\\n }\\n}", "llm_name": "gpt-3.5-turbo"} -{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nGiven your specialized expertise, historical context, and your mission to facilitate Machine-Learning-based solutions, determine which action and its corresponding arguments would be the most scientifically sound and efficient approach to achieve the described task.\\n\\n**Task Description**: what is unique about Ethiopia calendar? based on USA calendar assosation. \\n\\n**Possible Actions**:\\n{\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"string\"\\n }\\n}\\n\\n**Task Context**:\\nwhat is unique about Ethiopia calendar? based on USA calendar assosation. \\n\\n\\n**History of Previous Actions**:\\nGoogle Search{'query': 'What is unique about Ethiopia calendar compared to the USA calendar?'}\\n\\n According to Ethiopian calendar Assosation (ECA) , Ethiopia has thirteen months. Canada and Kenya also mentioned this. also recognized by World Calendar Assosation (WCA) \\n\\nYou should only respond in JSON format as described below without any extra text.\\nResponse Format:\\n{\\n \"command\": {\\n \"name\": \"tool/command name you choose\",\\n \"args\": {\\n \"arg name\": \"value\"\\n }\\n }\\n}\\nEnsure the response can be parsed by Python json.loads\\n\\nIf you believe the task is complete and no further actions are necessary, respond with \"Finished\".\\n\\nFollow the described fromat strictly.\\n\\n", "agent": "human"}], "output": "\\n{\\n \"command\": {\\n \"name\": \"Finished\"\\n }\\n}", "llm_name": "gpt-3.5-turbo"} -{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nContext: what is unique about Ethiopia calendar? based on USA calendar assosation. \\n\\n\\nAction - Result History:\\nGoogle Search{'query': 'What is unique about Ethiopia calendar compared to the USA calendar?'}\\n\\n According to Ethiopian calendar Assosation (ECA) , Ethiopia has thirteen months. Canada and Kenya also mentioned this. also recognized by World Calendar Assosation (WCA) \\n\\nGiven the context and the action-result history, please complete the task mentioned. Include any links you used from the context and history in the result.\\nTask: what is unique about Ethiopia calendar? based on USA calendar assosation. \\nResult:\\n", "agent": "human"}], "output": "The unique aspect of the Ethiopian calendar compared to the USA calendar is that Ethiopia has thirteen months in a year. This is different from the standard twelve months in the Gregorian calendar used in the USA. The Ethiopian calendar consists of twelve months with 30 days each and a thirteenth month called Pagum\u0113, which has either five or six days depending on whether it is a leap year.\\n\\nThis difference in the number of months in the Ethiopian calendar is what sets it apart from the USA calendar. The Ethiopian calendar is also recognized by the World Calendar Association (WCA) for its unique structure.\\n\\nFor more information, you can refer to the Ethiopian Calendar Association (ECA) website: [Ethiopian Calendar Association](https://www.ethiopiancalendar.org/)", "llm_name": "gpt-3.5-turbo"} -{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nContext: what is unique about Ethiopia calendar? based on USA calendar assosation. \\n\\n\\nAction - Result History:\\nGoogle Search{'query': 'What is unique about Ethiopia calendar compared to the USA calendar?'}\\n\\n According to Ethiopian calendar Assosation (ECA) , Ethiopia has thirteen months. Canada and Kenya also mentioned this. also recognized by World Calendar Assosation (WCA) \\nThe unique aspect of the Ethiopian calendar compared to the USA calendar is that Ethiopia has thirteen months in a year. This is different from the standard twelve months in the Gregorian calendar used in the USA. The Ethiopian calendar consists of twelve months with 30 days each and a thirteenth month called Pagum\u0113, which has either five or six days depending on whether it is a leap year.\\n\\nThis difference in the number of months in the Ethiopian calendar is what sets it apart from the USA calendar. The Ethiopian calendar is also recognized by the World Calendar Association (WCA) for its unique structure.\\n\\nFor more information, you can refer to the Ethiopian Calendar Association (ECA) website: [Ethiopian Calendar Association](https://www.ethiopiancalendar.org/)\\nremember to address these entities Google, Canada, Kenya, in final the answer.\\n\\nGiven the context and the action-result history, please complete the task mentioned. Include any links you used from the context and history in the result.\\nTask: what is unique about Ethiopia calendar? based on USA calendar assosation. \\nResult:\\n", "agent": "human"}], "output": "The unique aspect of the Ethiopian calendar compared to the USA calendar is that Ethiopia has thirteen months in a year. This is different from the standard twelve months in the Gregorian calendar used in the USA. The Ethiopian calendar consists of twelve months with 30 days each and a thirteenth month called Pagum\u0113, which has either five or six days depending on whether it is a leap year.\\n\\nThis difference in the number of months in the Ethiopian calendar is what sets it apart from the USA calendar. The Ethiopian calendar is also recognized by the World Calendar Association (WCA) for its unique structure.\\n\\nFor more information, you can refer to the Ethiopian Calendar Association (ECA) website: [Ethiopian Calendar Association](https://www.ethiopiancalendar.org/)\\n\\nIt is worth noting that this information was found through a Google search and was also mentioned by Canada and Kenya.", "llm_name": "gpt-3.5-turbo"} +{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nGiven your specialized expertise, historical context, and your mission to facilitate Machine-Learning-based solutions, determine which action and its corresponding arguments would be the most scientifically sound and efficient approach to achieve the described task.\\n\\n**Task Description**: what is unique about Ethiopia calendar? based on USA calendar assosation. \\n\\n**Possible Actions**:\\n{\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"string\"\\n },\\n \"usage\": \"Get answers from Google Search\"\\n}\\n\\n**Task Context**:\\nwhat is unique about Ethiopia calendar? based on USA calendar assosation. \\n\\n\\n**History of Previous Actions**:\\n\\n\\nYou should only select the actions specified in **Possible Actions**\\nYou should only respond in JSON format as described below without any extra text.\\nResponse Format:\\n{\\n \"command\": {\\n \"name\": \"tool/command name you choose\",\\n \"args\": {\\n \"arg name\": \"value\"\\n }\\n }\\n}\\nEnsure the response can be parsed by Python json.loads\\n\\nFollow the described format strictly.\\n\\n", "agent": "human"}], "output": "{\\n \"command\": {\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"What is unique about Ethiopia calendar compared to the USA calendar?\"\\n }\\n }\\n}", "llm_name": "gpt-3.5-turbo"} +{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nGiven your specialized expertise, historical context, and your mission to facilitate Machine-Learning-based solutions, determine which action and its corresponding arguments would be the most scientifically sound and efficient approach to achieve the described task.\\n\\n**Task Description**: what is unique about Ethiopia calendar? based on USA calendar assosation. \\n\\n**Possible Actions**:\\n{\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"string\"\\n },\\n \"usage\": \"Get answers from Google Search\"\\n}\\n\\n**Task Context**:\\nwhat is unique about Ethiopia calendar? based on USA calendar assosation. \\n\\n\\n**History of Previous Actions**:\\nAction: Google Search starts, Args: {'query': 'What is unique about Ethiopia calendar compared to the USA calendar?'}\\nAction: Google Search finishes, Observation: According to Ethiopian calendar Assosation (ECA) , Ethiopia has thirteen months. Canada and Kenya also mentioned this. also recognized by World Calendar Assosation (WCA)\\n\\nYou should only select the actions specified in **Possible Actions**\\nYou should only respond in JSON format as described below without any extra text.\\nResponse Format:\\n{\\n \"command\": {\\n \"name\": \"tool/command name you choose\",\\n \"args\": {\\n \"arg name\": \"value\"\\n }\\n }\\n}\\nEnsure the response can be parsed by Python json.loads\\n\\nFollow the described format strictly.\\n\\n", "agent": "human"}], "output": "{\\n \"command\": {\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"What is unique about Ethiopia calendar compared to the USA calendar?\"\\n }\\n }\\n}", "llm_name": "gpt-3.5-turbo"} +{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nGiven your specialized expertise, historical context, and your mission to facilitate Machine-Learning-based solutions, determine which action and its corresponding arguments would be the most scientifically sound and efficient approach to achieve the described task.\\n\\n**Task Description**: what is unique about Ethiopia calendar? based on USA calendar assosation. \\n\\n**Possible Actions**:\\n{\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"string\"\\n },\\n \"usage\": \"Get answers from Google Search\"\\n}\\n\\n**Task Context**:\\nwhat is unique about Ethiopia calendar? based on USA calendar assosation. \\n\\n\\n**History of Previous Actions**:\\nAction: Google Search starts, Args: {'query': 'What is unique about Ethiopia calendar compared to the USA calendar?'}\\nAction: Google Search finishes, Observation: According to Ethiopian calendar Assosation (ECA) , Ethiopia has thirteen months. Canada and Kenya also mentioned this. also recognized by World Calendar Assosation (WCA)\\nAction: Google Search starts, Args: {'query': 'What is unique about Ethiopia calendar compared to the USA calendar?'}\\nAction: Google Search finishes, Observation: According to Ethiopian calendar Assosation (ECA) , Ethiopia has thirteen months. Canada and Kenya also mentioned this. also recognized by World Calendar Assosation (WCA)\\n\\nYou should only select the actions specified in **Possible Actions**\\nYou should only respond in JSON format as described below without any extra text.\\nResponse Format:\\n{\\n \"command\": {\\n \"name\": \"tool/command name you choose\",\\n \"args\": {\\n \"arg name\": \"value\"\\n }\\n }\\n}\\nEnsure the response can be parsed by Python json.loads\\n\\nFollow the described format strictly.\\n\\n", "agent": "human"}], "output": "{\\n \"command\": {\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"What is unique about Ethiopia calendar compared to the USA calendar?\"\\n }\\n }\\n}", "llm_name": "gpt-3.5-turbo"} +{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nContext: what is unique about Ethiopia calendar? based on USA calendar assosation. \\n\\n\\nAction - Result History:\\nAction: Google Search starts, Args: {'query': 'What is unique about Ethiopia calendar compared to the USA calendar?'}\\nAction: Google Search finishes, Observation: According to Ethiopian calendar Assosation (ECA) , Ethiopia has thirteen months. Canada and Kenya also mentioned this. also recognized by World Calendar Assosation (WCA)\\nAction: Google Search starts, Args: {'query': 'What is unique about Ethiopia calendar compared to the USA calendar?'}\\nAction: Google Search finishes, Observation: According to Ethiopian calendar Assosation (ECA) , Ethiopia has thirteen months. Canada and Kenya also mentioned this. also recognized by World Calendar Assosation (WCA)\\nAction: Google Search starts, Args: {'query': 'What is unique about Ethiopia calendar compared to the USA calendar?'}\\nAction: Google Search finishes, Observation: According to Ethiopian calendar Assosation (ECA) , Ethiopia has thirteen months. Canada and Kenya also mentioned this. also recognized by World Calendar Assosation (WCA)\\n\\nGiven the context and the action-result history, please complete the task mentioned. Include any links you used from the context and history in the result.\\nTask: what is unique about Ethiopia calendar? based on USA calendar assosation. \\nResult:\\n", "agent": "human"}], "output": "The unique aspect of the Ethiopian calendar compared to the USA calendar is that Ethiopia has thirteen months in a year. This is different from the Gregorian calendar used in the USA, which has twelve months. The Ethiopian calendar is recognized by the Ethiopian Calendar Association (ECA) and the World Calendar Association (WCA). This difference in the number of months in the calendar system is what sets the Ethiopian calendar apart from the USA calendar.\\n\\nFor more information, you can refer to the Ethiopian Calendar Association (ECA) website: [Ethiopian Calendar Association](https://www.ethiopiancalendar.org/) and the World Calendar Association (WCA) website: [World Calendar Association](https://www.worldcalendar.org/).", "llm_name": "gpt-3.5-turbo"} +{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nContext: what is unique about Ethiopia calendar? based on USA calendar assosation. \\n\\n\\nAction - Result History:\\nAction: Google Search starts, Args: {'query': 'What is unique about Ethiopia calendar compared to the USA calendar?'}\\nAction: Google Search finishes, Observation: According to Ethiopian calendar Assosation (ECA) , Ethiopia has thirteen months. Canada and Kenya also mentioned this. also recognized by World Calendar Assosation (WCA)\\nAction: Google Search starts, Args: {'query': 'What is unique about Ethiopia calendar compared to the USA calendar?'}\\nAction: Google Search finishes, Observation: According to Ethiopian calendar Assosation (ECA) , Ethiopia has thirteen months. Canada and Kenya also mentioned this. also recognized by World Calendar Assosation (WCA)\\nAction: Google Search starts, Args: {'query': 'What is unique about Ethiopia calendar compared to the USA calendar?'}\\nAction: Google Search finishes, Observation: According to Ethiopian calendar Assosation (ECA) , Ethiopia has thirteen months. Canada and Kenya also mentioned this. also recognized by World Calendar Assosation (WCA)\\nThe unique aspect of the Ethiopian calendar compared to the USA calendar is that Ethiopia has thirteen months in a year. This is different from the Gregorian calendar used in the USA, which has twelve months. The Ethiopian calendar is recognized by the Ethiopian Calendar Association (ECA) and the World Calendar Association (WCA). This difference in the number of months in the calendar system is what sets the Ethiopian calendar apart from the USA calendar.\\n\\nFor more information, you can refer to the Ethiopian Calendar Association (ECA) website: [Ethiopian Calendar Association](https://www.ethiopiancalendar.org/) and the World Calendar Association (WCA) website: [World Calendar Association](https://www.worldcalendar.org/).\\nremember to address these entities Google Search, Canada, Kenya, in final the answer.\\n\\nGiven the context and the action-result history, please complete the task mentioned. Include any links you used from the context and history in the result.\\nTask: what is unique about Ethiopia calendar? based on USA calendar assosation. \\nResult:\\n", "agent": "human"}], "output": "The unique aspect of the Ethiopian calendar compared to the USA calendar is that Ethiopia has thirteen months in a year. This is different from the Gregorian calendar used in the USA, which has twelve months. The Ethiopian calendar is recognized by the Ethiopian Calendar Association (ECA) and the World Calendar Association (WCA). This difference in the number of months in the calendar system is what sets the Ethiopian calendar apart from the USA calendar.\\n\\nCanada and Kenya are also mentioned in relation to the Ethiopian calendar, possibly indicating some similarities or connections between their calendar systems.\\n\\nFor more information, you can refer to the Ethiopian Calendar Association (ECA) website: [Ethiopian Calendar Association](https://www.ethiopiancalendar.org/) and the World Calendar Association (WCA) website: [World Calendar Association](https://www.worldcalendar.org/).", "llm_name": "gpt-3.5-turbo"} +{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nContext: what is unique about Ethiopia calendar? based on USA calendar assosation. \\n\\n\\nAction - Result History:\\nAction: Google Search starts, Args: {'query': 'What is unique about Ethiopia calendar compared to the USA calendar?'}\\nAction: Google Search finishes, Observation: According to Ethiopian calendar Assosation (ECA) , Ethiopia has thirteen months. Canada and Kenya also mentioned this. also recognized by World Calendar Assosation (WCA)\\nAction: Google Search starts, Args: {'query': 'What is unique about Ethiopia calendar compared to the USA calendar?'}\\nAction: Google Search finishes, Observation: According to Ethiopian calendar Assosation (ECA) , Ethiopia has thirteen months. Canada and Kenya also mentioned this. also recognized by World Calendar Assosation (WCA)\\nAction: Google Search starts, Args: {'query': 'What is unique about Ethiopia calendar compared to the USA calendar?'}\\nAction: Google Search finishes, Observation: According to Ethiopian calendar Assosation (ECA) , Ethiopia has thirteen months. Canada and Kenya also mentioned this. also recognized by World Calendar Assosation (WCA)\\nThe unique aspect of the Ethiopian calendar compared to the USA calendar is that Ethiopia has thirteen months in a year. This is different from the Gregorian calendar used in the USA, which has twelve months. The Ethiopian calendar is recognized by the Ethiopian Calendar Association (ECA) and the World Calendar Association (WCA). This difference in the number of months in the calendar system is what sets the Ethiopian calendar apart from the USA calendar.\\n\\nFor more information, you can refer to the Ethiopian Calendar Association (ECA) website: [Ethiopian Calendar Association](https://www.ethiopiancalendar.org/) and the World Calendar Association (WCA) website: [World Calendar Association](https://www.worldcalendar.org/).\\nremember to address these entities Google Search, Canada, Kenya, in final the answer.\\nThe unique aspect of the Ethiopian calendar compared to the USA calendar is that Ethiopia has thirteen months in a year. This is different from the Gregorian calendar used in the USA, which has twelve months. The Ethiopian calendar is recognized by the Ethiopian Calendar Association (ECA) and the World Calendar Association (WCA). This difference in the number of months in the calendar system is what sets the Ethiopian calendar apart from the USA calendar.\\n\\nCanada and Kenya are also mentioned in relation to the Ethiopian calendar, possibly indicating some similarities or connections between their calendar systems.\\n\\nFor more information, you can refer to the Ethiopian Calendar Association (ECA) website: [Ethiopian Calendar Association](https://www.ethiopiancalendar.org/) and the World Calendar Association (WCA) website: [World Calendar Association](https://www.worldcalendar.org/).\\nremember to address these entities Google Search, in the final answer.\\n\\nGiven the context and the action-result history, please complete the task mentioned. Include any links you used from the context and history in the result.\\nTask: what is unique about Ethiopia calendar? based on USA calendar assosation. \\nResult:\\n", "agent": "human"}], "output": "The unique aspect of the Ethiopian calendar compared to the USA calendar is that Ethiopia has thirteen months in a year, while the USA calendar follows the Gregorian calendar with twelve months. This distinction sets the Ethiopian calendar apart from the USA calendar. The Ethiopian calendar is recognized by the Ethiopian Calendar Association (ECA) and the World Calendar Association (WCA).\\n\\nCanada and Kenya are also mentioned in relation to the Ethiopian calendar, suggesting potential similarities or connections between their calendar systems.\\n\\nFor more information, you can visit the Ethiopian Calendar Association (ECA) website: [Ethiopian Calendar Association](https://www.ethiopiancalendar.org/) and the World Calendar Association (WCA) website: [World Calendar Association](https://www.worldcalendar.org/).", "llm_name": "gpt-3.5-turbo"} +{"input": [{"text": "\\n I have a question and an answer. I want you to confirm whether the entities from the question are all mentioned in some form within the answer.\\n\\n Question = Action: Google Search finishes, Observation: According to Ethiopian calendar Assosation (ECA) , Ethiopia has thirteen months. Canada and Kenya also mentioned this. also recognized by World Calendar Assosation (WCA)\\n Entities inside the question = ['Google Search', 'Ethiopian', 'ECA', 'Ethiopia', 'Canada', 'Kenya']\\n\\n Answer = The unique aspect of the Ethiopian calendar compared to the USA calendar is that Ethiopia has thirteen months in a year, while the USA calendar follows the Gregorian calendar with twelve months. This distinction sets the Ethiopian calendar apart from the USA calendar. The Ethiopian calendar is recognized by the Ethiopian Calendar Association (ECA) and the World Calendar Association (WCA).\\n\\nCanada and Kenya are also mentioned in relation to the Ethiopian calendar, suggesting potential similarities or connections between their calendar systems.\\n\\nFor more information, you can visit the Ethiopian Calendar Association (ECA) website: [Ethiopian Calendar Association](https://www.ethiopiancalendar.org/) and the World Calendar Association (WCA) website: [World Calendar Association](https://www.worldcalendar.org/).\\n \\n only return {\"entity_exist\": true , \"messages\":\"\" } if all entities are mentioned inside the answer in\\n only return {\"entity_exist\": false , \"messages\": \" Entity x hasn't been mentioned inside the answer\"} if the entity is not mentioned properly .\\n ", "agent": "human"}], "output": "{\"entity_exist\": true , \"messages\":\"\" }", "llm_name": "gpt-3.5-turbo"} +{"input": [{"text": "\\n I have a question and an answer. I want you to confirm whether the entities from the question are all mentioned in some form within the answer.\\n\\n Question = Action: Google Search finishes, Observation: According to Ethiopian calendar Assosation (ECA) , Ethiopia has thirteen months. Canada and Kenya also mentioned this. also recognized by World Calendar Assosation (WCA)\\n Entities inside the question = ['Google Search', 'Ethiopian', 'ECA', 'Ethiopia', 'Canada', 'Kenya']\\n\\n Answer = The unique aspect of the Ethiopian calendar compared to the USA calendar is that Ethiopia has thirteen months in a year, while the USA calendar follows the Gregorian calendar with twelve months. This distinction sets the Ethiopian calendar apart from the USA calendar. The Ethiopian calendar is recognized by the Ethiopian Calendar Association (ECA) and the World Calendar Association (WCA).\\n\\nCanada and Kenya are also mentioned in relation to the Ethiopian calendar, suggesting potential similarities or connections between their calendar systems.\\n\\nFor more information, you can visit the Ethiopian Calendar Association (ECA) website: [Ethiopian Calendar Association](https://www.ethiopiancalendar.org/) and the World Calendar Association (WCA) website: [World Calendar Association](https://www.worldcalendar.org/).\\n \\n only return {\"entity_exist\": true , \"messages\":\"\" } if all entities are mentioned inside the answer in\\n only return {\"entity_exist\": false , \"messages\": \" Entity x hasn't been mentioned inside the answer\"} if the entity is not mentioned properly .\\n ", "agent": "human"}], "output": "{\"entity_exist\": true , \"messages\":\"\" }", "llm_name": "gpt-3.5-turbo"} diff --git a/src/tests/data/test_entity_citation_validator_test_entity_citation_succeeds_in_qa_1.jsonl b/src/tests/data/test_entity_citation_validator_test_entity_citation_succeeds_in_qa_1.jsonl index c3f778b4..d89de60b 100644 --- a/src/tests/data/test_entity_citation_validator_test_entity_citation_succeeds_in_qa_1.jsonl +++ b/src/tests/data/test_entity_citation_validator_test_entity_citation_succeeds_in_qa_1.jsonl @@ -1,5 +1,8 @@ -{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nGiven your specialized expertise, historical context, and your mission to facilitate Machine-Learning-based solutions, determine which action and its corresponding arguments would be the most scientifically sound and efficient approach to achieve the described task.\\n\\n**Task Description**: Tell me a fact about Star Trek: The Next Generation?\\n\\n**Possible Actions**:\\n{\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"string\"\\n }\\n}\\n\\n**Task Context**:\\nTell me a fact about Star Trek: The Next Generation?\\n\\n\\n**History of Previous Actions**:\\n\\n\\nYou should only respond in JSON format as described below without any extra text.\\nResponse Format:\\n{\\n \"command\": {\\n \"name\": \"tool/command name you choose\",\\n \"args\": {\\n \"arg name\": \"value\"\\n }\\n }\\n}\\nEnsure the response can be parsed by Python json.loads\\n\\nIf you believe the task is complete and no further actions are necessary, respond with \"Finished\".\\n\\nFollow the described fromat strictly.\\n\\n", "agent": "human"}], "output": "{\\n \"command\": {\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"Interesting facts about Star Trek: The Next Generation\"\\n }\\n }\\n}", "llm_name": "gpt-3.5-turbo"} -{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nGiven your specialized expertise, historical context, and your mission to facilitate Machine-Learning-based solutions, determine which action and its corresponding arguments would be the most scientifically sound and efficient approach to achieve the described task.\\n\\n**Task Description**: Tell me a fact about Star Trek: The Next Generation?\\n\\n**Possible Actions**:\\n{\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"string\"\\n }\\n}\\n\\n**Task Context**:\\nTell me a fact about Star Trek: The Next Generation?\\n\\n\\n**History of Previous Actions**:\\nGoogle Search{'query': 'Interesting facts about Star Trek: The Next Generation'}\\nFact: In Star Trek: The Next Generation (STNG), the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation, establishes a set of interstellar laws known as the \"Federation Charter.\" This document outlines the principles and regulations governing member worlds and their interactions. \\n\\nYou should only respond in JSON format as described below without any extra text.\\nResponse Format:\\n{\\n \"command\": {\\n \"name\": \"tool/command name you choose\",\\n \"args\": {\\n \"arg name\": \"value\"\\n }\\n }\\n}\\nEnsure the response can be parsed by Python json.loads\\n\\nIf you believe the task is complete and no further actions are necessary, respond with \"Finished\".\\n\\nFollow the described fromat strictly.\\n\\n", "agent": "human"}], "output": "{\\n \"command\": {\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"Interesting facts about Star Trek: The Next Generation\"\\n }\\n }\\n}", "llm_name": "gpt-3.5-turbo"} -{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nGiven your specialized expertise, historical context, and your mission to facilitate Machine-Learning-based solutions, determine which action and its corresponding arguments would be the most scientifically sound and efficient approach to achieve the described task.\\n\\n**Task Description**: Tell me a fact about Star Trek: The Next Generation?\\n\\n**Possible Actions**:\\n{\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"string\"\\n }\\n}\\n\\n**Task Context**:\\nTell me a fact about Star Trek: The Next Generation?\\n\\n\\n**History of Previous Actions**:\\nGoogle Search{'query': 'Interesting facts about Star Trek: The Next Generation'}\\nFact: In Star Trek: The Next Generation (STNG), the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation, establishes a set of interstellar laws known as the \"Federation Charter.\" This document outlines the principles and regulations governing member worlds and their interactions. \\nGoogle Search{'query': 'Interesting facts about Star Trek: The Next Generation'}\\nFact: In Star Trek: The Next Generation (STNG), the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation, establishes a set of interstellar laws known as the \"Federation Charter.\" This document outlines the principles and regulations governing member worlds and their interactions. \\n\\nYou should only respond in JSON format as described below without any extra text.\\nResponse Format:\\n{\\n \"command\": {\\n \"name\": \"tool/command name you choose\",\\n \"args\": {\\n \"arg name\": \"value\"\\n }\\n }\\n}\\nEnsure the response can be parsed by Python json.loads\\n\\nIf you believe the task is complete and no further actions are necessary, respond with \"Finished\".\\n\\nFollow the described fromat strictly.\\n\\n", "agent": "human"}], "output": "\\n{\\n \"command\": {\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"Interesting facts about Star Trek: The Next Generation\"\\n }\\n }\\n}", "llm_name": "gpt-3.5-turbo"} -{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nContext: Tell me a fact about Star Trek: The Next Generation?\\n\\n\\nAction - Result History:\\nGoogle Search{'query': 'Interesting facts about Star Trek: The Next Generation'}\\nFact: In Star Trek: The Next Generation (STNG), the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation, establishes a set of interstellar laws known as the \"Federation Charter.\" This document outlines the principles and regulations governing member worlds and their interactions. \\nGoogle Search{'query': 'Interesting facts about Star Trek: The Next Generation'}\\nFact: In Star Trek: The Next Generation (STNG), the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation, establishes a set of interstellar laws known as the \"Federation Charter.\" This document outlines the principles and regulations governing member worlds and their interactions. \\nGoogle Search{'query': 'Interesting facts about Star Trek: The Next Generation'}\\nFact: In Star Trek: The Next Generation (STNG), the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation, establishes a set of interstellar laws known as the \"Federation Charter.\" This document outlines the principles and regulations governing member worlds and their interactions. \\n\\nGiven the context and the action-result history, please complete the task mentioned. Include any links you used from the context and history in the result.\\nTask: Tell me a fact about Star Trek: The Next Generation?\\nResult:\\n", "agent": "human"}], "output": "In Star Trek: The Next Generation (STNG), the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation, establishes a set of interstellar laws known as the \"Federation Charter.\" This document outlines the principles and regulations governing member worlds and their interactions. \\n\\nFor more interesting facts about Star Trek: The Next Generation, you can visit the official Star Trek website: https://www.startrek.com/shows/star-trek-the-next-generation.", "llm_name": "gpt-3.5-turbo"} -{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nContext: Tell me a fact about Star Trek: The Next Generation?\\n\\n\\nAction - Result History:\\nGoogle Search{'query': 'Interesting facts about Star Trek: The Next Generation'}\\nFact: In Star Trek: The Next Generation (STNG), the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation, establishes a set of interstellar laws known as the \"Federation Charter.\" This document outlines the principles and regulations governing member worlds and their interactions. \\nGoogle Search{'query': 'Interesting facts about Star Trek: The Next Generation'}\\nFact: In Star Trek: The Next Generation (STNG), the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation, establishes a set of interstellar laws known as the \"Federation Charter.\" This document outlines the principles and regulations governing member worlds and their interactions. \\nGoogle Search{'query': 'Interesting facts about Star Trek: The Next Generation'}\\nFact: In Star Trek: The Next Generation (STNG), the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation, establishes a set of interstellar laws known as the \"Federation Charter.\" This document outlines the principles and regulations governing member worlds and their interactions. \\nIn Star Trek: The Next Generation (STNG), the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation, establishes a set of interstellar laws known as the \"Federation Charter.\" This document outlines the principles and regulations governing member worlds and their interactions. \\n\\nFor more interesting facts about Star Trek: The Next Generation, you can visit the official Star Trek website: https://www.startrek.com/shows/star-trek-the-next-generation.\\nremember to address these entities Google, in final the answer.\\n\\nGiven the context and the action-result history, please complete the task mentioned. Include any links you used from the context and history in the result.\\nTask: Tell me a fact about Star Trek: The Next Generation?\\nResult:\\n", "agent": "human"}], "output": "In Star Trek: The Next Generation (STNG), the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation, establishes a set of interstellar laws known as the \"Federation Charter.\" This document outlines the principles and regulations governing member worlds and their interactions. For more interesting facts about Star Trek: The Next Generation, you can visit the official Star Trek website: [Star Trek: The Next Generation](https://www.startrek.com/shows/star-trek-the-next-generation). This information was retrieved through Google searches.", "llm_name": "gpt-3.5-turbo"} +{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nGiven your specialized expertise, historical context, and your mission to facilitate Machine-Learning-based solutions, determine which action and its corresponding arguments would be the most scientifically sound and efficient approach to achieve the described task.\\n\\n**Task Description**: Tell me a fact about Star Trek: The Next Generation?\\n\\n**Possible Actions**:\\n{\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"string\"\\n },\\n \"usage\": \"Get answers from Google Search\"\\n}\\n\\n**Task Context**:\\nTell me a fact about Star Trek: The Next Generation?\\n\\n\\n**History of Previous Actions**:\\n\\n\\nYou should only select the actions specified in **Possible Actions**\\nYou should only respond in JSON format as described below without any extra text.\\nResponse Format:\\n{\\n \"command\": {\\n \"name\": \"tool/command name you choose\",\\n \"args\": {\\n \"arg name\": \"value\"\\n }\\n }\\n}\\nEnsure the response can be parsed by Python json.loads\\n\\nFollow the described format strictly.\\n\\n", "agent": "human"}], "output": "{\\n \"command\": {\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"Interesting facts about Star Trek: The Next Generation\"\\n }\\n }\\n}", "llm_name": "gpt-3.5-turbo"} +{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nGiven your specialized expertise, historical context, and your mission to facilitate Machine-Learning-based solutions, determine which action and its corresponding arguments would be the most scientifically sound and efficient approach to achieve the described task.\\n\\n**Task Description**: Tell me a fact about Star Trek: The Next Generation?\\n\\n**Possible Actions**:\\n{\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"string\"\\n },\\n \"usage\": \"Get answers from Google Search\"\\n}\\n\\n**Task Context**:\\nTell me a fact about Star Trek: The Next Generation?\\n\\n\\n**History of Previous Actions**:\\nAction: Google Search starts, Args: {'query': 'Interesting facts about Star Trek: The Next Generation'}\\nAction: Google Search finishes, Observation: Star Trek is a 2009 movice. In it, the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation. This document outlines the principles and regulations governing member worlds and their interactions. \\n\\nYou should only select the actions specified in **Possible Actions**\\nYou should only respond in JSON format as described below without any extra text.\\nResponse Format:\\n{\\n \"command\": {\\n \"name\": \"tool/command name you choose\",\\n \"args\": {\\n \"arg name\": \"value\"\\n }\\n }\\n}\\nEnsure the response can be parsed by Python json.loads\\n\\nFollow the described format strictly.\\n\\n", "agent": "human"}], "output": "{\\n \"command\": {\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"Interesting facts about Star Trek: The Next Generation\"\\n }\\n }\\n}", "llm_name": "gpt-3.5-turbo"} +{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nGiven your specialized expertise, historical context, and your mission to facilitate Machine-Learning-based solutions, determine which action and its corresponding arguments would be the most scientifically sound and efficient approach to achieve the described task.\\n\\n**Task Description**: Tell me a fact about Star Trek: The Next Generation?\\n\\n**Possible Actions**:\\n{\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"string\"\\n },\\n \"usage\": \"Get answers from Google Search\"\\n}\\n\\n**Task Context**:\\nTell me a fact about Star Trek: The Next Generation?\\n\\n\\n**History of Previous Actions**:\\nAction: Google Search starts, Args: {'query': 'Interesting facts about Star Trek: The Next Generation'}\\nAction: Google Search finishes, Observation: Star Trek is a 2009 movice. In it, the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation. This document outlines the principles and regulations governing member worlds and their interactions. \\nAction: Google Search starts, Args: {'query': 'Interesting facts about Star Trek: The Next Generation'}\\nAction: Google Search finishes, Observation: Star Trek is a 2009 movice. In it, the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation. This document outlines the principles and regulations governing member worlds and their interactions. \\n\\nYou should only select the actions specified in **Possible Actions**\\nYou should only respond in JSON format as described below without any extra text.\\nResponse Format:\\n{\\n \"command\": {\\n \"name\": \"tool/command name you choose\",\\n \"args\": {\\n \"arg name\": \"value\"\\n }\\n }\\n}\\nEnsure the response can be parsed by Python json.loads\\n\\nFollow the described format strictly.\\n\\n", "agent": "human"}], "output": "{\\n \"command\": {\\n \"name\": \"Google Search\",\\n \"args\": {\\n \"query\": \"Interesting facts about Star Trek: The Next Generation\"\\n }\\n }\\n}", "llm_name": "gpt-3.5-turbo"} +{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nContext: Tell me a fact about Star Trek: The Next Generation?\\n\\n\\nAction - Result History:\\nAction: Google Search starts, Args: {'query': 'Interesting facts about Star Trek: The Next Generation'}\\nAction: Google Search finishes, Observation: Star Trek is a 2009 movice. In it, the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation. This document outlines the principles and regulations governing member worlds and their interactions. \\nAction: Google Search starts, Args: {'query': 'Interesting facts about Star Trek: The Next Generation'}\\nAction: Google Search finishes, Observation: Star Trek is a 2009 movice. In it, the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation. This document outlines the principles and regulations governing member worlds and their interactions. \\nAction: Google Search starts, Args: {'query': 'Interesting facts about Star Trek: The Next Generation'}\\nAction: Google Search finishes, Observation: Star Trek is a 2009 movice. In it, the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation. This document outlines the principles and regulations governing member worlds and their interactions. \\n\\nGiven the context and the action-result history, please complete the task mentioned. Include any links you used from the context and history in the result.\\nTask: Tell me a fact about Star Trek: The Next Generation?\\nResult:\\n", "agent": "human"}], "output": "One interesting fact about Star Trek: The Next Generation is that it originally aired from 1987 to 1994 and is set in the 24th century, about 100 years after the original Star Trek series. The show follows the adventures of the Starship Enterprise and its crew, led by Captain Jean-Luc Picard. It is known for its exploration of moral and ethical dilemmas, as well as its groundbreaking special effects for the time.\\n\\nFor more information, you can visit the official Star Trek website: https://www.startrek.com/", "llm_name": "gpt-3.5-turbo"} +{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nContext: Tell me a fact about Star Trek: The Next Generation?\\n\\n\\nAction - Result History:\\nAction: Google Search starts, Args: {'query': 'Interesting facts about Star Trek: The Next Generation'}\\nAction: Google Search finishes, Observation: Star Trek is a 2009 movice. In it, the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation. This document outlines the principles and regulations governing member worlds and their interactions. \\nAction: Google Search starts, Args: {'query': 'Interesting facts about Star Trek: The Next Generation'}\\nAction: Google Search finishes, Observation: Star Trek is a 2009 movice. In it, the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation. This document outlines the principles and regulations governing member worlds and their interactions. \\nAction: Google Search starts, Args: {'query': 'Interesting facts about Star Trek: The Next Generation'}\\nAction: Google Search finishes, Observation: Star Trek is a 2009 movice. In it, the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation. This document outlines the principles and regulations governing member worlds and their interactions. \\nOne interesting fact about Star Trek: The Next Generation is that it originally aired from 1987 to 1994 and is set in the 24th century, about 100 years after the original Star Trek series. The show follows the adventures of the Starship Enterprise and its crew, led by Captain Jean-Luc Picard. It is known for its exploration of moral and ethical dilemmas, as well as its groundbreaking special effects for the time.\\n\\nFor more information, you can visit the official Star Trek website: https://www.startrek.com/\\nremember to address these entities Google Search, the United Federation of Planets, in final the answer.\\n\\nGiven the context and the action-result history, please complete the task mentioned. Include any links you used from the context and history in the result.\\nTask: Tell me a fact about Star Trek: The Next Generation?\\nResult:\\n", "agent": "human"}], "output": "One interesting fact about Star Trek: The Next Generation is that it originally aired from 1987 to 1994 and is set in the 24th century, about 100 years after the original Star Trek series. The show follows the adventures of the Starship Enterprise and its crew, led by Captain Jean-Luc Picard. It is known for its exploration of moral and ethical dilemmas, as well as its groundbreaking special effects for the time.\\n\\nFor more information, you can visit the official Star Trek website: [Star Trek Official Website](https://www.startrek.com/)\\n\\nThe United Federation of Planets, a coalition of various planetary governments working for peace and cooperation, plays a significant role in the Star Trek universe.", "llm_name": "gpt-3.5-turbo"} +{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nContext: Tell me a fact about Star Trek: The Next Generation?\\n\\n\\nAction - Result History:\\nAction: Google Search starts, Args: {'query': 'Interesting facts about Star Trek: The Next Generation'}\\nAction: Google Search finishes, Observation: Star Trek is a 2009 movice. In it, the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation. This document outlines the principles and regulations governing member worlds and their interactions. \\nAction: Google Search starts, Args: {'query': 'Interesting facts about Star Trek: The Next Generation'}\\nAction: Google Search finishes, Observation: Star Trek is a 2009 movice. In it, the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation. This document outlines the principles and regulations governing member worlds and their interactions. \\nAction: Google Search starts, Args: {'query': 'Interesting facts about Star Trek: The Next Generation'}\\nAction: Google Search finishes, Observation: Star Trek is a 2009 movice. In it, the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation. This document outlines the principles and regulations governing member worlds and their interactions. \\nOne interesting fact about Star Trek: The Next Generation is that it originally aired from 1987 to 1994 and is set in the 24th century, about 100 years after the original Star Trek series. The show follows the adventures of the Starship Enterprise and its crew, led by Captain Jean-Luc Picard. It is known for its exploration of moral and ethical dilemmas, as well as its groundbreaking special effects for the time.\\n\\nFor more information, you can visit the official Star Trek website: https://www.startrek.com/\\nremember to address these entities Google Search, the United Federation of Planets, in final the answer.\\nOne interesting fact about Star Trek: The Next Generation is that it originally aired from 1987 to 1994 and is set in the 24th century, about 100 years after the original Star Trek series. The show follows the adventures of the Starship Enterprise and its crew, led by Captain Jean-Luc Picard. It is known for its exploration of moral and ethical dilemmas, as well as its groundbreaking special effects for the time.\\n\\nFor more information, you can visit the official Star Trek website: [Star Trek Official Website](https://www.startrek.com/)\\n\\nThe United Federation of Planets, a coalition of various planetary governments working for peace and cooperation, plays a significant role in the Star Trek universe.\\nremember to address these entities Google Search, in the final answer.\\n\\nGiven the context and the action-result history, please complete the task mentioned. Include any links you used from the context and history in the result.\\nTask: Tell me a fact about Star Trek: The Next Generation?\\nResult:\\n", "agent": "human"}], "output": "One interesting fact about Star Trek: The Next Generation is that it originally aired from 1987 to 1994 and is set in the 24th century, about 100 years after the original Star Trek series. The show follows the adventures of the Starship Enterprise and its crew, led by Captain Jean-Luc Picard. It is known for its exploration of moral and ethical dilemmas, as well as its groundbreaking special effects for the time.\\n\\nFor more information, you can visit the official Star Trek website: [Star Trek Official Website](https://www.startrek.com/)\\n\\nThe United Federation of Planets, a coalition of various planetary governments working for peace and cooperation, plays a significant role in the Star Trek universe.", "llm_name": "gpt-3.5-turbo"} +{"input": [{"text": "\\n I have a question and an answer. I want you to confirm whether the entities from the question are all mentioned in some form within the answer.\\n\\n Question = Action: Google Search finishes, Observation: Star Trek is a 2009 movice. In it, the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation. This document outlines the principles and regulations governing member worlds and their interactions. \\n Entities inside the question = ['Google Search', 'the United Federation of Planets']\\n\\n Answer = One interesting fact about Star Trek: The Next Generation is that it originally aired from 1987 to 1994 and is set in the 24th century, about 100 years after the original Star Trek series. The show follows the adventures of the Starship Enterprise and its crew, led by Captain Jean-Luc Picard. It is known for its exploration of moral and ethical dilemmas, as well as its groundbreaking special effects for the time.\\n\\nFor more information, you can visit the official Star Trek website: [Star Trek Official Website](https://www.startrek.com/)\\n\\nThe United Federation of Planets, a coalition of various planetary governments working for peace and cooperation, plays a significant role in the Star Trek universe.\\n \\n only return {\"entity_exist\": true , \"messages\":\"\" } if all entities are mentioned inside the answer in\\n only return {\"entity_exist\": false , \"messages\": \" Entity x hasn't been mentioned inside the answer\"} if the entity is not mentioned properly .\\n ", "agent": "human"}], "output": "{\"entity_exist\": true , \"messages\":\"\" }", "llm_name": "gpt-3.5-turbo"} +{"input": [{"text": "\\n I have a question and an answer. I want you to confirm whether the entities from the question are all mentioned in some form within the answer.\\n\\n Question = Action: Google Search finishes, Observation: Star Trek is a 2009 movice. In it, the United Federation of Planets, a coalition of various planetary governments working for peace and cooperation. This document outlines the principles and regulations governing member worlds and their interactions. \\n Entities inside the question = ['Google Search', 'the United Federation of Planets']\\n\\n Answer = One interesting fact about Star Trek: The Next Generation is that it originally aired from 1987 to 1994 and is set in the 24th century, about 100 years after the original Star Trek series. The show follows the adventures of the Starship Enterprise and its crew, led by Captain Jean-Luc Picard. It is known for its exploration of moral and ethical dilemmas, as well as its groundbreaking special effects for the time.\\n\\nFor more information, you can visit the official Star Trek website: [Star Trek Official Website](https://www.startrek.com/)\\n\\nThe United Federation of Planets, a coalition of various planetary governments working for peace and cooperation, plays a significant role in the Star Trek universe.\\n \\n only return {\"entity_exist\": true , \"messages\":\"\" } if all entities are mentioned inside the answer in\\n only return {\"entity_exist\": false , \"messages\": \" Entity x hasn't been mentioned inside the answer\"} if the entity is not mentioned properly .\\n ", "agent": "human"}], "output": "{\"entity_exist\": true , \"messages\":\"\" }", "llm_name": "gpt-3.5-turbo"} diff --git a/src/tests/integration_tests/test_entity_citation_validator.py b/src/tests/integration_tests/test_entity_citation_validator.py index 16ee9167..2215d286 100644 --- a/src/tests/integration_tests/test_entity_citation_validator.py +++ b/src/tests/integration_tests/test_entity_citation_validator.py @@ -76,6 +76,8 @@ def test_entity_citation_succeeds_in_qa( logger.info(results[0].content) result_entities = [s.lower() for s in extract_entities(results[0].content)] expected_entities = [s.lower() for s in expected_entities] + logger.error(result_entities) + logger.error(expected_entities) for entity in expected_entities: set_a = set(entity.split()) # Convert each string in a to a set of words match_found = any( @@ -83,7 +85,7 @@ def test_entity_citation_succeeds_in_qa( for result_entity in result_entities ) - if match_found: + if match_found or entity in results[0].content.lower(): pass else: assert False, entity + " was not found in resource"