From 241f6bb1e2eb1c78f9c9180ade5cd696c6ce497e Mon Sep 17 00:00:00 2001 From: Ferran Llamas Date: Fri, 26 Jul 2024 18:09:46 +0200 Subject: [PATCH] Do not log error when we are being optimistic about field presence on rag strategy prompt building --- .../src/nucliadb/search/search/chat/prompt.py | 7 ++++- .../src/nucliadb/search/search/paragraphs.py | 27 +++++++++++-------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/nucliadb/src/nucliadb/search/search/chat/prompt.py b/nucliadb/src/nucliadb/search/search/chat/prompt.py index b832f21f80..cb8f82b5cd 100644 --- a/nucliadb/src/nucliadb/search/search/chat/prompt.py +++ b/nucliadb/src/nucliadb/search/search/chat/prompt.py @@ -217,7 +217,10 @@ async def get_resource_field_extracted_text( try: field_type, field_key = field_id.strip("/").split("/") except ValueError: - logger.error(f"Invalid field id: {field_id}. Skipping getting extracted text.") + logger.info( + f"Invalid field id: {field_id}. Skipping getting extracted text.", + extra={"kbid": kb_obj.kbid}, + ) return None field = await resource.get_field(field_key, KB_REVERSE[field_type], load=False) if field is None: @@ -389,6 +392,7 @@ async def hierarchy_prompt_context( start=0, end=500, extracted_text_cache=etcache, + log_on_missing_field=False, ) summary_text = await paragraphs.get_paragraph_text( kbid=kbid, @@ -397,6 +401,7 @@ async def hierarchy_prompt_context( start=0, end=1000, extracted_text_cache=etcache, + log_on_missing_field=False, ) resources[rid] = ExtraCharsParagraph( title=title_text, diff --git a/nucliadb/src/nucliadb/search/search/paragraphs.py b/nucliadb/src/nucliadb/search/search/paragraphs.py index b62dce2932..0e1e1754b0 100644 --- a/nucliadb/src/nucliadb/search/search/paragraphs.py +++ b/nucliadb/src/nucliadb/search/search/paragraphs.py @@ -117,6 +117,7 @@ async def get_paragraph_from_full_text( end: int, split: Optional[str] = None, extracted_text_cache: Optional[ExtractedTextCache] = None, + log_on_missing_field: bool = True, 
) -> str: """ Pull paragraph from full text stored in database. @@ -125,13 +126,14 @@ async def get_paragraph_from_full_text( """ extracted_text = await get_field_extracted_text(field, cache=extracted_text_cache) if extracted_text is None: - logger.warning( - "Extracted_text for field does not exist on DB. This should not happen.", - extra={ - "field_id": field.resource_unique_id, - "kbid": field.kbid, - }, - ) + if log_on_missing_field: + logger.warning( + "Extracted_text for field does not exist on DB. This should not happen.", + extra={ + "field_id": field.resource_unique_id, + "kbid": field.kbid, + }, + ) return "" if split not in (None, ""): @@ -156,14 +158,16 @@ async def get_paragraph_text( ResourceORM ] = None, # allow passing in orm_resource to avoid extra DB calls or txn issues extracted_text_cache: Optional[ExtractedTextCache] = None, + log_on_missing_field: bool = True, ) -> str: if orm_resource is None: orm_resource = await get_resource_from_cache(kbid, rid) if orm_resource is None: - logger.warning( - "Resource does not exist on DB. This should not happen.", - extra={"resource_id": rid, "kbid": kbid, "field": field}, - ) + if log_on_missing_field: + logger.warning( + "Resource does not exist on DB. This should not happen.", + extra={"resource_id": rid, "kbid": kbid, "field": field}, + ) return "" _, field_type, field = field.split("/") @@ -176,6 +180,7 @@ async def get_paragraph_text( end=end, split=split, extracted_text_cache=extracted_text_cache, + log_on_missing_field=log_on_missing_field, ) if highlight: