From eda92ca1898a75c91b1d2f8a4d93230327f98496 Mon Sep 17 00:00:00 2001 From: Ferran Llamas Date: Fri, 23 Aug 2024 17:39:13 +0200 Subject: [PATCH] Be able to log payloads from specific kbids for debugging purposes (#2412) --- nucliadb/src/nucliadb/search/api/v1/ask.py | 2 ++ nucliadb/src/nucliadb/search/api/v1/find.py | 3 ++- nucliadb/src/nucliadb/search/api/v1/search.py | 2 ++ nucliadb/src/nucliadb/search/search/utils.py | 15 ++++++++++++ .../search/unit/search/search/test_utils.py | 24 +++++++++++++++++++ nucliadb_utils/src/nucliadb_utils/const.py | 1 + .../src/nucliadb_utils/featureflagging.py | 8 +++++++ 7 files changed, 54 insertions(+), 1 deletion(-) diff --git a/nucliadb/src/nucliadb/search/api/v1/ask.py b/nucliadb/src/nucliadb/search/api/v1/ask.py index 67b452b0a0..f6f8d08c4b 100644 --- a/nucliadb/src/nucliadb/search/api/v1/ask.py +++ b/nucliadb/src/nucliadb/search/api/v1/ask.py @@ -26,6 +26,7 @@ from nucliadb.models.responses import HTTPClientError from nucliadb.search.api.v1.router import KB_PREFIX, api from nucliadb.search.search.chat.ask import AskResult, ask, handled_ask_exceptions +from nucliadb.search.search.utils import maybe_log_request_payload from nucliadb_models.resource import NucliaDBRoles from nucliadb_models.search import ( AskRequest, @@ -74,6 +75,7 @@ async def create_ask_response( x_synchronous: bool, resource: Optional[str] = None, ) -> Response: + maybe_log_request_payload(kbid, "/ask", ask_request) ask_request.max_tokens = parse_max_tokens(ask_request.max_tokens) ask_result: AskResult = await ask( kbid=kbid, diff --git a/nucliadb/src/nucliadb/search/api/v1/find.py b/nucliadb/src/nucliadb/search/api/v1/find.py index f61a1eb9d7..2cac1a6d9d 100644 --- a/nucliadb/src/nucliadb/search/api/v1/find.py +++ b/nucliadb/src/nucliadb/search/api/v1/find.py @@ -32,7 +32,7 @@ from nucliadb.search.api.v1.utils import fastapi_query from nucliadb.search.search.exceptions import InvalidQueryError from nucliadb.search.search.find import find -from nucliadb.search.search.utils import min_score_from_query_params +from nucliadb.search.search.utils import maybe_log_request_payload, min_score_from_query_params from nucliadb_models.common import FieldTypeName from nucliadb_models.resource import ExtractedDataTypeName, NucliaDBRoles from nucliadb_models.search import ( @@ -191,6 +191,7 @@ async def _find_endpoint( x_forwarded_for: str, ) -> Union[KnowledgeboxFindResults, HTTPClientError]: try: + maybe_log_request_payload(kbid, "/find", item) results, incomplete, _ = await find(kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for) response.status_code = 206 if incomplete else 200 return results diff --git a/nucliadb/src/nucliadb/search/api/v1/search.py b/nucliadb/src/nucliadb/search/api/v1/search.py index b432256acf..ebd27a5665 100644 --- a/nucliadb/src/nucliadb/search/api/v1/search.py +++ b/nucliadb/src/nucliadb/search/api/v1/search.py @@ -37,6 +37,7 @@ from nucliadb.search.search.pgcatalog import pgcatalog_enabled, pgcatalog_search from nucliadb.search.search.query import QueryParser from nucliadb.search.search.utils import ( + maybe_log_request_payload, min_score_from_payload, min_score_from_query_params, should_disable_vector_search, @@ -280,6 +281,7 @@ async def catalog( returns bm25 results on titles and it does not support vector search. It is useful for listing resources in a knowledge box. """ + maybe_log_request_payload(kbid, "/catalog", item) start_time = time() try: sort = item.sort diff --git a/nucliadb/src/nucliadb/search/search/utils.py b/nucliadb/src/nucliadb/search/search/utils.py index 539b1e0e5f..22d71e3d33 100644 --- a/nucliadb/src/nucliadb/search/search/utils.py +++ b/nucliadb/src/nucliadb/search/search/utils.py @@ -17,9 +17,16 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . # +import logging from typing import Optional, Union +from pydantic import BaseModel + from nucliadb_models.search import BaseSearchRequest, MinScore +from nucliadb_utils import const +from nucliadb_utils.utilities import has_feature + +logger = logging.getLogger(__name__) def is_empty_query(request: BaseSearchRequest) -> bool: @@ -70,3 +77,11 @@ def min_score_from_payload(min_score: Optional[Union[float, MinScore]]) -> MinSc elif isinstance(min_score, float): return MinScore(bm25=0, semantic=min_score) return min_score + + +def maybe_log_request_payload(kbid: str, endpoint: str, item: BaseModel): + if has_feature(const.Features.LOG_REQUEST_PAYLOADS, context={"kbid": kbid}, default=False): + logger.info( + "Request payload", + extra={"kbid": kbid, "endpoint": endpoint, "payload": item.model_dump_json()}, + ) diff --git a/nucliadb/tests/search/unit/search/search/test_utils.py b/nucliadb/tests/search/unit/search/search/test_utils.py index 9c721ebc72..747893146d 100644 --- a/nucliadb/tests/search/unit/search/search/test_utils.py +++ b/nucliadb/tests/search/unit/search/search/test_utils.py @@ -17,12 +17,15 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . # +import unittest + import pytest from nucliadb.search.search.utils import ( has_user_vectors, is_empty_query, is_exact_match_only_query, + maybe_log_request_payload, should_disable_vector_search, ) from nucliadb_models.search import SearchRequest @@ -80,3 +83,24 @@ def test_has_user_vectors(item, has_vectors): ) def test_should_disable_vectors(item, disable_vectors): assert should_disable_vector_search(item) is disable_vectors + + +def test_maybe_log_request_payload(): + with unittest.mock.patch("nucliadb.search.search.utils.logger") as mock_logger: + with unittest.mock.patch("nucliadb.search.search.utils.has_feature") as mock_feature: + mock_feature.return_value = True + maybe_log_request_payload( + "kbid", + "/endpoint", + SearchRequest(query="query", vector=[1.0, 2.0]), + ) + assert mock_logger.info.call_count == 1 + + mock_feature.return_value = False + + maybe_log_request_payload( + "kbid", + "/endpoint", + SearchRequest(query="query", vector=[1.0, 2.0]), + ) + assert mock_logger.info.call_count == 1 diff --git a/nucliadb_utils/src/nucliadb_utils/const.py b/nucliadb_utils/src/nucliadb_utils/const.py index e9a4a5d5ee..c9178cdd4e 100644 --- a/nucliadb_utils/src/nucliadb_utils/const.py +++ b/nucliadb_utils/src/nucliadb_utils/const.py @@ -85,3 +85,4 @@ class Features: SKIP_EXTERNAL_INDEX = "nucliadb_skip_external_index" NATS_SYNC_ACK = "nucliadb_nats_sync_ack" DEPRECATED_CHAT_ENABLED = "nucliadb_deprecated_chat_enabled" + LOG_REQUEST_PAYLOADS = "nucliadb_log_request_payloads" diff --git a/nucliadb_utils/src/nucliadb_utils/featureflagging.py b/nucliadb_utils/src/nucliadb_utils/featureflagging.py index 489ae7d4c1..a366b68b90 100644 --- a/nucliadb_utils/src/nucliadb_utils/featureflagging.py +++ b/nucliadb_utils/src/nucliadb_utils/featureflagging.py @@ -85,6 +85,14 @@ class Settings(pydantic_settings.BaseSettings): "rollout": 0, "variants": {"environment": ["local"]}, }, + const.Features.LOG_REQUEST_PAYLOADS: { + "rollout": 0, + "variants": {"environment": ["none"]}, + }, + const.Features.DEPRECATED_CHAT_ENABLED: { + "rollout": 0, + "variants": {"environment": ["none"]}, + }, }