Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PGCatalog: Metrics & Fix paging #2347

Merged
merged 2 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions nucliadb/src/nucliadb/ingest/orm/processor/pgcatalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@
from nucliadb.common.maindb.driver import Transaction
from nucliadb.common.maindb.pg import PGDriver, PGTransaction
from nucliadb.common.maindb.utils import get_driver
from nucliadb_telemetry import metrics
from nucliadb_utils import const
from nucliadb_utils.utilities import has_feature

from ..resource import Resource

# Timing metric for pg_catalog write operations; the "type" label is filled in
# per-operation via observer.wrap({"type": ...}) on the update/delete functions below.
observer = metrics.Observer("pg_catalog_write", labels={"type": ""})


def _pg_transaction(txn: Transaction) -> PGTransaction:
    """Narrow a generic maindb Transaction to the PG-specific type.

    This is a typing-only cast (no runtime check): callers are expected to
    invoke it only when the driver is known to be PostgreSQL.
    """
    pg_txn = cast(PGTransaction, txn)
    return pg_txn
Expand All @@ -39,6 +42,7 @@ def pgcatalog_enabled(kbid):
)


@observer.wrap({"type": "update"})
async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource):
if not pgcatalog_enabled(kbid):
return
Expand Down Expand Up @@ -69,6 +73,7 @@ async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource):
)


@observer.wrap({"type": "delete"})
async def pgcatalog_delete(txn: Transaction, kbid: str, rid: str):
if not pgcatalog_enabled(kbid):
return
Expand Down
43 changes: 25 additions & 18 deletions nucliadb/src/nucliadb/search/api/v1/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from nucliadb.search.api.v1.utils import fastapi_query
from nucliadb.search.requesters.utils import Method, debug_nodes_info, node_query
from nucliadb.search.search.exceptions import InvalidQueryError
from nucliadb.search.search.merge import merge_results
from nucliadb.search.search.merge import fetch_resources, merge_results
from nucliadb.search.search.pgcatalog import pgcatalog_enabled, pgcatalog_search
from nucliadb.search.search.query import QueryParser
from nucliadb.search.search.utils import (
Expand Down Expand Up @@ -328,26 +328,33 @@ async def catalog(
# consistent and most up to date results
use_read_replica_nodes=False,
)

# We need to merge
search_results = await merge_results(
results,
count=item.page_size,
page=item.page_number,
kbid=kbid,
show=[ResourceProperties.BASIC],
field_type_filter=[],
extracted=[],
sort=sort,
requested_relations=pb_query.relation_subgraph,
min_score=query_parser.min_score,
highlight=False,
)
else:
result = await pgcatalog_search(query_parser)
results = [result]
item.page_number = 0
search_results = KnowledgeboxSearchResults()
search_results.fulltext = await pgcatalog_search(query_parser)
search_results.resources = await fetch_resources(
resources=[r.rid for r in search_results.fulltext.results],
kbid=kbid,
show=[ResourceProperties.BASIC],
field_type_filter=[],
extracted=[],
)
queried_nodes = []

# We need to merge
search_results = await merge_results(
results,
count=item.page_size,
page=item.page_number,
kbid=kbid,
show=[ResourceProperties.BASIC],
field_type_filter=[],
extracted=[],
sort=sort,
requested_relations=pb_query.relation_subgraph,
min_score=query_parser.min_score,
highlight=False,
)
# We don't need sentences, paragraphs or relations on the catalog
# response, so we set to None so that fastapi doesn't include them
# in the response payload
Expand Down
55 changes: 32 additions & 23 deletions nucliadb/src/nucliadb/search/search/pgcatalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@

from nucliadb.common.maindb.pg import PGDriver
from nucliadb.common.maindb.utils import get_driver
from nucliadb_models.labels import translate_system_to_alias_label
from nucliadb_models.metadata import ResourceProcessingStatus
from nucliadb_models.search import SortField, SortOrder
from nucliadb_protos.nodereader_pb2 import (
DocumentResult,
DocumentSearchResponse,
FacetResult,
FacetResults,
SearchResponse,
from nucliadb_models.search import (
ResourceResult,
Resources,
SortField,
SortOrder,
)
from nucliadb_telemetry import metrics
from nucliadb_utils import const
from nucliadb_utils.utilities import has_feature

Expand Down Expand Up @@ -150,17 +150,18 @@ def pgcatalog_enabled(kbid):
)


async def pgcatalog_search(query_parser: QueryParser):
@metrics.Observer("pg_catalog_search").wrap()
async def pgcatalog_search(query_parser: QueryParser) -> Resources:
# Prepare SQL query
query, query_params = _prepare_query(query_parser)

async with _pg_driver()._get_connection() as conn, conn.cursor(row_factory=dict_row) as cur:
facets: dict[str, FacetResults] = {}
facets = {}

# Faceted search
if query_parser.faceted:
tmp_facets: dict[str, list[FacetResult]] = {
translate_label(f): [] for f in query_parser.faceted
tmp_facets: dict[str, dict[str, int]] = {
translate_label(f): {} for f in query_parser.faceted
}
await cur.execute(
f"SELECT unnest(labels) AS label, COUNT(*) FROM ({query}) fc GROUP BY 1 ORDER BY 1",
Expand All @@ -171,9 +172,9 @@ async def pgcatalog_search(query_parser: QueryParser):
parent = "/".join(label.split("/")[:-1])
count = row["count"]
if parent in tmp_facets:
tmp_facets[parent].append(FacetResult(tag=label, total=count))
tmp_facets[parent][translate_system_to_alias_label(label)] = count

facets = {k: FacetResults(facetresults=v) for k, v in tmp_facets.items()}
facets = {translate_system_to_alias_label(k): v for k, v in tmp_facets.items()}

# Totals
await cur.execute(
Expand All @@ -194,14 +195,22 @@ async def pgcatalog_search(query_parser: QueryParser):
)
data = await cur.fetchall()

return SearchResponse(
document=DocumentSearchResponse(
results=[
DocumentResult(uuid=str(r["rid"]).replace("-", ""), field="/a/title") for r in data
],
facets=facets,
total=total,
page_number=query_parser.page_number,
next_page=(offset + len(data) < total),
)
return Resources(
facets=facets,
results=[
ResourceResult(
rid=str(r["rid"]).replace("-", ""),
field="title",
field_type="a",
labels=[label for label in r["labels"] if label.startswith("/l/")],
score=0,
)
for r in data
],
query=query_parser.query,
total=total,
page_number=query_parser.page_number,
page_size=query_parser.page_size,
next_page=(offset + len(data) < total),
min_score=0,
)
1 change: 1 addition & 0 deletions nucliadb/tests/nucliadb/integration/search/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -1618,6 +1618,7 @@ async def test_catalog_pagination(
assert resp.status_code == 200
body = resp.json()
assert len(body["resources"]) <= page_size
assert body["fulltext"]["page_number"] == page_number
for resource_id, resource_data in body["resources"].items():
resource_created_date = datetime.fromisoformat(resource_data["created"]).timestamp()
if resource_id in resource_uuids:
Expand Down
Loading