Skip to content

Commit

Permalink
Remove deprecated features from search (#2618)
Browse files: browse the repository at this point in the history
* Remove deprecated search features

* Remove tests

* Properly annotate ignore E501 for long lines

* Fix tests using hardcoded features

* Fix more tests

* Fix more tests
  • Loading branch information
jotare authored Nov 11, 2024
1 parent 50d375e commit 2591ec8
Show file tree
Hide file tree
Showing 13 changed files with 71 additions and 146 deletions.
4 changes: 2 additions & 2 deletions nucliadb/tests/nucliadb/integration/search/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from nucliadb.common.context import ApplicationContext
from nucliadb.tests.vectors import V1, V2, Q
from nucliadb_models.labels import Label, LabelSetKind
from nucliadb_models.search import MinScore
from nucliadb_models.search import MinScore, SearchOptions
from nucliadb_protos.resources_pb2 import (
Classification,
ExtractedTextWrapper,
Expand Down Expand Up @@ -351,7 +351,7 @@ async def _test_filtering(nucliadb_reader: AsyncClient, kbid: str, filters):
json=dict(
query="",
filters=filters,
features=["paragraph", "vector"],
features=[SearchOptions.KEYWORD, SearchOptions.SEMANTIC],
vector=Q,
min_score=MinScore(semantic=-1).model_dump(),
),
Expand Down
11 changes: 6 additions & 5 deletions nucliadb/tests/nucliadb/integration/search/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from nucliadb.ingest.consumer import shard_creator
from nucliadb.search.predict import SendToPredictError
from nucliadb.tests.vectors import V1
from nucliadb_models.search import SearchOptions
from nucliadb_protos import resources_pb2 as rpb
from nucliadb_protos.audit_pb2 import AuditRequest, ClientType
from nucliadb_protos.utils_pb2 import RelationNode
Expand Down Expand Up @@ -973,8 +974,8 @@ async def test_search_pagination(
page_size = 5

for feature, result_key in [
("paragraph", "paragraphs"),
("document", "fulltext"),
(SearchOptions.KEYWORD.value, "paragraphs"),
(SearchOptions.FULLTEXT.value, "fulltext"),
]:
total_pages = math.floor(total / page_size)
for page_number in range(0, total_pages):
Expand Down Expand Up @@ -1069,7 +1070,7 @@ async def test_resource_search_pagination(
f"/kb/{kbid}/resource/{rid}/search",
params={
"query": query,
"features": ["paragraph"],
"features": [SearchOptions.KEYWORD],
"page_number": page_number,
"page_size": page_size,
},
Expand All @@ -1083,7 +1084,7 @@ async def test_resource_search_pagination(
f"/kb/{kbid}/resource/{rid}/search",
params={
"query": query,
"features": ["paragraph"],
"features": [SearchOptions.KEYWORD],
"page_number": page_number + 1,
"page_size": page_size,
},
Expand All @@ -1109,7 +1110,7 @@ async def test_search_endpoints_handle_predict_errors(
resp = await nucliadb_reader.post(
f"/kb/{kbid}/{endpoint}",
json={
"features": ["vector"],
"features": [SearchOptions.SEMANTIC],
"query": "something",
},
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from httpx import AsyncClient

from nucliadb.tests.vectors import V1
from nucliadb_models.search import SearchOptions
from tests.nucliadb.integration.search.test_search import get_resource_with_a_sentence
from tests.utils import inject_message

Expand Down Expand Up @@ -76,8 +77,8 @@ async def resource(nucliadb_grpc, knowledgebox):
@pytest.mark.parametrize(
"feature",
[
"paragraph",
"vector",
SearchOptions.KEYWORD,
SearchOptions.SEMANTIC,
],
)
async def test_search_with_date_range_filters_nucliadb_dates(
Expand Down Expand Up @@ -133,8 +134,8 @@ async def test_search_with_date_range_filters_nucliadb_dates(
@pytest.mark.parametrize(
"feature",
[
"paragraph",
"vector",
SearchOptions.KEYWORD,
SearchOptions.SEMANTIC,
],
)
async def test_search_with_date_range_filters_origin_dates(
Expand Down Expand Up @@ -188,7 +189,7 @@ async def _test_find_date_ranges(
found,
):
payload = {"query": "Ramon", "features": features}
if "vector" in features:
if SearchOptions.SEMANTIC in features:
payload["vector"] = V1
if creation_start is not None:
payload["range_creation_start"] = creation_start.isoformat()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import pytest
from httpx import AsyncClient

from nucliadb_models.search import SearchOptions


@pytest.mark.asyncio
async def test_search_sort_by_score(
Expand Down Expand Up @@ -201,7 +203,7 @@ async def test_list_all_resources_by_creation_and_modification_dates_with_empty_
f"/kb/{kbid}/search",
params={
"query": "",
"features": ["document"],
"features": [SearchOptions.FULLTEXT.value],
"fields": ["a/title"],
"page_number": page_number,
"page_size": page_size,
Expand Down
5 changes: 3 additions & 2 deletions nucliadb/tests/nucliadb/integration/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
)
from nucliadb_models import common, metadata
from nucliadb_models.resource import Resource
from nucliadb_models.search import SearchOptions
from nucliadb_protos import resources_pb2 as rpb
from nucliadb_protos import writer_pb2 as wpb
from nucliadb_protos.dataset_pb2 import TaskType, TrainSet
Expand Down Expand Up @@ -920,7 +921,7 @@ async def test_pagination_limits(
f"/kb/kbid/find",
json={
"query": "foo",
"features": ["vector"],
"features": [SearchOptions.SEMANTIC],
"page_size": 1000,
},
)
Expand All @@ -933,7 +934,7 @@ async def test_pagination_limits(
f"/kb/kbid/find",
json={
"query": "foo",
"features": ["vector"],
"features": [SearchOptions.SEMANTIC],
"page_number": 30,
"page_size": 100,
},
Expand Down
9 changes: 5 additions & 4 deletions nucliadb/tests/nucliadb/integration/test_deletion.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import pytest
from httpx import AsyncClient

from nucliadb_models.search import SearchOptions
from nucliadb_protos.resources_pb2 import (
ExtractedTextWrapper,
ExtractedVectorsWrapper,
Expand Down Expand Up @@ -141,7 +142,7 @@ class FieldData:
f"/kb/{knowledgebox}/find",
json={
"query": "Original",
"features": ["paragraph"],
"features": [SearchOptions.KEYWORD],
"min_score": {"bm25": 0.0},
},
timeout=None,
Expand All @@ -155,7 +156,7 @@ class FieldData:
f"/kb/{knowledgebox}/find",
json={
"query": "Extracted",
"features": ["paragraph"],
"features": [SearchOptions.KEYWORD],
},
timeout=None,
)
Expand Down Expand Up @@ -233,7 +234,7 @@ class FieldData:
f"/kb/{knowledgebox}/find",
json={
"query": "Extracted",
"features": ["paragraph"],
"features": [SearchOptions.KEYWORD],
"min_score": {"bm25": 0.0},
},
timeout=None,
Expand All @@ -252,7 +253,7 @@ class FieldData:
f"/kb/{knowledgebox}/find",
json={
"query": "Modified",
"features": ["paragraph"],
"features": [SearchOptions.KEYWORD],
},
timeout=None,
)
Expand Down
7 changes: 4 additions & 3 deletions nucliadb/tests/nucliadb/integration/test_find.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import pytest
from httpx import AsyncClient

from nucliadb_models.search import SearchOptions
from nucliadb_protos.writer_pb2_grpc import WriterStub
from nucliadb_utils.exceptions import LimitsExceededError

Expand Down Expand Up @@ -105,14 +106,14 @@ async def test_find_does_not_support_fulltext_search(
knowledgebox,
):
resp = await nucliadb_reader.get(
f"/kb/{knowledgebox}/find?query=title&features=document&features=paragraph",
f"/kb/{knowledgebox}/find?query=title&features=fulltext&features=keyword",
)
assert resp.status_code == 422
assert "fulltext search not supported" in resp.json()["detail"][0]["msg"]

resp = await nucliadb_reader.post(
f"/kb/{knowledgebox}/find",
json={"query": "title", "features": ["document", "paragraph"]},
json={"query": "title", "features": [SearchOptions.FULLTEXT, SearchOptions.KEYWORD]},
)
assert resp.status_code == 422
assert "fulltext search not supported" in resp.json()["detail"][0]["msg"]
Expand Down Expand Up @@ -244,7 +245,7 @@ async def test_story_7286(
f"/kb/{knowledgebox}/find",
json={
"query": "title",
"features": ["paragraph", "vector", "relations"],
"features": [SearchOptions.KEYWORD, SearchOptions.SEMANTIC, SearchOptions.RELATIONS],
"shards": [],
"highlight": True,
"autofilter": False,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

from nucliadb.common.maindb.driver import Driver
from nucliadb.learning_proxy import LearningConfiguration
from nucliadb_models.search import SearchOptions
from nucliadb_protos import knowledgebox_pb2, resources_pb2, utils_pb2, writer_pb2
from nucliadb_protos.writer_pb2_grpc import WriterStub
from tests.utils import inject_message
Expand Down Expand Up @@ -127,7 +128,7 @@ async def test_matryoshka_embeddings(
f"/kb/{kbid}/search",
params={
"query": "matryoshka",
"features": ["vector"],
"features": [SearchOptions.SEMANTIC.value],
"min_score": 0.99999,
"with_duplicates": True,
},
Expand Down
4 changes: 3 additions & 1 deletion nucliadb/tests/nucliadb/integration/test_synonyms.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#
import pytest

from nucliadb_models.search import SearchOptions


@pytest.mark.asyncio
async def test_custom_synonyms_api(
Expand Down Expand Up @@ -197,7 +199,7 @@ async def test_search_errors_if_vectors_or_relations_requested(
resp = await nucliadb_reader.post(
f"/kb/{kbid}/search",
json=dict(
features=["paragraph", "vector", "relations"],
features=[SearchOptions.KEYWORD, SearchOptions.SEMANTIC, SearchOptions.RELATIONS],
query="planet",
with_synonyms=True,
),
Expand Down
Loading

0 comments on commit 2591ec8

Please sign in to comment.