diff --git a/nucliadb/src/nucliadb/search/api/v1/knowledgebox.py b/nucliadb/src/nucliadb/search/api/v1/knowledgebox.py index 25156f2f20..f900e5928b 100644 --- a/nucliadb/src/nucliadb/search/api/v1/knowledgebox.py +++ b/nucliadb/src/nucliadb/search/api/v1/knowledgebox.py @@ -37,10 +37,10 @@ from nucliadb.search.api.v1.utils import fastapi_query from nucliadb.search.search.shards import get_shard from nucliadb.search.settings import settings +from nucliadb_models.internal.shards import KnowledgeboxShards from nucliadb_models.resource import NucliaDBRoles from nucliadb_models.search import ( KnowledgeboxCounters, - KnowledgeboxShards, SearchParamDefaults, ) from nucliadb_protos.noderesources_pb2 import Shard diff --git a/nucliadb_models/src/nucliadb_models/internal/shards.py b/nucliadb_models/src/nucliadb_models/internal/shards.py new file mode 100644 index 0000000000..ceae4be23e --- /dev/null +++ b/nucliadb_models/src/nucliadb_models/internal/shards.py @@ -0,0 +1,95 @@ +# Copyright (C) 2021 Bosutech XXI S.L. +# +# nucliadb is offered under the AGPL v3.0 and as commercial software. +# For commercial licensing, contact us at info@nuclia.com. +# +# AGPL: +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# +from enum import Enum +from typing import List, Type, TypeVar + +from google.protobuf.json_format import MessageToDict +from pydantic import BaseModel + +from nucliadb_protos.writer_pb2 import ShardObject as PBShardObject +from nucliadb_protos.writer_pb2 import Shards as PBShards + +_T = TypeVar("_T") + + +class DocumentServiceEnum(str, Enum): + DOCUMENT_V0 = "DOCUMENT_V0" + DOCUMENT_V1 = "DOCUMENT_V1" + DOCUMENT_V2 = "DOCUMENT_V2" + + +class ParagraphServiceEnum(str, Enum): + PARAGRAPH_V0 = "PARAGRAPH_V0" + PARAGRAPH_V1 = "PARAGRAPH_V1" + PARAGRAPH_V2 = "PARAGRAPH_V2" + PARAGRAPH_V3 = "PARAGRAPH_V3" + + +class VectorServiceEnum(str, Enum): + VECTOR_V0 = "VECTOR_V0" + VECTOR_V1 = "VECTOR_V1" + + +class RelationServiceEnum(str, Enum): + RELATION_V0 = "RELATION_V0" + RELATION_V1 = "RELATION_V1" + RELATION_V2 = "RELATION_V2" + + +class ShardCreated(BaseModel): + id: str + document_service: DocumentServiceEnum + paragraph_service: ParagraphServiceEnum + vector_service: VectorServiceEnum + relation_service: RelationServiceEnum + + +class ShardReplica(BaseModel): + node: str + shard: ShardCreated + + +class ShardObject(BaseModel): + shard: str + replicas: List[ShardReplica] + + @classmethod + def from_message(cls: Type[_T], message: PBShardObject) -> _T: + return cls( + **MessageToDict( + message, + preserving_proto_field_name=True, + including_default_value_fields=True, + ) + ) + + +class KnowledgeboxShards(BaseModel): + kbid: str + shards: List[ShardObject] + + @classmethod + def from_message(cls: Type[_T], message: PBShards) -> _T: + as_dict = MessageToDict( + message, + preserving_proto_field_name=True, + including_default_value_fields=True, + ) + return cls(**as_dict) diff --git a/nucliadb_models/src/nucliadb_models/search.py b/nucliadb_models/src/nucliadb_models/search.py index b89a0c2b99..cd7e5da88e 100644 --- a/nucliadb_models/src/nucliadb_models/search.py +++ b/nucliadb_models/src/nucliadb_models/search.py @@ -19,9 +19,8 @@ # from dataclasses import 
dataclass from enum import Enum -from typing import Any, Dict, List, Literal, Optional, Set, Type, TypeVar, Union +from typing import Any, Dict, List, Literal, Optional, Set, TypeVar, Union -from google.protobuf.json_format import MessageToDict from pydantic import BaseModel, Field, field_validator, model_validator from pydantic.json_schema import SkipJsonSchema from typing_extensions import Annotated, Self @@ -31,16 +30,24 @@ from nucliadb_models.resource import ExtractedDataTypeName, Resource from nucliadb_models.security import RequestSecurity from nucliadb_models.utils import DateTime -from nucliadb_models.vectors import SemanticModelMetadata, VectorSimilarity from nucliadb_protos.audit_pb2 import ClientType from nucliadb_protos.nodereader_pb2 import DocumentScored, OrderBy from nucliadb_protos.nodereader_pb2 import ParagraphResult as PBParagraphResult from nucliadb_protos.utils_pb2 import RelationNode -from nucliadb_protos.writer_pb2 import ShardObject as PBShardObject -from nucliadb_protos.writer_pb2 import Shards as PBShards # Bw/c import to avoid breaking users from nucliadb_models.internal.predict import Ner, QueryInfo, SentenceSearch, TokenSearch # noqa isort: skip +from nucliadb_models.internal.shards import ( # noqa isort: skip + DocumentServiceEnum, + ParagraphServiceEnum, + VectorServiceEnum, + RelationServiceEnum, + ShardCreated, + ShardObject, + ShardReplica, + KnowledgeboxShards, +) + _T = TypeVar("_T") @@ -381,78 +388,6 @@ class KnowledgeBoxCount(BaseModel): sentences: int -class DocumentServiceEnum(str, Enum): - DOCUMENT_V0 = "DOCUMENT_V0" - DOCUMENT_V1 = "DOCUMENT_V1" - DOCUMENT_V2 = "DOCUMENT_V2" - - -class ParagraphServiceEnum(str, Enum): - PARAGRAPH_V0 = "PARAGRAPH_V0" - PARAGRAPH_V1 = "PARAGRAPH_V1" - PARAGRAPH_V2 = "PARAGRAPH_V2" - PARAGRAPH_V3 = "PARAGRAPH_V3" - - -class VectorServiceEnum(str, Enum): - VECTOR_V0 = "VECTOR_V0" - VECTOR_V1 = "VECTOR_V1" - - -class RelationServiceEnum(str, Enum): - RELATION_V0 = "RELATION_V0" - RELATION_V1 = 
"RELATION_V1" - RELATION_V2 = "RELATION_V2" - - -class ShardCreated(BaseModel): - id: str - document_service: DocumentServiceEnum - paragraph_service: ParagraphServiceEnum - vector_service: VectorServiceEnum - relation_service: RelationServiceEnum - - -class ShardReplica(BaseModel): - node: str - shard: ShardCreated - - -class ShardObject(BaseModel): - shard: str - replicas: List[ShardReplica] - - @classmethod - def from_message(cls: Type[_T], message: PBShardObject) -> _T: - return cls( - **MessageToDict( - message, - preserving_proto_field_name=True, - including_default_value_fields=True, - ) - ) - - -class KnowledgeboxShards(BaseModel): - kbid: str - actual: int - similarity: VectorSimilarity - shards: List[ShardObject] - model: Optional[SemanticModelMetadata] = None - - @classmethod - def from_message(cls: Type[_T], message: PBShards) -> _T: - as_dict = MessageToDict( - message, - preserving_proto_field_name=True, - including_default_value_fields=True, - ) - as_dict["similarity"] = VectorSimilarity.from_message(message.similarity) - if message.HasField("model"): - as_dict["model"] = SemanticModelMetadata.from_message(message.model) - return cls(**as_dict) - - class SearchParamDefaults: query = ParamDefault(default="", title="Query", description="The query to search for") suggest_query = ParamDefault(