From db85e1c12b94861e99f3929eaa8f38a607a7cf09 Mon Sep 17 00:00:00 2001 From: Joan Antoni RE Date: Tue, 19 Dec 2023 13:10:40 +0100 Subject: [PATCH] Use new type annotation syntax (#1677) * Use new type annotation syntax * Update setup.py for packages with support for 3.8 * Run pre-checks with all supported python versions --- .github/workflows/nucliadb_dataset.yml | 2 +- .github/workflows/nucliadb_models.yml | 4 +- .github/workflows/nucliadb_sdk.yml | 2 +- .github/workflows/nucliadb_utils.yml | 2 +- .../common/cluster/grpc_node_dummy.py | 6 +- nucliadb/nucliadb/common/maindb/driver.py | 4 +- nucliadb/nucliadb/common/maindb/local.py | 8 +- nucliadb/nucliadb/common/maindb/pg.py | 8 +- nucliadb/nucliadb/common/maindb/redis.py | 10 +-- nucliadb/nucliadb/common/maindb/tikv.py | 4 +- nucliadb/nucliadb/ingest/consumer/pull.py | 4 +- nucliadb/nucliadb/ingest/fields/base.py | 10 +-- .../nucliadb/ingest/fields/conversation.py | 6 +- nucliadb/nucliadb/ingest/orm/entities.py | 20 ++--- nucliadb/nucliadb/ingest/orm/knowledgebox.py | 8 +- .../nucliadb/ingest/orm/processor/__init__.py | 10 +-- nucliadb/nucliadb/ingest/orm/resource.py | 22 +++--- nucliadb/nucliadb/ingest/processing.py | 16 ++-- nucliadb/nucliadb/ingest/serialize.py | 10 +-- nucliadb/nucliadb/ingest/settings.py | 6 +- nucliadb/nucliadb/ingest/tests/fixtures.py | 4 +- nucliadb/nucliadb/reader/api/models.py | 4 +- nucliadb/nucliadb/reader/api/v1/download.py | 4 +- nucliadb/nucliadb/reader/api/v1/resource.py | 40 +++++----- nucliadb/nucliadb/reader/tests/fixtures.py | 4 +- nucliadb/nucliadb/search/api/v1/find.py | 16 ++-- .../nucliadb/search/api/v1/knowledgebox.py | 6 +- .../nucliadb/search/api/v1/resource/search.py | 16 ++-- nucliadb/nucliadb/search/api/v1/search.py | 26 +++---- nucliadb/nucliadb/search/api/v1/suggest.py | 26 +++---- nucliadb/nucliadb/search/predict.py | 18 ++--- nucliadb/nucliadb/search/requesters/utils.py | 14 ++-- nucliadb/nucliadb/search/search/cache.py | 10 +-- nucliadb/nucliadb/search/search/chat/query.py | 10 +-- nucliadb/nucliadb/search/search/fetch.py | 24 +++--- nucliadb/nucliadb/search/search/find.py | 3 +- nucliadb/nucliadb/search/search/find_merge.py | 50 ++++++------- nucliadb/nucliadb/search/search/merge.py | 74 +++++++++---------- nucliadb/nucliadb/search/search/query.py | 46 ++++++------ nucliadb/nucliadb/search/tests/fixtures.py | 8 +- nucliadb/nucliadb/standalone/auth.py | 12 +-- .../tests/integration/test_configuration.py | 4 +- .../tests/integration/test_entities.py | 3 +- .../tests/integration/test_relations.py | 6 +- nucliadb/nucliadb/train/api/models.py | 4 +- .../train/generators/image_classifier.py | 12 +-- .../train/generators/sentence_classifier.py | 6 +- .../train/generators/token_classifier.py | 26 +++---- nucliadb/nucliadb/train/generators/utils.py | 8 +- nucliadb/nucliadb/train/nodes.py | 4 +- .../train/tests/test_token_classification.py | 3 +- nucliadb/nucliadb/train/upload.py | 8 +- nucliadb/nucliadb/writer/api/v1/field.py | 20 ++--- nucliadb/nucliadb/writer/api/v1/upload.py | 34 ++++----- nucliadb/nucliadb/writer/layouts/__init__.py | 4 +- nucliadb/nucliadb/writer/tests/fixtures.py | 6 +- nucliadb/nucliadb/writer/tests/test_files.py | 8 +- .../writer/tests/test_reprocess_file_field.py | 8 +- .../nucliadb/writer/tests/test_resources.py | 14 ++-- nucliadb/nucliadb/writer/tests/test_tus.py | 3 +- nucliadb/nucliadb/writer/tus/dm.py | 6 +- nucliadb/nucliadb/writer/tus/gcs.py | 4 +- nucliadb/nucliadb/writer/tus/s3.py | 4 +- nucliadb/nucliadb/writer/tus/storage.py | 4 +- 
nucliadb/nucliadb/writer/tus/utils.py | 3 +- nucliadb_client/setup.py | 1 + nucliadb_dataset/setup.py | 1 + nucliadb_sdk/setup.py | 1 + nucliadb_telemetry/setup.py | 1 + nucliadb_utils/setup.py | 1 + 70 files changed, 389 insertions(+), 395 deletions(-) diff --git a/.github/workflows/nucliadb_dataset.yml b/.github/workflows/nucliadb_dataset.yml index 52e5091ab1..eb49079040 100644 --- a/.github/workflows/nucliadb_dataset.yml +++ b/.github/workflows/nucliadb_dataset.yml @@ -16,7 +16,7 @@ jobs: pre-checks: strategy: matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11"] uses: ./.github/workflows/_component_prechecks.yml with: python_version: "${{ matrix.python-version }}" diff --git a/.github/workflows/nucliadb_models.yml b/.github/workflows/nucliadb_models.yml index cb9bfebb2b..b5efbe8adf 100644 --- a/.github/workflows/nucliadb_models.yml +++ b/.github/workflows/nucliadb_models.yml @@ -17,8 +17,8 @@ jobs: name: NucliaDBModelsTests strategy: matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11"] uses: ./.github/workflows/_component_prechecks.yml with: python_version: "${{ matrix.python-version }}" - component: "nucliadb_models" \ No newline at end of file + component: "nucliadb_models" diff --git a/.github/workflows/nucliadb_sdk.yml b/.github/workflows/nucliadb_sdk.yml index aada3a4379..c679772136 100644 --- a/.github/workflows/nucliadb_sdk.yml +++ b/.github/workflows/nucliadb_sdk.yml @@ -16,7 +16,7 @@ jobs: pre-checks: strategy: matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11"] uses: ./.github/workflows/_component_prechecks.yml with: python_version: "${{ matrix.python-version }}" diff --git a/.github/workflows/nucliadb_utils.yml b/.github/workflows/nucliadb_utils.yml index fe07df9ef7..cf1d629b84 100644 --- a/.github/workflows/nucliadb_utils.yml +++ b/.github/workflows/nucliadb_utils.yml @@ -17,7 +17,7 @@ jobs: pre-checks: strategy: matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11"] uses: ./.github/workflows/_component_prechecks.yml with: python_version: "${{ matrix.python-version }}" diff --git a/nucliadb/nucliadb/common/cluster/grpc_node_dummy.py b/nucliadb/nucliadb/common/cluster/grpc_node_dummy.py index 83d2f0f205..4bbde78f97 100644 --- a/nucliadb/nucliadb/common/cluster/grpc_node_dummy.py +++ b/nucliadb/nucliadb/common/cluster/grpc_node_dummy.py @@ -17,7 +17,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>.
# -from typing import Any, Dict, List +from typing import Any from nucliadb_protos.nodereader_pb2 import ( EdgeList, @@ -39,7 +39,7 @@ class DummyWriterStub: # pragma: no cover - calls: Dict[str, List[Any]] = {} + calls: dict[str, list[Any]] = {} async def NewShard(self, data): # pragma: no cover self.calls.setdefault("NewShard", []).append(data) @@ -90,7 +90,7 @@ async def GC(self, request: ShardId) -> EmptyResponse: # pragma: no cover class DummyReaderStub: # pragma: no cover - calls: Dict[str, List[Any]] = {} + calls: dict[str, list[Any]] = {} async def GetShard(self, data): # pragma: no cover self.calls.setdefault("GetShard", []).append(data) diff --git a/nucliadb/nucliadb/common/maindb/driver.py b/nucliadb/nucliadb/common/maindb/driver.py index c0e5cd2337..28ad63d9f4 100644 --- a/nucliadb/nucliadb/common/maindb/driver.py +++ b/nucliadb/nucliadb/common/maindb/driver.py @@ -21,7 +21,7 @@ import asyncio from contextlib import asynccontextmanager -from typing import AsyncGenerator, List, Optional +from typing import AsyncGenerator, Optional DEFAULT_SCAN_LIMIT = 10 DEFAULT_BATCH_SCAN_LIMIT = 500 @@ -60,7 +60,7 @@ async def count(self, match: str) -> int: class Driver: initialized = False - _abort_tasks: List[asyncio.Task] = [] + _abort_tasks: list[asyncio.Task] = [] async def initialize(self): raise NotImplementedError() diff --git a/nucliadb/nucliadb/common/maindb/local.py b/nucliadb/nucliadb/common/maindb/local.py index 7ca97ec6c5..63d5e1c0ac 100644 --- a/nucliadb/nucliadb/common/maindb/local.py +++ b/nucliadb/nucliadb/common/maindb/local.py @@ -19,7 +19,7 @@ # import glob import os -from typing import Dict, List, Optional +from typing import Optional from nucliadb.common.maindb.driver import ( DEFAULT_BATCH_SCAN_LIMIT, @@ -37,9 +37,9 @@ class LocalTransaction(Transaction): - modified_keys: Dict[str, bytes] - visited_keys: Dict[str, bytes] - deleted_keys: List[str] + modified_keys: dict[str, bytes] + visited_keys: dict[str, bytes] + deleted_keys: list[str] def __init__(self, url: str, driver: Driver): self.url = url diff --git a/nucliadb/nucliadb/common/maindb/pg.py b/nucliadb/nucliadb/common/maindb/pg.py index 9073e7df6d..decebd6907 100644 --- a/nucliadb/nucliadb/common/maindb/pg.py +++ b/nucliadb/nucliadb/common/maindb/pg.py @@ -20,7 +20,7 @@ from __future__ import annotations import asyncio -from typing import Any, AsyncGenerator, List, Optional, Union +from typing import Any, AsyncGenerator, Optional, Union import asyncpg import backoff @@ -69,7 +69,7 @@ async def delete(self, key: str) -> None: async with self.lock: await self.connection.execute("DELETE FROM resources WHERE key = $1", key) - async def batch_get(self, keys: List[str]) -> List[Optional[bytes]]: + async def batch_get(self, keys: list[str]) -> list[Optional[bytes]]: async with self.lock: records = { record["key"]: record["value"] @@ -146,7 +146,7 @@ async def commit(self): self.open = False await self.connection.close() - async def batch_get(self, keys: List[str]): + async def batch_get(self, keys: list[str]): return await self.data_layer.batch_get(keys) async def get(self, key: str) -> Optional[bytes]: @@ -189,7 +189,7 @@ async def abort(self): async def commit(self): ... 
- async def batch_get(self, keys: List[str]): + async def batch_get(self, keys: list[str]): return await DataLayer(self.pool).batch_get(keys) async def get(self, key: str) -> Optional[bytes]: diff --git a/nucliadb/nucliadb/common/maindb/redis.py b/nucliadb/nucliadb/common/maindb/redis.py index d33845248e..d409a59eb5 100644 --- a/nucliadb/nucliadb/common/maindb/redis.py +++ b/nucliadb/nucliadb/common/maindb/redis.py @@ -17,7 +17,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # -from typing import Any, Dict, List, Optional +from typing import Any, Optional from nucliadb.common.maindb.driver import ( DEFAULT_BATCH_SCAN_LIMIT, @@ -35,9 +35,9 @@ class RedisTransaction(Transaction): - modified_keys: Dict[str, bytes] - visited_keys: Dict[str, bytes] - deleted_keys: List[str] + modified_keys: dict[str, bytes] + visited_keys: dict[str, bytes] + deleted_keys: list[str] def __init__(self, redis: Any, driver: Driver): self.redis = redis @@ -84,7 +84,7 @@ async def batch_get(self, keys: list[str]) -> list[Optional[bytes]]: if len(keys) == 0: return [] - bytes_keys: List[bytes] = [x.encode() for x in keys] + bytes_keys: list[bytes] = [x.encode() for x in keys] results = await self.redis.mget(bytes_keys) for idx, key in enumerate(keys): diff --git a/nucliadb/nucliadb/common/maindb/tikv.py b/nucliadb/nucliadb/common/maindb/tikv.py index cdd129ce21..74dfef2863 100644 --- a/nucliadb/nucliadb/common/maindb/tikv.py +++ b/nucliadb/nucliadb/common/maindb/tikv.py @@ -21,7 +21,7 @@ import asyncio import logging -from typing import Any, List, Optional +from typing import Any, Optional import backoff @@ -221,7 +221,7 @@ async def count(self, match: str) -> int: class TiKVDriver(Driver): tikv = None - def __init__(self, url: List[str]): + def __init__(self, url: list[str]): if TiKV is False: raise ImportError("TiKV is not installed") self.url = url diff --git a/nucliadb/nucliadb/ingest/consumer/pull.py b/nucliadb/nucliadb/ingest/consumer/pull.py index 51915e4624..c717ae5d28 100644 --- a/nucliadb/nucliadb/ingest/consumer/pull.py +++ b/nucliadb/nucliadb/ingest/consumer/pull.py @@ -19,7 +19,7 @@ # import asyncio import base64 -from typing import List, Optional +from typing import Optional import nats import nats.errors @@ -48,7 +48,7 @@ class PullWorker: The processing pull endpoint is also described as the "processing proxy" at times.
""" - subscriptions: List[Subscription] + subscriptions: list[Subscription] def __init__( self, diff --git a/nucliadb/nucliadb/ingest/fields/base.py b/nucliadb/nucliadb/ingest/fields/base.py index ae667e31cc..7c3ae29973 100644 --- a/nucliadb/nucliadb/ingest/fields/base.py +++ b/nucliadb/nucliadb/ingest/fields/base.py @@ -21,7 +21,7 @@ import enum from datetime import datetime -from typing import Any, Dict, List, Optional, Tuple, Type +from typing import Any, Optional, Type from nucliadb_protos.resources_pb2 import ( CloudFile, @@ -285,7 +285,7 @@ async def get_extracted_text_cf(self) -> Optional[CloudFile]: async def set_vectors( self, payload: ExtractedVectorsWrapper - ) -> Tuple[Optional[VectorObject], bool, List[str]]: + ) -> tuple[Optional[VectorObject], bool, list[str]]: if self.type in SUBFIELDFIELDS: try: actual_payload: Optional[VectorObject] = await self.get_vectors( @@ -341,7 +341,7 @@ async def get_vectors(self, force=False) -> Optional[VectorObject]: async def set_user_vectors( self, user_vectors: UserVectorsWrapper - ) -> Tuple[UserVectorSet, Dict[str, UserVectorsList]]: + ) -> tuple[UserVectorSet, dict[str, UserVectorsList]]: try: actual_payload: Optional[UserVectorSet] = await self.get_user_vectors( force=True ) @@ -351,7 +351,7 @@ async def set_user_vectors( sf = self.get_storage_field(FieldTypes.USER_FIELD_VECTORS) - vectors_to_delete: Dict[str, UserVectorsList] = {} + vectors_to_delete: dict[str, UserVectorsList] = {} if actual_payload is not None: for vectorset, user_vector in user_vectors.vectors.vectors.items(): for key, vector in user_vector.vectors.items(): @@ -392,7 +392,7 @@ async def get_vectors_cf(self) -> Optional[CloudFile]: async def set_field_metadata( self, payload: FieldComputedMetadataWrapper - ) -> Tuple[FieldComputedMetadata, List[str], Dict[str, List[str]]]: + ) -> tuple[FieldComputedMetadata, list[str], dict[str, list[str]]]: if self.type in SUBFIELDFIELDS: try: actual_payload: Optional[ diff --git a/nucliadb/nucliadb/ingest/fields/conversation.py b/nucliadb/nucliadb/ingest/fields/conversation.py index e9c25d6ea1..b3b2a32b61 100644 --- a/nucliadb/nucliadb/ingest/fields/conversation.py +++ b/nucliadb/nucliadb/ingest/fields/conversation.py @@ -18,7 +18,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # import uuid -from typing import Any, Dict, Optional +from typing import Any, Optional from nucliadb_protos.resources_pb2 import CloudFile from nucliadb_protos.resources_pb2 import Conversation as PBConversation @@ -39,7 +39,7 @@ class PageNotFound(Exception): class Conversation(Field): pbklass = PBConversation type: str = "c" - value: Dict[int, PBConversation] + value: dict[int, PBConversation] metadata: Optional[FieldConversation] _created: bool = False def __init__( self, id: str, resource: Any, pb: Optional[Any] = None, - value: Optional[Dict[int, PBConversation]] = None, + value: Optional[dict[int, PBConversation]] = None, ): super(Conversation, self).__init__(id, resource, pb, value) self.value = {} diff --git a/nucliadb/nucliadb/ingest/orm/entities.py b/nucliadb/nucliadb/ingest/orm/entities.py index 559e498fbd..66325b1518 100644 --- a/nucliadb/nucliadb/ingest/orm/entities.py +++ b/nucliadb/nucliadb/ingest/orm/entities.py @@ -18,7 +18,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>.
# -from typing import AsyncGenerator, Dict, List, Optional, Set, Tuple +from typing import AsyncGenerator, Optional from nucliadb_protos.knowledgebox_pb2 import ( DeletedEntitiesGroups, @@ -87,13 +87,13 @@ async def get_entities_group(self, group: str) -> Optional[EntitiesGroup]: return None return await self.get_entities_group_inner(group) - async def get_entities_groups(self) -> Dict[str, EntitiesGroup]: + async def get_entities_groups(self) -> dict[str, EntitiesGroup]: groups = {} async for group, eg in self.iterate_entities_groups(exclude_deleted=True): groups[group] = eg return groups - async def list_entities_groups(self) -> Dict[str, EntitiesGroupSummary]: + async def list_entities_groups(self) -> dict[str, EntitiesGroupSummary]: groups = {} async for group in self.iterate_entities_groups_names(exclude_deleted=True): stored = await self.get_stored_entities_group(group) @@ -107,7 +107,7 @@ async def list_entities_groups(self) -> Dict[str, EntitiesGroupSummary]: groups[group] = EntitiesGroupSummary() return groups - async def update_entities(self, group: str, entities: Dict[str, Entity]): + async def update_entities(self, group: str, entities: dict[str, Entity]): """Update entities on an entity group. New entities are appended and existing are overwritten. Existing entities not appearing in `entities` are left intact. Use `delete_entities` to delete them instead. @@ -157,7 +157,7 @@ async def set_entities_group_metadata( await self.store_entities_group(group, entities_group) - async def delete_entities(self, group: str, delete: List[str]): + async def delete_entities(self, group: str, delete: list[str]): stored = await self.get_stored_entities_group(group) stored = stored or EntitiesGroup() @@ -229,8 +229,8 @@ async def do_entities_search( eg = EntitiesGroup(entities=entities) return eg - async def get_deleted_entities_groups(self) -> Set[str]: - deleted: Set[str] = set() + async def get_deleted_entities_groups(self) -> set[str]: + deleted: set[str] = set() key = KB_DELETED_ENTITIES_GROUPS.format(kbid=self.kbid) payload = await self.txn.get(key) if payload: @@ -252,7 +252,7 @@ async def entities_group_exists(self, group: str) -> bool: async def iterate_entities_groups( self, exclude_deleted: bool - ) -> AsyncGenerator[Tuple[str, EntitiesGroup], None]: + ) -> AsyncGenerator[tuple[str, EntitiesGroup], None]: async for group in self.iterate_entities_groups_names(exclude_deleted): eg = await self.get_entities_group_inner(group) if eg is None: @@ -284,7 +284,7 @@ async def iterate_entities_groups_names( yield group visited_groups.add(group) - async def get_indexed_entities_groups_names(self) -> Set[str]: + async def get_indexed_entities_groups_names(self) -> set[str]: shard_manager = get_shard_manager() async def query_indexed_entities_group_names( @@ -367,7 +367,7 @@ def merge_entities_groups(indexed: EntitiesGroup, stored: EntitiesGroup): `indexed` share entities. That's also true for common fields. """ - merged_entities: Dict[str, Entity] = {} + merged_entities: dict[str, Entity] = {} merged_entities.update(indexed.entities) merged_entities.update(stored.entities) diff --git a/nucliadb/nucliadb/ingest/orm/knowledgebox.py b/nucliadb/nucliadb/ingest/orm/knowledgebox.py index 4f010e3961..8b13ec89be 100644 --- a/nucliadb/nucliadb/ingest/orm/knowledgebox.py +++ b/nucliadb/nucliadb/ingest/orm/knowledgebox.py @@ -18,7 +18,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>.
# from datetime import datetime -from typing import AsyncGenerator, AsyncIterator, Optional, Sequence, Tuple +from typing import AsyncGenerator, AsyncIterator, Optional, Sequence from uuid import uuid4 from grpc import StatusCode @@ -161,7 +161,7 @@ async def get_kb_uuid(cls, txn: Transaction, slug: str) -> Optional[str]: @classmethod async def get_kbs( cls, txn: Transaction, slug: str, count: int = -1 - ) -> AsyncIterator[Tuple[str, str]]: + ) -> AsyncIterator[tuple[str, str]]: async for key in txn.keys(KB_SLUGS.format(slug=slug), count=count): slug = key.replace(KB_SLUGS_BASE, "") uuid = await cls.get_kb_uuid(txn, slug) @@ -179,7 +179,7 @@ async def create( uuid: Optional[str] = None, config: Optional[KnowledgeBoxConfig] = None, release_channel: ReleaseChannel.ValueType = ReleaseChannel.STABLE, - ) -> Tuple[str, bool]: + ) -> tuple[str, bool]: failed = False exist = await cls.get_kb_uuid(txn, slug) if exist: @@ -272,7 +272,7 @@ async def update( return uuid - async def iterate_kb_nodes(self) -> AsyncIterator[Tuple[AbstractIndexNode, str]]: + async def iterate_kb_nodes(self) -> AsyncIterator[tuple[AbstractIndexNode, str]]: shards_obj = await self.data_manager.get_shards_object(self.kbid) for shard in shards_obj.shards: diff --git a/nucliadb/nucliadb/ingest/orm/processor/__init__.py b/nucliadb/nucliadb/ingest/orm/processor/__init__.py index 79a5ba1be5..f7cb6bf068 100644 --- a/nucliadb/nucliadb/ingest/orm/processor/__init__.py +++ b/nucliadb/nucliadb/ingest/orm/processor/__init__.py @@ -19,7 +19,7 @@ # import asyncio import logging -from typing import Dict, List, Optional, Tuple +from typing import Optional import aiohttp.client_exceptions @@ -93,7 +93,7 @@ class Processor: and can not use the txn id """ - messages: Dict[str, List[writer_pb2.BrokerMessage]] + messages: dict[str, list[writer_pb2.BrokerMessage]] def __init__( self, @@ -217,7 +217,7 @@ async def commit_slug(self, resource: Resource) -> None: @processor_observer.wrap({"type": "txn"}) async def txn( self, - messages: List[writer_pb2.BrokerMessage], + messages: list[writer_pb2.BrokerMessage], seqid: int, partition: str, transaction_check: bool = True, @@ -419,7 +419,7 @@ async def rollback( ) async def deadletter( - self, messages: List[writer_pb2.BrokerMessage], partition: str, seqid: int + self, messages: list[writer_pb2.BrokerMessage], partition: str, seqid: int ) -> None: for seq, message in enumerate(messages): await self.storage.deadletter(message, seq, seqid, partition) @@ -430,7 +430,7 @@ async def apply_resource( message: writer_pb2.BrokerMessage, kb: KnowledgeBox, resource: Optional[Resource] = None, - ) -> Optional[Tuple[Resource, bool]]: + ) -> Optional[tuple[Resource, bool]]: """ Convert a broker message into a resource object, and apply it to the database """ diff --git a/nucliadb/nucliadb/ingest/orm/resource.py b/nucliadb/nucliadb/ingest/orm/resource.py index 6c84772329..91f662df5a 100644 --- a/nucliadb/nucliadb/ingest/orm/resource.py +++ b/nucliadb/nucliadb/ingest/orm/resource.py @@ -23,7 +23,7 @@ import logging from concurrent.futures import ThreadPoolExecutor from functools import partial -from typing import TYPE_CHECKING, Any, AsyncIterator, Optional, Tuple, Type +from typing import TYPE_CHECKING, Any, AsyncIterator, Optional, Type from nucliadb_protos.resources_pb2 import AllFieldIDs as PBAllFieldIDs from nucliadb_protos.resources_pb2 import Basic @@ -143,10 +143,10 @@ def __init__( basic: Optional[PBBasic] = None, disable_vectors: bool = True, ): - self.fields: dict[Tuple[FieldType.ValueType, str], Field] = 
{} + self.fields: dict[tuple[FieldType.ValueType, str], Field] = {} self.conversations: dict[int, PBConversation] = {} self.relations: Optional[PBRelations] = None - self.all_fields_keys: list[Tuple[FieldType.ValueType, str]] = [] + self.all_fields_keys: list[tuple[FieldType.ValueType, str]] = [] self.origin: Optional[PBOrigin] = None self.extra: Optional[PBExtra] = None self.modified: bool = False @@ -589,14 +589,14 @@ async def generate_broker_message(self) -> BrokerMessage: # Fields async def get_fields( self, force: bool = False - ) -> dict[Tuple[FieldType.ValueType, str], Field]: + ) -> dict[tuple[FieldType.ValueType, str], Field]: # Get all fields for type, field in await self.get_fields_ids(force=force): if (type, field) not in self.fields: self.fields[(type, field)] = await self.get_field(field, type) return self.fields - async def _scan_fields_ids(self) -> AsyncIterator[Tuple[FieldType.ValueType, str]]: + async def _scan_fields_ids(self) -> AsyncIterator[tuple[FieldType.ValueType, str]]: # TODO: Remove this method when we are sure that all KBs have the `allfields` key set prefix = KB_RESOURCE_FIELDS.format(kbid=self.kb.kbid, uuid=self.uuid) async for key in self.txn.keys(prefix, count=-1): @@ -608,7 +608,7 @@ async def _scan_fields_ids(self) -> AsyncIterator[Tuple[FieldType.ValueType, str raise AttributeError("Invalid field type") yield (type_id, field) - async def _inner_get_fields_ids(self) -> list[Tuple[FieldType.ValueType, str]]: + async def _inner_get_fields_ids(self) -> list[tuple[FieldType.ValueType, str]]: result = [] all_fields: Optional[PBAllFieldIDs] = await self.get_all_field_ids() if all_fields is not None: @@ -636,7 +636,7 @@ async def _inner_get_fields_ids(self) -> list[Tuple[FieldType.ValueType, str]]: async def get_fields_ids( self, force: bool = False - ) -> list[Tuple[FieldType.ValueType, str]]: + ) -> list[tuple[FieldType.ValueType, str]]: # Get all fields if len(self.all_fields_keys) == 0 or force: self.all_fields_keys = await self._inner_get_fields_ids() @@ -1127,7 +1127,7 @@ async def iterate_sentences( if fm is None: continue - field_metadatas: list[Tuple[Optional[str], FieldMetadata]] = [ + field_metadatas: list[tuple[Optional[str], FieldMetadata]] = [ (None, fm.metadata) ] for subfield_metadata, splitted_metadata in fm.split_metadata.items(): @@ -1237,7 +1237,7 @@ async def iterate_paragraphs( if fm is None: continue - field_metadatas: list[Tuple[Optional[str], FieldMetadata]] = [ + field_metadatas: list[tuple[Optional[str], FieldMetadata]] = [ (None, fm.metadata) ] for subfield_metadata, splitted_metadata in fm.split_metadata.items(): @@ -1314,7 +1314,7 @@ async def iterate_fields( if fm is None: continue - field_metadatas: list[Tuple[Optional[str], FieldMetadata]] = [ + field_metadatas: list[tuple[Optional[str], FieldMetadata]] = [ (None, fm.metadata) ] for subfield_metadata, splitted_metadata in fm.split_metadata.items(): @@ -1370,7 +1370,7 @@ async def generate_train_resource( if fm is None: continue - field_metadatas: list[Tuple[Optional[str], FieldMetadata]] = [ + field_metadatas: list[tuple[Optional[str], FieldMetadata]] = [ (None, fm.metadata) ] for subfield_metadata, splitted_metadata in fm.split_metadata.items(): diff --git a/nucliadb/nucliadb/ingest/processing.py b/nucliadb/nucliadb/ingest/processing.py index dc270c2124..33b891e1ed 100644 --- a/nucliadb/nucliadb/ingest/processing.py +++ b/nucliadb/nucliadb/ingest/processing.py @@ -23,7 +23,7 @@ from collections import defaultdict from contextlib import AsyncExitStack from enum import Enum -from 
typing import TYPE_CHECKING, Any, Dict, List, Optional, TypeVar +from typing import TYPE_CHECKING, Any, Optional, TypeVar import aiohttp import backoff @@ -85,22 +85,22 @@ class PushPayload(BaseModel): source: Optional[Source] = None userid: str - genericfield: Dict[str, models.Text] = {} + genericfield: dict[str, models.Text] = {} # New File - filefield: Dict[str, str] = {} + filefield: dict[str, str] = {} # New Link - linkfield: Dict[str, models.LinkUpload] = {} + linkfield: dict[str, models.LinkUpload] = {} # Diff on Text Field - textfield: Dict[str, models.Text] = {} + textfield: dict[str, models.Text] = {} # Diff on a Layout Field - layoutfield: Dict[str, models.LayoutDiff] = {} + layoutfield: dict[str, models.LayoutDiff] = {} # New conversations to process - conversationfield: Dict[str, models.PushConversation] = {} + conversationfield: dict[str, models.PushConversation] = {} # Only internal partition: int @@ -435,7 +435,7 @@ async def send_to_process( class DummyProcessingEngine(ProcessingEngine): def __init__(self): - self.calls: List[List[Any]] = [] # type: ignore + self.calls: list[list[Any]] = [] # type: ignore self.values = defaultdict(list) self.onprem = True diff --git a/nucliadb/nucliadb/ingest/serialize.py b/nucliadb/nucliadb/ingest/serialize.py index 3452747402..1a36215405 100644 --- a/nucliadb/nucliadb/ingest/serialize.py +++ b/nucliadb/nucliadb/ingest/serialize.py @@ -19,7 +19,7 @@ # import asyncio -from typing import List, Optional +from typing import Optional import nucliadb_models as models from nucliadb.common.maindb.driver import Transaction @@ -63,7 +63,7 @@ async def set_resource_field_extracted_data( field: Field, field_data: ExtractedDataType, field_type_name: FieldTypeName, - wanted_extracted_data: List[ExtractedDataTypeName], + wanted_extracted_data: list[ExtractedDataTypeName], ) -> None: if field_data is None: return @@ -129,9 +129,9 @@ async def set_resource_field_extracted_data( async def serialize( kbid: str, rid: Optional[str], - show: List[ResourceProperties], - field_type_filter: List[FieldTypeName], - extracted: List[ExtractedDataTypeName], + show: list[ResourceProperties], + field_type_filter: list[FieldTypeName], + extracted: list[ExtractedDataTypeName], service_name: Optional[str] = None, slug: Optional[str] = None, ) -> Optional[Resource]: diff --git a/nucliadb/nucliadb/ingest/settings.py b/nucliadb/nucliadb/ingest/settings.py index a68567e586..b0a2f21934 100644 --- a/nucliadb/nucliadb/ingest/settings.py +++ b/nucliadb/nucliadb/ingest/settings.py @@ -18,7 +18,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # from enum import Enum -from typing import List, Optional +from typing import Optional from pydantic import BaseSettings, Field @@ -47,7 +47,7 @@ class DriverSettings(BaseSettings): driver_redis_url: Optional[str] = Field( default=None, description="Redis URL. Example: redis://localhost:6379" ) - driver_tikv_url: Optional[List[str]] = Field( + driver_tikv_url: Optional[list[str]] = Field( default=None, description="TiKV PD (Placement Driver) URL. The URL to the cluster manager of TiKV.
Example: tikv-pd.svc:2379", ) @@ -68,7 +68,7 @@ class DriverSettings(BaseSettings): class Settings(DriverSettings): grpc_port: int = 8030 - partitions: List[str] = ["1"] + partitions: list[str] = ["1"] pull_time_error_backoff: int = 100 disable_pull_worker: bool = False diff --git a/nucliadb/nucliadb/ingest/tests/fixtures.py b/nucliadb/nucliadb/ingest/tests/fixtures.py index 8ffa79008c..abef7574df 100644 --- a/nucliadb/nucliadb/ingest/tests/fixtures.py +++ b/nucliadb/nucliadb/ingest/tests/fixtures.py @@ -22,7 +22,7 @@ from dataclasses import dataclass from datetime import datetime from os.path import dirname, getsize -from typing import List, Optional +from typing import Optional from unittest.mock import AsyncMock, patch import nats @@ -655,7 +655,7 @@ async def create_resource( # 2.5 CONVERSATION FIELD def make_message( - text: str, files: Optional[List[rpb.CloudFile]] = None + text: str, files: Optional[list[rpb.CloudFile]] = None ) -> rpb.Message: msg = rpb.Message( who="myself", diff --git a/nucliadb/nucliadb/reader/api/models.py b/nucliadb/nucliadb/reader/api/models.py index 59233b23c9..c4b55f9f49 100644 --- a/nucliadb/nucliadb/reader/api/models.py +++ b/nucliadb/nucliadb/reader/api/models.py @@ -17,7 +17,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # -from typing import TYPE_CHECKING, Any, Dict, Optional, Union +from typing import TYPE_CHECKING, Any, Optional, Union from pydantic import BaseModel @@ -62,7 +62,7 @@ class ResourceField(BaseModel): FIELD_NAMES_TO_PB_TYPE_MAP = {v: k for k, v in FIELD_TYPES_MAP.items()} -FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP: Dict[FieldTypeName, Any] = { +FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP: dict[FieldTypeName, Any] = { FieldTypeName.TEXT: TextFieldExtractedData, FieldTypeName.FILE: FileFieldExtractedData, FieldTypeName.LINK: LinkFieldExtractedData, diff --git a/nucliadb/nucliadb/reader/api/v1/download.py b/nucliadb/nucliadb/reader/api/v1/download.py index 9ad11c8fd8..6fd1a75ff7 100644 --- a/nucliadb/nucliadb/reader/api/v1/download.py +++ b/nucliadb/nucliadb/reader/api/v1/download.py @@ -19,7 +19,7 @@ # import urllib.parse from enum import Enum -from typing import Optional, Tuple +from typing import Optional from fastapi import HTTPException from fastapi.requests import Request @@ -370,7 +370,7 @@ async def _get_resource_uuid_from_params( return rid -def parse_media_range(range_request: str, file_size: int) -> Tuple[int, int, int]: +def parse_media_range(range_request: str, file_size: int) -> tuple[int, int, int]: # Implemented following this docpage: https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests ranges = range_request.split("bytes=")[-1].split(", ") if len(ranges) > 1: diff --git a/nucliadb/nucliadb/reader/api/v1/resource.py b/nucliadb/nucliadb/reader/api/v1/resource.py index a2d57a16e7..917cd410c9 100644 --- a/nucliadb/nucliadb/reader/api/v1/resource.py +++ b/nucliadb/nucliadb/reader/api/v1/resource.py @@ -17,7 +17,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>.
# -from typing import List, Optional, Union +from typing import Optional, Union from fastapi import Header, HTTPException, Query, Request, Response from fastapi_versioning import version @@ -76,12 +76,12 @@ async def list_resources( txn = await driver.begin() # Filter parameters for serializer - show: List[ResourceProperties] = [ResourceProperties.BASIC] - field_types: List[FieldTypeName] = [] - extracted: List[ExtractedDataTypeName] = [] + show: list[ResourceProperties] = [ResourceProperties.BASIC] + field_types: list[FieldTypeName] = [] + extracted: list[ExtractedDataTypeName] = [] try: - resources: List[Resource] = [] + resources: list[Resource] = [] max_items_to_iterate = (page + 1) * size first_wanted_item_index = (page * size) + 1 # 1-based index current_key_index = 0 @@ -147,11 +147,11 @@ async def get_resource_by_uuid( request: Request, kbid: str, rid: str, - show: List[ResourceProperties] = Query([ResourceProperties.BASIC]), - field_type_filter: List[FieldTypeName] = Query( + show: list[ResourceProperties] = Query([ResourceProperties.BASIC]), + field_type_filter: list[FieldTypeName] = Query( list(FieldTypeName), alias="field_type" ), - extracted: List[ExtractedDataTypeName] = Query( + extracted: list[ExtractedDataTypeName] = Query( [ ExtractedDataTypeName.TEXT, ExtractedDataTypeName.METADATA, @@ -187,11 +187,11 @@ async def get_resource_by_slug( request: Request, kbid: str, rslug: str, - show: List[ResourceProperties] = Query([ResourceProperties.BASIC]), - field_type_filter: List[FieldTypeName] = Query( + show: list[ResourceProperties] = Query([ResourceProperties.BASIC]), + field_type_filter: list[FieldTypeName] = Query( list(FieldTypeName), alias="field_type" ), - extracted: List[ExtractedDataTypeName] = Query( + extracted: list[ExtractedDataTypeName] = Query( [ ExtractedDataTypeName.TEXT, ExtractedDataTypeName.METADATA, @@ -218,9 +218,9 @@ async def _get_resource( rslug: Optional[str] = None, rid: Optional[str] = None, kbid: str, - show: List[ResourceProperties], - field_type_filter: List[FieldTypeName], - extracted: List[ExtractedDataTypeName], + show: list[ResourceProperties], + field_type_filter: list[FieldTypeName], + extracted: list[ExtractedDataTypeName], x_nucliadb_user: str, x_forwarded_for: str, ) -> Resource: @@ -262,8 +262,8 @@ async def get_resource_field_rslug_prefix( rslug: str, field_type: FieldTypeName, field_id: str, - show: List[ResourceFieldProperties] = Query([ResourceFieldProperties.VALUE]), - extracted: List[ExtractedDataTypeName] = Query( + show: list[ResourceFieldProperties] = Query([ResourceFieldProperties.VALUE]), + extracted: list[ExtractedDataTypeName] = Query( [ ExtractedDataTypeName.TEXT, ExtractedDataTypeName.METADATA, @@ -302,8 +302,8 @@ async def get_resource_field_rid_prefix( rid: str, field_type: FieldTypeName, field_id: str, - show: List[ResourceFieldProperties] = Query([ResourceFieldProperties.VALUE]), - extracted: List[ExtractedDataTypeName] = Query( + show: list[ResourceFieldProperties] = Query([ResourceFieldProperties.VALUE]), + extracted: list[ExtractedDataTypeName] = Query( [ ExtractedDataTypeName.TEXT, ExtractedDataTypeName.METADATA, @@ -330,8 +330,8 @@ async def _get_resource_field( kbid: str, field_type: FieldTypeName, field_id: str, - show: List[ResourceFieldProperties], - extracted: List[ExtractedDataTypeName], + show: list[ResourceFieldProperties], + extracted: list[ExtractedDataTypeName], page: Union[str, int], rid: Optional[str] = None, rslug: Optional[str] = None, diff --git a/nucliadb/nucliadb/reader/tests/fixtures.py 
b/nucliadb/nucliadb/reader/tests/fixtures.py index 0777e7c2aa..46f7c12d2a 100644 --- a/nucliadb/nucliadb/reader/tests/fixtures.py +++ b/nucliadb/nucliadb/reader/tests/fixtures.py @@ -20,7 +20,7 @@ import uuid from datetime import datetime from enum import Enum -from typing import List, Optional +from typing import Optional import pytest from httpx import AsyncClient @@ -56,7 +56,7 @@ async def reader_api(test_settings_reader: None, local_files, event_loop): # ty application = create_application() def make_client_fixture( - roles: Optional[List[Enum]] = None, + roles: Optional[list[Enum]] = None, user: str = "", version: str = "1", ) -> AsyncClient: diff --git a/nucliadb/nucliadb/search/api/v1/find.py b/nucliadb/nucliadb/search/api/v1/find.py index 248cadd72c..d25a8ff828 100644 --- a/nucliadb/nucliadb/search/api/v1/find.py +++ b/nucliadb/nucliadb/search/api/v1/find.py @@ -19,7 +19,7 @@ # import json from datetime import datetime -from typing import List, Optional, Union +from typing import Optional, Union from fastapi import Body, Header, Request, Response from fastapi.openapi.models import Example @@ -73,9 +73,9 @@ async def find_knowledgebox( response: Response, kbid: str, query: str = fastapi_query(SearchParamDefaults.query), - fields: List[str] = fastapi_query(SearchParamDefaults.fields), - filters: List[str] = fastapi_query(SearchParamDefaults.filters), - faceted: List[str] = fastapi_query(SearchParamDefaults.faceted), + fields: list[str] = fastapi_query(SearchParamDefaults.fields), + filters: list[str] = fastapi_query(SearchParamDefaults.filters), + faceted: list[str] = fastapi_query(SearchParamDefaults.faceted), page_number: int = fastapi_query(SearchParamDefaults.page_number), page_size: int = fastapi_query(SearchParamDefaults.page_size), min_score: float = fastapi_query(SearchParamDefaults.min_score), @@ -91,7 +91,7 @@ async def find_knowledgebox( range_modification_end: Optional[datetime] = fastapi_query( SearchParamDefaults.range_modification_end ), - features: List[SearchOptions] = fastapi_query( + features: list[SearchOptions] = fastapi_query( SearchParamDefaults.search_features, default=[ SearchOptions.PARAGRAPH, SearchOptions.VECTOR, ], ), debug: bool = fastapi_query(SearchParamDefaults.debug), highlight: bool = fastapi_query(SearchParamDefaults.highlight), - show: List[ResourceProperties] = fastapi_query(SearchParamDefaults.show), - field_type_filter: List[FieldTypeName] = fastapi_query( + show: list[ResourceProperties] = fastapi_query(SearchParamDefaults.show), + field_type_filter: list[FieldTypeName] = fastapi_query( SearchParamDefaults.field_type_filter, alias="field_type" ), - extracted: List[ExtractedDataTypeName] = fastapi_query( + extracted: list[ExtractedDataTypeName] = fastapi_query( SearchParamDefaults.extracted ), with_duplicates: bool = fastapi_query(SearchParamDefaults.with_duplicates), diff --git a/nucliadb/nucliadb/search/api/v1/knowledgebox.py b/nucliadb/nucliadb/search/api/v1/knowledgebox.py index 6cfdd907d5..1262cc7f38 100644 --- a/nucliadb/nucliadb/search/api/v1/knowledgebox.py +++ b/nucliadb/nucliadb/search/api/v1/knowledgebox.py @@ -18,7 +18,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>.
# import asyncio -from typing import List, Optional +from typing import Optional from fastapi import HTTPException, Request from fastapi_versioning import version @@ -90,7 +90,7 @@ async def knowledgebox_counters( shard_manager = get_shard_manager() try: - shard_groups: List[PBShardObject] = await shard_manager.get_shards_by_kbid(kbid) + shard_groups: list[PBShardObject] = await shard_manager.get_shards_by_kbid(kbid) except ShardsNotFound: raise HTTPException( status_code=404, @@ -122,7 +122,7 @@ async def knowledgebox_counters( ) try: - results: Optional[List[Shard]] = await asyncio.wait_for( # type: ignore + results: Optional[list[Shard]] = await asyncio.wait_for( # type: ignore asyncio.gather(*ops, return_exceptions=True), # type: ignore timeout=settings.search_timeout, ) diff --git a/nucliadb/nucliadb/search/api/v1/resource/search.py b/nucliadb/nucliadb/search/api/v1/resource/search.py index b2343bc214..c4479f100b 100644 --- a/nucliadb/nucliadb/search/api/v1/resource/search.py +++ b/nucliadb/nucliadb/search/api/v1/resource/search.py @@ -18,7 +18,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # from datetime import datetime -from typing import List, Optional, Union +from typing import Optional, Union from fastapi import Header, Request, Response from fastapi_versioning import version @@ -62,9 +62,9 @@ async def resource_search( kbid: str, query: str, rid: str, - fields: List[str] = fastapi_query(SearchParamDefaults.fields), - filters: List[str] = fastapi_query(SearchParamDefaults.filters), - faceted: List[str] = fastapi_query(SearchParamDefaults.faceted), + fields: list[str] = fastapi_query(SearchParamDefaults.fields), + filters: list[str] = fastapi_query(SearchParamDefaults.filters), + faceted: list[str] = fastapi_query(SearchParamDefaults.faceted), sort: Optional[SortField] = fastapi_query( SearchParamDefaults.sort_field, alias="sort_field" ), @@ -84,18 +84,18 @@ async def resource_search( SearchParamDefaults.range_modification_end ), highlight: bool = fastapi_query(SearchParamDefaults.highlight), - show: List[ResourceProperties] = fastapi_query( + show: list[ResourceProperties] = fastapi_query( SearchParamDefaults.show, default=list(ResourceProperties) ), - field_type_filter: List[FieldTypeName] = fastapi_query( + field_type_filter: list[FieldTypeName] = fastapi_query( SearchParamDefaults.field_type_filter, alias="field_type" ), - extracted: List[ExtractedDataTypeName] = fastapi_query( + extracted: list[ExtractedDataTypeName] = fastapi_query( SearchParamDefaults.extracted ), x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API), debug: bool = fastapi_query(SearchParamDefaults.debug), - shards: List[str] = fastapi_query(SearchParamDefaults.shards), + shards: list[str] = fastapi_query(SearchParamDefaults.shards), ) -> Union[ResourceSearchResults, HTTPClientError]: # We need to query all nodes try: diff --git a/nucliadb/nucliadb/search/api/v1/search.py b/nucliadb/nucliadb/search/api/v1/search.py index ef4a9bda36..8e85873456 100644 --- a/nucliadb/nucliadb/search/api/v1/search.py +++ b/nucliadb/nucliadb/search/api/v1/search.py @@ -20,7 +20,7 @@ import json from datetime import datetime from time import time -from typing import List, Optional, Tuple, Union +from typing import Optional, Union from fastapi import Body, Header, Request, Response from fastapi.openapi.models import Example @@ -93,9 +93,9 @@ async def search_knowledgebox( response: Response, kbid: str, query: str = fastapi_query(SearchParamDefaults.query), - fields: List[str] =
fastapi_query(SearchParamDefaults.fields), - filters: List[str] = fastapi_query(SearchParamDefaults.filters), - faceted: List[str] = fastapi_query(SearchParamDefaults.faceted), + fields: list[str] = fastapi_query(SearchParamDefaults.fields), + filters: list[str] = fastapi_query(SearchParamDefaults.filters), + faceted: list[str] = fastapi_query(SearchParamDefaults.faceted), sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field), sort_limit: Optional[int] = fastapi_query(SearchParamDefaults.sort_limit), sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order), @@ -114,7 +114,7 @@ async def search_knowledgebox( range_modification_end: Optional[datetime] = fastapi_query( SearchParamDefaults.range_modification_end ), - features: List[SearchOptions] = fastapi_query( + features: list[SearchOptions] = fastapi_query( SearchParamDefaults.search_features, default=[ SearchOptions.PARAGRAPH, @@ -124,14 +124,14 @@ async def search_knowledgebox( ), debug: bool = fastapi_query(SearchParamDefaults.debug), highlight: bool = fastapi_query(SearchParamDefaults.highlight), - show: List[ResourceProperties] = fastapi_query(SearchParamDefaults.show), - field_type_filter: List[FieldTypeName] = fastapi_query( + show: list[ResourceProperties] = fastapi_query(SearchParamDefaults.show), + field_type_filter: list[FieldTypeName] = fastapi_query( SearchParamDefaults.field_type_filter, alias="field_type" ), - extracted: List[ExtractedDataTypeName] = fastapi_query( + extracted: list[ExtractedDataTypeName] = fastapi_query( SearchParamDefaults.extracted ), - shards: List[str] = fastapi_query(SearchParamDefaults.shards), + shards: list[str] = fastapi_query(SearchParamDefaults.shards), with_duplicates: bool = fastapi_query(SearchParamDefaults.with_duplicates), with_synonyms: bool = fastapi_query(SearchParamDefaults.with_synonyms), autofilter: bool = fastapi_query(SearchParamDefaults.autofilter), @@ -192,14 +192,14 @@ async def catalog( response: Response, kbid: str, query: str = fastapi_query(SearchParamDefaults.query), - filters: List[str] = fastapi_query(SearchParamDefaults.filters), - faceted: List[str] = fastapi_query(SearchParamDefaults.faceted), + filters: list[str] = fastapi_query(SearchParamDefaults.filters), + faceted: list[str] = fastapi_query(SearchParamDefaults.faceted), sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field), sort_limit: Optional[int] = fastapi_query(SearchParamDefaults.sort_limit), sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order), page_number: int = fastapi_query(SearchParamDefaults.page_number), page_size: int = fastapi_query(SearchParamDefaults.page_size), - shards: List[str] = fastapi_query(SearchParamDefaults.shards), + shards: list[str] = fastapi_query(SearchParamDefaults.shards), with_status: Optional[ResourceProcessingStatus] = fastapi_query( SearchParamDefaults.with_status ), @@ -320,7 +320,7 @@ async def search( x_forwarded_for: str, do_audit: bool = True, with_status: Optional[ResourceProcessingStatus] = None, -) -> Tuple[KnowledgeboxSearchResults, bool]: +) -> tuple[KnowledgeboxSearchResults, bool]: audit = get_audit() start_time = time() diff --git a/nucliadb/nucliadb/search/api/v1/suggest.py b/nucliadb/nucliadb/search/api/v1/suggest.py index 958ba5b2ac..eef5796c19 100644 --- a/nucliadb/nucliadb/search/api/v1/suggest.py +++ b/nucliadb/nucliadb/search/api/v1/suggest.py @@ -19,7 +19,7 @@ # from datetime import datetime from time import time -from typing import List, Optional, Union +from typing import Optional, Union from 
fastapi import Header, Request, Response from fastapi_versioning import version @@ -60,9 +60,9 @@ async def suggest_knowledgebox( response: Response, kbid: str, query: str = fastapi_query(SearchParamDefaults.suggest_query), - fields: List[str] = fastapi_query(SearchParamDefaults.fields), - filters: List[str] = fastapi_query(SearchParamDefaults.filters), - faceted: List[str] = fastapi_query(SearchParamDefaults.faceted), + fields: list[str] = fastapi_query(SearchParamDefaults.fields), + filters: list[str] = fastapi_query(SearchParamDefaults.filters), + faceted: list[str] = fastapi_query(SearchParamDefaults.faceted), range_creation_start: Optional[datetime] = fastapi_query( SearchParamDefaults.range_creation_start ), @@ -75,11 +75,11 @@ async def suggest_knowledgebox( range_modification_end: Optional[datetime] = fastapi_query( SearchParamDefaults.range_modification_end ), - features: List[SuggestOptions] = fastapi_query( + features: list[SuggestOptions] = fastapi_query( SearchParamDefaults.suggest_features ), - show: List[ResourceProperties] = fastapi_query(SearchParamDefaults.show), - field_type_filter: List[FieldTypeName] = fastapi_query( + show: list[ResourceProperties] = fastapi_query(SearchParamDefaults.show), + field_type_filter: list[FieldTypeName] = fastapi_query( SearchParamDefaults.field_type_filter, alias="field_type" ), x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API), @@ -117,16 +117,16 @@ async def suggest( response, kbid: str, query: str, - fields: List[str], - filters: List[str], - faceted: List[str], + fields: list[str], + filters: list[str], + faceted: list[str], range_creation_start: Optional[datetime], range_creation_end: Optional[datetime], range_modification_start: Optional[datetime], range_modification_end: Optional[datetime], - features: List[SuggestOptions], - show: List[ResourceProperties], - field_type_filter: List[FieldTypeName], + features: list[SuggestOptions], + show: list[ResourceProperties], + field_type_filter: list[FieldTypeName], x_ndb_client: NucliaDBClientType, x_nucliadb_user: str, x_forwarded_for: str, diff --git a/nucliadb/nucliadb/search/predict.py b/nucliadb/nucliadb/search/predict.py index fd0b721b75..96caa36c12 100644 --- a/nucliadb/nucliadb/search/predict.py +++ b/nucliadb/nucliadb/search/predict.py @@ -20,7 +20,7 @@ import json import os from enum import Enum -from typing import AsyncIterator, Dict, List, Optional, Tuple +from typing import AsyncIterator, Optional from unittest.mock import AsyncMock, Mock import aiohttp @@ -136,7 +136,7 @@ async def start_predict_engine(): set_utility(Utility.PREDICT, predict_util) -def convert_relations(data: Dict[str, List[Dict[str, str]]]) -> List[RelationNode]: +def convert_relations(data: dict[str, list[dict[str, str]]]) -> list[RelationNode]: result = [] for token in data["tokens"]: text = token["text"] @@ -156,7 +156,7 @@ def __init__( zone: Optional[str] = None, onprem: bool = False, local_predict: bool = False, - local_predict_headers: Optional[Dict[str, str]] = None, + local_predict_headers: Optional[dict[str, str]] = None, ): self.nuclia_service_account = nuclia_service_account self.cluster_url = cluster_url @@ -307,7 +307,7 @@ async def rephrase_query(self, kbid: str, item: RephraseModel) -> str: @predict_observer.wrap({"type": "chat"}) async def chat_query( self, kbid: str, item: ChatModel - ) -> Tuple[str, AsyncIterator[bytes]]: + ) -> tuple[str, AsyncIterator[bytes]]: try: self.check_nua_key_is_configured_for_onprem() except NUAKeyMissingError: @@ -349,7 +349,7 @@ async def 
ask_document( return await resp.text() @predict_observer.wrap({"type": "sentence"}) - async def convert_sentence_to_vector(self, kbid: str, sentence: str) -> List[float]: + async def convert_sentence_to_vector(self, kbid: str, sentence: str) -> list[float]: try: self.check_nua_key_is_configured_for_onprem() except NUAKeyMissingError: @@ -371,7 +371,7 @@ async def convert_sentence_to_vector(self, kbid: str, sentence: str) -> List[flo return data["data"] @predict_observer.wrap({"type": "entities"}) - async def detect_entities(self, kbid: str, sentence: str) -> List[RelationNode]: + async def detect_entities(self, kbid: str, sentence: str) -> list[RelationNode]: try: self.check_nua_key_is_configured_for_onprem() except NUAKeyMissingError: @@ -456,7 +456,7 @@ async def rephrase_query(self, kbid: str, item: RephraseModel) -> str: async def chat_query( self, kbid: str, item: ChatModel - ) -> Tuple[str, AsyncIterator[bytes]]: + ) -> tuple[str, AsyncIterator[bytes]]: self.calls.append(item) async def generate(): @@ -472,7 +472,7 @@ async def ask_document( answer = os.environ.get("TEST_ASK_DOCUMENT") or "Answer to your question" return answer - async def convert_sentence_to_vector(self, kbid: str, sentence: str) -> List[float]: + async def convert_sentence_to_vector(self, kbid: str, sentence: str) -> list[float]: self.calls.append(sentence) if ( os.environ.get("TEST_SENTENCE_ENCODER") == "multilingual-2023-02-21" @@ -481,7 +481,7 @@ async def convert_sentence_to_vector(self, kbid: str, sentence: str) -> List[flo else: return Q - async def detect_entities(self, kbid: str, sentence: str) -> List[RelationNode]: + async def detect_entities(self, kbid: str, sentence: str) -> list[RelationNode]: self.calls.append(sentence) dummy_data = os.environ.get("TEST_RELATIONS", None) if dummy_data is not None: # pragma: no cover diff --git a/nucliadb/nucliadb/search/requesters/utils.py b/nucliadb/nucliadb/search/requesters/utils.py index fad62810d7..c6bfafd74c 100644 --- a/nucliadb/nucliadb/search/requesters/utils.py +++ b/nucliadb/nucliadb/search/requesters/utils.py @@ -19,7 +19,7 @@ import asyncio from enum import Enum -from typing import Any, List, Optional, Tuple, TypeVar, Union, overload +from typing import Any, Optional, TypeVar, Union, overload from fastapi import HTTPException from grpc import StatusCode as GrpcStatusCode @@ -87,7 +87,7 @@ async def node_query( pb_query: SuggestRequest, target_replicas: Optional[list[str]] = None, read_only: bool = True, -) -> Tuple[List[SuggestResponse], bool, List[Tuple[str, str, str]], List[str]]: +) -> tuple[list[SuggestResponse], bool, list[tuple[str, str, str]], list[str]]: ... @@ -98,7 +98,7 @@ async def node_query( pb_query: ParagraphSearchRequest, target_replicas: Optional[list[str]] = None, read_only: bool = True, -) -> Tuple[List[ParagraphSearchResponse], bool, List[Tuple[str, str, str]], List[str]]: +) -> tuple[list[ParagraphSearchResponse], bool, list[tuple[str, str, str]], list[str]]: ... @@ -109,7 +109,7 @@ async def node_query( pb_query: SearchRequest, target_replicas: Optional[list[str]] = None, read_only: bool = True, -) -> Tuple[List[SearchResponse], bool, List[Tuple[str, str, str]], List[str]]: +) -> tuple[list[SearchResponse], bool, list[tuple[str, str, str]], list[str]]: ... 
@@ -120,7 +120,7 @@ async def node_query( pb_query: RelationSearchRequest, target_replicas: Optional[list[str]] = None, read_only: bool = True, -) -> Tuple[List[RelationSearchResponse], bool, List[Tuple[str, str, str]], List[str]]: +) -> tuple[list[RelationSearchResponse], bool, list[tuple[str, str, str]], list[str]]: ... @@ -130,13 +130,13 @@ async def node_query( pb_query: REQUEST_TYPE, target_replicas: Optional[list[str]] = None, read_only: bool = True, -) -> Tuple[List[T], bool, List[Tuple[str, str, str]], List[str]]: +) -> tuple[list[T], bool, list[tuple[str, str, str]], list[str]]: read_only = read_only and has_feature(const.Features.READ_REPLICA_SEARCHES) shard_manager = get_shard_manager() try: - shard_groups: List[PBShardObject] = await shard_manager.get_shards_by_kbid(kbid) + shard_groups: list[PBShardObject] = await shard_manager.get_shards_by_kbid(kbid) except ShardsNotFound: raise HTTPException( status_code=404, diff --git a/nucliadb/nucliadb/search/search/cache.py b/nucliadb/nucliadb/search/search/cache.py index 92e4bb3bbf..862cbe3df0 100644 --- a/nucliadb/nucliadb/search/search/cache.py +++ b/nucliadb/nucliadb/search/search/cache.py @@ -19,7 +19,7 @@ import asyncio from contextvars import ContextVar -from typing import Dict, Optional +from typing import Optional from lru import LRU # type: ignore @@ -31,17 +31,17 @@ from nucliadb_telemetry import metrics from nucliadb_utils.utilities import get_storage -rcache: ContextVar[Optional[Dict[str, ResourceORM]]] = ContextVar( +rcache: ContextVar[Optional[dict[str, ResourceORM]]] = ContextVar( "rcache", default=None ) -RESOURCE_LOCKS: Dict[str, asyncio.Lock] = LRU(1000) # type: ignore +RESOURCE_LOCKS: dict[str, asyncio.Lock] = LRU(1000) # type: ignore RESOURCE_CACHE_OPS = metrics.Counter("nucliadb_resource_cache_ops", labels={"type": ""}) -def get_resource_cache(clear: bool = False) -> Dict[str, ResourceORM]: - value: Optional[Dict[str, ResourceORM]] = rcache.get() +def get_resource_cache(clear: bool = False) -> dict[str, ResourceORM]: + value: Optional[dict[str, ResourceORM]] = rcache.get() if value is None or clear: value = {} rcache.set(value) diff --git a/nucliadb/nucliadb/search/search/chat/query.py b/nucliadb/nucliadb/search/search/chat/query.py index a13f9de4a4..c770e57970 100644 --- a/nucliadb/nucliadb/search/search/chat/query.py +++ b/nucliadb/nucliadb/search/search/chat/query.py @@ -20,7 +20,7 @@ import asyncio from dataclasses import dataclass from time import monotonic as time -from typing import AsyncGenerator, AsyncIterator, List, Optional +from typing import AsyncGenerator, AsyncIterator, Optional from nucliadb_protos.nodereader_pb2 import RelationSearchRequest, RelationSearchResponse @@ -78,7 +78,7 @@ class ChatResult: async def rephrase_query( kbid: str, - chat_history: List[ChatContextMessage], + chat_history: list[ChatContextMessage], query: str, user_id: str, user_context: List[str], @@ -171,7 +171,7 @@ async def get_relations_results( relation_request.subgraph.entry_points.extend(detected_entities) relation_request.subgraph.depth = 1 - relations_results: List[RelationSearchResponse] + relations_results: list[RelationSearchResponse] ( relations_results, _, @@ -324,8 +324,8 @@ async def maybe_audit_chat( rephrased_query: Optional[str], text_answer: bytes, status_code: Optional[AnswerStatusCode], - chat_history: List[ChatContextMessage], - query_context: List[str], + chat_history: list[ChatContextMessage], + query_context: list[str], ): audit = get_audit() if audit is None: diff --git 
a/nucliadb/nucliadb/search/search/fetch.py b/nucliadb/nucliadb/search/search/fetch.py index bdaa89619f..c494f260ca 100644 --- a/nucliadb/nucliadb/search/search/fetch.py +++ b/nucliadb/nucliadb/search/search/fetch.py @@ -18,7 +18,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # from contextvars import ContextVar -from typing import Dict, List, Optional, Tuple +from typing import Optional from nucliadb_protos.nodereader_pb2 import DocumentResult, ParagraphResult from nucliadb_protos.resources_pb2 import Paragraph @@ -33,18 +33,18 @@ from .cache import get_resource_from_cache -rcache: ContextVar[Optional[Dict[str, ResourceORM]]] = ContextVar( +rcache: ContextVar[Optional[dict[str, ResourceORM]]] = ContextVar( "rcache", default=None ) async def fetch_resources( - resources: List[str], + resources: list[str], kbid: str, - show: List[ResourceProperties], - field_type_filter: List[FieldTypeName], - extracted: List[ExtractedDataTypeName], -) -> Dict[str, Resource]: + show: list[ResourceProperties], + field_type_filter: list[FieldTypeName], + extracted: list[ExtractedDataTypeName], +) -> dict[str, Resource]: result = {} for resource in resources: serialization = await serialize( @@ -77,14 +77,14 @@ async def get_paragraph_from_resource( return paragraph -async def get_labels_resource(result: DocumentResult, kbid: str) -> List[str]: +async def get_labels_resource(result: DocumentResult, kbid: str) -> list[str]: orm_resource = await get_resource_from_cache(kbid, result.uuid) if orm_resource is None: logger.error(f"{result.uuid} does not exist on DB") return [] - labels: List[str] = [] + labels: list[str] = [] basic = await orm_resource.get_basic() if basic is not None: for classification in basic.usermetadata.classifications: @@ -93,14 +93,14 @@ async def get_labels_paragraph(result: ParagraphResult, kbid: str) -> List[str]: +async def get_labels_paragraph(result: ParagraphResult, kbid: str) -> list[str]: orm_resource = await get_resource_from_cache(kbid, result.uuid) if orm_resource is None: logger.error(f"{result.uuid} does not exist on DB") return [] - labels: List[str] = [] + labels: list[str] = [] basic = await orm_resource.get_basic() if basic is not None: for classification in basic.usermetadata.classifications: @@ -127,7 +127,7 @@ async def get_labels_paragraph(result: ParagraphResult, kbid: str) -> List[str]: async def get_seconds_paragraph( result: ParagraphResult, kbid: str -) -> Optional[Tuple[List[int], List[int]]]: +) -> Optional[tuple[list[int], list[int]]]: orm_resource = await get_resource_from_cache(kbid, result.uuid) if orm_resource is None: diff --git a/nucliadb/nucliadb/search/search/find.py b/nucliadb/nucliadb/search/search/find.py index 5831d73f88..1325dd9fb0 100644 --- a/nucliadb/nucliadb/search/search/find.py +++ b/nucliadb/nucliadb/search/search/find.py @@ -18,7 +18,6 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>.
# from time import time -from typing import Tuple from nucliadb.search.requesters.utils import Method, node_query from nucliadb.search.search.find_merge import find_merge_results @@ -39,7 +38,7 @@ async def find( x_ndb_client: NucliaDBClientType, x_nucliadb_user: str, x_forwarded_for: str, -) -> Tuple[KnowledgeboxFindResults, bool]: +) -> tuple[KnowledgeboxFindResults, bool]: audit = get_audit() start_time = time() diff --git a/nucliadb/nucliadb/search/search/find_merge.py b/nucliadb/nucliadb/search/search/find_merge.py index a56ecfe164..ef6d7ded78 100644 --- a/nucliadb/nucliadb/search/search/find_merge.py +++ b/nucliadb/nucliadb/search/search/find_merge.py @@ -18,7 +18,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # import asyncio -from typing import Any, Dict, Iterator, List, Optional, Tuple, cast +from typing import Any, Iterator, Optional, cast from nucliadb_protos.nodereader_pb2 import ( DocumentScored, @@ -61,7 +61,7 @@ async def set_text_value( result_paragraph: TempFindParagraph, max_operations: asyncio.Semaphore, highlight: bool = False, - ematches: Optional[List[str]] = None, + ematches: Optional[list[str]] = None, extracted_text_cache: Optional[paragraphs.ExtractedTextCache] = None, ): async with max_operations: @@ -85,10 +85,10 @@ async def set_text_value( async def set_resource_metadata_value( kbid: str, resource: str, - show: List[ResourceProperties], - field_type_filter: List[FieldTypeName], - extracted: List[ExtractedDataTypeName], - find_resources: Dict[str, FindResource], + show: list[ResourceProperties], + field_type_filter: list[FieldTypeName], + extracted: list[ExtractedDataTypeName], + find_resources: dict[str, FindResource], max_operations: asyncio.Semaphore, ): async with max_operations: @@ -135,14 +135,14 @@ def sorted_by_insertion(self) -> Iterator[Any]: @merge_observer.wrap({"type": "fetch_find_metadata"}) async def fetch_find_metadata( - find_resources: Dict[str, FindResource], - result_paragraphs: List[TempFindParagraph], + find_resources: dict[str, FindResource], + result_paragraphs: list[TempFindParagraph], kbid: str, - show: List[ResourceProperties], - field_type_filter: List[FieldTypeName], - extracted: List[ExtractedDataTypeName], + show: list[ResourceProperties], + field_type_filter: list[FieldTypeName], + extracted: list[ExtractedDataTypeName], highlight: bool = False, - ematches: Optional[List[str]] = None, + ematches: Optional[list[str]] = None, ): resources = set() operations = [] @@ -228,13 +228,13 @@ async def fetch_find_metadata( @merge_observer.wrap({"type": "merge_paragraphs_vectors"}) def merge_paragraphs_vectors( - paragraphs_shards: List[List[ParagraphResult]], - vectors_shards: List[List[DocumentScored]], + paragraphs_shards: list[list[ParagraphResult]], + vectors_shards: list[list[DocumentScored]], count: int, page: int, min_score: float, -) -> Tuple[List[TempFindParagraph], bool]: - merged_paragrahs: List[TempFindParagraph] = [] +) -> tuple[list[TempFindParagraph], bool]: + merged_paragrahs: list[TempFindParagraph] = [] # We assume that paragraphs_shards and vectors_shards are already ordered for paragraphs_shard in paragraphs_shards: @@ -346,13 +346,13 @@ def merge_paragraphs_vectors( @merge_observer.wrap({"type": "find_merge"}) async def find_merge_results( - search_responses: List[SearchResponse], + search_responses: list[SearchResponse], count: int, page: int, kbid: str, - show: List[ResourceProperties], - field_type_filter: List[FieldTypeName], - extracted: List[ExtractedDataTypeName], + show: list[ResourceProperties], +
field_type_filter: list[FieldTypeName], + extracted: list[ExtractedDataTypeName], requested_relations: EntitiesSubgraphRequest, min_score: float, highlight: bool = False, @@ -362,13 +362,13 @@ async def find_merge_results( # this is contextvar magic that is probably not ideal await get_transaction(read_only=True) - paragraphs: List[List[ParagraphResult]] = [] - vectors: List[List[DocumentScored]] = [] + paragraphs: list[list[ParagraphResult]] = [] + vectors: list[list[DocumentScored]] = [] relations = [] # facets_counter = Counter() next_page = True - ematches: List[str] = [] + ematches: list[str] = [] real_query = "" total_paragraphs = 0 for response in search_responses: @@ -382,8 +382,8 @@ async def find_merge_results( next_page = next_page and response.paragraph.next_page total_paragraphs += response.paragraph.total - paragraphs.append(cast(List[ParagraphResult], response.paragraph.results)) - vectors.append(cast(List[DocumentScored], response.vector.documents)) + paragraphs.append(cast(list[ParagraphResult], response.paragraph.results)) + vectors.append(cast(list[DocumentScored], response.vector.documents)) relations.append(response.relation) diff --git a/nucliadb/nucliadb/search/search/merge.py b/nucliadb/nucliadb/search/search/merge.py index 56f98a546f..60c1144bd5 100644 --- a/nucliadb/nucliadb/search/search/merge.py +++ b/nucliadb/nucliadb/search/search/merge.py @@ -19,7 +19,7 @@ # import datetime import math -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Optional, Union from nucliadb_protos.nodereader_pb2 import ( DocumentResult, @@ -73,13 +73,13 @@ from .metrics import merge_observer from .paragraphs import ExtractedTextCache, get_paragraph_text, get_text_sentence -Bm25Score = Tuple[float, float] +Bm25Score = tuple[float, float] TimestampScore = datetime.datetime TitleScore = str Score = Union[Bm25Score, TimestampScore, TitleScore] -def sort_results_by_score(results: Union[List[ParagraphResult], List[DocumentResult]]): +def sort_results_by_score(results: Union[list[ParagraphResult], list[DocumentResult]]): results.sort(key=lambda x: (x.score.bm25, x.score.booster), reverse=True) @@ -115,15 +115,15 @@ async def text_score( async def merge_documents_results( - document_responses: List[DocumentSearchResponse], - resources: List[str], + document_responses: list[DocumentSearchResponse], + resources: list[str], count: int, page: int, kbid: str, sort: SortOptions, ) -> Resources: - raw_resource_list: List[Tuple[DocumentResult, Score]] = [] - facets: Dict[str, Any] = {} + raw_resource_list: list[tuple[DocumentResult, Score]] = [] + facets: dict[str, Any] = {} query = None total = 0 next_page = False @@ -155,7 +155,7 @@ async def merge_documents_results( if length > end: next_page = True - result_resource_list: List[ResourceResult] = [] + result_resource_list: list[ResourceResult] = [] for result, _ in raw_resource_list[min(skip, length) : min(end, length)]: # /f/file @@ -186,11 +186,11 @@ async def merge_documents_results( async def merge_suggest_paragraph_results( - suggest_responses: List[SuggestResponse], + suggest_responses: list[SuggestResponse], kbid: str, highlight: bool, ): - raw_paragraph_list: List[ParagraphResult] = [] + raw_paragraph_list: list[ParagraphResult] = [] query = None ematches = None for suggest_response in suggest_responses: @@ -207,7 +207,7 @@ async def merge_suggest_paragraph_results( rcache = get_resource_cache(clear=True) etcache = ExtractedTextCache() try: - result_paragraph_list: List[Paragraph] = [] + 
result_paragraph_list: list[Paragraph] = [] for result in raw_paragraph_list[:10]: _, field_type, field = result.field.split("/") text = await get_paragraph_text( @@ -258,15 +258,15 @@ async def merge_suggest_paragraph_results( async def merge_vectors_results( - vector_responses: List[VectorSearchResponse], - resources: List[str], + vector_responses: list[VectorSearchResponse], + resources: list[str], kbid: str, count: int, page: int, min_score: float, ): - facets: Dict[str, Any] = {} - raw_vectors_list: List[DocumentScored] = [] + facets: dict[str, Any] = {} + raw_vectors_list: list[DocumentScored] = [] for vector_response in vector_responses: for document in vector_response.documents: @@ -283,7 +283,7 @@ async def merge_vectors_results( end_element = skip + count length = len(raw_vectors_list) - result_sentence_list: List[Sentence] = [] + result_sentence_list: list[Sentence] = [] for result in raw_vectors_list[min(skip, length) : min(end_element, length)]: id_count = result.doc_id.id.count("/") if id_count == 4: @@ -339,19 +339,19 @@ async def merge_vectors_results( async def merge_paragraph_results( - paragraph_responses: List[ParagraphSearchResponse], - resources: List[str], + paragraph_responses: list[ParagraphSearchResponse], + resources: list[str], kbid: str, count: int, page: int, highlight: bool, sort: SortOptions, ): - raw_paragraph_list: List[Tuple[ParagraphResult, Score]] = [] - facets: Dict[str, Any] = {} + raw_paragraph_list: list[tuple[ParagraphResult, Score]] = [] + facets: dict[str, Any] = {} query = None next_page = False - ematches: Optional[List[str]] = None + ematches: Optional[list[str]] = None total = 0 for paragraph_response in paragraph_responses: if ematches is None: @@ -383,7 +383,7 @@ async def merge_paragraph_results( if length > end: next_page = True - result_paragraph_list: List[Paragraph] = [] + result_paragraph_list: list[Paragraph] = [] etcache = ExtractedTextCache() try: for result, _ in raw_paragraph_list[min(skip, length) : min(end, length)]: @@ -449,7 +449,7 @@ async def merge_paragraph_results( @merge_observer.wrap({"type": "merge_relations"}) def merge_relations_results( - relations_responses: List[RelationSearchResponse], + relations_responses: list[RelationSearchResponse], query: EntitiesSubgraphRequest, ) -> Relations: relations = Relations(entities={}) @@ -498,13 +498,13 @@ def merge_relations_results( @merge_observer.wrap({"type": "merge"}) async def merge_results( - search_responses: List[SearchResponse], + search_responses: list[SearchResponse], count: int, page: int, kbid: str, - show: List[ResourceProperties], - field_type_filter: List[FieldTypeName], - extracted: List[ExtractedDataTypeName], + show: list[ResourceProperties], + field_type_filter: list[FieldTypeName], + extracted: list[ExtractedDataTypeName], sort: SortOptions, requested_relations: EntitiesSubgraphRequest, min_score: float, @@ -525,7 +525,7 @@ async def merge_results( rcache = get_resource_cache(clear=True) try: - resources: List[str] = list() + resources: list[str] = list() api_results.fulltext = await merge_documents_results( documents, resources, count, page, kbid, sort ) @@ -555,13 +555,13 @@ async def merge_results( async def merge_paragraphs_results( - paragraph_responses: List[ParagraphSearchResponse], + paragraph_responses: list[ParagraphSearchResponse], count: int, page: int, kbid: str, - show: List[ResourceProperties], - field_type_filter: List[FieldTypeName], - extracted: List[ExtractedDataTypeName], + show: list[ResourceProperties], + field_type_filter: 
list[FieldTypeName], + extracted: list[ExtractedDataTypeName], highlight_split: bool, ) -> ResourceSearchResults: paragraphs = [] @@ -572,7 +572,7 @@ async def merge_paragraphs_results( rcache = get_resource_cache(clear=True) try: - resources: List[str] = list() + resources: list[str] = list() api_results.paragraphs = await merge_paragraph_results( paragraphs, resources, @@ -592,7 +592,7 @@ async def merge_paragraphs_results( async def merge_suggest_entities_results( - suggest_responses: List[SuggestResponse], + suggest_responses: list[SuggestResponse], ) -> RelatedEntities: merge = RelatedEntities(entities=[], total=0) @@ -604,10 +604,10 @@ async def merge_suggest_entities_results( async def merge_suggest_results( - suggest_responses: List[SuggestResponse], + suggest_responses: list[SuggestResponse], kbid: str, - show: List[ResourceProperties], - field_type_filter: List[FieldTypeName], + show: list[ResourceProperties], + field_type_filter: list[FieldTypeName], highlight: bool = False, ) -> KnowledgeboxSuggestResults: api_results = KnowledgeboxSuggestResults() diff --git a/nucliadb/nucliadb/search/search/query.py b/nucliadb/nucliadb/search/search/query.py index 736650f0a2..95bde6c7bd 100644 --- a/nucliadb/nucliadb/search/search/query.py +++ b/nucliadb/nucliadb/search/search/query.py @@ -19,7 +19,7 @@ # import asyncio from datetime import datetime -from typing import Awaitable, List, Optional, Tuple +from typing import Awaitable, Optional from async_lru import alru_cache from nucliadb_protos.noderesources_pb2 import Resource @@ -85,10 +85,10 @@ def __init__( self, *, kbid: str, - features: List[SearchOptions], + features: list[SearchOptions], query: str, - filters: List[str], - faceted: List[str], + filters: list[str], + faceted: list[str], page_number: int, page_size: int, min_score: Optional[float] = None, @@ -97,14 +97,14 @@ def __init__( range_creation_end: Optional[datetime] = None, range_modification_start: Optional[datetime] = None, range_modification_end: Optional[datetime] = None, - fields: Optional[List[str]] = None, - user_vector: Optional[List[float]] = None, + fields: Optional[list[str]] = None, + user_vector: Optional[list[float]] = None, vectorset: Optional[str] = None, with_duplicates: bool = False, with_status: Optional[ResourceProcessingStatus] = None, with_synonyms: bool = False, autofilter: bool = False, - key_filters: Optional[List[str]] = None, + key_filters: Optional[list[str]] = None, ): self.kbid = kbid self.features = features @@ -137,14 +137,14 @@ def _get_default_min_score(self) -> Awaitable[float]: self._min_score_task = asyncio.create_task(get_default_min_score(self.kbid)) return self._min_score_task - def _get_converted_vectors(self) -> Awaitable[List[float]]: + def _get_converted_vectors(self) -> Awaitable[list[float]]: if self._convert_vectors_task is None: # pragma: no cover self._convert_vectors_task = asyncio.create_task( convert_vectors(self.kbid, self.query) ) return self._convert_vectors_task - def _get_detected_entities(self) -> Awaitable[List[utils_pb2.RelationNode]]: + def _get_detected_entities(self) -> Awaitable[list[utils_pb2.RelationNode]]: if self._detected_entities_task is None: # pragma: no cover self._detected_entities_task = asyncio.create_task( detect_entities(self.kbid, self.query) @@ -197,7 +197,7 @@ async def _schedule_dependency_tasks(self) -> None: if self.with_synonyms and self.query: asyncio.ensure_future(self._get_synomyns()) - async def parse(self) -> Tuple[nodereader_pb2.SearchRequest, bool, List[str]]: + async def parse(self) -> 
tuple[nodereader_pb2.SearchRequest, bool, list[str]]: """ :return: (request, incomplete, autofilters) where: @@ -390,7 +390,7 @@ async def parse_synonyms(self, request: nodereader_pb2.SearchRequest) -> None: # No synonyms found return - synonyms_found: List[str] = [] + synonyms_found: list[str] = [] advanced_query = [] for term in self.query.split(" "): advanced_query.append(term) @@ -407,12 +407,12 @@ async def parse_synonyms(self, request: nodereader_pb2.SearchRequest) -> None: async def paragraph_query_to_pb( kbid: str, - features: List[SearchOptions], + features: list[SearchOptions], rid: str, query: str, - fields: List[str], - filters: List[str], - faceted: List[str], + fields: list[str], + filters: list[str], + faceted: list[str], page_number: int, page_size: int, range_creation_start: Optional[datetime] = None, @@ -466,13 +466,13 @@ async def paragraph_query_to_pb( @query_parse_dependency_observer.wrap({"type": "convert_vectors"}) -async def convert_vectors(kbid: str, query: str) -> List[utils_pb2.RelationNode]: +async def convert_vectors(kbid: str, query: str) -> list[utils_pb2.RelationNode]: predict = get_predict() return await predict.convert_sentence_to_vector(kbid, query) @query_parse_dependency_observer.wrap({"type": "detect_entities"}) -async def detect_entities(kbid: str, query: str) -> List[utils_pb2.RelationNode]: +async def detect_entities(kbid: str, query: str) -> list[utils_pb2.RelationNode]: predict = get_predict() try: return await predict.detect_entities(kbid, query) @@ -532,8 +532,8 @@ def expand_entities( def parse_entities_to_filters( request: nodereader_pb2.SearchRequest, - detected_entities: List[utils_pb2.RelationNode], -) -> List[str]: + detected_entities: list[utils_pb2.RelationNode], +) -> list[str]: added_filters = [] for entity_filter in [ f"/e/{entity.subtype}/{entity.value}" @@ -547,11 +547,11 @@ def parse_entities_to_filters( def suggest_query_to_pb( - features: List[SuggestOptions], + features: list[SuggestOptions], query: str, - fields: List[str], - filters: List[str], - faceted: List[str], + fields: list[str], + filters: list[str], + faceted: list[str], range_creation_start: Optional[datetime] = None, range_creation_end: Optional[datetime] = None, range_modification_start: Optional[datetime] = None, diff --git a/nucliadb/nucliadb/search/tests/fixtures.py b/nucliadb/nucliadb/search/tests/fixtures.py index 1af87e3122..205cc8eafe 100644 --- a/nucliadb/nucliadb/search/tests/fixtures.py +++ b/nucliadb/nucliadb/search/tests/fixtures.py @@ -20,7 +20,7 @@ import asyncio from enum import Enum from os.path import dirname -from typing import Dict, List, Optional +from typing import Optional import pytest from httpx import AsyncClient @@ -97,11 +97,11 @@ async def search_api(test_settings_search, transaction_utility, redis): # type: count += 1 def make_client_fixture( - roles: Optional[List[Enum]] = None, + roles: Optional[list[Enum]] = None, user: str = "", version: str = "1", root: bool = False, - extra_headers: Optional[Dict[str, str]] = None, + extra_headers: Optional[dict[str, str]] = None, ) -> AsyncClient: roles = roles or [] client_base_url = "http://test" @@ -186,7 +186,7 @@ async def wait_for_shard(knowledgebox_ingest: str, count: int) -> str: raise Exception("Could not find shard") await txn.abort() - checks: Dict[str, bool] = {} + checks: dict[str, bool] = {} for replica in shard.replicas: if replica.shard.id not in checks: checks[replica.shard.id] = False diff --git a/nucliadb/nucliadb/standalone/auth.py b/nucliadb/nucliadb/standalone/auth.py index 
7ac5e0b9a8..0b6c9310c3 100644 --- a/nucliadb/nucliadb/standalone/auth.py +++ b/nucliadb/nucliadb/standalone/auth.py @@ -19,7 +19,7 @@ import base64 import logging import time -from typing import Optional, Tuple +from typing import Optional import orjson from jwcrypto import jwe, jwk # type: ignore @@ -51,7 +51,7 @@ def get_mapped_roles(*, settings: Settings, data: dict[str, str]) -> list[str]: async def authenticate_auth_token( settings: Settings, request: HTTPConnection -) -> Optional[Tuple[AuthCredentials, BaseUser]]: +) -> Optional[tuple[AuthCredentials, BaseUser]]: if "eph-token" not in request.query_params or settings.jwk_key is None: return None @@ -83,7 +83,7 @@ def __init__(self, settings: Settings) -> None: async def authenticate( self, request: HTTPConnection - ) -> Optional[Tuple[AuthCredentials, BaseUser]]: + ) -> Optional[tuple[AuthCredentials, BaseUser]]: token_resp = await authenticate_auth_token(self.settings, request) if token_resp is not None: return token_resp @@ -115,7 +115,7 @@ def __init__(self, settings: Settings) -> None: async def authenticate( self, request: HTTPConnection - ) -> Optional[Tuple[AuthCredentials, BaseUser]]: + ) -> Optional[tuple[AuthCredentials, BaseUser]]: token_resp = await authenticate_auth_token(self.settings, request) if token_resp is not None: return token_resp @@ -170,7 +170,7 @@ def __init__(self, settings: Settings) -> None: async def authenticate( self, request: HTTPConnection - ) -> Optional[Tuple[AuthCredentials, BaseUser]]: + ) -> Optional[tuple[AuthCredentials, BaseUser]]: token_resp = await authenticate_auth_token(self.settings, request) if token_resp is not None: return token_resp @@ -203,7 +203,7 @@ def __init__(self, settings: Settings) -> None: async def authenticate( self, request: HTTPConnection - ) -> Optional[Tuple[AuthCredentials, BaseUser]]: + ) -> Optional[tuple[AuthCredentials, BaseUser]]: token_resp = await authenticate_auth_token(self.settings, request) if token_resp is not None: return token_resp diff --git a/nucliadb/nucliadb/tests/integration/test_configuration.py b/nucliadb/nucliadb/tests/integration/test_configuration.py index 73c6fc7931..acb269f8b9 100644 --- a/nucliadb/nucliadb/tests/integration/test_configuration.py +++ b/nucliadb/nucliadb/tests/integration/test_configuration.py @@ -18,8 +18,6 @@ # along with this program. If not, see . 
# -from typing import Dict - import pytest from httpx import AsyncClient @@ -43,7 +41,7 @@ async def test_kb_configuration( resp = await nucliadb_reader.get(f"/kb/{kbid}/configuration") assert resp.status_code == 200 - body: Dict[str, str] = resp.json() + body: dict[str, str] = resp.json() assert len(body) == 1 assert body.get("semantic_model") == "test1" diff --git a/nucliadb/nucliadb/tests/integration/test_entities.py b/nucliadb/nucliadb/tests/integration/test_entities.py index 2e2376d77c..02708269bc 100644 --- a/nucliadb/nucliadb/tests/integration/test_entities.py +++ b/nucliadb/nucliadb/tests/integration/test_entities.py @@ -32,7 +32,6 @@ """ import asyncio -from typing import Tuple import pytest from httpx import AsyncClient @@ -117,7 +116,7 @@ async def processing_entities(nucliadb_grpc: WriterStub, knowledgebox: str): @pytest.fixture async def annotated_entities( - nucliadb_writer: AsyncClient, text_field: Tuple[str, str, str], nucliadb_grpc + nucliadb_writer: AsyncClient, text_field: tuple[str, str, str], nucliadb_grpc ): kbid, rid, field_id = text_field diff --git a/nucliadb/nucliadb/tests/integration/test_relations.py b/nucliadb/nucliadb/tests/integration/test_relations.py index 40752859d9..b297c1f970 100644 --- a/nucliadb/nucliadb/tests/integration/test_relations.py +++ b/nucliadb/nucliadb/tests/integration/test_relations.py @@ -17,8 +17,6 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . # -from typing import Tuple - import pytest from httpx import AsyncClient from nucliadb_protos.resources_pb2 import ( @@ -64,7 +62,7 @@ async def resource_with_bm_relations( async def test_api_aliases( nucliadb_reader: AsyncClient, knowledgebox: str, - resource_with_bm_relations: Tuple[str, str], + resource_with_bm_relations: tuple[str, str], ): rid, field_id = resource_with_bm_relations @@ -101,7 +99,7 @@ async def test_api_aliases( async def test_broker_message_relations( nucliadb_reader: AsyncClient, knowledgebox: str, - resource_with_bm_relations: Tuple[str, str], + resource_with_bm_relations: tuple[str, str], ): """ Test description: diff --git a/nucliadb/nucliadb/train/api/models.py b/nucliadb/nucliadb/train/api/models.py index 1dd12e06bd..efd0319f0c 100644 --- a/nucliadb/nucliadb/train/api/models.py +++ b/nucliadb/nucliadb/train/api/models.py @@ -19,10 +19,8 @@ # -from typing import List - from pydantic import BaseModel class TrainSetPartitions(BaseModel): - partitions: List[str] + partitions: list[str] diff --git a/nucliadb/nucliadb/train/generators/image_classifier.py b/nucliadb/nucliadb/train/generators/image_classifier.py index 70521b85d5..75707b81b2 100644 --- a/nucliadb/nucliadb/train/generators/image_classifier.py +++ b/nucliadb/nucliadb/train/generators/image_classifier.py @@ -19,7 +19,7 @@ # import json -from typing import Any, AsyncGenerator, Dict, List, Tuple +from typing import Any, AsyncGenerator from nucliadb_protos.dataset_pb2 import ( ImageClassification, @@ -38,7 +38,7 @@ VISUALLY_ANNOTABLE_FIELDS = {FieldType.FILE, FieldType.LINK} # PAWLS JSON format -PawlsPayload = Dict[str, Any] +PawlsPayload = dict[str, Any] def image_classification_batch_generator( @@ -133,8 +133,8 @@ async def generate_image_classification_payloads( async def get_page_selections( resource: Resource, field: Field -) -> Dict[int, List[VisualSelection]]: - page_selections: Dict[int, List[VisualSelection]] = {} +) -> dict[int, list[VisualSelection]]: + page_selections: dict[int, list[VisualSelection]] = {} basic = await 
resource.get_basic() if basic is None or basic.fieldmetadata is None: return page_selections @@ -153,8 +153,8 @@ async def get_page_selections( return page_selections -async def get_page_structure(field: Field) -> List[Tuple[str, PageStructure]]: - page_structures: List[Tuple[str, PageStructure]] = [] +async def get_page_structure(field: Field) -> list[tuple[str, PageStructure]]: + page_structures: list[tuple[str, PageStructure]] = [] field_type = KB_REVERSE[field.type] if field_type == FieldType.FILE: fed = await field.get_file_extracted_data() # type: ignore diff --git a/nucliadb/nucliadb/train/generators/sentence_classifier.py b/nucliadb/nucliadb/train/generators/sentence_classifier.py index 97335aae28..0be6e7b555 100644 --- a/nucliadb/nucliadb/train/generators/sentence_classifier.py +++ b/nucliadb/nucliadb/train/generators/sentence_classifier.py @@ -18,7 +18,7 @@ # along with this program. If not, see . # -from typing import AsyncGenerator, List +from typing import AsyncGenerator from fastapi import HTTPException from nucliadb_protos.dataset_pb2 import ( @@ -72,7 +72,7 @@ async def generate_sentence_classification_payloads( request.filter.labels.append(labelset) async for paragraph_item in node.stream_get_paragraphs(request): - text_labels: List[str] = [] + text_labels: list[str] = [] for label in paragraph_item.labels: for labelset in labelsets: if label.startswith(labelset): @@ -93,7 +93,7 @@ async def generate_sentence_classification_payloads( yield tl -async def get_sentences(kbid: str, result: str) -> List[str]: +async def get_sentences(kbid: str, result: str) -> list[str]: if result.count("/") == 4: rid, field_type, field, split_str, _ = result.split("/") split = int(split_str) diff --git a/nucliadb/nucliadb/train/generators/token_classifier.py b/nucliadb/nucliadb/train/generators/token_classifier.py index 90942bfa3f..162eac5d87 100644 --- a/nucliadb/nucliadb/train/generators/token_classifier.py +++ b/nucliadb/nucliadb/train/generators/token_classifier.py @@ -19,7 +19,7 @@ # from collections import OrderedDict -from typing import AsyncGenerator, Dict, List, Tuple, cast +from typing import AsyncGenerator, cast from nucliadb_protos.dataset_pb2 import ( TokenClassificationBatch, @@ -33,8 +33,8 @@ from nucliadb.train import logger from nucliadb.train.generators.utils import batchify, get_resource_from_cache_or_db -NERS_DICT = Dict[str, Dict[str, List[Tuple[int, int]]]] -POSITION_DICT = OrderedDict[Tuple[int, int], Tuple[str, str]] +NERS_DICT = dict[str, dict[str, list[tuple[int, int]]]] +POSITION_DICT = OrderedDict[tuple[int, int], tuple[str, str]] MAIN = "__main__" @@ -73,7 +73,7 @@ async def generate_token_classification_payloads( field_item.uuid, field, field_type, - cast(List[str], trainset.filter.labels), + cast(list[str], trainset.filter.labels), ) for split, text in split_text.items(): ners: POSITION_DICT = ordered_positions.get(split, OrderedDict()) @@ -89,8 +89,8 @@ async def generate_token_classification_payloads( async def get_field_text( - kbid: str, rid: str, field: str, field_type: str, valid_entity_groups: List[str] -) -> Tuple[Dict[str, str], Dict[str, POSITION_DICT], Dict[str, List[Tuple[int, int]]]]: + kbid: str, rid: str, field: str, field_type: str, valid_entity_groups: list[str] +) -> tuple[dict[str, str], dict[str, POSITION_DICT], dict[str, list[tuple[int, int]]]]: orm_resource = await get_resource_from_cache_or_db(kbid, rid) if orm_resource is None: @@ -106,16 +106,16 @@ async def get_field_text( ) return {}, {}, {} - split_text: Dict[str, str] = 
extracted_text.split_text + split_text: dict[str, str] = extracted_text.split_text split_text[MAIN] = extracted_text.text - split_ners: Dict[ + split_ners: dict[ str, NERS_DICT ] = {} # Dict of entity group , with entity and list of positions in field split_ners[MAIN] = {} basic_data = await orm_resource.get_basic() - invalid_tokens_split: Dict[str, List[Tuple[str, str, int, int]]] = {} + invalid_tokens_split: dict[str, list[tuple[str, str, int, int]]] = {} # Check user definition of entities if basic_data is not None: for userfieldmetadata in basic_data.fieldmetadata: @@ -189,9 +189,9 @@ async def get_field_text( if len(split_ners[split][token.klass]) == 0: del split_ners[split][token.klass] - ordered_positions: Dict[str, POSITION_DICT] = {} + ordered_positions: dict[str, POSITION_DICT] = {} for split, ners in split_ners.items(): - split_positions: Dict[Tuple[int, int], Tuple[str, str]] = {} + split_positions: dict[tuple[int, int], tuple[str, str]] = {} for entity_group, entities in ners.items(): for entity, positions in entities.items(): for position in positions: @@ -201,7 +201,7 @@ async def get_field_text( sorted(split_positions.items(), key=lambda x: x[0]) ) - split_paragraphs: Dict[str, List[Tuple[int, int]]] = {} + split_paragraphs: dict[str, list[tuple[int, int]]] = {} if field_metadata is not None: split_paragraphs[MAIN] = sorted( [(p.start, p.end) for p in field_metadata.metadata.paragraphs], @@ -249,7 +249,7 @@ def compute_segments(field_text: str, ners: POSITION_DICT, start: int, end: int) return segments -def process_entities(text: str, ners: POSITION_DICT, paragraphs: List[Tuple[int, int]]): +def process_entities(text: str, ners: POSITION_DICT, paragraphs: list[tuple[int, int]]): if len(paragraphs) > 0: for paragraph in paragraphs: segments = compute_segments( diff --git a/nucliadb/nucliadb/train/generators/utils.py b/nucliadb/nucliadb/train/generators/utils.py index 11929e4c28..c13e6b36c6 100644 --- a/nucliadb/nucliadb/train/generators/utils.py +++ b/nucliadb/nucliadb/train/generators/utils.py @@ -19,7 +19,7 @@ # from contextvars import ContextVar -from typing import Any, AsyncIterator, Dict, Optional +from typing import Any, AsyncIterator, Optional from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM from nucliadb.ingest.orm.resource import KB_REVERSE @@ -29,13 +29,13 @@ from nucliadb.train.types import TrainBatchType from nucliadb_utils.utilities import get_storage -rcache: ContextVar[Optional[Dict[str, ResourceORM]]] = ContextVar( +rcache: ContextVar[Optional[dict[str, ResourceORM]]] = ContextVar( "rcache", default=None ) -def get_resource_cache(clear: bool = False) -> Dict[str, ResourceORM]: - value: Optional[Dict[str, ResourceORM]] = rcache.get() +def get_resource_cache(clear: bool = False) -> dict[str, ResourceORM]: + value: Optional[dict[str, ResourceORM]] = rcache.get() if value is None or clear: value = {} rcache.set(value) diff --git a/nucliadb/nucliadb/train/nodes.py b/nucliadb/nucliadb/train/nodes.py index d2bdc20639..f98b4b5bca 100644 --- a/nucliadb/nucliadb/train/nodes.py +++ b/nucliadb/nucliadb/train/nodes.py @@ -17,7 +17,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
# -from typing import AsyncIterator, Optional, Tuple +from typing import AsyncIterator, Optional from nucliadb_protos.train_pb2 import ( GetFieldsRequest, @@ -46,7 +46,7 @@ def __init__(self, driver: Driver, storage: Storage): self.driver = driver self.storage = storage - async def get_reader(self, kbid: str, shard: str) -> Tuple[AbstractIndexNode, str]: + async def get_reader(self, kbid: str, shard: str) -> tuple[AbstractIndexNode, str]: shards = await self.get_shards_by_kbid_inner(kbid) try: shard_object: ShardObject = next( diff --git a/nucliadb/nucliadb/train/tests/test_token_classification.py b/nucliadb/nucliadb/train/tests/test_token_classification.py index 8543e304a6..bbcd57aaaa 100644 --- a/nucliadb/nucliadb/train/tests/test_token_classification.py +++ b/nucliadb/nucliadb/train/tests/test_token_classification.py @@ -18,7 +18,6 @@ # along with this program. If not, see . import asyncio -from typing import List import aiohttp import pytest @@ -64,7 +63,7 @@ async def test_generator_token_classification( data=trainset.SerializeToString(), ) as response: assert response.status == 200 - batches: List[TokenClassificationBatch] = [] + batches: list[TokenClassificationBatch] = [] async for batch in get_batches_from_train_response_stream( response, TokenClassificationBatch ): diff --git a/nucliadb/nucliadb/train/upload.py b/nucliadb/nucliadb/train/upload.py index 05350b428e..efa6230cd9 100644 --- a/nucliadb/nucliadb/train/upload.py +++ b/nucliadb/nucliadb/train/upload.py @@ -20,7 +20,7 @@ import argparse import asyncio from asyncio import tasks -from typing import Callable, List +from typing import Callable import pkg_resources @@ -40,11 +40,11 @@ def arg_parse(): parser.add_argument("-k", "--kb", dest="kb", help="Knowledge Box", required=True) -async def main() -> List[Callable]: +async def main() -> list[Callable]: parser = arg_parse() await start_upload(parser.request, parser.kb) - finalizers: List[Callable] = [] + finalizers: list[Callable] = [] return finalizers @@ -81,7 +81,7 @@ def run() -> None: raise RuntimeError("cannot be called from a running event loop") loop = asyncio.new_event_loop() - finalizers: List[Callable] = [] + finalizers: list[Callable] = [] try: asyncio.set_event_loop(loop) if running_settings.debug is not None: diff --git a/nucliadb/nucliadb/writer/api/v1/field.py b/nucliadb/nucliadb/writer/api/v1/field.py index 442cf542e8..f93ecdd09e 100644 --- a/nucliadb/nucliadb/writer/api/v1/field.py +++ b/nucliadb/nucliadb/writer/api/v1/field.py @@ -17,7 +17,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
# -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Optional from fastapi import HTTPException, Response from fastapi_versioning import version # type: ignore @@ -62,9 +62,9 @@ ) if TYPE_CHECKING: # pragma: no cover - FIELD_TYPE_NAME_TO_FIELD_TYPE_MAP: Dict[models.FieldTypeName, FieldType.V] + FIELD_TYPE_NAME_TO_FIELD_TYPE_MAP: dict[models.FieldTypeName, FieldType.V] else: - FIELD_TYPE_NAME_TO_FIELD_TYPE_MAP: Dict[models.FieldTypeName, int] + FIELD_TYPE_NAME_TO_FIELD_TYPE_MAP: dict[models.FieldTypeName, int] FIELD_TYPE_NAME_TO_FIELD_TYPE_MAP = { @@ -81,7 +81,7 @@ def prepare_field_put( kbid: str, rid: str, request: Request -) -> Tuple[BrokerMessage, PushPayload, int]: +) -> tuple[BrokerMessage, PushPayload, int]: partitioning = get_partitioning() partition = partitioning.generate_partition(kbid, rid) @@ -636,7 +636,7 @@ async def append_messages_to_conversation_field_rslug_prefix( kbid: str, rslug: str, field_id: str, - messages: List[models.InputMessage], + messages: list[models.InputMessage], x_synchronous: bool = SYNC_CALL, ) -> ResourceFieldAdded: return await _append_messages_to_conversation_field( @@ -658,7 +658,7 @@ async def append_messages_to_conversation_field_rid_prefix( kbid: str, rid: str, field_id: str, - messages: List[models.InputMessage], + messages: list[models.InputMessage], x_synchronous: bool = SYNC_CALL, ) -> ResourceFieldAdded: return await _append_messages_to_conversation_field( @@ -670,7 +670,7 @@ async def _append_messages_to_conversation_field( request: Request, kbid: str, field_id: str, - messages: List[models.InputMessage], + messages: list[models.InputMessage], x_synchronous: bool, rid: Optional[str] = None, rslug: Optional[str] = None, @@ -732,7 +732,7 @@ async def append_blocks_to_layout_field_rslug_prefix( kbid: str, rslug: str, field_id: str, - blocks: Dict[str, models.InputBlock], + blocks: dict[str, models.InputBlock], x_synchronous: bool = SYNC_CALL, ) -> ResourceFieldAdded: return await _append_blocks_to_layout_field( @@ -754,7 +754,7 @@ async def append_blocks_to_layout_field_rid_prefix( kbid: str, rid: str, field_id: str, - blocks: Dict[str, models.InputBlock], + blocks: dict[str, models.InputBlock], x_synchronous: bool = SYNC_CALL, ) -> ResourceFieldAdded: return await _append_blocks_to_layout_field( @@ -766,7 +766,7 @@ async def _append_blocks_to_layout_field( request: Request, kbid: str, field_id: str, - blocks: Dict[str, models.InputBlock], + blocks: dict[str, models.InputBlock], x_synchronous: bool, rid: Optional[str] = None, rslug: Optional[str] = None, diff --git a/nucliadb/nucliadb/writer/api/v1/upload.py b/nucliadb/nucliadb/writer/api/v1/upload.py index ea641d8ffd..28179c22f7 100644 --- a/nucliadb/nucliadb/writer/api/v1/upload.py +++ b/nucliadb/nucliadb/writer/api/v1/upload.py @@ -24,7 +24,7 @@ from datetime import datetime from hashlib import md5 from io import BytesIO -from typing import List, Optional +from typing import Optional from fastapi import HTTPException from fastapi.params import Header @@ -580,10 +580,10 @@ async def upload_rslug_prefix( kbid: str, rslug: str, field: str, - x_filename: Optional[List[str]] = Header(None), # type: ignore - x_password: Optional[List[str]] = Header(None), # type: ignore - x_language: Optional[List[str]] = Header(None), # type: ignore - x_md5: Optional[List[str]] = Header(None), # type: ignore + x_filename: Optional[list[str]] = Header(None), # type: ignore + x_password: Optional[list[str]] = Header(None), # type: ignore + x_language: 
Optional[list[str]] = Header(None), # type: ignore + x_md5: Optional[list[str]] = Header(None), # type: ignore x_synchronous: bool = Header(False), # type: ignore ) -> ResourceFileUploaded: return await _upload( @@ -613,10 +613,10 @@ async def upload_rid_prefix( kbid: str, path_rid: str, field: str, - x_filename: Optional[List[str]] = Header(None), # type: ignore - x_password: Optional[List[str]] = Header(None), # type: ignore - x_language: Optional[List[str]] = Header(None), # type: ignore - x_md5: Optional[List[str]] = Header(None), # type: ignore + x_filename: Optional[list[str]] = Header(None), # type: ignore + x_password: Optional[list[str]] = Header(None), # type: ignore + x_language: Optional[list[str]] = Header(None), # type: ignore + x_md5: Optional[list[str]] = Header(None), # type: ignore x_synchronous: bool = Header(False), # type: ignore ) -> ResourceFileUploaded: return await _upload( @@ -644,10 +644,10 @@ async def upload_rid_prefix( async def upload( request: StarletteRequest, kbid: str, - x_filename: Optional[List[str]] = Header(None), # type: ignore - x_password: Optional[List[str]] = Header(None), # type: ignore - x_language: Optional[List[str]] = Header(None), # type: ignore - x_md5: Optional[List[str]] = Header(None), # type: ignore + x_filename: Optional[list[str]] = Header(None), # type: ignore + x_password: Optional[list[str]] = Header(None), # type: ignore + x_language: Optional[list[str]] = Header(None), # type: ignore + x_md5: Optional[list[str]] = Header(None), # type: ignore x_synchronous: bool = Header(False), # type: ignore ) -> ResourceFileUploaded: return await _upload( @@ -668,10 +668,10 @@ async def _upload( path_rid: Optional[str] = None, rslug: Optional[str] = None, field: Optional[str] = None, - x_filename: Optional[List[str]] = Header(None), # type: ignore - x_password: Optional[List[str]] = Header(None), # type: ignore - x_language: Optional[List[str]] = Header(None), # type: ignore - x_md5: Optional[List[str]] = Header(None), # type: ignore + x_filename: Optional[list[str]] = Header(None), # type: ignore + x_password: Optional[list[str]] = Header(None), # type: ignore + x_language: Optional[list[str]] = Header(None), # type: ignore + x_md5: Optional[list[str]] = Header(None), # type: ignore x_synchronous: bool = Header(False), # type: ignore ) -> ResourceFileUploaded: if rslug is not None: diff --git a/nucliadb/nucliadb/writer/layouts/__init__.py b/nucliadb/nucliadb/writer/layouts/__init__.py index e9a49bc12f..2ad8697b26 100644 --- a/nucliadb/nucliadb/writer/layouts/__init__.py +++ b/nucliadb/nucliadb/writer/layouts/__init__.py @@ -17,14 +17,14 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . # -from typing import Any, Callable, Coroutine, Dict +from typing import Any, Callable, Coroutine from nucliadb_protos.resources_pb2 import FieldLayout import nucliadb_models as models from nucliadb_utils.storages.storage import Storage -VERSION: Dict[ +VERSION: dict[ int, Callable[ [models.InputLayoutField, str, str, str, Storage], diff --git a/nucliadb/nucliadb/writer/tests/fixtures.py b/nucliadb/nucliadb/writer/tests/fixtures.py index d89e772d8c..d934bb6eb8 100644 --- a/nucliadb/nucliadb/writer/tests/fixtures.py +++ b/nucliadb/nucliadb/writer/tests/fixtures.py @@ -18,7 +18,7 @@ # along with this program. If not, see . 
# from enum import Enum -from typing import AsyncIterator, Callable, List, Optional +from typing import AsyncIterator, Callable, Optional import pytest from httpx import AsyncClient @@ -51,14 +51,14 @@ async def writer_api( processing_utility, tus_manager, event_loop, -) -> AsyncIterator[Callable[[List[Enum], str, str], AsyncClient]]: +) -> AsyncIterator[Callable[[list[Enum], str, str], AsyncClient]]: nucliadb_settings.nucliadb_ingest = grpc_servicer.host from nucliadb.writer.app import create_application application = create_application() def make_client_fixture( - roles: Optional[List[Enum]] = None, + roles: Optional[list[Enum]] = None, user: str = "", version: str = "1", ) -> AsyncClient: diff --git a/nucliadb/nucliadb/writer/tests/test_files.py b/nucliadb/nucliadb/writer/tests/test_files.py index 6076cb0bff..a38d333027 100644 --- a/nucliadb/nucliadb/writer/tests/test_files.py +++ b/nucliadb/nucliadb/writer/tests/test_files.py @@ -21,7 +21,7 @@ import base64 import io import os -from typing import Callable, List +from typing import Callable import pytest from httpx import AsyncClient @@ -40,7 +40,7 @@ @pytest.mark.asyncio async def test_knowledgebox_file_tus_options( - writer_api: Callable[[List[NucliaDBRoles]], AsyncClient], knowledgebox_writer: str + writer_api: Callable[[list[NucliaDBRoles]], AsyncClient], knowledgebox_writer: str ): client: AsyncClient async with writer_api([NucliaDBRoles.WRITER]) as client: @@ -169,7 +169,7 @@ async def test_knowledgebox_file_tus_upload_root(writer_api, knowledgebox_writer @pytest.mark.asyncio async def test_knowledgebox_file_upload_root( - writer_api: Callable[[List[NucliaDBRoles]], AsyncClient], + writer_api: Callable[[list[NucliaDBRoles]], AsyncClient], knowledgebox_writer: str, ): async with writer_api([NucliaDBRoles.WRITER]) as client: @@ -225,7 +225,7 @@ async def test_knowledgebox_file_upload_root( @pytest.mark.asyncio async def test_knowledgebox_file_upload_root_headers( - writer_api: Callable[[List[NucliaDBRoles]], AsyncClient], + writer_api: Callable[[list[NucliaDBRoles]], AsyncClient], knowledgebox_writer: str, ): async with writer_api([NucliaDBRoles.WRITER]) as client: diff --git a/nucliadb/nucliadb/writer/tests/test_reprocess_file_field.py b/nucliadb/nucliadb/writer/tests/test_reprocess_file_field.py index fe1895ac82..7508e8117f 100644 --- a/nucliadb/nucliadb/writer/tests/test_reprocess_file_field.py +++ b/nucliadb/nucliadb/writer/tests/test_reprocess_file_field.py @@ -17,7 +17,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
# -from typing import AsyncIterator, Tuple +from typing import AsyncIterator from unittest.mock import AsyncMock import pytest @@ -48,7 +48,7 @@ def processing_mock(mocker): @pytest.mark.asyncio async def file_field( writer_api, knowledgebox_writer: str -) -> AsyncIterator[Tuple[str, str, str]]: +) -> AsyncIterator[tuple[str, str, str]]: kbid = knowledgebox_writer field_id = "myfile" @@ -107,7 +107,7 @@ async def test_reprocess_nonexistent_file_field( @pytest.mark.asyncio async def test_reprocess_file_field_with_password( - writer_api, file_field: Tuple[str, str, str], processing_mock + writer_api, file_field: tuple[str, str, str], processing_mock ): kbid, rid, field_id = file_field password = "secret-password" @@ -126,7 +126,7 @@ async def test_reprocess_file_field_with_password( @pytest.mark.asyncio async def test_reprocess_file_field_without_password( - writer_api, file_field: Tuple[str, str, str], processing_mock + writer_api, file_field: tuple[str, str, str], processing_mock ): kbid, rid, field_id = file_field diff --git a/nucliadb/nucliadb/writer/tests/test_resources.py b/nucliadb/nucliadb/writer/tests/test_resources.py index 8cb359bd98..8924645826 100644 --- a/nucliadb/nucliadb/writer/tests/test_resources.py +++ b/nucliadb/nucliadb/writer/tests/test_resources.py @@ -18,7 +18,7 @@ # along with this program. If not, see . # from datetime import datetime -from typing import Any, Callable, Dict, List, Optional +from typing import Any, Callable, Optional from unittest.mock import AsyncMock # type: ignore import pytest @@ -52,7 +52,7 @@ @pytest.mark.asyncio async def test_resource_crud_min( - writer_api: Callable[[List[str]], AsyncClient], knowledgebox_writer: str + writer_api: Callable[[list[str]], AsyncClient], knowledgebox_writer: str ): knowledgebox_id = knowledgebox_writer async with writer_api([NucliaDBRoles.WRITER]) as client: @@ -85,7 +85,7 @@ async def test_resource_crud_min( @pytest.mark.asyncio async def test_resource_crud_min_no_vectorset( - writer_api: Callable[[List[str]], AsyncClient], knowledgebox_writer: str + writer_api: Callable[[list[str]], AsyncClient], knowledgebox_writer: str ): knowledgebox_id = knowledgebox_writer async with writer_api([NucliaDBRoles.WRITER]) as client: @@ -113,7 +113,7 @@ async def test_resource_crud_min_no_vectorset( @pytest.mark.asyncio async def test_resource_crud( - writer_api: Callable[[List[str]], AsyncClient], knowledgebox_writer: str + writer_api: Callable[[list[str]], AsyncClient], knowledgebox_writer: str ): knowledgebox_id = knowledgebox_writer async with writer_api([NucliaDBRoles.WRITER]) as client: @@ -209,7 +209,7 @@ async def test_resource_crud( @pytest.mark.asyncio async def test_resource_crud_sync( - writer_api: Callable[[List[str]], AsyncClient], knowledgebox_writer: str + writer_api: Callable[[list[str]], AsyncClient], knowledgebox_writer: str ): knowledgebox_id = knowledgebox_writer async with writer_api([NucliaDBRoles.WRITER]) as client: @@ -387,11 +387,11 @@ async def test_reprocess_resource( ], ) async def test_resource_endpoints_by_slug( - writer_api: Callable[[List[str]], AsyncClient], + writer_api: Callable[[list[str]], AsyncClient], knowledgebox_ingest: str, method: str, endpoint: str, - payload: Optional[Dict[Any, Any]], + payload: Optional[dict[Any, Any]], ): async with writer_api([NucliaDBRoles.WRITER]) as client: slug = "my-resource" diff --git a/nucliadb/nucliadb/writer/tests/test_tus.py b/nucliadb/nucliadb/writer/tests/test_tus.py index a7d8365ed8..6ab3168f6c 100644 --- a/nucliadb/nucliadb/writer/tests/test_tus.py 
+++ b/nucliadb/nucliadb/writer/tests/test_tus.py @@ -19,7 +19,6 @@ # import tempfile import uuid -from typing import Dict import asyncpg import pytest @@ -153,7 +152,7 @@ async def storage_test(storage: BlobStore, file_storage_manager: FileStorageMana rid = "myrid" kbid = "mykb_tus_test" - metadata: Dict[str, str] = {} + metadata: dict[str, str] = {} bucket_name = storage.get_bucket_name(kbid) assert bucket_name in [ "test_mykb_tus_test", diff --git a/nucliadb/nucliadb/writer/tus/dm.py b/nucliadb/nucliadb/writer/tus/dm.py index 7ced8410a3..c7e03e9282 100644 --- a/nucliadb/nucliadb/writer/tus/dm.py +++ b/nucliadb/nucliadb/writer/tus/dm.py @@ -18,7 +18,7 @@ # along with this program. If not, see . # import time -from typing import Any, Dict, Optional +from typing import Any, Optional import orjson from redis import asyncio as aioredis @@ -33,11 +33,11 @@ class NoRedisConfigured(Exception): pass -DATA: Dict[str, Any] = {} +DATA: dict[str, Any] = {} class FileDataManager: - _data: Optional[Dict[str, Any]] = None + _data: Optional[dict[str, Any]] = None _loaded = False key = None _ttl = 60 * 50 * 5 # 5 minutes should be plenty of time between activity diff --git a/nucliadb/nucliadb/writer/tus/gcs.py b/nucliadb/nucliadb/writer/tus/gcs.py index ed7e77ae14..45fbdebc11 100644 --- a/nucliadb/nucliadb/writer/tus/gcs.py +++ b/nucliadb/nucliadb/writer/tus/gcs.py @@ -28,7 +28,7 @@ from concurrent.futures import ThreadPoolExecutor from copy import deepcopy from datetime import datetime -from typing import AsyncIterator, Dict, Optional +from typing import AsyncIterator, Optional from urllib.parse import quote_plus import aiohttp @@ -353,7 +353,7 @@ async def finish(self, dm: FileDataManager): await dm.finish() return path - async def iter_data(self, uri, kbid: str, headers: Optional[Dict[str, str]] = None): + async def iter_data(self, uri, kbid: str, headers: Optional[dict[str, str]] = None): if self.storage.session is None: raise AttributeError() if headers is None: diff --git a/nucliadb/nucliadb/writer/tus/s3.py b/nucliadb/nucliadb/writer/tus/s3.py index 04baadab44..8999653b99 100644 --- a/nucliadb/nucliadb/writer/tus/s3.py +++ b/nucliadb/nucliadb/writer/tus/s3.py @@ -21,7 +21,7 @@ import uuid from contextlib import AsyncExitStack -from typing import AsyncIterator, Dict, Optional +from typing import AsyncIterator, Optional import aiobotocore # type: ignore import aiohttp @@ -142,7 +142,7 @@ async def _download(self, uri: str, kbid: str, **kwargs): ) async def iter_data( - self, uri: str, kbid: str, headers: Optional[Dict[str, str]] = None + self, uri: str, kbid: str, headers: Optional[dict[str, str]] = None ): if headers is None: headers = {} diff --git a/nucliadb/nucliadb/writer/tus/storage.py b/nucliadb/nucliadb/writer/tus/storage.py index 6e9c3746f5..ccbed75c8a 100644 --- a/nucliadb/nucliadb/writer/tus/storage.py +++ b/nucliadb/nucliadb/writer/tus/storage.py @@ -19,7 +19,7 @@ # from __future__ import annotations -from typing import AsyncIterator, Dict, Optional +from typing import AsyncIterator, Optional from lru import LRU # type: ignore from nucliadb_protos.resources_pb2 import CloudFile @@ -65,7 +65,7 @@ def read_range( raise NotImplementedError() def iter_data( - self, uri: str, kbid: str, headers: Optional[Dict[str, str]] = None + self, uri: str, kbid: str, headers: Optional[dict[str, str]] = None ) -> AsyncIterator[bytes]: raise NotImplementedError() diff --git a/nucliadb/nucliadb/writer/tus/utils.py b/nucliadb/nucliadb/writer/tus/utils.py index eb07c90aba..079e135d74 100644 --- 
a/nucliadb/nucliadb/writer/tus/utils.py +++ b/nucliadb/nucliadb/writer/tus/utils.py @@ -20,7 +20,6 @@ import base64 import binascii import re -from typing import Dict from nucliadb.writer.tus.exceptions import InvalidTUSMetadata @@ -35,7 +34,7 @@ def to_str(value): match_b64 = re.compile(r"[^-A-Za-z0-9+/=]|=[^=]|={3,}$") -def parse_tus_metadata(header: str) -> Dict: +def parse_tus_metadata(header: str) -> dict: """ https://tus.io/protocols/resumable-upload.html#upload-metadata """ diff --git a/nucliadb_client/setup.py b/nucliadb_client/setup.py index 0967fbe0c1..7b58f79215 100644 --- a/nucliadb_client/setup.py +++ b/nucliadb_client/setup.py @@ -29,6 +29,7 @@ def load_reqs(filename): classifiers=[ "Development Status :: 4 - Beta", "Programming Language :: Python", + "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/nucliadb_dataset/setup.py b/nucliadb_dataset/setup.py index c42e14b3d4..676748fbbe 100644 --- a/nucliadb_dataset/setup.py +++ b/nucliadb_dataset/setup.py @@ -37,6 +37,7 @@ def load_reqs(filename): "Intended Audience :: Information Technology", "License :: OSI Approved :: GNU Affero General Public License v3", "Programming Language :: Python", + "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/nucliadb_sdk/setup.py b/nucliadb_sdk/setup.py index d53d522931..b13aead4cd 100644 --- a/nucliadb_sdk/setup.py +++ b/nucliadb_sdk/setup.py @@ -33,6 +33,7 @@ def load_reqs(filename): "Topic :: Software Development :: Libraries :: Python Modules", "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)", "Programming Language :: Python", + "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/nucliadb_telemetry/setup.py b/nucliadb_telemetry/setup.py index 1193f0320c..8be2907ec9 100644 --- a/nucliadb_telemetry/setup.py +++ b/nucliadb_telemetry/setup.py @@ -61,6 +61,7 @@ def load_reqs(filename): "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)", "Framework :: AsyncIO", "Programming Language :: Python", + "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/nucliadb_utils/setup.py b/nucliadb_utils/setup.py index 3cf5fba8cc..1e9d1f821c 100644 --- a/nucliadb_utils/setup.py +++ b/nucliadb_utils/setup.py @@ -48,6 +48,7 @@ def load_extra(sections: List[str]): classifiers=[ "Development Status :: 4 - Beta", "Programming Language :: Python", + "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11",
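
For reference, every annotation change above follows the same PEP 585 pattern: container generics that previously had to be imported from `typing` (`List`, `Dict`, `Tuple`) are replaced by the subscriptable builtins (`list`, `dict`, `tuple`), while `Optional` and `Union` are deliberately kept, since the `X | Y` union syntax (PEP 604) would additionally require Python 3.10. A minimal sketch of the pattern, with hypothetical names not taken from this diff:

    # Before: generics imported from `typing` (also valid on Python 3.8).
    #
    #     from typing import Dict, List, Optional
    #
    #     def count_labels(labels: List[str],
    #                      boost: Optional[Dict[str, float]] = None) -> Dict[str, float]:
    #         ...

    # After: builtin generics per PEP 585 (Python 3.9+ at runtime).
    from typing import Optional


    def count_labels(
        labels: list[str],
        boost: Optional[dict[str, float]] = None,
    ) -> dict[str, float]:
        """Count each label, optionally weighting by a boost factor (illustrative only)."""
        boost = boost or {}
        counts: dict[str, float] = {}
        for label in labels:
            counts[label] = counts.get(label, 0.0) + boost.get(label, 1.0)
        return counts


    if __name__ == "__main__":
        assert count_labels(["a", "b", "a"]) == {"a": 2.0, "b": 1.0}
        assert count_labels(["a", "b"], boost={"b": 2.0}) == {"a": 1.0, "b": 2.0}

Migrations of this shape are commonly automated with a tool such as pyupgrade (`pyupgrade --py39-plus`), though the patch does not state how it was produced.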
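
A note on the Python 3.8 classifiers added to the setup.py files above: builtin generics only evaluate on Python 3.9+; on 3.8 they are accepted solely inside annotations made lazy by `from __future__ import annotations` (PEP 563). Module-level aliases such as `Bm25Score = tuple[float, float]` in merge.py execute eagerly at import time, so they require 3.9 regardless of the future import. Consistent with that, the 3.8 classifier is only added to the sibling packages (nucliadb_client, nucliadb_dataset, nucliadb_sdk, nucliadb_telemetry, nucliadb_utils), whose sources are not touched by the annotation migration, while the nucliadb package itself, where all of the `list`/`dict`/`tuple` rewrites land, does not gain a 3.8 classifier in this patch. A sketch of the 3.8-compatible variant, again with hypothetical names:

    # Hypothetical module targeting Python 3.8: the future import turns all
    # annotations into lazy strings, so builtin generics are fine in signatures.
    from __future__ import annotations


    def split_batches(items: list[str], size: int) -> list[list[str]]:
        """Chunk a list into fixed-size batches (illustrative helper only)."""
        return [items[i : i + size] for i in range(0, len(items), size)]


    # By contrast, a runtime subscription like the alias below raises
    # TypeError on 3.8 because it executes at import time:
    #
    #     Bm25Score = tuple[float, float]   # needs Python 3.9+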