diff --git a/nucliadb_utils/nucliadb_utils/storages/pg.py b/nucliadb_utils/nucliadb_utils/storages/pg.py index 14e0433660..9f0c628736 100644 --- a/nucliadb_utils/nucliadb_utils/storages/pg.py +++ b/nucliadb_utils/nucliadb_utils/storages/pg.py @@ -20,6 +20,7 @@ from __future__ import annotations import asyncio +import logging import uuid from typing import Any, AsyncIterator, Optional, TypedDict @@ -29,6 +30,8 @@ from nucliadb_utils.storages import CHUNK_SIZE from nucliadb_utils.storages.storage import Storage, StorageField +logger = logging.getLogger(__name__) + # Table design notes # - No foreign key constraints ON PURPOSE # - No cascade handling ON PURPOSE @@ -497,12 +500,18 @@ async def finish(self): await dl.delete_file(self.bucket, self.field.uri) if self.field.upload_uri != self.key: - await dl.move( - origin_key=self.field.upload_uri, - destination_key=self.key, - origin_kb=self.field.bucket_name, - destination_kb=self.bucket, - ) + try: + await dl.move( + origin_key=self.field.upload_uri, + destination_key=self.key, + origin_kb=self.field.bucket_name, + destination_kb=self.bucket, + ) + except Exception: + logger.exception( + f"Error moving file {self.field.bucket_name}://{self.field.upload_uri} -> {self.bucket}://{self.key}" + ) + raise self.field.uri = self.key self.field.ClearField("offset") diff --git a/nucliadb_utils/nucliadb_utils/tests/conftest.py b/nucliadb_utils/nucliadb_utils/tests/conftest.py index 1d955aeae8..05a33cb06c 100644 --- a/nucliadb_utils/nucliadb_utils/tests/conftest.py +++ b/nucliadb_utils/nucliadb_utils/tests/conftest.py @@ -20,6 +20,7 @@ import asyncpg import pytest +from pytest_docker_fixtures import images # type: ignore from pytest_lazy_fixtures import lazy_fixture from nucliadb_utils.storages.pg import PostgresStorage @@ -34,6 +35,17 @@ "nucliadb_utils.tests.local", ] +images.settings["postgresql"].update( + { + "version": "16.1", + "env": { + "POSTGRES_PASSWORD": "postgres", + "POSTGRES_DB": "postgres", + "POSTGRES_USER": 
"postgres", }, } ) @pytest.fixture(scope="function") async def pg_storage(pg): diff --git a/nucliadb_utils/nucliadb_utils/tests/integration/storages/test_pg.py b/nucliadb_utils/nucliadb_utils/tests/integration/storages/test_pg.py new file mode 100644 index 0000000000..73cc548459 --- /dev/null +++ b/nucliadb_utils/nucliadb_utils/tests/integration/storages/test_pg.py @@ -0,0 +1,122 @@ +# Copyright (C) 2021 Bosutech XXI S.L. +# +# nucliadb is offered under the AGPL v3.0 and as commercial software. +# For commercial licensing, contact us at info@nuclia.com. +# +# AGPL: +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>.
+import pytest + +from nucliadb_utils.storages.pg import PostgresFileDataLayer, PostgresStorage + +pytestmark = pytest.mark.asyncio + + +class TestPostgresFileDataLayer: + @pytest.fixture() + async def data_layer(self, pg_storage: PostgresStorage): + async with pg_storage.pool.acquire() as conn: + yield PostgresFileDataLayer(conn) + + async def test_move_file(self, data_layer: PostgresFileDataLayer): + await data_layer.create_file( + kb_id="kb_id", + file_id="file_id", + filename="filename", + size=5, + content_type="content_type", + ) + await data_layer.append_chunk(kb_id="kb_id", file_id="file_id", data=b"12345") + await data_layer.move( + origin_key="file_id", + destination_key="new_file_id", + origin_kb="kb_id", + destination_kb="kb_id", + ) + + assert ( + await data_layer.get_file_info( + kb_id="kb_id", + file_id="file_id", + ) + is None + ) + assert ( + await data_layer.get_file_info( + kb_id="kb_id", + file_id="new_file_id", + ) + is not None + ) + + assert ( + b"".join( + [ + chunk["data"] + async for chunk in data_layer.iterate_chunks("kb_id", "new_file_id") + ] + ) + == b"12345" + ) + + async def test_move_file_overwrites(self, data_layer: PostgresFileDataLayer): + await data_layer.create_file( + kb_id="kb_id", + file_id="file_id1", + filename="filename", + size=5, + content_type="content_type", + ) + await data_layer.append_chunk(kb_id="kb_id", file_id="file_id1", data=b"12345") + await data_layer.create_file( + kb_id="kb_id", + file_id="file_id2", + filename="filename", + size=5, + content_type="content_type", + ) + await data_layer.append_chunk(kb_id="kb_id", file_id="file_id2", data=b"67890") + + await data_layer.move( + origin_key="file_id2", + destination_key="file_id1", + origin_kb="kb_id", + destination_kb="kb_id", + ) + + assert ( + await data_layer.get_file_info( + kb_id="kb_id", + file_id="file_id2", + ) + is None + ) + assert ( + await data_layer.get_file_info( + kb_id="kb_id", + file_id="file_id1", + ) + is not None + ) + + assert ( + 
b"".join( [ chunk["data"] async for chunk in data_layer.iterate_chunks("kb_id", "file_id1") ] ) == b"67890" )