Skip to content

Commit

Permalink
add pg storage tests (#1669)
Browse files Browse the repository at this point in the history
* add pg storage tests

* log error

* fix
  • Loading branch information
vangheem authored Dec 14, 2023
1 parent 8e7164e commit 40663e4
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 6 deletions.
21 changes: 15 additions & 6 deletions nucliadb_utils/nucliadb_utils/storages/pg.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from __future__ import annotations

import asyncio
import logging
import uuid
from typing import Any, AsyncIterator, Optional, TypedDict

Expand All @@ -29,6 +30,8 @@
from nucliadb_utils.storages import CHUNK_SIZE
from nucliadb_utils.storages.storage import Storage, StorageField

logger = logging.getLogger(__name__)

# Table design notes
# - No foreign key constraints ON PURPOSE
# - No cascade handling ON PURPOSE
Expand Down Expand Up @@ -497,12 +500,18 @@ async def finish(self):
await dl.delete_file(self.bucket, self.field.uri)

if self.field.upload_uri != self.key:
await dl.move(
origin_key=self.field.upload_uri,
destination_key=self.key,
origin_kb=self.field.bucket_name,
destination_kb=self.bucket,
)
try:
await dl.move(
origin_key=self.field.upload_uri,
destination_key=self.key,
origin_kb=self.field.bucket_name,
destination_kb=self.bucket,
)
except Exception:
logger.exception(
f"Error moving file {self.field.bucket_name}://{self.field.upload_uri} -> {self.bucket}://{self.key}"
)
raise

self.field.uri = self.key
self.field.ClearField("offset")
Expand Down
12 changes: 12 additions & 0 deletions nucliadb_utils/nucliadb_utils/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import asyncpg
import pytest
from pytest_docker_fixtures import images # type: ignore
from pytest_lazy_fixtures import lazy_fixture

from nucliadb_utils.storages.pg import PostgresStorage
Expand All @@ -34,6 +35,17 @@
"nucliadb_utils.tests.local",
]

# Overrides for the "postgresql" entry of the pytest_docker_fixtures image
# settings: the image version and the environment passed to the container.
_postgresql_image_overrides = {
    "version": "16.1",
    "env": {
        "POSTGRES_PASSWORD": "postgres",
        "POSTGRES_DB": "postgres",
        "POSTGRES_USER": "postgres",
    },
}
images.settings["postgresql"].update(_postgresql_image_overrides)


@pytest.fixture(scope="function")
async def pg_storage(pg):
Expand Down
122 changes: 122 additions & 0 deletions nucliadb_utils/nucliadb_utils/tests/integration/storages/test_pg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Copyright (C) 2021 Bosutech XXI S.L.
#
# nucliadb is offered under the AGPL v3.0 and as commercial software.
# For commercial licensing, contact us at [email protected].
#
# AGPL:
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import pytest

from nucliadb_utils.storages.pg import PostgresFileDataLayer, PostgresStorage

pytestmark = pytest.mark.asyncio


class TestPostgresFileDataLayer:
    """Integration tests for ``PostgresFileDataLayer.move``.

    Each test gets a data layer bound to a connection acquired from the
    pool of the ``pg_storage`` fixture.
    """

    @pytest.fixture()
    async def data_layer(self, pg_storage: PostgresStorage):
        """Yield a data layer that uses a connection from the storage pool.

        The connection is released back to the pool when the test finishes.
        """
        async with pg_storage.pool.acquire() as conn:
            yield PostgresFileDataLayer(conn)

    @staticmethod
    async def _read_all(
        data_layer: PostgresFileDataLayer, kb_id: str, file_id: str
    ) -> bytes:
        """Return the concatenated ``data`` of every chunk stored for a file."""
        return b"".join(
            [
                chunk["data"]
                async for chunk in data_layer.iterate_chunks(kb_id, file_id)
            ]
        )

    async def test_move_file(self, data_layer: PostgresFileDataLayer):
        """Moving to an unused key drops the origin and keeps the content."""
        await data_layer.create_file(
            kb_id="kb_id",
            file_id="file_id",
            filename="filename",
            size=5,
            content_type="content_type",
        )
        await data_layer.append_chunk(kb_id="kb_id", file_id="file_id", data=b"12345")
        await data_layer.move(
            origin_key="file_id",
            destination_key="new_file_id",
            origin_kb="kb_id",
            destination_kb="kb_id",
        )

        # The origin key must no longer resolve to a file...
        assert (
            await data_layer.get_file_info(
                kb_id="kb_id",
                file_id="file_id",
            )
            is None
        )
        # ...while the destination key must.
        assert (
            await data_layer.get_file_info(
                kb_id="kb_id",
                file_id="new_file_id",
            )
            is not None
        )

        assert await self._read_all(data_layer, "kb_id", "new_file_id") == b"12345"

    async def test_move_file_overwrites(self, data_layer: PostgresFileDataLayer):
        """Moving onto an existing key replaces that file's content."""
        await data_layer.create_file(
            kb_id="kb_id",
            file_id="file_id1",
            filename="filename",
            size=5,
            content_type="content_type",
        )
        await data_layer.append_chunk(kb_id="kb_id", file_id="file_id1", data=b"12345")
        await data_layer.create_file(
            kb_id="kb_id",
            file_id="file_id2",
            filename="filename",
            size=5,
            content_type="content_type",
        )
        await data_layer.append_chunk(kb_id="kb_id", file_id="file_id2", data=b"67890")

        await data_layer.move(
            origin_key="file_id2",
            destination_key="file_id1",
            origin_kb="kb_id",
            destination_kb="kb_id",
        )

        # The origin key must no longer resolve to a file...
        assert (
            await data_layer.get_file_info(
                kb_id="kb_id",
                file_id="file_id2",
            )
            is None
        )
        # ...while the destination key must still exist.
        assert (
            await data_layer.get_file_info(
                kb_id="kb_id",
                file_id="file_id1",
            )
            is not None
        )

        # The destination now holds the data of the moved file, not its own.
        assert await self._read_all(data_layer, "kb_id", "file_id1") == b"67890"

1 comment on commit 40663e4

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 40663e4 Previous: 5a633b0 Ratio
nucliadb/search/tests/unit/search/test_fetch.py::test_highligh_error 13225.055940695998 iter/sec (stddev: 5.940716741905013e-7) 12745.686329086004 iter/sec (stddev: 1.7317806991721728e-7) 0.96

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.