From 02713f2112baf9efcadfa7ce12530a5667a2d495 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Thu, 1 Feb 2024 14:40:05 +0100 Subject: [PATCH] Fix load tests --- audb/core/dependencies.py | 23 +++++++++++++++-------- audb/core/publish.py | 9 ++++++--- tests/test_load.py | 3 +-- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/audb/core/dependencies.py b/audb/core/dependencies.py index d30a8454..8b75e66a 100644 --- a/audb/core/dependencies.py +++ b/audb/core/dependencies.py @@ -401,7 +401,11 @@ def save(self, path: str): columns = self._table.column_names columns[0] = "" table = self._table.rename_columns(columns) - csv.write_csv(table, path) + csv.write_csv( + table, + path, + write_options=csv.WriteOptions(quoting_style="none"), + ) elif path.endswith("parquet"): parquet.write_table(self._table, path) @@ -574,13 +578,20 @@ def _table_add_rows( self, rows: typing.Sequence[typing.Dict[str, typing.Any]], ): - r"""Add rows. + r"""Add or replace rows. Args: rows: list of tuples, where each tuple holds the values of a new row """ + # Remove rows with matching `"file"` + if self._table is not None: + files = [row["file"] for row in rows] + mask = dataset.field("file").isin(files) + table = self._table.filter(~mask) + self._table_replace(table) + # Append new rows table = pa.Table.from_pylist(rows, schema=self._schema) self._table_append(table) @@ -596,6 +607,7 @@ def _table_append( """ if self._table is not None: table = pa.concat_tables([self._table, table]) + table = table.combine_chunks() self._table_replace(table) def _table_column( @@ -721,12 +733,7 @@ def update_version(row): rows = [ update_version(self._table_row(file, raise_error=True)) for file in files ] - # Remove all selected files - mask = dataset.field("file").isin(files) - self._table = self._table.filter(~mask) - # Add updates as new entries - table = pa.Table.from_pylist(rows, schema=self._schema) - self._table_append(table) + self._table_add_rows(rows) def error_message_missing_object( diff --git a/audb/core/publish.py b/audb/core/publish.py index c4576625..5e517b62 100644 --- a/audb/core/publish.py +++ b/audb/core/publish.py @@ -184,15 +184,13 @@ def _find_media( if deps._table is not None: removed_files = set(deps.media) - db_media if len(removed_files) > 0: - print(f'PUBLISH: {removed_files=}') - print(f'PUBLISH: {deps()=}') + print(f"PUBLISH: removed media '{removed_files=}'") mask = dataset.field("file").isin(removed_files) media_archives = set( deps._table.filter(mask).column("archive").to_pylist() ) table = deps._table.filter(~mask) deps._table_replace(table) - print(f'PUBLISH: {deps()=}') # limit to relevant media db_media_in_root = db_media.intersection(db_root_files) @@ -262,6 +260,8 @@ def job(file): if not deps.removed(file) and deps.version(file) == version: media_archives.add(deps.archive(file)) + print(f"PUBLISH: media archives to be updated '{media_archives}'") + return media_archives @@ -279,6 +279,7 @@ def _find_tables( db_tables = [f"db.{table}.csv" for table in list(db)] removed_tables = set(deps.tables) - set(db_tables) if len(removed_tables) > 0: + print(f"PUBLISH: removed table '{removed_tables=}'") mask = dataset.field("file").isin(removed_tables) table = deps._table.filter(~mask) deps._table_replace(table) @@ -295,6 +296,8 @@ def _find_tables( deps._add_meta(file, version, table, checksum) tables.append(table) + print(f"PUBLISH: tables to be updated '{tables}'") + return tables diff --git a/tests/test_load.py b/tests/test_load.py index a0883231..3ff46f73 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -250,7 +250,7 @@ def dbs(tmpdir_factory, persistent_repository): ) print(deps()) - assert False + # assert False return paths @@ -372,7 +372,6 @@ def test_load(dbs, format, version, only_metadata): # Assert all files are listed in dependency table deps = audb.dependencies(DB_NAME, version=version) - assert str(deps().to_string()) == str(deps) assert len(deps) == ( len(db.files) + len(db.tables) + len(db.misc_tables) + len(db.attachments) )