audeering · hagenw · May 8, 2024 · Apr 29, 2024 · Apr 29, 2024 · Apr 29, 2024
diff --git a/audb/core/load.py b/audb/core/load.py
@@ -459,13 +459,18 @@ def job(archive: str, version: str):
                 src_path = os.path.join(db_root_tmp, file)
                 file = flavor.destination(file)
                 dst_path = os.path.join(db_root_tmp, file)
-                flavor(
-                    src_path,
-                    dst_path,
-                    src_bit_depth=bit_depth,
-                    src_channels=channels,
-                    src_sampling_rate=sampling_rate,
-                )
+                try:
+                    flavor(
+                        src_path,
+                        dst_path,
+                        src_bit_depth=bit_depth,
+                        src_channels=channels,
+                        src_sampling_rate=sampling_rate,
+                    )
+                except RuntimeError:
+                    raise RuntimeError(
+                        f"Media file '{file}' does not support requesting a flavor."
+                    )
                 if src_path != dst_path:
                     os.remove(src_path)
 
@@ -1001,6 +1006,10 @@ def load(
             ``format``,
             or ``sampling_rate``
             is requested
+        RuntimeError: if a flavor is requested,
+            but the database contains media files
+            that don't contain audio,
+            e.g. text files
 
     Examples:
         >>> db = audb.load(

diff --git a/audb/core/publish.py b/audb/core/publish.py
@@ -300,9 +300,38 @@ def _media_values(
     archive: str,
     checksum: str,
 ) -> typing.Tuple[str, str, int, int, str, float, str, int, float, int, str]:
-    r"""Return values of a media entry in dependencies."""
+    r"""Return values of a media entry in dependencies.
+
+    The dependency table expects the following columns:
+
+    * file
+    * archive
+    * bit depth
+    * channels
+    * checksum
+    * duration
+    * format
+    * removed
+    * sampling rate
+    * dependency type
+    * version
+
+    Args:
+        root: root of database
+        file: relative media file path
+        version: database version
+        archive: archive the media file is stored in
+        checksum: checksum of the media file
+
+    Returns:
+        row to be added to the dependency table as tuple
+
+    """
+    dependency_type = define.DependType.MEDIA
     format = audeer.file_extension(file).lower()
+    removed = 0
 
+    # Inspect media file to get audio/video metadata
     try:
         path = os.path.join(root, file)
         bit_depth = audiofile.bit_depth(path)
@@ -318,6 +347,14 @@ def _media_values(
             f"sox and mediainfo have to be installed "
             f"to publish '{format}' media files."
         )
+    except RuntimeError:
+        # Media files that carry no audio/video metadata
+        # (e.g. text files)
+        # get zeroed values instead
+        bit_depth = 0
+        channels = 0
+        duration = 0.0
+        sampling_rate = 0
 
     return (
         file,
@@ -327,9 +364,9 @@ def _media_values(
         checksum,
         duration,
         format,
-        0,  # removed
+        removed,
         sampling_rate,
-        define.DependType.MEDIA,
+        dependency_type,
         version,
     )
 

diff --git a/tests/test_publish.py b/tests/test_publish.py
@@ -45,6 +45,7 @@ def dbs(tmpdir_factory):
     #
     # tables:
     #   - emotion
+    #   - files
     # misc tables:
     #   - misc-in-scheme
     #   - misc-not-in-scheme
@@ -133,6 +134,7 @@ def dbs(tmpdir_factory):
     #
     # tables:
     #   - emotion
+    #   - files
     # misc tables:
     #   - misc-in-scheme
     #   - misc-not-in-scheme
@@ -179,6 +181,7 @@ def dbs(tmpdir_factory):
     #
     # tables:
     #   - emotion
+    #   - files
     # misc tables:
     #   - misc-in-scheme
     #   - misc-not-in-scheme
@@ -208,6 +211,7 @@ def dbs(tmpdir_factory):
     #
     # tables:
     #   - emotion
+    #   - files
     # misc tables:
     #   - misc-in-scheme
     #   - misc-not-in-scheme
@@ -232,6 +236,7 @@ def dbs(tmpdir_factory):
     #
     # tables:
     #   - emotion
+    #   - files
     # misc tables:
     #   - misc-in-scheme
     #   - misc-not-in-scheme
@@ -267,6 +272,7 @@ def dbs(tmpdir_factory):
     #
     # tables:
     #   - emotion
+    #   - files
     # misc tables:
     #   - misc-in-scheme
     #   - misc-not-in-scheme
@@ -1058,6 +1064,85 @@ def test_publish_error_version(tmpdir, repository):
         audb.publish(db_path, "2.0.0", repository, previous_version="1.0.0?")
 
 
+def test_publish_text_media_files(tmpdir, dbs, repository):
+    r"""Test publishing databases containing text files as media files."""
+    # Create a database containing a single text file as its only media file
+    build_dir = audeer.path(tmpdir, "./build")
+    audeer.mkdir(build_dir)
+    data_dir = audeer.mkdir(build_dir, "data")
+    with open(audeer.path(data_dir, "file1.txt"), "w") as file:
+        file.write("Text written by a person.\n")
+    name = "text-db"
+    db = audformat.Database(name)
+    db.schemes["speaker"] = audformat.Scheme("str")
+    index = audformat.filewise_index(["data/file1.txt"])
+    db["files"] = audformat.Table(index)
+    db["files"]["speaker"] = audformat.Column(scheme_id="speaker")
+    db["files"]["speaker"].set(["adam"])
+    db.save(build_dir)
+
+    # Publish the text-only database and check its dependency entries
+    version = "1.0.0"
+    deps = audb.publish(build_dir, version, repository)
+
+    assert deps.tables == ["db.files.csv"]
+    file = "data/file1.txt"
+    assert deps.media == [file]
+    assert deps.bit_depth(file) == 0
+    assert deps.channels(file) == 0
+    assert deps.duration(file) == 0.0
+    assert deps.format(file) == "txt"
+    assert deps.sampling_rate(file) == 0
+
+    db = audb.load(name, version=version, verbose=False, full_path=False)
+    assert db.files == [file]
+    assert list(db) == ["files"]
+    assert os.path.exists(audeer.path(db.root, file))
+
+    error_msg = f"Media file '{file}' does not support requesting a flavor."
+    with pytest.raises(RuntimeError, match=error_msg):
+        db = audb.load(name, version=version, channels=[0], verbose=False)
+
+    # Build a second database containing both audio and text media files
+    audeer.rmdir(build_dir)
+    shutil.copytree(dbs["1.0.0"], build_dir)  # start with db containing audio files
+    db = audformat.Database.load(build_dir)
+    speaker = db["files"]["speaker"].get()
+    files = list(db.files)
+    tables = list(db)
+    data_dir = audeer.mkdir(build_dir, "data")
+    with open(audeer.path(data_dir, "file1.txt"), "w") as file:
+        file.write("Text written by a person.\n")
+    index = audformat.filewise_index(["data/file1.txt"])
+    db["files"].extend_index(index, inplace=True)
+    db["files"]["speaker"] = audformat.Column(scheme_id="speaker")
+    db["files"]["speaker"].set(list(speaker.values) + ["adam"])
+    db.name = name
+    db.save(build_dir)
+
+    # Publish the mixed audio/text database as a new version
+    version = "2.0.0"
+    deps = audb.publish(build_dir, version, repository, previous_version=None)
+
+    assert deps.table_ids == tables
+    file = "data/file1.txt"
+    assert deps.media == files + [file]
+    assert deps.bit_depth(file) == 0
+    assert deps.channels(file) == 0
+    assert deps.duration(file) == 0.0
+    assert deps.format(file) == "txt"
+    assert deps.sampling_rate(file) == 0
+
+    db = audb.load(name, version=version, verbose=False, full_path=False)
+    assert db.files == files + [file]
+    assert list(db) == tables
+    assert os.path.exists(audeer.path(db.root, file))
+
+    error_msg = f"Media file '{file}' does not support requesting a flavor."
+    with pytest.raises(RuntimeError, match=error_msg):
+        db = audb.load(name, version=version, channels=[0], verbose=False)
+
+
 def test_update_database(dbs, persistent_repository):
     version = "2.1.0"
     start_version = "2.0.0"