From d6e9c7fba98d0e36090b4c803d3dfd821b5859f4 Mon Sep 17 00:00:00 2001
From: Hagen Wierstorf <hwierstorf@audeering.com>
Date: Mon, 29 Apr 2024 09:05:04 +0200
Subject: [PATCH 1/6] Add support for TXT files as media files

---
 audb/core/publish.py  | 79 ++++++++++++++++++++++++++++++++-----------
 tests/test_publish.py | 36 ++++++++++++++++++++
 2 files changed, 96 insertions(+), 19 deletions(-)

diff --git a/audb/core/publish.py b/audb/core/publish.py
index 76d70d25..48aca693 100644
--- a/audb/core/publish.py
+++ b/audb/core/publish.py
@@ -300,24 +300,65 @@ def _media_values(
     archive: str,
     checksum: str,
 ) -> typing.Tuple[str, str, int, int, str, float, str, int, float, int, str]:
-    r"""Return values of a media entry in dependencies."""
-    format = audeer.file_extension(file).lower()
+    r"""Return values of a media entry in dependencies.
+
+    The dependency table expects the following columns:
+
+    * file
+    * archive
+    * bit depth
+    * channels
+    * checksum
+    * duration
+    * format
+    * removed
+    * sampling rate
+    * dependency type
+    * version
 
-    try:
-        path = os.path.join(root, file)
-        bit_depth = audiofile.bit_depth(path)
-        if bit_depth is None:  # pragma: nocover (non SND files)
-            bit_depth = 0
-        channels = audiofile.channels(path)
-        duration = audiofile.duration(path, sloppy=True)
-        sampling_rate = audiofile.sampling_rate(path)
-    except FileNotFoundError:  # pragma: nocover
-        # If sox or mediafile are not installed
-        # we get a FileNotFoundError error
-        raise RuntimeError(
-            f"sox and mediainfo have to be installed "
-            f"to publish '{format}' media files."
-        )
+    Args:
+        root: root of database
+        file: relative media file path
+        version: database version
+        archive: archive the media file is stored in
+        checksum: checksum of the media file
+
+    Returns:
+        Tuple with a row to be added to the dependency table,
+        containing entries for `
+
+    """
+    # Allow media file extensions,
+    # that do not support audio/video metadata
+    # (e.g. channels, sampling rate)
+    special_formats = ["txt"]
+
+    dependency_type = define.DependType.MEDIA
+    format = audeer.file_extension(file).lower()
+    removed = 0
+
+    if format in special_formats:
+        bit_depth = 0
+        channels = 0
+        duration = 0.0
+        sampling_rate = 0
+    else:
+        # Inspect media file to get audio/video metadata
+        try:
+            path = os.path.join(root, file)
+            bit_depth = audiofile.bit_depth(path)
+            if bit_depth is None:  # pragma: nocover (non SND files)
+                bit_depth = 0
+            channels = audiofile.channels(path)
+            duration = audiofile.duration(path, sloppy=True)
+            sampling_rate = audiofile.sampling_rate(path)
+        except FileNotFoundError:  # pragma: nocover
+            # If sox or mediafile are not installed
+            # we get a FileNotFoundError error
+            raise RuntimeError(
+                f"sox and mediainfo have to be installed "
+                f"to publish '{format}' media files."
+            )
 
     return (
         file,
@@ -327,9 +368,9 @@ def _media_values(
         checksum,
         duration,
         format,
-        0,  # removed
+        removed,
         sampling_rate,
-        define.DependType.MEDIA,
+        dependency_type,
         version,
     )
 
diff --git a/tests/test_publish.py b/tests/test_publish.py
index ea0fc2b4..50c6c166 100644
--- a/tests/test_publish.py
+++ b/tests/test_publish.py
@@ -1058,6 +1058,42 @@ def test_publish_error_version(tmpdir, repository):
         audb.publish(db_path, "2.0.0", repository, previous_version="1.0.0?")
 
 
+def test_publish_text_media_files(tmpdir, dbs, repository):
+    r"""Test publishing databases containing text files as media files."""
+    # Create a database, containing text media file
+    build_dir = audeer.path(tmpdir, "./build")
+    audeer.mkdir(build_dir)
+    data_dir = audeer.mkdir(build_dir, "data")
+    with open(audeer.path(data_dir, "file1.txt"), "w") as file:
+        file.write("Text written by a person.\n")
+    name = "text-db"
+    db = audformat.Database(name)
+    db.schemes["speaker"] = audformat.Scheme("str")
+    index = audformat.filewise_index(["data/file1.txt"])
+    db["files"] = audformat.Table(index)
+    db["files"]["speaker"] = audformat.Column(scheme_id="speaker")
+    db["files"]["speaker"].set(["speaker-a"])
+    db.save(build_dir)
+
+    # Publish database, containing text media file
+    version = "1.0.0"
+    deps = audb.publish(build_dir, version, repository)
+
+    assert deps.tables == ["db.files.csv"]
+    file = "data/file1.txt"
+    assert deps.media == [file]
+    assert deps.bit_depth(file) == 0
+    assert deps.channels(file) == 0
+    assert deps.duration(file) == 0.0
+    assert deps.format(file) == "txt"
+    assert deps.sampling_rate(file) == 0
+
+    db = audb.load(name, version=version, verbose=False, full_path=False)
+    assert db.files == [file]
+    assert list(db) == ["files"]
+    assert os.path.exists(audeer.path(db.root, file))
+
+
 def test_update_database(dbs, persistent_repository):
     version = "2.1.0"
     start_version = "2.0.0"

From 9c635b31656fd5ad1d9446a4e1a0dfe0072dcc02 Mon Sep 17 00:00:00 2001
From: Hagen Wierstorf <hwierstorf@audeering.com>
Date: Mon, 29 Apr 2024 09:13:49 +0200
Subject: [PATCH 2/6] Catch RuntimeError instead

---
 audb/core/publish.py | 43 ++++++++++++++++++++-----------------------
 1 file changed, 20 insertions(+), 23 deletions(-)

diff --git a/audb/core/publish.py b/audb/core/publish.py
index 48aca693..0711e839 100644
--- a/audb/core/publish.py
+++ b/audb/core/publish.py
@@ -328,37 +328,34 @@ def _media_values(
         containing entries for `
 
     """
-    # Allow media file extensions,
-    # that do not support audio/video metadata
-    # (e.g. channels, sampling rate)
-    special_formats = ["txt"]
-
     dependency_type = define.DependType.MEDIA
     format = audeer.file_extension(file).lower()
     removed = 0
 
-    if format in special_formats:
+    # Inspect media file to get audio/video metadata
+    try:
+        path = os.path.join(root, file)
+        bit_depth = audiofile.bit_depth(path)
+        if bit_depth is None:  # pragma: nocover (non SND files)
+            bit_depth = 0
+        channels = audiofile.channels(path)
+        duration = audiofile.duration(path, sloppy=True)
+        sampling_rate = audiofile.sampling_rate(path)
+    except FileNotFoundError:  # pragma: nocover
+        # If sox or mediainfo are not installed
+        # we get a FileNotFoundError
+        raise RuntimeError(
+            f"sox and mediainfo have to be installed "
+            f"to publish '{format}' media files."
+        )
+    except RuntimeError:
+        # Skip audio/video metadata for media files,
+        # that don't support them
+        # (e.g. text files)
         bit_depth = 0
         channels = 0
         duration = 0.0
         sampling_rate = 0
-    else:
-        # Inspect media file to get audio/video metadata
-        try:
-            path = os.path.join(root, file)
-            bit_depth = audiofile.bit_depth(path)
-            if bit_depth is None:  # pragma: nocover (non SND files)
-                bit_depth = 0
-            channels = audiofile.channels(path)
-            duration = audiofile.duration(path, sloppy=True)
-            sampling_rate = audiofile.sampling_rate(path)
-        except FileNotFoundError:  # pragma: nocover
-            # If sox or mediafile are not installed
-            # we get a FileNotFoundError error
-            raise RuntimeError(
-                f"sox and mediainfo have to be installed "
-                f"to publish '{format}' media files."
-            )
 
     return (
         file,

From b1383e34f4b4f3c9e4d6ddbe4d184e80f977f94c Mon Sep 17 00:00:00 2001
From: Hagen Wierstorf <hwierstorf@audeering.com>
Date: Mon, 29 Apr 2024 09:17:48 +0200
Subject: [PATCH 3/6] Fix docstring

---
 audb/core/publish.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/audb/core/publish.py b/audb/core/publish.py
index 0711e839..687f3a2e 100644
--- a/audb/core/publish.py
+++ b/audb/core/publish.py
@@ -324,8 +324,7 @@ def _media_values(
         checksum: checksum of the media file
 
     Returns:
-        Tuple with a row to be added to the dependency table,
-        containing entries for `
+        row to be added to the dependency table as tuple
 
     """
     dependency_type = define.DependType.MEDIA

From 97e409e3bc4915bbe0cb0541cc6ee4ee6a9e2698 Mon Sep 17 00:00:00 2001
From: Hagen Wierstorf <hwierstorf@audeering.com>
Date: Mon, 29 Apr 2024 09:32:23 +0200
Subject: [PATCH 4/6] Raise RuntimeError when a flavor is requested

---
 audb/core/load.py     | 23 ++++++++++++++++-------
 tests/test_publish.py |  4 ++++
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/audb/core/load.py b/audb/core/load.py
index 66b0e672..99e5dcfa 100644
--- a/audb/core/load.py
+++ b/audb/core/load.py
@@ -459,13 +459,18 @@ def job(archive: str, version: str):
                 src_path = os.path.join(db_root_tmp, file)
                 file = flavor.destination(file)
                 dst_path = os.path.join(db_root_tmp, file)
-                flavor(
-                    src_path,
-                    dst_path,
-                    src_bit_depth=bit_depth,
-                    src_channels=channels,
-                    src_sampling_rate=sampling_rate,
-                )
+                try:
+                    flavor(
+                        src_path,
+                        dst_path,
+                        src_bit_depth=bit_depth,
+                        src_channels=channels,
+                        src_sampling_rate=sampling_rate,
+                    )
+                except RuntimeError:
+                    raise RuntimeError(
+                        f"Media file '{file}' does not support requesting a flavor."
+                    )
                 if src_path != dst_path:
                     os.remove(src_path)
 
@@ -1001,6 +1006,10 @@ def load(
             ``format``,
             or ``sampling_rate``
             is requested
+        RuntimeError: if a flavor is requested,
+            but the dataset contains media files,
+            that don't contain audio,
+            e.g. text files
 
     Examples:
         >>> db = audb.load(
diff --git a/tests/test_publish.py b/tests/test_publish.py
index 50c6c166..557b9f32 100644
--- a/tests/test_publish.py
+++ b/tests/test_publish.py
@@ -1093,6 +1093,10 @@ def test_publish_text_media_files(tmpdir, dbs, repository):
     assert list(db) == ["files"]
     assert os.path.exists(audeer.path(db.root, file))
 
+    error_msg = f"Media file '{file}' does not support requesting a flavor."
+    with pytest.raises(RuntimeError, match=error_msg):
+        db = audb.load(name, version=version, channels=[0], verbose=False)
+
 
 def test_update_database(dbs, persistent_repository):
     version = "2.1.0"

From 250240b02d6321eb468f42b90e477a66d2ed3d22 Mon Sep 17 00:00:00 2001
From: Hagen Wierstorf <hwierstorf@audeering.com>
Date: Mon, 29 Apr 2024 11:47:50 +0200
Subject: [PATCH 5/6] Use database instead of dataset in docstring

---
 audb/core/load.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/audb/core/load.py b/audb/core/load.py
index 99e5dcfa..72c0bf19 100644
--- a/audb/core/load.py
+++ b/audb/core/load.py
@@ -1007,7 +1007,7 @@ def load(
             or ``sampling_rate``
             is requested
         RuntimeError: if a flavor is requested,
-            but the dataset contains media files,
+            but the database contains media files,
             that don't contain audio,
             e.g. text files
 

From 509fdb03b84ee6fb518d25058ec6610ce1533ada Mon Sep 17 00:00:00 2001
From: Hagen Wierstorf <hwierstorf@audeering.com>
Date: Mon, 29 Apr 2024 12:19:05 +0200
Subject: [PATCH 6/6] Add test for mixed text/audio database

---
 tests/test_publish.py | 47 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/tests/test_publish.py b/tests/test_publish.py
index 557b9f32..2d6491d6 100644
--- a/tests/test_publish.py
+++ b/tests/test_publish.py
@@ -45,6 +45,7 @@ def dbs(tmpdir_factory):
     #
     # tables:
     #   - emotion
+    #   - files
     # misc tables:
     #   - misc-in-scheme
     #   - misc-not-in-scheme
@@ -133,6 +134,7 @@ def dbs(tmpdir_factory):
     #
     # tables:
     #   - emotion
+    #   - files
     # misc tables:
     #   - misc-in-scheme
     #   - misc-not-in-scheme
@@ -179,6 +181,7 @@ def dbs(tmpdir_factory):
     #
     # tables:
     #   - emotion
+    #   - files
     # misc tables:
     #   - misc-in-scheme
     #   - misc-not-in-scheme
@@ -208,6 +211,7 @@ def dbs(tmpdir_factory):
     #
     # tables:
     #   - emotion
+    #   - files
     # misc tables:
     #   - misc-in-scheme
     #   - misc-not-in-scheme
@@ -232,6 +236,7 @@ def dbs(tmpdir_factory):
     #
     # tables:
     #   - emotion
+    #   - files
     # misc tables:
     #   - misc-in-scheme
     #   - misc-not-in-scheme
@@ -267,6 +272,7 @@ def dbs(tmpdir_factory):
     #
     # tables:
     #   - emotion
+    #   - files
     # misc tables:
     #   - misc-in-scheme
     #   - misc-not-in-scheme
@@ -1072,7 +1078,7 @@ def test_publish_text_media_files(tmpdir, dbs, repository):
     index = audformat.filewise_index(["data/file1.txt"])
     db["files"] = audformat.Table(index)
     db["files"]["speaker"] = audformat.Column(scheme_id="speaker")
-    db["files"]["speaker"].set(["speaker-a"])
+    db["files"]["speaker"].set(["adam"])
     db.save(build_dir)
 
     # Publish database, containing text media file
@@ -1097,6 +1103,45 @@ def test_publish_text_media_files(tmpdir, dbs, repository):
     with pytest.raises(RuntimeError, match=error_msg):
         db = audb.load(name, version=version, channels=[0], verbose=False)
 
+    # Create database, containing text and audio media files
+    audeer.rmdir(build_dir)
+    shutil.copytree(dbs["1.0.0"], build_dir)  # start with db containing audio files
+    db = audformat.Database.load(build_dir)
+    speaker = db["files"]["speaker"].get()
+    files = list(db.files)
+    tables = list(db)
+    data_dir = audeer.mkdir(build_dir, "data")
+    with open(audeer.path(data_dir, "file1.txt"), "w") as file:
+        file.write("Text written by a person.\n")
+    index = audformat.filewise_index(["data/file1.txt"])
+    db["files"].extend_index(index, inplace=True)
+    db["files"]["speaker"] = audformat.Column(scheme_id="speaker")
+    db["files"]["speaker"].set(list(speaker.values) + ["adam"])
+    db.name = name
+    db.save(build_dir)
+
+    # Publish database, containing text and audio media files
+    version = "2.0.0"
+    deps = audb.publish(build_dir, version, repository, previous_version=None)
+
+    assert deps.table_ids == tables
+    file = "data/file1.txt"
+    assert deps.media == files + [file]
+    assert deps.bit_depth(file) == 0
+    assert deps.channels(file) == 0
+    assert deps.duration(file) == 0.0
+    assert deps.format(file) == "txt"
+    assert deps.sampling_rate(file) == 0
+
+    db = audb.load(name, version=version, verbose=False, full_path=False)
+    assert db.files == files + [file]
+    assert list(db) == tables
+    assert os.path.exists(audeer.path(db.root, file))
+
+    error_msg = f"Media file '{file}' does not support requesting a flavor."
+    with pytest.raises(RuntimeError, match=error_msg):
+        db = audb.load(name, version=version, channels=[0], verbose=False)
+
 
 def test_update_database(dbs, persistent_repository):
     version = "2.1.0"