Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for TXT files as media files #392

Merged
merged 6 commits into from
May 8, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions audb/core/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,13 +459,18 @@ def job(archive: str, version: str):
src_path = os.path.join(db_root_tmp, file)
file = flavor.destination(file)
dst_path = os.path.join(db_root_tmp, file)
flavor(
src_path,
dst_path,
src_bit_depth=bit_depth,
src_channels=channels,
src_sampling_rate=sampling_rate,
)
try:
flavor(
src_path,
dst_path,
src_bit_depth=bit_depth,
src_channels=channels,
src_sampling_rate=sampling_rate,
)
except RuntimeError:
raise RuntimeError(
f"Media file '{file}' does not support requesting a flavor."
)
if src_path != dst_path:
os.remove(src_path)

Expand Down Expand Up @@ -1001,6 +1006,10 @@ def load(
``format``,
or ``sampling_rate``
is requested
RuntimeError: if a flavor is requested,
but the dataset contains media files,
that don't contain audio,
e.g. text files

Examples:
>>> db = audb.load(
Expand Down
43 changes: 40 additions & 3 deletions audb/core/publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,9 +300,38 @@ def _media_values(
archive: str,
checksum: str,
) -> typing.Tuple[str, str, int, int, str, float, str, int, float, int, str]:
r"""Return values of a media entry in dependencies."""
r"""Return values of a media entry in dependencies.

The dependency table expects the following columns:

* file
* archive
* bit depth
* channels
* checksum
* duration
* format
* removed
* sampling rate
* dependency type
* version

Args:
root: root of database
file: relative media file path
version: database version
archive: archive the media file is stored in
checksum: checksum of the media file

Returns:
row to be added to the dependency table as tuple

"""
dependency_type = define.DependType.MEDIA
format = audeer.file_extension(file).lower()
removed = 0

# Inspect media file to get audio/video metadata
try:
path = os.path.join(root, file)
bit_depth = audiofile.bit_depth(path)
Expand All @@ -318,6 +347,14 @@ def _media_values(
f"sox and mediainfo have to be installed "
f"to publish '{format}' media files."
)
except RuntimeError:
# Skip audio/video metadata for media files,
# that don't support them
# (e.g. text files)
bit_depth = 0
channels = 0
duration = 0.0
sampling_rate = 0

return (
file,
Expand All @@ -327,9 +364,9 @@ def _media_values(
checksum,
duration,
format,
0, # removed
removed,
sampling_rate,
define.DependType.MEDIA,
dependency_type,
version,
)

Expand Down
40 changes: 40 additions & 0 deletions tests/test_publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -1058,6 +1058,46 @@ def test_publish_error_version(tmpdir, repository):
audb.publish(db_path, "2.0.0", repository, previous_version="1.0.0?")


def test_publish_text_media_files(tmpdir, dbs, repository):
    r"""Test publishing databases containing text files as media files.

    The test builds a database with a single text media file,
    publishes it,
    and checks that the audio-related dependency entries
    (bit depth, channels, duration, sampling rate)
    are stored as zero values.
    It then verifies that the database can be loaded without a flavor,
    and that requesting a flavor raises a ``RuntimeError``.

    Args:
        tmpdir: temporary directory fixture used as build root
        dbs: fixture not referenced directly in the test body
            (presumably requested for its setup side effects,
            TODO confirm)
        repository: repository fixture the database is published to

    """
    # Local import keeps this test self-contained,
    # independent of the module-level imports
    import re

    # Create a database, containing text media file
    build_dir = audeer.path(tmpdir, "./build")
    audeer.mkdir(build_dir)
    data_dir = audeer.mkdir(build_dir, "data")
    # Use a dedicated name for the file handle,
    # so it is not confused with the media file path used below
    with open(audeer.path(data_dir, "file1.txt"), "w") as fp:
        fp.write("Text written by a person.\n")
    name = "text-db"
    db = audformat.Database(name)
    db.schemes["speaker"] = audformat.Scheme("str")
    index = audformat.filewise_index(["data/file1.txt"])
    db["files"] = audformat.Table(index)
    db["files"]["speaker"] = audformat.Column(scheme_id="speaker")
    db["files"]["speaker"].set(["speaker-a"])
    db.save(build_dir)

    # Publish database, containing text media file
    version = "1.0.0"
    deps = audb.publish(build_dir, version, repository)

    # Audio metadata of a text file is expected to be stored as zeros
    assert deps.tables == ["db.files.csv"]
    file = "data/file1.txt"
    assert deps.media == [file]
    assert deps.bit_depth(file) == 0
    assert deps.channels(file) == 0
    assert deps.duration(file) == 0.0
    assert deps.format(file) == "txt"
    assert deps.sampling_rate(file) == 0

    # Loading without a flavor has to provide the text file
    db = audb.load(name, version=version, verbose=False, full_path=False)
    assert db.files == [file]
    assert list(db) == ["files"]
    assert os.path.exists(audeer.path(db.root, file))

    # Requesting a flavor is not possible for text files.
    # ``match`` is interpreted as a regular expression,
    # so the message is escaped to compare it literally
    # (otherwise "." would match any character)
    error_msg = f"Media file '{file}' does not support requesting a flavor."
    with pytest.raises(RuntimeError, match=re.escape(error_msg)):
        audb.load(name, version=version, channels=[0], verbose=False)


def test_update_database(dbs, persistent_repository):
version = "2.1.0"
start_version = "2.0.0"
Expand Down
Loading