Skip to content

Commit

Permalink
Revert "Allow list of files in Dependencies methods (#370)"
Browse files Browse the repository at this point in the history
This reverts commit dd476f1.
  • Loading branch information
hagenw committed May 3, 2024
1 parent cc1f4d9 commit da93e27
Show file tree
Hide file tree
Showing 4 changed files with 174 additions and 268 deletions.
129 changes: 44 additions & 85 deletions audb/core/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,95 +208,77 @@ def tables(self) -> typing.List[str]:
"""
return self._df[self._df["type"] == define.DependType.META].index.tolist()

def archive(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[str, typing.List[str]]:
r"""Name of archive a file belongs to.
def archive(self, file: str) -> str:
r"""Name of archive the file belongs to.
Args:
files: relative file path(s)
file: relative file path
Returns:
archive name(s)
archive name
"""
return self._column_loc("archive", files)
return self._df.archive[file]

def bit_depth(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[int, typing.List[int]]:
def bit_depth(self, file: str) -> int:
r"""Bit depth of media file.
Args:
files: relative file path(s)
file: relative file path
Returns:
bit depth(s)
bit depth
"""
return self._column_loc("bit_depth", files, int)
return self._column_loc("bit_depth", file, int)

def channels(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[int, typing.List[int]]:
def channels(self, file: str) -> int:
r"""Number of channels of media file.
Args:
files: relative file path(s)
file: relative file path
Returns:
number(s) of channels
number of channels
"""
return self._column_loc("channels", files, int)
return self._column_loc("channels", file, int)

def checksum(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[str, typing.List[str]]:
def checksum(self, file: str) -> str:
r"""Checksum of file.
Args:
files: relative file path(s)
file: relative file path
Returns:
checksum of file(s)
checksum of file
"""
return self._column_loc("checksum", files)
return self._column_loc("checksum", file)

def duration(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[float, typing.List[float]]:
def duration(self, file: str) -> float:
r"""Duration of file.
Args:
files: relative file path(s)
file: relative file path
Returns:
duration(s) in seconds
duration in seconds
"""
return self._column_loc("duration", files, float)
return self._column_loc("duration", file, float)

def format(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[str, typing.List[str]]:
def format(self, file: str) -> str:
r"""Format of file.
Args:
files: relative file path(s)
file: relative file path
Returns:
file format(s) (always lower case)
file format (always lower case)
"""
return self._column_loc("format", files)
return self._column_loc("format", file)

def load(self, path: str):
r"""Read dependencies from file.
Expand Down Expand Up @@ -353,35 +335,29 @@ def load(self, path: str):
table = parquet.read_table(path)
self._df = self._table_to_dataframe(table)

def removed(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[bool, typing.List[bool]]:
def removed(self, file: str) -> bool:
r"""Check if file is marked as removed.
Args:
files: relative file path(s)
file: relative file path
Returns:
``True`` if file was removed
"""
return self._column_loc("removed", files, bool)
return self._column_loc("removed", file, bool)

def sampling_rate(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[int, typing.List[int]]:
def sampling_rate(self, file: str) -> int:
r"""Sampling rate of media file.
Args:
files: relative file path(s)
file: relative file path
Returns:
sampling rate(s) in Hz
sampling rate in Hz
"""
return self._column_loc("sampling_rate", files, int)
return self._column_loc("sampling_rate", file, int)

def save(self, path: str):
r"""Write dependencies to file.
Expand All @@ -408,35 +384,29 @@ def save(self, path: str):
table = self._dataframe_to_table(self._df, file_column=True)
parquet.write_table(table, path)

def type(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[int, typing.List[int]]:
def type(self, file: str) -> int:
r"""Type of file.
Args:
files: relative file path(s)
file: relative file path
Returns:
type(s)
type
"""
return self._column_loc("type", files, int)
return self._column_loc("type", file, int)

def version(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[str, typing.List[str]]:
def version(self, file: str) -> str:
r"""Version of file.
Args:
files: relative file path(s)
file: relative file path
Returns:
version string(s)
version string
"""
return self._column_loc("version", files)
return self._column_loc("version", file)

def _add_attachment(
self,
Expand Down Expand Up @@ -540,21 +510,10 @@ def _column_loc(
dtype: typing.Callable = None,
) -> typing.Union[typing.Any, typing.List[typing.Any]]:
r"""Column content for a single selected file."""
# Single file
if isinstance(files, str):
value = self._df.at[files, column]
if dtype is not None:
value = dtype(value)
return value

# Multiple files
else:
values = self._df.loc[files, column]
if dtype is not None:
values = [dtype(value) for value in values]
else:
values = values.tolist()
return values
value = self._df.at[files, column]
if dtype is not None:
value = dtype(value)
return value

def _dataframe_to_table(
self,
Expand Down
66 changes: 28 additions & 38 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,45 +50,35 @@ using `pyarrow` dtypes).
| method | string | object | pyarrow |
|-------------------------------------------------|----------|----------|-----------|
| Dependencies.\_\_call__() | 0.000 | 0.000 | 0.000 |
| Dependencies.\_\_contains__(10000 files) | 0.004 | 0.005 | 0.004 |
| Dependencies.\_\_getitem__(10000 files) | 0.319 | 0.225 | 0.871 |
| Dependencies.\_\_contains__(10000 files) | 0.005 | 0.004 | 0.004 |
| Dependencies.\_\_getitem__(10000 files) | 0.322 | 0.224 | 0.900 |
| Dependencies.\_\_len__() | 0.000 | 0.000 | 0.000 |
| Dependencies.\_\_str__() | 0.005 | 0.005 | 0.006 |
| Dependencies.archives | 0.142 | 0.112 | 0.140 |
| Dependencies.attachments | 0.024 | 0.018 | 0.017 |
| Dependencies.attachment_ids | 0.027 | 0.018 | 0.019 |
| Dependencies.files | 0.011 | 0.011 | 0.010 |
| Dependencies.media | 0.105 | 0.070 | 0.064 |
| Dependencies.removed_media | 0.102 | 0.062 | 0.063 |
| Dependencies.table_ids | 0.032 | 0.026 | 0.023 |
| Dependencies.tables | 0.024 | 0.017 | 0.017 |
| Dependencies.archive(10000 files) | 0.028 | 0.026 | 0.050 |
| Dependencies.archive([10000 files]) | 0.008 | 0.008 | 0.008 |
| Dependencies.bit_depth(10000 files) | 0.024 | 0.024 | 0.044 |
| Dependencies.bit_depth([10000 files]) | 0.002 | 0.002 | 0.004 |
| Dependencies.channels(10000 files) | 0.024 | 0.024 | 0.045 |
| Dependencies.channels([10000 files]) | 0.002 | 0.002 | 0.005 |
| Dependencies.checksum(10000 files) | 0.025 | 0.024 | 0.048 |
| Dependencies.checksum([10000 files]) | 0.002 | 0.002 | 0.002 |
| Dependencies.duration(10000 files) | 0.024 | 0.025 | 0.044 |
| Dependencies.duration([10000 files]) | 0.003 | 0.003 | 0.004 |
| Dependencies.format(10000 files) | 0.026 | 0.023 | 0.049 |
| Dependencies.format([10000 files]) | 0.002 | 0.002 | 0.002 |
| Dependencies.removed(10000 files) | 0.024 | 0.024 | 0.044 |
| Dependencies.removed([10000 files]) | 0.002 | 0.002 | 0.004 |
| Dependencies.sampling_rate(10000 files) | 0.024 | 0.024 | 0.046 |
| Dependencies.sampling_rate([10000 files]) | 0.002 | 0.002 | 0.004 |
| Dependencies.type(10000 files) | 0.024 | 0.024 | 0.044 |
| Dependencies.type([10000 files]) | 0.002 | 0.002 | 0.004 |
| Dependencies.version(10000 files) | 0.026 | 0.024 | 0.049 |
| Dependencies.version([10000 files]) | 0.002 | 0.002 | 0.002 |
| Dependencies._add_attachment() | 0.055 | 0.055 | 0.178 |
| Dependencies._add_media(10000 files) | 0.055 | 0.055 | 0.066 |
| Dependencies._add_meta() | 0.120 | 0.124 | 0.122 |
| Dependencies._drop() | 0.076 | 0.076 | 0.118 |
| Dependencies._remove() | 0.068 | 0.069 | 0.065 |
| Dependencies._update_media() | 0.081 | 0.079 | 0.138 |
| Dependencies._update_media_version(10000 files) | 0.012 | 0.011 | 0.021 |
| Dependencies.\_\_str__() | 0.006 | 0.005 | 0.006 |
| Dependencies.archives | 0.144 | 0.116 | 0.152 |
| Dependencies.attachments | 0.030 | 0.018 | 0.018 |
| Dependencies.attachment_ids | 0.029 | 0.018 | 0.018 |
| Dependencies.files | 0.030 | 0.011 | 0.046 |
| Dependencies.media | 0.129 | 0.073 | 0.095 |
| Dependencies.removed_media | 0.117 | 0.070 | 0.087 |
| Dependencies.table_ids | 0.037 | 0.026 | 0.023 |
| Dependencies.tables | 0.029 | 0.017 | 0.017 |
| Dependencies.archive(10000 files) | 0.045 | 0.042 | 0.065 |
| Dependencies.bit_depth(10000 files) | 0.024 | 0.024 | 0.045 |
| Dependencies.channels(10000 files) | 0.023 | 0.023 | 0.045 |
| Dependencies.checksum(10000 files) | 0.026 | 0.023 | 0.047 |
| Dependencies.duration(10000 files) | 0.023 | 0.023 | 0.043 |
| Dependencies.format(10000 files) | 0.026 | 0.023 | 0.047 |
| Dependencies.removed(10000 files) | 0.023 | 0.023 | 0.043 |
| Dependencies.sampling_rate(10000 files) | 0.023 | 0.023 | 0.043 |
| Dependencies.type(10000 files) | 0.023 | 0.023 | 0.043 |
| Dependencies.version(10000 files) | 0.026 | 0.023 | 0.047 |
| Dependencies._add_attachment() | 0.055 | 0.062 | 0.220 |
| Dependencies._add_media(10000 files) | 0.057 | 0.057 | 0.066 |
| Dependencies._add_meta() | 0.117 | 0.129 | 0.145 |
| Dependencies._drop() | 0.075 | 0.078 | 0.121 |
| Dependencies._remove() | 0.061 | 0.069 | 0.064 |
| Dependencies._update_media() | 0.087 | 0.086 | 0.145 |
| Dependencies._update_media_version(10000 files) | 0.011 | 0.011 | 0.020 |


## audb.Dependencies loading/writing to file
Expand Down
Loading

0 comments on commit da93e27

Please sign in to comment.