diff --git a/audb/core/dependencies.py b/audb/core/dependencies.py index 5628d4ba..cd4de023 100644 --- a/audb/core/dependencies.py +++ b/audb/core/dependencies.py @@ -210,7 +210,7 @@ def bit_depth(self, file: str) -> int: bit depth """ - return int(self._df.bit_depth[file]) + return self._column_loc("bit_depth", file, int) def channels(self, file: str) -> int: r"""Number of channels of media file. @@ -222,7 +222,7 @@ def channels(self, file: str) -> int: number of channels """ - return int(self._df.channels[file]) + return self._column_loc("channels", file, int) def checksum(self, file: str) -> str: r"""Checksum of file. @@ -234,7 +234,7 @@ def checksum(self, file: str) -> str: checksum of file """ - return self._df.checksum[file] + return self._column_loc("checksum", file) def duration(self, file: str) -> float: r"""Duration of file. @@ -246,7 +246,7 @@ def duration(self, file: str) -> float: duration in seconds """ - return float(self._df.duration[file]) + return self._column_loc("duration", file, float) def format(self, file: str) -> str: r"""Format of file. @@ -258,7 +258,7 @@ def format(self, file: str) -> str: file format (always lower case) """ - return self._df.format[file] + return self._column_loc("format", file) def load(self, path: str): r"""Read dependencies from file. @@ -319,7 +319,7 @@ def removed(self, file: str) -> bool: ``True`` if file was removed """ - return bool(self._df.removed[file]) + return self._column_loc("removed", file, bool) def sampling_rate(self, file: str) -> int: r"""Sampling rate of media file. @@ -331,7 +331,7 @@ def sampling_rate(self, file: str) -> int: sampling rate in Hz """ - return int(self._df.sampling_rate[file]) + return self._column_loc("sampling_rate", file, int) def save(self, path: str): r"""Write dependencies to file. @@ -360,7 +360,7 @@ def type(self, file: str) -> int: type """ - return int(self._df.type[file]) + return self._column_loc("type", file, int) def version(self, file: str) -> str: r"""Version of file. @@ -372,7 +372,7 @@ def version(self, file: str) -> str: version string """ - return self._df.version[file] + return self._column_loc("version", file) def _add_attachment( self, @@ -468,6 +468,18 @@ def _add_meta( version, # version ] + def _column_loc( + self, + column: str, + files: typing.Union[str, typing.Sequence[str]], + dtype: typing.Callable = None, + ) -> typing.Union[typing.Any, typing.List[typing.Any]]: + r"""Column content for selected files.""" + value = self._df.at[files, column] + if dtype is not None: + value = dtype(value) + return value + def _drop(self, files: typing.Sequence[str]): r"""Drop files from table. diff --git a/benchmarks/README.md b/benchmarks/README.md index d7d41de3..aa05d728 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -45,41 +45,40 @@ stored as a `pandas.DataFrame` using different dtype representations (storing string as `string`, storing string as `object`, -using `pyarrow` dtypes) -as of commit 91528e4. +using `pyarrow` dtypes). | method | string | object | pyarrow | |-------------------------------------------------|----------|----------|-----------| -| Dependencies.__call__() | 0.000 | 0.000 | 0.000 | -| Dependencies.__contains__(10000 files) | 0.005 | 0.005 | 0.004 | -| Dependencies.__get_item__(10000 files) | 0.311 | 0.223 | 0.907 | -| Dependencies.__len__() | 0.000 | 0.000 | 0.000 | -| Dependencies.__str__() | 0.006 | 0.005 | 0.006 | -| Dependencies.archives | 0.145 | 0.112 | 0.144 | -| Dependencies.attachments | 0.029 | 0.018 | 0.017 | -| Dependencies.attachment_ids | 0.028 | 0.018 | 0.016 | -| Dependencies.files | 0.031 | 0.011 | 0.042 | -| Dependencies.media | 0.132 | 0.072 | 0.088 | -| Dependencies.removed_media | 0.118 | 0.063 | 0.081 | -| Dependencies.table_ids | 0.035 | 0.025 | 0.022 | -| Dependencies.tables | 0.028 | 0.017 | 0.016 | -| Dependencies.archive(10000 files) | 0.046 | 0.043 | 0.064 | -| Dependencies.bit_depth(10000 files) | 0.042 | 0.042 | 0.060 | -| Dependencies.channels(10000 files) | 0.041 | 0.042 | 0.060 | -| Dependencies.checksum(10000 files) | 0.043 | 0.041 | 0.064 | -| Dependencies.duration(10000 files) | 0.042 | 0.042 | 0.059 | -| Dependencies.format(10000 files) | 0.044 | 0.042 | 0.064 | -| Dependencies.removed(10000 files) | 0.041 | 0.042 | 0.059 | -| Dependencies.sampling_rate(10000 files) | 0.043 | 0.043 | 0.061 | -| Dependencies.type(10000 files) | 0.043 | 0.042 | 0.060 | -| Dependencies.version(10000 files) | 0.044 | 0.041 | 0.066 | -| Dependencies._add_attachment() | 0.068 | 0.057 | 0.222 | -| Dependencies._add_media(10000 files) | 0.057 | 0.057 | 0.068 | -| Dependencies._add_meta() | 0.121 | 0.138 | 0.148 | -| Dependencies._drop() | 0.077 | 0.076 | 0.117 | -| Dependencies._remove() | 0.061 | 0.065 | 0.066 | -| Dependencies._update_media() | 0.087 | 0.087 | 0.149 | -| Dependencies._update_media_version(10000 files) | 0.011 | 0.011 | 0.026 | +| Dependencies.\_\_call__() | 0.000 | 0.000 | 0.000 | +| Dependencies.\_\_contains__(10000 files) | 0.005 | 0.004 | 0.004 | +| Dependencies.\_\_get_item__(10000 files) | 0.322 | 0.224 | 0.900 | +| Dependencies.\_\_len__() | 0.000 | 0.000 | 0.000 | +| Dependencies.\_\_str__() | 0.006 | 0.005 | 0.006 | +| Dependencies.archives | 0.144 | 0.116 | 0.152 | +| Dependencies.attachments | 0.030 | 0.018 | 0.018 | +| Dependencies.attachment_ids | 0.029 | 0.018 | 0.018 | +| Dependencies.files | 0.030 | 0.011 | 0.046 | +| Dependencies.media | 0.129 | 0.073 | 0.095 | +| Dependencies.removed_media | 0.117 | 0.070 | 0.087 | +| Dependencies.table_ids | 0.037 | 0.026 | 0.023 | +| Dependencies.tables | 0.029 | 0.017 | 0.017 | +| Dependencies.archive(10000 files) | 0.045 | 0.042 | 0.065 | +| Dependencies.bit_depth(10000 files) | 0.024 | 0.024 | 0.045 | +| Dependencies.channels(10000 files) | 0.023 | 0.023 | 0.045 | +| Dependencies.checksum(10000 files) | 0.026 | 0.023 | 0.047 | +| Dependencies.duration(10000 files) | 0.023 | 0.023 | 0.043 | +| Dependencies.format(10000 files) | 0.026 | 0.023 | 0.047 | +| Dependencies.removed(10000 files) | 0.023 | 0.023 | 0.043 | +| Dependencies.sampling_rate(10000 files) | 0.023 | 0.023 | 0.043 | +| Dependencies.type(10000 files) | 0.023 | 0.023 | 0.043 | +| Dependencies.version(10000 files) | 0.026 | 0.023 | 0.047 | +| Dependencies._add_attachment() | 0.055 | 0.062 | 0.220 | +| Dependencies._add_media(10000 files) | 0.057 | 0.057 | 0.066 | +| Dependencies._add_meta() | 0.117 | 0.129 | 0.145 | +| Dependencies._drop() | 0.075 | 0.078 | 0.121 | +| Dependencies._remove() | 0.061 | 0.069 | 0.064 | +| Dependencies._update_media() | 0.087 | 0.086 | 0.145 | +| Dependencies._update_media_version(10000 files) | 0.011 | 0.011 | 0.020 | ## audb.Dependencies loading/writing to file