Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch to new backend interface #306

Merged
merged 4 commits into from
Oct 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 52 additions & 47 deletions audb/core/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,14 @@
import audformat

from audb.core import define
from audb.core import utils
from audb.core.cache import database_cache_root
from audb.core.cache import default_cache_root
from audb.core.config import config
from audb.core.dependencies import Dependencies
from audb.core.flavor import Flavor
from audb.core.lock import FolderLock
from audb.core.repository import Repository
from audb.core.utils import _lookup
from audb.core.utils import lookup_backend


def available(
Expand All @@ -42,32 +41,39 @@ def available(
""" # noqa: E501
databases = []
for repository in config.REPOSITORIES:
backend = audbackend.create(
repository.backend,
repository.host,
repository.name,
)
try:
names = backend.ls('')
except FileNotFoundError:
# Handle missing repos
backend = utils.access_backend(repository)
if isinstance(backend, audbackend.Artifactory):
# avoid backend.ls('/')
# which is very slow on Artifactory
# see https://github.com/audeering/audbackend/issues/132
for p in backend._repo.path:
name = p.name
for version in [str(x).split('/')[-1] for x in p / 'db']:
databases.append(
[
name,
repository.backend,
repository.host,
repository.name,
version,
]
)
else:
for path, version in backend.ls('/'):
if path.endswith(define.HEADER_FILE):
name = path.split('/')[1]
databases.append(
[
name,
repository.backend,
repository.host,
repository.name,
version,
]
)
except audbackend.BackendError:
continue
for name in names:
try:
versions = backend.ls(f'{name}/{define.DB}')
for version in versions:
databases.append(
[
name,
repository.backend,
repository.host,
repository.name,
version,
]
)
except FileNotFoundError:
# Handle broken databases
continue

df = pd.DataFrame.from_records(
databases,
Expand Down Expand Up @@ -262,9 +268,9 @@ def dependencies(
deps.load(deps_path)
except (AttributeError, FileNotFoundError, ValueError, EOFError):
# If loading pickled cached file fails, load again from backend
backend = lookup_backend(name, version)
backend = utils.lookup_backend(name, version)
with tempfile.TemporaryDirectory() as tmp_root:
archive = backend.join(name, define.DB)
archive = backend.join('/', name, define.DB + '.zip')
backend.get_archive(
archive,
tmp_root,
Expand Down Expand Up @@ -467,12 +473,12 @@ def remove_media(

for version in versions(name):

backend = lookup_backend(name, version)
backend = utils.lookup_backend(name, version)

with tempfile.TemporaryDirectory() as db_root:

# download dependencies
archive = backend.join(name, define.DB)
archive = backend.join('/', name, define.DB + '.zip')
deps_path = backend.get_archive(
archive,
db_root,
Expand All @@ -495,14 +501,12 @@ def remove_media(
# if archive exists in this version,
# remove file from it and re-publish
remote_archive = backend.join(
'/',
name,
define.DEPEND_TYPE_NAMES[define.DependType.MEDIA],
archive,
archive + '.zip',
)
if backend.exists(
f'{remote_archive}.zip',
version,
):
if backend.exists(remote_archive, version):

files_in_archive = backend.get_archive(
remote_archive,
Expand All @@ -522,9 +526,9 @@ def remove_media(
files_in_archive.remove(file)
backend.put_archive(
db_root,
files_in_archive,
remote_archive,
version,
files=files_in_archive,
)

# mark file as removed
Expand All @@ -534,12 +538,12 @@ def remove_media(
# upload dependencies
if upload:
deps.save(deps_path)
remote_archive = backend.join(name, define.DB)
remote_archive = backend.join('/', name, define.DB + '.zip')
backend.put_archive(
db_root,
define.DEPENDENCIES_FILE,
remote_archive,
version,
files=define.DEPENDENCIES_FILE,
verbose=verbose,
)

Expand All @@ -563,14 +567,19 @@ def repository(
repository that contains the database

Raises:
RuntimeError: if database is not found
RuntimeError: if database or version is not found

Examples:
>>> audb.repository('emodb', '1.4.1')
Repository('data-public', 'https://audeering.jfrog.io/artifactory', 'artifactory')

""" # noqa: E501
return _lookup(name, version)[0]
if not versions(name):
raise RuntimeError(
f"Cannot find database "
f"'{name}'."
)
return utils._lookup(name, version)[0]


def versions(
Expand All @@ -591,11 +600,7 @@ def versions(
"""
vs = []
for repository in config.REPOSITORIES:
backend = audbackend.create(
repository.backend,
repository.host,
repository.name,
)
header = backend.join(name, 'db.yaml')
vs.extend(backend.versions(header))
backend = utils.access_backend(repository)
header = backend.join('/', name, 'db.yaml')
vs.extend(backend.versions(header, suppress_backend_errors=True))
return audeer.sort_versions(vs)
11 changes: 7 additions & 4 deletions audb/core/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,9 +374,10 @@ def job(path: str):
archive = deps.archive(path)
version = deps.version(path)
archive = backend.join(
'/',
db.name,
define.DEPEND_TYPE_NAMES[define.DependType.ATTACHMENT],
archive,
archive + '.zip',
)
backend.get_archive(
archive,
Expand Down Expand Up @@ -441,9 +442,10 @@ def _get_media_from_backend(

def job(archive: str, version: str):
archive = backend.join(
'/',
name,
define.DEPEND_TYPE_NAMES[define.DependType.MEDIA],
archive,
archive + '.zip',
)
# extract and move all files that are stored in the archive,
# even if only a single file from the archive was requested
Expand Down Expand Up @@ -503,9 +505,10 @@ def _get_tables_from_backend(

def job(table: str):
archive = backend.join(
'/',
db.name,
define.DEPEND_TYPE_NAMES[define.DependType.META],
deps.archive(table),
deps.archive(table) + '.zip',
)
backend.get_archive(
archive,
Expand Down Expand Up @@ -1350,7 +1353,7 @@ def load_header_to(
local_header = os.path.join(db_root, define.HEADER_FILE)
if overwrite or not os.path.exists(local_header):
backend = lookup_backend(name, version)
remote_header = backend.join(name, define.HEADER_FILE)
remote_header = backend.join('/', name, define.HEADER_FILE)
if add_audb_meta:
db_root_tmp = database_tmp_root(db_root)
local_header = os.path.join(db_root_tmp, define.HEADER_FILE)
Expand Down
9 changes: 6 additions & 3 deletions audb/core/load_to.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,10 @@ def job(path: str):
archive = deps.archive(path)
version = deps.version(path)
archive = backend.join(
'/',
db_name,
define.DEPEND_TYPE_NAMES[define.DependType.ATTACHMENT],
archive,
archive + '.zip',
)
backend.get_archive(
archive,
Expand Down Expand Up @@ -173,9 +174,10 @@ def _get_media(

def job(archive: str, version: str):
archive = backend.join(
'/',
db_name,
define.DEPEND_TYPE_NAMES[define.DependType.MEDIA],
archive,
archive + '.zip',
)
files = backend.get_archive(
archive,
Expand Down Expand Up @@ -221,9 +223,10 @@ def job(table: str):
if os.path.exists(path_pkl):
os.remove(path_pkl)
archive = backend.join(
'/',
db_name,
define.DEPEND_TYPE_NAMES[define.DependType.META],
deps.archive(table),
deps.archive(table) + '.zip',
)
backend.get_archive(
archive,
Expand Down
38 changes: 21 additions & 17 deletions audb/core/publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import audiofile

from audb.core import define
from audb.core import utils
from audb.core.api import dependencies
from audb.core.dependencies import Dependencies
from audb.core.repository import Repository
Expand Down Expand Up @@ -345,12 +346,13 @@ def _put_attachments(
):
def job(attachment_id: str):
archive_file = backend.join(
'/',
db.name,
define.DEPEND_TYPE_NAMES[define.DependType.ATTACHMENT],
attachment_id,
attachment_id + '.zip',
)
files = db.attachments[attachment_id].files
backend.put_archive(db_root, files, archive_file, version)
backend.put_archive(db_root, archive_file, version, files=files)

audeer.run_tasks(
job,
Expand Down Expand Up @@ -389,9 +391,10 @@ def job(archive):
update_media.append(file)

archive_file = backend.join(
'/',
db_name,
define.DEPEND_TYPE_NAMES[define.DependType.MEDIA],
archive,
archive + '.zip',
)

if previous_version is not None:
Expand Down Expand Up @@ -423,9 +426,9 @@ def job(archive):

backend.put_archive(
db_root,
files,
archive_file,
version,
files=files,
)

update_media = []
Expand All @@ -451,11 +454,12 @@ def _put_tables(
def job(table: str):
file = f'db.{table}.csv'
archive_file = backend.join(
'/',
db_name,
define.DEPEND_TYPE_NAMES[define.DependType.META],
table,
table + '.zip',
)
backend.put_archive(db_root, file, archive_file, version)
backend.put_archive(db_root, archive_file, version, files=file)

audeer.run_tasks(
job,
Expand Down Expand Up @@ -602,14 +606,10 @@ def publish(
verbose=verbose,
)

backend = audbackend.create(
repository.backend,
repository.host,
repository.name,
)
backend = utils.access_backend(repository)

remote_header = backend.join(db.name, define.HEADER_FILE)
versions = backend.versions(remote_header)
remote_header = backend.join('/', db.name, define.HEADER_FILE)
versions = backend.versions(remote_header, suppress_backend_errors=True)
if version in versions:
raise RuntimeError(
'A version '
Expand Down Expand Up @@ -755,12 +755,16 @@ def publish(

# publish dependencies and header
deps.save(deps_path)
archive_file = backend.join(db.name, define.DB)
backend.put_archive(db_root, define.DEPENDENCIES_FILE, archive_file,
version)
archive_file = backend.join('/', db.name, define.DB + '.zip')
backend.put_archive(
db_root,
archive_file,
version,
files=define.DEPENDENCIES_FILE,
)
try:
local_header = os.path.join(db_root, define.HEADER_FILE)
remote_header = db.name + '/' + define.HEADER_FILE
remote_header = backend.join('/', db.name, define.HEADER_FILE)
backend.put_file(local_header, remote_header, version)
except Exception: # pragma: no cover
# after the header is published
Expand Down
Loading