From d9169c7f0cf9bf8662bb0f1337562815a4cdec63 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Wed, 5 Jul 2023 09:32:35 +0200 Subject: [PATCH 01/59] preliminary backup script and verdi profile dbdump command --- backup_script/check_if_profile_locked.py | 14 ++++ backup_script/make_backup.sh | 83 +++++++++++++++++++++++ docs/source/reference/command_line.rst | 1 + src/aiida/cmdline/commands/cmd_profile.py | 37 ++++++++++ 4 files changed, 135 insertions(+) create mode 100644 backup_script/check_if_profile_locked.py create mode 100644 backup_script/make_backup.sh diff --git a/backup_script/check_if_profile_locked.py b/backup_script/check_if_profile_locked.py new file mode 100644 index 0000000000..e453a6bb89 --- /dev/null +++ b/backup_script/check_if_profile_locked.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +""" +Simple script to check if an AiiDA profile is locked +(e.g. when doing `verdi storage maintain --full`) +""" +from aiida.common.exceptions import LockedProfileError +from aiida.manage import get_manager +from aiida.manage.profile_access import ProfileAccessManager + +try: + ProfileAccessManager(get_manager().get_config().get_profile('mc3d')).request_access() + print(0) +except LockedProfileError: + print(1) diff --git a/backup_script/make_backup.sh b/backup_script/make_backup.sh new file mode 100644 index 0000000000..2ecfa3a9a3 --- /dev/null +++ b/backup_script/make_backup.sh @@ -0,0 +1,83 @@ +#!/bin/bash +set -e + +AIIDA_PATH=/home/kristjan/.aiida # .aiida/config.json +VERDIEXEC=/home/kristjan/opt/anaconda/envs/aiida/bin/verdi +PROFILE_NAME=qs +DEST=/home/kristjan/backup-test +#DEST=dev-aiida:/home/ubuntu/kristjan-tests/backup-test + +export AIIDA_PATH + +# If DEST includes the remote specifier user@host:/directory, extract both parts. 
+# if REMOTE_ID is empty, local destination is assumed +if [[ $DEST =~ .*:.* ]]; then + REMOTE_ID="${DEST%%:*}" +else + REMOTE_ID="" +fi +DEST_PATH="${DEST#*:}" + +# While backup is running, use LIVE_DIR directory and when it succeeds, move & overwrite the FINAL_DIR +LIVE_DIR=tempbackup +FINAL_DIR=lastbackup +if [ -z "$REMOTE_ID" ]; then + mkdir -p $DEST_PATH/$LIVE_DIR +else + ssh $REMOTE_ID "mkdir -p $DEST_PATH/$LIVE_DIR" +fi + +TMP_DIR=/tmp/$PROFILE_NAME-$(date +"%FT%H%M%S") +mkdir -p $TMP_DIR + +# Step 1: first run the storage maintenance version that can safely be performed while aiida is running +# q: should we use --compress? +# q: should we clean_storage as well? +$VERDIEXEC storage maintain --force + +# Step 2: dump the PostgreSQL database onto a temporary local file +$VERDIEXEC --profile=$PROFILE_NAME profile dbdump --output_file=$TMP_DIR/db.psql #--stop-if-existing + +# Step 3: transfer the PostgreSQL database file +rsync -azh $TMP_DIR/db.psql $DEST/$LIVE_DIR + +#### Disk-objectstore +#### Safest if backed up in order: 1) loose files; 2) sqlite database; 3) packed files. + +DOS_PATH=$AIIDA_PATH/repository/$PROFILE_NAME/container +DOS_DEST=$DEST/$LIVE_DIR/container + +# Step 4: transfer the loose objects of the disk-objectstore +rsync -azh $DOS_PATH/loose $DOS_DEST + +# Step 4: dump locally the disk-objectstore Sqlite database +sqlite3 $DOS_PATH/packs.idx ".backup $TMP_DIR/packs.idx" + +# Step 5: transfer the Sqlite dump +# transfer the dump backup.db TO THE CORRECT NAME in the backup # DO NOT RUN A FULL REPACK BETWEEN STEP 5 and 6, it might happen that... +# check_no_repack() +rsync -azh $TMP_DIR/packs.idx $DOS_DEST + +# step 6: transfer the pack files +# check_no_repack() +rsync -azh $DOS_PATH/packs $DOS_DEST + +# step 7: final rsync of the remaining parts of disk-objectstore +rsync -azh $DOS_PATH/config.json $DOS_PATH/duplicates $DOS_PATH/sandbox $DOS_DEST + +# Possibly run a dostore verify on top! 
+ +# step 8: overwrite the old backup folder +if [ -z "$REMOTE_ID" ]; then + echo $(date +"%FT%H%M%S") > $DEST_PATH/$LIVE_DIR/COMPLETED_DATE + if [ -d $DEST_PATH/$FINAL_DIR ]; then mv $DEST_PATH/$FINAL_DIR $DEST_PATH/$FINAL_DIR-old; fi + mv $DEST_PATH/$LIVE_DIR $DEST_PATH/$FINAL_DIR + rm -rf $DEST_PATH/$FINAL_DIR-old +else + ssh $REMOTE_ID "echo $(date +'%FT%H%M%S') > $DEST_PATH/$LIVE_DIR/COMPLETED_DATE" + ssh $REMOTE_ID "if [ -d $DEST_PATH/$FINAL_DIR ]; then mv $DEST_PATH/$FINAL_DIR $DEST_PATH/$FINAL_DIR-old; fi" + ssh $REMOTE_ID "mv $DEST_PATH/$LIVE_DIR $DEST_PATH/$FINAL_DIR" + ssh $REMOTE_ID "rm -rf $DEST_PATH/$FINAL_DIR-old" +fi + +echo "Success! Backup completed to $DEST/$FINAL_DIR" diff --git a/docs/source/reference/command_line.rst b/docs/source/reference/command_line.rst index 2f9a10e12e..98d91d96e1 100644 --- a/docs/source/reference/command_line.rst +++ b/docs/source/reference/command_line.rst @@ -355,6 +355,7 @@ Below is a list with all available subcommands. --help Show this message and exit. Commands: + dbdump Dump the PostgreSQL database into a file. delete Delete one or more profiles. list Display a list of all available profiles. setdefault Set a profile as the default one. 
diff --git a/src/aiida/cmdline/commands/cmd_profile.py b/src/aiida/cmdline/commands/cmd_profile.py index 8be4ed3bbf..0aaffd41d4 100644 --- a/src/aiida/cmdline/commands/cmd_profile.py +++ b/src/aiida/cmdline/commands/cmd_profile.py @@ -165,3 +165,40 @@ def profile_delete(force, delete_data, profiles): get_config().delete_profile(profile.name, delete_storage=delete_data) echo.echo_success(f'Profile `{profile.name}` was deleted.') + + +@verdi_profile.command('dbdump') +@options.PROFILE(default=defaults.get_default_profile) +@click.option('--output_file', type=click.Path(), help='Specify the output file path.') +def profile_dbdump(profile, output_file): + """Dump the PostgreSQL database into a file.""" + + import os + import pathlib + import subprocess + + if not output_file: + output_file = f'{profile.name}.psql' + + output_file = pathlib.Path(output_file) + + db_config = profile.dictionary['storage']['config'] + + cmd = [ + 'pg_dump', f'--host={db_config["database_hostname"]}', f'--port={db_config["database_port"]}', + f'--dbname={db_config["database_name"]}', f'--username={db_config["database_username"]}', '--no-password', + '--format=p', f'--file={output_file}' + ] + + env = os.environ.copy() + env['PGPASSWORD'] = db_config['database_password'] + + pg_dump_output = subprocess.check_output(cmd, env=env).decode('utf-8') + + if len(pg_dump_output) > 0: + echo.echo_warning(f'Output from pg_dump: {pg_dump_output}') + + if output_file.is_file(): + echo.echo_success(f'Output written to `{output_file}`') + else: + echo.echo_error(f'Something went wrong, `{output_file}` not written.') From 10b85b7718bb00c40dc1391135702687273d52e6 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Thu, 3 Aug 2023 18:08:04 +0200 Subject: [PATCH 02/59] convert the script to 'verdi storage backup' command --- backup_script/check_if_profile_locked.py | 14 - backup_script/make_backup.sh | 83 ----- docs/source/reference/command_line.rst | 2 +- src/aiida/cmdline/commands/cmd_storage.py | 25 ++ 
src/aiida/storage/psql_dos/backend.py | 377 ++++++++++++++++++++++ 5 files changed, 403 insertions(+), 98 deletions(-) delete mode 100644 backup_script/check_if_profile_locked.py delete mode 100644 backup_script/make_backup.sh diff --git a/backup_script/check_if_profile_locked.py b/backup_script/check_if_profile_locked.py deleted file mode 100644 index e453a6bb89..0000000000 --- a/backup_script/check_if_profile_locked.py +++ /dev/null @@ -1,14 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Simple script to check if an AiiDA profile is locked -(e.g. when doing `verdi storage maintain --full`) -""" -from aiida.common.exceptions import LockedProfileError -from aiida.manage import get_manager -from aiida.manage.profile_access import ProfileAccessManager - -try: - ProfileAccessManager(get_manager().get_config().get_profile('mc3d')).request_access() - print(0) -except LockedProfileError: - print(1) diff --git a/backup_script/make_backup.sh b/backup_script/make_backup.sh deleted file mode 100644 index 2ecfa3a9a3..0000000000 --- a/backup_script/make_backup.sh +++ /dev/null @@ -1,83 +0,0 @@ -#!/bin/bash -set -e - -AIIDA_PATH=/home/kristjan/.aiida # .aiida/config.json -VERDIEXEC=/home/kristjan/opt/anaconda/envs/aiida/bin/verdi -PROFILE_NAME=qs -DEST=/home/kristjan/backup-test -#DEST=dev-aiida:/home/ubuntu/kristjan-tests/backup-test - -export AIIDA_PATH - -# If DEST includes the remote specifier user@host:/directory, extract both parts. 
-# if REMOTE_ID is empty, local destination is assumed -if [[ $DEST =~ .*:.* ]]; then - REMOTE_ID="${DEST%%:*}" -else - REMOTE_ID="" -fi -DEST_PATH="${DEST#*:}" - -# While backup is running, use LIVE_DIR directory and when it succeeds, move & overwrite the FINAL_DIR -LIVE_DIR=tempbackup -FINAL_DIR=lastbackup -if [ -z "$REMOTE_ID" ]; then - mkdir -p $DEST_PATH/$LIVE_DIR -else - ssh $REMOTE_ID "mkdir -p $DEST_PATH/$LIVE_DIR" -fi - -TMP_DIR=/tmp/$PROFILE_NAME-$(date +"%FT%H%M%S") -mkdir -p $TMP_DIR - -# Step 1: first run the storage maintenance version that can safely be performed while aiida is running -# q: should we use --compress? -# q: should we clean_storage as well? -$VERDIEXEC storage maintain --force - -# Step 2: dump the PostgreSQL database onto a temporary local file -$VERDIEXEC --profile=$PROFILE_NAME profile dbdump --output_file=$TMP_DIR/db.psql #--stop-if-existing - -# Step 3: transfer the PostgreSQL database file -rsync -azh $TMP_DIR/db.psql $DEST/$LIVE_DIR - -#### Disk-objectstore -#### Safest if backed up in order: 1) loose files; 2) sqlite database; 3) packed files. - -DOS_PATH=$AIIDA_PATH/repository/$PROFILE_NAME/container -DOS_DEST=$DEST/$LIVE_DIR/container - -# Step 4: transfer the loose objects of the disk-objectstore -rsync -azh $DOS_PATH/loose $DOS_DEST - -# Step 4: dump locally the disk-objectstore Sqlite database -sqlite3 $DOS_PATH/packs.idx ".backup $TMP_DIR/packs.idx" - -# Step 5: transfer the Sqlite dump -# transfer the dump backup.db TO THE CORRECT NAME in the backup # DO NOT RUN A FULL REPACK BETWEEN STEP 5 and 6, it might happen that... -# check_no_repack() -rsync -azh $TMP_DIR/packs.idx $DOS_DEST - -# step 6: transfer the pack files -# check_no_repack() -rsync -azh $DOS_PATH/packs $DOS_DEST - -# step 7: final rsync of the remaining parts of disk-objectstore -rsync -azh $DOS_PATH/config.json $DOS_PATH/duplicates $DOS_PATH/sandbox $DOS_DEST - -# Possibly run a dostore verify on top! 
- -# step 8: overwrite the old backup folder -if [ -z "$REMOTE_ID" ]; then - echo $(date +"%FT%H%M%S") > $DEST_PATH/$LIVE_DIR/COMPLETED_DATE - if [ -d $DEST_PATH/$FINAL_DIR ]; then mv $DEST_PATH/$FINAL_DIR $DEST_PATH/$FINAL_DIR-old; fi - mv $DEST_PATH/$LIVE_DIR $DEST_PATH/$FINAL_DIR - rm -rf $DEST_PATH/$FINAL_DIR-old -else - ssh $REMOTE_ID "echo $(date +'%FT%H%M%S') > $DEST_PATH/$LIVE_DIR/COMPLETED_DATE" - ssh $REMOTE_ID "if [ -d $DEST_PATH/$FINAL_DIR ]; then mv $DEST_PATH/$FINAL_DIR $DEST_PATH/$FINAL_DIR-old; fi" - ssh $REMOTE_ID "mv $DEST_PATH/$LIVE_DIR $DEST_PATH/$FINAL_DIR" - ssh $REMOTE_ID "rm -rf $DEST_PATH/$FINAL_DIR-old" -fi - -echo "Success! Backup completed to $DEST/$FINAL_DIR" diff --git a/docs/source/reference/command_line.rst b/docs/source/reference/command_line.rst index 98d91d96e1..e1bd3c9a9d 100644 --- a/docs/source/reference/command_line.rst +++ b/docs/source/reference/command_line.rst @@ -355,7 +355,6 @@ Below is a list with all available subcommands. --help Show this message and exit. Commands: - dbdump Dump the PostgreSQL database into a file. delete Delete one or more profiles. list Display a list of all available profiles. setdefault Set a profile as the default one. @@ -568,6 +567,7 @@ Below is a list with all available subcommands. --help Show this message and exit. Commands: + backup Create a backup of the profile data. info Summarise the contents of the storage. integrity Checks for the integrity of the data storage. maintain Performs maintenance tasks on the repository. 
diff --git a/src/aiida/cmdline/commands/cmd_storage.py b/src/aiida/cmdline/commands/cmd_storage.py index bb5970b846..9e223d2fba 100644 --- a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -164,3 +164,28 @@ def storage_maintain(ctx, full, no_repack, force, dry_run, compress): except LockingProfileError as exception: echo.echo_critical(str(exception)) echo.echo_success('Requested maintenance procedures finished.') + + +@verdi_storage.command('backup') +@click.option('--path', type=click.Path(), required=True, help='Specify the backup location.') +@click.option( + '--remote', + type=click.STRING, + default=None, + help='Specify remote host for backup location. If set, path needs to be absolute.' +) +@click.option( + '--pg_dump_exec', type=click.STRING, default='pg_dump', help="Specify the 'pg_dump' executable, if needed." +) +@click.option('--rsync_exec', type=click.STRING, default='rsync', help="Specify the 'rsync' executable, if needed.") +@decorators.with_dbenv() +def storage_backup(path, remote, pg_dump_exec, rsync_exec): + """Create a backup of the profile data.""" + import pathlib + + from aiida.manage.manager import get_manager + + manager = get_manager() + storage = manager.get_profile_storage() + + storage.backup_auto(pathlib.Path(path), remote=remote, pg_dump_exec=pg_dump_exec, rsync_exec=rsync_exec) diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index 8a38c2d7c8..acde748afa 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -477,3 +477,380 @@ def get_info(self, detailed: bool = False) -> dict: results = super().get_info(detailed=detailed) results['repository'] = self.get_repository().get_info(detailed) return results + + def _call_rsync( + self, + args: list, + src: pathlib.Path, + dest: pathlib.Path, + link_dest: Optional[pathlib.Path] = None, + remote: Optional[str] = None, + src_trailing_slash: bool = False, + 
dest_trailing_slash: bool = False + ) -> bool: + """Call rsync with specified arguments and handle possible errors & stdout/stderr + + :param link_dest: + Path to the hardlinked files location (previous backup). + + :param src_trailing_slash: + Add a trailing slash to the source path. This makes rsync copy the contents + of the folder instead of the folder itself. + + :param dest_trailing_slash: + Add a trailing slash to the destination path. This makes rsync interpret the + destination as a folder and create it if it doesn't exists. + + :return: + True if successful and False if unsuccessful. + """ + import subprocess + + all_args = args[:] + if link_dest: + if not remote: + # for local paths, use resolve() to get absolute path + link_dest_str = str(link_dest.resolve()) + else: + # for remote paths, we require absolute paths anyways + link_dest_str = str(link_dest) + all_args += [f'--link-dest={link_dest_str}'] + + if src_trailing_slash: + all_args += [str(src) + '/'] + else: + all_args += [str(src)] + + dest_str = str(dest) + if dest_trailing_slash: + dest_str += '/' + + if not remote: + all_args += [dest_str] + else: + all_args += [f'{remote}:{dest_str}'] + + try: + res = subprocess.run(all_args, check=True, capture_output=True) + STORAGE_LOGGER.debug(f"stdout: {all_args}\n{res.stdout.decode('utf-8')}") + STORAGE_LOGGER.debug(f"stderr: {all_args}\n{res.stderr.decode('utf-8')}") + except subprocess.CalledProcessError as exc: + STORAGE_LOGGER.error(f'rsync: {exc}') + return False + return True + + def _backup_dos( + self, + location: pathlib.Path, + rsync_args: list, + remote: Optional[str] = None, + prev_backup: Optional[pathlib.Path] = None + ) -> bool: + """Create a backup of the disk-objectstore container + + It should be done in the following order: + 1) loose files; + 2) sqlite database; + 3) packed files. + + :return: + True is successful and False if unsuccessful. 
+ """ + import sqlite3 + import tempfile + + container_path = get_filepath_container(self._profile) + + # step 1: loose files + loose_path = container_path / 'loose' + success = self._call_rsync( + rsync_args, loose_path, location, remote=remote, link_dest=prev_backup / 'loose' if prev_backup else None + ) + if not success: + return False + + # step 2: sqlite db + + sqlite_path = container_path / 'packs.idx' + + # make a temporary directory to dump sqlite db locally + with tempfile.TemporaryDirectory() as temp_dir_name: + sqlite_temp_loc = pathlib.Path(temp_dir_name) / 'packs.idx' + + # Safe way to make a backup of the sqlite db, while it might potentially be accessed + # https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.backup + src = sqlite3.connect(str(sqlite_path)) + dst = sqlite3.connect(str(sqlite_temp_loc)) + with dst: + src.backup(dst) + dst.close() + src.close() + + if sqlite_temp_loc.is_file(): + STORAGE_LOGGER.info(f'Dumped the SQLite database to {str(sqlite_temp_loc)}') + else: + STORAGE_LOGGER.error(f"'{str(sqlite_temp_loc)}' was not created.") + return False + + # step 3: transfer the SQLITE database file + success = self._call_rsync(rsync_args, sqlite_temp_loc, location, remote=remote, link_dest=prev_backup) + if not success: + return False + + # step 4: transfer the packed files + packs_path = container_path / 'packs' + success = self._call_rsync( + rsync_args, packs_path, location, remote=remote, link_dest=prev_backup / 'packs' if prev_backup else None + ) + if not success: + return False + + # step 5: transfer anything else in the container folder + success = self._call_rsync( + rsync_args + [ + '--exclude', + 'loose', + '--exclude', + 'packs.idx', + '--exclude', + 'packs', + ], + container_path, + location, + link_dest=prev_backup, + remote=remote, + src_trailing_slash=True + ) + if not success: + return False + + return True + + def _run_bash_cmd(self, args: list, remote: Optional[str] = None, shell: bool = False, suppress_log: 
bool = False): + import subprocess + all_args = args[:] + if remote: + all_args = ['ssh', remote] + all_args + try: + subprocess.run(all_args, check=True, shell=shell) + except subprocess.CalledProcessError as exc: + if not suppress_log: + STORAGE_LOGGER.error(f'{all_args}: {exc}') + return False + return True + + def backup( # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches, too-many-statements + self, + path: pathlib.Path, + remote: Optional[str] = None, + prev_backup: Optional[pathlib.Path] = None, + pg_dump_exec: str = 'pg_dump', + rsync_exec: str = 'rsync' + ) -> bool: + """Create a backup of the postgres database and disk-objectstore to the provided path. + + :param path: + Path to where the backup will be created. If 'remote' is specified, must be an absolute path, + otherwise can be relative. + + :param remote: + Remote host of the backup location. 'ssh' executable is called via subprocess and therefore remote + hosts configured for it are supported (e.g. via .ssh/config file). + + :param prev_backup: + Path to the previous backup. Rsync calls will be hard-linked to this path, making the backup + incremental and efficient. + + :param pg_dump_exec: + Path to the `pg_dump` executable. + + :param rsync_exec: + Path to the `rsync` executable. + + :return: + True is successful and False if unsuccessful. 
+ """ + + from datetime import datetime + import os + import shutil + import subprocess + import tempfile + + from aiida.common import exceptions + from aiida.common.exceptions import LockedProfileError + from aiida.manage.configuration import get_config + from aiida.manage.profile_access import ProfileAccessManager + + if remote: + # check if accessible + success = self._run_bash_cmd(['exit'], remote=remote) + if not success: + STORAGE_LOGGER.error(f"Remote '{remote}' is not accessible!") + return False + STORAGE_LOGGER.info(f"Remote '{remote}' is accessible!") + + # check if the specified executables are found + for exe in [pg_dump_exec, rsync_exec]: + if shutil.which(exe) is None: + STORAGE_LOGGER.error(f"executable '{exe}' not found!") + return False + + # subprocess arguments shared by all rsync calls: + rsync_args = [rsync_exec, '-azh', '-vv', '--no-whole-file'] + + cfg = self._profile.storage_config + + # check that 'path' doesn't exist + success = self._run_bash_cmd([f'[ ! -e "{str(path)}" ]'], + remote=remote, + shell=remote is None, + suppress_log=True) + if not success: + # path exists, check if it's an empty folder + success = self._run_bash_cmd([f'[ -d "{str(path)}" ] && [ -z "$(ls -A "{str(path)}")" ]'], + remote=remote, + shell=remote is None) + if not success: + # it's not an empty folder, so stop the backup + STORAGE_LOGGER.error(f"The path '{str(path)}' exists and is not empty!") + return False + + # check that the AiiDA profile is not locked and request access for the duration of this backup process + # (locked means that possibly a maintenance operation is running that could interfere with the backup) + try: + ProfileAccessManager(self._profile).request_access() + except LockedProfileError: + STORAGE_LOGGER.error('The profile is locked!') + return False + + # step 1: first run the storage maintenance version that can safely be performed while aiida is running + self.maintain(full=False, compress=True) + + # step 2: dump the PostgreSQL database 
into a temporary directory + with tempfile.TemporaryDirectory() as temp_dir_name: + psql_temp_loc = pathlib.Path(temp_dir_name) / 'db.psql' + + env = os.environ.copy() + env['PGPASSWORD'] = cfg['database_password'] + cmd = [ + pg_dump_exec, f'--host={cfg["database_hostname"]}', f'--port={cfg["database_port"]}', + f'--dbname={cfg["database_name"]}', f'--username={cfg["database_username"]}', '--no-password', + '--format=p', f'--file={str(psql_temp_loc)}' + ] + try: + subprocess.run(cmd, check=True, env=env) + except subprocess.CalledProcessError as exc: + STORAGE_LOGGER.error(f'pg_dump: {exc}') + return False + + if psql_temp_loc.is_file(): + STORAGE_LOGGER.info(f'Dumped the PostgreSQL database to {str(psql_temp_loc)}') + else: + STORAGE_LOGGER.error(f"'{str(psql_temp_loc)}' was not created.") + return False + + # step 3: transfer the PostgreSQL database file + success = self._call_rsync( + rsync_args, psql_temp_loc, path, link_dest=prev_backup, remote=remote, dest_trailing_slash=True + ) + if not success: + return False + + # step 4: back up the disk-objectstore + success = self._backup_dos( + path / 'container', + rsync_args, + remote=remote, + prev_backup=prev_backup / 'container' if prev_backup else None + ) + if not success: + return False + + # step 6: back up aiida config.json file + try: + config = get_config() + success = self._call_rsync(rsync_args, pathlib.Path(config.filepath), path, remote=remote) + if not success: + return False + except (exceptions.MissingConfigurationError, exceptions.ConfigurationError): + STORAGE_LOGGER.info('aiida config.json not found!') + + # step 5: write a file including date that signifies the backup completed successfully + success = self._run_bash_cmd(['touch', str(path / f'COMPLETED_{datetime.today().isoformat()}')], remote=remote) + if not success: + return False + + STORAGE_LOGGER.info(f"Success! 
Backup completed to {f'{remote}:' if remote else ''}{str(path)}") + return True + + def backup_auto( + self, + path: pathlib.Path, + remote: Optional[str] = None, + pg_dump_exec: str = 'pg_dump', + rsync_exec: str = 'rsync' + ): + """Create a backup of the AiiDA profile data, managing live and previous backup folders automatically + + The running backup is done to `/live-backup`. When it completes, it is moved to + the final path: `/last-backup`. This done so that the last backup wouldn't be + corrupted, in case the live one crashes or gets interrupted. Rsync `link-dest` is used between + the two folders to keep the backups incremental and performant. + + :param path: + Path to where the backup will be created. If 'remote' is specified, must be an absolute path, + otherwise can be relative. + + :param remote: + Remote host of the backup location. 'ssh' executable is called via subprocess and therefore remote + hosts configured for it are supported (e.g. via .ssh/config file). + + :param pg_dump_exec: + Path to the `pg_dump` executable. + + :param rsync_exec: + Path to the `rsync` executable. + + :return: + True is successful and False if unsuccessful. + """ + + live_folder = path / 'live_backup' + final_folder = path / 'last-backup' + + # does previous backup exist? 
+ prev_exists = self._run_bash_cmd([f'[ -d "{str(final_folder)}" ]'], + remote=remote, + shell=remote is None, + suppress_log=True) + + success = self.backup( + live_folder, + remote=remote, + prev_backup=final_folder if prev_exists else None, + pg_dump_exec=pg_dump_exec, + rsync_exec=rsync_exec + ) + if not success: + return False + + # move live-backup -> last-backup in a safe manner + # (such that if the process stops at any point, that we wouldn't lose data) + # step 1: last-backup -> last-backup-old + if prev_exists: + success = self._run_bash_cmd(['mv', str(final_folder), str(final_folder) + '-old'], remote=remote) + if not success: + return False + # step 2: live-backup -> last-backup + success = self._run_bash_cmd(['mv', str(live_folder), str(final_folder)], remote=remote) + if not success: + return False + # step 3: remote last-backup-old + if prev_exists: + success = self._run_bash_cmd(['rm', '-rf', str(final_folder) + '-old'], remote=remote) + if not success: + return False + + STORAGE_LOGGER.info(f"Backup moved from '{str(live_folder)}' to '{str(final_folder)}'.") + return True From 7c277d431ef44a796609dfcc5fd427c5b02bb900 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Thu, 7 Sep 2023 19:49:57 +0200 Subject: [PATCH 03/59] reorganize; separate utility functions to backup_utils --- aiida/storage/psql_dos/backup_utils.py | 115 ++++++++++ src/aiida/cmdline/commands/cmd_storage.py | 47 +++- .../orm/implementation/storage_backend.py | 33 +++ src/aiida/storage/psql_dos/backend.py | 210 +++++++----------- 4 files changed, 266 insertions(+), 139 deletions(-) create mode 100644 aiida/storage/psql_dos/backup_utils.py diff --git a/aiida/storage/psql_dos/backup_utils.py b/aiida/storage/psql_dos/backup_utils.py new file mode 100644 index 0000000000..4f66f9b330 --- /dev/null +++ b/aiida/storage/psql_dos/backup_utils.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The 
AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +# pylint: disable=import-error,no-name-in-module +"""Utility functions for running the psql_dos backend backups.""" + +import logging +import pathlib +import subprocess +from typing import Optional + +from aiida.storage.log import STORAGE_LOGGER + + +def run_cmd(args: list, remote: Optional[str] = None, check: bool = True, logger: logging.Logger = STORAGE_LOGGER): + """ + Run a command locally or remotely. + """ + all_args = args[:] + if remote: + all_args = ['ssh', remote] + all_args + + try: + res = subprocess.run(all_args, capture_output=True, text=True, check=check) + except subprocess.CalledProcessError as exc: + logger.error(exc) + return False + + logger.info(f'stdout: {all_args}\n{res.stdout}') + logger.info(f'stderr: {all_args}\n{res.stderr}') + + success = not bool(res.returncode) + + return success + + +def check_path_exists(path, remote: Optional[str] = None): + cmd = ['[', '-e', str(path), ']'] + return run_cmd(cmd, remote=remote, check=False) + + +def check_path_is_empty_folder(path, remote: Optional[str] = None): + cmd = ['[', '-d', str(path), ']', '&&', '[', '-z', f'$(ls -A "{str(path)}")', ']'] + return run_cmd(cmd, remote=remote, check=False) + + +def call_rsync( + args: list, + src: pathlib.Path, + dest: pathlib.Path, + link_dest: Optional[pathlib.Path] = None, + remote: Optional[str] = None, + src_trailing_slash: bool = False, + dest_trailing_slash: bool = False, + logger: logging.Logger = STORAGE_LOGGER +) -> bool: + """Call rsync with specified arguments and handle possible errors & stdout/stderr + + :param link_dest: + Path to the hardlinked files location (previous backup). 
+ + :param src_trailing_slash: + Add a trailing slash to the source path. This makes rsync copy the contents + of the folder instead of the folder itself. + + :param dest_trailing_slash: + Add a trailing slash to the destination path. This makes rsync interpret the + destination as a folder and create it if it doesn't exists. + + :return: + True if successful and False if unsuccessful. + """ + + all_args = args[:] + if link_dest: + if not remote: + # for local paths, use resolve() to get absolute path + link_dest_str = str(link_dest.resolve()) + else: + # for remote paths, we require absolute paths anyways + link_dest_str = str(link_dest) + all_args += [f'--link-dest={link_dest_str}'] + + if src_trailing_slash: + all_args += [str(src) + '/'] + else: + all_args += [str(src)] + + dest_str = str(dest) + if dest_trailing_slash: + dest_str += '/' + + if not remote: + all_args += [dest_str] + else: + all_args += [f'{remote}:{dest_str}'] + + try: + res = subprocess.run(all_args, capture_output=True, text=True, check=True) + except subprocess.CalledProcessError as exc: + logger.error(exc) + return False + + logger.info(f'stdout: {all_args}\n{res.stdout}') + logger.info(f'stderr: {all_args}\n{res.stderr}') + + success = not bool(res.returncode) + + return success diff --git a/src/aiida/cmdline/commands/cmd_storage.py b/src/aiida/cmdline/commands/cmd_storage.py index 9e223d2fba..66a3162e8d 100644 --- a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -167,20 +167,47 @@ def storage_maintain(ctx, full, no_repack, force, dry_run, compress): @verdi_storage.command('backup') -@click.option('--path', type=click.Path(), required=True, help='Specify the backup location.') +@click.option( + '--path', + type=click.Path(), + required=True, + help=( + "Path to where the backup will be created. If 'remote' is specified, must be an absolute path, " + 'otherwise can be relative.' 
+ ) +) @click.option( '--remote', type=click.STRING, default=None, - help='Specify remote host for backup location. If set, path needs to be absolute.' + help=( + "Remote host of the backup location. 'ssh' executable is called via subprocess and therefore remote" + 'hosts configured for it are supported (e.g. via .ssh/config file).' + ) +) +@click.option( + '--prev_backup', + type=click.Path(), + default=None, + help=( + 'Path to the previous backup. Rsync calls will be hard-linked to this path, making the backup' + 'incremental and efficient. If this is specified, the automatic folder management is not used.' + ) +) +@click.option( + '--pg_dump_exec', type=click.STRING, default='pg_dump', help="Specify the 'pg_dump' executable, if not in PATH." ) @click.option( - '--pg_dump_exec', type=click.STRING, default='pg_dump', help="Specify the 'pg_dump' executable, if needed." + '--rsync_exec', type=click.STRING, default='rsync', help="Specify the 'rsync' executable, if not in PATH." ) -@click.option('--rsync_exec', type=click.STRING, default='rsync', help="Specify the 'rsync' executable, if needed.") @decorators.with_dbenv() -def storage_backup(path, remote, pg_dump_exec, rsync_exec): - """Create a backup of the profile data.""" +def storage_backup(path, remote, prev_backup, pg_dump_exec, rsync_exec): + """Create a backup of the profile data. + + By default, automatically manages incremental/delta backup: creates a subfolder in the specified path + and if the subfolder already exists, creates an incremental backup from it. The 'prev_backup' argument + disables this automatic management. 
+ """ import pathlib from aiida.manage.manager import get_manager @@ -188,4 +215,10 @@ def storage_backup(path, remote, pg_dump_exec, rsync_exec): manager = get_manager() storage = manager.get_profile_storage() - storage.backup_auto(pathlib.Path(path), remote=remote, pg_dump_exec=pg_dump_exec, rsync_exec=rsync_exec) + storage.backup( + pathlib.Path(path), + remote=remote, + prev_backup=pathlib.Path(prev_backup) if prev_backup else None, + pg_dump=pg_dump_exec, + rsync=rsync_exec + ) diff --git a/src/aiida/orm/implementation/storage_backend.py b/src/aiida/orm/implementation/storage_backend.py index c438ebe1ec..85a5b25676 100644 --- a/src/aiida/orm/implementation/storage_backend.py +++ b/src/aiida/orm/implementation/storage_backend.py @@ -10,6 +10,7 @@ from __future__ import annotations import abc +import pathlib from typing import TYPE_CHECKING, Any, ContextManager, List, Optional, Sequence, TypeVar, Union if TYPE_CHECKING: @@ -304,6 +305,38 @@ def maintain(self, full: bool = False, dry_run: bool = False, **kwargs) -> None: :param dry_run: flag to only print the actions that would be taken without actually executing them. """ + @abc.abstractmethod + def backup( + self, + path: pathlib.Path, + remote: Optional[str] = None, + prev_backup: Optional[pathlib.Path] = None, + **kwargs + ) -> bool: + """Create a backup of the storage contents. + + By default, automatically manages incremental/delta backup: creates a subfolder in the specified path + and if the subfolder already exists, creates an incremental backup from it. + + :param path: + Path to where the backup will be created. If 'remote' is specified, must be an absolute path, + otherwise can be relative. + + :param remote: + Remote host of the backup location. 'ssh' executable is called via subprocess and therefore remote + hosts configured for it are supported (e.g. via .ssh/config file). + + :param prev_backup: + Path to the previous backup. 
Rsync calls will be hard-linked to this path, making the backup + incremental and efficient. If this is specified, the automatic folder management is not used. + + :param kwargs: + * Executable paths, if not default. + + :return: + True is successful and False if unsuccessful. + """ + def get_info(self, detailed: bool = False) -> dict: """Return general information on the storage. diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index acde748afa..d53c008fe6 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -23,6 +23,7 @@ from aiida.orm.entities import EntityTypes from aiida.orm.implementation import BackendEntity, StorageBackend from aiida.storage.log import STORAGE_LOGGER +from aiida.storage.psql_dos import backup_utils from aiida.storage.psql_dos.migrator import REPOSITORY_UUID_KEY, PsqlDosMigrator from aiida.storage.psql_dos.models import base @@ -478,67 +479,6 @@ def get_info(self, detailed: bool = False) -> dict: results['repository'] = self.get_repository().get_info(detailed) return results - def _call_rsync( - self, - args: list, - src: pathlib.Path, - dest: pathlib.Path, - link_dest: Optional[pathlib.Path] = None, - remote: Optional[str] = None, - src_trailing_slash: bool = False, - dest_trailing_slash: bool = False - ) -> bool: - """Call rsync with specified arguments and handle possible errors & stdout/stderr - - :param link_dest: - Path to the hardlinked files location (previous backup). - - :param src_trailing_slash: - Add a trailing slash to the source path. This makes rsync copy the contents - of the folder instead of the folder itself. - - :param dest_trailing_slash: - Add a trailing slash to the destination path. This makes rsync interpret the - destination as a folder and create it if it doesn't exists. - - :return: - True if successful and False if unsuccessful. 
- """ - import subprocess - - all_args = args[:] - if link_dest: - if not remote: - # for local paths, use resolve() to get absolute path - link_dest_str = str(link_dest.resolve()) - else: - # for remote paths, we require absolute paths anyways - link_dest_str = str(link_dest) - all_args += [f'--link-dest={link_dest_str}'] - - if src_trailing_slash: - all_args += [str(src) + '/'] - else: - all_args += [str(src)] - - dest_str = str(dest) - if dest_trailing_slash: - dest_str += '/' - - if not remote: - all_args += [dest_str] - else: - all_args += [f'{remote}:{dest_str}'] - - try: - res = subprocess.run(all_args, check=True, capture_output=True) - STORAGE_LOGGER.debug(f"stdout: {all_args}\n{res.stdout.decode('utf-8')}") - STORAGE_LOGGER.debug(f"stderr: {all_args}\n{res.stderr.decode('utf-8')}") - except subprocess.CalledProcessError as exc: - STORAGE_LOGGER.error(f'rsync: {exc}') - return False - return True - def _backup_dos( self, location: pathlib.Path, @@ -563,7 +503,7 @@ def _backup_dos( # step 1: loose files loose_path = container_path / 'loose' - success = self._call_rsync( + success = backup_utils.call_rsync( rsync_args, loose_path, location, remote=remote, link_dest=prev_backup / 'loose' if prev_backup else None ) if not success: @@ -593,20 +533,22 @@ def _backup_dos( return False # step 3: transfer the SQLITE database file - success = self._call_rsync(rsync_args, sqlite_temp_loc, location, remote=remote, link_dest=prev_backup) + success = backup_utils.call_rsync( + rsync_args, sqlite_temp_loc, location, remote=remote, link_dest=prev_backup + ) if not success: return False # step 4: transfer the packed files packs_path = container_path / 'packs' - success = self._call_rsync( + success = backup_utils.call_rsync( rsync_args, packs_path, location, remote=remote, link_dest=prev_backup / 'packs' if prev_backup else None ) if not success: return False # step 5: transfer anything else in the container folder - success = self._call_rsync( + success = 
backup_utils.call_rsync( rsync_args + [ '--exclude', 'loose', @@ -626,26 +568,12 @@ def _backup_dos( return True - def _run_bash_cmd(self, args: list, remote: Optional[str] = None, shell: bool = False, suppress_log: bool = False): - import subprocess - all_args = args[:] - if remote: - all_args = ['ssh', remote] + all_args - try: - subprocess.run(all_args, check=True, shell=shell) - except subprocess.CalledProcessError as exc: - if not suppress_log: - STORAGE_LOGGER.error(f'{all_args}: {exc}') - return False - return True - - def backup( # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches, too-many-statements + def _backup( # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches, too-many-statements self, path: pathlib.Path, remote: Optional[str] = None, prev_backup: Optional[pathlib.Path] = None, - pg_dump_exec: str = 'pg_dump', - rsync_exec: str = 'rsync' + **kwargs ) -> bool: """Create a backup of the postgres database and disk-objectstore to the provided path. @@ -661,11 +589,8 @@ def backup( # pylint: disable=too-many-locals, too-many-return-statements, too-m Path to the previous backup. Rsync calls will be hard-linked to this path, making the backup incremental and efficient. - :param pg_dump_exec: - Path to the `pg_dump` executable. - - :param rsync_exec: - Path to the `rsync` executable. + :param kwargs: + * Executable paths if not default: 'pg_dump', 'rsync' :return: True is successful and False if unsuccessful. 
@@ -684,11 +609,14 @@ def backup( # pylint: disable=too-many-locals, too-many-return-statements, too-m if remote: # check if accessible - success = self._run_bash_cmd(['exit'], remote=remote) + success = backup_utils.run_cmd(['exit'], remote=remote) if not success: STORAGE_LOGGER.error(f"Remote '{remote}' is not accessible!") return False - STORAGE_LOGGER.info(f"Remote '{remote}' is accessible!") + STORAGE_LOGGER.report(f"Remote '{remote}' is accessible!") + + pg_dump_exec = kwargs.get('pg_dump', 'pg_dump') + rsync_exec = kwargs.get('rsync', 'rsync') # check if the specified executables are found for exe in [pg_dump_exec, rsync_exec]: @@ -701,19 +629,17 @@ def backup( # pylint: disable=too-many-locals, too-many-return-statements, too-m cfg = self._profile.storage_config - # check that 'path' doesn't exist - success = self._run_bash_cmd([f'[ ! -e "{str(path)}" ]'], - remote=remote, - shell=remote is None, - suppress_log=True) - if not success: - # path exists, check if it's an empty folder - success = self._run_bash_cmd([f'[ -d "{str(path)}" ] && [ -z "$(ls -A "{str(path)}")" ]'], - remote=remote, - shell=remote is None) + path_exists = backup_utils.check_path_exists(path, remote) + + if path_exists: + if not backup_utils.check_path_is_empty_folder(path, remote): + STORAGE_LOGGER.error(f"The path '{str(path)}' exists and is not an empty folder!") + return False + else: + # path doesn't exist, check if it can be created + success = backup_utils.run_cmd(['mkdir', str(path)], remote=remote) if not success: - # it's not an empty folder, so stop the backup - STORAGE_LOGGER.error(f"The path '{str(path)}' exists and is not empty!") + STORAGE_LOGGER.error(f"Couldn't access/create '{str(path)}'!") return False # check that the AiiDA profile is not locked and request access for the duration of this backup process @@ -751,7 +677,7 @@ def backup( # pylint: disable=too-many-locals, too-many-return-statements, too-m return False # step 3: transfer the PostgreSQL database file - 
success = self._call_rsync( + success = backup_utils.call_rsync( rsync_args, psql_temp_loc, path, link_dest=prev_backup, remote=remote, dest_trailing_slash=True ) if not success: @@ -770,27 +696,22 @@ def backup( # pylint: disable=too-many-locals, too-many-return-statements, too-m # step 6: back up aiida config.json file try: config = get_config() - success = self._call_rsync(rsync_args, pathlib.Path(config.filepath), path, remote=remote) + success = backup_utils.call_rsync(rsync_args, pathlib.Path(config.filepath), path, remote=remote) if not success: return False except (exceptions.MissingConfigurationError, exceptions.ConfigurationError): - STORAGE_LOGGER.info('aiida config.json not found!') + STORAGE_LOGGER.warning('aiida config.json not found!') # step 5: write a file including date that signifies the backup completed successfully - success = self._run_bash_cmd(['touch', str(path / f'COMPLETED_{datetime.today().isoformat()}')], remote=remote) + success = backup_utils.run_cmd(['touch', str(path / f'COMPLETED_{datetime.today().isoformat()}')], + remote=remote) if not success: return False - STORAGE_LOGGER.info(f"Success! Backup completed to {f'{remote}:' if remote else ''}{str(path)}") + STORAGE_LOGGER.report(f"Success! Backup completed to {f'{remote}:' if remote else ''}{str(path)}") return True - def backup_auto( - self, - path: pathlib.Path, - remote: Optional[str] = None, - pg_dump_exec: str = 'pg_dump', - rsync_exec: str = 'rsync' - ): + def _backup_auto_folders(self, path: pathlib.Path, remote: Optional[str] = None, **kwargs): """Create a backup of the AiiDA profile data, managing live and previous backup folders automatically The running backup is done to `/live-backup`. When it completes, it is moved to @@ -806,11 +727,8 @@ def backup_auto( Remote host of the backup location. 'ssh' executable is called via subprocess and therefore remote hosts configured for it are supported (e.g. via .ssh/config file). 
- :param pg_dump_exec: - Path to the `pg_dump` executable. - - :param rsync_exec: - Path to the `rsync` executable. + :param kwargs: + * Executable paths if not default: 'pg_dump', 'rsync' :return: True is successful and False if unsuccessful. @@ -820,18 +738,9 @@ def backup_auto( final_folder = path / 'last-backup' # does previous backup exist? - prev_exists = self._run_bash_cmd([f'[ -d "{str(final_folder)}" ]'], - remote=remote, - shell=remote is None, - suppress_log=True) + prev_exists = backup_utils.check_path_exists(final_folder, remote) - success = self.backup( - live_folder, - remote=remote, - prev_backup=final_folder if prev_exists else None, - pg_dump_exec=pg_dump_exec, - rsync_exec=rsync_exec - ) + success = self._backup(live_folder, remote=remote, prev_backup=final_folder if prev_exists else None, **kwargs) if not success: return False @@ -839,18 +748,55 @@ def backup_auto( # (such that if the process stops at any point, that we wouldn't lose data) # step 1: last-backup -> last-backup-old if prev_exists: - success = self._run_bash_cmd(['mv', str(final_folder), str(final_folder) + '-old'], remote=remote) + success = backup_utils.run_cmd(['mv', str(final_folder), str(final_folder) + '-old'], remote=remote) if not success: return False # step 2: live-backup -> last-backup - success = self._run_bash_cmd(['mv', str(live_folder), str(final_folder)], remote=remote) + success = backup_utils.run_cmd(['mv', str(live_folder), str(final_folder)], remote=remote) if not success: return False # step 3: remote last-backup-old if prev_exists: - success = self._run_bash_cmd(['rm', '-rf', str(final_folder) + '-old'], remote=remote) + success = backup_utils.run_cmd(['rm', '-rf', str(final_folder) + '-old'], remote=remote) if not success: return False - STORAGE_LOGGER.info(f"Backup moved from '{str(live_folder)}' to '{str(final_folder)}'.") + STORAGE_LOGGER.report(f"Backup moved from '{str(live_folder)}' to '{str(final_folder)}'.") return True + + def backup( # pylint: 
disable=too-many-locals, too-many-return-statements, too-many-branches, too-many-statements + self, + path: pathlib.Path, + remote: Optional[str] = None, + prev_backup: Optional[pathlib.Path] = None, + **kwargs + ) -> bool: + """Create a backup of the postgres database and disk-objectstore. + + By default, automatically manages incremental/delta backup: creates a subfolder in the specified path + and if the subfolder already exists, creates an incremental backup from it. + + :param path: + Path to where the backup will be created. If 'remote' is specified, must be an absolute path, + otherwise can be relative. + + :param remote: + Remote host of the backup location. 'ssh' executable is called via subprocess and therefore remote + hosts configured for it are supported (e.g. via .ssh/config file). + + :param prev_backup: + Path to the previous backup. Rsync calls will be hard-linked to this path, making the backup + incremental and efficient. If this is specified, the automatic folder management is not used. + + :param kwargs: + * Executable paths if not default: 'pg_dump', 'rsync' + + :return: + True is successful and False if unsuccessful. 
+ """ + + if prev_backup: + success = self._backup(path, remote=remote, prev_backup=prev_backup, **kwargs) + else: + success = self._backup_auto_folders(path, remote=remote, **kwargs) + return success From d7fa50832dee62c8ec3b382c9ad2057e967e0205 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Thu, 2 Nov 2023 12:08:01 +0100 Subject: [PATCH 04/59] use backup utilities from disk_objectstore --- aiida/storage/psql_dos/backup_utils.py | 115 -------- docs/source/reference/command_line.rst | 2 +- src/aiida/cmdline/commands/cmd_storage.py | 85 +++--- .../orm/implementation/storage_backend.py | 16 +- src/aiida/storage/psql_dos/backend.py | 259 +----------------- 5 files changed, 63 insertions(+), 414 deletions(-) delete mode 100644 aiida/storage/psql_dos/backup_utils.py diff --git a/aiida/storage/psql_dos/backup_utils.py b/aiida/storage/psql_dos/backup_utils.py deleted file mode 100644 index 4f66f9b330..0000000000 --- a/aiida/storage/psql_dos/backup_utils.py +++ /dev/null @@ -1,115 +0,0 @@ -# -*- coding: utf-8 -*- -########################################################################### -# Copyright (c), The AiiDA team. All rights reserved. # -# This file is part of the AiiDA code. # -# # -# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # -# For further information on the license, see the LICENSE.txt file # -# For further information please visit http://www.aiida.net # -########################################################################### -# pylint: disable=import-error,no-name-in-module -"""Utility functions for running the psql_dos backend backups.""" - -import logging -import pathlib -import subprocess -from typing import Optional - -from aiida.storage.log import STORAGE_LOGGER - - -def run_cmd(args: list, remote: Optional[str] = None, check: bool = True, logger: logging.Logger = STORAGE_LOGGER): - """ - Run a command locally or remotely. 
- """ - all_args = args[:] - if remote: - all_args = ['ssh', remote] + all_args - - try: - res = subprocess.run(all_args, capture_output=True, text=True, check=check) - except subprocess.CalledProcessError as exc: - logger.error(exc) - return False - - logger.info(f'stdout: {all_args}\n{res.stdout}') - logger.info(f'stderr: {all_args}\n{res.stderr}') - - success = not bool(res.returncode) - - return success - - -def check_path_exists(path, remote: Optional[str] = None): - cmd = ['[', '-e', str(path), ']'] - return run_cmd(cmd, remote=remote, check=False) - - -def check_path_is_empty_folder(path, remote: Optional[str] = None): - cmd = ['[', '-d', str(path), ']', '&&', '[', '-z', f'$(ls -A "{str(path)}")', ']'] - return run_cmd(cmd, remote=remote, check=False) - - -def call_rsync( - args: list, - src: pathlib.Path, - dest: pathlib.Path, - link_dest: Optional[pathlib.Path] = None, - remote: Optional[str] = None, - src_trailing_slash: bool = False, - dest_trailing_slash: bool = False, - logger: logging.Logger = STORAGE_LOGGER -) -> bool: - """Call rsync with specified arguments and handle possible errors & stdout/stderr - - :param link_dest: - Path to the hardlinked files location (previous backup). - - :param src_trailing_slash: - Add a trailing slash to the source path. This makes rsync copy the contents - of the folder instead of the folder itself. - - :param dest_trailing_slash: - Add a trailing slash to the destination path. This makes rsync interpret the - destination as a folder and create it if it doesn't exists. - - :return: - True if successful and False if unsuccessful. 
- """ - - all_args = args[:] - if link_dest: - if not remote: - # for local paths, use resolve() to get absolute path - link_dest_str = str(link_dest.resolve()) - else: - # for remote paths, we require absolute paths anyways - link_dest_str = str(link_dest) - all_args += [f'--link-dest={link_dest_str}'] - - if src_trailing_slash: - all_args += [str(src) + '/'] - else: - all_args += [str(src)] - - dest_str = str(dest) - if dest_trailing_slash: - dest_str += '/' - - if not remote: - all_args += [dest_str] - else: - all_args += [f'{remote}:{dest_str}'] - - try: - res = subprocess.run(all_args, capture_output=True, text=True, check=True) - except subprocess.CalledProcessError as exc: - logger.error(exc) - return False - - logger.info(f'stdout: {all_args}\n{res.stdout}') - logger.info(f'stderr: {all_args}\n{res.stderr}') - - success = not bool(res.returncode) - - return success diff --git a/docs/source/reference/command_line.rst b/docs/source/reference/command_line.rst index e1bd3c9a9d..178163732f 100644 --- a/docs/source/reference/command_line.rst +++ b/docs/source/reference/command_line.rst @@ -567,7 +567,7 @@ Below is a list with all available subcommands. --help Show this message and exit. Commands: - backup Create a backup of the profile data. + backup Create a backup of the profile data to destination location DEST, in a... info Summarise the contents of the storage. integrity Checks for the integrity of the data storage. maintain Performs maintenance tasks on the repository. 
diff --git a/src/aiida/cmdline/commands/cmd_storage.py b/src/aiida/cmdline/commands/cmd_storage.py index 66a3162e8d..52c2567842 100644 --- a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -14,6 +14,7 @@ from aiida.cmdline.params import options from aiida.cmdline.utils import decorators, echo from aiida.common import exceptions +from aiida.storage.log import STORAGE_LOGGER @verdi.group('storage') @@ -167,58 +168,62 @@ def storage_maintain(ctx, full, no_repack, force, dry_run, compress): @verdi_storage.command('backup') +@click.argument('dest', type=click.Path(), nargs=1) @click.option( - '--path', - type=click.Path(), - required=True, - help=( - "Path to where the backup will be created. If 'remote' is specified, must be an absolute path, " - 'otherwise can be relative.' - ) + '--keep', + default=1, + help='Number of previous backups to keep in the destination. (default: 1)', ) @click.option( - '--remote', - type=click.STRING, - default=None, - help=( - "Remote host of the backup location. 'ssh' executable is called via subprocess and therefore remote" - 'hosts configured for it are supported (e.g. via .ssh/config file).' - ) + '--pg_dump_exe', type=click.STRING, default='pg_dump', help="Specify the 'pg_dump' executable, if not in PATH." ) @click.option( - '--prev_backup', - type=click.Path(), - default=None, - help=( - 'Path to the previous backup. Rsync calls will be hard-linked to this path, making the backup' - 'incremental and efficient. If this is specified, the automatic folder management is not used.' - ) -) -@click.option( - '--pg_dump_exec', type=click.STRING, default='pg_dump', help="Specify the 'pg_dump' executable, if not in PATH." -) -@click.option( - '--rsync_exec', type=click.STRING, default='rsync', help="Specify the 'rsync' executable, if not in PATH." + '--rsync_exe', + type=click.STRING, + default='rsync', + help="Specify the 'rsync' executable, if not in PATH. 
Used for both local and remote destinations"
 )
 @decorators.with_dbenv()
-def storage_backup(path, remote, prev_backup, pg_dump_exec, rsync_exec):
-    """Create a backup of the profile data.
-
-    By default, automatically manages incremental/delta backup: creates a subfolder in the specified path
-    and if the subfolder already exists, creates an incremental backup from it. The 'prev_backup' argument
-    disables this automatic management.
+def storage_backup(dest: str, keep: int, pg_dump_exe: str, rsync_exe: str):
+    """Create a backup of the profile data to destination location DEST, in a subfolder
+    backup_<timestamp>_<randstr> and point a symlink called `last-backup` to it.
+
+    NOTE: This is safe to run while the AiiDA profile is being used.
+
+    Destination (DEST) can either be a local path, or a remote destination (reachable via ssh).
+    In the latter case, remote destination needs to have the following syntax:
+    [<remote_user>@]<remote_host>:<path>
+    i.e., contain the remote host name and the remote path, separated by a colon (and optionally the
+    remote user separated by an @ symbol). You can tune SSH parameters using the standard options given
+    by OpenSSH, such as adding configuration options to ~/.ssh/config (e.g. to allow for passwordless
+    login - recommended, since this script might ask multiple times for the password).
+
+    NOTE: 'rsync' and other UNIX-specific commands are called, thus the command will not work on
+    non-UNIX environments. 
""" - import pathlib + from disk_objectstore import backup_utils from aiida.manage.manager import get_manager manager = get_manager() storage = manager.get_profile_storage() - storage.backup( - pathlib.Path(path), - remote=remote, - prev_backup=pathlib.Path(prev_backup) if prev_backup else None, - pg_dump=pg_dump_exec, - rsync=rsync_exec + try: + backup_utils_instance = backup_utils.BackupUtilities(dest, keep, rsync_exe, STORAGE_LOGGER) + except ValueError as exc: + click.echo(f'Error: {exc}') + return + + success = backup_utils_instance.validate_inputs(additional_exes=[pg_dump_exe]) + if not success: + click.echo('Input validation failed.') + return + + success = backup_utils_instance.backup_auto_folders( + lambda path, prev: storage.backup(backup_utils_instance, path, prev_backup=prev, pg_dump_exe=pg_dump_exe) ) + if not success: + click.echo('Error: backup failed.') + return + + click.echo(f'Success! Profile backed up to {dest}') diff --git a/src/aiida/orm/implementation/storage_backend.py b/src/aiida/orm/implementation/storage_backend.py index 85a5b25676..cebcf7fc67 100644 --- a/src/aiida/orm/implementation/storage_backend.py +++ b/src/aiida/orm/implementation/storage_backend.py @@ -13,6 +13,8 @@ import pathlib from typing import TYPE_CHECKING, Any, ContextManager, List, Optional, Sequence, TypeVar, Union +from disk_objectstore import backup_utils + if TYPE_CHECKING: from aiida.manage.configuration.profile import Profile from aiida.orm.autogroup import AutogroupManager @@ -308,31 +310,21 @@ def maintain(self, full: bool = False, dry_run: bool = False, **kwargs) -> None: @abc.abstractmethod def backup( self, + backup_utils_instance: backup_utils.BackupUtilities, path: pathlib.Path, - remote: Optional[str] = None, prev_backup: Optional[pathlib.Path] = None, - **kwargs + pg_dump_exe: str = 'pg_dump', ) -> bool: """Create a backup of the storage contents. 
- By default, automatically manages incremental/delta backup: creates a subfolder in the specified path - and if the subfolder already exists, creates an incremental backup from it. - :param path: Path to where the backup will be created. If 'remote' is specified, must be an absolute path, otherwise can be relative. - :param remote: - Remote host of the backup location. 'ssh' executable is called via subprocess and therefore remote - hosts configured for it are supported (e.g. via .ssh/config file). - :param prev_backup: Path to the previous backup. Rsync calls will be hard-linked to this path, making the backup incremental and efficient. If this is specified, the automatic folder management is not used. - :param kwargs: - * Executable paths, if not default. - :return: True is successful and False if unsuccessful. """ diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index d53c008fe6..50eb9f4900 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -13,6 +13,7 @@ from contextlib import contextmanager, nullcontext from typing import TYPE_CHECKING, Iterator, List, Optional, Sequence, Set, Union +from disk_objectstore import Container, backup_utils from pydantic import BaseModel, Field from sqlalchemy import column, insert, update from sqlalchemy.orm import Session, scoped_session, sessionmaker @@ -23,7 +24,6 @@ from aiida.orm.entities import EntityTypes from aiida.orm.implementation import BackendEntity, StorageBackend from aiida.storage.log import STORAGE_LOGGER -from aiida.storage.psql_dos import backup_utils from aiida.storage.psql_dos.migrator import REPOSITORY_UUID_KEY, PsqlDosMigrator from aiida.storage.psql_dos.models import base @@ -217,8 +217,6 @@ def _clear(self) -> None: ) def get_repository(self) -> 'DiskObjectStoreRepositoryBackend': - from disk_objectstore import Container - from aiida.repository.backend import DiskObjectStoreRepositoryBackend container = 
Container(get_filepath_container(self.profile)) @@ -479,126 +477,27 @@ def get_info(self, detailed: bool = False) -> dict: results['repository'] = self.get_repository().get_info(detailed) return results - def _backup_dos( - self, - location: pathlib.Path, - rsync_args: list, - remote: Optional[str] = None, - prev_backup: Optional[pathlib.Path] = None - ) -> bool: - """Create a backup of the disk-objectstore container - - It should be done in the following order: - 1) loose files; - 2) sqlite database; - 3) packed files. - - :return: - True is successful and False if unsuccessful. - """ - import sqlite3 - import tempfile - - container_path = get_filepath_container(self._profile) - - # step 1: loose files - loose_path = container_path / 'loose' - success = backup_utils.call_rsync( - rsync_args, loose_path, location, remote=remote, link_dest=prev_backup / 'loose' if prev_backup else None - ) - if not success: - return False - - # step 2: sqlite db - - sqlite_path = container_path / 'packs.idx' - - # make a temporary directory to dump sqlite db locally - with tempfile.TemporaryDirectory() as temp_dir_name: - sqlite_temp_loc = pathlib.Path(temp_dir_name) / 'packs.idx' - - # Safe way to make a backup of the sqlite db, while it might potentially be accessed - # https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.backup - src = sqlite3.connect(str(sqlite_path)) - dst = sqlite3.connect(str(sqlite_temp_loc)) - with dst: - src.backup(dst) - dst.close() - src.close() - - if sqlite_temp_loc.is_file(): - STORAGE_LOGGER.info(f'Dumped the SQLite database to {str(sqlite_temp_loc)}') - else: - STORAGE_LOGGER.error(f"'{str(sqlite_temp_loc)}' was not created.") - return False - - # step 3: transfer the SQLITE database file - success = backup_utils.call_rsync( - rsync_args, sqlite_temp_loc, location, remote=remote, link_dest=prev_backup - ) - if not success: - return False - - # step 4: transfer the packed files - packs_path = container_path / 'packs' - success = 
backup_utils.call_rsync( - rsync_args, packs_path, location, remote=remote, link_dest=prev_backup / 'packs' if prev_backup else None - ) - if not success: - return False - - # step 5: transfer anything else in the container folder - success = backup_utils.call_rsync( - rsync_args + [ - '--exclude', - 'loose', - '--exclude', - 'packs.idx', - '--exclude', - 'packs', - ], - container_path, - location, - link_dest=prev_backup, - remote=remote, - src_trailing_slash=True - ) - if not success: - return False - return True - - def _backup( # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches, too-many-statements + def backup( # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches, too-many-statements self, + backup_utils_instance: backup_utils.BackupUtilities, path: pathlib.Path, - remote: Optional[str] = None, prev_backup: Optional[pathlib.Path] = None, - **kwargs + pg_dump_exe: str = 'pg_dump', ) -> bool: """Create a backup of the postgres database and disk-objectstore to the provided path. :param path: - Path to where the backup will be created. If 'remote' is specified, must be an absolute path, - otherwise can be relative. - - :param remote: - Remote host of the backup location. 'ssh' executable is called via subprocess and therefore remote - hosts configured for it are supported (e.g. via .ssh/config file). + Path to where the backup will be created. :param prev_backup: Path to the previous backup. Rsync calls will be hard-linked to this path, making the backup incremental and efficient. - :param kwargs: - * Executable paths if not default: 'pg_dump', 'rsync' - :return: True is successful and False if unsuccessful. 
""" - - from datetime import datetime import os - import shutil import subprocess import tempfile @@ -607,40 +506,8 @@ def _backup( # pylint: disable=too-many-locals, too-many-return-statements, too- from aiida.manage.configuration import get_config from aiida.manage.profile_access import ProfileAccessManager - if remote: - # check if accessible - success = backup_utils.run_cmd(['exit'], remote=remote) - if not success: - STORAGE_LOGGER.error(f"Remote '{remote}' is not accessible!") - return False - STORAGE_LOGGER.report(f"Remote '{remote}' is accessible!") - - pg_dump_exec = kwargs.get('pg_dump', 'pg_dump') - rsync_exec = kwargs.get('rsync', 'rsync') - - # check if the specified executables are found - for exe in [pg_dump_exec, rsync_exec]: - if shutil.which(exe) is None: - STORAGE_LOGGER.error(f"executable '{exe}' not found!") - return False - - # subprocess arguments shared by all rsync calls: - rsync_args = [rsync_exec, '-azh', '-vv', '--no-whole-file'] - cfg = self._profile.storage_config - - path_exists = backup_utils.check_path_exists(path, remote) - - if path_exists: - if not backup_utils.check_path_is_empty_folder(path, remote): - STORAGE_LOGGER.error(f"The path '{str(path)}' exists and is not an empty folder!") - return False - else: - # path doesn't exist, check if it can be created - success = backup_utils.run_cmd(['mkdir', str(path)], remote=remote) - if not success: - STORAGE_LOGGER.error(f"Couldn't access/create '{str(path)}'!") - return False + container = Container(str(get_filepath_container(self.profile))) # check that the AiiDA profile is not locked and request access for the duration of this backup process # (locked means that possibly a maintenance operation is running that could interfere with the backup) @@ -660,7 +527,7 @@ def _backup( # pylint: disable=too-many-locals, too-many-return-statements, too- env = os.environ.copy() env['PGPASSWORD'] = cfg['database_password'] cmd = [ - pg_dump_exec, f'--host={cfg["database_hostname"]}', 
f'--port={cfg["database_port"]}', + pg_dump_exe, f'--host={cfg["database_hostname"]}', f'--port={cfg["database_port"]}', f'--dbname={cfg["database_name"]}', f'--username={cfg["database_username"]}', '--no-password', '--format=p', f'--file={str(psql_temp_loc)}' ] @@ -677,126 +544,26 @@ def _backup( # pylint: disable=too-many-locals, too-many-return-statements, too- return False # step 3: transfer the PostgreSQL database file - success = backup_utils.call_rsync( - rsync_args, psql_temp_loc, path, link_dest=prev_backup, remote=remote, dest_trailing_slash=True + success = backup_utils_instance.call_rsync( + psql_temp_loc, path, link_dest=prev_backup, dest_trailing_slash=True ) if not success: return False # step 4: back up the disk-objectstore - success = self._backup_dos( - path / 'container', - rsync_args, - remote=remote, - prev_backup=prev_backup / 'container' if prev_backup else None + success = backup_utils_instance.backup_container( + container, path / 'container', prev_backup=prev_backup / 'container' if prev_backup else None ) if not success: return False - # step 6: back up aiida config.json file + # step 5: back up aiida config.json file try: config = get_config() - success = backup_utils.call_rsync(rsync_args, pathlib.Path(config.filepath), path, remote=remote) + success = backup_utils_instance.call_rsync(pathlib.Path(config.filepath), path) if not success: return False except (exceptions.MissingConfigurationError, exceptions.ConfigurationError): STORAGE_LOGGER.warning('aiida config.json not found!') - # step 5: write a file including date that signifies the backup completed successfully - success = backup_utils.run_cmd(['touch', str(path / f'COMPLETED_{datetime.today().isoformat()}')], - remote=remote) - if not success: - return False - - STORAGE_LOGGER.report(f"Success! 
Backup completed to {f'{remote}:' if remote else ''}{str(path)}") return True - - def _backup_auto_folders(self, path: pathlib.Path, remote: Optional[str] = None, **kwargs): - """Create a backup of the AiiDA profile data, managing live and previous backup folders automatically - - The running backup is done to `/live-backup`. When it completes, it is moved to - the final path: `/last-backup`. This done so that the last backup wouldn't be - corrupted, in case the live one crashes or gets interrupted. Rsync `link-dest` is used between - the two folders to keep the backups incremental and performant. - - :param path: - Path to where the backup will be created. If 'remote' is specified, must be an absolute path, - otherwise can be relative. - - :param remote: - Remote host of the backup location. 'ssh' executable is called via subprocess and therefore remote - hosts configured for it are supported (e.g. via .ssh/config file). - - :param kwargs: - * Executable paths if not default: 'pg_dump', 'rsync' - - :return: - True is successful and False if unsuccessful. - """ - - live_folder = path / 'live_backup' - final_folder = path / 'last-backup' - - # does previous backup exist? 
- prev_exists = backup_utils.check_path_exists(final_folder, remote) - - success = self._backup(live_folder, remote=remote, prev_backup=final_folder if prev_exists else None, **kwargs) - if not success: - return False - - # move live-backup -> last-backup in a safe manner - # (such that if the process stops at any point, that we wouldn't lose data) - # step 1: last-backup -> last-backup-old - if prev_exists: - success = backup_utils.run_cmd(['mv', str(final_folder), str(final_folder) + '-old'], remote=remote) - if not success: - return False - # step 2: live-backup -> last-backup - success = backup_utils.run_cmd(['mv', str(live_folder), str(final_folder)], remote=remote) - if not success: - return False - # step 3: remote last-backup-old - if prev_exists: - success = backup_utils.run_cmd(['rm', '-rf', str(final_folder) + '-old'], remote=remote) - if not success: - return False - - STORAGE_LOGGER.report(f"Backup moved from '{str(live_folder)}' to '{str(final_folder)}'.") - return True - - def backup( # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches, too-many-statements - self, - path: pathlib.Path, - remote: Optional[str] = None, - prev_backup: Optional[pathlib.Path] = None, - **kwargs - ) -> bool: - """Create a backup of the postgres database and disk-objectstore. - - By default, automatically manages incremental/delta backup: creates a subfolder in the specified path - and if the subfolder already exists, creates an incremental backup from it. - - :param path: - Path to where the backup will be created. If 'remote' is specified, must be an absolute path, - otherwise can be relative. - - :param remote: - Remote host of the backup location. 'ssh' executable is called via subprocess and therefore remote - hosts configured for it are supported (e.g. via .ssh/config file). - - :param prev_backup: - Path to the previous backup. Rsync calls will be hard-linked to this path, making the backup - incremental and efficient. 
If this is specified, the automatic folder management is not used. - - :param kwargs: - * Executable paths if not default: 'pg_dump', 'rsync' - - :return: - True is successful and False if unsuccessful. - """ - - if prev_backup: - success = self._backup(path, remote=remote, prev_backup=prev_backup, **kwargs) - else: - success = self._backup_auto_folders(path, remote=remote, **kwargs) - return success From 078ad40b02f71410086616d69ff4fc7a3f322a73 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Thu, 7 Dec 2023 23:18:31 +0200 Subject: [PATCH 05/59] adapt to latest disk-objectstore PR161 --- src/aiida/cmdline/commands/cmd_storage.py | 23 ++------- .../orm/implementation/storage_backend.py | 19 ++------ src/aiida/storage/psql_dos/backend.py | 48 ++++++++++--------- 3 files changed, 34 insertions(+), 56 deletions(-) diff --git a/src/aiida/cmdline/commands/cmd_storage.py b/src/aiida/cmdline/commands/cmd_storage.py index 52c2567842..13dc1107b7 100644 --- a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -7,6 +7,8 @@ # For further information please visit http://www.aiida.net # ########################################################################### """`verdi storage` commands.""" +import sys + import click from click_spinner import spinner @@ -14,7 +16,6 @@ from aiida.cmdline.params import options from aiida.cmdline.utils import decorators, echo from aiida.common import exceptions -from aiida.storage.log import STORAGE_LOGGER @verdi.group('storage') @@ -201,29 +202,15 @@ def storage_backup(dest: str, keep: int, pg_dump_exe: str, rsync_exe: str): NOTE: 'rsync' and other UNIX-specific commands are called, thus the command will not work on non-UNIX environments. 
""" - from disk_objectstore import backup_utils from aiida.manage.manager import get_manager manager = get_manager() storage = manager.get_profile_storage() - try: - backup_utils_instance = backup_utils.BackupUtilities(dest, keep, rsync_exe, STORAGE_LOGGER) - except ValueError as exc: - click.echo(f'Error: {exc}') - return - - success = backup_utils_instance.validate_inputs(additional_exes=[pg_dump_exe]) + success = storage.backup(dest, keep, exes={'rsync': rsync_exe, 'pg_dump': pg_dump_exe}) if not success: - click.echo('Input validation failed.') - return - - success = backup_utils_instance.backup_auto_folders( - lambda path, prev: storage.backup(backup_utils_instance, path, prev_backup=prev, pg_dump_exe=pg_dump_exe) - ) - if not success: - click.echo('Error: backup failed.') - return + click.echo('Backup was not successful.') + sys.exit(1) click.echo(f'Success! Profile backed up to {dest}') diff --git a/src/aiida/orm/implementation/storage_backend.py b/src/aiida/orm/implementation/storage_backend.py index cebcf7fc67..00654ec9e0 100644 --- a/src/aiida/orm/implementation/storage_backend.py +++ b/src/aiida/orm/implementation/storage_backend.py @@ -10,11 +10,8 @@ from __future__ import annotations import abc -import pathlib from typing import TYPE_CHECKING, Any, ContextManager, List, Optional, Sequence, TypeVar, Union -from disk_objectstore import backup_utils - if TYPE_CHECKING: from aiida.manage.configuration.profile import Profile from aiida.orm.autogroup import AutogroupManager @@ -310,24 +307,16 @@ def maintain(self, full: bool = False, dry_run: bool = False, **kwargs) -> None: @abc.abstractmethod def backup( self, - backup_utils_instance: backup_utils.BackupUtilities, - path: pathlib.Path, - prev_backup: Optional[pathlib.Path] = None, - pg_dump_exe: str = 'pg_dump', + dest: str, + keep: int, + exes: dict, ) -> bool: """Create a backup of the storage contents. - :param path: - Path to where the backup will be created. 
If 'remote' is specified, must be an absolute path, - otherwise can be relative. - - :param prev_backup: - Path to the previous backup. Rsync calls will be hard-linked to this path, making the backup - incremental and efficient. If this is specified, the automatic folder management is not used. - :return: True is successful and False if unsuccessful. """ + raise NotImplementedError def get_info(self, detailed: bool = False) -> dict: """Return general information on the storage. diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index 50eb9f4900..21dedc640b 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -477,14 +477,12 @@ def get_info(self, detailed: bool = False) -> dict: results['repository'] = self.get_repository().get_info(detailed) return results - - def backup( # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches, too-many-statements + def _backup( self, - backup_utils_instance: backup_utils.BackupUtilities, + manager: backup_utils.BackupManager, path: pathlib.Path, prev_backup: Optional[pathlib.Path] = None, - pg_dump_exe: str = 'pg_dump', - ) -> bool: + ) -> None: """Create a backup of the postgres database and disk-objectstore to the provided path. 
:param path: @@ -514,13 +512,13 @@ def backup( # pylint: disable=too-many-locals, too-many-return-statements, too-m try: ProfileAccessManager(self._profile).request_access() except LockedProfileError: - STORAGE_LOGGER.error('The profile is locked!') - return False + raise backup_utils.BackupError('The profile is locked!') # step 1: first run the storage maintenance version that can safely be performed while aiida is running self.maintain(full=False, compress=True) # step 2: dump the PostgreSQL database into a temporary directory + pg_dump_exe = manager.exes['pg_dump'] with tempfile.TemporaryDirectory() as temp_dir_name: psql_temp_loc = pathlib.Path(temp_dir_name) / 'db.psql' @@ -534,36 +532,40 @@ def backup( # pylint: disable=too-many-locals, too-many-return-statements, too-m try: subprocess.run(cmd, check=True, env=env) except subprocess.CalledProcessError as exc: - STORAGE_LOGGER.error(f'pg_dump: {exc}') - return False + raise backup_utils.BackupError(f'pg_dump: {exc}') if psql_temp_loc.is_file(): STORAGE_LOGGER.info(f'Dumped the PostgreSQL database to {str(psql_temp_loc)}') else: - STORAGE_LOGGER.error(f"'{str(psql_temp_loc)}' was not created.") - return False + raise backup_utils.BackupError(f"'{str(psql_temp_loc)}' was not created.") # step 3: transfer the PostgreSQL database file - success = backup_utils_instance.call_rsync( - psql_temp_loc, path, link_dest=prev_backup, dest_trailing_slash=True - ) - if not success: - return False + manager.call_rsync(psql_temp_loc, path, link_dest=prev_backup, dest_trailing_slash=True) # step 4: back up the disk-objectstore - success = backup_utils_instance.backup_container( - container, path / 'container', prev_backup=prev_backup / 'container' if prev_backup else None + backup_utils.backup_container( + manager, container, path / 'container', prev_backup=prev_backup / 'container' if prev_backup else None ) - if not success: - return False # step 5: back up aiida config.json file try: config = get_config() - success = 
backup_utils_instance.call_rsync(pathlib.Path(config.filepath), path) - if not success: - return False + manager.call_rsync(pathlib.Path(config.filepath), path) except (exceptions.MissingConfigurationError, exceptions.ConfigurationError): STORAGE_LOGGER.warning('aiida config.json not found!') + def backup( + self, + dest: str, + keep: int, + exes: dict, + ) -> bool: + + try: + backup_manager = backup_utils.BackupManager(dest, STORAGE_LOGGER, exes=exes, keep=keep) + backup_manager.backup_auto_folders(lambda path, prev: self._backup(backup_manager, path, prev)) + except backup_utils.BackupError as exc: + STORAGE_LOGGER.error(f'Error: {exc}') + return False + return True From b8ddf42d409a9e91b1da01cd1627491cd9d88447 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Fri, 12 Jan 2024 14:33:52 +0200 Subject: [PATCH 06/59] rm dbdump cli command --- docs/source/reference/command_line.rst | 15 --------- src/aiida/cmdline/commands/cmd_profile.py | 37 ----------------------- 2 files changed, 52 deletions(-) diff --git a/docs/source/reference/command_line.rst b/docs/source/reference/command_line.rst index 178163732f..652a223d29 100644 --- a/docs/source/reference/command_line.rst +++ b/docs/source/reference/command_line.rst @@ -575,21 +575,6 @@ Below is a list with all available subcommands. version Print the current version of the storage schema. -.. _reference:command-line:verdi-tui: - -``verdi tui`` -------------- - -.. code:: console - - Usage: [OPTIONS] - - Open Textual TUI. - - Options: - --help Show this message and exit. - - .. 
_reference:command-line:verdi-user: ``verdi user`` diff --git a/src/aiida/cmdline/commands/cmd_profile.py b/src/aiida/cmdline/commands/cmd_profile.py index 0aaffd41d4..8be4ed3bbf 100644 --- a/src/aiida/cmdline/commands/cmd_profile.py +++ b/src/aiida/cmdline/commands/cmd_profile.py @@ -165,40 +165,3 @@ def profile_delete(force, delete_data, profiles): get_config().delete_profile(profile.name, delete_storage=delete_data) echo.echo_success(f'Profile `{profile.name}` was deleted.') - - -@verdi_profile.command('dbdump') -@options.PROFILE(default=defaults.get_default_profile) -@click.option('--output_file', type=click.Path(), help='Specify the output file path.') -def profile_dbdump(profile, output_file): - """Dump the PostgreSQL database into a file.""" - - import os - import pathlib - import subprocess - - if not output_file: - output_file = f'{profile.name}.psql' - - output_file = pathlib.Path(output_file) - - db_config = profile.dictionary['storage']['config'] - - cmd = [ - 'pg_dump', f'--host={db_config["database_hostname"]}', f'--port={db_config["database_port"]}', - f'--dbname={db_config["database_name"]}', f'--username={db_config["database_username"]}', '--no-password', - '--format=p', f'--file={output_file}' - ] - - env = os.environ.copy() - env['PGPASSWORD'] = db_config['database_password'] - - pg_dump_output = subprocess.check_output(cmd, env=env).decode('utf-8') - - if len(pg_dump_output) > 0: - echo.echo_warning(f'Output from pg_dump: {pg_dump_output}') - - if output_file.is_file(): - echo.echo_success(f'Output written to `{output_file}`') - else: - echo.echo_error(f'Something went wrong, `{output_file}` not written.') From 839aa1cbf794e6d7e52a232cdfe88a782e282231 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 12 Jan 2024 12:10:17 +0000 Subject: [PATCH 07/59] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- 
src/aiida/cmdline/commands/cmd_profile.py | 42 +++++++++++++++++++++++ src/aiida/cmdline/commands/cmd_storage.py | 2 +- src/aiida/storage/psql_dos/backend.py | 16 +++++---- 3 files changed, 53 insertions(+), 7 deletions(-) diff --git a/src/aiida/cmdline/commands/cmd_profile.py b/src/aiida/cmdline/commands/cmd_profile.py index 8be4ed3bbf..435d4eca20 100644 --- a/src/aiida/cmdline/commands/cmd_profile.py +++ b/src/aiida/cmdline/commands/cmd_profile.py @@ -165,3 +165,45 @@ def profile_delete(force, delete_data, profiles): get_config().delete_profile(profile.name, delete_storage=delete_data) echo.echo_success(f'Profile `{profile.name}` was deleted.') + + +@verdi_profile.command('dbdump') +@options.PROFILE(default=defaults.get_default_profile) +@click.option('--output_file', type=click.Path(), help='Specify the output file path.') +def profile_dbdump(profile, output_file): + """Dump the PostgreSQL database into a file.""" + + import os + import pathlib + import subprocess + + if not output_file: + output_file = f'{profile.name}.psql' + + output_file = pathlib.Path(output_file) + + db_config = profile.dictionary['storage']['config'] + + cmd = [ + 'pg_dump', + f'--host={db_config["database_hostname"]}', + f'--port={db_config["database_port"]}', + f'--dbname={db_config["database_name"]}', + f'--username={db_config["database_username"]}', + '--no-password', + '--format=p', + f'--file={output_file}', + ] + + env = os.environ.copy() + env['PGPASSWORD'] = db_config['database_password'] + + pg_dump_output = subprocess.check_output(cmd, env=env).decode('utf-8') + + if len(pg_dump_output) > 0: + echo.echo_warning(f'Output from pg_dump: {pg_dump_output}') + + if output_file.is_file(): + echo.echo_success(f'Output written to `{output_file}`') + else: + echo.echo_error(f'Something went wrong, `{output_file}` not written.') diff --git a/src/aiida/cmdline/commands/cmd_storage.py b/src/aiida/cmdline/commands/cmd_storage.py index 13dc1107b7..b5ad306961 100644 --- 
a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -182,7 +182,7 @@ def storage_maintain(ctx, full, no_repack, force, dry_run, compress): '--rsync_exe', type=click.STRING, default='rsync', - help="Specify the 'rsync' executable, if not in PATH. Used for both local and remote destinations" + help="Specify the 'rsync' executable, if not in PATH. Used for both local and remote destinations", ) @decorators.with_dbenv() def storage_backup(dest: str, keep: int, pg_dump_exe: str, rsync_exe: str): diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index 21dedc640b..dc7020e40d 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -525,9 +525,14 @@ def _backup( env = os.environ.copy() env['PGPASSWORD'] = cfg['database_password'] cmd = [ - pg_dump_exe, f'--host={cfg["database_hostname"]}', f'--port={cfg["database_port"]}', - f'--dbname={cfg["database_name"]}', f'--username={cfg["database_username"]}', '--no-password', - '--format=p', f'--file={str(psql_temp_loc)}' + pg_dump_exe, + f'--host={cfg["database_hostname"]}', + f'--port={cfg["database_port"]}', + f'--dbname={cfg["database_name"]}', + f'--username={cfg["database_username"]}', + '--no-password', + '--format=p', + f'--file={psql_temp_loc!s}', ] try: subprocess.run(cmd, check=True, env=env) @@ -535,9 +540,9 @@ def _backup( raise backup_utils.BackupError(f'pg_dump: {exc}') if psql_temp_loc.is_file(): - STORAGE_LOGGER.info(f'Dumped the PostgreSQL database to {str(psql_temp_loc)}') + STORAGE_LOGGER.info(f'Dumped the PostgreSQL database to {psql_temp_loc!s}') else: - raise backup_utils.BackupError(f"'{str(psql_temp_loc)}' was not created.") + raise backup_utils.BackupError(f"'{psql_temp_loc!s}' was not created.") # step 3: transfer the PostgreSQL database file manager.call_rsync(psql_temp_loc, path, link_dest=prev_backup, dest_trailing_slash=True) @@ -560,7 +565,6 @@ def backup( keep: int, exes: 
dict, ) -> bool: - try: backup_manager = backup_utils.BackupManager(dest, STORAGE_LOGGER, exes=exes, keep=keep) backup_manager.backup_auto_folders(lambda path, prev: self._backup(backup_manager, path, prev)) From 28ea090708cef34a5fdf724a2ee04efbd6c9a633 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Fri, 12 Jan 2024 15:39:00 +0200 Subject: [PATCH 08/59] implement sphuber's review --- docs/source/reference/command_line.rst | 2 +- src/aiida/cmdline/commands/cmd_storage.py | 33 +++++++++---------- src/aiida/common/exceptions.py | 5 +++ .../orm/implementation/storage_backend.py | 10 ++++-- src/aiida/storage/psql_dos/backend.py | 16 ++++----- 5 files changed, 35 insertions(+), 31 deletions(-) diff --git a/docs/source/reference/command_line.rst b/docs/source/reference/command_line.rst index 652a223d29..a50d2b29f8 100644 --- a/docs/source/reference/command_line.rst +++ b/docs/source/reference/command_line.rst @@ -567,7 +567,7 @@ Below is a list with all available subcommands. --help Show this message and exit. Commands: - backup Create a backup of the profile data to destination location DEST, in a... + backup Backup the data storage of a profile. info Summarise the contents of the storage. integrity Checks for the integrity of the data storage. maintain Performs maintenance tasks on the repository. 
diff --git a/src/aiida/cmdline/commands/cmd_storage.py b/src/aiida/cmdline/commands/cmd_storage.py index b5ad306961..73a66553d9 100644 --- a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -7,7 +7,6 @@ # For further information please visit http://www.aiida.net # ########################################################################### """`verdi storage` commands.""" -import sys import click from click_spinner import spinner @@ -169,7 +168,7 @@ def storage_maintain(ctx, full, no_repack, force, dry_run, compress): @verdi_storage.command('backup') -@click.argument('dest', type=click.Path(), nargs=1) +@click.argument('dest', type=click.Path(file_okay=False), nargs=1) @click.option( '--keep', default=1, @@ -184,16 +183,21 @@ def storage_maintain(ctx, full, no_repack, force, dry_run, compress): default='rsync', help="Specify the 'rsync' executable, if not in PATH. Used for both local and remote destinations", ) -@decorators.with_dbenv() -def storage_backup(dest: str, keep: int, pg_dump_exe: str, rsync_exe: str): - """Create a backup of the profile data to destination location DEST, in a subfolder - backup__ and point a symlink called `last-backup` to it. +@decorators.with_manager +@click.pass_context +def storage_backup(ctx, manager, dest: str, keep: int, pg_dump_exe: str, rsync_exe: str): + """Backup the data storage of a profile. + + The backup is created in the destination `DEST`, in a subfolder that follows the naming convention + backup__ and a symlink called `last-backup` is pointed to it. NOTE: This is safe to run while the AiiDA profile is being used. Destination (DEST) can either be a local path, or a remote destination (reachable via ssh). In the latter case, remote destination needs to have the following syntax: - [@]: + + [@]: + i.e., contain the remote host name and the remote path, separated by a colon (and optionally the remote user separated by an @ symbol). 
You can tune SSH parameters using the standard options given by OpenSSH, such as adding configuration options to ~/.ssh/config (e.g. to allow for passwordless @@ -203,14 +207,9 @@ def storage_backup(dest: str, keep: int, pg_dump_exe: str, rsync_exe: str): non-UNIX environments. """ - from aiida.manage.manager import get_manager - - manager = get_manager() storage = manager.get_profile_storage() - - success = storage.backup(dest, keep, exes={'rsync': rsync_exe, 'pg_dump': pg_dump_exe}) - if not success: - click.echo('Backup was not successful.') - sys.exit(1) - - click.echo(f'Success! Profile backed up to {dest}') + try: + storage.backup(dest, keep, exes={'rsync': rsync_exe, 'pg_dump': pg_dump_exe}) + except (ValueError, exceptions.StorageBackupError) as exception: + echo.echo_criticial(f'An error occurred during the backup: {exception}') + click.echo(f'Data storage of profile `{ctx.obj.profile.name}` backed up to `{dest}`') diff --git a/src/aiida/common/exceptions.py b/src/aiida/common/exceptions.py index c1250b076b..6fdd1c2620 100644 --- a/src/aiida/common/exceptions.py +++ b/src/aiida/common/exceptions.py @@ -48,6 +48,7 @@ 'OutputParsingError', 'HashingError', 'StorageMigrationError', + 'StorageBackupError', 'LockedProfileError', 'LockingProfileError', 'ClosedStorage', @@ -218,6 +219,10 @@ class StorageMigrationError(DatabaseMigrationError): """Raised if a critical error is encountered during a storage migration.""" +class StorageBackupError(AiidaException): + """Raised if a critical error is encountered during a storage backup.""" + + class DbContentError(AiidaException): """Raised when the content of the DB is not valid. 
This should never happen if the user does not play directly diff --git a/src/aiida/orm/implementation/storage_backend.py b/src/aiida/orm/implementation/storage_backend.py index 00654ec9e0..70f0bd13b5 100644 --- a/src/aiida/orm/implementation/storage_backend.py +++ b/src/aiida/orm/implementation/storage_backend.py @@ -310,11 +310,15 @@ def backup( dest: str, keep: int, exes: dict, - ) -> bool: + ): """Create a backup of the storage contents. - :return: - True is successful and False if unsuccessful. + :param dest: The path to the destination folder. + :param keep: The number of backups to keep in the target destination. + :param exes: Dictionary of absolute paths to executables that are required during the backup + but that are not present in the `PATH`. + :raises ValueError: If the input parameters are invalid. + :raises StorageBackupError: If an error occurred during the backup procedure. """ raise NotImplementedError diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index dc7020e40d..f34bec0d49 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -18,6 +18,7 @@ from sqlalchemy import column, insert, update from sqlalchemy.orm import Session, scoped_session, sessionmaker +from aiida.common import exceptions from aiida.common.exceptions import ClosedStorage, ConfigurationError, IntegrityError from aiida.common.log import AIIDA_LOGGER from aiida.manage.configuration.profile import Profile @@ -499,20 +500,18 @@ def _backup( import subprocess import tempfile - from aiida.common import exceptions - from aiida.common.exceptions import LockedProfileError from aiida.manage.configuration import get_config from aiida.manage.profile_access import ProfileAccessManager cfg = self._profile.storage_config - container = Container(str(get_filepath_container(self.profile))) + container = Container(get_filepath_container(self.profile)) # check that the AiiDA profile is not locked and request access 
for the duration of this backup process # (locked means that possibly a maintenance operation is running that could interfere with the backup) try: ProfileAccessManager(self._profile).request_access() - except LockedProfileError: - raise backup_utils.BackupError('The profile is locked!') + except exceptions.LockedProfileError as exc: + raise exceptions.StorageBackupError('The profile is locked!') from exc # step 1: first run the storage maintenance version that can safely be performed while aiida is running self.maintain(full=False, compress=True) @@ -564,12 +563,9 @@ def backup( dest: str, keep: int, exes: dict, - ) -> bool: + ): try: backup_manager = backup_utils.BackupManager(dest, STORAGE_LOGGER, exes=exes, keep=keep) backup_manager.backup_auto_folders(lambda path, prev: self._backup(backup_manager, path, prev)) except backup_utils.BackupError as exc: - STORAGE_LOGGER.error(f'Error: {exc}') - return False - - return True + raise exceptions.StorageBackupError from exc From 32a7a397a03b6bc42becc2ffb291f32be10a49df Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Fri, 12 Jan 2024 17:30:20 +0200 Subject: [PATCH 09/59] config.json - only the backed up profile --- src/aiida/storage/psql_dos/backend.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index f34bec0d49..853306010f 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -500,7 +500,7 @@ def _backup( import subprocess import tempfile - from aiida.manage.configuration import get_config + from aiida.manage.configuration import Config, get_config from aiida.manage.profile_access import ProfileAccessManager cfg = self._profile.storage_config @@ -551,10 +551,16 @@ def _backup( manager, container, path / 'container', prev_backup=prev_backup / 'container' if prev_backup else None ) - # step 5: back up aiida config.json file + # step 5: back up aiida config.json 
file (strip other profiles!) try: config = get_config() - manager.call_rsync(pathlib.Path(config.filepath), path) + profile = config.get_profile(self.profile.name) # Get the profile being backed up + with tempfile.TemporaryDirectory() as tmpdir: + filepath_config = pathlib.Path(tmpdir) / 'config.json' + backup_config = Config(str(filepath_config), {}) # Create empty config at temporary file location + backup_config.add_profile(profile) # Add the profile being backed up + backup_config.store() # Write the contents to disk + manager.call_rsync(filepath_config, path) except (exceptions.MissingConfigurationError, exceptions.ConfigurationError): STORAGE_LOGGER.warning('aiida config.json not found!') From 10f25ed50d20e20cec0e9e9eeb045d62ca5ae58d Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Thu, 18 Jan 2024 19:31:34 +0200 Subject: [PATCH 10/59] add a minimal backup pytest --- tests/cmdline/commands/test_storage.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/cmdline/commands/test_storage.py b/tests/cmdline/commands/test_storage.py index 0fa0238c31..9824b39ebe 100644 --- a/tests/cmdline/commands/test_storage.py +++ b/tests/cmdline/commands/test_storage.py @@ -7,6 +7,9 @@ # For further information please visit http://www.aiida.net # ########################################################################### """Tests for `verdi storage`.""" +import tempfile +from pathlib import Path + import pytest from aiida import get_profile from aiida.cmdline.commands import cmd_storage @@ -176,3 +179,13 @@ def mock_maintain(*args, **kwargs): assert ' > full: True' in message_list assert ' > do_repack: False' in message_list assert ' > dry_run: False' in message_list + + +def tests_storage_backup(run_cli_command): + """Test the ``verdi storage backup`` command.""" + with tempfile.TemporaryDirectory() as tmpdir: + result = run_cli_command(cmd_storage.storage_backup, parameters=[tmpdir]) + assert ' backed up to ' in result.output + assert result.exit_code 
== 0 + last_backup = Path(tmpdir) / 'last-backup' + assert last_backup.is_symlink() From 63bf76b28b4f69ba2be2d1585cf77cbf1a6a7aba Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Tue, 23 Jan 2024 15:02:02 +0200 Subject: [PATCH 11/59] test psql_dos backup; raise NotImplementedError for other backends --- .../orm/implementation/storage_backend.py | 5 ++-- src/aiida/storage/psql_dos/backend.py | 6 ++--- src/aiida/storage/sqlite_dos/backend.py | 10 +++++++- src/aiida/storage/sqlite_temp/backend.py | 10 +++++++- src/aiida/storage/sqlite_zip/backend.py | 8 +++++++ tests/storage/psql_dos/test_backend.py | 23 +++++++++++++++++++ 6 files changed, 54 insertions(+), 8 deletions(-) diff --git a/src/aiida/orm/implementation/storage_backend.py b/src/aiida/orm/implementation/storage_backend.py index 70f0bd13b5..907fd6480e 100644 --- a/src/aiida/orm/implementation/storage_backend.py +++ b/src/aiida/orm/implementation/storage_backend.py @@ -308,8 +308,8 @@ def maintain(self, full: bool = False, dry_run: bool = False, **kwargs) -> None: def backup( self, dest: str, - keep: int, - exes: dict, + keep: int = 1, + exes: Optional[dict] = None, ): """Create a backup of the storage contents. @@ -320,7 +320,6 @@ def backup( :raises ValueError: If the input parameters are invalid. :raises StorageBackupError: If an error occurred during the backup procedure. """ - raise NotImplementedError def get_info(self, detailed: bool = False) -> dict: """Return general information on the storage. 
diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index 853306010f..7aa09b6e22 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -517,7 +517,7 @@ def _backup( self.maintain(full=False, compress=True) # step 2: dump the PostgreSQL database into a temporary directory - pg_dump_exe = manager.exes['pg_dump'] + pg_dump_exe = manager.exes.get('pg_dump', 'pg_dump') with tempfile.TemporaryDirectory() as temp_dir_name: psql_temp_loc = pathlib.Path(temp_dir_name) / 'db.psql' @@ -567,8 +567,8 @@ def _backup( def backup( self, dest: str, - keep: int, - exes: dict, + keep: int = 1, + exes: Optional[dict] = None, ): try: backup_manager = backup_utils.BackupManager(dest, STORAGE_LOGGER, exes=exes, keep=keep) diff --git a/src/aiida/storage/sqlite_dos/backend.py b/src/aiida/storage/sqlite_dos/backend.py index d738c7c856..25187753f3 100644 --- a/src/aiida/storage/sqlite_dos/backend.py +++ b/src/aiida/storage/sqlite_dos/backend.py @@ -12,7 +12,7 @@ from functools import cached_property from pathlib import Path from shutil import rmtree -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional from uuid import uuid4 from disk_objectstore import Container @@ -200,3 +200,11 @@ def nodes(self): @cached_property def users(self): return orm.SqliteUserCollection(self) + + def backup( + self, + dest: str, + keep: int = 1, + exes: Optional[dict] = None, + ): + raise NotImplementedError diff --git a/src/aiida/storage/sqlite_temp/backend.py b/src/aiida/storage/sqlite_temp/backend.py index 398dc01264..ea97e5ea6f 100644 --- a/src/aiida/storage/sqlite_temp/backend.py +++ b/src/aiida/storage/sqlite_temp/backend.py @@ -16,7 +16,7 @@ from contextlib import contextmanager, nullcontext from pathlib import Path from tempfile import mkdtemp -from typing import Any, BinaryIO, Iterator, Sequence +from typing import Any, BinaryIO, Iterator, Optional, Sequence from pydantic import BaseModel, Field 
from sqlalchemy import column, insert, update @@ -287,6 +287,14 @@ def delete(self) -> None: def delete_nodes_and_connections(self, pks_to_delete: Sequence[int]): raise NotImplementedError + def backup( + self, + dest: str, + keep: int = 1, + exes: Optional[dict] = None, + ): + raise NotImplementedError + class SandboxShaRepositoryBackend(SandboxRepositoryBackend): """A sandbox repository backend that uses the sha256 of the file as the key. diff --git a/src/aiida/storage/sqlite_zip/backend.py b/src/aiida/storage/sqlite_zip/backend.py index 62e1f080a7..429f642d2b 100644 --- a/src/aiida/storage/sqlite_zip/backend.py +++ b/src/aiida/storage/sqlite_zip/backend.py @@ -312,6 +312,14 @@ def get_info(self, detailed: bool = False) -> dict: results['repository'] = self.get_repository().get_info(detailed) return results + def backup( + self, + dest: str, + keep: int = 1, + exes: Optional[dict] = None, + ): + raise NotImplementedError + class _RoBackendRepository(AbstractRepositoryBackend): """A backend abstract for a read-only folder or zip file.""" diff --git a/tests/storage/psql_dos/test_backend.py b/tests/storage/psql_dos/test_backend.py index 01f019adf5..7cadacbc49 100644 --- a/tests/storage/psql_dos/test_backend.py +++ b/tests/storage/psql_dos/test_backend.py @@ -151,3 +151,26 @@ def test_unload_profile(): assert len(_sessions) == current_sessions - 1, str(_sessions) finally: manager.load_profile(profile_name) + + +def test_backup(): + """Test that the backup function creates all the necessary files and folders""" + + import tempfile + from pathlib import Path + + storage_backend = get_manager().get_profile_storage() + + with tempfile.TemporaryDirectory() as tmpdir: + # note: this assumes that rsync and pg_dump are in PATH + storage_backend.backup(tmpdir) + + last_backup = Path(tmpdir) / 'last-backup' + assert last_backup.is_symlink() + + # make sure the necessary files are there + # note: disk-objectstore container backup is already + # tested in its own repo + contents 
= [c.name for c in last_backup.iterdir()] + for name in ['config.json', 'container', 'db.psql']: + assert name in contents From db08d0c630788fecc8ab22f899db5caa793014c6 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Wed, 24 Jan 2024 13:14:19 +0200 Subject: [PATCH 12/59] docs: update backup instructions --- docs/source/howto/installation.rst | 42 ++++++++++++++++++------------ 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/docs/source/howto/installation.rst b/docs/source/howto/installation.rst index 534582963a..3e427a738b 100644 --- a/docs/source/howto/installation.rst +++ b/docs/source/howto/installation.rst @@ -547,17 +547,26 @@ See the :doc:`../reference/_changelog` for a list of breaking changes. .. _how-to:installation:backup: -Backing up your installation +Backing up your data ============================ -A full backup of an AiiDA instance and AiiDA managed data requires a backup of: +The most convenient way to back up the data of a single AiiDA profile is to use -* the AiiDA configuration folder, which is named ``.aiida``. - The location of the folder is shown in the output of ``verdi status``. - This folder contains, among other things, the ``config.json`` configuration file and log files. +.. code:: bash + + $ verdi --profile storage backup /path/to/destination + +This command is safe to use when AiiDA is running and it automatically manages a subfolder structure +of previous backups at the destination. The primary underlying tool is ``rsync``, which is used such that +a new backup is hard-linked to the previous one, keeping the process incremental and efficient. + +This command is safe to use when AiiDA is running, it automatically manages a subfolder structure of previous backups, and new backups are done in an efficient way (using ``rsync`` hard-link functionality to the previous backup). 
+The command backs up everything that's needed to restore the profile later: + +* the AiiDA configuration file ``.aiida/config.json``, from which other profiles are removed (see ``verdi status`` for exact location); +* all the data of the backed up profile (which depends on the storage backend). -* the data stored for each profile. - Where the data is stored, depends on the storage backend used by each profile. +Alternatively, one can also manually create a backup. This requires a backup of the configuration file ``.aiida/config.json`` and the storage backend. The panels below provide instructions for storage backends provided by ``aiida-core``. To determine what storage backend a profile uses, call ``verdi profile show``. The panels below provide instructions for storage backends provided by ``aiida-core``. To determine what storage backend a profile uses, call ``verdi profile show``. @@ -605,35 +614,34 @@ To determine what storage backend a profile uses, call ``verdi profile show``. .. _how-to:installation:backup:restore: -Restoring your installation +Restoring data from a backup =========================== -Restoring a backed up AiiDA installation requires: +Restoring a backed up AiiDA profile requires: -* restoring the backed up ``.aiida`` folder, with at the very least the ``config.json`` file it contains. - It should be placed in the path defined by the ``AIIDA_PATH`` environment variable. - To test the restoration worked, run ``verdi profile list`` to verify that all profiles are displayed. +* restoring the profile information in the ``.aiida/config.json`` based on the backed up ``config.json`` file (entry under the "profiles" key). Some information (e.g. the database parameters) might need to be updated. -* restoring the data of each backed up profile. +* restoring the data of the backed up profile according to the ``config.json`` entry. Like the backup procedure, this is dependent on the storage backend used by the profile. 
The panels below provide instructions for storage backends provided by ``aiida-core``. To determine what storage backend a profile uses, call ``verdi profile show``. +To test if the restoration worked, run ``verdi profile list`` to verify that the profile is displayed. .. tab-set:: .. tab-item:: psql_dos - To fully backup the data stored for a profile using the ``core.psql_dos`` backend, you should restore the associated database and file repository. + To restore the backed up data for a profile using the ``core.psql_dos`` backend, you should restore the associated database and file repository. **PostgreSQL database** - To restore the PostgreSQL database from the ``.psql`` file that was backed up, first you should create an empty database following the instructions described in :ref:`database ` skipping the ``verdi setup`` phase. + To restore the PostgreSQL database from the ``db.psql`` file that was backed up, first you should create an empty database following the instructions described in :ref:`database ` skipping the ``verdi setup`` phase. The backed up data can then be imported by calling: .. code-block:: console - psql -h -p -d -W < aiida_backup.psql + psql -h -p - U -d -W < db.psql **File repository** @@ -642,7 +650,7 @@ To determine what storage backend a profile uses, call ``verdi profile show``. .. code-block:: console - rsync -arvz /some/path/aiida_backup + rsync -arvz /path/to/backup/container .. 
_how-to:installation:multi-user: From 0304201f2d5d93f045e22b6cdf83ed4d921b2716 Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Wed, 24 Jan 2024 12:34:30 +0100 Subject: [PATCH 13/59] temporarily install `disk-objectstore` from repo --- environment.yml | 2 +- pyproject.toml | 2 +- requirements/requirements-py-3.10.txt | 2 +- requirements/requirements-py-3.11.txt | 2 +- requirements/requirements-py-3.12.txt | 2 +- requirements/requirements-py-3.9.txt | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/environment.yml b/environment.yml index 559aebcae9..fa1536dab0 100644 --- a/environment.yml +++ b/environment.yml @@ -12,7 +12,7 @@ dependencies: - circus~=0.18.0 - click-spinner~=0.1.8 - click~=8.1 -- disk-objectstore~=1.0 +- disk-objectstore@ git+https://github.com/aiidateam/disk-objectstore - docstring_parser - get-annotations~=0.1 - python-graphviz~=0.19 diff --git a/pyproject.toml b/pyproject.toml index 2982a26cae..12ac66f0ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ 'circus~=0.18.0', 'click-spinner~=0.1.8', 'click~=8.1', - 'disk-objectstore~=1.0', + 'disk-objectstore@git+https://github.com/aiidateam/disk-objectstore', 'docstring-parser', 'get-annotations~=0.1;python_version<"3.10"', 'graphviz~=0.19', diff --git a/requirements/requirements-py-3.10.txt b/requirements/requirements-py-3.10.txt index e9c9e0a079..bf63cb4ce5 100644 --- a/requirements/requirements-py-3.10.txt +++ b/requirements/requirements-py-3.10.txt @@ -41,7 +41,7 @@ debugpy==1.6.7 decorator==5.1.1 defusedxml==0.7.1 deprecation==2.1.0 -disk-objectstore==1.0.0 +git+https://github.com/aiidateam/disk-objectstore docstring-parser==0.15 docutils==0.20.1 emmet-core==0.57.1 diff --git a/requirements/requirements-py-3.11.txt b/requirements/requirements-py-3.11.txt index 183e5181b9..7a046ae5fd 100644 --- a/requirements/requirements-py-3.11.txt +++ b/requirements/requirements-py-3.11.txt @@ -41,7 +41,7 @@ debugpy==1.6.7 decorator==5.1.1 
defusedxml==0.7.1 deprecation==2.1.0 -disk-objectstore==1.0.0 +git+https://github.com/aiidateam/disk-objectstore docstring-parser==0.15 docutils==0.20.1 emmet-core==0.57.1 diff --git a/requirements/requirements-py-3.12.txt b/requirements/requirements-py-3.12.txt index 71786d7003..bafd70bf7a 100644 --- a/requirements/requirements-py-3.12.txt +++ b/requirements/requirements-py-3.12.txt @@ -41,7 +41,7 @@ debugpy==1.8.0 decorator==5.1.1 defusedxml==0.7.1 deprecation==2.1.0 -disk-objectstore==1.0.0 +git+https://github.com/aiidateam/disk-objectstore docstring-parser==0.15 docutils==0.20.1 executing==2.0.0 diff --git a/requirements/requirements-py-3.9.txt b/requirements/requirements-py-3.9.txt index 214a70f8d9..446747356e 100644 --- a/requirements/requirements-py-3.9.txt +++ b/requirements/requirements-py-3.9.txt @@ -41,7 +41,7 @@ debugpy==1.6.7 decorator==5.1.1 defusedxml==0.7.1 deprecation==2.1.0 -disk-objectstore==1.0.0 +git+https://github.com/aiidateam/disk-objectstore docstring-parser==0.15 docutils==0.20.1 emmet-core==0.57.1 From 78ed0da5416901ba4f53498495efdf48a01e84d8 Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Wed, 24 Jan 2024 12:50:39 +0100 Subject: [PATCH 14/59] docs: restore tui section --- docs/source/reference/command_line.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/source/reference/command_line.rst b/docs/source/reference/command_line.rst index a50d2b29f8..d15c3b3ce4 100644 --- a/docs/source/reference/command_line.rst +++ b/docs/source/reference/command_line.rst @@ -575,6 +575,21 @@ Below is a list with all available subcommands. version Print the current version of the storage schema. +.. _reference:command-line:verdi-tui: + +``verdi tui`` +------------- + +.. code:: console + + Usage: [OPTIONS] + + Open Textual TUI. + + Options: + --help Show this message and exit. + + .. 
_reference:command-line:verdi-user: ``verdi user`` From ec913e9c693f497c47afd31e783aa4b85c4e9f34 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Wed, 24 Jan 2024 14:43:34 +0200 Subject: [PATCH 15/59] Update docs/source/howto/installation.rst Co-authored-by: Sebastiaan Huber --- docs/source/howto/installation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/howto/installation.rst b/docs/source/howto/installation.rst index 3e427a738b..74cc5a11d8 100644 --- a/docs/source/howto/installation.rst +++ b/docs/source/howto/installation.rst @@ -626,7 +626,7 @@ Restoring a backed up AiiDA profile requires: The panels below provide instructions for storage backends provided by ``aiida-core``. To determine what storage backend a profile uses, call ``verdi profile show``. -To test if the restoration worked, run ``verdi profile list`` to verify that the profile is displayed. +To test if the restoration worked, run ``verdi -p status`` to verify that AiiDA can successfully connect to the data storage. .. tab-set:: From c4d52bf89ec910124b1be34a66590d747e35d042 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Wed, 24 Jan 2024 14:44:17 +0200 Subject: [PATCH 16/59] Update src/aiida/cmdline/commands/cmd_storage.py Co-authored-by: Sebastiaan Huber --- src/aiida/cmdline/commands/cmd_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiida/cmdline/commands/cmd_storage.py b/src/aiida/cmdline/commands/cmd_storage.py index 73a66553d9..f898a3e75d 100644 --- a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -175,7 +175,7 @@ def storage_maintain(ctx, full, no_repack, force, dry_run, compress): help='Number of previous backups to keep in the destination. (default: 1)', ) @click.option( - '--pg_dump_exe', type=click.STRING, default='pg_dump', help="Specify the 'pg_dump' executable, if not in PATH." 
+ '--pg-dump-exe', type=click.STRING, default='pg_dump', help="Specify the 'pg_dump' executable, if not in PATH." ) @click.option( '--rsync_exe', From 877827d8c6f88ac0d4c92f5b5dc10321eca3fbf5 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Wed, 24 Jan 2024 14:48:13 +0200 Subject: [PATCH 17/59] Update src/aiida/cmdline/commands/cmd_storage.py Co-authored-by: Sebastiaan Huber --- src/aiida/cmdline/commands/cmd_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiida/cmdline/commands/cmd_storage.py b/src/aiida/cmdline/commands/cmd_storage.py index f898a3e75d..e24725f838 100644 --- a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -178,7 +178,7 @@ def storage_maintain(ctx, full, no_repack, force, dry_run, compress): '--pg-dump-exe', type=click.STRING, default='pg_dump', help="Specify the 'pg_dump' executable, if not in PATH." ) @click.option( - '--rsync_exe', + '--rsync-exe', type=click.STRING, default='rsync', help="Specify the 'rsync' executable, if not in PATH. 
Used for both local and remote destinations", From 07f81685ed0325917bc25affe922d0ed00b22c25 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Wed, 24 Jan 2024 14:48:42 +0200 Subject: [PATCH 18/59] Update src/aiida/cmdline/commands/cmd_storage.py Co-authored-by: Sebastiaan Huber --- src/aiida/cmdline/commands/cmd_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiida/cmdline/commands/cmd_storage.py b/src/aiida/cmdline/commands/cmd_storage.py index e24725f838..ee7653a85d 100644 --- a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -212,4 +212,4 @@ def storage_backup(ctx, manager, dest: str, keep: int, pg_dump_exe: str, rsync_e storage.backup(dest, keep, exes={'rsync': rsync_exe, 'pg_dump': pg_dump_exe}) except (ValueError, exceptions.StorageBackupError) as exception: echo.echo_criticial(f'An error occurred during the backup: {exception}') - click.echo(f'Data storage of profile `{ctx.obj.profile.name}` backed up to `{dest}`') + echo.echo_success(f'Data storage of profile `{ctx.obj.profile.name}` backed up to `{dest}`') From de74427a433d79940174ed9928705a1c4474f60d Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Wed, 24 Jan 2024 14:49:19 +0200 Subject: [PATCH 19/59] Update tests/cmdline/commands/test_storage.py Co-authored-by: Sebastiaan Huber --- tests/cmdline/commands/test_storage.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/cmdline/commands/test_storage.py b/tests/cmdline/commands/test_storage.py index 9824b39ebe..847609d936 100644 --- a/tests/cmdline/commands/test_storage.py +++ b/tests/cmdline/commands/test_storage.py @@ -181,11 +181,10 @@ def mock_maintain(*args, **kwargs): assert ' > dry_run: False' in message_list -def tests_storage_backup(run_cli_command): +def tests_storage_backup(run_cli_command, tmp_path): """Test the ``verdi storage backup`` command.""" - with tempfile.TemporaryDirectory() as tmpdir: - result = 
run_cli_command(cmd_storage.storage_backup, parameters=[tmpdir]) - assert ' backed up to ' in result.output - assert result.exit_code == 0 - last_backup = Path(tmpdir) / 'last-backup' - assert last_backup.is_symlink() + result = run_cli_command(cmd_storage.storage_backup, parameters=[str(tmp_path)]) + assert ' backed up to ' in result.output + assert result.exit_code == 0 + last_backup = tmp_path / 'last-backup' + assert last_backup.is_symlink() From 980f74cf9535c68562a26a25b9ce1451f140f632 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 12:49:34 +0000 Subject: [PATCH 20/59] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/cmdline/commands/test_storage.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/cmdline/commands/test_storage.py b/tests/cmdline/commands/test_storage.py index 847609d936..5646768e38 100644 --- a/tests/cmdline/commands/test_storage.py +++ b/tests/cmdline/commands/test_storage.py @@ -7,8 +7,6 @@ # For further information please visit http://www.aiida.net # ########################################################################### """Tests for `verdi storage`.""" -import tempfile -from pathlib import Path import pytest from aiida import get_profile From c4c67ae2453b20eb36cbd7eeeb60b652f12db625 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Wed, 24 Jan 2024 14:50:26 +0200 Subject: [PATCH 21/59] Update tests/storage/psql_dos/test_backend.py Co-authored-by: Sebastiaan Huber --- tests/storage/psql_dos/test_backend.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/tests/storage/psql_dos/test_backend.py b/tests/storage/psql_dos/test_backend.py index 7cadacbc49..a8d260dc3b 100644 --- a/tests/storage/psql_dos/test_backend.py +++ b/tests/storage/psql_dos/test_backend.py @@ -153,24 +153,18 @@ def test_unload_profile(): manager.load_profile(profile_name) 
-def test_backup(): +def test_backup(tmp_path): """Test that the backup function creates all the necessary files and folders""" - - import tempfile - from pathlib import Path - storage_backend = get_manager().get_profile_storage() - with tempfile.TemporaryDirectory() as tmpdir: - # note: this assumes that rsync and pg_dump are in PATH - storage_backend.backup(tmpdir) + # note: this assumes that rsync and pg_dump are in PATH + storage_backend.backup(str(tmp_path)) - last_backup = Path(tmpdir) / 'last-backup' - assert last_backup.is_symlink() + last_backup = tmp_path / 'last-backup' + assert last_backup.is_symlink() - # make sure the necessary files are there - # note: disk-objectstore container backup is already - # tested in its own repo - contents = [c.name for c in last_backup.iterdir()] - for name in ['config.json', 'container', 'db.psql']: - assert name in contents + # make sure the necessary files are there + # note: disk-objectstore container backup is already tested in its own repo + contents = [c.name for c in last_backup.iterdir()] + for name in ['config.json', 'container', 'db.psql']: + assert name in contents From 78fa31196ddbb440b07238483a7c7bff7797d514 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Wed, 24 Jan 2024 15:00:58 +0200 Subject: [PATCH 22/59] storage_backend backup non-abstract --- src/aiida/orm/implementation/storage_backend.py | 2 +- src/aiida/storage/sqlite_temp/backend.py | 10 +--------- src/aiida/storage/sqlite_zip/backend.py | 8 -------- 3 files changed, 2 insertions(+), 18 deletions(-) diff --git a/src/aiida/orm/implementation/storage_backend.py b/src/aiida/orm/implementation/storage_backend.py index 907fd6480e..c1b2ca235f 100644 --- a/src/aiida/orm/implementation/storage_backend.py +++ b/src/aiida/orm/implementation/storage_backend.py @@ -304,7 +304,6 @@ def maintain(self, full: bool = False, dry_run: bool = False, **kwargs) -> None: :param dry_run: flag to only print the actions that would be taken without actually executing 
them. """ - @abc.abstractmethod def backup( self, dest: str, @@ -320,6 +319,7 @@ def backup( :raises ValueError: If the input parameters are invalid. :raises StorageBackupError: If an error occurred during the backup procedure. """ + raise NotImplementedError def get_info(self, detailed: bool = False) -> dict: """Return general information on the storage. diff --git a/src/aiida/storage/sqlite_temp/backend.py b/src/aiida/storage/sqlite_temp/backend.py index ea97e5ea6f..398dc01264 100644 --- a/src/aiida/storage/sqlite_temp/backend.py +++ b/src/aiida/storage/sqlite_temp/backend.py @@ -16,7 +16,7 @@ from contextlib import contextmanager, nullcontext from pathlib import Path from tempfile import mkdtemp -from typing import Any, BinaryIO, Iterator, Optional, Sequence +from typing import Any, BinaryIO, Iterator, Sequence from pydantic import BaseModel, Field from sqlalchemy import column, insert, update @@ -287,14 +287,6 @@ def delete(self) -> None: def delete_nodes_and_connections(self, pks_to_delete: Sequence[int]): raise NotImplementedError - def backup( - self, - dest: str, - keep: int = 1, - exes: Optional[dict] = None, - ): - raise NotImplementedError - class SandboxShaRepositoryBackend(SandboxRepositoryBackend): """A sandbox repository backend that uses the sha256 of the file as the key. 
diff --git a/src/aiida/storage/sqlite_zip/backend.py b/src/aiida/storage/sqlite_zip/backend.py index 429f642d2b..62e1f080a7 100644 --- a/src/aiida/storage/sqlite_zip/backend.py +++ b/src/aiida/storage/sqlite_zip/backend.py @@ -312,14 +312,6 @@ def get_info(self, detailed: bool = False) -> dict: results['repository'] = self.get_repository().get_info(detailed) return results - def backup( - self, - dest: str, - keep: int = 1, - exes: Optional[dict] = None, - ): - raise NotImplementedError - class _RoBackendRepository(AbstractRepositoryBackend): """A backend abstract for a read-only folder or zip file.""" From c656feb7591faa2e11a9a377b16fb03e39f5dd64 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Wed, 24 Jan 2024 15:38:02 +0200 Subject: [PATCH 23/59] docs: add backup mention in FAQ --- docs/source/howto/faq.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/source/howto/faq.rst b/docs/source/howto/faq.rst index 57122c267f..0fa6e72046 100644 --- a/docs/source/howto/faq.rst +++ b/docs/source/howto/faq.rst @@ -111,3 +111,9 @@ When the SSH key pair expires, AiiDA will fail to connect to the remote computer This will cause all calculations submitted on that computer to pause. To restart them, one needs to generate a new SSH key pair and play the paused processes using ``verdi process play --all``. Typically, this is all one needs to do - AiiDA will re-establish the connection to the computer and will continue following the calculations. + +How to back up AiiDA data? +============================================================================= + +The most convenient way to back up an AiiDA profile is to use the ``verdi --profile storage backup`` command. +For more information, see :ref:`how-to:installation:backup`. 
From 4a154ffcda09f3b238fab1603ebe22e11b57e977 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Wed, 24 Jan 2024 15:53:14 +0200 Subject: [PATCH 24/59] docs: update backup section --- docs/source/howto/installation.rst | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/docs/source/howto/installation.rst b/docs/source/howto/installation.rst index 74cc5a11d8..9650c50e4e 100644 --- a/docs/source/howto/installation.rst +++ b/docs/source/howto/installation.rst @@ -550,26 +550,27 @@ See the :doc:`../reference/_changelog` for a list of breaking changes. Backing up your data ============================ +General information +----------------- + The most convenient way to back up the data of a single AiiDA profile is to use .. code:: bash $ verdi --profile storage backup /path/to/destination -This command is safe to use when AiiDA is running and it automatically manages a subfolder structure -of previous backups at the destination. The primary underlying tool is ``rsync``, which is used such that -a new backup is hard-linked to the previous one, keeping the process incremental and efficient. - This command is safe to use when AiiDA is running, it automatically manages a subfolder structure of previous backups, and new backups are done in an efficient way (using ``rsync`` hard-link functionality to the previous backup). The command backs up everything that's needed to restore the profile later: * the AiiDA configuration file ``.aiida/config.json``, from which other profiles are removed (see ``verdi status`` for exact location); * all the data of the backed up profile (which depends on the storage backend). -Alternatively, one can also manually create a backup. This requires a backup of the configuration file ``.aiida/config.json`` and the storage backend. The panels below provide instructions for storage backends provided by ``aiida-core``. To determine what storage backend a profile uses, call ``verdi profile show``. 
+The specific procedure and whether it is even implemented depends on the storage backend. -The panels below provide instructions for storage backends provided by ``aiida-core``. -To determine what storage backend a profile uses, call ``verdi profile show``. +Storage backend specific information +----------------- + +Alternatively to the CLI command, one can also manually create a backup. This requires a backup of the configuration file ``.aiida/config.json`` and the storage backend. The panels below provide instructions for storage backends provided by ``aiida-core``. To determine what storage backend a profile uses, call ``verdi profile show``. .. tip:: Before creating a backup, it is recommended to run ``verdi storage maintain``. This will optimize the storage which can significantly reduce the time required to create the backup. From b30120a1575d3c5aaafc0f89d6b705f4c271d7f8 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Wed, 24 Jan 2024 21:16:23 +0200 Subject: [PATCH 25/59] adapt docs --- docs/source/howto/installation.rst | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/source/howto/installation.rst b/docs/source/howto/installation.rst index 9650c50e4e..2e4696cc4a 100644 --- a/docs/source/howto/installation.rst +++ b/docs/source/howto/installation.rst @@ -551,7 +551,7 @@ Backing up your data ============================ General information ------------------ +----------------------------------------- The most convenient way to back up the data of a single AiiDA profile is to use @@ -559,16 +559,19 @@ The most convenient way to back up the data of a single AiiDA profile is to use $ verdi --profile storage backup /path/to/destination -This command is safe to use when AiiDA is running, it automatically manages a subfolder structure of previous backups, and new backups are done in an efficient way (using ``rsync`` hard-link functionality to the previous backup). 
+This command automatically manages a subfolder structure of previous backups, and new backups are done in an efficient way (using ``rsync`` hard-link functionality to the previous backup). The command backs up everything that's needed to restore the profile later: * the AiiDA configuration file ``.aiida/config.json``, from which other profiles are removed (see ``verdi status`` for exact location); * all the data of the backed up profile (which depends on the storage backend). -The specific procedure and whether it is even implemented depends on the storage backend. +The specific procedure of the command and whether it even is implemented depends on the storage backend. + +.. note:: + The ``verdi storage backup`` command is implemented in a way to be as safe as possible to use when AiiDA is running, meaning that it will most likely produce an uncorrupted backup even when data is being modified. However, the exact conditions depend on the specific storage backend and to err on the safe side, only perform a backup when the profile is not in use. Storage backend specific information ------------------ +----------------------------------------- Alternatively to the CLI command, one can also manually create a backup. This requires a backup of the configuration file ``.aiida/config.json`` and the storage backend. The panels below provide instructions for storage backends provided by ``aiida-core``. To determine what storage backend a profile uses, call ``verdi profile show``. @@ -616,7 +619,7 @@ Alternatively to the CLI command, one can also manually create a backup. This re .. 
_how-to:installation:backup:restore: Restoring data from a backup -=========================== +================================== Restoring a backed up AiiDA profile requires: From c6834ce7c499763dd3479f85a4c2d56158b10f17 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Wed, 24 Jan 2024 21:26:15 +0200 Subject: [PATCH 26/59] config.json check at the start; doc changes --- docs/source/reference/command_line.rst | 15 --------- src/aiida/cmdline/commands/cmd_storage.py | 2 -- src/aiida/storage/psql_dos/backend.py | 40 +++++++++++------------ 3 files changed, 20 insertions(+), 37 deletions(-) diff --git a/docs/source/reference/command_line.rst b/docs/source/reference/command_line.rst index d15c3b3ce4..a50d2b29f8 100644 --- a/docs/source/reference/command_line.rst +++ b/docs/source/reference/command_line.rst @@ -575,21 +575,6 @@ Below is a list with all available subcommands. version Print the current version of the storage schema. -.. _reference:command-line:verdi-tui: - -``verdi tui`` -------------- - -.. code:: console - - Usage: [OPTIONS] - - Open Textual TUI. - - Options: - --help Show this message and exit. - - .. _reference:command-line:verdi-user: ``verdi user`` diff --git a/src/aiida/cmdline/commands/cmd_storage.py b/src/aiida/cmdline/commands/cmd_storage.py index ee7653a85d..1f9d2fcf85 100644 --- a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -191,8 +191,6 @@ def storage_backup(ctx, manager, dest: str, keep: int, pg_dump_exe: str, rsync_e The backup is created in the destination `DEST`, in a subfolder that follows the naming convention backup__ and a symlink called `last-backup` is pointed to it. - NOTE: This is safe to run while the AiiDA profile is being used. - Destination (DEST) can either be a local path, or a remote destination (reachable via ssh). 
In the latter case, remote destination needs to have the following syntax: diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index 7aa09b6e22..b0f19e9ac0 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -486,15 +486,15 @@ def _backup( ) -> None: """Create a backup of the postgres database and disk-objectstore to the provided path. + :param manager: + BackupManager from backup_utils containing utilities such as for calling the rsync. + :param path: Path to where the backup will be created. :param prev_backup: Path to the previous backup. Rsync calls will be hard-linked to this path, making the backup incremental and efficient. - - :return: - True is successful and False if unsuccessful. """ import os import subprocess @@ -513,10 +513,23 @@ def _backup( except exceptions.LockedProfileError as exc: raise exceptions.StorageBackupError('The profile is locked!') from exc - # step 1: first run the storage maintenance version that can safely be performed while aiida is running + # step 1: back up aiida config.json file (strip other profiles!) 
+ try: + config = get_config() + profile = config.get_profile(self.profile.name) # Get the profile being backed up + with tempfile.TemporaryDirectory() as tmpdir: + filepath_config = pathlib.Path(tmpdir) / 'config.json' + backup_config = Config(str(filepath_config), {}) # Create empty config at temporary file location + backup_config.add_profile(profile) # Add the profile being backed up + backup_config.store() # Write the contents to disk + manager.call_rsync(filepath_config, path) + except (exceptions.MissingConfigurationError, exceptions.ConfigurationError) as exc: + raise exceptions.StorageBackupError('aiida config.json not found!') from exc + + # step 2: first run the storage maintenance version that can safely be performed while aiida is running self.maintain(full=False, compress=True) - # step 2: dump the PostgreSQL database into a temporary directory + # step 3: dump the PostgreSQL database into a temporary directory pg_dump_exe = manager.exes.get('pg_dump', 'pg_dump') with tempfile.TemporaryDirectory() as temp_dir_name: psql_temp_loc = pathlib.Path(temp_dir_name) / 'db.psql' @@ -543,27 +556,14 @@ def _backup( else: raise backup_utils.BackupError(f"'{psql_temp_loc!s}' was not created.") - # step 3: transfer the PostgreSQL database file + # step 4: transfer the PostgreSQL database file manager.call_rsync(psql_temp_loc, path, link_dest=prev_backup, dest_trailing_slash=True) - # step 4: back up the disk-objectstore + # step 5: back up the disk-objectstore backup_utils.backup_container( manager, container, path / 'container', prev_backup=prev_backup / 'container' if prev_backup else None ) - # step 5: back up aiida config.json file (strip other profiles!) 
- try: - config = get_config() - profile = config.get_profile(self.profile.name) # Get the profile being backed up - with tempfile.TemporaryDirectory() as tmpdir: - filepath_config = pathlib.Path(tmpdir) / 'config.json' - backup_config = Config(str(filepath_config), {}) # Create empty config at temporary file location - backup_config.add_profile(profile) # Add the profile being backed up - backup_config.store() # Write the contents to disk - manager.call_rsync(filepath_config, path) - except (exceptions.MissingConfigurationError, exceptions.ConfigurationError): - STORAGE_LOGGER.warning('aiida config.json not found!') - def backup( self, dest: str, From d6bd0b165762f2d261d7d305b50e9b22473aab1d Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Thu, 25 Jan 2024 16:29:59 +0200 Subject: [PATCH 27/59] cli: pg-dump-exe comment --- src/aiida/cmdline/commands/cmd_storage.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/aiida/cmdline/commands/cmd_storage.py b/src/aiida/cmdline/commands/cmd_storage.py index 1f9d2fcf85..064b5ffcbf 100644 --- a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -175,7 +175,10 @@ def storage_maintain(ctx, full, no_repack, force, dry_run, compress): help='Number of previous backups to keep in the destination. (default: 1)', ) @click.option( - '--pg-dump-exe', type=click.STRING, default='pg_dump', help="Specify the 'pg_dump' executable, if not in PATH." + '--pg-dump-exe', + type=click.STRING, + default='pg_dump', + help="Specify the 'pg_dump' executable, if not in PATH. 
Only needed for a PostgreSQL-based backend", ) @click.option( '--rsync-exe', From 26203bd6219a831df64d3ecb3c5333b51b68824d Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Thu, 25 Jan 2024 17:33:26 +0200 Subject: [PATCH 28/59] add backup_utils.BackupManager to docs nitpick-exceptions --- docs/source/nitpick-exceptions | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/nitpick-exceptions b/docs/source/nitpick-exceptions index 6e09890b38..35f134c155 100644 --- a/docs/source/nitpick-exceptions +++ b/docs/source/nitpick-exceptions @@ -148,6 +148,7 @@ py:class concurrent.futures._base.TimeoutError py:class concurrent.futures._base.Future py:class disk_objectstore.utils.LazyOpener +py:class disk_objectstore.backup_utils.BackupManager py:class frozenset From 7028dbf9c8f26edcee7d52d1c883421e9a438daf Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Wed, 7 Feb 2024 14:50:14 +0200 Subject: [PATCH 29/59] Update src/aiida/cmdline/commands/cmd_storage.py Co-authored-by: Edan Bainglass <45081142+edan-bainglass@users.noreply.github.com> --- src/aiida/cmdline/commands/cmd_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiida/cmdline/commands/cmd_storage.py b/src/aiida/cmdline/commands/cmd_storage.py index 064b5ffcbf..fd7796f931 100644 --- a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -212,5 +212,5 @@ def storage_backup(ctx, manager, dest: str, keep: int, pg_dump_exe: str, rsync_e try: storage.backup(dest, keep, exes={'rsync': rsync_exe, 'pg_dump': pg_dump_exe}) except (ValueError, exceptions.StorageBackupError) as exception: - echo.echo_criticial(f'An error occurred during the backup: {exception}') + echo.echo_critical(f'An error occurred during the backup: {exception}') echo.echo_success(f'Data storage of profile `{ctx.obj.profile.name}` backed up to `{dest}`') From 6dd3915735ac7233dde49b88576e538bc734c9a8 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Wed, 7 Feb 2024 16:25:41 
+0200 Subject: [PATCH 30/59] backup: turn off compression for maintain to match default cli cmd --- src/aiida/storage/psql_dos/backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index b0f19e9ac0..d1bfb417c2 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -527,7 +527,7 @@ def _backup( raise exceptions.StorageBackupError('aiida config.json not found!') from exc # step 2: first run the storage maintenance version that can safely be performed while aiida is running - self.maintain(full=False, compress=True) + self.maintain(full=False, compress=False) # step 3: dump the PostgreSQL database into a temporary directory pg_dump_exe = manager.exes.get('pg_dump', 'pg_dump') From 740ff42cf35bea518cc0303fad8ba7cfacdee29e Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Wed, 7 Feb 2024 16:39:30 +0200 Subject: [PATCH 31/59] correct pass exception message --- src/aiida/cmdline/commands/cmd_storage.py | 2 +- src/aiida/storage/psql_dos/backend.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aiida/cmdline/commands/cmd_storage.py b/src/aiida/cmdline/commands/cmd_storage.py index fd7796f931..53d832ddc8 100644 --- a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -212,5 +212,5 @@ def storage_backup(ctx, manager, dest: str, keep: int, pg_dump_exe: str, rsync_e try: storage.backup(dest, keep, exes={'rsync': rsync_exe, 'pg_dump': pg_dump_exe}) except (ValueError, exceptions.StorageBackupError) as exception: - echo.echo_critical(f'An error occurred during the backup: {exception}') + echo.echo_critical(str(exception)) echo.echo_success(f'Data storage of profile `{ctx.obj.profile.name}` backed up to `{dest}`') diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index d1bfb417c2..d5430a84f4 100644 --- 
a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -574,4 +574,4 @@ def backup( backup_manager = backup_utils.BackupManager(dest, STORAGE_LOGGER, exes=exes, keep=keep) backup_manager.backup_auto_folders(lambda path, prev: self._backup(backup_manager, path, prev)) except backup_utils.BackupError as exc: - raise exceptions.StorageBackupError from exc + raise exceptions.StorageBackupError(*exc.args) from exc From f5501cc5162cb3716ccd7ae957c6c7f3eff659b0 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Wed, 7 Feb 2024 16:44:21 +0200 Subject: [PATCH 32/59] fix live-backup created as a file --- src/aiida/storage/psql_dos/backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index d5430a84f4..0eb60b6a0a 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -522,7 +522,7 @@ def _backup( backup_config = Config(str(filepath_config), {}) # Create empty config at temporary file location backup_config.add_profile(profile) # Add the profile being backed up backup_config.store() # Write the contents to disk - manager.call_rsync(filepath_config, path) + manager.call_rsync(filepath_config, path, dest_trailing_slash=True) except (exceptions.MissingConfigurationError, exceptions.ConfigurationError) as exc: raise exceptions.StorageBackupError('aiida config.json not found!') from exc From 61cde3d3b7e8f0ad6ccc5a6681df2f6ca7767715 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Thu, 8 Feb 2024 01:01:39 +0200 Subject: [PATCH 33/59] remove rsync and pg_dump arguments from CLI, check them in the backend --- src/aiida/cmdline/commands/cmd_storage.py | 18 +++--------------- .../orm/implementation/storage_backend.py | 1 - src/aiida/storage/psql_dos/backend.py | 9 +++++++-- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/src/aiida/cmdline/commands/cmd_storage.py 
b/src/aiida/cmdline/commands/cmd_storage.py index 53d832ddc8..f981a0159b 100644 --- a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -174,21 +174,9 @@ def storage_maintain(ctx, full, no_repack, force, dry_run, compress): default=1, help='Number of previous backups to keep in the destination. (default: 1)', ) -@click.option( - '--pg-dump-exe', - type=click.STRING, - default='pg_dump', - help="Specify the 'pg_dump' executable, if not in PATH. Only needed for a PostgreSQL-based backend", -) -@click.option( - '--rsync-exe', - type=click.STRING, - default='rsync', - help="Specify the 'rsync' executable, if not in PATH. Used for both local and remote destinations", -) @decorators.with_manager @click.pass_context -def storage_backup(ctx, manager, dest: str, keep: int, pg_dump_exe: str, rsync_exe: str): +def storage_backup(ctx, manager, dest: str, keep: int): """Backup the data storage of a profile. The backup is created in the destination `DEST`, in a subfolder that follows the naming convention @@ -205,12 +193,12 @@ def storage_backup(ctx, manager, dest: str, keep: int, pg_dump_exe: str, rsync_e login - recommended, since this script might ask multiple times for the password). NOTE: 'rsync' and other UNIX-specific commands are called, thus the command will not work on - non-UNIX environments. + non-UNIX environments. What other executables are called, depend on the storage backend. 
""" storage = manager.get_profile_storage() try: - storage.backup(dest, keep, exes={'rsync': rsync_exe, 'pg_dump': pg_dump_exe}) + storage.backup(dest, keep) except (ValueError, exceptions.StorageBackupError) as exception: echo.echo_critical(str(exception)) echo.echo_success(f'Data storage of profile `{ctx.obj.profile.name}` backed up to `{dest}`') diff --git a/src/aiida/orm/implementation/storage_backend.py b/src/aiida/orm/implementation/storage_backend.py index c1b2ca235f..ed83faa1bd 100644 --- a/src/aiida/orm/implementation/storage_backend.py +++ b/src/aiida/orm/implementation/storage_backend.py @@ -308,7 +308,6 @@ def backup( self, dest: str, keep: int = 1, - exes: Optional[dict] = None, ): """Create a backup of the storage contents. diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index 0eb60b6a0a..fac64cbbb2 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -497,12 +497,18 @@ def _backup( incremental and efficient. """ import os + import shutil import subprocess import tempfile from aiida.manage.configuration import Config, get_config from aiida.manage.profile_access import ProfileAccessManager + # This command calls `rsync` and `pg_dump` executables. 
check that they are in PATH + for exe in ['rsync', 'pg_dump']: + if shutil.which(exe) is None: + raise exceptions.StorageBackupError(f"Required executable '{exe}' not found in PATH, please add it.") + cfg = self._profile.storage_config container = Container(get_filepath_container(self.profile)) @@ -568,10 +574,9 @@ def backup( self, dest: str, keep: int = 1, - exes: Optional[dict] = None, ): try: - backup_manager = backup_utils.BackupManager(dest, STORAGE_LOGGER, exes=exes, keep=keep) + backup_manager = backup_utils.BackupManager(dest, STORAGE_LOGGER, keep=keep) backup_manager.backup_auto_folders(lambda path, prev: self._backup(backup_manager, path, prev)) except backup_utils.BackupError as exc: raise exceptions.StorageBackupError(*exc.args) from exc From 8700cbe1b8b76ab4852e8b2d8d8e199470b678c5 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Thu, 8 Feb 2024 01:11:38 +0200 Subject: [PATCH 34/59] add logger.report commands to indicate different steps --- src/aiida/storage/psql_dos/backend.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index fac64cbbb2..315cfd21db 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -504,6 +504,8 @@ def _backup( from aiida.manage.configuration import Config, get_config from aiida.manage.profile_access import ProfileAccessManager + STORAGE_LOGGER.report('Starting backup...') + # This command calls `rsync` and `pg_dump` executables. check that they are in PATH for exe in ['rsync', 'pg_dump']: if shutil.which(exe) is None: @@ -520,6 +522,7 @@ def _backup( raise exceptions.StorageBackupError('The profile is locked!') from exc # step 1: back up aiida config.json file (strip other profiles!) 
+ STORAGE_LOGGER.report('Backing up config.json...') try: config = get_config() profile = config.get_profile(self.profile.name) # Get the profile being backed up @@ -533,9 +536,11 @@ def _backup( raise exceptions.StorageBackupError('aiida config.json not found!') from exc # step 2: first run the storage maintenance version that can safely be performed while aiida is running + STORAGE_LOGGER.report('Running basic maintenance...') self.maintain(full=False, compress=False) # step 3: dump the PostgreSQL database into a temporary directory + STORAGE_LOGGER.report('Backing up PostgreSQL...') pg_dump_exe = manager.exes.get('pg_dump', 'pg_dump') with tempfile.TemporaryDirectory() as temp_dir_name: psql_temp_loc = pathlib.Path(temp_dir_name) / 'db.psql' @@ -566,6 +571,7 @@ def _backup( manager.call_rsync(psql_temp_loc, path, link_dest=prev_backup, dest_trailing_slash=True) # step 5: back up the disk-objectstore + STORAGE_LOGGER.report('Backing up DOS container...') backup_utils.backup_container( manager, container, path / 'container', prev_backup=prev_backup / 'container' if prev_backup else None ) From 164400734ca1c75131e33a1e93fd8350a6834697 Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Thu, 22 Feb 2024 11:55:45 +0100 Subject: [PATCH 35/59] Logging: Add the `disk_objectstore` logger to the config The `disk_objectstore` logger is added to the default logging configuration in `aiida.common.log.get_logging_config`. The log level can be controlled through the `logging.disk_objectstore_loglevel` config option. Its default is intentionally set to `INFO` because information during backup is logged on that level and we want it to be shown by default. 
--- src/aiida/common/log.py | 5 +++++ src/aiida/manage/configuration/config.py | 3 +++ 2 files changed, 8 insertions(+) diff --git a/src/aiida/common/log.py b/src/aiida/common/log.py index 13e932cc9e..e2cab99c4a 100644 --- a/src/aiida/common/log.py +++ b/src/aiida/common/log.py @@ -100,6 +100,11 @@ def get_logging_config(): 'level': lambda: get_config_option('logging.verdi_loglevel'), 'propagate': False, }, + 'disk_objectstore': { + 'handlers': ['console'], + 'level': lambda: get_config_option('logging.disk_objectstore_loglevel'), + 'propagate': False, + }, 'plumpy': { 'handlers': ['console'], 'level': lambda: get_config_option('logging.plumpy_loglevel'), diff --git a/src/aiida/manage/configuration/config.py b/src/aiida/manage/configuration/config.py index 19efc61ca0..3804545a50 100644 --- a/src/aiida/manage/configuration/config.py +++ b/src/aiida/manage/configuration/config.py @@ -82,6 +82,9 @@ class ProfileOptionsSchema(BaseModel, defer_build=True): logging__verdi_loglevel: LogLevels = Field( 'REPORT', description='Minimum level to log to console when running a `verdi` command.' ) + logging__disk_objectstore_loglevel: LogLevels = Field( + 'INFO', description='Minimum level to log to daemon log and the `DbLog` table for `disk_objectstore` logger.' + ) logging__db_loglevel: LogLevels = Field('REPORT', description='Minimum level to log to the DbLog table.') logging__plumpy_loglevel: LogLevels = Field( 'WARNING', description='Minimum level to log to daemon log and the `DbLog` table for the `plumpy` logger.' 
From 966b90c7d433e2f8a20847ccb950c415666a6102 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Thu, 22 Feb 2024 17:29:41 +0200 Subject: [PATCH 36/59] adapt to latest disk-objectstore --- src/aiida/orm/implementation/storage_backend.py | 2 -- src/aiida/storage/psql_dos/backend.py | 7 ++++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/aiida/orm/implementation/storage_backend.py b/src/aiida/orm/implementation/storage_backend.py index ed83faa1bd..9077e399db 100644 --- a/src/aiida/orm/implementation/storage_backend.py +++ b/src/aiida/orm/implementation/storage_backend.py @@ -313,8 +313,6 @@ def backup( :param dest: The path to the destination folder. :param keep: The number of backups to keep in the target destination. - :param exes: Dictionary of absolute paths to executables that are required during the backup - but that are not present in the `PATH`. :raises ValueError: If the input parameters are invalid. :raises StorageBackupError: If an error occurred during the backup procedure. 
""" diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index 315cfd21db..ab4960c4e2 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -501,7 +501,8 @@ def _backup( import subprocess import tempfile - from aiida.manage.configuration import Config, get_config + from aiida.manage.configuration import get_config + from aiida.manage.configuration.config import Config from aiida.manage.profile_access import ProfileAccessManager STORAGE_LOGGER.report('Starting backup...') @@ -541,7 +542,7 @@ def _backup( # step 3: dump the PostgreSQL database into a temporary directory STORAGE_LOGGER.report('Backing up PostgreSQL...') - pg_dump_exe = manager.exes.get('pg_dump', 'pg_dump') + pg_dump_exe = 'pg_dump' with tempfile.TemporaryDirectory() as temp_dir_name: psql_temp_loc = pathlib.Path(temp_dir_name) / 'db.psql' @@ -582,7 +583,7 @@ def backup( keep: int = 1, ): try: - backup_manager = backup_utils.BackupManager(dest, STORAGE_LOGGER, keep=keep) + backup_manager = backup_utils.BackupManager(dest, keep=keep) backup_manager.backup_auto_folders(lambda path, prev: self._backup(backup_manager, path, prev)) except backup_utils.BackupError as exc: raise exceptions.StorageBackupError(*exc.args) from exc From a63756f549dffe7ebbe01110aeabcef06b1c04fe Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Fri, 23 Feb 2024 15:00:03 +0200 Subject: [PATCH 37/59] rm CLI profile dbdump command that was added by mistake --- src/aiida/cmdline/commands/cmd_profile.py | 42 ----------------------- 1 file changed, 42 deletions(-) diff --git a/src/aiida/cmdline/commands/cmd_profile.py b/src/aiida/cmdline/commands/cmd_profile.py index 435d4eca20..8be4ed3bbf 100644 --- a/src/aiida/cmdline/commands/cmd_profile.py +++ b/src/aiida/cmdline/commands/cmd_profile.py @@ -165,45 +165,3 @@ def profile_delete(force, delete_data, profiles): get_config().delete_profile(profile.name, delete_storage=delete_data) 
echo.echo_success(f'Profile `{profile.name}` was deleted.') - - -@verdi_profile.command('dbdump') -@options.PROFILE(default=defaults.get_default_profile) -@click.option('--output_file', type=click.Path(), help='Specify the output file path.') -def profile_dbdump(profile, output_file): - """Dump the PostgreSQL database into a file.""" - - import os - import pathlib - import subprocess - - if not output_file: - output_file = f'{profile.name}.psql' - - output_file = pathlib.Path(output_file) - - db_config = profile.dictionary['storage']['config'] - - cmd = [ - 'pg_dump', - f'--host={db_config["database_hostname"]}', - f'--port={db_config["database_port"]}', - f'--dbname={db_config["database_name"]}', - f'--username={db_config["database_username"]}', - '--no-password', - '--format=p', - f'--file={output_file}', - ] - - env = os.environ.copy() - env['PGPASSWORD'] = db_config['database_password'] - - pg_dump_output = subprocess.check_output(cmd, env=env).decode('utf-8') - - if len(pg_dump_output) > 0: - echo.echo_warning(f'Output from pg_dump: {pg_dump_output}') - - if output_file.is_file(): - echo.echo_success(f'Output written to `{output_file}`') - else: - echo.echo_error(f'Something went wrong, `{output_file}` not written.') From 6755b979fa1e21072c71e0b0c56724efa9a65351 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Fri, 23 Feb 2024 15:50:52 +0200 Subject: [PATCH 38/59] catch NotImplementedError --- src/aiida/cmdline/commands/cmd_storage.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/aiida/cmdline/commands/cmd_storage.py b/src/aiida/cmdline/commands/cmd_storage.py index f981a0159b..622223d9ad 100644 --- a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -197,8 +197,14 @@ def storage_backup(ctx, manager, dest: str, keep: int): """ storage = manager.get_profile_storage() + profile = ctx.obj.profile try: storage.backup(dest, keep) + except NotImplementedError: + echo.echo_critical( + 
f'Profile {profile.name} uses the storage plugin ' + f'{profile.storage_backend} which does not implement a backup mechanism.' + ) except (ValueError, exceptions.StorageBackupError) as exception: echo.echo_critical(str(exception)) - echo.echo_success(f'Data storage of profile `{ctx.obj.profile.name}` backed up to `{dest}`') + echo.echo_success(f'Data storage of profile `{profile.name}` backed up to `{dest}`') From 5cafc17df5d6b675b155e463b524c16ee81a0667 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Fri, 1 Mar 2024 12:58:16 +0200 Subject: [PATCH 39/59] keep default None, which keeps all backups --- src/aiida/cmdline/commands/cmd_storage.py | 7 +++++-- src/aiida/orm/implementation/storage_backend.py | 4 ++-- src/aiida/storage/psql_dos/backend.py | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/aiida/cmdline/commands/cmd_storage.py b/src/aiida/cmdline/commands/cmd_storage.py index 622223d9ad..e0bb3b213e 100644 --- a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -171,8 +171,11 @@ def storage_maintain(ctx, full, no_repack, force, dry_run, compress): @click.argument('dest', type=click.Path(file_okay=False), nargs=1) @click.option( '--keep', - default=1, - help='Number of previous backups to keep in the destination. (default: 1)', + required=False, + help=( + 'Number of previous backups to keep in the destination, ' + 'if the storage backend supports it. If not set, keeps all previous backups.' 
+ ), ) @decorators.with_manager @click.pass_context diff --git a/src/aiida/orm/implementation/storage_backend.py b/src/aiida/orm/implementation/storage_backend.py index 9077e399db..3d05af900d 100644 --- a/src/aiida/orm/implementation/storage_backend.py +++ b/src/aiida/orm/implementation/storage_backend.py @@ -307,12 +307,12 @@ def maintain(self, full: bool = False, dry_run: bool = False, **kwargs) -> None: def backup( self, dest: str, - keep: int = 1, + keep: Optional[int] = None, ): """Create a backup of the storage contents. :param dest: The path to the destination folder. - :param keep: The number of backups to keep in the target destination. + :param keep: The number of backups to keep in the target destination, if the backend supports it. :raises ValueError: If the input parameters are invalid. :raises StorageBackupError: If an error occurred during the backup procedure. """ diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index ab4960c4e2..207d85871c 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -580,7 +580,7 @@ def _backup( def backup( self, dest: str, - keep: int = 1, + keep: Optional[int] = None, ): try: backup_manager = backup_utils.BackupManager(dest, keep=keep) From 52110d9a9da657e4596106cf5ccf86842083fad0 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Fri, 1 Mar 2024 13:06:50 +0200 Subject: [PATCH 40/59] aiida-backup.json: checks profile match or empty dest --- .../orm/implementation/storage_backend.py | 60 ++++++++++++++++++- src/aiida/storage/psql_dos/backend.py | 2 +- 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/src/aiida/orm/implementation/storage_backend.py b/src/aiida/orm/implementation/storage_backend.py index 3d05af900d..9ac5b471d4 100644 --- a/src/aiida/orm/implementation/storage_backend.py +++ b/src/aiida/orm/implementation/storage_backend.py @@ -304,6 +304,63 @@ def maintain(self, full: bool = False, dry_run: bool = False, 
**kwargs) -> None: :param dry_run: flag to only print the actions that would be taken without actually executing them. """ + def _backup_backend( + self, + dest: str, + keep: Optional[int] = None, + ): + raise NotImplementedError + + def _validate_or_init_backup_folder(self, dest, keep): + import json + import pathlib + import tempfile + + from disk_objectstore import backup_utils + + from aiida.common import exceptions + from aiida.storage.log import STORAGE_LOGGER + + backup_info_fname = 'aiida-backup.json' + backup_info = { + 'PROFILE_NAME': self.profile.name, + 'PROFILE_UUID': self.profile.uuid, + 'STORAGE_BACKEND': self.profile.storage_backend, + } + + try: + # this creates the dest folder if it doesn't exist + backup_manager = backup_utils.BackupManager(dest, keep=keep) + + backup_info_path = backup_manager.path / backup_info_fname + if backup_manager.check_path_exists(backup_info_path): + success, stdout = backup_manager.run_cmd(['cat', str(backup_info_path)]) + if not success: + raise exceptions.StorageBackupError(f"Couldn't read {backup_info_path!s}.") + backup_info_existing = json.loads(stdout) + if backup_info_existing != backup_info: + raise exceptions.StorageBackupError( + 'The chosen destination contains backups of a different profile! Aborting!' 
+ ) + else: + STORAGE_LOGGER.warn('Initializing a new backup folder.') + # make sure the folder is empty + success, stdout = backup_manager.run_cmd(['ls', '-A', str(backup_manager.path)]) + if not success: + raise exceptions.StorageBackupError(f"Couldn't read {backup_info_path!s}.") + if stdout: + raise exceptions.StorageBackupError("Can't initialize the backup folder, destination is not empty.") + + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = pathlib.Path(tmpdir) / backup_info_fname + with open(tmp_path, 'w', encoding='utf-8') as fhandle: + json.dump(backup_info, fhandle, indent=4) + fhandle.write('\n') + backup_manager.call_rsync(tmp_path, backup_info_path) + + except backup_utils.BackupError as exc: + raise exceptions.StorageBackupError(*exc.args) from exc + def backup( self, dest: str, @@ -316,7 +373,8 @@ def backup( :raises ValueError: If the input parameters are invalid. :raises StorageBackupError: If an error occurred during the backup procedure. """ - raise NotImplementedError + self._validate_or_init_backup_folder(dest, keep) + self._backup_backend(dest, keep) def get_info(self, detailed: bool = False) -> dict: """Return general information on the storage. 
diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index 207d85871c..96b653a76d 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -577,7 +577,7 @@ def _backup( manager, container, path / 'container', prev_backup=prev_backup / 'container' if prev_backup else None ) - def backup( + def _backup_backend( self, dest: str, keep: Optional[int] = None, From 7ecfdc0dcbb8ba8a3670440c37efb884d23b3890 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Fri, 15 Mar 2024 00:01:07 +0200 Subject: [PATCH 41/59] disk-objectstore dependency to 1.1 instead of master --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 12ac66f0ab..87a098f921 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ 'circus~=0.18.0', 'click-spinner~=0.1.8', 'click~=8.1', - 'disk-objectstore@git+https://github.com/aiidateam/disk-objectstore', + 'disk-objectstore~=1.1', 'docstring-parser', 'get-annotations~=0.1;python_version<"3.10"', 'graphviz~=0.19', From 3ddbaaace191a5102542f0caed0331e7d5fe22b1 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Fri, 15 Mar 2024 00:30:14 +0200 Subject: [PATCH 42/59] make verbosity exception to affect disk_objectstore logger --- src/aiida/common/log.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiida/common/log.py b/src/aiida/common/log.py index e2cab99c4a..0b2ab4cd4f 100644 --- a/src/aiida/common/log.py +++ b/src/aiida/common/log.py @@ -225,7 +225,7 @@ def configure_logging(with_orm=False, daemon=False, daemon_log_file=None): # can still configure those manually beforehand through the config options. 
if CLI_LOG_LEVEL is not None: for name, logger in config['loggers'].items(): - if name in ['aiida', 'verdi']: + if name in ['aiida', 'verdi', 'disk_objectstore']: logger['level'] = CLI_LOG_LEVEL # Add the `DbLogHandler` if `with_orm` is `True` From 45377515dd06be34d885bccb6bf5723b7932e164 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Fri, 15 Mar 2024 00:38:53 +0200 Subject: [PATCH 43/59] fix pre-commit --- environment.yml | 2 +- src/aiida/storage/sqlite_dos/backend.py | 10 +--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/environment.yml b/environment.yml index fa1536dab0..ba2bff4c93 100644 --- a/environment.yml +++ b/environment.yml @@ -12,7 +12,7 @@ dependencies: - circus~=0.18.0 - click-spinner~=0.1.8 - click~=8.1 -- disk-objectstore@ git+https://github.com/aiidateam/disk-objectstore +- disk-objectstore~=1.1 - docstring_parser - get-annotations~=0.1 - python-graphviz~=0.19 diff --git a/src/aiida/storage/sqlite_dos/backend.py b/src/aiida/storage/sqlite_dos/backend.py index 25187753f3..d738c7c856 100644 --- a/src/aiida/storage/sqlite_dos/backend.py +++ b/src/aiida/storage/sqlite_dos/backend.py @@ -12,7 +12,7 @@ from functools import cached_property from pathlib import Path from shutil import rmtree -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING from uuid import uuid4 from disk_objectstore import Container @@ -200,11 +200,3 @@ def nodes(self): @cached_property def users(self): return orm.SqliteUserCollection(self) - - def backup( - self, - dest: str, - keep: int = 1, - exes: Optional[dict] = None, - ): - raise NotImplementedError From 2231f2b99675ee70676ec2f09b59a2bb2cc9a2ea Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Fri, 15 Mar 2024 00:52:20 +0200 Subject: [PATCH 44/59] Remove the backup of config.json --- src/aiida/storage/psql_dos/backend.py | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/src/aiida/storage/psql_dos/backend.py 
b/src/aiida/storage/psql_dos/backend.py index 96b653a76d..c56f70ccf0 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -501,8 +501,6 @@ def _backup( import subprocess import tempfile - from aiida.manage.configuration import get_config - from aiida.manage.configuration.config import Config from aiida.manage.profile_access import ProfileAccessManager STORAGE_LOGGER.report('Starting backup...') @@ -522,25 +520,11 @@ def _backup( except exceptions.LockedProfileError as exc: raise exceptions.StorageBackupError('The profile is locked!') from exc - # step 1: back up aiida config.json file (strip other profiles!) - STORAGE_LOGGER.report('Backing up config.json...') - try: - config = get_config() - profile = config.get_profile(self.profile.name) # Get the profile being backed up - with tempfile.TemporaryDirectory() as tmpdir: - filepath_config = pathlib.Path(tmpdir) / 'config.json' - backup_config = Config(str(filepath_config), {}) # Create empty config at temporary file location - backup_config.add_profile(profile) # Add the profile being backed up - backup_config.store() # Write the contents to disk - manager.call_rsync(filepath_config, path, dest_trailing_slash=True) - except (exceptions.MissingConfigurationError, exceptions.ConfigurationError) as exc: - raise exceptions.StorageBackupError('aiida config.json not found!') from exc - - # step 2: first run the storage maintenance version that can safely be performed while aiida is running + # step 1: first run the storage maintenance version that can safely be performed while aiida is running STORAGE_LOGGER.report('Running basic maintenance...') self.maintain(full=False, compress=False) - # step 3: dump the PostgreSQL database into a temporary directory + # step 2: dump the PostgreSQL database into a temporary directory STORAGE_LOGGER.report('Backing up PostgreSQL...') pg_dump_exe = 'pg_dump' with tempfile.TemporaryDirectory() as temp_dir_name: @@ -568,10 +552,10 @@ def 
_backup( else: raise backup_utils.BackupError(f"'{psql_temp_loc!s}' was not created.") - # step 4: transfer the PostgreSQL database file + # step 3: transfer the PostgreSQL database file manager.call_rsync(psql_temp_loc, path, link_dest=prev_backup, dest_trailing_slash=True) - # step 5: back up the disk-objectstore + # step 4: back up the disk-objectstore STORAGE_LOGGER.report('Backing up DOS container...') backup_utils.backup_container( manager, container, path / 'container', prev_backup=prev_backup / 'container' if prev_backup else None From 2424b2048bf19b8f094c9a180fce44ecf1892c0e Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Fri, 15 Mar 2024 14:48:30 +0200 Subject: [PATCH 45/59] manually update requirements files --- requirements/requirements-py-3.10.txt | 2 +- requirements/requirements-py-3.11.txt | 2 +- requirements/requirements-py-3.12.txt | 2 +- requirements/requirements-py-3.9.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements/requirements-py-3.10.txt b/requirements/requirements-py-3.10.txt index bf63cb4ce5..bc28d7eb39 100644 --- a/requirements/requirements-py-3.10.txt +++ b/requirements/requirements-py-3.10.txt @@ -41,7 +41,7 @@ debugpy==1.6.7 decorator==5.1.1 defusedxml==0.7.1 deprecation==2.1.0 -git+https://github.com/aiidateam/disk-objectstore +disk-objectstore==1.1.0 docstring-parser==0.15 docutils==0.20.1 emmet-core==0.57.1 diff --git a/requirements/requirements-py-3.11.txt b/requirements/requirements-py-3.11.txt index 7a046ae5fd..dfa35673a8 100644 --- a/requirements/requirements-py-3.11.txt +++ b/requirements/requirements-py-3.11.txt @@ -41,7 +41,7 @@ debugpy==1.6.7 decorator==5.1.1 defusedxml==0.7.1 deprecation==2.1.0 -git+https://github.com/aiidateam/disk-objectstore +disk-objectstore==1.1.0 docstring-parser==0.15 docutils==0.20.1 emmet-core==0.57.1 diff --git a/requirements/requirements-py-3.12.txt b/requirements/requirements-py-3.12.txt index bafd70bf7a..86d44d4c36 100644 --- 
a/requirements/requirements-py-3.12.txt +++ b/requirements/requirements-py-3.12.txt @@ -41,7 +41,7 @@ debugpy==1.8.0 decorator==5.1.1 defusedxml==0.7.1 deprecation==2.1.0 -git+https://github.com/aiidateam/disk-objectstore +disk-objectstore==1.1.0 docstring-parser==0.15 docutils==0.20.1 executing==2.0.0 diff --git a/requirements/requirements-py-3.9.txt b/requirements/requirements-py-3.9.txt index 446747356e..d59b8e2f1d 100644 --- a/requirements/requirements-py-3.9.txt +++ b/requirements/requirements-py-3.9.txt @@ -41,7 +41,7 @@ debugpy==1.6.7 decorator==5.1.1 defusedxml==0.7.1 deprecation==2.1.0 -git+https://github.com/aiidateam/disk-objectstore +disk-objectstore==1.1.0 docstring-parser==0.15 docutils==0.20.1 emmet-core==0.57.1 From 1952eda6b8b814886e121b6c2d5384ad205ff74e Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Fri, 15 Mar 2024 15:11:44 +0200 Subject: [PATCH 46/59] fix tui --- docs/source/reference/command_line.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/source/reference/command_line.rst b/docs/source/reference/command_line.rst index a50d2b29f8..d15c3b3ce4 100644 --- a/docs/source/reference/command_line.rst +++ b/docs/source/reference/command_line.rst @@ -575,6 +575,21 @@ Below is a list with all available subcommands. version Print the current version of the storage schema. +.. _reference:command-line:verdi-tui: + +``verdi tui`` +------------- + +.. code:: console + + Usage: [OPTIONS] + + Open Textual TUI. + + Options: + --help Show this message and exit. + + .. 
_reference:command-line:verdi-user: ``verdi user`` From 35c6ef5ca246e9ce7be81da18a83fdee9446310e Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Fri, 15 Mar 2024 19:06:37 +0200 Subject: [PATCH 47/59] adapt psql_dos backup test --- tests/storage/psql_dos/test_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/storage/psql_dos/test_backend.py b/tests/storage/psql_dos/test_backend.py index a8d260dc3b..50fbe5ec0e 100644 --- a/tests/storage/psql_dos/test_backend.py +++ b/tests/storage/psql_dos/test_backend.py @@ -166,5 +166,5 @@ def test_backup(tmp_path): # make sure the necessary files are there # note: disk-objectstore container backup is already tested in its own repo contents = [c.name for c in last_backup.iterdir()] - for name in ['config.json', 'container', 'db.psql']: + for name in ['container', 'db.psql']: assert name in contents From c8b26d9ea8b95434a62c756c7ee7047a2bbcbb9a Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Fri, 15 Mar 2024 19:08:12 +0200 Subject: [PATCH 48/59] test failure on non-empty backup destination --- src/aiida/orm/implementation/storage_backend.py | 2 +- tests/cmdline/commands/test_storage.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/aiida/orm/implementation/storage_backend.py b/src/aiida/orm/implementation/storage_backend.py index 9ac5b471d4..dfe35be0a2 100644 --- a/src/aiida/orm/implementation/storage_backend.py +++ b/src/aiida/orm/implementation/storage_backend.py @@ -343,7 +343,7 @@ def _validate_or_init_backup_folder(self, dest, keep): 'The chosen destination contains backups of a different profile! Aborting!' 
) else: - STORAGE_LOGGER.warn('Initializing a new backup folder.') + STORAGE_LOGGER.warning('Initializing a new backup folder.') # make sure the folder is empty success, stdout = backup_manager.run_cmd(['ls', '-A', str(backup_manager.path)]) if not success: diff --git a/tests/cmdline/commands/test_storage.py b/tests/cmdline/commands/test_storage.py index 5646768e38..d4f5df354f 100644 --- a/tests/cmdline/commands/test_storage.py +++ b/tests/cmdline/commands/test_storage.py @@ -182,7 +182,17 @@ def mock_maintain(*args, **kwargs): def tests_storage_backup(run_cli_command, tmp_path): """Test the ``verdi storage backup`` command.""" result = run_cli_command(cmd_storage.storage_backup, parameters=[str(tmp_path)]) - assert ' backed up to ' in result.output + assert 'backed up to' in result.output assert result.exit_code == 0 last_backup = tmp_path / 'last-backup' assert last_backup.is_symlink() + + +def tests_storage_backup_nonempty_dest(run_cli_command, tmp_path): + """Test that the ``verdi storage backup`` fails for non-empty destination.""" + # add a file to the destination + with open(tmp_path / 'test.txt', 'w') as _: + pass + result = run_cli_command(cmd_storage.storage_backup, parameters=[str(tmp_path)], raises=True) + assert result.exit_code == 1 + assert 'destination is not empty' in result.output From 1eaf97dcb376ffb881a6356678323071f66618cb Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Fri, 15 Mar 2024 20:46:47 +0200 Subject: [PATCH 49/59] Update tests/cmdline/commands/test_storage.py Co-authored-by: Sebastiaan Huber --- tests/cmdline/commands/test_storage.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/cmdline/commands/test_storage.py b/tests/cmdline/commands/test_storage.py index d4f5df354f..2930fed412 100644 --- a/tests/cmdline/commands/test_storage.py +++ b/tests/cmdline/commands/test_storage.py @@ -191,8 +191,7 @@ def tests_storage_backup(run_cli_command, tmp_path): def tests_storage_backup_nonempty_dest(run_cli_command, 
tmp_path): """Test that the ``verdi storage backup`` fails for non-empty destination.""" # add a file to the destination - with open(tmp_path / 'test.txt', 'w') as _: - pass + (tmp_path / 'test.txt').touch() result = run_cli_command(cmd_storage.storage_backup, parameters=[str(tmp_path)], raises=True) assert result.exit_code == 1 assert 'destination is not empty' in result.output From 48adfd625b236aa0bf2121a3c4c13596dbff6d62 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Mon, 18 Mar 2024 17:15:39 +0200 Subject: [PATCH 50/59] add failure test on backup profile mismatch --- tests/cmdline/commands/test_storage.py | 29 +++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/tests/cmdline/commands/test_storage.py b/tests/cmdline/commands/test_storage.py index 2930fed412..cd50d4e612 100644 --- a/tests/cmdline/commands/test_storage.py +++ b/tests/cmdline/commands/test_storage.py @@ -8,6 +8,8 @@ ########################################################################### """Tests for `verdi storage`.""" +import json + import pytest from aiida import get_profile from aiida.cmdline.commands import cmd_storage @@ -181,11 +183,14 @@ def mock_maintain(*args, **kwargs): def tests_storage_backup(run_cli_command, tmp_path): """Test the ``verdi storage backup`` command.""" - result = run_cli_command(cmd_storage.storage_backup, parameters=[str(tmp_path)]) - assert 'backed up to' in result.output - assert result.exit_code == 0 - last_backup = tmp_path / 'last-backup' - assert last_backup.is_symlink() + result1 = run_cli_command(cmd_storage.storage_backup, parameters=[str(tmp_path)]) + assert 'backed up to' in result1.output + assert result1.exit_code == 0 + assert (tmp_path / 'last-backup').is_symlink() + # make another backup in the same folder + result2 = run_cli_command(cmd_storage.storage_backup, parameters=[str(tmp_path)]) + assert 'backed up to' in result2.output + assert result2.exit_code == 0 def 
tests_storage_backup_nonempty_dest(run_cli_command, tmp_path): @@ -195,3 +200,17 @@ def tests_storage_backup_nonempty_dest(run_cli_command, tmp_path): result = run_cli_command(cmd_storage.storage_backup, parameters=[str(tmp_path)], raises=True) assert result.exit_code == 1 assert 'destination is not empty' in result.output + + +def tests_storage_backup_other_profile(run_cli_command, tmp_path): + """Test that the ``verdi storage backup`` fails for a destination that has been used for another profile.""" + existing_backup_info = { + 'PROFILE_NAME': 'test-profile', + 'PROFILE_UUID': 'test-uuid', + 'STORAGE_BACKEND': 'core.psql_dos', + } + with open(tmp_path / 'aiida-backup.json', 'w', encoding='utf-8') as fhandle: + json.dump(existing_backup_info, fhandle, indent=4) + result = run_cli_command(cmd_storage.storage_backup, parameters=[str(tmp_path)], raises=True) + assert result.exit_code == 1 + assert 'contains backups of a different profile' in result.output From 517a53b24758245436dec0f8c426f4faea5220d1 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Mon, 1 Apr 2024 16:23:22 +0300 Subject: [PATCH 51/59] Update src/aiida/cmdline/commands/cmd_storage.py Co-authored-by: Sebastiaan Huber --- src/aiida/cmdline/commands/cmd_storage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/aiida/cmdline/commands/cmd_storage.py b/src/aiida/cmdline/commands/cmd_storage.py index e0bb3b213e..a1a120da0e 100644 --- a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -171,6 +171,7 @@ def storage_maintain(ctx, full, no_repack, force, dry_run, compress): @click.argument('dest', type=click.Path(file_okay=False), nargs=1) @click.option( '--keep', + type=int, required=False, help=( 'Number of previous backups to keep in the destination, ' From 59ad9506080510081be80d4c1d86292f16a40939 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Mon, 1 Apr 2024 17:57:45 +0300 Subject: [PATCH 52/59] test for keep argument --- 
tests/cmdline/commands/test_storage.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/cmdline/commands/test_storage.py b/tests/cmdline/commands/test_storage.py index cd50d4e612..ce01c5d854 100644 --- a/tests/cmdline/commands/test_storage.py +++ b/tests/cmdline/commands/test_storage.py @@ -193,6 +193,17 @@ def tests_storage_backup(run_cli_command, tmp_path): assert result2.exit_code == 0 +def tests_storage_backup_keep(run_cli_command, tmp_path): + """Test the ``verdi storage backup`` command with the keep argument""" + params = [str(tmp_path), '--keep', '1'] + for i in range(3): + result = run_cli_command(cmd_storage.storage_backup, parameters=params) + assert 'backed up to' in result.output + assert result.exit_code == 0 + # make sure only two copies of the backup are kept + assert len(list((tmp_path.glob('backup_*')))) == 2 + + def tests_storage_backup_nonempty_dest(run_cli_command, tmp_path): """Test that the ``verdi storage backup`` fails for non-empty destination.""" # add a file to the destination From 98f8456ece8eb4b6758edfe54449b6b862099486 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Mon, 1 Apr 2024 18:04:56 +0300 Subject: [PATCH 53/59] raise NotImplementedError for sqlite_dos --- src/aiida/storage/sqlite_dos/backend.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/aiida/storage/sqlite_dos/backend.py b/src/aiida/storage/sqlite_dos/backend.py index d738c7c856..515cb1153f 100644 --- a/src/aiida/storage/sqlite_dos/backend.py +++ b/src/aiida/storage/sqlite_dos/backend.py @@ -7,12 +7,13 @@ # For further information please visit http://www.aiida.net # ########################################################################### """Storage implementation using Sqlite database and disk-objectstore container.""" + from __future__ import annotations from functools import cached_property from pathlib import Path from shutil import rmtree -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, 
Optional from uuid import uuid4 from disk_objectstore import Container @@ -145,6 +146,13 @@ def _initialise_session(self): engine = create_sqla_engine(Path(self._profile.storage_config['filepath']) / 'database.sqlite') self._session_factory = scoped_session(sessionmaker(bind=engine, future=True, expire_on_commit=True)) + def _backup_backend( + self, + dest: str, + keep: Optional[int] = None, + ): + raise NotImplementedError + def delete(self) -> None: # type: ignore[override] """Delete the storage and all the data.""" filepath = Path(self.profile.storage_config['filepath']) From 95ffe145b9ba1f77d3cfa91f7199d75875cfb06d Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Thu, 11 Apr 2024 02:27:37 +0300 Subject: [PATCH 54/59] check is_backup_implemented before creating the folder --- src/aiida/orm/implementation/storage_backend.py | 16 ++++++++++++++-- src/aiida/storage/psql_dos/backend.py | 4 ++++ src/aiida/storage/sqlite_dos/backend.py | 3 +++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/aiida/orm/implementation/storage_backend.py b/src/aiida/orm/implementation/storage_backend.py index dfe35be0a2..0c9b3cb519 100644 --- a/src/aiida/orm/implementation/storage_backend.py +++ b/src/aiida/orm/implementation/storage_backend.py @@ -7,6 +7,7 @@ # For further information please visit http://www.aiida.net # ########################################################################### """Generic backend related objects""" + from __future__ import annotations import abc @@ -304,6 +305,13 @@ def maintain(self, full: bool = False, dry_run: bool = False, **kwargs) -> None: :param dry_run: flag to only print the actions that would be taken without actually executing them. """ + def is_backup_implemented(self): + """Check if the storage backend implements a backup procedure. + + Note: subclasses that implement a backup procedure need to overload this method and return True. 
+ """ + return False + def _backup_backend( self, dest: str, @@ -372,9 +380,13 @@ def backup( :param keep: The number of backups to keep in the target destination, if the backend supports it. :raises ValueError: If the input parameters are invalid. :raises StorageBackupError: If an error occurred during the backup procedure. + :raises NotImplementedError: If the storage backend doesn't implement a backup procedure. """ - self._validate_or_init_backup_folder(dest, keep) - self._backup_backend(dest, keep) + if self.is_backup_implemented(): + self._validate_or_init_backup_folder(dest, keep) + self._backup_backend(dest, keep) + else: + raise NotImplementedError def get_info(self, detailed: bool = False) -> dict: """Return general information on the storage. diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index c56f70ccf0..a0d172e1e7 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -7,6 +7,7 @@ # For further information please visit http://www.aiida.net # ########################################################################### """SqlAlchemy implementation of `aiida.orm.implementation.backends.Backend`.""" + import functools import gc import pathlib @@ -478,6 +479,9 @@ def get_info(self, detailed: bool = False) -> dict: results['repository'] = self.get_repository().get_info(detailed) return results + def is_backup_implemented(self): + return True + def _backup( self, manager: backup_utils.BackupManager, diff --git a/src/aiida/storage/sqlite_dos/backend.py b/src/aiida/storage/sqlite_dos/backend.py index 515cb1153f..9481177241 100644 --- a/src/aiida/storage/sqlite_dos/backend.py +++ b/src/aiida/storage/sqlite_dos/backend.py @@ -146,6 +146,9 @@ def _initialise_session(self): engine = create_sqla_engine(Path(self._profile.storage_config['filepath']) / 'database.sqlite') self._session_factory = scoped_session(sessionmaker(bind=engine, future=True, expire_on_commit=True)) + def 
is_backup_implemented(self): + return False + def _backup_backend( self, dest: str, From 2428fc571f3c598400749376996fcb8daacd14b9 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Mon, 15 Apr 2024 19:30:10 +0300 Subject: [PATCH 55/59] replace top-level aiida-backup.json with config.json --- .../orm/implementation/storage_backend.py | 77 +++++++++++++------ tests/cmdline/commands/test_storage.py | 17 ++-- 2 files changed, 64 insertions(+), 30 deletions(-) diff --git a/src/aiida/orm/implementation/storage_backend.py b/src/aiida/orm/implementation/storage_backend.py index 0c9b3cb519..d693cadaa1 100644 --- a/src/aiida/orm/implementation/storage_backend.py +++ b/src/aiida/orm/implementation/storage_backend.py @@ -319,56 +319,81 @@ def _backup_backend( ): raise NotImplementedError + def _write_backup_config(self, backup_manager): + import pathlib + import tempfile + + from aiida.common import exceptions + from aiida.manage.configuration import get_config + from aiida.manage.configuration.config import Config + + try: + config = get_config() + profile = config.get_profile(self.profile.name) # Get the profile being backed up + with tempfile.TemporaryDirectory() as tmpdir: + filepath_config = pathlib.Path(tmpdir) / 'config.json' + backup_config = Config(str(filepath_config), {}) # Create empty config at temporary file location + backup_config.add_profile(profile) # Add the profile being backed up + backup_config.store() # Write the contents to disk + backup_manager.call_rsync(filepath_config, backup_manager.path / 'config.json') + except (exceptions.MissingConfigurationError, exceptions.ConfigurationError) as exc: + raise exceptions.StorageBackupError('AiiDA config.json not found!') from exc + def _validate_or_init_backup_folder(self, dest, keep): import json - import pathlib import tempfile from disk_objectstore import backup_utils from aiida.common import exceptions + from aiida.manage.configuration.config import Config from aiida.storage.log import STORAGE_LOGGER - 
backup_info_fname = 'aiida-backup.json' - backup_info = { - 'PROFILE_NAME': self.profile.name, - 'PROFILE_UUID': self.profile.uuid, - 'STORAGE_BACKEND': self.profile.storage_backend, - } + backup_config_fname = 'config.json' try: # this creates the dest folder if it doesn't exist backup_manager = backup_utils.BackupManager(dest, keep=keep) + backup_config_path = backup_manager.path / backup_config_fname - backup_info_path = backup_manager.path / backup_info_fname - if backup_manager.check_path_exists(backup_info_path): - success, stdout = backup_manager.run_cmd(['cat', str(backup_info_path)]) + if backup_manager.check_path_exists(backup_config_path): + success, stdout = backup_manager.run_cmd(['cat', str(backup_config_path)]) if not success: - raise exceptions.StorageBackupError(f"Couldn't read {backup_info_path!s}.") - backup_info_existing = json.loads(stdout) - if backup_info_existing != backup_info: - raise exceptions.StorageBackupError( - 'The chosen destination contains backups of a different profile! Aborting!' - ) + raise exceptions.StorageBackupError(f"Couldn't read {backup_config_path!s}.") + try: + backup_config_existing = json.loads(stdout) + except json.decoder.JSONDecodeError as exc: + raise exceptions.StorageBackupError(f'JSON parsing failed for {backup_config_path!s}: {exc.msg}') + + # create a temporary config file to access the profile info + with tempfile.NamedTemporaryFile() as temp_file: + backup_config = Config(temp_file.name, backup_config_existing, validate=False) + if len(backup_config.profiles) != 1: + raise exceptions.StorageBackupError(f"{backup_config_path!s} doesn't contain exactly 1 profile") + + if ( + backup_config.profiles[0].uuid != self.profile.uuid + or backup_config.profiles[0].storage_backend != self.profile.storage_backend + ): + raise exceptions.StorageBackupError( + 'The chosen destination contains backups of a different profile! Aborting!' 
+ ) else: STORAGE_LOGGER.warning('Initializing a new backup folder.') # make sure the folder is empty success, stdout = backup_manager.run_cmd(['ls', '-A', str(backup_manager.path)]) if not success: - raise exceptions.StorageBackupError(f"Couldn't read {backup_info_path!s}.") + raise exceptions.StorageBackupError(f"Couldn't read {backup_manager.path!s}.") if stdout: raise exceptions.StorageBackupError("Can't initialize the backup folder, destination is not empty.") - with tempfile.TemporaryDirectory() as tmpdir: - tmp_path = pathlib.Path(tmpdir) / backup_info_fname - with open(tmp_path, 'w', encoding='utf-8') as fhandle: - json.dump(backup_info, fhandle, indent=4) - fhandle.write('\n') - backup_manager.call_rsync(tmp_path, backup_info_path) + self._write_backup_config(backup_manager) except backup_utils.BackupError as exc: raise exceptions.StorageBackupError(*exc.args) from exc + return backup_manager + def backup( self, dest: str, @@ -382,9 +407,13 @@ def backup( :raises StorageBackupError: If an error occurred during the backup procedure. :raises NotImplementedError: If the storage backend doesn't implement a backup procedure. 
""" + from aiida.storage.log import STORAGE_LOGGER + if self.is_backup_implemented(): - self._validate_or_init_backup_folder(dest, keep) + backup_manager = self._validate_or_init_backup_folder(dest, keep) self._backup_backend(dest, keep) + STORAGE_LOGGER.report('Overwriting the config.json file.') + self._write_backup_config(backup_manager) else: raise NotImplementedError diff --git a/tests/cmdline/commands/test_storage.py b/tests/cmdline/commands/test_storage.py index ce01c5d854..8c374885a2 100644 --- a/tests/cmdline/commands/test_storage.py +++ b/tests/cmdline/commands/test_storage.py @@ -215,13 +215,18 @@ def tests_storage_backup_nonempty_dest(run_cli_command, tmp_path): def tests_storage_backup_other_profile(run_cli_command, tmp_path): """Test that the ``verdi storage backup`` fails for a destination that has been used for another profile.""" - existing_backup_info = { - 'PROFILE_NAME': 'test-profile', - 'PROFILE_UUID': 'test-uuid', - 'STORAGE_BACKEND': 'core.psql_dos', + existing_backup_config = { + 'CONFIG_VERSION': {'CURRENT': 9, 'OLDEST_COMPATIBLE': 9}, + 'profiles': { + 'test': { + 'PROFILE_UUID': 'test-uuid', + 'storage': {'backend': 'core.psql_dos'}, + 'process_control': {'backend': 'rabbitmq'}, + } + }, } - with open(tmp_path / 'aiida-backup.json', 'w', encoding='utf-8') as fhandle: - json.dump(existing_backup_info, fhandle, indent=4) + with open(tmp_path / 'config.json', 'w', encoding='utf-8') as fhandle: + json.dump(existing_backup_config, fhandle, indent=4) result = run_cli_command(cmd_storage.storage_backup, parameters=[str(tmp_path)], raises=True) assert result.exit_code == 1 assert 'contains backups of a different profile' in result.output From 029c0c457deb314f40b96827ae9cdc1a92df77a0 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Mon, 15 Apr 2024 19:43:37 +0300 Subject: [PATCH 56/59] adapt docs slightly --- docs/source/howto/installation.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git 
a/docs/source/howto/installation.rst b/docs/source/howto/installation.rst index 438c69791d..911678b08d 100644 --- a/docs/source/howto/installation.rst +++ b/docs/source/howto/installation.rst @@ -612,7 +612,9 @@ Restoring data from a backup Restoring a backed up AiiDA profile requires: -* restoring the profile information in the ``.aiida/config.json`` based on the backed up ``config.json`` file (entry under the "profiles" key). Some information (e.g. the database parameters) might need to be updated. +* restoring the profile information in the AiiDA ``config.json`` file. Simply copy the `profiles` entry from + the backed up `config.json` to the one of the running AiiDA instance (see `verdi status` for exact location). + Some information (e.g. the database parameters) might need to be updated. * restoring the data of of the backed up profile according to the ``config.json`` entry. Like the backup procedure, this is dependent on the storage backend used by the profile. @@ -636,9 +638,11 @@ To test if the restoration worked, run ``verdi -p status`` to ver psql -h -p - U -d -W < db.psql + where the parameters need to match with the corresponding AiiDA `config.json` profile entry. + **File repository** - To restore the file repository, simply copy the directory that was backed up to the location indicated by the ``storage.config.repository_uri`` key returned by the ``verdi profile show`` command. + To restore the file repository, simply copy the directory that was backed up to the location indicated in AiiDA `config.json` (or the ``storage.config.repository_uri`` key returned by the ``verdi profile show`` command). Like the backing up process, we recommend using ``rsync`` for this: ..
code-block:: console From b6aeb0d39ff935dde1d5b68a0e9e2abe33046ef7 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Mon, 15 Apr 2024 20:08:36 +0300 Subject: [PATCH 57/59] docs: nitpick typing_extensions.Literal --- docs/source/nitpick-exceptions | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/nitpick-exceptions b/docs/source/nitpick-exceptions index 35f134c155..d563fbf63a 100644 --- a/docs/source/nitpick-exceptions +++ b/docs/source/nitpick-exceptions @@ -257,3 +257,5 @@ py:exc seekpath.hpkot.EdgeCaseWarning py:class graphviz.graphs.Digraph py:class Digraph + +py:class typing_extensions.Literal From 573c8faf86c199adc905e3cb7f3d53348629ddf0 Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Wed, 17 Apr 2024 21:12:10 +0200 Subject: [PATCH 58/59] Alternative solution to `is_backup_implemented` The `is_backup_implemented` property was added to the storage interface such that the base class can call this before calling the actual storage backend. The reason is that the creation of the output folder has to be done before `_backup_backend` is called, but if that then raises `NotImplementedError` the output directory would have been created for nothing and remain empty. The current solution adds an additional method to the abstract interface called `is_backup_implemented`. Storage plugins should override this to return `True` in addition to implementing `_backup_backend`. This feels a bit redundant and can easily be forgotten by developers. Here as an alternative, the `is_backup_implemented` is removed and the base class `backend` will simply create the output folder and then delete it if the plugin raises `NotImplementedError`. 
--- docs/source/nitpick-exceptions | 2 - src/aiida/cmdline/commands/cmd_storage.py | 6 +-- .../orm/implementation/storage_backend.py | 45 +++++++++++------- src/aiida/storage/psql_dos/backend.py | 9 ++-- src/aiida/storage/sqlite_dos/backend.py | 5 +- tests/orm/implementation/test_backend.py | 47 +++++++++++++++++++ 6 files changed, 83 insertions(+), 31 deletions(-) diff --git a/docs/source/nitpick-exceptions b/docs/source/nitpick-exceptions index 5b96ac2e1b..d3fdc420f0 100644 --- a/docs/source/nitpick-exceptions +++ b/docs/source/nitpick-exceptions @@ -262,5 +262,3 @@ py:exc seekpath.hpkot.EdgeCaseWarning py:class graphviz.graphs.Digraph py:class Digraph - -py:class typing_extensions.Literal diff --git a/src/aiida/cmdline/commands/cmd_storage.py b/src/aiida/cmdline/commands/cmd_storage.py index a1a120da0e..5382ff455f 100644 --- a/src/aiida/cmdline/commands/cmd_storage.py +++ b/src/aiida/cmdline/commands/cmd_storage.py @@ -206,9 +206,9 @@ def storage_backup(ctx, manager, dest: str, keep: int): storage.backup(dest, keep) except NotImplementedError: echo.echo_critical( - f'Profile {profile.name} uses the storage plugin ' - f'{profile.storage_backend} which does not implement a backup mechanism.' + f'Profile {profile.name} uses the storage plugin `{profile.storage_backend}` which does not implement a ' + 'backup mechanism.' 
) except (ValueError, exceptions.StorageBackupError) as exception: echo.echo_critical(str(exception)) - echo.echo_success(f'Data storage of profile `{profile.name}` backed up to `{dest}`') + echo.echo_success(f'Data storage of profile `{profile.name}` backed up to `{dest}`.') diff --git a/src/aiida/orm/implementation/storage_backend.py b/src/aiida/orm/implementation/storage_backend.py index d693cadaa1..cc4c3c229e 100644 --- a/src/aiida/orm/implementation/storage_backend.py +++ b/src/aiida/orm/implementation/storage_backend.py @@ -305,14 +305,7 @@ def maintain(self, full: bool = False, dry_run: bool = False, **kwargs) -> None: :param dry_run: flag to only print the actions that would be taken without actually executing them. """ - def is_backup_implemented(self): - """Check if the storage backend implements a backup procedure. - - Note: subclasses that implement a backup procedure need to overload this method and return True. - """ - return False - - def _backup_backend( + def _backup( self, dest: str, keep: Optional[int] = None, @@ -324,6 +317,7 @@ def _write_backup_config(self, backup_manager): import tempfile from aiida.common import exceptions + from aiida.common.log import override_log_level from aiida.manage.configuration import get_config from aiida.manage.configuration.config import Config @@ -335,7 +329,11 @@ def _write_backup_config(self, backup_manager): backup_config = Config(str(filepath_config), {}) # Create empty config at temporary file location backup_config.add_profile(profile) # Add the profile being backed up backup_config.store() # Write the contents to disk - backup_manager.call_rsync(filepath_config, backup_manager.path / 'config.json') + + # Temporarily disable all logging because the verbose rsync output just for copying the config file + # is a bit much. 
+ with override_log_level(): + backup_manager.call_rsync(filepath_config, backup_manager.path / 'config.json') except (exceptions.MissingConfigurationError, exceptions.ConfigurationError) as exc: raise exceptions.StorageBackupError('AiiDA config.json not found!') from exc @@ -409,13 +407,28 @@ def backup( """ from aiida.storage.log import STORAGE_LOGGER - if self.is_backup_implemented(): - backup_manager = self._validate_or_init_backup_folder(dest, keep) - self._backup_backend(dest, keep) - STORAGE_LOGGER.report('Overwriting the config.json file.') - self._write_backup_config(backup_manager) - else: - raise NotImplementedError + backup_manager = self._validate_or_init_backup_folder(dest, keep) + + try: + self._backup(dest, keep) + except NotImplementedError: + success, stdout = backup_manager.run_cmd(['ls', '-A', str(backup_manager.path)]) + + if not success: + STORAGE_LOGGER.warning(f'Failed to determine contents of destination folder `{dest}`: not deleting it.') + raise + + # If the backup directory was just initialized for the first time, it should only contain the config.json + # file and nothing else. If anything else is found, do not delete the directory for safety reasons. + if stdout.strip() != 'config.json': + STORAGE_LOGGER.warning(f'The destination folder `{dest}` is not empty: not deleting it.') + raise + + backup_manager.run_cmd(['rm', '-rf', str(backup_manager.path)]) + raise + + STORAGE_LOGGER.report('Overwriting the config.json file.') + self._write_backup_config(backup_manager) def get_info(self, detailed: bool = False) -> dict: """Return general information on the storage. 
diff --git a/src/aiida/storage/psql_dos/backend.py b/src/aiida/storage/psql_dos/backend.py index 451f98ee92..2431f456dd 100644 --- a/src/aiida/storage/psql_dos/backend.py +++ b/src/aiida/storage/psql_dos/backend.py @@ -483,10 +483,7 @@ def get_info(self, detailed: bool = False) -> dict: results['repository'] = self.get_repository().get_info(detailed) return results - def is_backup_implemented(self): - return True - - def _backup( + def _backup_storage( self, manager: backup_utils.BackupManager, path: pathlib.Path, @@ -569,13 +566,13 @@ def _backup( manager, container, path / 'container', prev_backup=prev_backup / 'container' if prev_backup else None ) - def _backup_backend( + def _backup( self, dest: str, keep: Optional[int] = None, ): try: backup_manager = backup_utils.BackupManager(dest, keep=keep) - backup_manager.backup_auto_folders(lambda path, prev: self._backup(backup_manager, path, prev)) + backup_manager.backup_auto_folders(lambda path, prev: self._backup_storage(backup_manager, path, prev)) except backup_utils.BackupError as exc: raise exceptions.StorageBackupError(*exc.args) from exc diff --git a/src/aiida/storage/sqlite_dos/backend.py b/src/aiida/storage/sqlite_dos/backend.py index 9481177241..890e082914 100644 --- a/src/aiida/storage/sqlite_dos/backend.py +++ b/src/aiida/storage/sqlite_dos/backend.py @@ -146,10 +146,7 @@ def _initialise_session(self): engine = create_sqla_engine(Path(self._profile.storage_config['filepath']) / 'database.sqlite') self._session_factory = scoped_session(sessionmaker(bind=engine, future=True, expire_on_commit=True)) - def is_backup_implemented(self): - return False - - def _backup_backend( + def _backup( self, dest: str, keep: Optional[int] = None, diff --git a/tests/orm/implementation/test_backend.py b/tests/orm/implementation/test_backend.py index b1ea884bc4..001564f057 100644 --- a/tests/orm/implementation/test_backend.py +++ b/tests/orm/implementation/test_backend.py @@ -8,6 +8,10 @@ 
########################################################################### """Unit tests for the ORM Backend class.""" +from __future__ import annotations + +import json +import pathlib import uuid import pytest @@ -161,3 +165,46 @@ def test_delete_nodes_and_connections(self): orm.Node.collection.get(id=node_pk) assert len(calc_node.base.links.get_outgoing().all()) == 0 assert len(group.nodes) == 0 + + +def test_backup_not_implemented(aiida_config, backend, monkeypatch, tmp_path): + """Test the backup functionality if the plugin does not implement it.""" + + def _backup(*args, **kwargs): + raise NotImplementedError + + monkeypatch.setattr(backend, '_backup', _backup) + + filepath_backup = tmp_path / 'backup_dir' + + with pytest.raises(NotImplementedError): + backend.backup(str(filepath_backup)) + + # The backup directory should have been initialized but then cleaned up when the plugin raised the exception + assert not filepath_backup.is_dir() + + # Now create the backup directory with the config file and some other content to it. 
+ filepath_backup.mkdir() + (filepath_backup / 'config.json').write_text(json.dumps(aiida_config.dictionary)) + (filepath_backup / 'backup-deadbeef').mkdir() + + with pytest.raises(NotImplementedError): + backend.backup(str(filepath_backup)) + + # The backup directory should not have been delete + assert filepath_backup.is_dir() + assert (filepath_backup / 'config.json').is_file() + + +def test_backup_implemented(backend, monkeypatch, tmp_path): + """Test the backup functionality if the plugin does implement it.""" + + def _backup(dest: str, keep: int | None = None): + (pathlib.Path(dest) / 'backup.file').touch() + + monkeypatch.setattr(backend, '_backup', _backup) + + filepath_backup = tmp_path / 'backup_dir' + backend.backup(str(filepath_backup)) + assert (filepath_backup / 'config.json').is_file() + assert (filepath_backup / 'backup.file').is_file() From 7ed4313794d54de0f5a286489febcaadd4617747 Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Thu, 18 Apr 2024 18:14:11 +0200 Subject: [PATCH 59/59] Use settings variable for `config.json` literal --- src/aiida/orm/implementation/storage_backend.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/aiida/orm/implementation/storage_backend.py b/src/aiida/orm/implementation/storage_backend.py index cc4c3c229e..10a0c96875 100644 --- a/src/aiida/orm/implementation/storage_backend.py +++ b/src/aiida/orm/implementation/storage_backend.py @@ -320,12 +320,13 @@ def _write_backup_config(self, backup_manager): from aiida.common.log import override_log_level from aiida.manage.configuration import get_config from aiida.manage.configuration.config import Config + from aiida.manage.configuration.settings import DEFAULT_CONFIG_FILE_NAME try: config = get_config() profile = config.get_profile(self.profile.name) # Get the profile being backed up with tempfile.TemporaryDirectory() as tmpdir: - filepath_config = pathlib.Path(tmpdir) / 'config.json' + filepath_config = pathlib.Path(tmpdir) / 
DEFAULT_CONFIG_FILE_NAME backup_config = Config(str(filepath_config), {}) # Create empty config at temporary file location backup_config.add_profile(profile) # Add the profile being backed up backup_config.store() # Write the contents to disk @@ -333,7 +334,7 @@ def _write_backup_config(self, backup_manager): # Temporarily disable all logging because the verbose rsync output just for copying the config file # is a bit much. with override_log_level(): - backup_manager.call_rsync(filepath_config, backup_manager.path / 'config.json') + backup_manager.call_rsync(filepath_config, backup_manager.path / DEFAULT_CONFIG_FILE_NAME) except (exceptions.MissingConfigurationError, exceptions.ConfigurationError) as exc: raise exceptions.StorageBackupError('AiiDA config.json not found!') from exc @@ -345,14 +346,13 @@ def _validate_or_init_backup_folder(self, dest, keep): from aiida.common import exceptions from aiida.manage.configuration.config import Config + from aiida.manage.configuration.settings import DEFAULT_CONFIG_FILE_NAME from aiida.storage.log import STORAGE_LOGGER - backup_config_fname = 'config.json' - try: # this creates the dest folder if it doesn't exist backup_manager = backup_utils.BackupManager(dest, keep=keep) - backup_config_path = backup_manager.path / backup_config_fname + backup_config_path = backup_manager.path / DEFAULT_CONFIG_FILE_NAME if backup_manager.check_path_exists(backup_config_path): success, stdout = backup_manager.run_cmd(['cat', str(backup_config_path)]) @@ -405,6 +405,7 @@ def backup( :raises StorageBackupError: If an error occurred during the backup procedure. :raises NotImplementedError: If the storage backend doesn't implement a backup procedure. 
"""
+        from aiida.manage.configuration.settings import DEFAULT_CONFIG_FILE_NAME
         from aiida.storage.log import STORAGE_LOGGER
 
         backup_manager = self._validate_or_init_backup_folder(dest, keep)
@@ -418,16 +419,16 @@ def backup(
                 STORAGE_LOGGER.warning(f'Failed to determine contents of destination folder `{dest}`: not deleting it.')
                 raise
 
-            # If the backup directory was just initialized for the first time, it should only contain the config.json
+            # If the backup directory was just initialized for the first time, it should only contain the configuration
             # file and nothing else. If anything else is found, do not delete the directory for safety reasons.
-            if stdout.strip() != 'config.json':
+            if stdout.strip() != DEFAULT_CONFIG_FILE_NAME:
                 STORAGE_LOGGER.warning(f'The destination folder `{dest}` is not empty: not deleting it.')
                 raise
 
             backup_manager.run_cmd(['rm', '-rf', str(backup_manager.path)])
             raise
 
-        STORAGE_LOGGER.report('Overwriting the config.json file.')
+        STORAGE_LOGGER.report(f'Overwriting the `{DEFAULT_CONFIG_FILE_NAME}` file.')
         self._write_backup_config(backup_manager)
 
     def get_info(self, detailed: bool = False) -> dict: