From 9cdcc5ba8b2a17e99aa9bfc9347ae57a46650075 Mon Sep 17 00:00:00 2001 From: Douglas Cerna Date: Tue, 30 Jan 2024 12:53:07 -0600 Subject: [PATCH] Add sample script to update database character set Co-authored-by: Miguel Angel Medinilla --- .../installation/scripts/am-jammy-deb.sh | 2 +- .../installation/scripts/am-rocky-rpm.sh | 4 +- .../mysql-change-encoding-collation.sh | 104 ++++++++++++++++++ .../upgrading/upgrading.rst | 57 ++++++++++ 4 files changed, 164 insertions(+), 3 deletions(-) create mode 100644 admin-manual/installation-setup/upgrading/scripts/mysql-change-encoding-collation.sh diff --git a/admin-manual/installation-setup/installation/scripts/am-jammy-deb.sh b/admin-manual/installation-setup/installation/scripts/am-jammy-deb.sh index 7088b05e..d1eeaea5 100644 --- a/admin-manual/installation-setup/installation/scripts/am-jammy-deb.sh +++ b/admin-manual/installation-setup/installation/scripts/am-jammy-deb.sh @@ -23,7 +23,7 @@ sudo apt-get -y upgrade sudo apt-get install -y openjdk-8-jre-headless mysql-server sudo apt-get install -y elasticsearch -sudo mysql -e "DROP DATABASE IF EXISTS SS; CREATE DATABASE SS CHARACTER SET utf8 COLLATE utf8_unicode_ci;" +sudo mysql -e "DROP DATABASE IF EXISTS SS; CREATE DATABASE SS CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci;" sudo mysql -e "CREATE USER 'archivematica'@'localhost' IDENTIFIED BY 'demo';" sudo mysql -e "GRANT ALL ON SS.* TO 'archivematica'@'localhost';" diff --git a/admin-manual/installation-setup/installation/scripts/am-rocky-rpm.sh b/admin-manual/installation-setup/installation/scripts/am-rocky-rpm.sh index 0885d3b3..e7097ff7 100644 --- a/admin-manual/installation-setup/installation/scripts/am-rocky-rpm.sh +++ b/admin-manual/installation-setup/installation/scripts/am-rocky-rpm.sh @@ -55,8 +55,8 @@ sudo -u root systemctl start mariadb sudo -u root systemctl enable gearmand sudo -u root systemctl start gearmand -sudo -H -u root mysql -hlocalhost -uroot -e "DROP DATABASE IF EXISTS MCP; CREATE DATABASE MCP CHARACTER SET utf8 COLLATE utf8_unicode_ci;" -sudo -H -u root mysql -hlocalhost -uroot -e "DROP DATABASE IF EXISTS SS; CREATE DATABASE SS CHARACTER SET utf8 COLLATE utf8_unicode_ci;" +sudo -H -u root mysql -hlocalhost -uroot -e "DROP DATABASE IF EXISTS MCP; CREATE DATABASE MCP CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;" +sudo -H -u root mysql -hlocalhost -uroot -e "DROP DATABASE IF EXISTS SS; CREATE DATABASE SS CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;" sudo -H -u root mysql -hlocalhost -uroot -e "CREATE USER 'archivematica'@'localhost' IDENTIFIED BY 'demo';" sudo -H -u root mysql -hlocalhost -uroot -e "GRANT ALL ON MCP.* TO 'archivematica'@'localhost';" sudo -H -u root mysql -hlocalhost -uroot -e "GRANT ALL ON SS.* TO 'archivematica'@'localhost';" diff --git a/admin-manual/installation-setup/upgrading/scripts/mysql-change-encoding-collation.sh b/admin-manual/installation-setup/upgrading/scripts/mysql-change-encoding-collation.sh new file mode 100644 index 00000000..16785104 --- /dev/null +++ b/admin-manual/installation-setup/upgrading/scripts/mysql-change-encoding-collation.sh @@ -0,0 +1,104 @@ +#!/usr/bin/env bash + +set -o errexit # abort on nonzero exitstatus +set -o nounset # abort on unbound variable +set -o pipefail # do not hide errors within pipes + +# Array of database names +DATABASES=( + MCP + SS +) + +# Collation and CHARSET +CHARSET="utf8mb4" +COLLATION="utf8mb4_0900_ai_ci" + +# MySQL authentication (optional, default no auth) +MYSQL_USE_AUTH=False +MYSQL_USER=root +MYSQL_PASSWORD="THE_PASSWORD" + +# Function to execute a query +execute_query() { + local query="$1" + local db_name="$2" + local user_arg="" + + if [ "$MYSQL_USE_AUTH" = "True" ]; then + user_arg="-u$MYSQL_USER" + export MYSQL_PWD="$MYSQL_PASSWORD" + fi + + mysql -N -B $user_arg -e "$query" "$db_name" +} + +# Function to fix database charset and collation +fix_database_charset() { + local query="ALTER DATABASE ${DB_NAME} CHARACTER SET $CHARSET COLLATE $COLLATION;" + echo "Fixing database charset and collation" + execute_query "$query" "$DB_NAME" + echo "Fixed database charset and collation" +} + +# Function to fix tables charset and collation +fix_tables_charset() { + local query="SELECT CONCAT('ALTER TABLE \`', table_name, '\` CHARACTER SET $CHARSET COLLATE $COLLATION;') \ + FROM information_schema.TABLES AS T, information_schema.\`COLLATION_CHARACTER_SET_APPLICABILITY\` AS C \ + WHERE C.collation_name = T.table_collation \ + AND T.table_schema = '$DB_NAME' \ + AND (C.CHARACTER_SET_NAME != '$CHARSET' OR C.COLLATION_NAME != '$COLLATION');" + + local alter_table_queries=$(execute_query "$query" "$DB_NAME") + alter_table_queries_no_foreign_key_checks=$(echo -e "SET FOREIGN_KEY_CHECKS=0;\n$alter_table_queries\nSET FOREIGN_KEY_CHECKS=1;") + # echo "$alter_table_queries_no_foreign_key_checks" + echo "Fixing tables charset and collation" + execute_query "$alter_table_queries_no_foreign_key_checks" "$DB_NAME" + echo "Fixed tables charset and collation" +} + +# Function to fix column collation for varchar columns +fix_varchar_columns_collation() { + local query="SELECT CONCAT('ALTER TABLE \`', table_name, '\` MODIFY \`', column_name, '\` ', DATA_TYPE, \ + '(', CHARACTER_MAXIMUM_LENGTH, ') CHARACTER SET $CHARSET COLLATE $COLLATION', \ + (CASE WHEN IS_NULLABLE = 'NO' THEN ' NOT NULL' ELSE '' END), ';') \ + FROM information_schema.COLUMNS WHERE TABLE_SCHEMA = '$DB_NAME' AND DATA_TYPE = 'varchar' AND \ + ( CHARACTER_SET_NAME != '$CHARSET' OR COLLATION_NAME != '$COLLATION');" + + local alter_table_queries=$(execute_query "$query" "$DB_NAME") + alter_table_queries_no_foreign_key_checks=$(echo -e "SET FOREIGN_KEY_CHECKS=0;\n$alter_table_queries\nSET FOREIGN_KEY_CHECKS=1;") + # echo "$alter_table_queries_no_foreign_key_checks" + echo "Fixing column collation for varchar columns" + execute_query "$alter_table_queries_no_foreign_key_checks" "$DB_NAME" + echo "Fixed column collation for varchar columns" +} + +# Function to fix column collation for non-varchar columns +fix_non_varchar_columns_collation() { + local query="SELECT CONCAT('ALTER TABLE \`', table_name, '\` MODIFY \`', column_name, '\` ', DATA_TYPE, ' \ + CHARACTER SET $CHARSET COLLATE $COLLATION', (CASE WHEN IS_NULLABLE = 'NO' THEN ' NOT NULL' ELSE '' END), ';') \ + FROM information_schema.COLUMNS \ + WHERE TABLE_SCHEMA = '$DB_NAME' \ + AND DATA_TYPE != 'varchar' \ + AND (CHARACTER_SET_NAME != '$CHARSET' OR COLLATION_NAME != '$COLLATION');" + + local alter_table_queries=$(execute_query "$query" "$DB_NAME") + alter_table_queries_no_foreign_key_checks=$(echo -e "SET FOREIGN_KEY_CHECKS=0;\n$alter_table_queries\nSET FOREIGN_KEY_CHECKS=1;") + # echo "$alter_table_queries_no_foreign_key_checks" + echo "Fixing column collation for non-varchar columns" + execute_query "$alter_table_queries_no_foreign_key_checks" "$DB_NAME" + echo "Fixed column collation for non-varchar columns" +} + +# Loop through each database in the array +for DB_NAME in "${DATABASES[@]}"; do + echo "Processing database: $DB_NAME" + fix_database_charset + fix_tables_charset + fix_varchar_columns_collation + fix_non_varchar_columns_collation + echo "Migration completed for $DB_NAME" +done + +# Unset the MYSQL_PWD environment variable after executing the queries +unset MYSQL_PWD diff --git a/admin-manual/installation-setup/upgrading/upgrading.rst b/admin-manual/installation-setup/upgrading/upgrading.rst index 6b48241e..f800a06e 100644 --- a/admin-manual/installation-setup/upgrading/upgrading.rst +++ b/admin-manual/installation-setup/upgrading/upgrading.rst @@ -14,6 +14,7 @@ Upgrade from Archivematica |previous_version|.x to |release| * :ref:`Upgrade with output capturing disabled ` * :ref:`Update search indices ` * :ref:`Review the processing configuration ` +* :ref:`Migrate from MySQL 5.x to 8.x ` .. note:: @@ -502,6 +503,62 @@ introduced in Archivematica 1.13. The ``default`` and ``automated`` bundled configurations can be reset to the Archivematica defaults. +.. _migrate-mysql: + +Migrate from MySQL 5.x to 8.x +----------------------------- + +It is recommended the MySQL databases for Archivematica and Storage Service use +the MySQL 8 ``utf8mb4`` character set and its default collation +``utf8mb4_0900_ai_ci`` (or ``utf8mb4_general_ci`` in MariaDB). + +If you migrate your databases from MySQL 5.x you can check the character set +and encoding of their tables with: + +.. code:: sql + + SELECT + t.table_schema, t.table_name, c.character_set_name, t.table_collation + FROM + information_schema.tables t, + information_schema.collation_character_set_applicability c + WHERE + c.collation_name = t.table_collation + AND t.table_type = 'BASE TABLE' + AND (t.table_schema = 'MCP' OR t.table_schema = 'SS'); + +If they use the ``utf8mb3`` character set and collation you should update them +to avoid potential migration conflicts like this: + +.. code:: bash + + Running migrations: + Applying admin.0003_logentry_add_action_flag_choices... OK + Applying auth.0009_alter_user_last_name_max_length... OK + Applying auth.0010_alter_group_name_max_length... OK + Applying auth.0011_update_proxy_permissions... OK + Applying auth.0012_alter_user_first_name_max_length... OK + Applying locations.0031_rclone_space...Traceback (most recent call last): + File "/pyenv/data/versions/3.9.18/lib/python3.9/site-packages/django/db/backends/utils.py", line 84, in _execute + return self.cursor.execute(sql, params) + File "/pyenv/data/versions/3.9.18/lib/python3.9/site-packages/django/db/backends/mysql/base.py", line 73, in execute + return self.cursor.execute(query, args) + File "/pyenv/data/versions/3.9.18/lib/python3.9/site-packages/MySQLdb/cursors.py", line 179, in execute + res = self._query(mogrified_query) + File "/pyenv/data/versions/3.9.18/lib/python3.9/site-packages/MySQLdb/cursors.py", line 330, in _query + db.query(q) + File "/pyenv/data/versions/3.9.18/lib/python3.9/site-packages/MySQLdb/connections.py", line 255, in query + _mysql.connection.query(self, query) + MySQLdb.OperationalError: (3780, "Referencing column 'space_id' and referenced column 'uuid' in foreign key constraint 'locations_rclone_space_id_adb7fd1d_fk_locations_space_uuid' are incompatible.") + + django.db.utils.OperationalError: (3780, "Referencing column 'space_id' and referenced column 'uuid' in foreign key constraint 'locations_rclone_space_id_adb7fd1d_fk_locations_space_uuid' are incompatible.") + +The following script can be used as a reference to update the character set of +the databases and their tables. + +.. literalinclude:: scripts/mysql-change-encoding-collation.sh + :language: bash + :lines: 1-104 .. _`Elasticsearch 6.8 docs`: https://www.elastic.co/guide/en/elasticsearch/reference/6.8/modules-snapshots.html .. _`release notes`: https://wiki.archivematica.org/Release_Notes