Skip to content

Commit

Permalink
Add sample script to update database character set
Browse files Browse the repository at this point in the history
Co-authored-by: Miguel Angel Medinilla <[email protected]>
  • Loading branch information
replaceafill and mamedin authored Jan 30, 2024
1 parent 002bbf0 commit 9cdcc5b
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ sudo apt-get -y upgrade
sudo apt-get install -y openjdk-8-jre-headless mysql-server
sudo apt-get install -y elasticsearch

sudo mysql -e "DROP DATABASE IF EXISTS SS; CREATE DATABASE SS CHARACTER SET utf8 COLLATE utf8_unicode_ci;"
sudo mysql -e "DROP DATABASE IF EXISTS SS; CREATE DATABASE SS CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci;"
sudo mysql -e "CREATE USER 'archivematica'@'localhost' IDENTIFIED BY 'demo';"
sudo mysql -e "GRANT ALL ON SS.* TO 'archivematica'@'localhost';"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ sudo -u root systemctl start mariadb
sudo -u root systemctl enable gearmand
sudo -u root systemctl start gearmand

sudo -H -u root mysql -hlocalhost -uroot -e "DROP DATABASE IF EXISTS MCP; CREATE DATABASE MCP CHARACTER SET utf8 COLLATE utf8_unicode_ci;"
sudo -H -u root mysql -hlocalhost -uroot -e "DROP DATABASE IF EXISTS SS; CREATE DATABASE SS CHARACTER SET utf8 COLLATE utf8_unicode_ci;"
sudo -H -u root mysql -hlocalhost -uroot -e "DROP DATABASE IF EXISTS MCP; CREATE DATABASE MCP CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;"
sudo -H -u root mysql -hlocalhost -uroot -e "DROP DATABASE IF EXISTS SS; CREATE DATABASE SS CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;"
sudo -H -u root mysql -hlocalhost -uroot -e "CREATE USER 'archivematica'@'localhost' IDENTIFIED BY 'demo';"
sudo -H -u root mysql -hlocalhost -uroot -e "GRANT ALL ON MCP.* TO 'archivematica'@'localhost';"
sudo -H -u root mysql -hlocalhost -uroot -e "GRANT ALL ON SS.* TO 'archivematica'@'localhost';"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/usr/bin/env bash

set -o errexit # abort on nonzero exitstatus
set -o nounset # abort on unbound variable
set -o pipefail # do not hide errors within pipes

# Array of database names
DATABASES=(
MCP
SS
)

# Collation and CHARSET
CHARSET="utf8mb4"
COLLATION="utf8mb4_0900_ai_ci"

# MySQL authentication (optional, default no auth)
MYSQL_USE_AUTH=False
MYSQL_USER=root
MYSQL_PASSWORD="THE_PASSWORD"

# Function to execute a query
execute_query() {
local query="$1"
local db_name="$2"
local user_arg=""

if [ "$MYSQL_USE_AUTH" = "True" ]; then
user_arg="-u$MYSQL_USER"
export MYSQL_PWD="$MYSQL_PASSWORD"
fi

mysql -N -B $user_arg -e "$query" "$db_name"
}

# Function to fix database charset and collation
fix_database_charset() {
local query="ALTER DATABASE ${DB_NAME} CHARACTER SET $CHARSET COLLATE $COLLATION;"
echo "Fixing database charset and collation"
execute_query "$query" "$DB_NAME"
echo "Fixed database charset and collation"
}

# Function to fix tables charset and collation
fix_tables_charset() {
local query="SELECT CONCAT('ALTER TABLE \`', table_name, '\` CHARACTER SET $CHARSET COLLATE $COLLATION;') \
FROM information_schema.TABLES AS T, information_schema.\`COLLATION_CHARACTER_SET_APPLICABILITY\` AS C \
WHERE C.collation_name = T.table_collation \
AND T.table_schema = '$DB_NAME' \
AND (C.CHARACTER_SET_NAME != '$CHARSET' OR C.COLLATION_NAME != '$COLLATION');"

local alter_table_queries=$(execute_query "$query" "$DB_NAME")
alter_table_queries_no_foreign_key_checks=$(echo -e "SET FOREIGN_KEY_CHECKS=0;\n$alter_table_queries\nSET FOREIGN_KEY_CHECKS=1;")
# echo "$alter_table_queries_no_foreign_key_checks"
echo "Fixing tables charset and collation"
execute_query "$alter_table_queries_no_foreign_key_checks" "$DB_NAME"
echo "Fixed tables charset and collation"
}

# Function to fix column collation for varchar columns
fix_varchar_columns_collation() {
local query="SELECT CONCAT('ALTER TABLE \`', table_name, '\` MODIFY \`', column_name, '\` ', DATA_TYPE, \
'(', CHARACTER_MAXIMUM_LENGTH, ') CHARACTER SET $CHARSET COLLATE $COLLATION', \
(CASE WHEN IS_NULLABLE = 'NO' THEN ' NOT NULL' ELSE '' END), ';') \
FROM information_schema.COLUMNS WHERE TABLE_SCHEMA = '$DB_NAME' AND DATA_TYPE = 'varchar' AND \
( CHARACTER_SET_NAME != '$CHARSET' OR COLLATION_NAME != '$COLLATION');"

local alter_table_queries=$(execute_query "$query" "$DB_NAME")
alter_table_queries_no_foreign_key_checks=$(echo -e "SET FOREIGN_KEY_CHECKS=0;\n$alter_table_queries\nSET FOREIGN_KEY_CHECKS=1;")
# echo "$alter_table_queries_no_foreign_key_checks"
echo "Fixing column collation for varchar columns"
execute_query "$alter_table_queries_no_foreign_key_checks" "$DB_NAME"
echo "Fixed column collation for varchar columns"
}

# Function to fix column collation for non-varchar columns
fix_non_varchar_columns_collation() {
local query="SELECT CONCAT('ALTER TABLE \`', table_name, '\` MODIFY \`', column_name, '\` ', DATA_TYPE, ' \
CHARACTER SET $CHARSET COLLATE $COLLATION', (CASE WHEN IS_NULLABLE = 'NO' THEN ' NOT NULL' ELSE '' END), ';') \
FROM information_schema.COLUMNS \
WHERE TABLE_SCHEMA = '$DB_NAME' \
AND DATA_TYPE != 'varchar' \
AND (CHARACTER_SET_NAME != '$CHARSET' OR COLLATION_NAME != '$COLLATION');"

local alter_table_queries=$(execute_query "$query" "$DB_NAME")
alter_table_queries_no_foreign_key_checks=$(echo -e "SET FOREIGN_KEY_CHECKS=0;\n$alter_table_queries\nSET FOREIGN_KEY_CHECKS=1;")
# echo "$alter_table_queries_no_foreign_key_checks"
echo "Fixing column collation for non-varchar columns"
execute_query "$alter_table_queries_no_foreign_key_checks" "$DB_NAME"
echo "Fixed column collation for non-varchar columns"
}

# Loop through each database in the array
for DB_NAME in "${DATABASES[@]}"; do
echo "Processing database: $DB_NAME"
fix_database_charset
fix_tables_charset
fix_varchar_columns_collation
fix_non_varchar_columns_collation
echo "Migration completed for $DB_NAME"
done

# Unset the MYSQL_PWD environment variable after executing the queries
unset MYSQL_PWD
57 changes: 57 additions & 0 deletions admin-manual/installation-setup/upgrading/upgrading.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Upgrade from Archivematica |previous_version|.x to |release|
* :ref:`Upgrade with output capturing disabled <upgrade-no-output-capture>`
* :ref:`Update search indices <update-search-indices>`
* :ref:`Review the processing configuration <review-processing-configuration>`
* :ref:`Migrate from MySQL 5.x to 8.x <migrate-mysql>`

.. note::

Expand Down Expand Up @@ -502,6 +503,62 @@ introduced in Archivematica 1.13.
The ``default`` and ``automated`` bundled configurations can be reset to the
Archivematica defaults.

.. _migrate-mysql:

Migrate from MySQL 5.x to 8.x
-----------------------------

It is recommended the MySQL databases for Archivematica and Storage Service use
the MySQL 8 ``utf8mb4`` character set and its default collation
``utf8mb4_0900_ai_ci`` (or ``utf8mb4_general_ci`` in MariaDB).

If you migrate your databases from MySQL 5.x you can check the character set
and encoding of their tables with:

.. code:: sql
SELECT
t.table_schema, t.table_name, c.character_set_name, t.table_collation
FROM
information_schema.tables t,
information_schema.collation_character_set_applicability c
WHERE
c.collation_name = t.table_collation
AND t.table_type = 'BASE TABLE'
AND (t.table_schema = 'MCP' OR t.table_schema = 'SS');
If they use the ``utf8mb3`` character set and collation you should update them
to avoid potential migration conflicts like this:

.. code:: bash
Running migrations:
Applying admin.0003_logentry_add_action_flag_choices... OK
Applying auth.0009_alter_user_last_name_max_length... OK
Applying auth.0010_alter_group_name_max_length... OK
Applying auth.0011_update_proxy_permissions... OK
Applying auth.0012_alter_user_first_name_max_length... OK
Applying locations.0031_rclone_space...Traceback (most recent call last):
File "/pyenv/data/versions/3.9.18/lib/python3.9/site-packages/django/db/backends/utils.py", line 84, in _execute
return self.cursor.execute(sql, params)
File "/pyenv/data/versions/3.9.18/lib/python3.9/site-packages/django/db/backends/mysql/base.py", line 73, in execute
return self.cursor.execute(query, args)
File "/pyenv/data/versions/3.9.18/lib/python3.9/site-packages/MySQLdb/cursors.py", line 179, in execute
res = self._query(mogrified_query)
File "/pyenv/data/versions/3.9.18/lib/python3.9/site-packages/MySQLdb/cursors.py", line 330, in _query
db.query(q)
File "/pyenv/data/versions/3.9.18/lib/python3.9/site-packages/MySQLdb/connections.py", line 255, in query
_mysql.connection.query(self, query)
MySQLdb.OperationalError: (3780, "Referencing column 'space_id' and referenced column 'uuid' in foreign key constraint 'locations_rclone_space_id_adb7fd1d_fk_locations_space_uuid' are incompatible.")
django.db.utils.OperationalError: (3780, "Referencing column 'space_id' and referenced column 'uuid' in foreign key constraint 'locations_rclone_space_id_adb7fd1d_fk_locations_space_uuid' are incompatible.")
The following script can be used as a reference to update the character set of
the databases and their tables.

.. literalinclude:: scripts/mysql-change-encoding-collation.sh
:language: bash
:lines: 1-104

.. _`Elasticsearch 6.8 docs`: https://www.elastic.co/guide/en/elasticsearch/reference/6.8/modules-snapshots.html
.. _`release notes`: https://wiki.archivematica.org/Release_Notes
Expand Down

0 comments on commit 9cdcc5b

Please sign in to comment.