Merge pull request #774 from rug-cit-hpc/develop

New PR in preparation for new release
rug-cit-hpc · Apr 12, 2023 · e945778 · e945778
2 parents 24d64e5 + 94c947c
commit e945778
Show file tree

Hide file tree

Showing 623 changed files with 33,321 additions and 9,081 deletions.
diff --git a/.ansible-lint b/.ansible-lint
@@ -2,7 +2,9 @@
 exclude_paths:
   - '~/.ansible' # Exclude external playbooks.
 skip_list:
-  # We explicitly use latest combined with other tech to pin versions (e.g. Spacewalk).
+  # We explicitly use latest combined with other tech to pin versions (e.g. Pulp).
   - 'package-latest'  # "Package installs should not use latest (403)."
   - 'meta-no-info'  # "No 'galaxy_info' found in meta/main.yml of a role (701)."
+  - 'experimental'  # All rules tagged as experimental.
+  - 'name[template]'  # Allow jinja templating anywhere in a task name.
 ...
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -1,25 +1,38 @@
 ---
-
-# Python CircleCI 2.0 configuration file
 #
-# Check https://circleci.com/docs/2.0/language-python/ for more details
+# Python CircleCI 2.0 configuration file
+# Check https://circleci.com/docs/2.0/language-python/ for more details.
 #
-
 version: 2.1
 jobs:
   build:
     docker:
-      - image: circleci/python:3
+      - image: cimg/python:3.10
     working_directory: ~/repo
+    resource_class: small
     steps:
       - checkout
+      - run: python --version
+      - run: pip --version
       - run:
           name: install dependencies
           command: |
-            python3 -m venv venv
+            python -m venv venv
             . venv/bin/activate
-            pip install "ansible-lint[community,yamllint]"
-            ansible-galaxy install -r galaxy-requirements.yml
+            pip install jmespath
+            pip install ansible
+            pip install ansible-lint
+            status=1  # Assume failure until an ansible-galaxy attempt succeeds.
+            for attempt in 1 2 3; do  # Try at most 3 times.
+                [[ "${attempt}" -gt 1 ]] && sleep 30  # Pause 30 seconds before each retry.
+                ansible-galaxy install -r requirements.yml --timeout 120 && status=0 && break
+            done
+            if [[ "${status}" -ne 0 ]]; then
+                echo 'ERROR: ansible-galaxy install failed after 3 attempts.' >&2
+                exit "${status}"
+            fi
+            ansible --version
+            ansible-lint --version
       - run:
           name: run tests
           shell: /bin/bash
@@ -33,7 +46,7 @@ jobs:
             else
                 export ANSIBLE_ROLES_PATH="${HOME}/.ansible/roles/"
             fi
-            if ansible-lint -p --nocolor cluster.yml deploy-os_servers.yml > lint_results 2>&1; then
+            if ansible-lint -p --nocolor --offline *.yml > lint_results 2>&1; then
               lint_errors=0
             else
               cat lint_results

diff --git a/README.md b/README.md
diff --git a/ansible.cfg b/ansible.cfg
@@ -3,15 +3,29 @@ retry_files_enabled = False
 #stdout_callback = ansible.posix.debug
 stdout_callback = community.general.yaml
 timeout = 30
+inventory_plugins = inventory_plugins
+
+#
+# Do not define strategy_plugins path here as we cannot dynamically determine Python major.minor version numbers.
+# We define ANSIBLE_STRATEGY_PLUGINS in lor-init instead:
+#	export ANSIBLE_STRATEGY_PLUGINS=$(ls -d *.venv/lib/python*/site-packages/ansible_mitogen/plugins/strategy | sort -V | tail -1)
 #
-# Do not use a hard-code vault_password_file file here:
-# we have multiple .vault_pass.txt.clustername files with specific passwords for each cluster.
-# source the .lorrc file in the root of the repo and then use the lor-config function:
-#	lor-config [name-of-a-cluster]
+
+#
+# Do not use a hard-coded vault_password_file file here:
+# we have multiple .vault_pass.txt.${stack_name} files with specific passwords for each stack.
+# The stack_name and stack_prefix variables are defined in
+#	group_vars/${stack_name}/vars.yml
+# source the lor-init file in the root of the repo and then use the lor-config function:
+#	source ./lor-init
+#	lor-config ${stack_prefix}
 # This will configure various environment variables including ANSIBLE_VAULT_PASSWORD_FILE.
 # 
 #vault_password_file = .vault_pass.txt
 
 [ssh_connection]
 pipelining = True
-ssh_args = -C -o ControlMaster=auto -o ControlPersist=60s -o ForwardAgent=yes
+ssh_args = -C -o ControlPath='~/.ssh/tmp/%C' -o ControlMaster=auto -o ControlPersist=60s -o ForwardAgent=yes -o PreferredAuthentications=publickey,keyboard-interactive
+
+[inventory]
+enable_plugins = yaml_with_jumphost
diff --git a/callback_plugins/homsaplog.py b/callback_plugins/homsaplog.py
diff --git a/cluster.yml b/cluster.yml
@@ -1,38 +1,7 @@
 #
-# Order of deployment required to prevent chicken versus the egg issues:
-#  0. For all deployment phases:
-#       export AI_PROXY="${jumphost_name}"
-#       export AI_INVENTORY="static_inventories/${cluster_name}_hosts.ini"
-#       ANSIBLE_VAULT_PASSWORD_FILE=".vault_pass.txt.${cluster_name}"
-#  1. Use standard CentOS cloud image user 'centos' or 'root' user and without host key checking:
-#       export ANSIBLE_HOST_KEY_CHECKING=False
-#       ansible-playbook -i inventory.py -u centos -l 'jumphost,cluster'  single_role_playbooks/admin_users.yml
-#       ansible-playbook -i inventory.py -u root   -l 'docs'              single_role_playbooks/admin_users.yml
-#  2. Use local admin user's account and without host key checking:
-#       export ANSIBLE_HOST_KEY_CHECKING=False
-#       ansible-playbook -i inventory.py -u [admin_account] single_role_playbooks/ssh_host_signer.yml
-#  3. Use local admin user's account and with strict host key checking to deploy everything else:
-#       export ANSIBLE_HOST_KEY_CHECKING=True
-#       ansible-playbook -i inventory.py -u [admin_account] cluster.yml
-#     This will configure:
-#       * Jumphost first as it is required to access the other machines.
-#       * Repo management server second as it is required for version control of the packages
-#         installed on all other machines except for the jumphost, which will always get the latest updates for security.
-#       * Basic roles for all cluster machines part 1:
-#           * Roles that do NOT require regular accounts or groups to be present.
-#       * An LDAP with regular user accounts, which may be required for additional roles.
-#             (E.g. a chmod or chgrp for a file/folder requires the corresponding user or group to be present.)
-#       * Basic roles for all cluster machines part 2:
-#           * Roles that DO depend on regular accounts and groups.
-#       * SAI as it is required to:
-#           * Configure layout on shared storage devices used by other machines.
-#           * Configure Slurm control and Slurm database.
-#       * DAI
-#       * UI
-#       * Compute nodes
-#       * Documentation server
+# See README.md for instructions on how to use this playbook.
 #
-
+---
 #
 # Dummy play to ping jumphosts and establish a persisting SSH connection
 # before trying to connect to the machines behind the jumphost,
@@ -41,25 +10,45 @@
 - name: 'Dummy play to ping jumphosts and establish a persistent SSH connection.'
   hosts: jumphost
 
-- import_playbook: single_group_playbooks/pre_deploy_checks.yml
+- name: Run pre deploy checks.
+  ansible.builtin.import_playbook: single_group_playbooks/pre_deploy_checks.yml
+
+- name: Run playbook for jumphosts.
+  ansible.builtin.import_playbook: single_group_playbooks/jumphost.yml
+
+- name: Run playbook for repo servers.
+  ansible.builtin.import_playbook: single_group_playbooks/repo.yml
+
+- name: Run playbook for all cluster machines part 1.
+  ansible.builtin.import_playbook: single_group_playbooks/cluster_part1.yml
 
-- import_playbook: single_group_playbooks/jumphost.yml
+- name: Run playbook for LDAP servers.
+  ansible.builtin.import_playbook: single_group_playbooks/ldap_server.yml
 
-- import_playbook: single_group_playbooks/repo.yml
+- name: Run playbook for NFS servers.
+  ansible.builtin.import_playbook: single_group_playbooks/nfs_server.yml
 
-- import_playbook: single_group_playbooks/cluster_part1.yml
+- name: Run playbook for all cluster machines part 2.
+  ansible.builtin.import_playbook: single_group_playbooks/cluster_part2.yml
 
-- import_playbook: single_group_playbooks/ldap_server.yml
+- name: Run playbook for sys admin interfaces.
+  ansible.builtin.import_playbook: single_group_playbooks/sys_admin_interface.yml
 
-- import_playbook: single_group_playbooks/cluster_part2.yml
+- name: Run playbook for deploy admin Interfaces.
+  ansible.builtin.import_playbook: single_group_playbooks/deploy_admin_interface.yml
 
-- import_playbook: single_group_playbooks/sys_admin_interface.yml
+- name: Run playbook for user interfaces.
+  ansible.builtin.import_playbook: single_group_playbooks/user_interface.yml
 
-- import_playbook: single_group_playbooks/deploy_admin_interface.yml
+- name: Run playbook for compute nodes.
+  ansible.builtin.import_playbook: single_group_playbooks/compute_vm.yml
 
-- import_playbook: single_group_playbooks/user_interface.yml
+- name: Run playbook for documentation servers.
+  ansible.builtin.import_playbook: single_group_playbooks/docs.yml
 
-- import_playbook: single_group_playbooks/compute_vm.yml
+- name: Run playbook for data transfer servers.
+  ansible.builtin.import_playbook: single_group_playbooks/data_transfer.yml
 
-- import_playbook: single_group_playbooks/docs.yml
+- name: Run playbook for rsyslog servers.
+  ansible.builtin.import_playbook: single_group_playbooks/rsyslog.yml
 ...