From 7d831b476f37afa4e89eeace4f4f42c9f97072da Mon Sep 17 00:00:00 2001 From: pneerincx Date: Mon, 3 Aug 2020 14:30:49 +0200 Subject: [PATCH 01/30] Removed dependency that is no longer used. --- galaxy-requirements.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/galaxy-requirements.yml b/galaxy-requirements.yml index 7679b9e6a..3ef758b39 100644 --- a/galaxy-requirements.yml +++ b/galaxy-requirements.yml @@ -1,7 +1,6 @@ --- - src: geerlingguy.firewall version: 2.4.0 -- src: geerlingguy.postfix - src: geerlingguy.repo-epel - src: geerlingguy.security ... \ No newline at end of file From 9de4eded0787b9ae0add19926fd1764d49963ca0 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Tue, 4 Aug 2020 13:51:08 +0200 Subject: [PATCH 02/30] Updated README for static inventories relocated to subdir. --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c2b81f72f..daffbabc2 100644 --- a/README.md +++ b/README.md @@ -123,9 +123,9 @@ Deploying a fully functional virtual cluster from scratch involves the following 3. Configure Ansible settings including the vault. - To create a new virtual cluster you will need ```group_vars``` and an inventory for that HPC cluster: + To create a new virtual cluster you will need ```group_vars``` and a static inventory for that HPC cluster: - * See the ```*_hosts.ini``` files for existing clusters for examples to create a new ```[name-of-the-cluster]*_hosts.ini```. + * See the ```static_inventories/*_hosts.ini``` files for existing clusters for examples to create a new ```[name-of-the-cluster]*_hosts.ini```. * Create a ```group_vars/[name-of-the-cluster]/``` folder with a ```vars.yml```. You'll find and example ```vars.yml``` file in ```group_vars/template/```. 
To generate a new ```secrets.yml``` with new random passwords for the various daemons/components and encrypt this new ```secrets.yml``` file: @@ -196,7 +196,7 @@ Deploying a fully functional virtual cluster from scratch involves the following Some examples for the *Talos* development cluster: * Configure the dynamic inventory and jumphost for the *Talos* test cluster: ```bash - export AI_INVENTORY='talos_hosts.ini' + export AI_INVENTORY='static_inventories/talos_hosts.ini' export AI_PROXY='reception' export ANSIBLE_VAULT_IDENTITY_LIST='all@.vault/vault_pass.txt.all, talos@.vault/vault_pass.txt.talos' ``` @@ -206,7 +206,7 @@ Deploying a fully functional virtual cluster from scratch involves the following . ./lor-init lof-config talos ``` - * Firstly + * Firstly, * Create local admin accounts, which can then be used to deploy the rest of the playbook. * Deploy the signed hosts keys. Without local admin accounts we'll need to use either a ```root``` account for direct login or the default user account of the image used to create the VMs. From 611c6643eb7f6778cec88a26e2ece32c8937a027 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Tue, 4 Aug 2020 13:56:41 +0200 Subject: [PATCH 03/30] Removed unnecessary slurm-management host group: use sys-admin-interface instead. 
--- roles/slurm-client/tasks/main.yml | 4 ++-- roles/slurm-management/templates/slurm.conf | 6 +++--- single_role_playbooks/cadvisor.yml | 2 +- single_role_playbooks/fuse-layer.yml | 1 + single_role_playbooks/grafana.yml | 2 +- single_role_playbooks/monitoring.yml | 2 +- single_role_playbooks/prom_server.yml | 2 +- single_role_playbooks/slurm-client.yml | 4 ++-- single_role_playbooks/slurm-management.yml | 2 +- static_inventories/fender_hosts.ini | 3 --- static_inventories/gearshift_hosts.ini | 3 --- static_inventories/hyperchicken_hosts.ini | 3 --- static_inventories/marvin_hosts.ini | 3 --- static_inventories/nibbler-hosts.ini | 3 --- static_inventories/talos_hosts.ini | 3 --- 15 files changed, 13 insertions(+), 30 deletions(-) diff --git a/roles/slurm-client/tasks/main.yml b/roles/slurm-client/tasks/main.yml index e133a5499..87da7ce25 100644 --- a/roles/slurm-client/tasks/main.yml +++ b/roles/slurm-client/tasks/main.yml @@ -1,9 +1,9 @@ --- -- name: 'Gather facts from servers in "slurm-management" group.' +- name: 'Gather facts from servers in "sys-admin-interface" group.' setup: delegate_to: "{{ item }}" delegate_facts: true - with_items: "{{ groups['slurm-management'] }}" + with_items: "{{ groups['sys-admin-interface'] }}" - name: 'Include Slurm defaults from "slurm-management" role.' 
include_vars: diff --git a/roles/slurm-management/templates/slurm.conf b/roles/slurm-management/templates/slurm.conf index 2570732ae..6b4a2a53e 100644 --- a/roles/slurm-management/templates/slurm.conf +++ b/roles/slurm-management/templates/slurm.conf @@ -1,6 +1,6 @@ ClusterName={{ slurm_cluster_name }} -ControlMachine={{ hostvars[groups['slurm-management'][0]]['ansible_hostname'] }} -ControlAddr={{ hostvars[groups['slurm-management'][0]]['ansible_hostname'] }} +ControlMachine={{ hostvars[groups['sys-admin-interface'][0]]['ansible_hostname'] }} +ControlAddr={{ hostvars[groups['sys-admin-interface'][0]]['ansible_hostname'] }} #BackupController= #BackupAddr= # @@ -120,7 +120,7 @@ AccountingStorageEnforce=limits,qos # will also enable: associations #AcctGatherProfileType=acct_gather_profile/hdf5 #JobAcctGatherFrequency=30 AccountingStorageType=accounting_storage/slurmdbd -AccountingStorageHost={{ hostvars[groups['slurm-management'][0]]['ansible_hostname'] }} +AccountingStorageHost={{ hostvars[groups['sys-admin-interface'][0]]['ansible_hostname'] }} #AccountingStorageLoc=/var/log/slurm/slurm.accounting #AccountingStoragePass= #AccountingStorageUser= diff --git a/single_role_playbooks/cadvisor.yml b/single_role_playbooks/cadvisor.yml index 98c529c15..36f022524 100644 --- a/single_role_playbooks/cadvisor.yml +++ b/single_role_playbooks/cadvisor.yml @@ -1,5 +1,5 @@ --- -- hosts: slurm-management +- hosts: sys-admin-interface roles: - cadvisor ... diff --git a/single_role_playbooks/fuse-layer.yml b/single_role_playbooks/fuse-layer.yml index 9bf036172..87dba9d91 100644 --- a/single_role_playbooks/fuse-layer.yml +++ b/single_role_playbooks/fuse-layer.yml @@ -2,3 +2,4 @@ - hosts: user-interface roles: - fuse-layer +... 
diff --git a/single_role_playbooks/grafana.yml b/single_role_playbooks/grafana.yml index d856ecb8a..85bac0ed7 100644 --- a/single_role_playbooks/grafana.yml +++ b/single_role_playbooks/grafana.yml @@ -1,5 +1,5 @@ --- -- hosts: slurm-management +- hosts: sys-admin-interface roles: - grafana ... diff --git a/single_role_playbooks/monitoring.yml b/single_role_playbooks/monitoring.yml index 1bc1ba13f..99d31eac9 100644 --- a/single_role_playbooks/monitoring.yml +++ b/single_role_playbooks/monitoring.yml @@ -1,6 +1,6 @@ --- - name: 'Install monitoring tools.' - hosts: slurm-management + hosts: sys-admin-interface roles: - prom_server - grafana diff --git a/single_role_playbooks/prom_server.yml b/single_role_playbooks/prom_server.yml index 03174f789..4eee6b60d 100644 --- a/single_role_playbooks/prom_server.yml +++ b/single_role_playbooks/prom_server.yml @@ -1,5 +1,5 @@ --- -- hosts: slurm-management +- hosts: sys-admin-interface roles: - prom_server ... diff --git a/single_role_playbooks/slurm-client.yml b/single_role_playbooks/slurm-client.yml index 463f2779f..158c7d202 100644 --- a/single_role_playbooks/slurm-client.yml +++ b/single_role_playbooks/slurm-client.yml @@ -1,6 +1,6 @@ --- -- hosts: slurm-management - name: Dummy to gather facts +- name: Dummy to gather facts + hosts: sys-admin-interface tasks: [] - name: Install virtual compute nodes diff --git a/single_role_playbooks/slurm-management.yml b/single_role_playbooks/slurm-management.yml index 1799eb6a3..0343c4dd5 100644 --- a/single_role_playbooks/slurm-management.yml +++ b/single_role_playbooks/slurm-management.yml @@ -1,5 +1,5 @@ --- -- hosts: slurm-management +- hosts: sys-admin-interface roles: - mariadb - slurm-management diff --git a/static_inventories/fender_hosts.ini b/static_inventories/fender_hosts.ini index ca70b9bc4..3385890eb 100644 --- a/static_inventories/fender_hosts.ini +++ b/static_inventories/fender_hosts.ini @@ -4,9 +4,6 @@ corridor [docs] docs -[slurm-management] -fd-sai - 
[sys-admin-interface] fd-sai diff --git a/static_inventories/gearshift_hosts.ini b/static_inventories/gearshift_hosts.ini index 3165dd367..f2c981652 100644 --- a/static_inventories/gearshift_hosts.ini +++ b/static_inventories/gearshift_hosts.ini @@ -4,9 +4,6 @@ airlock [docs] docs -[slurm-management] -imperator - [sys-admin-interface] imperator diff --git a/static_inventories/hyperchicken_hosts.ini b/static_inventories/hyperchicken_hosts.ini index 625fa1e57..ad680b82c 100644 --- a/static_inventories/hyperchicken_hosts.ini +++ b/static_inventories/hyperchicken_hosts.ini @@ -4,9 +4,6 @@ portal [docs] docs -[slurm-management] -hc-sai - [sys-admin-interface] hc-sai diff --git a/static_inventories/marvin_hosts.ini b/static_inventories/marvin_hosts.ini index e4ad83a94..09d43c732 100644 --- a/static_inventories/marvin_hosts.ini +++ b/static_inventories/marvin_hosts.ini @@ -4,9 +4,6 @@ dockingport [docs] docs -[slurm-management] -mv-sai - [sys-admin-interface] mv-sai diff --git a/static_inventories/nibbler-hosts.ini b/static_inventories/nibbler-hosts.ini index 5486fdf09..24a534c08 100644 --- a/static_inventories/nibbler-hosts.ini +++ b/static_inventories/nibbler-hosts.ini @@ -4,9 +4,6 @@ portal ansible_host=10.0.42.3 [nfs-server] nb-nfs ansible_host=10.0.42.16 -[slurm-management] -nb-sai ansible_host=10.0.42.12 - [sys-admin-interface] nb-sai ansible_host=10.0.42.12 diff --git a/static_inventories/talos_hosts.ini b/static_inventories/talos_hosts.ini index a5a4c5313..220c20005 100644 --- a/static_inventories/talos_hosts.ini +++ b/static_inventories/talos_hosts.ini @@ -7,9 +7,6 @@ talos [docs] docs -[slurm-management] -tl-sai - [sys-admin-interface] tl-sai From 0ba59ac665eab54aeb74e8b66142d2554458627e Mon Sep 17 00:00:00 2001 From: pneerincx Date: Tue, 4 Aug 2020 14:04:40 +0200 Subject: [PATCH 04/30] Updated main cluster.yml playbook: Only deploy grafany-proxy on airlock as it will crash the playbook on other jumphosts due to hard-coded values for airlock, re-organised and 
grouped common role used by all cluster machines, so they are deployed in parallel for faster deployments, inserted ldap-server role in the right place, so roles that depend on regular accounts and groups being present do not crash, added missing subgroup_directories and fuse-layer roles for UIs, use sys-admin-interface host group instead of removed slurm-management host group. --- cluster.yml | 108 ++++++++++++++++++++++------------------------------ 1 file changed, 45 insertions(+), 63 deletions(-) diff --git a/cluster.yml b/cluster.yml index f98d91a5d..dad191d7b 100644 --- a/cluster.yml +++ b/cluster.yml @@ -2,9 +2,8 @@ # Order of deployment required to prevent chicken versus the egg issues: # 0. For all deployment phases: # export AI_PROXY="${jumphost_name}" -# export AI_INVENTORY="${cluster_name}_hosts.ini" +# export AI_INVENTORY="static_inventories/${cluster_name}_hosts.ini" # ANSIBLE_VAULT_PASSWORD_FILE=".vault_pass.txt.${cluster_name}" -# # 1. Use standard CentOS cloud image user 'centos' or 'root' user and without host key checking: # export ANSIBLE_HOST_KEY_CHECKING=False # ansible-playbook -i inventory.py -u centos -l 'jumphost,cluster' single_role_playbooks/admin-users.yml @@ -17,13 +16,19 @@ # ansible-playbook -i inventory.py -u [admin_account] cluster.yml # This will configure: # A. Jumphost first as it is required to access the other machines. -# B. SAI as it is required to -# * configure layout on shared storage devices used by other machines. -# * configure Slurm control and Slurm database. -# C. DAI -# D. UI -# E. Compute nodes -# F. Documentation server +# B. Basic roles for all cluster machines part 1: +# * Roles that do NOT require regular accounts or groups to be present. +# C. An LDAP with regular user accounts, which may be required for additional roles. +# (E.g. a chmod or chgrp for a file/folder requires the corresponding user or group to be present.) +# D. 
Basic roles for all cluster machines part 2: +# * Roles that DO depend on regular accounts and groups. +# E. SAI as it is required to: +# * Configure layout on shared storage devices used by other machines. +# * Configure Slurm control and Slurm database. +# F. DAI +# G. UI +# H. Compute nodes +# I. Documentation server # - name: 'Sanity checks before we start.' hosts: all @@ -47,7 +52,7 @@ - sshd - node_exporter - {role: geerlingguy.security, become: true} - - grafana_proxy + - {role: grafana_proxy, when: ansible_hostname == 'airlock'} - regular-users tasks: - name: 'Install cron job to reboot jumphost regularly to activate kernel updates.' @@ -61,9 +66,9 @@ cron_file: reboot become: true -- name: 'B. Roles for SAIs.' +- name: 'B. Basic roles for all cluster machines part 1.' hosts: - - sys-admin-interface + - cluster roles: - admin-users - ssh_host_signer @@ -71,18 +76,35 @@ - spacewalk_client - logins - figlet_motd - - mount-volume - - ldap - node_exporter - static-hostname-lookup - cluster - - sshd - resolver - - shared_storage + - coredumps + +- name: 'C. Create LDAP account server.' + hosts: + - ldap-server + roles: + - ldap-server + when: + - use_ldap | default(true, true) | bool + - create_ldap | default(false, true) | bool + +- name: 'D. Basic roles for all cluster machines part 2.' + hosts: + - cluster + roles: + - ldap # client + - sshd - regular-users + - shared_storage -- hosts: slurm-management +- name: 'E. Roles for SAIs.' + hosts: + - sys-admin-interface roles: + - mount-volume - slurm-management - prom_server - grafana @@ -94,70 +116,30 @@ hostname_node0: "{{ ansible_hostname }}" ip_node0: "{{ ansible_default_ipv4['address'] }}" -- name: 'C. Roles for DAIs.' +- name: 'F. Roles for DAIs.' 
hosts: deploy-admin-interface roles: - - admin-users - - ssh_host_signer - - ssh_known_hosts - - spacewalk_client - - logins - - figlet_motd - mount-volume - build-environment - - ldap - - node_exporter - - static-hostname-lookup - - cluster - - sshd - - resolver - - shared_storage - - regular-users - envsync -- name: 'D. Roles for UIs.' +- name: 'G. Roles for UIs.' hosts: user-interface roles: - - admin-users - - ssh_host_signer - - ssh_known_hosts - - spacewalk_client - - logins - - figlet_motd - build-environment - - ldap - - node_exporter - - static-hostname-lookup - - cluster - - sshd - - resolver - - shared_storage - slurm_exporter - slurm-client - - regular-users - sudoers + - subgroup_directories + - {role fuse-layer, when: fuse_mountpoint is defined and fuse_mountpoint | length >= 1} -- name: 'E. Roles for compute nodes.' +- name: 'H. Roles for compute nodes.' hosts: compute-vm roles: - - admin-users - - ssh_host_signer - - ssh_known_hosts - - spacewalk_client - - logins - - figlet_motd - mount-volume - - ldap - - node_exporter - - static-hostname-lookup - - cluster - - sshd - - resolver - - shared_storage - slurm-client - - regular-users -- name: 'F. Roles for documentation servers.' +- name: 'I. Roles for documentation servers.' hosts: - docs roles: From 3129a7aa9c899199afb35dc8792cc53f29dabb47 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Mon, 3 Aug 2020 14:30:49 +0200 Subject: [PATCH 05/30] Removed dependency that is no longer used. --- galaxy-requirements.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/galaxy-requirements.yml b/galaxy-requirements.yml index 7679b9e6a..3ef758b39 100644 --- a/galaxy-requirements.yml +++ b/galaxy-requirements.yml @@ -1,7 +1,6 @@ --- - src: geerlingguy.firewall version: 2.4.0 -- src: geerlingguy.postfix - src: geerlingguy.repo-epel - src: geerlingguy.security ... 
\ No newline at end of file From 6e26743879b8574326b173249aa3400224433fdb Mon Sep 17 00:00:00 2001 From: pneerincx Date: Tue, 4 Aug 2020 13:51:08 +0200 Subject: [PATCH 06/30] Updated README for static inventories relocated to subdir. --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c2b81f72f..daffbabc2 100644 --- a/README.md +++ b/README.md @@ -123,9 +123,9 @@ Deploying a fully functional virtual cluster from scratch involves the following 3. Configure Ansible settings including the vault. - To create a new virtual cluster you will need ```group_vars``` and an inventory for that HPC cluster: + To create a new virtual cluster you will need ```group_vars``` and a static inventory for that HPC cluster: - * See the ```*_hosts.ini``` files for existing clusters for examples to create a new ```[name-of-the-cluster]*_hosts.ini```. + * See the ```static_inventories/*_hosts.ini``` files for existing clusters for examples to create a new ```[name-of-the-cluster]*_hosts.ini```. * Create a ```group_vars/[name-of-the-cluster]/``` folder with a ```vars.yml```. You'll find and example ```vars.yml``` file in ```group_vars/template/```. To generate a new ```secrets.yml``` with new random passwords for the various daemons/components and encrypt this new ```secrets.yml``` file: @@ -196,7 +196,7 @@ Deploying a fully functional virtual cluster from scratch involves the following Some examples for the *Talos* development cluster: * Configure the dynamic inventory and jumphost for the *Talos* test cluster: ```bash - export AI_INVENTORY='talos_hosts.ini' + export AI_INVENTORY='static_inventories/talos_hosts.ini' export AI_PROXY='reception' export ANSIBLE_VAULT_IDENTITY_LIST='all@.vault/vault_pass.txt.all, talos@.vault/vault_pass.txt.talos' ``` @@ -206,7 +206,7 @@ Deploying a fully functional virtual cluster from scratch involves the following . 
./lor-init lof-config talos ``` - * Firstly + * Firstly, * Create local admin accounts, which can then be used to deploy the rest of the playbook. * Deploy the signed hosts keys. Without local admin accounts we'll need to use either a ```root``` account for direct login or the default user account of the image used to create the VMs. From 0a9563f9387cf11f68af12d4f2bebc17cf6af749 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Tue, 4 Aug 2020 13:56:41 +0200 Subject: [PATCH 07/30] Removed unnecessary slurm-management host group: use sys-admin-interface instead. --- roles/slurm-client/tasks/main.yml | 4 ++-- roles/slurm-management/templates/slurm.conf | 6 +++--- single_role_playbooks/cadvisor.yml | 2 +- single_role_playbooks/fuse-layer.yml | 1 + single_role_playbooks/grafana.yml | 2 +- single_role_playbooks/monitoring.yml | 2 +- single_role_playbooks/prom_server.yml | 2 +- single_role_playbooks/slurm-client.yml | 4 ++-- single_role_playbooks/slurm-management.yml | 2 +- static_inventories/fender_hosts.ini | 3 --- static_inventories/gearshift_hosts.ini | 3 --- static_inventories/hyperchicken_hosts.ini | 3 --- static_inventories/marvin_hosts.ini | 3 --- static_inventories/nibbler-hosts.ini | 3 --- static_inventories/talos_hosts.ini | 3 --- 15 files changed, 13 insertions(+), 30 deletions(-) diff --git a/roles/slurm-client/tasks/main.yml b/roles/slurm-client/tasks/main.yml index e133a5499..87da7ce25 100644 --- a/roles/slurm-client/tasks/main.yml +++ b/roles/slurm-client/tasks/main.yml @@ -1,9 +1,9 @@ --- -- name: 'Gather facts from servers in "slurm-management" group.' +- name: 'Gather facts from servers in "sys-admin-interface" group.' setup: delegate_to: "{{ item }}" delegate_facts: true - with_items: "{{ groups['slurm-management'] }}" + with_items: "{{ groups['sys-admin-interface'] }}" - name: 'Include Slurm defaults from "slurm-management" role.' 
include_vars: diff --git a/roles/slurm-management/templates/slurm.conf b/roles/slurm-management/templates/slurm.conf index 2570732ae..6b4a2a53e 100644 --- a/roles/slurm-management/templates/slurm.conf +++ b/roles/slurm-management/templates/slurm.conf @@ -1,6 +1,6 @@ ClusterName={{ slurm_cluster_name }} -ControlMachine={{ hostvars[groups['slurm-management'][0]]['ansible_hostname'] }} -ControlAddr={{ hostvars[groups['slurm-management'][0]]['ansible_hostname'] }} +ControlMachine={{ hostvars[groups['sys-admin-interface'][0]]['ansible_hostname'] }} +ControlAddr={{ hostvars[groups['sys-admin-interface'][0]]['ansible_hostname'] }} #BackupController= #BackupAddr= # @@ -120,7 +120,7 @@ AccountingStorageEnforce=limits,qos # will also enable: associations #AcctGatherProfileType=acct_gather_profile/hdf5 #JobAcctGatherFrequency=30 AccountingStorageType=accounting_storage/slurmdbd -AccountingStorageHost={{ hostvars[groups['slurm-management'][0]]['ansible_hostname'] }} +AccountingStorageHost={{ hostvars[groups['sys-admin-interface'][0]]['ansible_hostname'] }} #AccountingStorageLoc=/var/log/slurm/slurm.accounting #AccountingStoragePass= #AccountingStorageUser= diff --git a/single_role_playbooks/cadvisor.yml b/single_role_playbooks/cadvisor.yml index 98c529c15..36f022524 100644 --- a/single_role_playbooks/cadvisor.yml +++ b/single_role_playbooks/cadvisor.yml @@ -1,5 +1,5 @@ --- -- hosts: slurm-management +- hosts: sys-admin-interface roles: - cadvisor ... diff --git a/single_role_playbooks/fuse-layer.yml b/single_role_playbooks/fuse-layer.yml index 9bf036172..87dba9d91 100644 --- a/single_role_playbooks/fuse-layer.yml +++ b/single_role_playbooks/fuse-layer.yml @@ -2,3 +2,4 @@ - hosts: user-interface roles: - fuse-layer +... 
diff --git a/single_role_playbooks/grafana.yml b/single_role_playbooks/grafana.yml index d856ecb8a..85bac0ed7 100644 --- a/single_role_playbooks/grafana.yml +++ b/single_role_playbooks/grafana.yml @@ -1,5 +1,5 @@ --- -- hosts: slurm-management +- hosts: sys-admin-interface roles: - grafana ... diff --git a/single_role_playbooks/monitoring.yml b/single_role_playbooks/monitoring.yml index 1bc1ba13f..99d31eac9 100644 --- a/single_role_playbooks/monitoring.yml +++ b/single_role_playbooks/monitoring.yml @@ -1,6 +1,6 @@ --- - name: 'Install monitoring tools.' - hosts: slurm-management + hosts: sys-admin-interface roles: - prom_server - grafana diff --git a/single_role_playbooks/prom_server.yml b/single_role_playbooks/prom_server.yml index 03174f789..4eee6b60d 100644 --- a/single_role_playbooks/prom_server.yml +++ b/single_role_playbooks/prom_server.yml @@ -1,5 +1,5 @@ --- -- hosts: slurm-management +- hosts: sys-admin-interface roles: - prom_server ... diff --git a/single_role_playbooks/slurm-client.yml b/single_role_playbooks/slurm-client.yml index 463f2779f..158c7d202 100644 --- a/single_role_playbooks/slurm-client.yml +++ b/single_role_playbooks/slurm-client.yml @@ -1,6 +1,6 @@ --- -- hosts: slurm-management - name: Dummy to gather facts +- name: Dummy to gather facts + hosts: sys-admin-interface tasks: [] - name: Install virtual compute nodes diff --git a/single_role_playbooks/slurm-management.yml b/single_role_playbooks/slurm-management.yml index 1799eb6a3..0343c4dd5 100644 --- a/single_role_playbooks/slurm-management.yml +++ b/single_role_playbooks/slurm-management.yml @@ -1,5 +1,5 @@ --- -- hosts: slurm-management +- hosts: sys-admin-interface roles: - mariadb - slurm-management diff --git a/static_inventories/fender_hosts.ini b/static_inventories/fender_hosts.ini index ca70b9bc4..3385890eb 100644 --- a/static_inventories/fender_hosts.ini +++ b/static_inventories/fender_hosts.ini @@ -4,9 +4,6 @@ corridor [docs] docs -[slurm-management] -fd-sai - 
[sys-admin-interface] fd-sai diff --git a/static_inventories/gearshift_hosts.ini b/static_inventories/gearshift_hosts.ini index 3165dd367..f2c981652 100644 --- a/static_inventories/gearshift_hosts.ini +++ b/static_inventories/gearshift_hosts.ini @@ -4,9 +4,6 @@ airlock [docs] docs -[slurm-management] -imperator - [sys-admin-interface] imperator diff --git a/static_inventories/hyperchicken_hosts.ini b/static_inventories/hyperchicken_hosts.ini index 625fa1e57..ad680b82c 100644 --- a/static_inventories/hyperchicken_hosts.ini +++ b/static_inventories/hyperchicken_hosts.ini @@ -4,9 +4,6 @@ portal [docs] docs -[slurm-management] -hc-sai - [sys-admin-interface] hc-sai diff --git a/static_inventories/marvin_hosts.ini b/static_inventories/marvin_hosts.ini index e4ad83a94..09d43c732 100644 --- a/static_inventories/marvin_hosts.ini +++ b/static_inventories/marvin_hosts.ini @@ -4,9 +4,6 @@ dockingport [docs] docs -[slurm-management] -mv-sai - [sys-admin-interface] mv-sai diff --git a/static_inventories/nibbler-hosts.ini b/static_inventories/nibbler-hosts.ini index 5486fdf09..24a534c08 100644 --- a/static_inventories/nibbler-hosts.ini +++ b/static_inventories/nibbler-hosts.ini @@ -4,9 +4,6 @@ portal ansible_host=10.0.42.3 [nfs-server] nb-nfs ansible_host=10.0.42.16 -[slurm-management] -nb-sai ansible_host=10.0.42.12 - [sys-admin-interface] nb-sai ansible_host=10.0.42.12 diff --git a/static_inventories/talos_hosts.ini b/static_inventories/talos_hosts.ini index a5a4c5313..220c20005 100644 --- a/static_inventories/talos_hosts.ini +++ b/static_inventories/talos_hosts.ini @@ -7,9 +7,6 @@ talos [docs] docs -[slurm-management] -tl-sai - [sys-admin-interface] tl-sai From 4c053c0b56e7a5d37126d503e7435c7ebbc7d1f8 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Tue, 4 Aug 2020 14:04:40 +0200 Subject: [PATCH 08/30] Updated main cluster.yml playbook: Only deploy grafany-proxy on airlock as it will crash the playbook on other jumphosts due to hard-coded values for airlock, re-organised and 
grouped common role used by all cluster machines, so they are deployed in parallel for faster deployments, inserted ldap-server role in the right place, so roles that depend on regular accounts and groups being present do not crash, added missing subgroup_directories and fuse-layer roles for UIs, use sys-admin-interface host group instead of removed slurm-management host group. --- cluster.yml | 108 ++++++++++++++++++++++------------------------------ 1 file changed, 45 insertions(+), 63 deletions(-) diff --git a/cluster.yml b/cluster.yml index df4e09e04..5476e3487 100644 --- a/cluster.yml +++ b/cluster.yml @@ -2,9 +2,8 @@ # Order of deployment required to prevent chicken versus the egg issues: # 0. For all deployment phases: # export AI_PROXY="${jumphost_name}" -# export AI_INVENTORY="${cluster_name}_hosts.ini" +# export AI_INVENTORY="static_inventories/${cluster_name}_hosts.ini" # ANSIBLE_VAULT_PASSWORD_FILE=".vault_pass.txt.${cluster_name}" -# # 1. Use standard CentOS cloud image user 'centos' or 'root' user and without host key checking: # export ANSIBLE_HOST_KEY_CHECKING=False # ansible-playbook -i inventory.py -u centos -l 'jumphost,cluster' single_role_playbooks/admin-users.yml @@ -17,13 +16,19 @@ # ansible-playbook -i inventory.py -u [admin_account] cluster.yml # This will configure: # A. Jumphost first as it is required to access the other machines. -# B. SAI as it is required to -# * configure layout on shared storage devices used by other machines. -# * configure Slurm control and Slurm database. -# C. DAI -# D. UI -# E. Compute nodes -# F. Documentation server +# B. Basic roles for all cluster machines part 1: +# * Roles that do NOT require regular accounts or groups to be present. +# C. An LDAP with regular user accounts, which may be required for additional roles. +# (E.g. a chmod or chgrp for a file/folder requires the corresponding user or group to be present.) +# D. 
Basic roles for all cluster machines part 2: +# * Roles that DO depend on regular accounts and groups. +# E. SAI as it is required to: +# * Configure layout on shared storage devices used by other machines. +# * Configure Slurm control and Slurm database. +# F. DAI +# G. UI +# H. Compute nodes +# I. Documentation server # - name: 'Sanity checks before we start.' hosts: all @@ -47,7 +52,7 @@ - sshd - node_exporter - {role: geerlingguy.security, become: true} - - grafana_proxy + - {role: grafana_proxy, when: ansible_hostname == 'airlock'} - regular-users tasks: - name: 'Install cron job to reboot jumphost regularly to activate kernel updates.' @@ -61,9 +66,9 @@ cron_file: reboot become: true -- name: 'B. Roles for SAIs.' +- name: 'B. Basic roles for all cluster machines part 1.' hosts: - - sys-admin-interface + - cluster roles: - admin-users - ssh_host_signer @@ -71,18 +76,35 @@ - spacewalk_client - logins - figlet_motd - - mount-volume - - ldap - node_exporter - static-hostname-lookup - cluster - - sshd - resolver - - shared_storage + - coredumps + +- name: 'C. Create LDAP account server.' + hosts: + - ldap-server + roles: + - ldap-server + when: + - use_ldap | default(true, true) | bool + - create_ldap | default(false, true) | bool + +- name: 'D. Basic roles for all cluster machines part 2.' + hosts: + - cluster + roles: + - ldap # client + - sshd - regular-users + - shared_storage -- hosts: slurm-management +- name: 'E. Roles for SAIs.' + hosts: + - sys-admin-interface roles: + - mount-volume - slurm-management - prom_server - grafana @@ -94,70 +116,30 @@ hostname_node0: "{{ ansible_hostname }}" ip_node0: "{{ ansible_default_ipv4['address'] }}" -- name: 'C. Roles for DAIs.' +- name: 'F. Roles for DAIs.' 
hosts: deploy-admin-interface roles: - - admin-users - - ssh_host_signer - - ssh_known_hosts - - spacewalk_client - - logins - - figlet_motd - mount-volume - build-environment - - ldap - - node_exporter - - static-hostname-lookup - - cluster - - sshd - - resolver - - shared_storage - - regular-users - envsync -- name: 'D. Roles for UIs.' +- name: 'G. Roles for UIs.' hosts: user-interface roles: - - admin-users - - ssh_host_signer - - ssh_known_hosts - - spacewalk_client - - logins - - figlet_motd - build-environment - - ldap - - node_exporter - - static-hostname-lookup - - cluster - - sshd - - resolver - - shared_storage - slurm_exporter - slurm-client - - regular-users - sudoers + - subgroup_directories + - {role fuse-layer, when: fuse_mountpoint is defined and fuse_mountpoint | length >= 1} -- name: 'E. Roles for compute nodes.' +- name: 'H. Roles for compute nodes.' hosts: compute-vm roles: - - admin-users - - ssh_host_signer - - ssh_known_hosts - - spacewalk_client - - logins - - figlet_motd - mount-volume - - ldap - - node_exporter - - static-hostname-lookup - - cluster - - sshd - - resolver - - shared_storage - slurm-client - - regular-users -- name: 'F. Roles for documentation servers.' +- name: 'I. Roles for documentation servers.' hosts: - docs roles: From 2c5c55281a110d52b0431f31afb0d272ef4dd463 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Tue, 4 Aug 2020 15:34:33 +0200 Subject: [PATCH 09/30] Fixed typos / syntax issues. --- cluster.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cluster.yml b/cluster.yml index 5476e3487..bca70a2e0 100644 --- a/cluster.yml +++ b/cluster.yml @@ -86,10 +86,10 @@ hosts: - ldap-server roles: - - ldap-server - when: - - use_ldap | default(true, true) | bool - - create_ldap | default(false, true) | bool + - role: openldap + when: + - use_ldap | default(true, true) | bool + - create_ldap | default(false, true) | bool - name: 'D. Basic roles for all cluster machines part 2.' 
hosts: @@ -131,7 +131,8 @@ - slurm-client - sudoers - subgroup_directories - - {role fuse-layer, when: fuse_mountpoint is defined and fuse_mountpoint | length >= 1} + - role: fuse-layer + when: fuse_mountpoint is defined and fuse_mountpoint | length >= 1} - name: 'H. Roles for compute nodes.' hosts: compute-vm From 08597d23ee20fd1ca0ccfce7e924b275b77176a3 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Tue, 4 Aug 2020 16:04:43 +0200 Subject: [PATCH 10/30] Fix: made spacewalk_client role idempotent. --- roles/spacewalk_client/tasks/main.yml | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/roles/spacewalk_client/tasks/main.yml b/roles/spacewalk_client/tasks/main.yml index 66d6ed097..7142e982b 100644 --- a/roles/spacewalk_client/tasks/main.yml +++ b/roles/spacewalk_client/tasks/main.yml @@ -51,22 +51,19 @@ become: true - name: 'Remove all (non-spacewalk) repo config files from /etc/yum.repos.d/.' - shell: 'rm -rf /etc/yum.repos.d/*' + shell: 'rm -rfv /etc/yum.repos.d/*' args: warn: false - become: true - -- name: 'Clear the yum cache.' - command: 'yum clean all' - args: - warn: false - ignore_errors: yes + register: deleted_items + changed_when: deleted_items.stdout | length >= 1 + failed_when: deleted_items.stderr | length >= 1 become: true - name: 'Upgrade all packages to version specified in spacewalk channel.' yum: name: '*' state: latest + update_cache: yes exclude: 'slurm*' # Specific Slurm versions configured in slurm-* roles, because we use a custom patched version. become: true ... From 6561ec6545b1e2a4579050c06b502f38466c5f5b Mon Sep 17 00:00:00 2001 From: pneerincx Date: Tue, 4 Aug 2020 19:23:03 +0200 Subject: [PATCH 11/30] Added dummy play at beginning to ping jumphost and establish a persistent SSH connection. --- cluster.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cluster.yml b/cluster.yml index bca70a2e0..598d9781c 100644 --- a/cluster.yml +++ b/cluster.yml @@ -30,6 +30,15 @@ # H. Compute nodes # I. 
Documentation server # + +# +# Dummy play to ping jumphosts and establish a persisting SSH connection +# before trying to connect to the machines behind the jumphost, +# which may otherwise fail when SSH connection multiplexing is used. +# +- name: 'Dummy play to ping jumphosts and establish a persistent SSH connection.' + hosts: jumphost + - name: 'Sanity checks before we start.' hosts: all pre_tasks: From f86d02de3d693775940ff5aa41fa584fddeffe0d Mon Sep 17 00:00:00 2001 From: pneerincx Date: Thu, 6 Aug 2020 18:01:03 +0200 Subject: [PATCH 12/30] Consistent use of ldap_* variable names, ldap_uri now includes the protocol to make it a real URI and remove hardcoded ldap:// or ldaps:// in various places, removed ldaps_uri variable, create private and primary groups for the home dirs of LDAP users just like for local users and fixed wrong local users/groups created on Hyperchicken that were conflicting with with LDAP entries. --- group_vars/boxy-cluster/vars.yml | 2 +- group_vars/fender-cluster/vars.yml | 3 +- group_vars/gearshift-cluster/vars.yml | 3 +- group_vars/hyperchicken-cluster/vars.yml | 9 +----- group_vars/marvin-cluster/vars.yml | 3 +- group_vars/nibbler-cluster/vars.yml | 3 +- group_vars/talos-cluster/vars.yml | 3 +- roles/ldap/defaults/main.yml | 3 +- roles/ldap/templates/ldap.conf | 2 +- roles/ldap/templates/nslcd.conf | 2 +- roles/openldap/templates/phpldapadmin.service | 2 +- .../tasks/ldap-regular-users.yml | 28 +++++++++++++------ .../tasks/create_subgroup_directories.yml | 4 +-- 13 files changed, 33 insertions(+), 34 deletions(-) diff --git a/group_vars/boxy-cluster/vars.yml b/group_vars/boxy-cluster/vars.yml index fd80cfa45..8216914ed 100644 --- a/group_vars/boxy-cluster/vars.yml +++ b/group_vars/boxy-cluster/vars.yml @@ -2,7 +2,7 @@ slurm_cluster_name: 'boxy' slurm_cluster_domain: 'hpc.rug.nl' stack_prefix: 'bx' -uri_ldap: 172.23.40.249 +ldap_uri: ldap://172.23.40.249 ldap_base: ou=umcg,o=asds ldap_binddn: cn=clusteradminumcg,o=asds regular_groups: 
diff --git a/group_vars/fender-cluster/vars.yml b/group_vars/fender-cluster/vars.yml index a63eac7b7..0ecc67af1 100644 --- a/group_vars/fender-cluster/vars.yml +++ b/group_vars/fender-cluster/vars.yml @@ -30,8 +30,7 @@ ui_ethernet_interfaces: ssh_host_signer_ca_private_key: "{{ ssh_host_signer_ca_keypair_dir }}/ca-key-production-ebi" use_ldap: yes create_ldap: yes -uri_ldap: fd-dai -uri_ldaps: fd-dai +ldap_uri: ldap://fd-dai ldap_port: 389 ldaps_port: 636 ldap_base: dc=hpc,dc=rug,dc=nl diff --git a/group_vars/gearshift-cluster/vars.yml b/group_vars/gearshift-cluster/vars.yml index 02870da60..ff6348a6d 100644 --- a/group_vars/gearshift-cluster/vars.yml +++ b/group_vars/gearshift-cluster/vars.yml @@ -59,8 +59,7 @@ ui_ethernet_interfaces: ssh_host_signer_ca_private_key: "{{ ssh_host_signer_ca_keypair_dir }}/umcg-hpc-ca" use_ldap: yes create_ldap: no -uri_ldap: '172.23.40.249' -uri_ldaps: 'comanage-in.id.rug.nl' +ldap_uri: 'ldap://172.23.40.249' ldap_port: '389' ldaps_port: '636' ldap_base: 'ou=research,o=asds' diff --git a/group_vars/hyperchicken-cluster/vars.yml b/group_vars/hyperchicken-cluster/vars.yml index 5deeac1b6..e39246d72 100644 --- a/group_vars/hyperchicken-cluster/vars.yml +++ b/group_vars/hyperchicken-cluster/vars.yml @@ -30,8 +30,7 @@ ui_ethernet_interfaces: ssh_host_signer_ca_private_key: "{{ ssh_host_signer_ca_keypair_dir }}/umcg-hpc-development-ca" use_ldap: yes create_ldap: yes -uri_ldap: hc-dai -uri_ldaps: hc-dai +ldap_uri: ldap://hc-dai ldap_port: 389 ldaps_port: 636 ldap_base: dc=hpc,dc=rug,dc=nl @@ -65,9 +64,6 @@ nameservers: [ local_admin_groups: - 'admin' - 'docker' - - 'solve-rd' - - 'umcg-atd' - - 'depad' local_admin_users: - 'centos' - 'egon' @@ -77,9 +73,6 @@ local_admin_users: - 'morris' - 'pieter' - 'wim' - - 'umcg-atd-dm' - - 'solve-rd-dm' - - 'envsync' envsync_user: 'envsync' envsync_group: 'depad' hpc_env_prefix: '/apps' diff --git a/group_vars/marvin-cluster/vars.yml b/group_vars/marvin-cluster/vars.yml index ac2f18689..bebf2a9fa 
100644 --- a/group_vars/marvin-cluster/vars.yml +++ b/group_vars/marvin-cluster/vars.yml @@ -30,8 +30,7 @@ ui_ethernet_interfaces: ssh_host_signer_ca_private_key: "{{ ssh_host_signer_ca_keypair_dir }}/ca-key-production-ebi" use_ldap: yes create_ldap: yes -uri_ldap: mv-dai -uri_ldaps: mv-dai +ldap_uri: ldap://mv-dai ldap_port: 389 ldaps_port: 636 ldap_base: dc=ejp,dc=rd,dc=nl diff --git a/group_vars/nibbler-cluster/vars.yml b/group_vars/nibbler-cluster/vars.yml index 479229685..6420054e4 100644 --- a/group_vars/nibbler-cluster/vars.yml +++ b/group_vars/nibbler-cluster/vars.yml @@ -27,8 +27,7 @@ ui_ethernet_interfaces: ssh_host_signer_ca_private_key: "{{ ssh_host_signer_ca_keypair_dir }}/umcg-hpc-development-ca" use_ldap: yes create_ldap: no -uri_ldap: ldap.pilot.scz.lab.surf.nl -uri_ldaps: ldap.pilot.scz.lab.surf.nl +ldap_uri: ldap://ldap.pilot.scz.lab.surf.nl ldap_port: 636 ldaps_port: 636 ldap_base: o=ElixirNL,dc=pilot-clients,dc=scz,dc=lab,dc=surf,dc=nl diff --git a/group_vars/talos-cluster/vars.yml b/group_vars/talos-cluster/vars.yml index f4159f0fa..611126195 100644 --- a/group_vars/talos-cluster/vars.yml +++ b/group_vars/talos-cluster/vars.yml @@ -45,8 +45,7 @@ ui_ethernet_interfaces: ssh_host_signer_ca_private_key: "{{ ssh_host_signer_ca_keypair_dir }}/umcg-hpc-development-ca" use_ldap: yes create_ldap: no -uri_ldap: '172.23.40.249' -uri_ldaps: 'comanage-in.id.rug.nl' +ldap_uri: 'ldap://172.23.40.249' ldap_port: '389' ldaps_port: '636' ldap_base: 'ou=umcg,o=asds' diff --git a/roles/ldap/defaults/main.yml b/roles/ldap/defaults/main.yml index e5a9d4ff5..61e9ade5f 100644 --- a/roles/ldap/defaults/main.yml +++ b/roles/ldap/defaults/main.yml @@ -2,8 +2,7 @@ use_ldap: yes # needed for the sshd template ldap_port: 389 ldaps_port: 636 -uri_ldap: '' -uri_ldaps: '' +ldap_uri: '' ldap_base: '' ldap_binddn: '' ... 
diff --git a/roles/ldap/templates/ldap.conf b/roles/ldap/templates/ldap.conf index 7d7792f65..db68b379a 100644 --- a/roles/ldap/templates/ldap.conf +++ b/roles/ldap/templates/ldap.conf @@ -5,7 +5,7 @@ # See ldap.conf(5) for details. # -uri ldap://{{ uri_ldap }} +uri {{ ldap_uri }} base {{ ldap_base }} ssl no tls_cacertdir /etc/openldap/cacerts diff --git a/roles/ldap/templates/nslcd.conf b/roles/ldap/templates/nslcd.conf index 4045bdae0..34a38024c 100644 --- a/roles/ldap/templates/nslcd.conf +++ b/roles/ldap/templates/nslcd.conf @@ -2,7 +2,7 @@ uid nslcd gid ldap ssl no tls_cacertdir /etc/openldap/cacerts -uri ldap://{{ uri_ldap }} +uri {{ ldap_uri }} base {{ ldap_base }} {% if filter_passwd is defined %} filter passwd {{ filter_passwd }} diff --git a/roles/openldap/templates/phpldapadmin.service b/roles/openldap/templates/phpldapadmin.service index a55cb0877..1075e46a9 100644 --- a/roles/openldap/templates/phpldapadmin.service +++ b/roles/openldap/templates/phpldapadmin.service @@ -10,7 +10,7 @@ ExecStartPre=-/usr/bin/docker kill %n ExecStartPre=-/usr/bin/docker rm %n ExecStartPre=/usr/bin/docker pull osixia/phpldapadmin:0.7.2 ExecStart=/usr/bin/docker run -i --name %n --network host \ - --env PHPLDAPADMIN_LDAP_HOSTS={{ uri_ldap }} \ + --env PHPLDAPADMIN_LDAP_HOSTS={{ ldap_uri | regex_replace('^ldaps?://','')}} \ osixia/phpldapadmin:0.7.2 [Install] diff --git a/roles/regular-users/tasks/ldap-regular-users.yml b/roles/regular-users/tasks/ldap-regular-users.yml index bc0e28f32..616794999 100644 --- a/roles/regular-users/tasks/ldap-regular-users.yml +++ b/roles/regular-users/tasks/ldap-regular-users.yml @@ -5,7 +5,7 @@ objectClass: 'organizationalUnit' bind_dn: "{{ ldap_binddn }}" bind_pw: "{{ bindpw }}" - server_uri: 'ldap:///' + server_uri: "{{ ldap_uri }}" - name: 'Make sure we have a parent entry for groups.' 
ldap_entry: @@ -13,25 +13,37 @@ objectClass: 'organizationalUnit' bind_dn: "{{ ldap_binddn }}" bind_pw: "{{ bindpw }}" - server_uri: 'ldap:///' + server_uri: "{{ ldap_uri }}" -- name: 'Add groups to parent entry for groups.' +- name: 'Add regular groups to parent entry for groups.' ldap_entry: dn: "cn={{ item }},ou=groups,{{ ldap_base }}" objectClass: 'posixGroup' state: present - server_uri: 'ldap:///' + server_uri: "{{ ldap_uri }}" bind_dn: "{{ ldap_binddn }}" bind_pw: "{{ bindpw }}" attributes: gidNumber: "{{ auth_groups[item].gid }}" with_items: "{{ regular_groups }}" +- name: 'Add private groups for home dirs of users to parent entry for groups.' + ldap_entry: + dn: "cn={{ item.user }},ou=groups,{{ ldap_base }}" # Use same name as user's account name for user's private group. + objectClass: 'posixGroup' + state: present + server_uri: "{{ ldap_uri }}" + bind_dn: "{{ ldap_binddn }}" + bind_pw: "{{ bindpw }}" + attributes: + gidNumber: "{{ auth_users[item.user].uid }}" # Use same GID as user's UID for user's private group. + with_items: "{{ regular_users }}" + - name: 'Add users to parent entry for users.' ldap_entry: dn: "cn={{ item.user }},ou=users,{{ ldap_base }}" state: present - server_uri: 'ldap:///' + server_uri: "{{ ldap_uri }}" objectClass: - 'inetOrgPerson' - 'posixAccount' @@ -45,7 +57,7 @@ cn: "{{ item.user }}" uid: "{{ item.user }}" uidNumber: "{{ auth_users[item.user].uid }}" - gidNumber: "{{ auth_groups['users'].gid }}" + gidNumber: "{{ auth_users[item.user].uid }}" # primary group == private group for user. 
homeDirectory: "/home/{{ item.user }}" loginShell: '/bin/bash' sshPublicKey: "{{ auth_users[item.user].pub_keys }}" @@ -55,7 +67,7 @@ ldap_attr: dn: "cn={{ item.user }},ou=users,{{ ldap_base }}" state: exact - server_uri: 'ldap:///' + server_uri: "{{ ldap_uri }}" bind_dn: "{{ ldap_binddn }}" bind_pw: "{{ bindpw }}" name: 'sshPublicKey' @@ -66,7 +78,7 @@ ldap_attr: dn: "cn={{ item.1 }},ou=groups,{{ ldap_base }}" state: present - server_uri: 'ldap:///' + server_uri: "{{ ldap_uri }}" bind_dn: "{{ ldap_binddn }}" bind_pw: "{{ bindpw }}" name: 'memberUid' diff --git a/roles/subgroup_directories/tasks/create_subgroup_directories.yml b/roles/subgroup_directories/tasks/create_subgroup_directories.yml index f7a93e561..93e008f7d 100644 --- a/roles/subgroup_directories/tasks/create_subgroup_directories.yml +++ b/roles/subgroup_directories/tasks/create_subgroup_directories.yml @@ -2,7 +2,7 @@ - block: - name: "Get list of {{ group }} subgroups with version number from the LDAP." shell: | - ldapsearch -LLL -D '{{ ldap_binddn }}' -w '{{ bindpw }}' -b '{{ ldap_base }}' -H 'ldap://{{ uri_ldap }}' \ + ldapsearch -LLL -D '{{ ldap_binddn }}' -w '{{ bindpw }}' -b '{{ ldap_base }}' -H '{{ ldap_uri }}' \ "(ObjectClass=GroupofNames)" dn \ | tr "=," "\n" \ | fgrep "{{ group }}" \ @@ -16,7 +16,7 @@ - block: - name: "Get list of {{ group }} subgroups without version number and excluding *-dms groups from the LDAP." shell: | - ldapsearch -LLL -D '{{ ldap_binddn }}' -w '{{ bindpw }}' -b '{{ ldap_base }}' -H 'ldap://{{ uri_ldap }}' \ + ldapsearch -LLL -D '{{ ldap_binddn }}' -w '{{ bindpw }}' -b '{{ ldap_base }}' -H '{{ ldap_uri }}' \ "(ObjectClass=GroupofNames)" dn \ | tr "=," "\n" \ | fgrep "{{ group }}" \ From b158389c440ae95af700977c8eb10477de2c6cff Mon Sep 17 00:00:00 2001 From: pneerincx Date: Thu, 6 Aug 2020 18:01:58 +0200 Subject: [PATCH 13/30] Fixed permissions for config files created by openldap role. 
--- roles/openldap/tasks/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/roles/openldap/tasks/main.yml b/roles/openldap/tasks/main.yml index 55a060d97..54703f826 100644 --- a/roles/openldap/tasks/main.yml +++ b/roles/openldap/tasks/main.yml @@ -3,7 +3,7 @@ template: src: templates/{{ item }} dest: /etc/systemd/system/{{ item }} - mode: 644 + mode: 0600 owner: root group: root with_items: @@ -23,7 +23,7 @@ copy: src: files/01-overlay-memberof dest: /srv/openldap/custom/01-overlay-memberof - mode: 644 + mode: 0644 owner: root group: root become: true From ff1229dc4dd0f490c618819e5689c739164553cd Mon Sep 17 00:00:00 2001 From: pneerincx Date: Thu, 6 Aug 2020 18:23:36 +0200 Subject: [PATCH 14/30] Fixed syntax error. --- cluster.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cluster.yml b/cluster.yml index 598d9781c..671820649 100644 --- a/cluster.yml +++ b/cluster.yml @@ -141,7 +141,7 @@ - sudoers - subgroup_directories - role: fuse-layer - when: fuse_mountpoint is defined and fuse_mountpoint | length >= 1} + when: fuse_mountpoint is defined and fuse_mountpoint | length >= 1 - name: 'H. Roles for compute nodes.' hosts: compute-vm From 9d7062194d1290abbed63fe2a643d120237a3a7c Mon Sep 17 00:00:00 2001 From: pneerincx Date: Thu, 6 Aug 2020 18:46:56 +0200 Subject: [PATCH 15/30] Fixed some ansible linter errors. --- roles/fuse-layer/tasks/main.yml | 13 +++++++------ .../tasks/create_subgroup_directories.yml | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/roles/fuse-layer/tasks/main.yml b/roles/fuse-layer/tasks/main.yml index 4a3c1b4ab..4d2efe438 100644 --- a/roles/fuse-layer/tasks/main.yml +++ b/roles/fuse-layer/tasks/main.yml @@ -1,5 +1,5 @@ --- -- name: Install EGA Fuse Layer +- name: Install EGA Fuse client. yum: state: latest name: @@ -7,10 +7,11 @@ become: true notify: restart_fuser-layer -- file: +- name: Create /usr/local/fuse-layer directory. 
+ file: path: /usr/local/fuse-layer state: directory - mode: 0755 + mode: '0755' owner: root group: root become: true @@ -20,7 +21,7 @@ template: src: templates/ega-fuse-client.service dest: /etc/systemd/system/ega-fuse-client.service - mode: 644 + mode: '0644' owner: root group: root tags: @@ -28,11 +29,11 @@ become: true notify: restart_fuser-layer -- name: Install config.ini files. +- name: Install config.ini files. template: src: templates/config.ini dest: /usr/local/fuse-layer/config.ini - mode: 644 + mode: '0644' owner: root group: root become: true diff --git a/roles/subgroup_directories/tasks/create_subgroup_directories.yml b/roles/subgroup_directories/tasks/create_subgroup_directories.yml index 93e008f7d..795d4bb48 100644 --- a/roles/subgroup_directories/tasks/create_subgroup_directories.yml +++ b/roles/subgroup_directories/tasks/create_subgroup_directories.yml @@ -56,7 +56,7 @@ when: versioned_subgroups_list | length > 0 become: true become_user: "{{ group }}-dm" - + - name: "Create directory structure for projects on {{ lfs }}." block: - name: "Create /groups/{{ group }}/{{ lfs }}/projects directory." From 751f53c9d1b758b78d5d02d477e8cd6053dbd021 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Fri, 7 Aug 2020 15:51:53 +0200 Subject: [PATCH 16/30] Lowered allowed number of ansible-linter errors 18 -> 5 and disable check rule 701 related to Ansible Galaxy, which we do not use. --- .ansible-lint | 3 ++- .circleci/config.yml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.ansible-lint b/.ansible-lint index 1caaf4bf7..2a69bc008 100644 --- a/.ansible-lint +++ b/.ansible-lint @@ -3,5 +3,6 @@ exclude_paths: - '~/.ansible' # Exclude external playbooks. skip_list: # We explicitly use latest combined with other tech to pin versions (e.g. Spacewalk). - - '403' # "Package installs should not use latest". + - '403' # "Package installs should not use latest." + - '701' # "No 'galaxy_info' found in meta/main.yml of a role." ... 
diff --git a/.circleci/config.yml b/.circleci/config.yml index 6b2af9681..0268484cb 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -29,5 +29,5 @@ jobs: echo '###############################################' printf 'Counted %d ansible-lint errors.' ${errors:-0} echo '###############################################' - if (( errors > 18 )); then /bin/false; fi + if (( errors > 5 )); then /bin/false; fi ... \ No newline at end of file From 16afbf2d9b448a6f773005c1505d79a02d7b3d25 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Fri, 7 Aug 2020 15:52:32 +0200 Subject: [PATCH 17/30] Made build_lustre_client role idempotent. --- roles/cluster/tasks/build_lustre_client.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/roles/cluster/tasks/build_lustre_client.yml b/roles/cluster/tasks/build_lustre_client.yml index dc3f75888..fbe7f6861 100644 --- a/roles/cluster/tasks/build_lustre_client.yml +++ b/roles/cluster/tasks/build_lustre_client.yml @@ -5,5 +5,7 @@ dest: '/tmp/lustre-client-dkms-2.11.0-1.el7.src.rpm' - name: 'Build the Lustre client.' - command: rpmbuild --rebuild --without servers /tmp/lustre-client-dkms-2.11.0-1.el7.src.rpm + command: + cmd: 'rpmbuild --rebuild --without servers /tmp/lustre-client-dkms-2.11.0-1.el7.src.rpm' + creates: '/tmp/lustre-client-dkms-2.11.0-1.el7.src.rpm.rebuild' ... \ No newline at end of file From 95e0a7822e44aff4e31c24be45072d56e929ee28 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Fri, 7 Aug 2020 15:53:38 +0200 Subject: [PATCH 18/30] Fixed linter errors: added pipefail option to shell tasks. --- roles/cluster/tasks/main.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/roles/cluster/tasks/main.yml b/roles/cluster/tasks/main.yml index c95e3066f..306a97603 100644 --- a/roles/cluster/tasks/main.yml +++ b/roles/cluster/tasks/main.yml @@ -50,8 +50,10 @@ become: true - name: Check if rsync >= 3.1.2 is installed on the managed hosts. 
- shell: | - rsync --version 2>&1 | head -n 1 | sed 's|^rsync *version *\([0-9\.]*\).*$|\1|' | tr -d '\n' + shell: + cmd: | + set -o pipefail + rsync --version 2>&1 | head -n 1 | sed 's|^rsync *version *\([0-9\.]*\).*$|\1|' | tr -d '\n' args: warn: no changed_when: false @@ -66,8 +68,10 @@ failed_when: 'rsync_version_managed_host is failed or (rsync_version_managed_host.stdout is version_compare("3.1.2", operator="<"))' - name: Check if rsync >= 3.1.2 is installed on the control host. - shell: | - rsync --version 2>&1 | head -n 1 | sed 's|^rsync *version *\([0-9\.]*\).*$|\1|' | tr -d '\n' + shell: + cmd: | + set -o pipefail + rsync --version 2>&1 | head -n 1 | sed 's|^rsync *version *\([0-9\.]*\).*$|\1|' | tr -d '\n' args: warn: no changed_when: false From e7cc0f965a47a3e6b32fff464d9a408507086ef6 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Fri, 7 Aug 2020 15:54:15 +0200 Subject: [PATCH 19/30] Fixed linter error. --- roles/mariadb/tasks/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/roles/mariadb/tasks/main.yml b/roles/mariadb/tasks/main.yml index 14948b03d..5fd4ce7be 100644 --- a/roles/mariadb/tasks/main.yml +++ b/roles/mariadb/tasks/main.yml @@ -61,8 +61,8 @@ # This command will fail when the root password was set previously - name: 'Check if MariaDB/MySQL root password is set.' - shell: > - mysqladmin -u root status + command: + cmd: mysqladmin -u root status changed_when: false failed_when: false register: root_pwd_check From 99215c33a5538f91b018db223d8faabcef259000 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Fri, 7 Aug 2020 15:54:56 +0200 Subject: [PATCH 20/30] Fixed various linter errors in openldap role. 
--- roles/openldap/tasks/main.yml | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/roles/openldap/tasks/main.yml b/roles/openldap/tasks/main.yml index 54703f826..11b46ee45 100644 --- a/roles/openldap/tasks/main.yml +++ b/roles/openldap/tasks/main.yml @@ -1,8 +1,8 @@ --- -- name: install service file. +- name: Install service files. template: - src: templates/{{ item }} - dest: /etc/systemd/system/{{ item }} + src: "templates/{{ item }}" + dest: "/etc/systemd/system/{{ item }}" mode: 0600 owner: root group: root @@ -11,41 +11,33 @@ - phpldapadmin.service become: true -- name: make directory for the openldap ldif volume. +- name: Make directory for the openldap ldif volume. file: - path: /srv/openldap/custom + path: "/srv/openldap/custom" state: directory owner: root group: root become: true -- name: install ldif file for "member of" +- name: Install ldif files for overlays. copy: - src: files/01-overlay-memberof - dest: /srv/openldap/custom/01-overlay-memberof + src: "files/{{ item }}" + dest: "/srv/openldap/custom/{{ item }}" mode: 0644 owner: root group: root + with_items: + - 01-overlay-memberof become: true -- name: Daemon reload (the inplicit doesn't work) - command: bash -c "systemctl daemon-reload" - become: true - -- name: make sure service is started +- name: Make sure services are started. systemd: name: "{{ item }}" state: started + enabled: yes daemon_reload: yes with_items: - openldap.service - phpldapadmin.service become: true - -- name: start service at boot. - command: systemctl reenable "{{ item }}" - with_items: - - openldap.service - - phpldapadmin.service - become: true ... From f55c1719774ef8d432e95f1ba8ebe0db2ced7278 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Fri, 7 Aug 2020 15:55:48 +0200 Subject: [PATCH 21/30] Fixed various linter errors in slurm-management role. 
--- roles/slurm-management/tasks/main.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/roles/slurm-management/tasks/main.yml b/roles/slurm-management/tasks/main.yml index 24ea9e0a6..477f6f888 100644 --- a/roles/slurm-management/tasks/main.yml +++ b/roles/slurm-management/tasks/main.yml @@ -226,8 +226,8 @@ - meta: 'flush_handlers' - name: 'Execute Slurm DB initialization script on host running slurmdbd.' - shell: | - /etc/slurm/configure_slurm_accounting_db.bash + command: + cmd: '/etc/slurm/configure_slurm_accounting_db.bash' register: command_result retries: 3 delay: 5 @@ -260,7 +260,7 @@ tags: 'backup' become: true -- name: 'Create Slurm accounting DB backup now.' +- name: 'Create Slurm accounting DB backup now.' # noqa 301 shell: | mysqldump --all-databases \ -uroot -p{{ MYSQL_ROOT_PASSWORD }} \ From 811c8404c9f9fecb862ff7fe38e317e97e2ba970 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Fri, 7 Aug 2020 15:56:44 +0200 Subject: [PATCH 22/30] Wrapped long line in spacewalk_client role. --- roles/spacewalk_client/tasks/main.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/roles/spacewalk_client/tasks/main.yml b/roles/spacewalk_client/tasks/main.yml index 7142e982b..ce374b7bd 100644 --- a/roles/spacewalk_client/tasks/main.yml +++ b/roles/spacewalk_client/tasks/main.yml @@ -1,7 +1,8 @@ --- - name: 'Install spacewalk client repo.' 
yum: - name: 'https://copr-be.cloud.fedoraproject.org/results/@spacewalkproject/spacewalk-2.8-client/epel-7-x86_64/00742644-spacewalk-repo/spacewalk-client-repo-2.8-11.el7.centos.noarch.rpm' + name: "https://copr-be.cloud.fedoraproject.org/results/@spacewalkproject/\ + spacewalk-2.8-client/epel-7-x86_64/00742644-spacewalk-repo/spacewalk-client-repo-2.8-11.el7.centos.noarch.rpm" state: present become: true From 0d23336eb0ba71d81477f218ec709986b7bdf6c3 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Fri, 7 Aug 2020 16:03:22 +0200 Subject: [PATCH 23/30] Fixed linter error in ssh_host_signer role and added symlink in single-role-playbooks subdir, so role can find the keys for the CA key pairs for ssh-host-key-signing. --- roles/ssh_host_signer/tasks/main.yml | 13 ++++++++++--- single_role_playbooks/ssh-host-ca | 1 + 2 files changed, 11 insertions(+), 3 deletions(-) create mode 120000 single_role_playbooks/ssh-host-ca diff --git a/roles/ssh_host_signer/tasks/main.yml b/roles/ssh_host_signer/tasks/main.yml index d90af7623..f925aac43 100644 --- a/roles/ssh_host_signer/tasks/main.yml +++ b/roles/ssh_host_signer/tasks/main.yml @@ -38,7 +38,13 @@ delegate_to: localhost - name: 'Sign SSH keys.' - command: ssh-keygen -s {{ ssh_host_signer_ca_private_key | quote }} -P {{ ssh_host_signer_ca_private_key_pass | quote }} -I {{ ssh_host_signer_id | quote }} -h -n {{ ssh_host_signer_hostnames | quote }} "{{ temporary_directory.path }}/public_keys/{{ inventory_hostname | quote }}{{ item.path | quote }}.pub" + command: > + ssh-keygen -h + -s {{ ssh_host_signer_ca_private_key | quote }} + -P {{ ssh_host_signer_ca_private_key_pass | quote }} + -I {{ ssh_host_signer_id | quote }} + -n {{ ssh_host_signer_hostnames | quote }} + "{{ temporary_directory.path }}/public_keys/{{ inventory_hostname | quote }}{{ item.path | quote }}.pub" with_items: "{{ private_keys.files }}" changed_when: false delegate_to: localhost @@ -53,8 +59,9 @@ - name: 'Compare certificates.' 
shell: | - diff <(ssh-keygen -L -f {{ item.path | quote }} | tail -n +2) \ - <(ssh-keygen -L -f {{ temporary_directory.path | quote }}/existing_certificates/{{ inventory_hostname | quote }}{{ ssh_host_signer_key_directory }}/{{ item.path | basename | quote }} | tail -n +2) + set -o pipefail + diff <(ssh-keygen -L -f {{ item.path | quote }} | tail -n +2) \ + <(ssh-keygen -L -f {{ temporary_directory.path | quote }}/existing_certificates/{{ inventory_hostname | quote }}{{ ssh_host_signer_key_directory }}/{{ item.path | basename | quote }} | tail -n +2) args: executable: '/bin/bash' with_items: "{{ certificates.files }}" diff --git a/single_role_playbooks/ssh-host-ca b/single_role_playbooks/ssh-host-ca new file mode 120000 index 000000000..b858c2172 --- /dev/null +++ b/single_role_playbooks/ssh-host-ca @@ -0,0 +1 @@ +../ssh-host-ca/ \ No newline at end of file From c56809dfc255dfa95d545ab15f07a818758ec048 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Fri, 7 Aug 2020 16:04:08 +0200 Subject: [PATCH 24/30] Fixed various linter errors in create_subgroup_directories role. 
--- .../tasks/create_subgroup_directories.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/roles/subgroup_directories/tasks/create_subgroup_directories.yml b/roles/subgroup_directories/tasks/create_subgroup_directories.yml index 795d4bb48..2fdc66fc7 100644 --- a/roles/subgroup_directories/tasks/create_subgroup_directories.yml +++ b/roles/subgroup_directories/tasks/create_subgroup_directories.yml @@ -10,7 +10,7 @@ | grep -- "-v[0-9][0-9]*$" \ || true register: versioned_subgroups - - set_fact: + - set_fact: # noqa 502 versioned_subgroups_list: "{% if versioned_subgroups.stdout | length %}{{ versioned_subgroups.stdout.split('\n') | list }}{% endif %}" - block: @@ -24,7 +24,7 @@ | grep -v -- "-v[0-9][0-9]*$\|-dms$" \ || true register: unversioned_subgroups - - set_fact: + - set_fact: # noqa 502 unversioned_subgroups_list: "{% if unversioned_subgroups.stdout | length %}{{ unversioned_subgroups.stdout.split('\n') | list }}{% endif %}" - name: "Create directory structure for releases with version number on {{ lfs }}." @@ -46,7 +46,9 @@ with_items: "{{ versioned_subgroups_list }}" - name: "Create /groups/{{ group }}/{{ lfs }}/releases/${dataset}/${version} directory." file: - path: "/groups/{{ group }}/{{ lfs }}/releases/{{ item | regex_replace('^' + group + '-(.*)-(v[0-9][0-9]*)$', '\\1') }}/{{ item | regex_replace('^' + group + '-(.*)-(v[0-9][0-9]*)$', '\\2') }}" + path: "/groups/{{ group }}/{{ lfs }}/releases/\ + {{ item | regex_replace('^' + group + '-(.*)-(v[0-9][0-9]*)$', '\\1') }}/\ + {{ item | regex_replace('^' + group + '-(.*)-(v[0-9][0-9]*)$', '\\2') }}" owner: "{{ group }}-dm" group: "{% if item | length %}{{ item }}{% else %}{{ group }}{% endif %}" mode: "{{ mode_version }}" From 952ca6ed783600741492626c4878918e35a7fe0a Mon Sep 17 00:00:00 2001 From: pneerincx Date: Fri, 7 Aug 2020 16:05:59 +0200 Subject: [PATCH 25/30] Fixed linter errors, improved idempotency and resolved issue #302. 
--- roles/online_docs/handlers/main.yml | 18 +++++ roles/online_docs/tasks/main.yml | 81 ++++++++++--------- .../attachments/ssh-client-config.bash | 9 ++- .../templates/mkdocs/docs/logins-linux.md | 9 ++- 4 files changed, 71 insertions(+), 46 deletions(-) diff --git a/roles/online_docs/handlers/main.yml b/roles/online_docs/handlers/main.yml index 9085cf6df..6ff92d6ad 100644 --- a/roles/online_docs/handlers/main.yml +++ b/roles/online_docs/handlers/main.yml @@ -13,6 +13,24 @@ become: true listen: 'restart_httpd' +- name: 'Zip the scripted tools from the tmp subfolder and save the archives in the attachments subfolder.' + archive: + path: "/srv/mkdocs/{{ slurm_cluster_name }}/tmp/{{ item.path }}" + dest: "/srv/mkdocs/{{ slurm_cluster_name }}/docs/attachments/{{ item.dest }}" + format: 'zip' + owner: 'root' + group: 'root' + mode: '0640' + with_items: + - path: "ssh-client-config-for-{{ slurm_cluster_name }}.app" + dest: "ssh-client-config-for-{{ slurm_cluster_name }}-macos.zip" + - path: "ssh-client-config-for-{{ slurm_cluster_name }}.bash" + dest: "ssh-client-config-for-{{ slurm_cluster_name }}-linux.zip" + - path: 'mount-cluster-drives.app' + dest: 'mount-cluster-drives-macos.zip' + become: true + listen: 'zip_attachments' + - name: '(Re)build webpages.' shell: | cd /srv/mkdocs/{{ slurm_cluster_name }}/ diff --git a/roles/online_docs/tasks/main.yml b/roles/online_docs/tasks/main.yml index 8c5588998..9bc721b9c 100644 --- a/roles/online_docs/tasks/main.yml +++ b/roles/online_docs/tasks/main.yml @@ -3,8 +3,10 @@ # --- - name: 'Check if rsync >= 3.1.2 is installed on the control host.' - shell: | - rsync --version 2>&1 | head -n 1 | sed 's|^rsync *version *\([0-9\.]*\).*$|\1|' | tr -d '\n' + shell: + cmd: | + set -o pipefail + rsync --version 2>&1 | head -n 1 | sed 's|^rsync *version *\([0-9\.]*\).*$|\1|' | tr -d '\n' args: warn: no changed_when: false @@ -40,8 +42,10 @@ - "{{ groups['user-interface'] }}" - name: 'Get Slurm version from scontrol on UI.' 
- shell: | - scontrol version | head -n 1 | sed 's|^slurm *\([0-9\.]*\).*$|\1|' | tr -d '\n' + shell: + cmd: | + set -o pipefail + scontrol version | head -n 1 | sed 's|^slurm *\([0-9\.]*\).*$|\1|' | tr -d '\n' args: executable: '/bin/bash' warn: no @@ -59,10 +63,12 @@ # Modules based on Lua: Version 6.5.8 2016-09-03 13:41 -05:00 (CDT) # by Robert McLay mclay@tacc.utexas.edu # - shell: | - unset MODULEPATH - source ~/.modulesrc >/dev/null 2>&1 - lmod -v | sed '/^$/d' | sed 's|^.*Version \([0-9\.]*\).*$|\1|' | head -1 + shell: + cmd: | + set -o pipefail + unset MODULEPATH + source ~/.modulesrc >/dev/null 2>&1 + lmod -v | sed '/^$/d' | sed 's|^.*Version \([0-9\.]*\).*$|\1|' | head -1 args: executable: '/bin/bash' warn: no @@ -116,18 +122,24 @@ - name: 'Create base directories for MarkDown and HTML files.' file: - path: "{{ item }}" + path: "{{ item.path }}" state: 'directory' owner: 'root' group: 'root' - mode: '0755' + mode: "{{ item.mode }}" with_items: - - "/srv/mkdocs/{{ slurm_cluster_name }}/" - - "/srv/mkdocs/{{ slurm_cluster_name }}/tmp/" - - "/srv/mkdocs/{{ slurm_cluster_name }}/docs/" - - "/srv/mkdocs/{{ slurm_cluster_name }}/docs/attachments" - - "/var/www/html/{{ slurm_cluster_name }}/" - - "/var/www/html/{{ slurm_cluster_name }}/attachments/" + - path: "/srv/mkdocs/{{ slurm_cluster_name }}/" + mode: '0750' + - path: "/srv/mkdocs/{{ slurm_cluster_name }}/tmp/" + mode: '0750' + - path: "/srv/mkdocs/{{ slurm_cluster_name }}/docs/" + mode: '0750' + - path: "/srv/mkdocs/{{ slurm_cluster_name }}/docs/attachments" + mode: '0750' + - path: "/var/www/html/{{ slurm_cluster_name }}/" + mode: '0755' + - path: "/var/www/html/{{ slurm_cluster_name }}/attachments/" + mode: '0755' become: true - name: 'Create static files for index in document root.' 
@@ -167,7 +179,7 @@ state: 'directory' owner: 'root' group: 'root' - mode: '0700' + mode: '0750' with_filetree: "{{ playbook_dir }}/roles/online_docs/templates/mkdocs" when: item.state == 'directory' notify: @@ -201,7 +213,7 @@ - '--relative' - '--omit-dir-times' - '--omit-link-times' - - '--chmod=Du=rwx,Dgo-rwx,Fu=rw,Fgo-rwx' + - '--chmod=Du=rwx,Dg=rx,Do-rwx,Fu=rwX,Fug=rX,Fo-rwx' - '--perms' - '--force' with_items: @@ -223,7 +235,7 @@ - '--relative' - '--omit-dir-times' - '--omit-link-times' - - '--chmod=Dug=rwx,Do-rwx,Fug=rwX,Fo-rwx' + - '--chmod=Du=rwx,Dg=rx,Do-rwx,Fu=rwX,Fg=rX,Fo-rwx' - '--perms' - '--force' with_items: @@ -231,9 +243,11 @@ dest: "ssh-client-config-for-{{ slurm_cluster_name }}.app" - src: 'mount-cluster-drives.app' dest: 'mount-cluster-drives.app' + notify: + - 'zip_attachments' become: true -- name: 'Make sure ssh-client-config AppleScript app main script is executable.' +- name: 'Make sure main script of AppleScript apps are executable.' file: path: "/srv/mkdocs/{{ slurm_cluster_name }}/tmp/{{ item }}/Contents/MacOS/applet" state: 'file' @@ -243,6 +257,8 @@ with_items: - "ssh-client-config-for-{{ slurm_cluster_name }}.app" - 'mount-cluster-drives.app' + notify: + - 'zip_attachments' become: true - name: 'Create files for attachments based on templates.' @@ -259,30 +275,15 @@ # - src: 'ssh-client-config.bash' dest: "ssh-client-config-for-{{ slurm_cluster_name }}.app/Contents/MacOS/ssh-client-config.command" - mode: '0755' + mode: '0750' # # For Linux/Unix users: just generate a bare Bash script. # - src: 'ssh-client-config.bash' dest: "ssh-client-config-for-{{ slurm_cluster_name }}.bash" - mode: '0755' - become: true - -- name: 'Zip the scripted tools from the tmp subfolder and save the archives in the attachments subfolder.' 
- archive: - path: "/srv/mkdocs/{{ slurm_cluster_name }}/tmp/{{ item.path }}" - dest: "/srv/mkdocs/{{ slurm_cluster_name }}/docs/attachments/{{ item.dest }}" - format: 'zip' - owner: 'root' - group: 'root' - mode: '0644' - with_items: - - path: "ssh-client-config-for-{{ slurm_cluster_name }}.app" - dest: "ssh-client-config-for-{{ slurm_cluster_name }}-macos.zip" - - path: "ssh-client-config-for-{{ slurm_cluster_name }}.bash" - dest: "ssh-client-config-for-{{ slurm_cluster_name }}-linux.zip" - - path: 'mount-cluster-drives.app' - dest: 'mount-cluster-drives-macos.zip' + mode: '0750' + notify: + - 'zip_attachments' become: true - name: 'Create MarkDown files based on templates.' @@ -291,7 +292,7 @@ dest: "/srv/mkdocs/{{ slurm_cluster_name }}/{{ item.path }}" owner: 'root' group: 'root' - mode: '0600' + mode: '0640' with_filetree: "{{ playbook_dir }}/roles/online_docs/templates/mkdocs" # Exclude temporary *.html preview files, which are also exlcuded in .gitignore and should not be transferred. when: item.state == 'file' and '.md.html' not in item.path diff --git a/roles/online_docs/templates/attachments/ssh-client-config.bash b/roles/online_docs/templates/attachments/ssh-client-config.bash index a6d63b979..489fedf36 100755 --- a/roles/online_docs/templates/attachments/ssh-client-config.bash +++ b/roles/online_docs/templates/attachments/ssh-client-config.bash @@ -375,13 +375,16 @@ Host *+*+* # Double-hop SSH settings to connect via specific jumphosts. 
# Host {% for jumphost in groups['jumphost'] %}{{ jumphost | regex_replace('^' + ai_jumphost + '\\+','')}}+* {% endfor %}{% raw %}{% endraw %} - ProxyCommand ssh -x -q -i "${_private_key_file}" ${_user}@\$(echo %h | sed 's/+[^+]*$//'){% if slurm_cluster_domain | length %}.{{ slurm_cluster_domain }}{% endif %} -W \$(echo %h | sed 's/^[^+]*+//'):%p + User ${_user} + ProxyCommand ssh -x -q -i "${_private_key_file}" %r@\$(echo %h | sed 's/+[^+]*$//'){% if slurm_cluster_domain | length %}.{{ slurm_cluster_domain }}{% endif %} -W \$(echo %h | sed 's/^[^+]*+//'):%p # # Sometimes port 22 for the SSH protocol is blocked by firewalls; in that case you can try to use SSH on port 443 as fall-back. -# Do not use port 443 by default for SSH as it is officially assigned to HTTPS traffic and some firewalls will cause problems with SSH traffic over port 443. +# Do not use port 443 by default for SSH as it is officially assigned to HTTPS traffic +# and some firewalls will cause problems with SSH traffic over port 443. 
# Host {% for jumphost in groups['jumphost'] %}{{ jumphost | regex_replace('^' + ai_jumphost + '\\+','')}}443+* {% endfor %}{% raw %}{% endraw %} - ProxyCommand ssh -x -q -i "${_private_key_file}" ${_user}@\$(echo %h | sed 's/443+[^+]*$//'){% if slurm_cluster_domain | length %}.{{ slurm_cluster_domain }}{% endif %} -W \$(echo %h | sed 's/^[^+]*+//'):%p -p 443 + User ${_user} + ProxyCommand ssh -x -q -i "${_private_key_file}" %r@\$(echo %h | sed 's/443+[^+]*$//'){% if slurm_cluster_domain | length %}.{{ slurm_cluster_domain }}{% endif %} -W \$(echo %h | sed 's/^[^+]*+//'):%p -p 443 EOF } diff --git a/roles/online_docs/templates/mkdocs/docs/logins-linux.md b/roles/online_docs/templates/mkdocs/docs/logins-linux.md index a33ee2777..c01aa8858 100644 --- a/roles/online_docs/templates/mkdocs/docs/logins-linux.md +++ b/roles/online_docs/templates/mkdocs/docs/logins-linux.md @@ -125,13 +125,16 @@ Host *+*+* # Double-hop SSH settings to connect via Jumphosts{% if slurm_cluster_domain | length %}{{ slurm_cluster_domain }}{% endif %}. # Host {% for jumphost in groups['jumphost'] %}{{ jumphost | regex_replace('^' + ai_jumphost + '\\+','')}}+* {% endfor %}{% raw %}{% endraw %} - ProxyCommand ssh -x -q youraccount@$(echo %h | sed 's/+[^+]*$//'){% if slurm_cluster_domain | length %}.{{ slurm_cluster_domain }}{% endif %} -W $(echo %h | sed 's/^[^+]*+//'):%p + User youraccount + ProxyCommand ssh -x -q %r@$(echo %h | sed 's/+[^+]*$//'){% if slurm_cluster_domain | length %}.{{ slurm_cluster_domain }}{% endif %} -W $(echo %h | sed 's/^[^+]*+//'):%p # # Sometimes port 22 for the SSH protocol is blocked by firewalls; in that case you can try to use SSH on port 443 as fall-back. -# Do not use port 443 by default for SSH as it officially assigned to HTTPS traffic and some firewalls will cause problems when trying to route SSH over port 443. 
+# Do not use port 443 by default for SSH as it officially assigned to HTTPS traffic +# and some firewalls will cause problems when trying to route SSH over port 443. # Host {% for jumphost in groups['jumphost'] %}{{ jumphost | regex_replace('^' + ai_jumphost + '\\+','')}}443+* {% endfor %}{% raw %}{% endraw %} - ProxyCommand ssh -x -q youraccount@$(echo %h | sed 's/443+[^+]*$//'){% if slurm_cluster_domain | length %}.{{ slurm_cluster_domain }}{% endif %} -W $(echo %h | sed 's/^[^+]*+//'):%p -p 443 + User youraccount + ProxyCommand ssh -x -q %r@$(echo %h | sed 's/443+[^+]*$//'){% if slurm_cluster_domain | length %}.{{ slurm_cluster_domain }}{% endif %} -W $(echo %h | sed 's/^[^+]*+//'):%p -p 443 ``` ##### 5. Login via Jumphost From 96f2e3e8d7877efe4400d8650218a78ba7a8e707 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Fri, 7 Aug 2020 16:54:23 +0200 Subject: [PATCH 26/30] Commented tasks only required for debugging. --- roles/shared_storage/tasks/main.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/roles/shared_storage/tasks/main.yml b/roles/shared_storage/tasks/main.yml index 0c97ca10f..21178ac6d 100644 --- a/roles/shared_storage/tasks/main.yml +++ b/roles/shared_storage/tasks/main.yml @@ -4,15 +4,15 @@ ### List content of variables for debugging. ## # -- name: 'Show the content of the group_vars/[cluster]/vars.yml lfs_mounts variable for debugging purposes.' - debug: - msg: "{{ lfs_mounts }}" - -- name: 'Show the content of the group_vars/[cluster]/vars.yml pfs_mounts variable for debugging purposes.' - debug: - msg: "pfs: {{ item.pfs }} | source: {{ item.source }} | type: {{ item.type }} | rw_options: {{ item.rw_options }}." - with_items: "{{ pfs_mounts }}" - when: inventory_hostname in groups['sys-admin-interface'] +# - name: 'Show the content of the group_vars/[cluster]/vars.yml lfs_mounts variable for debugging purposes.' 
+# debug: +# msg: "{{ lfs_mounts }}" +# +# - name: 'Show the content of the group_vars/[cluster]/vars.yml pfs_mounts variable for debugging purposes.' +# debug: +# msg: "pfs: {{ item.pfs }} | source: {{ item.source }} | type: {{ item.type }} | rw_options: {{ item.rw_options }}." +# with_items: "{{ pfs_mounts }}" +# when: inventory_hostname in groups['sys-admin-interface'] # ## From c6ae22dd11f2a27f5bf311cbeeab59b0b771b72c Mon Sep 17 00:00:00 2001 From: pneerincx Date: Fri, 7 Aug 2020 16:54:49 +0200 Subject: [PATCH 27/30] Reduced allowed linter errors further 5 -> 2. --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 0268484cb..fd6a67b75 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -29,5 +29,5 @@ jobs: echo '###############################################' printf 'Counted %d ansible-lint errors.' ${errors:-0} echo '###############################################' - if (( errors > 5 )); then /bin/false; fi + if (( errors > 2 )); then /bin/false; fi ... \ No newline at end of file From a9a8eba1cb8d09e637f76cd3b51f7603e6f028a5 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Fri, 7 Aug 2020 17:10:23 +0200 Subject: [PATCH 28/30] Silly format fix. --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index fd6a67b75..848d152bf 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -27,7 +27,7 @@ jobs: cat lint_results errors=$(grep -c '^[0-9]* [A-Z].*' lint_results) echo '###############################################' - printf 'Counted %d ansible-lint errors.' ${errors:-0} + printf 'Counted %d ansible-lint errors.\n' ${errors:-0} echo '###############################################' if (( errors > 2 )); then /bin/false; fi ... 
\ No newline at end of file From 1f6719650252ff7afe580c1fc90e492c28c894f3 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Fri, 7 Aug 2020 17:31:46 +0200 Subject: [PATCH 29/30] Fixed linter issue in ssh_host_signer role and reduced allowed linter errors 2 -> 1. --- .circleci/config.yml | 2 +- roles/ssh_host_signer/tasks/main.yml | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 848d152bf..042a4f17c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -29,5 +29,5 @@ jobs: echo '###############################################' printf 'Counted %d ansible-lint errors.\n' ${errors:-0} echo '###############################################' - if (( errors > 2 )); then /bin/false; fi + if (( errors > 1 )); then /bin/false; fi ... \ No newline at end of file diff --git a/roles/ssh_host_signer/tasks/main.yml b/roles/ssh_host_signer/tasks/main.yml index f925aac43..d5cf8a1d8 100644 --- a/roles/ssh_host_signer/tasks/main.yml +++ b/roles/ssh_host_signer/tasks/main.yml @@ -37,7 +37,7 @@ mode: '0600' delegate_to: localhost -- name: 'Sign SSH keys.' +- name: 'Sign SSH host keys.' command: > ssh-keygen -h -s {{ ssh_host_signer_ca_private_key | quote }} @@ -50,27 +50,29 @@ delegate_to: localhost no_log: true -- name: 'Find certificates.' +- name: 'Find generated certificates (== signed host keys).' find: path: "{{ temporary_directory.path }}/public_keys/{{ inventory_hostname }}{{ ssh_host_signer_key_directory }}" pattern: 'ssh_host_*_key-cert.pub' delegate_to: localhost register: certificates -- name: 'Compare certificates.' +- name: 'Compare generated certificates to existing certificates from servers.' 
shell: | set -o pipefail diff <(ssh-keygen -L -f {{ item.path | quote }} | tail -n +2) \ - <(ssh-keygen -L -f {{ temporary_directory.path | quote }}/existing_certificates/{{ inventory_hostname | quote }}{{ ssh_host_signer_key_directory }}/{{ item.path | basename | quote }} | tail -n +2) + <(ssh-keygen -L -f {{ existing_certificates_dir }}/{{ item.path | basename | quote }} | tail -n +2) args: executable: '/bin/bash' + vars: + existing_certificates_dir: "{{ temporary_directory.path | quote }}/existing_certificates/{{ inventory_hostname | quote }}{{ ssh_host_signer_key_directory }}" with_items: "{{ certificates.files }}" changed_when: false failed_when: false delegate_to: localhost register: certificate_comparison -- name: 'Copy certificates back to server.' +- name: 'Copy certificates back to server if the generated ones are different from the existing ones.' copy: src: "{{ item.item.path }}" dest: "{{ ssh_host_signer_key_directory }}" From b5f37d3de31917529c4d7f6065fdc74621edbc07 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Fri, 7 Aug 2020 17:38:04 +0200 Subject: [PATCH 30/30] Wrapped long line. --- roles/ssh_host_signer/tasks/main.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/roles/ssh_host_signer/tasks/main.yml b/roles/ssh_host_signer/tasks/main.yml index d5cf8a1d8..ae2a7fa56 100644 --- a/roles/ssh_host_signer/tasks/main.yml +++ b/roles/ssh_host_signer/tasks/main.yml @@ -65,7 +65,8 @@ args: executable: '/bin/bash' vars: - existing_certificates_dir: "{{ temporary_directory.path | quote }}/existing_certificates/{{ inventory_hostname | quote }}{{ ssh_host_signer_key_directory }}" + existing_certificates_dir: "{{ temporary_directory.path | quote }}/existing_certificates/\ + {{ inventory_hostname | quote }}{{ ssh_host_signer_key_directory }}" with_items: "{{ certificates.files }}" changed_when: false failed_when: false