From ccc93514e9c0ed29e73cf1c794f8ee5e4ccada9e Mon Sep 17 00:00:00 2001
From: Bruno Travouillon
Date: Fri, 29 Sep 2023 17:03:42 -0400
Subject: [PATCH] Merge upstream and cherry-pick fixes (#3)

* Added cluster creation to slurmdb install

* Refactored order in which cluster is created

* review order of restarting service

should follow this order when restarting :
- slurmdbd
- slurmctld
- slurmd

* Fix syntax error in handlers

* Fix typo in handler name

(cherry picked from commit 712cf3278112ff97d6e40694d5172fd4701efc6d)

* Remove key 'SlurmctldPidFile' from __slurmdbd_config_default

(cherry picked from commit 936e64adf0a9d4bbc5641bf8e4471db73c9037cb)

---------

Co-authored-by: slugger70
Co-authored-by: Christian IUGA
Co-authored-by: Nate Coraor
Co-authored-by: Nuwan Goonasekera <2070605+nuwang@users.noreply.github.com>
Co-authored-by: Christopher Lilienthal
Co-authored-by: cat-bro
---
 defaults/main.yml          |  7 +++++--
 handlers/main.yml          | 33 +++++++++++++++------------------
 tasks/main.yml             |  4 ++++
 tasks/slurmdbd_cluster.yml | 25 +++++++++++++++++++++++++
 4 files changed, 49 insertions(+), 20 deletions(-)
 create mode 100644 tasks/slurmdbd_cluster.yml

diff --git a/defaults/main.yml b/defaults/main.yml
index 2934b5a..9066924 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -17,6 +17,10 @@ slurmd_service_name: slurmd
 slurmctld_service_name: slurmctld
 slurmdbd_service_name: slurmdbd
 
+#Cluster name for slurm config. This is required to correctly setup slurmdbd and attune it to the slurm config.
+__slurm_cluster_name: cluster
+__cluster_not_setup: true #Default value. Is modified if cluster already exists.
+
 slurm_start_services: true
 
 
@@ -48,7 +52,7 @@ __slurm_config_default:
   AuthType: auth/munge
   CryptoType: crypto/munge
   SlurmUser: "{{ __slurm_user_name }}"
-  ClusterName: cluster
+  ClusterName: "{{ __slurm_cluster_name }}"
   # default is proctrack/cgroup which is the best but also less than 100% chance of working e.g. in docker
   ProctrackType: proctrack/pgid
   # slurmctld options
@@ -93,6 +97,5 @@ __slurmdbd_config_default:
   AuthType: auth/munge
   DbdPort: 6819
   SlurmUser: "{{ __slurm_user_name }}"
-  SlurmctldPidFile: "{{ __slurm_run_dir ~ '/slurmdbd.pid' if __slurm_debian else omit }}"
   LogFile: "{{ __slurm_log_dir ~ '/slurmdbd.log' if __slurm_debian else omit }}"
 __slurmdbd_config_merged: "{{ __slurmdbd_config_default | combine(slurmdbd_config | default({})) }}"
diff --git a/handlers/main.yml b/handlers/main.yml
index 63897e0..0f6fc57 100644
--- a/handlers/main.yml
+++ b/handlers/main.yml
@@ -4,20 +4,20 @@
     name: munge
     state: restarted
 
-- name: Reload slurmd
+- name: Reload slurmdbd
   ansible.builtin.service:
-    name: "{{ slurmd_service_name }}"
+    name: "{{ slurmdbd_service_name }}"
     state: reloaded
-  when: "slurm_start_services and ('slurmexechosts' in group_names or 'exec' in slurm_roles)"
+  when: "slurm_start_services and ('slurmdbdservers' in group_names or 'dbd' in slurm_roles)"
 
-- name: Restart slurmd
+- name: Restart slurmdbd
   ansible.builtin.systemd:
-    name: "{{ slurmd_service_name }}"
+    name: "{{ slurmdbd_service_name }}"
     state: restarted
     masked: no
     enabled: yes
     daemon_reload: yes
-  when: "slurm_start_services and ('slurmexechosts' in group_names or 'exec' in slurm_roles)"
+  when: "slurm_start_services and ('slurmdbdservers' in group_names or 'dbd' in slurm_roles)"
 
 - name: Reload slurmctld
   ansible.builtin.service:
@@ -34,17 +34,14 @@
     daemon_reload: yes
   when: "slurm_start_services and ('slurmservers' in group_names or 'controller' in slurm_roles)"
 
-- name: Restart slurmdbd
-  ansible.builtin.systemd:
-    name: "{{ slurmdbd_service_name }}"
-    state: restarted
-    masked: no
-    enabled: yes
-    daemon_reload: yes
-  when: "slurm_start_services and ('slurmservers' in group_names or 'controller' in slurm_roles)"
-
-- name: Reload slurmdbd
+- name: Reload slurmd
   ansible.builtin.service:
-    name: "{{ slurmdbd_service_name }}"
+    name: "{{ slurmd_service_name }}"
     state: reloaded
-  when: "slurm_start_services and ('slurmdbdservers' in group_names or 'dbd' in slurm_roles)"
+  when: "slurm_start_services and ('slurmexechosts' in group_names or 'exec' in slurm_roles)"
+
+- name: Restart slurmd
+  ansible.builtin.service:
+    name: "{{ slurmd_service_name }}"
+    state: restarted
+  when: "slurm_start_services and ('slurmexechosts' in group_names or 'exec' in slurm_roles)"
diff --git a/tasks/main.yml b/tasks/main.yml
index 05348b3..de8f0b3 100644
--- a/tasks/main.yml
+++ b/tasks/main.yml
@@ -43,3 +43,7 @@
     enabled: true
     state: started
   when: "slurm_start_services and ('slurmexechosts' in group_names or 'exec' in slurm_roles)"
+
+- name: Setup cluster on slurmdb
+  include_tasks: slurmdbd_cluster.yml
+  when: "slurm_start_services and ('slurmdbdservers' in group_names or 'dbd' in slurm_roles)"
diff --git a/tasks/slurmdbd_cluster.yml b/tasks/slurmdbd_cluster.yml
new file mode 100644
index 0000000..66dda91
--- /dev/null
+++ b/tasks/slurmdbd_cluster.yml
@@ -0,0 +1,25 @@
+---
+
+# Registers the configured cluster in the accounting database if it is missing.
+
+- name: Check for existence of cluster in db.
+  # --parsable2 prints unpadded names, one per line, so no `cut` is needed.
+  command: sacctmgr --noheader --parsable2 list cluster format=cluster
+  register: cluster_check
+  changed_when: false
+  become: true
+  become_user: root
+
+- name: set cluster_check_boolean
+  set_fact:
+    __cluster_not_setup: false
+  # Match the configured name rather than the literal default "cluster".
+  when: __slurm_cluster_name in cluster_check.stdout_lines
+
+- name: Create the slurmdbd cluster
+  command: sacctmgr -i -n add cluster {{ __slurm_cluster_name }}
+  become: true
+  become_user: root
+  notify:
+    - Reload slurmdbd
+  when: __cluster_not_setup