From 076ad682519ce9ab7bd9817b7d3d84d4878bb149 Mon Sep 17 00:00:00 2001 From: Timothy Middelkoop Date: Thu, 17 Oct 2024 09:58:03 -0500 Subject: [PATCH] Stateless Warewulf 4.5 on Rocky 9.4 with Slurm for x86_64 Recipe for using Warewulf 4 as a provisioner. Signed-off-by: Timothy Middelkoop Co-authored-by: GodloveD --- components/admin/docs/SPECS/docs.spec | 7 + docs/recipes/install/.gitignore | 1 + .../install/common/add_ww4_hosts_finalize.tex | 29 ++ .../install/common/add_ww4_hosts_intro.tex | 10 + .../install/common/add_ww4_hosts_slurm.tex | 11 + docs/recipes/install/common/bos.tex | 2 + .../finalize_warewulf4_provisioning.tex | 24 ++ .../install/common/import_ww4_files.tex | 17 + .../common/import_ww4_files_ib_centos.tex | 19 ++ .../install/common/import_ww4_files_slurm.tex | 17 + docs/recipes/install/common/inputs.tex | 1 + .../install_provisioning_warewulf4_intro.tex | 20 ++ docs/recipes/install/common/ohpc-doc.sty | 8 + .../recipes/install/common/reset_computes.tex | 2 +- docs/recipes/install/common/rocky_repos.tex | 2 +- .../warewulf4_add_to_compute_chroot_intro.tex | 24 ++ .../install/common/warewulf4_kargs_post.tex | 12 + .../common/warewulf4_mkchroot_rocky.tex | 34 ++ .../install/common/warewulf4_setup.tex | 5 + .../install/common/warewulf4_setup_centos.tex | 41 +++ .../common/warewulf4_slurm_test_job.tex | 122 +++++++ docs/recipes/install/parse_doc.pl | 14 +- .../rocky9/x86_64/warewulf4/slurm/.gitignore | 1 + .../rocky9/x86_64/warewulf4/slurm/Makefile | 24 ++ .../rocky9/x86_64/warewulf4/slurm/common | 1 + .../rocky9/x86_64/warewulf4/slurm/manifest | 1 + .../x86_64/warewulf4/slurm/manifest.tex | 1 + .../rocky9/x86_64/warewulf4/slurm/steps.tex | 302 ++++++++++++++++++ 28 files changed, 744 insertions(+), 8 deletions(-) create mode 100644 docs/recipes/install/common/add_ww4_hosts_finalize.tex create mode 100644 docs/recipes/install/common/add_ww4_hosts_intro.tex create mode 100644 docs/recipes/install/common/add_ww4_hosts_slurm.tex create mode 100644 
docs/recipes/install/common/finalize_warewulf4_provisioning.tex create mode 100644 docs/recipes/install/common/import_ww4_files.tex create mode 100644 docs/recipes/install/common/import_ww4_files_ib_centos.tex create mode 100644 docs/recipes/install/common/import_ww4_files_slurm.tex create mode 100644 docs/recipes/install/common/install_provisioning_warewulf4_intro.tex create mode 100644 docs/recipes/install/common/warewulf4_add_to_compute_chroot_intro.tex create mode 100644 docs/recipes/install/common/warewulf4_kargs_post.tex create mode 100644 docs/recipes/install/common/warewulf4_mkchroot_rocky.tex create mode 100644 docs/recipes/install/common/warewulf4_setup.tex create mode 100644 docs/recipes/install/common/warewulf4_setup_centos.tex create mode 100644 docs/recipes/install/common/warewulf4_slurm_test_job.tex create mode 100644 docs/recipes/install/rocky9/x86_64/warewulf4/slurm/.gitignore create mode 100644 docs/recipes/install/rocky9/x86_64/warewulf4/slurm/Makefile create mode 120000 docs/recipes/install/rocky9/x86_64/warewulf4/slurm/common create mode 120000 docs/recipes/install/rocky9/x86_64/warewulf4/slurm/manifest create mode 120000 docs/recipes/install/rocky9/x86_64/warewulf4/slurm/manifest.tex create mode 100644 docs/recipes/install/rocky9/x86_64/warewulf4/slurm/steps.tex diff --git a/components/admin/docs/SPECS/docs.spec b/components/admin/docs/SPECS/docs.spec index 4e4bd0cb8f..71c77619df 100644 --- a/components/admin/docs/SPECS/docs.spec +++ b/components/admin/docs/SPECS/docs.spec @@ -94,6 +94,9 @@ from the OpenHPC software stack. 
#pushd docs/recipes/install/centos8/x86_64/warewulf/slurm #make ; %{parser} steps.tex > recipe.sh ; popd +pushd docs/recipes/install/rocky9/x86_64/warewulf4/slurm +make ; %{parser} steps.tex > recipe.sh ; popd + pushd docs/recipes/install/rocky9/x86_64/warewulf/slurm make ; %{parser} steps.tex > recipe.sh ; popd @@ -170,6 +173,10 @@ install -m 0644 -p docs/Release_Notes.txt %{buildroot}/%{OHPC_PUB}/doc/Release_N # x86_64 guides +%define lpath rocky9/x86_64/warewulf4/slurm +install -m 0644 -p -D docs/recipes/install/%{lpath}/steps.pdf %{buildroot}/%{OHPC_PUB}/doc/recipes/%{lpath}/Install_guide.pdf +install -m 0755 -p -D docs/recipes/install/%{lpath}/recipe.sh %{buildroot}/%{OHPC_PUB}/doc/recipes/%{lpath}/recipe.sh + %define lpath rocky9/x86_64/warewulf/slurm install -m 0644 -p -D docs/recipes/install/%{lpath}/steps.pdf %{buildroot}/%{OHPC_PUB}/doc/recipes/%{lpath}/Install_guide.pdf install -m 0755 -p -D docs/recipes/install/%{lpath}/recipe.sh %{buildroot}/%{OHPC_PUB}/doc/recipes/%{lpath}/recipe.sh diff --git a/docs/recipes/install/.gitignore b/docs/recipes/install/.gitignore index 1946db7dc8..30511377c3 100644 --- a/docs/recipes/install/.gitignore +++ b/docs/recipes/install/.gitignore @@ -8,3 +8,4 @@ steps.pdf vc.tex pkg-ohpc.chglog* +steps.synctex.gz diff --git a/docs/recipes/install/common/add_ww4_hosts_finalize.tex b/docs/recipes/install/common/add_ww4_hosts_finalize.tex new file mode 100644 index 0000000000..e72537e1f3 --- /dev/null +++ b/docs/recipes/install/common/add_ww4_hosts_finalize.tex @@ -0,0 +1,29 @@ +\iftoggleverb{isx86} +% ohpc_validation_newline +% ohpc_validation_comment Optionally, define IPoIB network settings (required if planning to mount Lustre over IB) +% ohpc_command if [[ ${enable_ipoib} -eq 1 ]];then +% ohpc_indent 5 +\begin{lstlisting}[language=bash,keywords={},upquote=true,basicstyle=\footnotesize\ttfamily] +# Optionally define IPoIB network settings (required if planning to mount Lustre/BeeGFS over IB) +[sms](*\#*) for ((i=0; 
i<$num_computes; i++)) ; do + wwctl node set --yes ${c_name[$i]} --netdev=ib0 --ipaddr=${c_ipoib[$i]} --netmask=${ipoib_netmask} +done +\end{lstlisting} +% ohpc_indent 0 +% ohpc_command fi +% ohpc_validation_newline +% end_ohpc_run +\fi + +Finally, we build the overlays and update the Warewulf configuration. +It is necessary to rebuild the overlays whenever an overlay is modified. + +% begin_ohpc_run +\begin{lstlisting}[language=bash,keywords={},upquote=true,basicstyle=\footnotesize\ttfamily,literate={BOSVER}{\baseos{}}1] +# build the overlays for all the nodes +[sms](*\#*) wwctl overlay build + +# Update Warewulf configure +[sms](*\#*) wwctl configure --all +\end{lstlisting} +% end_ohpc_run diff --git a/docs/recipes/install/common/add_ww4_hosts_intro.tex b/docs/recipes/install/common/add_ww4_hosts_intro.tex new file mode 100644 index 0000000000..d706a8645a --- /dev/null +++ b/docs/recipes/install/common/add_ww4_hosts_intro.tex @@ -0,0 +1,10 @@ +%\iftoggle{isx86}{\clearpage} +% begin_ohpc_run +% ohpc_validation_comment Add hosts to cluster +\begin{lstlisting}[language=bash,keywords={},upquote=true,basicstyle=\footnotesize\ttfamily,literate={BOSVER}{\baseos{}}1] +[sms](*\#*) for ((i=0; i<$num_computes; i++)) ; do +wwctl node add --container=rocky-9.4 \ + --ipaddr=${c_ip[$i]} --hwaddr=${c_mac[$i]} --netmask=${internal_netmask} ${c_name[i]} +done +\end{lstlisting} +% end_ohpc_run diff --git a/docs/recipes/install/common/add_ww4_hosts_slurm.tex b/docs/recipes/install/common/add_ww4_hosts_slurm.tex new file mode 100644 index 0000000000..2ba546f778 --- /dev/null +++ b/docs/recipes/install/common/add_ww4_hosts_slurm.tex @@ -0,0 +1,11 @@ +Now that the nodes are defined, we can start munge and Slurm. This must be done +after the nodes are defined and the Warewulf configuration is updated. + +% begin_ohpc_run +% ohpc_validation_comment Enable and start munge and slurmctld (Cont.) 
+\begin{lstlisting}[language=bash,keywords={},upquote=true,basicstyle=\footnotesize\ttfamily,literate={BOSVER}{\baseos{}}1] +# Enable and start munge and slurmctld +[sms](*\#*) systemctl enable --now munge +[sms](*\#*) systemctl enable --now slurmctld +\end{lstlisting} +% end_ohpc_run diff --git a/docs/recipes/install/common/bos.tex b/docs/recipes/install/common/bos.tex index 736749fed8..75cc184ebc 100644 --- a/docs/recipes/install/common/bos.tex +++ b/docs/recipes/install/common/bos.tex @@ -5,6 +5,7 @@ master} host. Alternatively, if choosing to use a pre-installed server, please verify that it is provisioned with the required \baseOS{} distribution. \\ +\ifnottoggleverb{isWarewulf4} Prior to beginning the installation process of \OHPC{} components, several additional considerations are noted here for the SMS host configuration. First, the installation recipe herein assumes that @@ -15,6 +16,7 @@ \begin{lstlisting}[language=bash,keywords={}] [sms](*\#*) echo ${sms_ip} ${sms_name} >> /etc/hosts \end{lstlisting} +\fi While it is theoretically possible to enable SELinux on a cluster provisioned with \provisioner{}, diff --git a/docs/recipes/install/common/finalize_warewulf4_provisioning.tex b/docs/recipes/install/common/finalize_warewulf4_provisioning.tex new file mode 100644 index 0000000000..99f5f194c4 --- /dev/null +++ b/docs/recipes/install/common/finalize_warewulf4_provisioning.tex @@ -0,0 +1,24 @@ +\subsection{Finalizing provisioning configuration} \label{sec:assemble_bootstrap} + +\Warewulf{} provisions a node with an image then customizes it with overlays. +This section highlights creation of the node image and overlays, followed by the +registration of desired compute nodes. + +\subsubsection{Build container image and overlays} + +The bootstrap image includes the runtime kernel and associated modules, as well +as some simple scripts to complete the provisioning process. 
+ +% begin_ohpc_run +% ohpc_comment_header Assemble bootstrap image \ref{sec:assemble_bootstrap} +\begin{lstlisting}[language=bash,literate={-}{-}1,keywords={},upquote=true,literate={BOSVER}{\baseos{}}1] +# Build image +[sms](*\#*) wwctl container build BOSVER +[sms](*\#*) wwctl overlay build +\end{lstlisting} +% end_ohpc_run + +\subsubsection{Register nodes for provisioning} + +Nodes can be registered for provisioning using the following syntax. + diff --git a/docs/recipes/install/common/import_ww4_files.tex b/docs/recipes/install/common/import_ww4_files.tex new file mode 100644 index 0000000000..b9d042a817 --- /dev/null +++ b/docs/recipes/install/common/import_ww4_files.tex @@ -0,0 +1,17 @@ +The \Warewulf{} system includes functionality to import arbitrary files from +the provisioning server for distribution to managed hosts through a system +called ``overlays''. Some files, like \texttt{/etc/passwd} and \texttt{/etc/hosts}, +are handled in this way by default. Here we add directories and files to the +\texttt{generic} overlay that is applied to all nodes. 
+ +% begin_ohpc_run +% ohpc_comment_header Import files \ref{sec:file_import} +\begin{lstlisting}[language=bash,literate={-}{-}1,keywords={},upquote=true] +# Add the following to support unprivileged user namespaces for tools like Apptainer +[sms](*\#*) wwctl overlay import generic /etc/subuid +[sms](*\#*) wwctl overlay import generic /etc/subgid + +# Identify master host as local NTP server +[sms](*\#*) echo "server ${sms_ip} iburst" | wwctl overlay import generic <(cat) /etc/chrony.conf +\end{lstlisting} +% \end_ohpc_run diff --git a/docs/recipes/install/common/import_ww4_files_ib_centos.tex b/docs/recipes/install/common/import_ww4_files_ib_centos.tex new file mode 100644 index 0000000000..7a092e9b71 --- /dev/null +++ b/docs/recipes/install/common/import_ww4_files_ib_centos.tex @@ -0,0 +1,19 @@ +%\iftoggle{isCentOS}{\clearpage} + +\noindent Finally, to add {\em optional} support for controlling IPoIB +interfaces (see \S\ref{sec:add_ofed}), \OHPC{} includes a +template file for \Warewulf{} that can optionally be imported and used later to provision +\texttt{ib0} network settings. 
+ +% begin_ohpc_run +% ohpc_validation_newline +% ohpc_command if [[ ${enable_ipoib} -eq 1 ]];then +% ohpc_indent 5 +\begin{lstlisting}[language=bash,literate={-}{-}1,keywords={},upquote=true] +[sms](*\#*) wwctl overlay mkdir generic /etc/sysconfig/network-scripts/ +[sms](*\#*) wwctl overlay import generic /opt/ohpc/pub/examples/network/centos/ifcfg-ib0.ww \ + /etc/sysconfig/network-scripts/ifcfg-ib0.ww +\end{lstlisting} +% ohpc_indent 0 +% ohpc_command fi +% \end_ohpc_run diff --git a/docs/recipes/install/common/import_ww4_files_slurm.tex b/docs/recipes/install/common/import_ww4_files_slurm.tex new file mode 100644 index 0000000000..95fb469c8d --- /dev/null +++ b/docs/recipes/install/common/import_ww4_files_slurm.tex @@ -0,0 +1,17 @@ +\noindent Similarly, we can configure Slurm and import the cryptographic +key that is required by the {\em munge} authentication library to be available +on every host in the resource management pool, issue the following: + +% begin_ohpc_run +\begin{lstlisting}[language=bash,literate={-}{-}1,keywords={},upquote=true] +# Configure Slurm server in the overlay (using "configless" option) +[sms](*\#*) wwctl overlay mkdir generic /etc/sysconfig/ +[sms](*\#*) wwctl overlay import generic <(echo SLURMD_OPTIONS="--conf-server ${sms_ip}") /etc/sysconfig/slurmd + +# Configure munge +[sms](*\#*) wwctl overlay mkdir generic --mode 0700 /etc/munge +[sms](*\#*) wwctl overlay import generic /etc/munge/munge.key +[sms](*\#*) wwctl overlay chown generic /etc/munge/munge.key $(id -u munge) $(id -g munge) +[sms](*\#*) wwctl overlay chown generic /etc/munge $(id -u munge) $(id -g munge) +\end{lstlisting} +% \end_ohpc_run diff --git a/docs/recipes/install/common/inputs.tex b/docs/recipes/install/common/inputs.tex index 2f7aa9d9d5..c7c968d1cf 100644 --- a/docs/recipes/install/common/inputs.tex +++ b/docs/recipes/install/common/inputs.tex @@ -20,6 +20,7 @@ \subsection{Inputs} \label{sec:inputs} \iftoggleverb{isWarewulf} & \texttt{\$\{eth\_provision\}} & 
{\small \# Provisioning interface for computes} \\ \fi +& \texttt{\$\{internal\_network\}} & {\small \# Subnet network address for internal network} \\ & \texttt{\$\{internal\_netmask\}} & {\small \# Subnet netmask for internal network} \\ & \texttt{\$\{ntp\_server\}} & {\small \# Local ntp server for time synchronization} \\ & \texttt{\$\{bmc\_username\}} & {\small \# BMC username for use by IPMI} \\ diff --git a/docs/recipes/install/common/install_provisioning_warewulf4_intro.tex b/docs/recipes/install/common/install_provisioning_warewulf4_intro.tex new file mode 100644 index 0000000000..bb16fc16b2 --- /dev/null +++ b/docs/recipes/install/common/install_provisioning_warewulf4_intro.tex @@ -0,0 +1,20 @@ +With the \OHPC{} repository enabled, we can now begin adding desired components onto the +{\em master} server. This repository provides a number of aliases that group +logical components together in order to help aid in this process. For +reference, a complete list of available group aliases and RPM packages available +via \OHPC{} is provided in Appendix~\ref{appendix:manifest}. To add +support for provisioning services, the following command adds a common base +package along with the Warewulf provisioning system. Then the main +Warewulf configuration file is edited to reflect the environment. 
+ +%\nottoggle{isCentOS}{\clearpage} + +% begin_ohpc_run +% ohpc_comment_header Add baseline OpenHPC and provisioning services \ref{sec:add_provisioning} +\begin{lstlisting}[language=bash,keywords={}] +# Install base packages +[sms](*\#*) (*\install*) ohpc-base warewulf-ohpc hwloc-ohpc netmask +\end{lstlisting} +% end_ohpc_run + + diff --git a/docs/recipes/install/common/ohpc-doc.sty b/docs/recipes/install/common/ohpc-doc.sty index 4ddc189894..5283a3a609 100644 --- a/docs/recipes/install/common/ohpc-doc.sty +++ b/docs/recipes/install/common/ohpc-doc.sty @@ -57,6 +57,8 @@ \newtoggle{isaarch} \newtoggle{ispbs} \newtoggle{isWarewulf} +\newtoggle{isWarewulf3} +\newtoggle{isWarewulf4} \newtoggle{isSLURM} \newtoggle{isxCAT} \newtoggle{isxCATstateful} @@ -76,6 +78,12 @@ {\csname etb@tgl@#1\endcsname\iftrue\iffalse} {\etb@noglobal\etb@err@notoggle{#1}\iffalse}% } +% inverse of above +\newcommand{\ifnottoggleverb}[1]{% + \ifcsdef{etb@tgl@#1} + {\csname etb@tgl@#1\endcsname\iffalse\iftrue} + {\etb@noglobal\etb@err@notoggle{#1}\iftrue}% +} \pagestyle{fancy} \setlength\headheight{59pt} diff --git a/docs/recipes/install/common/reset_computes.tex b/docs/recipes/install/common/reset_computes.tex index 593bf610e8..b0f89af05e 100644 --- a/docs/recipes/install/common/reset_computes.tex +++ b/docs/recipes/install/common/reset_computes.tex @@ -32,7 +32,7 @@ c4 05:03am up 0:02, 0 users, load average: 0.15, 0.12, 0.05 \end{lstlisting} -\iftoggleverb{isWarewulf} +\iftoggleverb{isWarewulf3} \begin{center} \begin{tcolorbox}[] \small While the \texttt{pxelinux.0} and \texttt{lpxelinux.0} files that ship diff --git a/docs/recipes/install/common/rocky_repos.tex b/docs/recipes/install/common/rocky_repos.tex index c36bb4df54..565fb38502 100644 --- a/docs/recipes/install/common/rocky_repos.tex +++ b/docs/recipes/install/common/rocky_repos.tex @@ -20,6 +20,6 @@ disabled in a standard install, but can be enabled from EPEL as follows: 
\begin{lstlisting}[language=bash,literate={-}{-}1,keywords={},upquote=true] -[sms](*\#*) dnf install dnf-plugins-core +[sms](*\#*) dnf -y install dnf-plugins-core [sms](*\#*) dnf config-manager --set-enabled crb \end{lstlisting} diff --git a/docs/recipes/install/common/warewulf4_add_to_compute_chroot_intro.tex b/docs/recipes/install/common/warewulf4_add_to_compute_chroot_intro.tex new file mode 100644 index 0000000000..8c251d34ad --- /dev/null +++ b/docs/recipes/install/common/warewulf4_add_to_compute_chroot_intro.tex @@ -0,0 +1,24 @@ +The process used in the previous step is designed to +provide a minimal \baseOS{} configuration. Next, we add additional components +to include resource management client services, NTP support, and +other additional packages to support the default \OHPC{} environment. This +process modifies the base provisioning image and will access the BOS and \OHPC{} +repositories to resolve package install requests. We begin by installing a few +common base packages: + +% begin_ohpc_run +% ohpc_comment_header Add OpenHPC base components to compute image \ref{sec:add_components} +\begin{lstlisting}[language=bash,literate={-}{-}1,keywords={},upquote=true,literate={BOSVER}{\baseos{}}1] +# Install compute node base meta-package +[sms](*\#*) wwctl container exec rocky-9.4 /bin/bash <<- EOF + dnf -y install ohpc-base-compute +EOF +\end{lstlisting} +% end_ohpc_run + +\noindent Now, we can include additional required components to the compute +instance including resource manager client, NTP, and development environment modules support. + +Adding packages can be done by entering the image with \texttt{wwctl container shell}, +\texttt{wwctl container exec}, or using a CHROOT. 
+ diff --git a/docs/recipes/install/common/warewulf4_kargs_post.tex b/docs/recipes/install/common/warewulf4_kargs_post.tex new file mode 100644 index 0000000000..48126342c6 --- /dev/null +++ b/docs/recipes/install/common/warewulf4_kargs_post.tex @@ -0,0 +1,12 @@ +\noindent If any components have added to the boot time kernel command line arguments for the compute nodes, +the following command is required to store the configuration in Warewulf: +% begin_ohpc_run +% ohpc_validation_newline +% ohpc_validation_comment Optionally, add arguments to bootstrap kernel +% ohpc_command if [[ ${enable_kargs} -eq 1 ]]; then +\begin{lstlisting}[language=bash,keywords={},upquote=true,basicstyle=\footnotesize\ttfamily] +# Set optional compute node kernel command line arguments. +[sms](*\#*) wwctl node set --yes --kernelargs="${kargs}" "${compute_regex}" +\end{lstlisting} +% ohpc_command fi +% end_ohpc_run diff --git a/docs/recipes/install/common/warewulf4_mkchroot_rocky.tex b/docs/recipes/install/common/warewulf4_mkchroot_rocky.tex new file mode 100644 index 0000000000..17c0688b17 --- /dev/null +++ b/docs/recipes/install/common/warewulf4_mkchroot_rocky.tex @@ -0,0 +1,34 @@ +With the provisioning services enabled, the next step is to define and +customize a system image that can subsequently be used to provision one or more +{\em compute} nodes. The following subsections highlight this process. + +\subsubsection{Build initial BOS image} \label{sec:assemble_bos} +\Warewulf{} 4 supports using container images as the base file system for +provisioning, and it can import these images directly from an OCI registry like +Docker Hub. Container images must be created especially for use with \Warewulf{} +since they need to include things like a kernel and an init system. In this +example we will import our base image from a set maintained by the \Warewulf{} +community on the GitHub container registry. 
+ +The \texttt{wwctl container exec} command runs the commands below it; these commands +can also be run interactively one at a time with the command \texttt{wwctl container +shell \baseos{}}. You can add \texttt{/bin/false} as the last command to prevent +the image from rebuilding (it will show an error) and rebuild later with the +\texttt{wwctl container build} command. + +% begin_ohpc_run +% ohpc_comment_header Create compute image for Warewulf \ref{sec:assemble_bos} +\begin{lstlisting}[language=bash,literate={-}{-}1,keywords={},upquote=true,keepspaces,literate={BOSVER}{\baseos{}}1] +# Import the base image from ghcr +[sms](*\#*) wwctl container import docker://ghcr.io/warewulf/warewulf-rockylinux:9 BOSVER --syncuser + +# Enable OpenHPC inside image and update container +[sms](*\#*) wwctl container exec rocky-9.4 /bin/bash <<- EOF + dnf -y install http://repos.openhpc.community/OpenHPC/3/EL_9/x86_64/ohpc-release-3-1.el9.x86_64.rpm + dnf -y update +EOF + +# Define chroot location +[sms](*\#*) export CHROOT=/srv/warewulf/chroots/BOSVER/rootfs +\end{lstlisting} +% end_ohpc_run diff --git a/docs/recipes/install/common/warewulf4_setup.tex b/docs/recipes/install/common/warewulf4_setup.tex new file mode 100644 index 0000000000..05930d0b07 --- /dev/null +++ b/docs/recipes/install/common/warewulf4_setup.tex @@ -0,0 +1,5 @@ +At this point, all of the packages necessary to use \Warewulf{} on the {\em +master} host should be installed. Next, we need to update the configuration +to allow \Warewulf{} to work with \baseOS{}, update the hosts file, and to +support local provisioning using a second private interface (refer to +Figure~\ref{fig:physical_arch}). 
diff --git a/docs/recipes/install/common/warewulf4_setup_centos.tex b/docs/recipes/install/common/warewulf4_setup_centos.tex new file mode 100644 index 0000000000..f8ae30869e --- /dev/null +++ b/docs/recipes/install/common/warewulf4_setup_centos.tex @@ -0,0 +1,41 @@ +% begin_ohpc_run +% ohpc_comment_header Complete basic Warewulf setup for master node \ref{sec:setup_ww} +\begin{lstlisting}[language=bash,literate={-}{-}1,keywords={},upquote=true,keepspaces] +# Enable internal interface for provisioning +[sms](*\#*) ip link set dev ${sms_eth_internal} up +[sms](*\#*) ip address add ${sms_ip}/${internal_netmask} broadcast + dev ${sms_eth_internal} + +# Compute the network address for the internal network +[sms](*\#*) internal_cidr=$(netmask ${sms_ip}/${internal_netmask}) +[sms](*\#*) internal_network=${internal_cidr%/*} + +# Edit the warewulf.conf file to use appropriate interface and settings +[sms](*\#*) perl -pi -e "s/ipaddr:.*/ipaddr: ${sms_ip}/" /etc/warewulf/warewulf.conf +[sms](*\#*) perl -pi -e "s/netmask:.*/netmask: ${internal_netmask}/" /etc/warewulf/warewulf.conf +[sms](*\#*) perl -pi -e "s/network:.*/network: ${internal_network}/" /etc/warewulf/warewulf.conf +[sms](*\#*) perl -pi -e 's/template:.*/template: static/' /etc/warewulf/warewulf.conf +[sms](*\#*) perl -pi -e "s/range start:.*/range start: ${c_ip[0]}/" /etc/warewulf/warewulf.conf +[sms](*\#*) perl -pi -e "s/range end:.*/range end: ${c_ip[$((num_computes-1))]}/" /etc/warewulf/warewulf.conf +[sms](*\#*) perl -pi -e "s/mount: false/mount: true/" /etc/warewulf/warewulf.conf + +# Configure /etc/hostname on master and compute nodes +[sms](*\#*) perl -pi -e "s/warewulf/${sms_name}/" /srv/warewulf/overlays/host/rootfs/etc/hosts.ww +[sms](*\#*) perl -pi -e "s/warewulf/${sms_name}/" /srv/warewulf/overlays/generic/rootfs/etc/hosts.ww + +# Bugfix: dhcpd.template does not set next-server +[sms](*\#*) echo "next-server ${sms_ip};" >> /srv/warewulf/overlays/host/rootfs/etc/dhcpd.conf.ww + +# Configuring Warewulf 
will restart/enable relevant services to support provisioning +[sms](*\#*) systemctl enable --now warewulfd +[sms](*\#*) wwctl configure --all + +# Generate ssh keys (usually generated on login) +[sms](*\#*) bash /etc/profile.d/ssh_setup.sh +\end{lstlisting} +% end_ohpc_run + +% begin_ohpc_run +% ohpc_validation_newline +% ohpc_validation_comment Update /etc/hosts template to have ${hostname}.localdomain as the first host entry +% ohpc_command sed -e 's_\({{$node.Id.Get}}{{end}}\)_{{$node.Id.Get}}.localdomain \1_g' -i /srv/warewulf/overlays/host/rootfs/etc/hosts.ww +% end_ohpc_run diff --git a/docs/recipes/install/common/warewulf4_slurm_test_job.tex b/docs/recipes/install/common/warewulf4_slurm_test_job.tex new file mode 100644 index 0000000000..230952f731 --- /dev/null +++ b/docs/recipes/install/common/warewulf4_slurm_test_job.tex @@ -0,0 +1,122 @@ +With the resource manager enabled for production usage, users should now be +able to run jobs. To demonstrate this, we will add a ``test'' user on the {\em master} +host that can be used to run an example job. + +% begin_ohpc_run +\begin{lstlisting}[language=bash,keywords={}] +[sms](*\#*) useradd -m test +\end{lstlisting} +% end_ohpc_run + +\Warewulf{} installs a utility on the compute nodes to automatically +synchronize overlay files from the provisioning server at one minute intervals. +To rebuild the overlay, run the following: + +% begin_ohpc_run +\begin{lstlisting}[language=bash,keywords={}] +[sms](*\#*) wwctl overlay build +\end{lstlisting} +% end_ohpc_run + +% begin_ohpc_run +% ohpc_command sleep 90 +% end_ohpc_run + +After re-syncing to notify Warewulf of file modifications made on the {\em +master} host, it should take approximately one minute for the changes to +propagate. 
+ +\input{common/prun} + +%\iftoggle{isSLES_ww_slurm_x86}{\clearpage} +%\iftoggle{isCentOS_ww_slurm_x86}{\clearpage} + + +\subsection{Interactive execution} +To use the newly created ``test'' account to compile and execute the +application {\em interactively} through the resource manager, execute the +following (note the use of \texttt{prun} for parallel job launch which summarizes +the underlying native job launch mechanism being used): + +\begin{lstlisting}[language=bash,keywords={}] +# Switch to "test" user +[sms](*\#*) su - test + +# Compile MPI "hello world" example +[test@sms ~]$ mpicc -O3 /opt/ohpc/pub/examples/mpi/hello.c + +# Submit interactive job request and use prun to launch executable +[test@sms ~]$ salloc -n 8 -N 2 + +[test@c1 ~]$ prun ./a.out + +[prun] Master compute host = c1 +[prun] Resource manager = slurm +[prun] Launch cmd = mpiexec.hydra -bootstrap slurm ./a.out + + Hello, world (8 procs total) + --> Process # 0 of 8 is alive. -> c1 + --> Process # 4 of 8 is alive. -> c2 + --> Process # 1 of 8 is alive. -> c1 + --> Process # 5 of 8 is alive. -> c2 + --> Process # 2 of 8 is alive. -> c1 + --> Process # 6 of 8 is alive. -> c2 + --> Process # 3 of 8 is alive. -> c1 + --> Process # 7 of 8 is alive. -> c2 +\end{lstlisting} + +\begin{center} +\begin{tcolorbox}[] +The following table provides approximate command equivalences between SLURM and +OpenPBS: + +\vspace*{0.15cm} +\input common/rms_equivalence_table +\end{tcolorbox} +\end{center} +\nottoggle{isCentOS}{\clearpage} + +\iftoggle{isCentOS}{\clearpage} + +\subsection{Batch execution} + +For batch execution, \OHPC{} provides a simple job script for reference (also +housed in the \path{/opt/ohpc/pub/examples} directory). This example script can +be used as a starting point for submitting batch jobs to the resource manager +and the example below illustrates use of the script to submit a batch job for +execution using the same executable referenced in the previous interactive example. 
+ +\begin{lstlisting}[language=bash,keywords={}] +# Copy example job script +[test@sms ~]$ cp /opt/ohpc/pub/examples/slurm/job.mpi . + +# Examine contents (and edit to set desired job sizing characteristics) +[test@sms ~]$ cat job.mpi +#!/bin/bash + +#SBATCH -J test # Job name +#SBATCH -o job.%j.out # Name of stdout output file (%j expands to %jobId) +#SBATCH -N 2 # Total number of nodes requested +#SBATCH -n 16 # Total number of mpi tasks #requested +#SBATCH -t 01:30:00 # Run time (hh:mm:ss) - 1.5 hours + +# Launch MPI-based executable + +prun ./a.out + +# Submit job for batch execution +[test@sms ~]$ sbatch job.mpi +Submitted batch job 339 +\end{lstlisting} + +\begin{center} +\begin{tcolorbox}[] +\small +The use of the \texttt{\%j} option in the example batch job script shown is a convenient +way to track application output on an individual job basis. The \texttt{\%j} token +is replaced with the \SLURM{} job allocation number once assigned (job~\#339 in +this example). +\end{tcolorbox} +\end{center} + + diff --git a/docs/recipes/install/parse_doc.pl b/docs/recipes/install/parse_doc.pl index a057b31785..e71d881269 100755 --- a/docs/recipes/install/parse_doc.pl +++ b/docs/recipes/install/parse_doc.pl @@ -169,20 +169,22 @@ # <<- indicates the HERE document will ignore leadings tabs (not spaces) } elsif( $_ =~ /$prompt (.+ <<-[ ]*([^ ]+).*)$/ ) { my $cmd = update_cmd($1); + chomp $cmd; my $here = $2; + chomp $here; # commands that begin with a % are for CI only next if( $_ =~ /^%/ && !$ci_run ); print $fh ' ' x $indent . 
"$cmd\n"; my $next_line; - do { - $next_line = ; - # trim leading and trailing space - $next_line =~ s/^\s+|\s+$//g; + do { + $next_line = ; + # trim leading and trailing space + $next_line =~ s/^\s+|\s+$//g; - print $fh "$next_line\n"; - } while( $next_line !~ /^$here/ ); + print $fh "$next_line\n"; + } while( $next_line !~ /^$here/ ); # handle commands line line continuation: prompt$ command \ } elsif( $_ =~ /$prompt (.+) \\$/ ) { diff --git a/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/.gitignore b/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/.gitignore new file mode 100644 index 0000000000..290d156aa7 --- /dev/null +++ b/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/.gitignore @@ -0,0 +1 @@ +recipe.sh diff --git a/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/Makefile b/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/Makefile new file mode 100644 index 0000000000..b8bc9fcb21 --- /dev/null +++ b/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/Makefile @@ -0,0 +1,24 @@ +PAPER := steps.pdf +TEX_SUFS := .aux .log .nav .out .snm .toc .vrb .fdb_latexmk .bbl .blg .fls +GITID := $(shell git describe 2> /dev/null) + +#$(info $$GITID is [${GITID}]) + + +%.pdf:%.tex steps.tex manifest.tex $(wildcard common/*.tex common/ohpc-doc.sty common/base_edition/*.tex common/figures/*.pdf manifest/*.tex) +ifdef GITID + common/vc +else + test -s vc.tex || { echo "vc.tex file does not exist (and this is not a git repo)!"; exit 1; } +endif + latexmk -pdf $< + +all: $(PAPER) + +clean: + $(RM) $(foreach suf, ${TEX_SUFS}, $(PAPER:.pdf=${suf})) +ifdef GITID + $(RM) vc.tex +endif + $(RM) $(PAPER) + diff --git a/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/common b/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/common new file mode 120000 index 0000000000..3f6330a4b5 --- /dev/null +++ b/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/common @@ -0,0 +1 @@ +../../../../common/ \ No newline at end of file diff --git 
a/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/manifest b/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/manifest new file mode 120000 index 0000000000..29057155d1 --- /dev/null +++ b/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/manifest @@ -0,0 +1 @@ +../../warewulf/slurm/manifest \ No newline at end of file diff --git a/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/manifest.tex b/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/manifest.tex new file mode 120000 index 0000000000..9417c314c9 --- /dev/null +++ b/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/manifest.tex @@ -0,0 +1 @@ +../../warewulf/slurm/manifest.tex \ No newline at end of file diff --git a/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/steps.tex b/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/steps.tex new file mode 100644 index 0000000000..ba5623cf58 --- /dev/null +++ b/docs/recipes/install/rocky9/x86_64/warewulf4/slurm/steps.tex @@ -0,0 +1,302 @@ +\documentclass[letterpaper]{article} +\usepackage{common/ohpc-doc} +\setcounter{secnumdepth}{5} +\setcounter{tocdepth}{5} + +% Include git variables +\input{vc.tex} + +% Define Base OS and other local macros +\newcommand{\baseOS}{Rocky 9.4} +\newcommand{\OSRepo}{Rocky\_9.4} +\newcommand{\OSTree}{EL\_9} +\newcommand{\OSTag}{el9} +\newcommand{\baseos}{rocky-9.4} +\newcommand{\baseosshort}{rocky9} +\newcommand{\provisioner}{Warewulf4} +\newcommand{\provheader}{\provisioner{}} +\newcommand{\rms}{SLURM} +\newcommand{\rmsshort}{slurm} +\newcommand{\arch}{x86\_64} + +% Define package manager commands +\newcommand{\pkgmgr}{dnf} +\newcommand{\addrepo}{wget -P /etc/yum.repos.d} +\newcommand{\chrootaddrepo}{wget -P \$CHROOT/etc/yum.repos.d} +\newcommand{\clean}{dnf clean expire-cache} +\newcommand{\chrootclean}{dnf --installroot=\$CHROOT clean expire-cache} +\newcommand{\install}{dnf -y install} +\newcommand{\chrootinstall}{dnf -y --installroot=\$CHROOT install} +\newcommand{\groupinstall}{dnf -y groupinstall} 
+\newcommand{\groupchrootinstall}{dnf -y --installroot=\$CHROOT groupinstall} +\newcommand{\remove}{dnf -y remove} +\newcommand{\upgrade}{dnf -y upgrade} +\newcommand{\chrootupgrade}{dnf -y --installroot=\$CHROOT upgrade} +\newcommand{\tftppkg}{syslinux-tftpboot} +\newcommand{\beegfsrepo}{https://www.beegfs.io/release/beegfs\_7.4.5/dists/beegfs-rhel9.repo} + +% boolean for os-specific formatting +\toggletrue{isCentOS} +\toggletrue{isCentOS_ww_slurm_x86} +\toggletrue{isSLURM} +\toggletrue{isWarewulf} +\toggletrue{isWarewulf4} +\toggletrue{isx86} +\toggletrue{isCentOS_x86} + +\begin{document} +\graphicspath{{common/figures/}} +\thispagestyle{empty} + +% Title Page +\input{common/title} +% Disclaimer +\input{common/legal} + +\newpage +\tableofcontents +\newpage + +% Introduction -------------------------------------------------- + +\section{Introduction} \label{sec:introduction} +\input{common/install_header} +\input{common/intro} \\ + +\input{common/base_edition/edition} +\input{common/audience} +\input{common/requirements} +\input{common/inputs} + +% begin_ohpc_run +% ohpc_validation_newline +% ohpc_validation_comment Verify OpenHPC repository has been enabled before proceeding +% ohpc_validation_newline +% ohpc_command dnf repolist | grep -q OpenHPC +% ohpc_command if [ $? 
-ne 0 ];then +% ohpc_command echo "Error: OpenHPC repository must be enabled locally" +% ohpc_command exit 1 +% ohpc_command fi +% end_ohpc_run + +% Base Operating System -------------------------------------------- + +\section{Install Base Operating System (BOS)} +\input{common/bos} + +%\clearpage +% begin_ohpc_run +% ohpc_validation_newline +% ohpc_validation_comment Disable firewall +\begin{lstlisting}[language=bash,keywords={}] +[sms](*\#*) systemctl disable --now firewalld +\end{lstlisting} +% end_ohpc_run + +% ------------------------------------------------------------------ + +\section{Install \OHPC{} Components} \label{sec:basic_install} +\input{common/install_ohpc_components_intro.tex} + +\subsection{Enable \OHPC{} repository for local use} \label{sec:enable_repo} +\input{common/enable_ohpc_repo} +\input{common/rocky_repos} +\input{common/automation} + + +\subsection{Add provisioning services on {\em master} node} \label{sec:add_provisioning} +\input{common/install_provisioning_warewulf4_intro} +\input{common/enable_pxe} +\input{common/time} + +\vspace*{0.15cm} +\subsection{Add resource management services on {\em master} node} \label{sec:add_rm} +\input{common/install_slurm} + +\subsection{Optionally add \InfiniBand{} support services on {\em master} node} \label{sec:add_ofed} +\input{common/ibsupport_sms_centos} + +\subsection{Optionally add \OmniPath{} support services on {\em master} node} \label{sec:add_opa} +\input{common/opasupport_sms_centos} + +\vspace*{-0.15cm} +\subsection{Complete basic Warewulf setup for {\em master} node} \label{sec:setup_ww} +\input{common/warewulf4_setup} +\input{common/warewulf4_setup_centos} + +\subsection{Define {\em compute} image for provisioning} +\input{common/warewulf4_mkchroot_rocky} + +\subsubsection{Add \OHPC{} components} \label{sec:add_components} +\input{common/warewulf4_add_to_compute_chroot_intro} + +%\newpage +% begin_ohpc_run +% ohpc_validation_comment Add SLURM and other components to compute instance 
+\begin{lstlisting}[language=bash,literate={-}{-}1 {BOSVER}{\baseos{}}1,keywords={},upquote=true] +[sms](*\#*) wwctl container exec rocky-9.4 /bin/bash <<- EOF + # Add Slurm client support meta-package and enable munge and slurmd + dnf -y install ohpc-slurm-client + systemctl enable munge + systemctl enable slurmd + + # Add Network Time Protocol (NTP) support + dnf -y install chrony + + # Include modules user environment + dnf -y install lmod-ohpc +EOF +\end{lstlisting} +% end_ohpc_run + +\vspace*{.2cm} +\subsubsection{Customize system configuration} \label{sec:master_customization} +\input{common/oneapi_mountpoint} + +% Additional commands when additional computes are requested + +% begin_ohpc_run +% ohpc_validation_newline +% ohpc_validation_comment Update basic slurm configuration if additional computes defined +% ohpc_command if [ ${num_computes} -gt 4 ];then +% ohpc_command perl -pi -e "s/^NodeName=(\S+)/NodeName=${compute_prefix}[1-${num_computes}]/" /etc/slurm/slurm.conf +% ohpc_command perl -pi -e "s/^PartitionName=normal Nodes=(\S+)/PartitionName=normal Nodes=${compute_prefix}[1-${num_computes}]/" /etc/slurm/slurm.conf + +% ohpc_command fi +% end_ohpc_run + +%\clearpage +\subsubsection{Additional Customization ({\em optional})} \label{sec:addl_customizations} +\input{common/compute_customizations_intro} + +%\clearpage +\paragraph{Enable \InfiniBand{} drivers} +\input{common/ibsupport_compute_centos.tex} + +\paragraph{Enable \OmniPath{} drivers} +\input{common/opasupport_compute_centos.tex} + +\vspace*{0.28cm} +\paragraph{Increase locked memory limits} +\input{common/memlimits} + +\vspace*{-.17cm} +\paragraph{Enable ssh control via resource manager} +\input{common/slurm_pam} + +\vspace*{-.17cm} +\paragraph{Add \beegfs{}} \label{sec:add_beegfs} +\input{common/install_beegfs_client_centos} + +\paragraph{Add \Lustre{} client} \label{sec:lustre_client} +\input{common/lustre-client} +%\vspace*{0.25cm} +\input{common/lustre-client-centos}
+\input{common/lustre-client-post} + +%\vspace*{.45cm} +\paragraph{Enable forwarding of system logs} \label{sec:add_syslog} +\input{common/syslog} + +\paragraph{Add \clustershell{}} +\input{common/clustershell} + +\paragraph{Add \genders{}} +\input{common/genders} + +\paragraph{Add Magpie} +\input{common/magpie} + +\paragraph{Add \conman{}} \label{sec:add_conman} +\input{common/conman} + +\paragraph{Add \nhc{}} \label{sec:add_nhc} +\input{common/nhc} +\input{common/nhc_slurm} + +\vspace*{0.3cm} +\paragraph{Add \GEOPM{}} \label{sec:add_geopm} +\input{common/geopm_config} + +%\clearpage +\subsubsection{Import files} \label{sec:file_import} +\input{common/import_ww4_files} +%\vspace*{0.3cm} +\input{common/import_ww4_files_slurm} +\input{common/import_ww4_files_ib_centos} +%\vspace*{0.3cm} +\input{common/finalize_warewulf4_provisioning} +%\vspace*{0.2cm} +\input{common/add_ww4_hosts_intro} +\input{common/add_ww4_hosts_finalize} +\input{common/add_ww4_hosts_slurm} + +\subsubsection{Optional kernel arguments} \label{sec:optional_kargs} +\input{common/charliecloud_centos_warewulf_post} +\input{common/warewulf4_kargs_post} + +%\vspace*{-0.4cm} +\subsection{Boot compute nodes} \label{sec:boot_computes} +\input{common/reset_computes} + +%\clearpage +\section{Install \OHPC{} Development Components} +\input{common/dev_intro.tex} + +%\vspace*{-0.15cm} +\subsection{Development Tools} \label{sec:install_dev_tools} +\input{common/dev_tools} + +\vspace*{-0.15cm} +\subsection{Compilers} \label{sec:install_compilers} +\input{common/compilers} + +%\clearpage +\subsection{MPI Stacks} \label{sec:mpi} +\input{common/mpi_slurm} + +\subsection{Performance Tools} \label{sec:install_perf_tools} +\input{common/perf_tools_with_geopm} + +\subsection{Setup default development environment} +\input{common/default_dev} + +\vspace*{0.3cm} +\subsection{3rd Party Libraries and Tools} \label{sec:3rdparty} +\input{common/third_party_libs_intro} +\input{common/third_party_libs_petsc_centos} 
+\input{common/third_party_libs} +\vspace*{0.1cm} +\input{common/third_party_mpi_libs_x86} +\vspace*{0.5cm} +\subsection{Optional Development Tool Builds} \label{sec:3rdparty_intel} +\input{common/oneapi_enabled_builds_slurm.tex} + +\clearpage +\section{Resource Manager Startup} \label{sec:rms_startup} +\input{common/slurm_startup} + +\section{Post-boot compute node configuration} \label{sec:post_boot} +\input{common/post_boot} + +\section{Run a Test Job} \label{sec:test_job} +\input{common/warewulf4_slurm_test_job} + +\clearpage +\appendix +%\section*{Appendices} +{\bf \LARGE \centerline{Appendices}} \vspace*{0.2cm} + +\addcontentsline{toc}{section}{Appendices} +\renewcommand{\thesubsection}{\Alph{subsection}} + +\input{common/automation_appendix} +\input{common/upgrade} +\input{common/test_suite} +\input{common/customization_appendix_centos} +\input{manifest} +\input{common/signature} + + +\end{document} +