From 2bcf5c5c3179e8b0d75bf87a3a3ec7f4690f0c5c Mon Sep 17 00:00:00 2001
From: EdwardSnyder-NOAA
Date: Mon, 9 Sep 2024 13:48:47 +0000
Subject: [PATCH 01/16] update to ss160

---
 modulefiles/build_singularity_intel.lua | 40 ++++++++++++-------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/modulefiles/build_singularity_intel.lua b/modulefiles/build_singularity_intel.lua
index 80843f2e..6e2526aa 100644
--- a/modulefiles/build_singularity_intel.lua
+++ b/modulefiles/build_singularity_intel.lua
@@ -4,22 +4,22 @@ loads UFS Model prerequisites for Hera/Intel
 
 setenv("EPICHOME", "/opt")
 
-prepend_path("MODULEPATH", pathJoin(os.getenv("EPICHOME"),"spack-stack/spack-stack-1.3.0/envs/unified-dev/install/modulefiles/Core"))
+prepend_path("MODULEPATH", pathJoin(os.getenv("EPICHOME"),"spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core"))
 
-stack_intel_ver=os.getenv("stack_intel_ver") or "2021.8.0"
+stack_intel_ver=os.getenv("stack_intel_ver") or "2021.10.0"
 load(pathJoin("stack-intel", stack_intel_ver))
 
-load("intel-oneapi-mpi/2021.8.0")
-stack_intel_oneapi_mpi_ver=os.getenv("stack_intel_oneapi_mpi_ver") or "2021.8.0"
+load("intel-oneapi-mpi/2021.9.0")
+stack_intel_oneapi_mpi_ver=os.getenv("stack_intel_oneapi_mpi_ver") or "2021.9.0"
 load(pathJoin("stack-intel-oneapi-mpi", stack_intel_oneapi_mpi_ver))
 
-stack_python_ver=os.getenv("stack_python_ver") or "3.8.10"
+stack_python_ver=os.getenv("stack_python_ver") or "3.10.13"
 load(pathJoin("stack-python", stack_python_ver))
 
 cmake_ver=os.getenv("cmake_ver") or "3.23.1"
 load(pathJoin("cmake", cmake_ver))
 
-ecbuild_ver=os.getenv("ecbuild_ver") or "3.6.5"
+ecbuild_ver=os.getenv("ecbuild_ver") or "3.7.2"
 load(pathJoin("ecbuild", ecbuild_ver))
 
 jasper_ver=os.getenv("jasper_ver") or "2.0.32"
@@ -37,22 +37,22 @@ load(pathJoin("hdf5", hdf5_ver))
 netcdf_c_ver=os.getenv("netcdf_ver") or "4.9.2"
 load(pathJoin("netcdf-c", netcdf_c_ver))
 
-netcdf_fortran_ver=os.getenv("netcdf_fortran_ver") or "4.6.0"
+netcdf_fortran_ver=os.getenv("netcdf_fortran_ver") or "4.6.1"
 load(pathJoin("netcdf-fortran", netcdf_fortran_ver))
 
-pio_ver=os.getenv("pio_ver") or "2.5.9"
+pio_ver=os.getenv("pio_ver") or "2.5.10"
 load(pathJoin("parallelio", pio_ver))
 
-esmf_ver=os.getenv("esmf_ver") or "8.3.0b09"
+esmf_ver=os.getenv("esmf_ver") or "8.5.0"
 load(pathJoin("esmf", esmf_ver))
 
-fms_ver=os.getenv("fms_ver") or "2022.04"
+fms_ver=os.getenv("fms_ver") or "2023.04"
 load(pathJoin("fms",fms_ver))
 
 bacio_ver=os.getenv("bacio_ver") or "2.4.1"
 load(pathJoin("bacio", bacio_ver))
 
-crtm_ver=os.getenv("crtm_ver") or "2.4.0"
+crtm_ver=os.getenv("crtm_ver") or "2.4.0.1"
 load(pathJoin("crtm", crtm_ver))
 
 g2_ver=os.getenv("g2_ver") or "3.4.5"
@@ -61,28 +61,28 @@ load(pathJoin("g2", g2_ver))
 g2tmpl_ver=os.getenv("g2tmpl_ver") or "1.10.2"
 load(pathJoin("g2tmpl", g2tmpl_ver))
 
-ip_ver=os.getenv("ip_ver") or "3.3.3"
+ip_ver=os.getenv("ip_ver") or "4.3.0"
 load(pathJoin("ip", ip_ver))
 
-sp_ver=os.getenv("sp_ver") or "2.3.3"
+sp_ver=os.getenv("sp_ver") or "2.5.0"
 load(pathJoin("sp", sp_ver))
 
-w3emc_ver=os.getenv("w3emc_ver") or "2.9.2"
+w3emc_ver=os.getenv("w3emc_ver") or "2.10.0"
 load(pathJoin("w3emc", w3emc_ver))
 
-gftl_shared_ver=os.getenv("gftl_shared_ver") or "1.5.0"
+gftl_shared_ver=os.getenv("gftl_shared_ver") or "1.6.1"
 load(pathJoin("gftl-shared", gftl_shared_ver))
 
-mapl_ver=os.getenv("mapl_ver") or "2.22.0-esmf-8.3.0b09"
+mapl_ver=os.getenv("mapl_ver") or "2.40.3-esmf-8.5.0"
 load(pathJoin("mapl", mapl_ver))
 
 load("py-cftime/1.0.3.4")
-load("py-cython/0.29.32")
+load("py-cython/0.29.36")
 load("py-f90nml/1.4.3")
-load("py-jinja2/3.1.2")
-load("py-netcdf4/1.5.3")
+load("py-jinja2/3.0.3")
+load("py-netcdf4/1.5.8")
 load("py-numpy/1.22.3")
-load("py-pandas/1.4.0")
+load("py-pandas/1.5.3")
 load("py-python-dateutil/2.8.2")
 load("py-pyyaml/6.0")
 

From 3dee3be0e1f43597bf456009815e9f39ae471ab8 Mon Sep 17 00:00:00 2001
From: EdwardSnyder-NOAA
Date: Mon, 9 Sep 2024 14:00:51 +0000
Subject: [PATCH 02/16] update python and cmake vars

---
 modulefiles/build_singularity_intel.lua | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/modulefiles/build_singularity_intel.lua b/modulefiles/build_singularity_intel.lua
index 6e2526aa..86cc6548 100644
--- a/modulefiles/build_singularity_intel.lua
+++ b/modulefiles/build_singularity_intel.lua
@@ -13,8 +13,8 @@ load("intel-oneapi-mpi/2021.9.0")
 stack_intel_oneapi_mpi_ver=os.getenv("stack_intel_oneapi_mpi_ver") or "2021.9.0"
 load(pathJoin("stack-intel-oneapi-mpi", stack_intel_oneapi_mpi_ver))
 
-stack_python_ver=os.getenv("stack_python_ver") or "3.10.13"
-load(pathJoin("stack-python", stack_python_ver))
+--stack_python_ver=os.getenv("stack_python_ver") or "3.10.13"
+--load(pathJoin("stack-python", stack_python_ver))
 
 cmake_ver=os.getenv("cmake_ver") or "3.23.1"
 load(pathJoin("cmake", cmake_ver))
@@ -88,9 +88,12 @@ load("py-pyyaml/6.0")
 
 load("atlas")
 
-setenv("CC", "mpiicc")
-setenv("CXX", "mpiicpc")
-setenv("FC", "mpiifort")
+setenv("CMAKE_C_COMPILER","mpiicc")
+setenv("CMAKE_CXX_COMPILER","mpicxx")
+setenv("CMAKE_Fortran_COMPILER","mpif90")
+--setenv("CC", "mpiicc")
+--setenv("CXX", "mpiicpc")
+--setenv("FC", "mpiifort")
 
 setenv("JEDI_INSTALL", pathJoin(os.getenv("EPICHOME"),""))
 

From 9c3c204b48ea4667f60f973befaad7ca5f279008 Mon Sep 17 00:00:00 2001
From: EdwardSnyder-NOAA
Date: Fri, 20 Sep 2024 14:36:00 +0000
Subject: [PATCH 03/16] updated info

---
 modulefiles/build_singularity_intel.lua | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modulefiles/build_singularity_intel.lua b/modulefiles/build_singularity_intel.lua
index 86cc6548..0765a88c 100644
--- a/modulefiles/build_singularity_intel.lua
+++ b/modulefiles/build_singularity_intel.lua
@@ -1,5 +1,5 @@
 help([[
-loads UFS Model prerequisites for Hera/Intel
+loads UFS Model prerequisites for Singularity container
 ]])
 
 setenv("EPICHOME", "/opt")

From a5d2eea3a52e30fd60bebd852f1efd108e3885cb Mon Sep 17 00:00:00 2001
From: EdwardSnyder-NOAA
Date: Fri, 20 Sep 2024 14:36:43 +0000
Subject: [PATCH 04/16] update file to work with new setup container script

---
 parm/run_container_executable.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/parm/run_container_executable.sh b/parm/run_container_executable.sh
index 232a2e89..cdd40992 100755
--- a/parm/run_container_executable.sh
+++ b/parm/run_container_executable.sh
@@ -3,17 +3,17 @@ export SINGULARITYENV_FI_PROVIDER=tcp
 export SINGULARITY_SHELL=/bin/bash
 
 BINDDIR="/"`pwd | awk -F"/" '{print $2}'`
-CONTAINERLOC=${EPICCONTAINERS:-${HOME}}
-img=${img:-${CONTAINERLOC}/ubuntu20.04-intel-ue-landda.img}
+img=$IMAGE
 CONTAINERBASE="/"`echo $img | xargs realpath | awk -F"/" '{print $2}'`
 cmd=$(basename "$0")
 arg="$@"
 
-if [ ! -z "$FIXlandda" ]; then
-  INPUTBASE="/"`echo $FIXlandda | xargs realpath | awk -F"/" '{print $2}'`
+if [ ! -z "$LANDDAROOT" ]; then
+  INPUTBASE="/"`echo $LANDDAROOT | xargs realpath | awk -F"/" '{print $2}'`
   INPUTBIND="-B $INPUTBASE:$INPUTBASE"
 else
   INPUTBIND=""
 fi
-echo running: ${SINGULARITYBIN} exec $img $cmd $arg
-${SINGULARITYBIN} exec -B $BINDDIR:$BINDDIR -B $CONTAINERBASE:$CONTAINERBASE $INPUTBIND $img $cmd $arg
+container_env=$PWD/container.env
+echo running: ${SINGULARITYBIN} exec --env-file $container_env -B $BINDDIR:$BINDDIR -B $CONTAINERBASE:$CONTAINERBASE $INPUTBIND $img $cmd $arg
+${SINGULARITYBIN} exec --env-file $container_env -B $BINDDIR:$BINDDIR -B $CONTAINERBASE:$CONTAINERBASE $INPUTBIND $img $cmd $arg

From f8d9ec7028ab76d6de9ed637f139316ddc89d105 Mon Sep 17 00:00:00 2001
From: EdwardSnyder-NOAA
Date: Fri, 20 Sep 2024 14:37:17 +0000
Subject: [PATCH 05/16] added singularity files

---
 .../tasks/singularity/task.analysis.lua      |   2 +
 .../tasks/singularity/task.forecast.lua      |   2 +
 .../tasks/singularity/task.plot_stats.lua    |   2 +
 .../tasks/singularity/task.post_anal.lua     |   2 +
 .../tasks/singularity/task.pre_anal.lua      |   2 +
 .../tasks/singularity/task.prep_obs.lua      |   2 +
 parm/land_analysis_singularity.yaml          | 254 ++++++++++++++++++
 7 files changed, 266 insertions(+)
 create mode 100644 modulefiles/tasks/singularity/task.analysis.lua
 create mode 100644 modulefiles/tasks/singularity/task.forecast.lua
 create mode 100644 modulefiles/tasks/singularity/task.plot_stats.lua
 create mode 100644 modulefiles/tasks/singularity/task.post_anal.lua
 create mode 100644 modulefiles/tasks/singularity/task.pre_anal.lua
 create mode 100644 modulefiles/tasks/singularity/task.prep_obs.lua
 create mode 100644 parm/land_analysis_singularity.yaml

diff --git a/modulefiles/tasks/singularity/task.analysis.lua b/modulefiles/tasks/singularity/task.analysis.lua
new file mode 100644
index 00000000..e057b8c0
--- /dev/null
+++ b/modulefiles/tasks/singularity/task.analysis.lua
@@ -0,0 +1,2 @@
+load("$COMPILER")
+load("$MPI")
diff --git a/modulefiles/tasks/singularity/task.forecast.lua b/modulefiles/tasks/singularity/task.forecast.lua
new file mode 100644
index 00000000..e057b8c0
--- /dev/null
+++ b/modulefiles/tasks/singularity/task.forecast.lua
@@ -0,0 +1,2 @@
+load("$COMPILER")
+load("$MPI")
diff --git a/modulefiles/tasks/singularity/task.plot_stats.lua b/modulefiles/tasks/singularity/task.plot_stats.lua
new file mode 100644
index 00000000..e057b8c0
--- /dev/null
+++ b/modulefiles/tasks/singularity/task.plot_stats.lua
@@ -0,0 +1,2 @@
+load("$COMPILER")
+load("$MPI")
diff --git a/modulefiles/tasks/singularity/task.post_anal.lua b/modulefiles/tasks/singularity/task.post_anal.lua
new file mode 100644
index 00000000..e057b8c0
--- /dev/null
+++ b/modulefiles/tasks/singularity/task.post_anal.lua
@@ -0,0 +1,2 @@
+load("$COMPILER")
+load("$MPI")
diff --git a/modulefiles/tasks/singularity/task.pre_anal.lua b/modulefiles/tasks/singularity/task.pre_anal.lua
new file mode 100644
index 00000000..e057b8c0
--- /dev/null
+++ b/modulefiles/tasks/singularity/task.pre_anal.lua
@@ -0,0 +1,2 @@
+load("$COMPILER")
+load("$MPI")
diff --git a/modulefiles/tasks/singularity/task.prep_obs.lua b/modulefiles/tasks/singularity/task.prep_obs.lua
new file mode 100644
index 00000000..e057b8c0
--- /dev/null
+++ b/modulefiles/tasks/singularity/task.prep_obs.lua
@@ -0,0 +1,2 @@
+load("$COMPILER")
+load("$MPI")
diff --git a/parm/land_analysis_singularity.yaml b/parm/land_analysis_singularity.yaml
new file mode 100644
index 00000000..b2e7e2e2
--- /dev/null
+++ b/parm/land_analysis_singularity.yaml
@@ -0,0 +1,254 @@
+workflow:
+  attrs:
+    realtime: false
+    scheduler: slurm
+    cyclethrottle: 24
+    taskthrottle: 24
+  cycledef:
+    - attrs:
+        group: cycled
+      spec: 200001030000 200001040000 24:00:00
+  entities:
+    MACHINE: "singularity"
+    SCHED: "slurm"
+    ACCOUNT: "epic"
+    EXP_BASEDIR: "$SINGULARITY_WORKING_DIR/landda_test"
+    JEDI_INSTALL: "$SINGULARITY_WORKING_DIR"
+    WARMSTART_DIR: "$SINGULARITY_WORKING_DIR/land-DA_workflow/fix/DATA_RESTART"
+    ATMOS_FORC: "gswp3"
+    RES: "96"
+    FCSTHR: "24"
+    NPROCS_ANALYSIS: "6"
+    NPROCS_FORECAST: "7"
+    OBSDIR: ""
+    OBSDIR_SUBDIR: ""
+    OBS_TYPES: "GHCN"
+    DAtype: "letkfoi_snow"
+    TSTUB: "oro_C96.mx100"
+    WE2E_VAV: "NO"
+    WE2E_ATOL: "1e-7"
+    WE2E_LOG_FN: "we2e.log"
+    NET: "landda"
+    envir: "test"
+    model_ver: "v2.0.0"
+    RUN: "landda"
+    HOMElandda: "&EXP_BASEDIR;/land-DA_workflow"
+    PTMP: "&EXP_BASEDIR;/ptmp"
+    COMROOT: "&PTMP;/&envir;/com"
+    DATAROOT: "&PTMP;/&envir;/tmp"
+    KEEPDATA: "YES"
+    LOGDIR: "&COMROOT;/output/logs"
+    LOGFN_SUFFIX: "_@Y@m@d@H.log"
+    PDY: "@Y@m@d"
+    cyc: "@H"
+    DATADEP_FILE1: "&WARMSTART_DIR;/ufs_land_restart.@Y-@m-@d_@H-00-00.tile1.nc"
+    DATADEP_FILE2: "&WARMSTART_DIR;/ufs_land_restart.@Y-@m-@d_@H-00-00.nc"
+    DATADEP_FILE3: "&DATAROOT;/DATA_SHARE/RESTART/ufs_land_restart.@Y-@m-@d_@H-00-00.tile1.nc"
+    DATADEP_FILE4: "&DATAROOT;/DATA_SHARE/RESTART/ufs_land_restart.@Y-@m-@d_@H-00-00.nc"
+  log: "&LOGDIR;/workflow.log"
+  tasks:
+    task_prep_obs:
+      attrs:
+        cycledefs: cycled
+        maxtries: 2
+      envars:
+        OBSDIR: "&OBSDIR;"
+        OBSDIR_SUBDIR: "&OBSDIR_SUBDIR;"
+        OBS_TYPES: "&OBS_TYPES;"
+        MACHINE: "&MACHINE;"
+        SCHED: "&SCHED;"
+        ACCOUNT: "&ACCOUNT;"
+        ATMOS_FORC: "&ATMOS_FORC;"
+        model_ver: "&model_ver;"
+        HOMElandda: "&HOMElandda;"
+        COMROOT: "&COMROOT;"
+        DATAROOT: "&DATAROOT;"
+        KEEPDATA: "&KEEPDATA;"
+        PDY: "&PDY;"
+        cyc: "&cyc;"
+      account: "&ACCOUNT;"
+      command: '&HOMElandda;/parm/task_load_modules_run_jjob.sh "prep_obs" "&HOMElandda;" "&MACHINE;"'
+      jobname: prep_obs
+      cores: 1
+      walltime: 00:02:00
+      queue: batch
+      join: "&LOGDIR;/prep_obs&LOGFN_SUFFIX;"
+    task_pre_anal:
+      attrs:
+        cycledefs: cycled
+        maxtries: 2
+      envars:
+        MACHINE: "&MACHINE;"
+        SCHED: "&SCHED;"
+        ACCOUNT: "&ACCOUNT;"
+        RES: "&RES;"
+        TSTUB: "&TSTUB;"
+        WARMSTART_DIR: "&WARMSTART_DIR;"
+        model_ver: "&model_ver;"
+        RUN: "&RUN;"
+        HOMElandda: "&HOMElandda;"
+        COMROOT: "&COMROOT;"
+        DATAROOT: "&DATAROOT;"
+        KEEPDATA: "&KEEPDATA;"
+        PDY: "&PDY;"
+        cyc: "&cyc;"
+      account: "&ACCOUNT;"
+      command: '&HOMElandda;/parm/task_load_modules_run_jjob.sh "pre_anal" "&HOMElandda;" "&MACHINE;"'
+      jobname: pre_anal
+      cores: 1
+      walltime: 00:05:00
+      queue: batch
+      join: "&LOGDIR;/pre_anal&LOGFN_SUFFIX;"
+      dependency:
+        or:
+          datadep_file1:
+            attrs:
+              age: 5
+            value: "&DATADEP_FILE1;"
+          datadep_file2:
+            attrs:
+              age: 5
+            value: "&DATADEP_FILE2;"
+          datadep_file3:
+            attrs:
+              age: 5
+            value: "&DATADEP_FILE3;"
+          datadep_file4:
+            attrs:
+              age: 5
+            value: "&DATADEP_FILE4;"
+    task_analysis:
+      attrs:
+        cycledefs: cycled
+        maxtries: 2
+      envars:
+        OBS_TYPES: "&OBS_TYPES;"
+        MACHINE: "&MACHINE;"
+        SCHED: "&SCHED;"
+        ACCOUNT: "&ACCOUNT;"
+        RES: "&RES;"
+        TSTUB: "&TSTUB;"
+        WE2E_VAV: "&WE2E_VAV;"
+        WE2E_ATOL: "&WE2E_ATOL;"
+        WE2E_LOG_FN: "&WE2E_LOG_FN;"
+        LOGDIR: "&LOGDIR;"
+        model_ver: "&model_ver;"
+        HOMElandda: "&HOMElandda;"
+        COMROOT: "&COMROOT;"
+        DATAROOT: "&DATAROOT;"
+        KEEPDATA: "&KEEPDATA;"
+        PDY: "&PDY;"
+        cyc: "&cyc;"
+        DAtype: "&DAtype;"
+        NPROCS_ANALYSIS: "&NPROCS_ANALYSIS;"
+        JEDI_INSTALL: "&JEDI_INSTALL;"
+      account: "&ACCOUNT;"
+      command: '&HOMElandda;/parm/task_load_modules_run_jjob.sh "analysis" "&HOMElandda;" "&MACHINE;"'
+      jobname: analysis
+      nodes: "1:ppn=&NPROCS_ANALYSIS;"
+      walltime: 00:15:00
+      queue: batch
+      join: "&LOGDIR;/analysis&LOGFN_SUFFIX;"
+      dependency:
+        taskdep:
+          attrs:
+            task: pre_anal
+    task_post_anal:
+      attrs:
+        cycledefs: cycled
+        maxtries: 2
+      envars:
+        MACHINE: "&MACHINE;"
+        SCHED: "&SCHED;"
+        ACCOUNT: "&ACCOUNT;"
+        RES: "&RES;"
+        TSTUB: "&TSTUB;"
+        WE2E_VAV: "&WE2E_VAV;"
+        WE2E_ATOL: "&WE2E_ATOL;"
+        WE2E_LOG_FN: "&WE2E_LOG_FN;"
+        LOGDIR: "&LOGDIR;"
+        model_ver: "&model_ver;"
+        RUN: "&RUN;"
+        HOMElandda: "&HOMElandda;"
+        COMROOT: "&COMROOT;"
+        DATAROOT: "&DATAROOT;"
+        KEEPDATA: "&KEEPDATA;"
+        PDY: "&PDY;"
+        cyc: "&cyc;"
+        FCSTHR: "&FCSTHR;"
+      account: "&ACCOUNT;"
+      command: '&HOMElandda;/parm/task_load_modules_run_jjob.sh "post_anal" "&HOMElandda;" "&MACHINE;"'
+      jobname: post_anal
+      cores: 1
+      walltime: 00:05:00
+      queue: batch
+      join: "&LOGDIR;/post_anal&LOGFN_SUFFIX;"
+      dependency:
+        taskdep:
+          attrs:
+            task: analysis
+    task_plot_stats:
+      attrs:
+        cycledefs: cycled
+        maxtries: 2
+      envars:
+        MACHINE: "&MACHINE;"
+        SCHED: "&SCHED;"
+        ACCOUNT: "&ACCOUNT;"
+        model_ver: "&model_ver;"
+        RUN: "&RUN;"
+        HOMElandda: "&HOMElandda;"
+        COMROOT: "&COMROOT;"
+        DATAROOT: "&DATAROOT;"
+        KEEPDATA: "&KEEPDATA;"
+        PDY: "&PDY;"
+        cyc: "&cyc;"
+      account: "&ACCOUNT;"
+      command: '&HOMElandda;/parm/task_load_modules_run_jjob.sh "plot_stats" "&HOMElandda;" "&MACHINE;"'
+      jobname: plot_stats
+      cores: 1
+      walltime: 00:10:00
+      queue: batch
+      join: "&LOGDIR;/plot_stats&LOGFN_SUFFIX;"
+      dependency:
+        taskdep:
+          attrs:
+            task: analysis
+    task_forecast:
+      attrs:
+        cycledefs: cycled
+        maxtries: 2
+      envars:
+        OBS_TYPES: "&OBS_TYPES;"
+        MACHINE: "&MACHINE;"
+        SCHED: "&SCHED;"
+        ACCOUNT: "&ACCOUNT;"
+        ATMOS_FORC: "&ATMOS_FORC;"
+        RES: "&RES;"
+        WARMSTART_DIR: "&WARMSTART_DIR;"
+        WE2E_VAV: "&WE2E_VAV;"
+        WE2E_ATOL: "&WE2E_ATOL;"
+        WE2E_LOG_FN: "&WE2E_LOG_FN;"
+        LOGDIR: "&LOGDIR;"
+        model_ver: "&model_ver;"
+        HOMElandda: "&HOMElandda;"
+        COMROOT: "&COMROOT;"
+        DATAROOT: "&DATAROOT;"
+        KEEPDATA: "&KEEPDATA;"
+        LOGDIR: "&LOGDIR;"
+        PDY: "&PDY;"
+        cyc: "&cyc;"
+        DAtype: "&DAtype;"
+        FCSTHR: "&FCSTHR;"
+        NPROCS_FORECAST: "&NPROCS_FORECAST;"
+      account: "&ACCOUNT;"
+      command: '&HOMElandda;/parm/task_load_modules_run_jjob.sh "forecast" "&HOMElandda;" "&MACHINE;"'
+      jobname: forecast
+      nodes: "1:ppn=&NPROCS_FORECAST;"
+      walltime: 01:00:00
+      queue: batch
+      join: "&LOGDIR;/forecast&LOGFN_SUFFIX;"
+      dependency:
+        taskdep:
+          attrs:
+            task: post_anal

From 035e7f2330b10ccc3ea4060d7d77a0fe44e890ca Mon Sep 17 00:00:00 2001
From: EdwardSnyder-NOAA
Date: Fri, 20 Sep 2024 16:56:35 +0000
Subject: [PATCH 06/16] fixed typo

---
 modulefiles/tasks/singularity/task.analysis.lua   | 4 ++--
 modulefiles/tasks/singularity/task.forecast.lua   | 4 ++--
 modulefiles/tasks/singularity/task.plot_stats.lua | 4 ++--
 modulefiles/tasks/singularity/task.post_anal.lua  | 4 ++--
 modulefiles/tasks/singularity/task.pre_anal.lua   | 4 ++--
 modulefiles/tasks/singularity/task.prep_obs.lua   | 4 ++--
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/modulefiles/tasks/singularity/task.analysis.lua b/modulefiles/tasks/singularity/task.analysis.lua
index e057b8c0..517c4e82 100644
--- a/modulefiles/tasks/singularity/task.analysis.lua
+++ b/modulefiles/tasks/singularity/task.analysis.lua
@@ -1,2 +1,2 @@
-load("$COMPILER")
-load("$MPI")
+load("COMPILER")
+load("MPI")
diff --git a/modulefiles/tasks/singularity/task.forecast.lua b/modulefiles/tasks/singularity/task.forecast.lua
index e057b8c0..517c4e82 100644
--- a/modulefiles/tasks/singularity/task.forecast.lua
+++ b/modulefiles/tasks/singularity/task.forecast.lua
@@ -1,2 +1,2 @@
-load("$COMPILER")
-load("$MPI")
+load("COMPILER")
+load("MPI")
diff --git a/modulefiles/tasks/singularity/task.plot_stats.lua b/modulefiles/tasks/singularity/task.plot_stats.lua
index e057b8c0..517c4e82 100644
--- a/modulefiles/tasks/singularity/task.plot_stats.lua
+++ b/modulefiles/tasks/singularity/task.plot_stats.lua
@@ -1,2 +1,2 @@
-load("$COMPILER")
-load("$MPI")
+load("COMPILER")
+load("MPI")
diff --git a/modulefiles/tasks/singularity/task.post_anal.lua b/modulefiles/tasks/singularity/task.post_anal.lua
index e057b8c0..517c4e82 100644
--- a/modulefiles/tasks/singularity/task.post_anal.lua
+++ b/modulefiles/tasks/singularity/task.post_anal.lua
@@ -1,2 +1,2 @@
-load("$COMPILER")
-load("$MPI")
+load("COMPILER")
+load("MPI")
diff --git a/modulefiles/tasks/singularity/task.pre_anal.lua b/modulefiles/tasks/singularity/task.pre_anal.lua
index e057b8c0..517c4e82 100644
--- a/modulefiles/tasks/singularity/task.pre_anal.lua
+++ b/modulefiles/tasks/singularity/task.pre_anal.lua
@@ -1,2 +1,2 @@
-load("$COMPILER")
-load("$MPI")
+load("COMPILER")
+load("MPI")
diff --git a/modulefiles/tasks/singularity/task.prep_obs.lua b/modulefiles/tasks/singularity/task.prep_obs.lua
index e057b8c0..517c4e82 100644
--- a/modulefiles/tasks/singularity/task.prep_obs.lua
+++ b/modulefiles/tasks/singularity/task.prep_obs.lua
@@ -1,2 +1,2 @@
-load("$COMPILER")
-load("$MPI")
+load("COMPILER")
+load("MPI")

From 81728b8fa81a271f9c8944229bd4418d52073cc8 Mon Sep 17 00:00:00 2001
From: EdwardSnyder-NOAA
Date: Fri, 20 Sep 2024 17:19:38 +0000
Subject: [PATCH 07/16] fix typos

---
 parm/land_analysis_singularity.yaml | 6 +++---
 parm/run_container_executable.sh    | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/parm/land_analysis_singularity.yaml b/parm/land_analysis_singularity.yaml
index b2e7e2e2..374ed53d 100644
--- a/parm/land_analysis_singularity.yaml
+++ b/parm/land_analysis_singularity.yaml
@@ -12,9 +12,9 @@ workflow:
     MACHINE: "singularity"
     SCHED: "slurm"
     ACCOUNT: "epic"
-    EXP_BASEDIR: "$SINGULARITY_WORKING_DIR/landda_test"
-    JEDI_INSTALL: "$SINGULARITY_WORKING_DIR"
-    WARMSTART_DIR: "$SINGULARITY_WORKING_DIR/land-DA_workflow/fix/DATA_RESTART"
+    EXP_BASEDIR: "SINGULARITY_WORKING_DIR/landda_test"
+    JEDI_INSTALL: "SINGULARITY_WORKING_DIR"
+    WARMSTART_DIR: "SINGULARITY_WORKING_DIR/land-DA_workflow/fix/DATA_RESTART"
     ATMOS_FORC: "gswp3"
     RES: "96"
     FCSTHR: "24"
diff --git a/parm/run_container_executable.sh b/parm/run_container_executable.sh
index cdd40992..16b0ee85 100755
--- a/parm/run_container_executable.sh
+++ b/parm/run_container_executable.sh
@@ -3,7 +3,7 @@ export SINGULARITYENV_FI_PROVIDER=tcp
 export SINGULARITY_SHELL=/bin/bash
 
 BINDDIR="/"`pwd | awk -F"/" '{print $2}'`
-img=$IMAGE
+img=IMAGE
 CONTAINERBASE="/"`echo $img | xargs realpath | awk -F"/" '{print $2}'`
 cmd=$(basename "$0")
 arg="$@"

From cee8927cf6fc136820ccbab7f4d57e75f8b6b41a Mon Sep 17 00:00:00 2001
From: EdwardSnyder-NOAA
Date: Mon, 23 Sep 2024 22:55:03 +0000
Subject: [PATCH 08/16] removed extra dir

---
 parm/land_analysis_singularity.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parm/land_analysis_singularity.yaml b/parm/land_analysis_singularity.yaml
index 374ed53d..cf29f2d6 100644
--- a/parm/land_analysis_singularity.yaml
+++ b/parm/land_analysis_singularity.yaml
@@ -12,7 +12,7 @@ workflow:
     MACHINE: "singularity"
     SCHED: "slurm"
     ACCOUNT: "epic"
-    EXP_BASEDIR: "SINGULARITY_WORKING_DIR/landda_test"
+    EXP_BASEDIR: "SINGULARITY_WORKING_DIR"
     JEDI_INSTALL: "SINGULARITY_WORKING_DIR"
     WARMSTART_DIR: "SINGULARITY_WORKING_DIR/land-DA_workflow/fix/DATA_RESTART"
     ATMOS_FORC: "gswp3"

From 6fcbc7ddcaae35d065c8d568015cd9fad4aff824 Mon Sep 17 00:00:00 2001
From: EdwardSnyder-NOAA
Date: Mon, 23 Sep 2024 23:37:17 +0000
Subject: [PATCH 09/16] update run container script

---
 parm/run_container_executable.sh | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/parm/run_container_executable.sh b/parm/run_container_executable.sh
index 16b0ee85..039ab1d7 100755
--- a/parm/run_container_executable.sh
+++ b/parm/run_container_executable.sh
@@ -2,6 +2,7 @@
 export SINGULARITYENV_FI_PROVIDER=tcp
 export SINGULARITY_SHELL=/bin/bash
+SINGULARITYBIN=`which singularity`
 
 BINDDIR="/"`pwd | awk -F"/" '{print $2}'`
 img=IMAGE
 CONTAINERBASE="/"`echo $img | xargs realpath | awk -F"/" '{print $2}'`
@@ -13,7 +14,9 @@ if [ ! -z "$LANDDAROOT" ]; then
   INPUTBIND="-B $INPUTBASE:$INPUTBASE"
 else
   INPUTBIND=""
 fi
-container_env=$PWD/container.env
-echo running: ${SINGULARITYBIN} exec --env-file $container_env -B $BINDDIR:$BINDDIR -B $CONTAINERBASE:$CONTAINERBASE $INPUTBIND $img $cmd $arg
-${SINGULARITYBIN} exec --env-file $container_env -B $BINDDIR:$BINDDIR -B $CONTAINERBASE:$CONTAINERBASE $INPUTBIND $img $cmd $arg
+# Remove echo for ndate command as it messes with the PTIME variable
+if [ $cmd != "ndate" ]; then
+  echo running: ${SINGULARITYBIN} exec -B $BINDDIR:$BINDDIR -B $CONTAINERBASE:$CONTAINERBASE $INPUTBIND $img $cmd $arg
+fi
+${SINGULARITYBIN} exec -B $BINDDIR:$BINDDIR -B $CONTAINERBASE:$CONTAINERBASE $INPUTBIND $img $cmd $arg

From 755011b85c5e971f89049fd4887f0644bdc51a59 Mon Sep 17 00:00:00 2001
From: EdwardSnyder-NOAA
Date: Tue, 24 Sep 2024 01:11:48 +0000
Subject: [PATCH 10/16] init commit

---
 modulefiles/wflow_singularity.lua | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 modulefiles/wflow_singularity.lua

diff --git a/modulefiles/wflow_singularity.lua b/modulefiles/wflow_singularity.lua
new file mode 100644
index 00000000..693dc9f5
--- /dev/null
+++ b/modulefiles/wflow_singularity.lua
@@ -0,0 +1,16 @@
+help([[
+This module loads python environment for running the land-DA workflow for
+the singularity container.
+]])
+
+whatis([===[Loads libraries needed for running the land-DA workflow on unsupported platforms ]===])
+
+load("rocoto")
+
+load("conda")
+
+if mode() == "load" then
+   LmodMsgRaw([===[Please do the following to activate conda:
+       > conda activate land_da
+]===])
+end

From 7689263b51bd9aa5c8b0b2355ce069f7f1e6e094 Mon Sep 17 00:00:00 2001
From: EdwardSnyder-NOAA
Date: Fri, 4 Oct 2024 15:36:21 +0000
Subject: [PATCH 11/16] added logic to use staged data for the singularity
 container

---
 ush/hofx_analysis_stats.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ush/hofx_analysis_stats.py b/ush/hofx_analysis_stats.py
index f1d4f7dc..65b9a3a9 100755
--- a/ush/hofx_analysis_stats.py
+++ b/ush/hofx_analysis_stats.py
@@ -68,6 +68,8 @@ def plot_scatter():
         cartopy.config['data_dir']='/scratch2/NAGAPE/epic/UFS_Land-DA_Dev/inputs/NaturalEarth'
     elif yaml_data['machine']=='orion' or yaml_data['machine']=='hercules':
         cartopy.config['data_dir']='/work/noaa/epic/UFS_Land-DA_Dev/inputs/NaturalEarth'
+    elif yaml_data['machine']=='singularity':
+        cartopy.config['data_dir']='PWD/land-DA_workflow/fix/NaturalEarth'
 
     field_mean=float("{:.2f}".format(np.mean(np.absolute(field))))
     field_std=float("{:.2f}".format(np.std(np.absolute(field))))

From 9e7a52b537fcf0d9cec97a7e83245719b1b9bc52 Mon Sep 17 00:00:00 2001
From: EdwardSnyder-NOAA
Date: Fri, 4 Oct 2024 15:37:44 +0000
Subject: [PATCH 12/16] remove wflow singularity lua file

---
 modulefiles/wflow_singularity.lua | 16 ----------------
 1 file changed, 16 deletions(-)
 delete mode 100644 modulefiles/wflow_singularity.lua

diff --git a/modulefiles/wflow_singularity.lua b/modulefiles/wflow_singularity.lua
deleted file mode 100644
index 693dc9f5..00000000
--- a/modulefiles/wflow_singularity.lua
+++ /dev/null
@@ -1,16 +0,0 @@
-help([[
-This module loads python environment for running the land-DA workflow for
-the singularity container.
-]])
-
-whatis([===[Loads libraries needed for running the land-DA workflow on unsupported platforms ]===])
-
-load("rocoto")
-
-load("conda")
-
-if mode() == "load" then
-   LmodMsgRaw([===[Please do the following to activate conda:
-       > conda activate land_da
-]===])
-end

From 3cbf5718f0eb2a6082f453e239b8a1fd29f14877 Mon Sep 17 00:00:00 2001
From: Parallel Works app-run user
Date: Mon, 7 Oct 2024 20:06:52 +0000
Subject: [PATCH 13/16] update docs to reflect new container workflow

---
 .../BuildingRunningTesting/Container.rst      | 214 ++++--------------
 1 file changed, 44 insertions(+), 170 deletions(-)

diff --git a/doc/source/BuildingRunningTesting/Container.rst b/doc/source/BuildingRunningTesting/Container.rst
index 6949e8d0..b06b5894 100644
--- a/doc/source/BuildingRunningTesting/Container.rst
+++ b/doc/source/BuildingRunningTesting/Container.rst
@@ -6,10 +6,7 @@ Containerized Land DA Workflow
 
 These instructions will help users build and run a basic case for the Unified Forecast System (:term:`UFS`) Land Data Assimilation (DA) System using a `Singularity/Apptainer `_ container. The Land DA :term:`container` packages together the Land DA System with its dependencies (e.g., :term:`spack-stack`, :term:`JEDI`) and provides a uniform environment in which to build and run the Land DA System. Normally, the details of building and running Earth systems models will vary based on the computing platform because there are many possible combinations of operating systems, compilers, :term:`MPIs `, and package versions available. Installation via Singularity/Apptainer container reduces this variability and allows for a smoother experience building and running Land DA. This approach is recommended for users not running Land DA on a supported :ref:`Level 1 ` system (i.e., Hera, Orion).
 
-This chapter provides instructions for building and running basic Land DA cases in a container. Users can choose between two options:
-
-  * A Jan. 3-4, 2000 00z sample case using :term:`GSWP3` data with the UFS Noah-MP land component
-  * A Dec. 21-22, 2019 00z sample case using :term:`ERA5` data with the UFS Land Driver
+This chapter provides instructions for building and running a basic Land DA case for the UFS Land DA System in a container, using a Jan. 3-4, 2000 00z sample case with :term:`GSWP3` data and the UFS Noah-MP land component.
 
 .. attention::
 
@@ -93,7 +90,7 @@ where ``/path/to/landda`` is the path to this top-level directory (e.g., ``/Users/Joe/landda``).
 NOAA RDHPCS Systems
 ----------------------
 
-On many NOAA :term:`RDHPCS`, a container named ``ubuntu20.04-intel-landda-release-public-v1.2.0.img`` has already been built, and users may access the container at the locations in :numref:`Table %s `.
+On many NOAA :term:`RDHPCS`, a container named ``ubuntu22.04-intel-ue-1.6.0-landda-dev.img`` has already been built, and users may access the container at the locations in :numref:`Table %s `.
 
 .. _PreBuiltContainers:
 
@@ -119,18 +116,18 @@ Users can simply set an environment variable to point to the container:
 
 .. code-block:: console
 
-   export img=path/to/ubuntu20.04-intel-landda-release-public-v1.2.0.img
+   export img=path/to/ubuntu22.04-intel-ue-1.6.0-landda-dev.img
 
 If users prefer, they may copy the container to their local working directory. For example, on Jet:
 
 .. code-block:: console
 
-   cp /mnt/lfs4/HFIP/hfv3gfs/role.epic/containers/ubuntu20.04-intel-landda-release-public-v1.2.0.img .
+   cp /mnt/lfs4/HFIP/hfv3gfs/role.epic/containers/ubuntu22.04-intel-ue-1.6.0-landda-dev.img .
 
 Other Systems
 ----------------
 
-On other systems, users can build the Singularity container from a public Docker :term:`container` image or download the ``ubuntu20.04-intel-landda-release-public-v1.2.0.img`` container from the `Land DA Data Bucket `_. Downloading may be faster depending on the download speed on the user's system. However, the container in the data bucket is the ``release/v1.2.0`` container rather than the updated ``develop`` branch container.
+On other systems, users can build the Singularity container from a public Docker :term:`container` image or download the ``ubuntu22.04-intel-ue-1.6.0-landda-dev.img`` container from the `Land DA Data Bucket `_. Downloading may be faster depending on the download speed on the user's system. However, the container in the data bucket is the ``release/v1.2.0`` container rather than the updated ``develop`` branch container.
 
 To download from the data bucket, users can run:
 
@@ -142,7 +139,7 @@ To build the container from a Docker image, users can run:
 
 .. code-block:: console
 
-   singularity build --force ubuntu20.04-intel-landda-release-public-v1.2.0.img docker://noaaepic/ubuntu20.04-intel-landda:release-public-v1.2.0
+   singularity build --force ubuntu22.04-intel-ue-1.6.0-landda-dev.img docker://noaaepic/ubuntu22.04-intel21.10-landda:ue160-dev
 
 This process may take several hours depending on the system.
 
@@ -162,16 +159,16 @@ Users on any system may download and untar the data from the `Land DA Data Bucke
 
 .. code-block:: console
 
    cd $LANDDAROOT
-   wget https://noaa-ufs-land-da-pds.s3.amazonaws.com/current_land_da_release_data/v1.2.0/Landdav1.2.0_input_data.tar.gz
-   tar xvfz Landdav1.2.0_input_data.tar.gz
+   wget https://noaa-ufs-land-da-pds.s3.amazonaws.com/develop-20240809/inputs.tar.gz
+   tar xvfz inputs.tar.gz
 
 If users choose to add data in a location other than ``$LANDDAROOT``, they can set the input data directory by running:
 
 .. code-block:: console
 
-   export LANDDA_INPUTS=/path/to/input/data
+   export LANDDA_INPUTS=/path/to/inputs
 
-where ``/path/to/input/data`` is replaced by the absolute path to the location of their Land DA input data.
+where ``/path/to`` is replaced by the absolute path to the location of their Land DA input data.
 
 .. _RunContainer:
 
@@ -193,21 +190,13 @@ Save the location of the container in an environment variable.
 
 .. code-block:: console
 
-   export img=path/to/ubuntu20.04-intel-landda-release-public-v1.2.0.img
-
-Set the ``USE_SINGULARITY`` environment variable to "yes".
-
-.. code-block:: console
-
-   export USE_SINGULARITY=yes
-
-This variable tells the workflow to use the containerized version of all the executables (including python) when running a cycle.
+   export img=/path/to/ubuntu22.04-intel-ue-1.6.0-landda-dev.img
 
 Users may convert a container ``.img`` file to a writable sandbox. This step is optional on most systems:
 
 .. code-block:: console
 
-   singularity build --sandbox ubuntu20.04-intel-landda-release-public-v1.2.0 $img
+   singularity build --sandbox ubuntu22.04-intel-ue-1.6.0-landda-dev $img
 
 When making a writable sandbox on NOAA :term:`RDHPCS`, the following warnings commonly appear and can be ignored:
 
@@ -218,17 +207,17 @@ When making a writable sandbox on NOAA :term:`RDHPCS`, the following warnings co
    INFO:    Starting build...
    INFO:    Verifying bootstrap image ubuntu20.04-intel-landda-release-public-v1.2.0.img
    WARNING: integrity: signature not found for object group 1
    WARNING: Bootstrap image could not be verified, but build will continue.
 
-From within the ``$LANDDAROOT`` directory, copy the ``land-DA_workflow`` directory out of the container.
+From within the ``$LANDDAROOT`` directory, copy the ``setup_container.sh`` script out of the container.
 
 .. code-block:: console
 
-   singularity exec -H $PWD $img cp -r /opt/land-DA_workflow .
+   singularity exec -H $PWD $img cp -r /opt/land-DA_workflow/setup_container.sh .
 
-There should now be a ``land-DA_workflow`` directory in the ``$LANDDAROOT`` directory. Navigate into the ``land-DA_workflow`` directory. If for some reason, this is unsuccessful, users may try a version of the following command instead:
+The ``setup_container.sh`` script should now be in the ``$LANDDAROOT`` directory. If for some reason, the previous command was unsuccessful, users may try a version of the following command instead:
 
 .. code-block:: console
 
-   singularity exec -B /:/ $img cp -r /opt/land-DA_workflow .
+   singularity exec -B /:/ $img cp -r /opt/land-DA_workflow/setup_container.sh .
 
 where ```` and ```` are replaced with a top-level directory on the local system and in the container, respectively. Additional directories can be bound by adding another ``-B /:/`` argument before the container location (``$img``). Note that if previous steps included a ``sudo`` command, ``sudo`` may be required in front of this command.
@@ -240,90 +229,49 @@ where ```` and ```` are replaced with a top-level
 
 Sometimes binding directories with different names can cause problems. In general, it is recommended that the local base directory and the container directory have the same name. For example, if the host system's top-level directory is ``/user1234``, the user may want to convert the ``.img`` file to a writable sandbox and create a ``user1234`` directory in the sandbox to bind to.
 
-Navigate to the ``land-DA_workflow`` directory after it has been successfully copied into ``$LANDDAROOT``.
+Run the ``setup_container.sh`` script with the proper arguments. Ensure the ``LANDDA_INPUTS`` variable is set before running this script.
 
 .. code-block:: console
 
-   cd land-DA_workflow
+   ./setup_container.sh -c= -m= -i=$img
 
-When using a Singularity container, Intel compilers and Intel :term:`MPI` (preferably 2020 versions or newer) need to be available on the host system to properly launch MPI jobs. The Level 1 systems that have Intel compilers and Intel MPI available are: Hera, Jet, NOAA Cloud, and Orion. Generally, this is accomplished by loading a module with a recent Intel compiler and then loading the corresponding Intel MPI. For example, users can modify the following commands to load their system's compiler/MPI combination:
-
-.. code-block:: console
-
-   module load intel/2022.1.2 impi/2022.1.2
-
-.. note::
-
-   :term:`Spack-stack` uses lua modules, which require Lmod to be initialized for the ``module load`` command to work. If for some reason, Lmod is not initialized, users can source the ``init/bash`` file on their system before running the command above. For example, users can modify and run the following command:
-
-   .. code-block:: console
-
-      source /path/to/init/bash
-
-   Then they should be able to load the appropriate modules.
-
-The remaining Level 1 systems that do not have Intel MPI available will need to load a different Intel compiler and MPI combination. Refer to :numref:`Table %s ` for which Intel compiler and MPI to load for these systems.
-
-.. _NonIMPICompilers:
-
-.. table:: Intel compilers and MPIs for non-Intel MPI Level 1 systems
-
-   +-----------------+-------------------------------------------------------------------------+
-   | Machine         | Intel compiler and MPI combinations                                     |
-   +=================+=========================================================================+
-   | Derecho         | module load intel-oneapi/2023.2.1 cray-mpich/8.1.25                     |
-   +-----------------+-------------------------------------------------------------------------+
-   | Gaea            | module load intel-classic/2023.1.0 cray-mpich/8.1.25                    |
-   +-----------------+-------------------------------------------------------------------------+
-   | Hercules        | module load intel-oneapi-compilers/2022.2.1 intel-oneapi-mpi/2021.7.1   |
-   +-----------------+-------------------------------------------------------------------------+
-
-For Derecho and Gaea, an additional script is needed to help set up the ``land-DA_workflow`` scripts so that the container can run there.
-
-.. code-block:: console
-
-   ./setup_container.sh -p=
-
-where ```` is ``derecho`` or ``gaea``.
+where:
+
+ * ``-c`` is the compiler on the user's local machine (e.g., ``intel/2022.1.2``)
+ * ``-m`` is the :term:`MPI` on the user's local machine (e.g., ``impi/2022.1.2``)
+ * ``-i`` is the full path to the container image (e.g., ``$LANDDAROOT/ubuntu22.04-intel-ue-1.6.0-landda-dev.img``).
+
+When using a Singularity container, Intel compilers and Intel :term:`MPI` (preferably 2020 versions or newer) need to be available on the host system to properly launch MPI jobs. Generally, this is accomplished by loading a module with a recent Intel compiler and then loading the corresponding Intel MPI.
 
 .. _ConfigureExptC:
 
 Configure the Experiment
 ===========================
 
-Modify Machine Settings
-------------------------
+The user should now see the ``land-DA_workflow`` and ``jedi-bundle`` directories in the ``$LANDDAROOT`` directory.
 
-Users on a system with a Slurm job scheduler will need to make some minor changes to the ``submit_cycle.sh`` file. Open the file and change the account and queue (qos) to match the desired account and qos on the system. Users may also need to add the following line to the script to specify the partition. For example, on Jet, users should set:
+Because of a conda conflict between the container and the host system, it is best to load rocoto separately instead of using workflow files found in the ``modulefiles`` directory.
 
 .. code-block:: console
 
-   #SBATCH --partition=xjet
+   module load rocoto
 
-When using the GSWP3 forcing option, users will need to update line 7 to say ``#SBATCH --cpus-per-task=4``. Users can perform this change manually in a code editor or run:
+The ``setup_container.sh`` script creates the ``land_analysis.yaml`` from the ``land_analysis_singularity.yaml`` file. Update any relevant variables in this file (e.g., ``ACCOUNT`` or ``cycledef/spec``) before creating the Rocoto XML file.
 
 .. code-block:: console
 
-   sed -i 's/--cpus-per-task=1/--cpus-per-task=4/g' submit_cycle.sh
+   cd $LANDDAROOT/land-DA_workflow/parm
+   vi land_analysis.yaml
 
 Save and close the file.
 
-Modify Experiment Settings
----------------------------
-
-The Land DA System uses a script-based workflow that is launched using the ``do_submit_cycle.sh`` script. That script requires an input file that details all the specifics of a given experiment. EPIC has provided two sample ``settings_*`` files as examples: ``settings_DA_cycle_era5`` and ``settings_DA_cycle_gswp3``.
-
-.. attention::
-
-   Note that the GSWP3 option will only run as-is on Hera and Orion. Users on other systems may need to make significant changes to configuration files, which is not a supported option for the |latestr| release. It is recommended that users on other systems use the UFS land driver ERA5 sample experiment set in ``settings_DA_cycle_era5``.
-
-First, update the ``$BASELINE`` environment variable in the selected ``settings_DA_*`` file to say ``singularity.internal`` instead of ``hera.internal``:
+Once everything looks good, run the uwtools script to create the Rocoto XML file:
 
 .. code-block:: console
 
-   export BASELINE=singularity.internal
+   ../sorc/conda/envs/land_da/bin/uw rocoto realize --input-file land_analysis.yaml --output-file land_analysis.xml
 
-When using the GSWP3 forcing option, users must also update the ``MACHINE_ID`` to ``orion`` in ``settings_DA_cycle_gswp3`` if running on Orion.
+A successful run of this command will output a “0 errors found” message.
 
 .. _RunExptC:
 
 Run the Experiment
 ===================
 
 To start the experiment, run:
 
 .. code-block:: console
 
-   ./do_submit_cycle.sh settings_DA_cycle_era5
+   rocotorun -w land_analysis.xml -d land_analysis.db
 
-The ``do_submit_cycle.sh`` script will read the ``settings_DA_cycle_*`` file and the ``release.environment`` file, which contain sensible experiment default values to simplify the process of running the workflow for the first time. Advanced users will wish to modify the parameters in ``do_submit_cycle.sh`` to fit their particular needs. After reading the defaults and other variables from the settings files, ``do_submit_cycle.sh`` creates a working directory (named ``workdir`` by default) and an output directory called ``landda_expts`` in the parent directory of ``land-DA_workflow`` and then submits a job (``submit_cycle.sh``) to the queue that will run through the workflow. If all succeeds, users will see ``log`` and ``err`` files created in ``land-DA_workflow`` along with a ``cycle.log`` file, which will show where the cycle has ended.
+See the :ref:`Workflow Overview ` section to learn more about the workflow process.
 
-.. _CheckProgress:
+.. _TrackProgress:
 
-Check Progress
+Track Progress
 ----------------
 
-To check on the experiment status, users on a system with a Slurm job scheduler may run:
+To check on the job status, users on a system with a Slurm job scheduler may run:
 
 .. code-block:: console
 
    squeue -u $USER
 
-To view progress, users can open the ``log*`` and ``err*`` files once they have been generated:
+To view the experiment status, run:
 
 .. code-block:: console
 
-   tail -f log* err*
+   rocotostat -w land_analysis.xml -d land_analysis.db
 
-Users will need to type ``Ctrl+C`` to exit the files. For examples of what the log and error files should look like in a successful experiment, reference :ref:`ERA5 Experiment Logs ` or :ref:`GSWP3 Experiment Logs ` below.
-
-.. attention::
-
-   If the log file contains a NetCDF error (e.g., ``ModuleNotFoundError: No module named 'netCDF4'``), run:
-
-   .. code-block:: console
-
-      python -m pip install netCDF4
-
-   Then, resubmit the job (``sbatch submit_cycle.sh``).
-
-Next, check for the background and analysis files in the test directory.
-
-.. code-block:: console
-
-   ls -l ../landda_expts/DA__test/mem000/restarts/``
-
-where:
-
- * ```` is either ``era5`` or ``gswp3``, and
- * ```` is either ``vector`` or ``tile`` depending on whether ERA5 or GSWP3 forcing data were used, respectively.
-
-The experiment should populate the ``landda_expts`` directory with data in the following locations:
-
-.. code-block:: console
-
-   landda_expts/DA_GHCN_test/DA/
-   # AND
-   landda_expts/DA_GHCN_test/mem000/restarts/vector/
-   # OR
-   landda_expts/DA_GHCN_test/mem000/restarts/tile/
-
-Depending on the experiment, either the ``vector`` or the ``tile`` directory will have data, but not both.
-
-
-.. _era5-log-output:
-
-ERA5 Experiment Logs
-=====================
-
-For the ERA5 experiment, the ``log*`` file for a successful experiment will contain a message like:
-
-.. code-block:: console
-
-   Creating: .//ufs_land_restart.2019-12-22_00-00-00.nc
-   Searching for forcing at time: 2019-12-22 01:00:00
-
-The ``err*`` file for a successful experiment will end with something similar to:
-
-.. code-block:: console
-
-   + THISDATE=2019122200
-   + date_count=1
-   + '[' 1 -lt 1 ']'
-   + '[' 2019122200 -lt 2019122200 ']'
-
-.. _gswp3-log-output:
-
-GSWP3 Experiment Logs
-=======================
-
-For the GSWP3 experiment, the ``log*`` file for a successful experiment will end with a list of resource statistics. For example:
-
-.. code-block:: console
-
-   Number of times filesystem performed OUTPUT = 250544
-   Number of Voluntary Context Switches = 3252
-   Number of InVoluntary Context Switches = 183
-   *****************END OF RESOURCE STATISTICS*************************
-
-The ``err*`` file for a successful experiment will end with something similar to:
-
-.. code-block:: console
-
-   + echo 'do_landDA: calling apply snow increment'
-   + [[ '' =~ hera\.internal ]]
-   + /apps/intel-2022.1.2/intel-2022.1.2/mpi/2021.5.1/bin/mpiexec -n 6 /path/to/land-DA_workflow/build/bin/apply_incr.exe /path/to/landda_expts/DA_GSWP3_test/DA/logs//apply_incr.log
-   + [[ 0 != 0 ]]
-   + '[' YES == YES ']'
-   + '[' YES == YES ']'
-   + cp /path/to/workdir/mem000/jedi/20000103.000000.xainc.sfc_data.tile1.nc /path/to/workdir/mem000/jedi/20000103.000000.xainc.sfc_data.tile2.nc /path/to/workdir/mem000/jedi/20000103.000000.xainc.sfc_data.tile3.nc /path/to/workdir/mem000/jedi/20000103.000000.xainc.sfc_data.tile4.nc /path/to/workdir/mem000/jedi/20000103.000000.xainc.sfc_data.tile5.nc /path/to/workdir/mem000/jedi/20000103.000000.xainc.sfc_data.tile6.nc /path/to/landda_expts/DA_GSWP3_test/DA/jedi_incr/
-   + [[ YES == \N\O ]]
+See the :ref:`Track Experiment Status ` section to learn more about the ``rocotostat`` output.
+
+.. _CheckExptOutput:
+
+Check Experiment Output
+-------------------------
+
+Since this experiment in the container is the same experiment explained in the previous document section, it is suggested that users see the :ref:`experiment output structure ` as well as the :ref:`plotting results ` to learn more about the expected experiment outputs.
 

From 33d88e612e79d26442399f7caca374d698f82afe Mon Sep 17 00:00:00 2001
From: EdwardSnyder-NOAA
Date: Tue, 8 Oct 2024 16:52:17 +0000
Subject: [PATCH 14/16] update container name to v2.0.0 release

---
 .../BuildingRunningTesting/Container.rst | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/doc/source/BuildingRunningTesting/Container.rst b/doc/source/BuildingRunningTesting/Container.rst
index b06b5894..c6d4ba41 100644
--- a/doc/source/BuildingRunningTesting/Container.rst
+++ b/doc/source/BuildingRunningTesting/Container.rst
@@ -90,7 +90,7 @@ where ``/path/to/landda`` is the path to this top-level directory (e.g., ``/User
 NOAA RDHPCS Systems
 ----------------------
 
-On many NOAA :term:`RDHPCS`, a container named ``ubuntu22.04-intel-ue-1.6.0-landda-dev.img`` has already been built, and users may access the container at the locations in :numref:`Table %s `.
+On many NOAA :term:`RDHPCS`, a container named ``ubuntu22.04-intel-landda-release-public-v2.0.0.img`` has already been built, and users may access the container at the locations in :numref:`Table %s `.
 
 .. _PreBuiltContainers:
 
@@ -116,30 +116,30 @@ Users can simply set an environment variable to point to the container:
 
 .. code-block:: console
 
-   export img=path/to/ubuntu22.04-intel-ue-1.6.0-landda-dev.img
+   export img=path/to/ubuntu22.04-intel-landda-release-public-v2.0.0.img
 
 If users prefer, they may copy the container to their local working directory. For example, on Jet:
 
 .. code-block:: console
 
-   cp /mnt/lfs4/HFIP/hfv3gfs/role.epic/containers/ubuntu22.04-intel-ue-1.6.0-landda-dev.img .
+   cp /mnt/lfs4/HFIP/hfv3gfs/role.epic/containers/ubuntu22.04-intel-landda-release-public-v2.0.0.img .
 
 Other Systems
 ----------------
 
-On other systems, users can build the Singularity container from a public Docker :term:`container` image or download the ``ubuntu22.04-intel-ue-1.6.0-landda-dev.img`` container from the `Land DA Data Bucket `_. Downloading may be faster depending on the download speed on the user's system. However, the container in the data bucket is the ``release/v1.2.0`` container rather than the updated ``develop`` branch container.
+On other systems, users can build the Singularity container from a public Docker :term:`container` image or download the ``ubuntu22.04-intel-landda-release-public-v2.0.0.img`` container from the `Land DA Data Bucket `_. Downloading may be faster depending on the download speed on the user's system. However, the container in the data bucket is the ``release/v2.0.0`` container rather than the updated ``develop`` branch container.
 
 To download from the data bucket, users can run:
 
 .. code-block:: console
 
-   wget https://noaa-ufs-land-da-pds.s3.amazonaws.com/current_land_da_release_data/v1.2.0/ubuntu20.04-intel-landda-release-public-v1.2.0.img
+   wget https://noaa-ufs-land-da-pds.s3.amazonaws.com/current_land_da_release_data/v2.0.0/ubuntu22.04-intel-landda-release-public-v2.0.0.img
 
 To build the container from a Docker image, users can run:
 
 .. code-block:: console
 
-   singularity build --force ubuntu22.04-intel-ue-1.6.0-landda-dev.img docker://noaaepic/ubuntu22.04-intel21.10-landda:ue160-dev
+   singularity build --force ubuntu22.04-intel-landda-release-public-v2.0.0.img docker://noaaepic/ubuntu22.04-intel21.10-landda:ue160-fms2024.01-release
 
 This process may take several hours depending on the system.
 
@@ -190,20 +190,20 @@ Save the location of the container in an environment variable.
 
 .. code-block:: console
 
-   export img=/path/to/ubuntu22.04-intel-ue-1.6.0-landda-dev.img
+   export img=/path/to/ubuntu22.04-intel-landda-release-public-v2.0.0.img
 
 Users may convert a container ``.img`` file to a writable sandbox. This step is optional on most systems:
 
 .. code-block:: console
 
-   singularity build --sandbox ubuntu22.04-intel-ue-1.6.0-landda-dev $img
+   singularity build --sandbox ubuntu22.04-intel-landda-release-public-v2.0.0 $img
 
 When making a writable sandbox on NOAA :term:`RDHPCS`, the following warnings commonly appear and can be ignored:
 
 .. code-block:: console
 
    INFO:    Starting build...
-   INFO:    Verifying bootstrap image ubuntu20.04-intel-landda-release-public-v1.2.0.img
+   INFO:    Verifying bootstrap image ubuntu22.04-intel-landda-release-public-v2.0.0.img
    WARNING: integrity: signature not found for object group 1
    WARNING: Bootstrap image could not be verified, but build will continue.
 
@@ -239,7 +239,7 @@ where:
 
 * ``-c`` is the compiler on the user's local machine (e.g., ``intel/2022.1.2``)
 * ``-m`` is the :term:`MPI` on the user's local machine (e.g., ``impi/2022.1.2``)
-* ``-i`` is the full path to the container image (e.g., ``$LANDDAROOT/ubuntu22.04-intel-ue-1.6.0-landda-dev.img``).
+* ``-i`` is the full path to the container image (e.g., ``$LANDDAROOT/ubuntu22.04-intel-landda-release-public-v2.0.0.img``).
 
 When using a Singularity container, Intel compilers and Intel :term:`MPI` (preferably 2020 versions or newer) need to be available on the host system to properly launch MPI jobs. Generally, this is accomplished by loading a module with a recent Intel compiler and then loading the corresponding Intel MPI.
 
From a062e3587d12ffccc1b74e4e3b2a0a42763e2af6 Mon Sep 17 00:00:00 2001
From: EdwardSnyder-NOAA
Date: Wed, 9 Oct 2024 12:39:27 +0000
Subject: [PATCH 15/16] updated container info

---
 doc/source/BackgroundInfo/TechnicalOverview.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/source/BackgroundInfo/TechnicalOverview.rst b/doc/source/BackgroundInfo/TechnicalOverview.rst
index 742df050..a23ffa76 100644
--- a/doc/source/BackgroundInfo/TechnicalOverview.rst
+++ b/doc/source/BackgroundInfo/TechnicalOverview.rst
@@ -83,9 +83,9 @@ Preconfigured (Level 1) systems for Land DA already have the required external l
      - /work/noaa/epic/role-epic/spack-stack/hercules/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core
      - /work2/noaa/epic/UFS_Land-DA_Dev/jedi_v7_hercules
    * - Container
-     - intel-oneapi-compilers/2021.8.0
-     - intel-oneapi-mpi/2021.8.0
-     - /opt/spack-stack/ (inside the container)
+     - intel-oneapi-compilers/2021.10.0
+     - intel-oneapi-mpi/2021.9.0
+     - /opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core (inside the container)
      - /opt/jedi-bundle (inside the container)
 
 Level 2-4 Systems
@@ -215,4 +215,4 @@ Unlike the standalone Noah-MP land driver, the Noah-MP :term:`NUOPC cap` is able
 Unified Workflow (UW) Tools
 ============================
 
-The Unified Workflow (UW) is a set of tools intended to unify the workflow for various UFS applications under one framework. The UW toolkit currently includes rocoto, template, and configuration (config) tools, which are being incorporated into the Land DA workflow. Additional tools are under development. More details about UW tools can be found in the `uwtools `_ GitHub repository and in the :uw:`UW Documentation <>`.
\ No newline at end of file
+The Unified Workflow (UW) is a set of tools intended to unify the workflow for various UFS applications under one framework. The UW toolkit currently includes rocoto, template, and configuration (config) tools, which are being incorporated into the Land DA workflow. Additional tools are under development. More details about UW tools can be found in the `uwtools `_ GitHub repository and in the :uw:`UW Documentation <>`.

From 120c1067131ded3161a936165c03e35f21b9b93f Mon Sep 17 00:00:00 2001
From: EdwardSnyder-NOAA
Date: Thu, 17 Oct 2024 19:09:28 +0000
Subject: [PATCH 16/16] added changes to work with the new spack-stack

---
 sorc/test/ci/Dockerfile | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/sorc/test/ci/Dockerfile b/sorc/test/ci/Dockerfile
index 793ab2ad..9aabcc0d 100644
--- a/sorc/test/ci/Dockerfile
+++ b/sorc/test/ci/Dockerfile
@@ -1,20 +1,29 @@
-From noaaepic/ubuntu20.04-intel-landda:develop
+From noaaepic/ubuntu22.04-intel21.10-landda:ue160-dev
 
 CMD ["/bin/bash"]
 ENV HOME=/opt
 WORKDIR $HOME
 
-#remove org land-offline_workflow
-RUN rm -rf $HOME/land-offline_workflow
-COPY . $HOME/land-offline_workflow
+# Get Land DA data
+RUN wget https://noaa-ufs-land-da-pds.s3.amazonaws.com/develop-20240809/inputs.tar.gz && \
+    tar -xvzf inputs.tar.gz && mv inputs/* /opt/land-DA_workflow/fix/
 
 # set env vars
 ENV FIXlandda=$HOME/land-DA_workflow/fix
-ENV EPICHOME=/opt
-ENV JEDI_INSTALL=${EPICHOME}/jedi_skylabv7.0
-ENV TEST_BASEDIR=${EPICHOME}/test_base/restarts/vector"}
+ENV JEDI_INSTALL=$HOME
+ENV FIXdir=/opt/land-DA_workflow/fix
+ENV JEDI_EXECDIR=/opt/jedi-bundle/install/bin
+
+# Fix UFS WM RT File paths
+RUN ln -s /opt/land-DA_workflow/install/bin/ufs_model /opt/land-DA_workflow/build/ufs_model.fd/src/ufs_model.fd-build/
+RUN sed -i '18 i PLATFORM=jet' /opt/land-DA_workflow/sorc/test/run_ufs_datm_lnd.sh
+RUN mv /opt/land-DA_workflow/fix/DATM_input_data /opt/land-DA_workflow/fix/DATM_GSWP3_input_data && \
+    ln -s /opt/land-DA_workflow/fix/DATM_GSWP3_input_data/gswp3/* /opt/land-DA_workflow/fix/DATM_GSWP3_input_data/
+RUN mkdir -p /opt/land-DA_workflow/fix/FV3_input_data/INPUT && \
+    ln -s /opt/land-DA_workflow/fix/FV3_fix_tiled/C96/* /opt/land-DA_workflow/fix/FV3_input_data/INPUT
 
 #build & unit testing
-WORKDIR $HOME/land-offline_workflow
-RUN source /opt/spack-stack/.bashenv; mkdir build; cd build; pwd; ecbuild ..; make -j2; ctest -V --stop-on-failure
+WORKDIR $HOME/land-DA_workflow
+#RUN source /opt/spack-stack/.bashenv; mkdir build; cd build; pwd; ecbuild ..; make -j2; ctest -V --stop-on-failure
+RUN source /opt/spack-stack/spack-stack-1.6.0/.bashenv; cd build; pwd; ctest -V --stop-on-failure
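
---

For reviewers who want to exercise the series end-to-end, the sketch below chains together only commands that the patches and the updated documentation themselves introduce. It is a minimal outline under stated assumptions, not part of the series: the mbox filename (land-da-singularity.mbox) is hypothetical, the intel/2022.1.2 and impi/2022.1.2 modules are the examples used in the docs above, and all paths must be adapted to the local system.

    #!/bin/bash
    # Assumptions: the 16 patches are saved as land-da-singularity.mbox
    # (hypothetical name), the host provides Intel compiler/MPI modules,
    # and Slurm + Rocoto are available, as the workflow docs require.
    git am land-da-singularity.mbox              # apply PATCH 01-16
    export LANDDAROOT=/path/to/landda            # top-level working directory
    export LANDDA_INPUTS=$LANDDAROOT/inputs      # staged input data (PATCH 13)
    export img=$LANDDAROOT/ubuntu22.04-intel-landda-release-public-v2.0.0.img
    cd $LANDDAROOT
    # Copy the setup script out of the container and run it (PATCH 13 docs)
    singularity exec -H $PWD $img cp -r /opt/land-DA_workflow/setup_container.sh .
    ./setup_container.sh -c=intel/2022.1.2 -m=impi/2022.1.2 -i=$img
    # Generate the Rocoto XML from the Singularity YAML and launch (PATCH 05/13)
    cd land-DA_workflow/parm
    module load rocoto
    ../sorc/conda/envs/land_da/bin/uw rocoto realize \
        --input-file land_analysis.yaml --output-file land_analysis.xml
    rocotorun -w land_analysis.xml -d land_analysis.db    # start the experiment
    rocotostat -w land_analysis.xml -d land_analysis.db   # track progress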