diff --git a/configs/make.lassen b/configs/make.lassen new file mode 100644 index 00000000..1039820b --- /dev/null +++ b/configs/make.lassen @@ -0,0 +1,9 @@ +# # Modules loaded +# 1) StdEnv (S) 2) cuda/11.7.0 3) fftw/3.3.9 4) xl/2022.08.19 5) spectrum-mpi/rolling-release 6) lapack/3.8.0-gcc-4.9.3 +# +etree=no +SW4ROOT=/usr/apps/wpp +FC=mpifort +CXX=mpicxx +#EXTRA_LINK_FLAGS = -L/usr/tce/packages/xl/xl-2019.02.07/xlf/16.1.1/lib -L/usr/tcetmp/packages/lapack/lapack-3.8.0-gcc-4.9.3/lib -llapack -lblas -lxlfmath -lxlf90 -lgfortran +EXTRA_LINK_FLAGS = -L/usr/tce/packages/xl/xl-2022.08.19/xlf/16.1.1/lib -L$(LAPACK_DIR) -llapack -lblas -lxlfmath -lxlf90 -lgfortran diff --git a/doc/SW4-Installation.pdf b/doc/SW4-Installation.pdf index 4a49c2df..32b3cdf1 100644 Binary files a/doc/SW4-Installation.pdf and b/doc/SW4-Installation.pdf differ diff --git a/doc/SW4-Installation.tex b/doc/SW4-Installation.tex index 1082cc06..c64d1c43 100644 --- a/doc/SW4-Installation.tex +++ b/doc/SW4-Installation.tex @@ -341,7 +341,7 @@ \subsubsection{How do I setup the {\tt make.inc} file?} the C++ compiler, Fortran compiler, or linker, on your system. \subsection{Building \emph{SW4} with PROJ, HDF5, and ZFP support} -The PROJ libraray enables the more advanced geographical mapping keywords in the {\tt grid} command and is also required by the {\tt rfile, sfile} and {\tt gmg} commands. To enable the {\tt sfile}, {\tt ssioutput}, and {\tt gmg} commands, you have to also install the {\tt HDF5} library. To use ZFP compression for the {\tt ssioutput} command, {\tt ZFP} and {\tt H5Z-ZFP} are required. See Section \ref{sec:proj} for installing these libraries. +The PROJ library enables the more advanced geographical mapping keywords in the {\tt grid} command and is also required by the {\tt rfile, sfile} and {\tt gmg} commands. To enable the {\tt sfile}, {\tt ssioutput}, and {\tt gmg} commands, you have to also install the {\tt HDF5} library. 
To use ZFP compression for the {\tt ssioutput} command, {\tt ZFP} and {\tt H5Z-ZFP} are required. See Section \ref{sec:proj} for installing these libraries. Once you have successfully installed the PROJ, and optionally the HDF5 and ZFP libraries, it should be easy to re-configure \emph{SW4} to use them. Simply edit your configuration file (\verb+make.inc+) by adding the following lines to the top of the file, setting the {\tt proj}, {\tt hdf5}, and {\tt zfp} keywords to {\tt yes} or {\tt no}, as appropriate. @@ -366,7 +366,7 @@ \subsection{Building \emph{SW4} with PROJ, HDF5, and ZFP support} \verb+debug+ directories. \subsection{Testing the \emph{SW4} installation} -The \emph{SW4} source code distribution includes a python(3) script for running several tests and +The \emph{SW4} source code distribution includes a Python 3 script for running several tests and checking the solutions against previously verified results. Note that the same set of tests can be performed when \emph{SW4} is built with CMake, see Section~\ref{cha:ctest-sw4}. @@ -438,7 +438,7 @@ \section{Installing \emph{SW4} with CMake}\label{cha:installing-cmake-sw4} The two dots after {\tt cmake [options]} are essential and instructs it to look in the parent directory for the {\tt CMakeLists.txt} file. -The \verb+cmake+ command searches for the necessary libraries and other dependencies, then creates +The \verb+cmake+ command searches for the necessary libraries and other dependencies, then creates makefiles that are appropriate for your system. You then run \verb+make+ to compiles and link \emph{SW4} using these makefiles. For details about the exact commands being used in compilation, run \texttt{make VERBOSE=1}. Once SW4 has been successfully built, you will see the @@ -534,7 +534,7 @@ \subsection{CMake Options} Sometimes CMake doesn't pick up the correct compiler. Say, for example that the C++ compiler on your system is called {\tt mpicxx} and the Fortran compiler is {\tt mpiifort}. 
You can tell {\tt cmake} -to use those compilers by setting the following envoronment variables {\em before} running cmake +to use those compilers by setting the following environment variables {\em before} running cmake (assuming a {\tt csh} shell), \begin{verbatim} > setenv CXX mpicxx @@ -542,7 +542,7 @@ \subsection{CMake Options} \end{verbatim} \subsection{CTest}\label{cha:ctest-sw4} -The \emph{SW4} CMake configuration includes several test cases that are used to verify the correctness +The \emph{SW4} CMake configuration includes several test cases used to verify the correctness of the \emph{SW4} installation. Each test consists of two parts. First it runs a case using an input file in the \verb+pytest+ directory. Secondly, it checks that the results are within a reasonable error tolerance from previously recorded results. @@ -563,7 +563,7 @@ \subsection{CTest}\label{cha:ctest-sw4} Start 24: Check_Result_pointsource/pointsource-sg-1 24/24 Test #24: Check_Result_pointsource/pointsource-sg-1 ... Passed 0.03 sec -100\% tests passed, 0 tests failed out of 24 +100% tests passed, 0 tests failed out of 24 Total Test time (real) = 230.91 sec \end{verbatim} @@ -626,9 +626,9 @@ \subsection{PROJ} > cd proj-x.x.x > mkdir build > cd build - > cmake -DBUILD_APPS=OFF -DCMAKE_INSTALL_PREFIX=${SW4ROOT} --DSQLITE3_INCLUDE_DIR=${SW4ROOT}/include --DSQLITE3_LIBRARY=${SW4ROOT}/lib/libsqlite3.so .. + > cmake -DBUILD_APPS=OFF -DCMAKE_INSTALL_PREFIX=${SW4ROOT} \ + -DSQLITE3_INCLUDE_DIR=${SW4ROOT}/include \ + -DSQLITE3_LIBRARY=${SW4ROOT}/lib/libsqlite3.so .. # Note that the two -DSQLITE3 flags are needed if you compiled SQLite yourself. > make @@ -643,13 +643,13 @@ \subsection{HDF5} > cd hdf5-1.xx.x > mkdir build > cd build - > cmake -DHDF5_ENABLE_PARALLEL=ON -DCMAKE_INSTALL_PREFIX=${SW4ROOT} + > cmake -DHDF5_ENABLE_PARALLEL=ON -DCMAKE_INSTALL_PREFIX=${SW4ROOT} .. 
> make > make install \end{verbatim} \subsection{ZFP and H5Z-ZFP} -We recommend to use use ZFP and H5Z-ZFP's latest stable release version. +We recommend using ZFP and H5Z-ZFP's latest stable release version. Installing ZFP can be done with the following steps: \begin{verbatim} diff --git a/doc/SW4-UsersGuide.tex b/doc/SW4-UsersGuide.tex index 8ac2931e..fe677059 100644 --- a/doc/SW4-UsersGuide.tex +++ b/doc/SW4-UsersGuide.tex @@ -2519,14 +2519,14 @@ \section{Checkpoint and Restart the checkpoint command}\label{sec:checkpoint} To use the HDF5 format and optionally with compression: \begin{verbatim} checkpoint cycleInterval=80000 restartpath=output file=HFCheck \ - hdf5=yes zfp_accuracy=1e-5 + hdf5=yes zfp-accuracy=1e-5 \end{verbatim} To restart from a previously written checkpoint file, add the \verb+restartfile+ key word and the checkpoint file name to the \verb+checkpoint+ command: \begin{verbatim} checkpoint cycleInterval=80000 restartpath=output file=HFCheck \ restartfile=HFCheck.cycle=160000.sw4checkpoint \ - hdf5=yes zfp_accuracy=1e-5 + hdf5=yes zfp-accuracy=1e-5 \end{verbatim} Note that when using the \verb+checkpoint+ command in combination with the \verb+rec+, \verb+rechdf5+, and \verb+ssioutput+ commands, it is advisable to align their write intervals, such that the latest time-series data are also written at the time writing a checkpoint file. For example, a user may set \verb+writeEvery=1000+ (default) in the \verb+rechdf5+ command, and set \verb+dumpInterval=400+ in the \verb+ssioutput+ command, and set \verb+cycleInterval=40000+, as 40000 is divisible by both 1000 and 4000. @@ -4214,7 +4214,7 @@ \subsection{ssioutput [optional]} The option {\bf file=...} has the same meaning as the corresponding parameters in the image command. The \verb+ssioutput+ command produces files with extension \verb+.ssi+. 
-To reduce the output size, we have enabled the support to utilize ZFP \url{https://github.com/LLNL/zfp} and SZ \url{https://github.com/szcompressor/SZ} lossy compression. They can reduce the output size without significant precision loss. We found setting \verb+zfp_accuracy=0.01+ can reduce the output size by a factor of 40. Note ZFP and H5Z-ZFP must be installed and linked with SW4 to use this option, see the Installation Guide for detailed installation instructions. +To reduce the output size, we have enabled support for ZFP \url{https://github.com/LLNL/zfp} and SZ \url{https://github.com/szcompressor/SZ} lossy compression. They can reduce the output size without significant precision loss. We found that setting \verb+zfp-accuracy=0.01+ can reduce the output size by a factor of 40. Note that ZFP and H5Z-ZFP must be installed and linked with SW4 to use this option; see the Installation Guide for detailed installation instructions. The option {\bf dumpInterval=...} affects the maximum @@ -4251,10 +4251,10 @@ \subsection{ssioutput [optional]} ymin & starting $y$ location of requested SSI region & real & m & 0 \\ \hline ymax & ending $y$ location of requested SSI region & real & m & $y_{max}$ \\ \hline depth & approx depth of output over requested SSI region & real & m & 0 \\ \hline -zfp\_accuracy & use ZFP lossy compression accuracy mode & float & none & N/A \\ \hline -zfp\_precision & use ZFP lossy compression precision mode & float &none & N/A \\ \hline -zfp\_rate & use ZFP lossy compression rate mode & float &none & N/A \\ \hline -zfp\_reversible & use ZFP lossless compression mode & int & none & 0 \\ \hline +zfp-accuracy & use ZFP lossy compression accuracy mode & float & none & N/A \\ \hline +zfp-precision & use ZFP lossy compression precision mode & float &none & N/A \\ \hline +zfp-rate & use ZFP lossy compression rate mode & float &none & N/A \\ \hline +zfp-reversible & use ZFP lossless compression mode & int & none & 0 \\ \hline \end{tabular} \end{center} 
@@ -4326,9 +4326,9 @@ \subsection{checkpoint [optional]} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{flushleft} \bf Syntax:\\ \tt -checkpoint cycleInterval=... file=... restartpath=... \\ zfp\_{accuracy/precision/rate/reversible}=...\\ +checkpoint cycleInterval=... file=... restartpath=... \\ zfp-{accuracy/precision/rate/reversible}=...\\ checkpoint cycleInterval=... file=... restartpath=... restartfile=... \\ -zfp\_{accuracy/precision/rate/reversible}=...\\ +zfp-{accuracy/precision/rate/reversible}=...\\ \bf Required parameter:\\ \rm Time for output: (time, timeInterval, cycle, or cycleInterval).\\ \bf Notes:\\ @@ -4384,10 +4384,10 @@ \subsection{checkpoint [optional]} restartfile & restart the code from this file & string & ``restart'' \\ \hline restartpath & path to restart file & string & N/A \\ \hline hdf5 & use HDF5 output format & string & ``no'' \\ \hline -zfp\_accuracy & use ZFP lossy compression accuracy mode & float & N/A \\ \hline -zfp\_precision & use ZFP lossy compression precision mode & float & N/A \\ \hline -zfp\_rate & use ZFP lossy compression rate mode & float & N/A \\ \hline -zfp\_reversible & use ZFP lossless compression mode & int & 0 \\ \hline +zfp-accuracy & use ZFP lossy compression accuracy mode & float & N/A \\ \hline +zfp-precision & use ZFP lossy compression precision mode & float & N/A \\ \hline +zfp-rate & use ZFP lossy compression rate mode & float & N/A \\ \hline +zfp-reversible & use ZFP lossless compression mode & int & 0 \\ \hline \end{tabular} \end{center} diff --git a/doc/SW4_UsersGuide.pdf b/doc/SW4_UsersGuide.pdf new file mode 100644 index 00000000..e08c706c Binary files /dev/null and b/doc/SW4_UsersGuide.pdf differ diff --git a/pytest-sw4mopt/test_sw4mopt.py b/pytest-sw4mopt/test_sw4mopt.py index ccd810e2..e2928226 100755 --- a/pytest-sw4mopt/test_sw4mopt.py +++ b/pytest-sw4mopt/test_sw4mopt.py @@ -208,6 +208,9 @@ def guess_mpi_cmd(mpi_tasks, omp_threads, cpu_allocation, verbose): elif 'ray' in 
node_name: if mpi_tasks<=0: mpi_tasks = 16 mpirun_cmd="mpirun -gpu -np " + str(mpi_tasks)+" mpibind" + elif 'lassen' in node_name: + if mpi_tasks<=0: mpi_tasks = 16 + mpirun_cmd="lrun -T40 " elif 'sierra' in node_name: if mpi_tasks<=0: mpi_tasks = 16 mpirun_cmd="lrun -T16 -p" + str(mpi_tasks) diff --git a/pytest/test_sw4.py b/pytest/test_sw4.py index 8f7eaa3a..9bdb3a2b 100755 --- a/pytest/test_sw4.py +++ b/pytest/test_sw4.py @@ -160,7 +160,7 @@ def guess_mpi_cmd(mpi_tasks, omp_threads, cpu_allocation, verbose): elif 'lassen' in node_name: os.environ["PSM2_DEVICES"] = "" if mpi_tasks<=0: mpi_tasks = 4 - mpirun_cmd="lrun -T4 -M -gpu" + mpirun_cmd="lrun -T40 " # add more machine names here elif 'Linux' in sys_name: if omp_threads<=0: omp_threads=1; diff --git a/src/AllDims.C b/src/AllDims.C index 8252943a..a937fc60 100644 --- a/src/AllDims.C +++ b/src/AllDims.C @@ -104,15 +104,15 @@ AllDims::AllDims( int nprocs, int ibg, int ieg, int jbg, int jeg, ptrdiff_t fftw_alloc_local = fftw_mpi_local_size_3d( nig, njg, nkg, m_communicator, &ni, &ib ); m_fftw_alloc_local = static_cast(fftw_alloc_local); #else - int ni, ib; + ptrdiff_t ni, ib; #endif - std::vector niloc(m_nproci), ibloc(m_nproci); + std::vector niloc(m_nproci), ibloc(m_nproci); niloc[m_myid1d] = ni; ibloc[m_myid1d] = ib; - MPI_Allgather( &ni, 1, MPI_INT, &niloc[0], 1, MPI_INT, m_communicator ); - MPI_Allgather( &ib, 1, MPI_INT, &ibloc[0], 1, MPI_INT, m_communicator ); + MPI_Allgather( &ni, 1, MPI_AINT, &niloc[0], 1, MPI_AINT, m_communicator ); + MPI_Allgather( &ib, 1, MPI_AINT, &ibloc[0], 1, MPI_AINT, m_communicator ); m_ib.resize(m_nproci); m_ie.resize(m_nproci); diff --git a/src/parseInputFile.C b/src/parseInputFile.C index 438ccccd..21d2a31b 100644 --- a/src/parseInputFile.C +++ b/src/parseInputFile.C @@ -5775,16 +5775,17 @@ void EW::processSource(char* buffer, vector > & a_GlobalUniqueSo { tDep = iDiscrete6moments; fname = basename + ".xx"; - npar = 6*(npts+1); } else { tDep = iDiscrete3forces; fname = 
basename + ".x"; - npar = 3*(npts+1); } bool byteswap; readSACheader( fname.c_str(), dt, t0, latsac, lonsac, cmpazsac, cmpincsac, utcsac, npts, byteswap ); + npar = 3*(npts+1); + if( isMomentType ) npar = 6*(npts+1); + if( !useB ) t0 = 0;