diff --git a/test/test_bdsqr.cc b/test/test_bdsqr.cc index b3ffa96f0..cff7d90d3 100644 --- a/test/test_bdsqr.cc +++ b/test/test_bdsqr.cc @@ -38,7 +38,6 @@ void test_bdsqr_work(Params& params, bool run) lapack::Job jobvt = params.jobvt(); bool check = params.check() == 'y'; bool trace = params.trace() == 'y'; - int verbose = params.verbose(); slate::Origin origin = params.origin(); // mark non-standard output values @@ -113,6 +112,11 @@ void test_bdsqr_work(Params& params, bool run) } } + if (mpi_rank == 0) { + print_vector( "D", D, params ); + print_vector( "E", E, params ); + } + //--------- // run test if (trace) slate::trace::Trace::on(); @@ -130,6 +134,17 @@ void test_bdsqr_work(Params& params, bool run) if (trace) slate::trace::Trace::finish(); + if (mpi_rank == 0) { + print_vector( "D_out ", D, params ); + } + // todo: print crashes if U or VT is empty. + if (jobu != slate::Job::NoVec) { + print_matrix( "U_out", U, params ); + } + if (jobvt != slate::Job::NoVec) { + print_matrix( "VT_out", VT, params ); + } + if (check) { //================================================== // Test results @@ -147,17 +162,8 @@ void test_bdsqr_work(Params& params, bool run) params.ref_time() = barrier_get_wtime(MPI_COMM_WORLD) - time; - if (verbose) { - // Print first 20 and last 20 rows. - printf( "%9s %9s\n", "D", "Dref" ); - for (int64_t i = 0; i < n; ++i) { - if (i < 20 || i > n-20) { - bool okay = std::abs( D[i] - Dref[i] ) < tol; - printf( "%9.6f %9.6f%s\n", - D[i], Dref[i], (okay ? "" : " !!") ); - } - } - printf( "\n" ); + if (mpi_rank == 0) { + print_vector( "Dref_out", Dref, params ); } // Relative forward error: || D - Dref || / || Dref ||. diff --git a/test/test_gbmm.cc b/test/test_gbmm.cc index d8473ab2d..a50484cb3 100644 --- a/test/test_gbmm.cc +++ b/test/test_gbmm.cc @@ -192,7 +192,7 @@ void test_gbmm_work(Params& params, bool run) else if (transA == slate::Op::ConjTrans) A = conj_transpose( A ); - print_matrix( "Cref_in", Cref, params ); + print_matrix( "Cref", Cref, params ); // Get norms of the original data. real_t A_norm = slate::norm( norm, A ); @@ -208,7 +208,7 @@ void test_gbmm_work(Params& params, bool run) time = barrier_get_wtime(MPI_COMM_WORLD) - time; - print_matrix( "Cref", Cref, params ); + print_matrix( "Cref_out", Cref, params ); // get differences Cref = Cref - C slate::add( -one, C, one, Cref ); diff --git a/test/test_gbsv.cc b/test/test_gbsv.cc index fbabac696..209baa1bf 100644 --- a/test/test_gbsv.cc +++ b/test/test_gbsv.cc @@ -233,8 +233,8 @@ void test_gbsv_work(Params& params, bool run) } printf( "] + 1;\n" ); } - print_matrix("A2", A, params); - print_matrix("B2", B, params); + print_matrix( "A_out", A, params ); + print_matrix( "B_out", B, params ); } if (check) { diff --git a/test/test_gels.cc b/test/test_gels.cc index 1df46174d..102aa22e3 100644 --- a/test/test_gels.cc +++ b/test/test_gels.cc @@ -262,8 +262,8 @@ void test_gels_work(Params& params, bool run) params.time4() = slate::timers[ "gels_cholqr::trsm" ]; } - print_matrix( "A2", A, params ); - print_matrix( "BX2", BX, params ); + print_matrix( "A_out", A, params ); + print_matrix( "BX_out", BX, params ); } if (check) { diff --git a/test/test_gemm.cc b/test/test_gemm.cc index da31e33cf..86d827e3b 100644 --- a/test/test_gemm.cc +++ b/test/test_gemm.cc @@ -51,7 +51,6 @@ void test_gemm_work(Params& params, bool run) bool ref = params.ref() == 'y' || ref_only; bool check = params.check() == 'y' && ! ref_only; bool trace = params.trace() == 'y'; - int verbose = params.verbose(); slate::Target target = params.target(); slate::Origin origin = params.origin(); slate::MethodGemm method_gemm = params.method_gemm(); @@ -183,14 +182,11 @@ void test_gemm_work(Params& params, bool run) if (trace) slate::trace::Trace::finish(); - if (verbose >= 2) { - C.tileGetAllForReading( slate::HostNum, slate::LayoutConvert::None ); - print_matrix( "C_out", C, params ); - } - // compute and save timing/performance params.time() = time; params.gflops() = gflop / time; + + print_matrix( "C_out", C, params ); } if (check && ! ref) { diff --git a/test/test_geqrf.cc b/test/test_geqrf.cc index bf30773a9..990d876f3 100644 --- a/test/test_geqrf.cc +++ b/test/test_geqrf.cc @@ -97,7 +97,7 @@ void test_geqrf_work(Params& params, bool run) slate::TriangularFactors T; - print_matrix("A", A, params); + print_matrix( "A", A, params ); // For checks, keep copy of original matrix A. slate::Matrix Aref; diff --git a/test/test_gesv.cc b/test/test_gesv.cc index 21befa35f..59704427f 100644 --- a/test/test_gesv.cc +++ b/test/test_gesv.cc @@ -317,8 +317,10 @@ void test_gesv_work(Params& params, bool run) llong( info ), params.matrix.cond_actual() ); params.msg() = buf; } + + print_matrix( "B_out", B, params ); + print_matrix( "X_out", X, params ); } - print_matrix( "X_out", X, params ); if (info != 0 || std::isinf( params.matrix.cond_actual() )) { // info != 0 if and only if cond == inf (singular matrix). diff --git a/test/test_getri.cc b/test/test_getri.cc index 3693dcba1..f32390f49 100644 --- a/test/test_getri.cc +++ b/test/test_getri.cc @@ -8,6 +8,7 @@ #include "blas/flops.hh" #include "lapack/flops.hh" #include "grid_utils.hh" +#include "print_matrix.hh" #include "scalapack_wrappers.hh" #include "scalapack_copy.hh" @@ -96,6 +97,8 @@ void test_getri_work(Params& params, bool run) } slate::generate_matrix(params.matrix, A); + print_matrix( "A", A, params ); + // Create pivot structure to store pivots after factoring slate::Pivots pivots; @@ -183,6 +186,13 @@ void test_getri_work(Params& params, bool run) // compute and save timing/performance params.time() = time; params.gflops() = gflop / time; + + if (params.routine == "getriOOP") { + print_matrix( "A_inv = C_out", C, params ); + } + else { + print_matrix( "A_inv = A_out", A, params ); + } } if (info != 0 || std::isinf( params.matrix.cond_actual() )) { diff --git a/test/test_hb2st.cc b/test/test_hb2st.cc index 607250fe1..3bd46ce3c 100644 --- a/test/test_hb2st.cc +++ b/test/test_hb2st.cc @@ -207,27 +207,18 @@ void test_hb2st_work(Params& params, bool run) } } - print_matrix("D", 1, n, &Lambda2[0], 1, params); - print_matrix("E", 1, n-1, &E[0], 1, params); + if (mpi_rank == 0) { + print_vector( "D", Lambda2, params ); + print_vector( "E", E, params ); + } info = lapack::steqr(lapack::Job::NoVec, n, &Lambda2[0], &E[0], dummy, 1); assert(info == 0); - if (verbose) { - printf( "%% first and last 20 rows of Lambda1 and Lambda2\n" ); - printf( "%9s %9s\n", "Lambda1", "Lambda2" ); - for (int64_t i = 0; i < n; ++i) { - if (i < 20 || i >= n-20) { - bool okay = std::abs( Lambda1[i] - Lambda2[i] ) < tol; - printf( "%9.6f %9.6f%s\n", - Lambda1[i], Lambda2[i], (okay ? "" : " !!") ); - } - else if (i == 20) { - printf( "--------------------\n" ); - } - } - printf( "\n" ); + if (mpi_rank == 0) { + print_vector( "Lambda1", Lambda1, params ); + print_vector( "Lambda2", Lambda2, params ); } // Relative forward error: || L - Lref || / || Lref ||. diff --git a/test/test_hbmm.cc b/test/test_hbmm.cc index 552fe80ad..59d0a04b8 100644 --- a/test/test_hbmm.cc +++ b/test/test_hbmm.cc @@ -170,7 +170,7 @@ void test_hbmm_work(Params& params, bool run) params.time() = time; params.gflops() = gflop / time; - print_matrix("C2", C, params); + print_matrix( "C_out", C, params ); if (check || ref) { //================================================== diff --git a/test/test_hegst.cc b/test/test_hegst.cc index 3e493ed01..87a02c2ba 100644 --- a/test/test_hegst.cc +++ b/test/test_hegst.cc @@ -134,7 +134,7 @@ void test_hegst_work(Params& params, bool run) params.time() = time; //params.gflops() = gflop / time; - print_matrix("A_hegst", A, params); + print_matrix( "A_out", A, params ); } if (check || ref) { @@ -175,7 +175,7 @@ void test_hegst_work(Params& params, bool run) params.ref_time() = time; // params.ref_gflops() = gflop / time; - print_matrix("Aref_hegst", Aref, params); + print_matrix( "Aref_out", Aref, params ); if (! ref_only) { // Local operation: error = Aref - A diff --git a/test/test_hegv.cc b/test/test_hegv.cc index 924167be2..f919069f3 100644 --- a/test/test_hegv.cc +++ b/test/test_hegv.cc @@ -164,7 +164,6 @@ void test_hegv_work(Params& params, bool run) print_matrix("A", A, params); print_matrix("B", B, params); - print_matrix("Z", Z, params); std::vector Aref_data, Bref_data, Zref_data; std::vector Lambda_ref; @@ -245,9 +244,10 @@ void test_hegv_work(Params& params, bool run) } - print_matrix("A", A, params); - print_matrix("B", B, params); - print_matrix("Z", Z, params); + if (mpi_rank == 0) { + print_vector( "Lambda", Lambda, params ); + } + print_matrix( "Z_out", Z, params ); if (check && jobz == slate::Job::Vec) { // do error checks for the operations diff --git a/test/test_hemm.cc b/test/test_hemm.cc index 6beff4474..0947dfc77 100644 --- a/test/test_hemm.cc +++ b/test/test_hemm.cc @@ -49,7 +49,6 @@ void test_hemm_work(Params& params, bool run) bool check = params.check() == 'y'; bool ref = params.ref() == 'y'; bool trace = params.trace() == 'y'; - int verbose = params.verbose(); slate::Origin origin = params.origin(); slate::Target target = params.target(); slate::MethodHemm method_hemm = params.method_hemm(); @@ -109,6 +108,10 @@ void test_hemm_work(Params& params, bool run) slate::generate_matrix( params.matrixB, B); slate::generate_matrix( params.matrixC, C); + print_matrix( "A", A, params ); + print_matrix( "B", B, params ); + print_matrix( "C", C, params ); + // If reference run is required, record norms to be used in the check/ref. real_t A_norm=0, B_norm=0, C_orig_norm=0; if (ref) { @@ -185,11 +188,6 @@ void test_hemm_work(Params& params, bool run) time = barrier_get_wtime(MPI_COMM_WORLD) - time; - if (verbose >= 2) { - C.tileGetAllForReading( slate::HostNum, slate::LayoutConvert::None ); - print_matrix( "C_out", C, params ); - } - if (trace) slate::trace::Trace::finish(); // Compute and save timing/performance @@ -197,6 +195,8 @@ void test_hemm_work(Params& params, bool run) params.time() = time; params.gflops() = gflop / time; + print_matrix( "C_out", C, params ); + if (check && ! ref) { auto& X = X_alloc.A; auto& Y = Y_alloc.A; diff --git a/test/test_her2k.cc b/test/test_her2k.cc index 23c012993..29682f03b 100644 --- a/test/test_her2k.cc +++ b/test/test_her2k.cc @@ -128,9 +128,9 @@ void test_her2k_work(Params& params, bool run) slate_assert( opB.mt() == C.mt() ); slate_assert( opA.nt() == opB.nt() ); - print_matrix("A", A, params); - print_matrix("B", B, params); - print_matrix("Initial C", C, params); + print_matrix( "A", A, params ); + print_matrix( "B", B, params ); + print_matrix( "C", C, params ); if (trace) slate::trace::Trace::on(); else slate::trace::Trace::off(); @@ -177,7 +177,7 @@ void test_her2k_work(Params& params, bool run) time = barrier_get_wtime(MPI_COMM_WORLD) - time; - print_matrix("Cslate", C, params); + print_matrix( "C_out", C, params ); if (trace) slate::trace::Trace::finish(); @@ -243,7 +243,7 @@ void test_her2k_work(Params& params, bool run) &Cref_data[0], 1, 1, Cref_desc); time = barrier_get_wtime(MPI_COMM_WORLD) - time; - print_matrix("Cref", Cref, params); + print_matrix( "Cref_out", Cref, params ); // get differences C = C - Cref slate::add(-one, Cref, one, C); diff --git a/test/test_herk.cc b/test/test_herk.cc index 45764a67a..8050aaa12 100644 --- a/test/test_herk.cc +++ b/test/test_herk.cc @@ -6,6 +6,7 @@ #include "slate/slate.hh" #include "test.hh" #include "blas/flops.hh" +#include "print_matrix.hh" #include "matrix_utils.hh" #include "test_utils.hh" @@ -93,6 +94,9 @@ void test_herk_work(Params& params, bool run) slate::generate_matrix( params.matrix, A ); slate::generate_matrix( params.matrixC, C ); + print_matrix( "A", A, params ); + print_matrix( "C", C, params ); + // If reference run is required, record norms to be used in the check/ref. real_t A_norm=0, C_orig_norm=0; if (ref) { @@ -158,6 +162,8 @@ void test_herk_work(Params& params, bool run) params.time() = time; params.gflops() = gflop / time; + print_matrix( "C_out", C, params ); + if (check && ! ref) { auto& X = X_alloc.A; auto& Y = Y_alloc.A; diff --git a/test/test_hesv.cc b/test/test_hesv.cc index ab095ee8a..d4d241407 100644 --- a/test/test_hesv.cc +++ b/test/test_hesv.cc @@ -195,8 +195,8 @@ void test_hesv_work(Params& params, bool run) params.msg() = buf; } - print_matrix( "Aout", A, params ); - print_matrix( "Bout", B, params ); + print_matrix( "A_out", A, params ); + print_matrix( "B_out", B, params ); //--------------------- // compute and save timing/performance diff --git a/test/test_pbsv.cc b/test/test_pbsv.cc index 67ae4c813..a9ae66513 100644 --- a/test/test_pbsv.cc +++ b/test/test_pbsv.cc @@ -134,7 +134,6 @@ void test_pbsv_work(Params& params, bool run) print_matrix("A", A, params); print_matrix("B", B, params); - // if check is required, copy test data and create a descriptor for it slate::Matrix Bref; if (check || ref) { @@ -202,8 +201,8 @@ void test_pbsv_work(Params& params, bool run) // A.mpiRank(), llong( A.bandwidth( ))); //printf( "nb = %lld;\n", llong( nb ) ); } - print_matrix("A2", A, params); - print_matrix("B2", B, params); + print_matrix( "A_out", A, params ); + print_matrix( "B_out", B, params ); } if (check) { //================================================== @@ -249,7 +248,7 @@ void test_pbsv_work(Params& params, bool run) printf("Anorm = %.4e; Xnorm = %.4e; Rnorm = %.4e; error = %.4e;\n", A_norm, X_norm, R_norm, residual); } - print_matrix("Residual", Bref, params); + print_matrix( "Residual", Bref, params ); } // todo: reference solution requires setting up band matrix in ScaLAPACK's // band storage format. diff --git a/test/test_posv.cc b/test/test_posv.cc index 1b2e63f13..51cf994f4 100644 --- a/test/test_posv.cc +++ b/test/test_posv.cc @@ -291,8 +291,10 @@ void test_posv_work(Params& params, bool run) llong( info ), params.matrix.cond_actual() ); params.msg() = buf; } + + print_matrix( "B_out", B, params ); + print_matrix( "X_out", X, params ); } - print_matrix( "X_out", X, params ); if (info != 0 || std::isinf( params.matrix.cond_actual() )) { // info != 0 if and only if cond == inf (singular matrix). diff --git a/test/test_potri.cc b/test/test_potri.cc index dd9d2446d..9e109d658 100644 --- a/test/test_potri.cc +++ b/test/test_potri.cc @@ -118,6 +118,8 @@ void test_potri_work(Params& params, bool run) gflop = lapack::Gflop::potrf(n) + lapack::Gflop::potri(n); + print_matrix( "A", A, params ); + if (! ref_only) { if (trace) slate::trace::Trace::on(); @@ -144,6 +146,8 @@ void test_potri_work(Params& params, bool run) params.gflops() = gflop / time; } + print_matrix( "A_inv", A, params ); + // Test using only SLATE routines for a residual check. if (check) { //================================================== diff --git a/test/test_scale_row_col.cc b/test/test_scale_row_col.cc index a749a49f9..7e0b5f579 100644 --- a/test/test_scale_row_col.cc +++ b/test/test_scale_row_col.cc @@ -77,6 +77,10 @@ void test_scale_row_col_work( Params& params, bool run ) {slate::Option::Target, target} }; + // MPI variables + int mpi_rank; + MPI_Comm_rank( MPI_COMM_WORLD, &mpi_rank ); + auto A_alloc = allocate_test_Matrix( check || ref, false, m, n, params ); auto& Afull = A_alloc.A; @@ -109,8 +113,10 @@ void test_scale_row_col_work( Params& params, bool run ) lapack::larnv( idist, iseed, m, &R[ 0 ] ); lapack::larnv( idist, iseed, n, &C[ 0 ] ); - print_vector( "R", R, params ); - print_vector( "C", C, params ); + if (mpi_rank == 0) { + print_vector( "R", R, params ); + print_vector( "C", C, params ); + } if (! ref_only) { if (trace) slate::trace::Trace::on(); diff --git a/test/test_stedc.cc b/test/test_stedc.cc index 14452d1a1..e4a3dd9d4 100644 --- a/test/test_stedc.cc +++ b/test/test_stedc.cc @@ -124,9 +124,10 @@ void test_stedc_work( Params& params, bool run ) set( zero, one, Zref ); } - //print_matrix( "Z", Z, params ); - //print_vector( "D", D, params ); - //print_vector( "E", E, params ); + if (mpi_rank == 0) { + print_vector( "D", D, params ); + print_vector( "E", E, params ); + } if (trace) slate::trace::Trace::on(); @@ -143,6 +144,11 @@ void test_stedc_work( Params& params, bool run ) if (trace) slate::trace::Trace::finish(); + if (mpi_rank == 0) { + print_vector( "D_out ", D, params ); + } + print_matrix( "Z_out", Z, params ); + if (check) { //================================================== // Test results by checking the orthogonality of Z. @@ -272,10 +278,10 @@ void test_stedc_work( Params& params, bool run ) params.ref_time() = barrier_get_wtime( MPI_COMM_WORLD ) - time; - print_matrix( "Zout", Z, params ); - print_matrix( "Zref", Zref, params ); - //print_vector( "Dout", D, params ); - //print_vector( "Dref", Dref, params ); + if (mpi_rank == 0) { + print_vector( "Dref_out", Dref, params ); + } + print_matrix( "Zref_out", Zref, params ); // Relative forward error: || D - Dref || / || Dref || . real_t D_norm = blas::nrm2( n, &Dref[0], 1 ); diff --git a/test/test_stedc_deflate.cc b/test/test_stedc_deflate.cc index 0806a55a0..75a5b067d 100644 --- a/test/test_stedc_deflate.cc +++ b/test/test_stedc_deflate.cc @@ -247,7 +247,7 @@ void test_stedc_deflate_work( Params& params, bool run ) printf( "%%-------------------- SLATE input\n" ); printf( "n = %lld; n1 = %lld; n2 = %lld;\n", llong( n ), llong( n1 ), llong( n2 ) ); } - if (verbose >= 1 && mpi_rank == 0) { + if (mpi_rank == 0) { print_vector( "D", D, params ); print_vector( "z", z, params ); } @@ -312,7 +312,7 @@ void test_stedc_deflate_work( Params& params, bool run ) if (verbose >= 1 && mpi_rank == 0) printf( "%%-------------------- ScaLAPACK input\n" ); print_matrix( "Qref", Qref, params ); - if (verbose >= 1 && mpi_rank == 0) { + if (mpi_rank == 0) { print_vector( "Dref", Dref, params ); print_vector( "zref", zref, params ); } @@ -401,7 +401,7 @@ void test_stedc_deflate_work( Params& params, bool run ) if (verbose >= 1 && mpi_rank == 0) printf( "%%-------------------- SLATE and ScaLAPACK (ref) output\n" ); - print_matrix( "Qout", Q, params ); + print_matrix( "Q_out", Q, params ); if (ref) print_matrix( "Qref", Qref, params ); @@ -443,7 +443,7 @@ void test_stedc_deflate_work( Params& params, bool run ) print_vector( "% zout = Dorig", z, params ); if (ref) print_vector( "% zref = Dorig", zref, params ); - print_vector( "Dout ", D, params ); + print_vector( "D_out ", D, params ); if (ref) print_vector( "Dref ", Dref, params ); print_vector( "Dsecular ", Dsecular, params ); diff --git a/test/test_stedc_sort.cc b/test/test_stedc_sort.cc index 615ac01cc..0786c6bd6 100644 --- a/test/test_stedc_sort.cc +++ b/test/test_stedc_sort.cc @@ -118,7 +118,9 @@ void test_stedc_sort_work( Params& params, bool run ) Zout.insertLocalTiles(); set( nan_, Zout ); - //print_vector( "D", D, params ); + if (mpi_rank == 0) { + print_vector( "D", D, params ); + } print_matrix( "Z", Z, params ); if (trace) @@ -136,8 +138,10 @@ void test_stedc_sort_work( Params& params, bool run ) if (trace) slate::trace::Trace::finish(); - //print_vector( "Dout", D, params ); - print_matrix( "Zout", Zout, params ); + if (mpi_rank == 0) { + print_vector( "D_out", D, params ); + } + print_matrix( "Z_out", Zout, params ); // todo: check that D & Z are sorted. @@ -176,10 +180,10 @@ void test_stedc_sort_work( Params& params, bool run ) params.ref_time() = barrier_get_wtime( MPI_COMM_WORLD ) - time; - //print_vector( "Dout", D, params ); - //print_vector( "Dref", Dref, params ); - print_matrix( "Zout", Zout, params ); - print_matrix( "Zref", Zref, params ); + if (mpi_rank == 0) { + print_vector( "Dref_out", Dref, params ); + } + print_matrix( "Zref_out", Zref, params ); // || D - Dref || should be exactly 0. blas::axpy( n, -one, &D[0], 1, &Dref[0], 1 ); diff --git a/test/test_stedc_z_vector.cc b/test/test_stedc_z_vector.cc index 3134e0dd5..f300a4a17 100644 --- a/test/test_stedc_z_vector.cc +++ b/test/test_stedc_z_vector.cc @@ -39,7 +39,6 @@ void test_stedc_z_vector_work( Params& params, bool run ) bool check = params.check() == 'y'; bool ref = params.ref() == 'y'; bool trace = params.trace() == 'y'; - int verbose = params.verbose(); slate::Origin origin = params.origin(); // mark non-standard output values @@ -117,8 +116,8 @@ void test_stedc_z_vector_work( Params& params, bool run ) if (trace) slate::trace::Trace::finish(); - if (verbose >= 2 && mpi_rank == 0) { - print_vector( "zout", z.size(), &z[0], 1, params ); + if (mpi_rank == 0) { + print_vector( "z_out", z, params ); } if (check || ref) { @@ -160,9 +159,9 @@ void test_stedc_z_vector_work( Params& params, bool run ) params.ref_time() = barrier_get_wtime( MPI_COMM_WORLD ) - time; - if (verbose >= 2 && mpi_rank == 0) { - print_vector( "zref", zref.size(), &zref[0], 1, params ); - print_vector( "work", work.size(), &work[0], 1, params ); + if (mpi_rank == 0) { + print_vector( "zref_out", zref, params ); + print_vector( "work", work, params ); } // Forward error || z - zref || should be zero. diff --git a/test/test_steqr2.cc b/test/test_steqr2.cc index ff5ec29b9..17c987308 100644 --- a/test/test_steqr2.cc +++ b/test/test_steqr2.cc @@ -37,7 +37,6 @@ void test_steqr2_work(Params& params, bool run) lapack::Job jobz = params.jobz(); bool check = params.check() == 'y'; bool trace = params.trace() == 'y'; - int verbose = params.verbose(); slate::Origin origin = params.origin(); // mark non-standard output values @@ -73,6 +72,11 @@ void test_steqr2_work(Params& params, bool run) std::vector Dref = D; std::vector Eref = E; + if (mpi_rank == 0) { + print_vector( "D", D, params ); + print_vector( "E", E, params ); + } + slate::Matrix A; // To check the orth of the eigenvectors if (check) { A = slate::Matrix(n, n, nb, p, q, MPI_COMM_WORLD); @@ -110,6 +114,11 @@ void test_steqr2_work(Params& params, bool run) if (trace) slate::trace::Trace::finish(); + if (mpi_rank == 0) { + print_vector( "D_out ", D, params ); + } + print_matrix( "Z_out", Z, params ); + if (check) { //================================================== // Test results @@ -125,17 +134,8 @@ void test_steqr2_work(Params& params, bool run) params.ref_time() = barrier_get_wtime(MPI_COMM_WORLD) - time; - if (verbose) { - // Print first 20 and last 20 rows. - printf( "%9s %9s\n", "D", "Dref" ); - for (int64_t i = 0; i < n; ++i) { - if (i < 20 || i > n-20) { - bool okay = std::abs( D[i] - Dref[i] ) < tol; - printf( "%9.6f %9.6f%s\n", - D[i], Dref[i], (okay ? "" : " !!") ); - } - } - printf( "\n" ); + if (mpi_rank == 0) { + print_vector( "Dref_out", Dref, params ); } // Relative forward error: || D - Dref || / || Dref ||. diff --git a/test/test_sterf.cc b/test/test_sterf.cc index 5488ef7fe..997cfec44 100644 --- a/test/test_sterf.cc +++ b/test/test_sterf.cc @@ -30,7 +30,6 @@ void test_sterf_work(Params& params, bool run) int q = params.grid.n(); bool check = params.check() == 'y'; bool trace = params.trace() == 'y'; - int verbose = params.verbose(); // mark non-standard output values params.time(); @@ -56,6 +55,11 @@ void test_sterf_work(Params& params, bool run) real_t tol = params.tol() * 0.5 * std::numeric_limits::epsilon(); + if (mpi_rank == 0) { + print_vector( "D", D, params ); + print_vector( "E", E, params ); + } + //--------- // run test if (trace) slate::trace::Trace::on(); @@ -73,6 +77,10 @@ void test_sterf_work(Params& params, bool run) if (trace) slate::trace::Trace::finish(); + if (mpi_rank == 0) { + print_vector( "D_out ", D, params ); + } + if (check) { //================================================== // Test results @@ -82,31 +90,19 @@ void test_sterf_work(Params& params, bool run) //================================================== // Run LAPACK reference routine. //================================================== - lapack::sterf(n, &Dref[0], &Eref[0]); params.ref_time() = barrier_get_wtime(MPI_COMM_WORLD) - time; if (mpi_rank == 0) { - if (verbose) { - // Print first 20 and last 20 rows. - printf( "%9s %9s\n", "D", "Dref" ); - for (int64_t i = 0; i < n; ++i) { - if (i < 20 || i > n-20) { - bool okay = std::abs( D[i] - Dref[i] ) < tol; - printf( "%9.6f %9.6f%s\n", - D[i], Dref[i], (okay ? "" : " !!") ); - } - } - printf( "\n" ); - } - - // Relative forward error: || D - Dref || / || Dref ||. - blas::axpy(D.size(), -1.0, &Dref[0], 1, &D[0], 1); - params.error() = blas::nrm2(D.size(), &D[0], 1) - / blas::nrm2(Dref.size(), &Dref[0], 1); - params.okay() = (params.error() <= tol); + print_vector( "Dref_out", Dref, params ); } + + // Relative forward error: || D - Dref || / || Dref ||. + blas::axpy(D.size(), -1.0, &Dref[0], 1, &D[0], 1); + params.error() = blas::nrm2(D.size(), &D[0], 1) + / blas::nrm2(Dref.size(), &Dref[0], 1); + params.okay() = (params.error() <= tol); } } diff --git a/test/test_svd.cc b/test/test_svd.cc index 36c4e9ba3..84730b6e1 100644 --- a/test/test_svd.cc +++ b/test/test_svd.cc @@ -98,6 +98,10 @@ void test_svd_work( Params& params, bool run ) params.msg() = "job = NoVec requires --ref y to check singular values"; } + // MPI variables + int mpi_rank; + MPI_Comm_rank( MPI_COMM_WORLD, &mpi_rank ); + slate::Options const opts = { {slate::Option::Lookahead, lookahead}, {slate::Option::Target, target}, @@ -194,7 +198,9 @@ void test_svd_work( Params& params, bool run ) params.time12() = slate::timers[ "svd::unmlq" ]; } - print_matrix("D", 1, min_mn, &Sigma[0], 1, params); + if (mpi_rank == 0) { + print_vector( "Sigma", Sigma, params ); + } if (wantu) { print_matrix( "U", U, params ); } diff --git a/test/test_symm.cc b/test/test_symm.cc index e89ec34fe..ab7725c38 100644 --- a/test/test_symm.cc +++ b/test/test_symm.cc @@ -30,6 +30,8 @@ void test_symm_work(Params& params, bool run) // Constants const scalar_t zero = 0.0, one = 1.0; + // todo: method + // get & mark input values slate::Side side = params.side(); slate::Uplo uplo = params.uplo(); @@ -98,6 +100,10 @@ void test_symm_work(Params& params, bool run) slate::generate_matrix( params.matrixB, B); slate::generate_matrix( params.matrixC, C); + print_matrix( "A", A, params ); + print_matrix( "B", B, params ); + print_matrix( "C", C, params ); + // If reference run is required, record norms to be used in the check/ref. real_t A_norm=0, B_norm=0, C_orig_norm=0; if (ref) { @@ -149,6 +155,7 @@ void test_symm_work(Params& params, bool run) slate_assert(A.mt() == C.mt()); else slate_assert(A.mt() == C.nt()); + slate_assert(B.mt() == C.mt()); slate_assert(B.nt() == C.nt()); @@ -180,6 +187,8 @@ void test_symm_work(Params& params, bool run) params.time() = time; params.gflops() = gflop / time; + print_matrix( "C_out", C, params ); + if (check && ! ref) { auto& X = X_alloc.A; auto& Y = Y_alloc.A; @@ -188,7 +197,7 @@ void test_symm_work(Params& params, bool run) // Check error, C*X - Y. real_t y_norm = slate::norm( norm, Y, opts ); // Y = C * X - Y - slate::multiply( one, C, X, -one, Y ); + slate::multiply( one, C, X, -one, Y, opts ); // error = norm( Y ) / y_norm real_t error = slate::norm( slate::Norm::One, Y, opts )/y_norm; params.error() = error; @@ -235,7 +244,6 @@ void test_symm_work(Params& params, bool run) &A_data[0], 1, 1, A_desc, &B_data[0], 1, 1, B_desc, beta, &Cref_data[0], 1, 1, Cref_desc); - MPI_Barrier(MPI_COMM_WORLD); time = barrier_get_wtime(MPI_COMM_WORLD) - time; // get differences C = C - Cref @@ -256,6 +264,7 @@ void test_symm_work(Params& params, bool run) real_t eps = std::numeric_limits::epsilon(); params.okay() = (params.error() <= 3*eps); + Cblacs_gridexit(ictxt); //Cblacs_exit(1) does not handle re-entering #else // not SLATE_HAVE_SCALAPACK diff --git a/test/test_syr2k.cc b/test/test_syr2k.cc index 42163784b..8cf0fbfe0 100644 --- a/test/test_syr2k.cc +++ b/test/test_syr2k.cc @@ -126,9 +126,9 @@ void test_syr2k_work(Params& params, bool run) slate_assert( opB.mt() == C.mt() ); slate_assert( opA.nt() == opB.nt() ); - print_matrix("A", A, params); - print_matrix("B", B, params); - print_matrix("Initial C", C, params); + print_matrix( "A", A, params ); + print_matrix( "B", B, params ); + print_matrix( "C", C, params ); if (trace) slate::trace::Trace::on(); else slate::trace::Trace::off(); @@ -175,7 +175,7 @@ void test_syr2k_work(Params& params, bool run) time = barrier_get_wtime( MPI_COMM_WORLD ) - time; - print_matrix("Cslate", C, params); + print_matrix( "C_out", C, params ); if (trace) slate::trace::Trace::finish(); @@ -241,7 +241,7 @@ void test_syr2k_work(Params& params, bool run) &Cref_data[0], 1, 1, Cref_desc); time = barrier_get_wtime( MPI_COMM_WORLD ) - time; - print_matrix("Cref", Cref, params); + print_matrix( "Cref_out", Cref, params ); // get differences C = C - Cref slate::add(-one, Cref, one, C); diff --git a/test/test_syrk.cc b/test/test_syrk.cc index 2b0c30f3e..3cd63ff31 100644 --- a/test/test_syrk.cc +++ b/test/test_syrk.cc @@ -6,6 +6,7 @@ #include "slate/slate.hh" #include "test.hh" #include "blas/flops.hh" +#include "print_matrix.hh" #include "matrix_utils.hh" #include "test_utils.hh" @@ -91,6 +92,9 @@ void test_syrk_work(Params& params, bool run) slate::generate_matrix( params.matrix, A ); slate::generate_matrix( params.matrixC, C ); + print_matrix( "A", A, params ); + print_matrix( "C", C, params ); + // If reference run is required, record norms to be used in the check/ref. real_t A_norm=0, C_orig_norm=0; if (ref) { @@ -156,6 +160,8 @@ void test_syrk_work(Params& params, bool run) params.time() = time; params.gflops() = gflop / time; + print_matrix( "C_out", C, params ); + if (check && ! ref) { auto& X = X_alloc.A; auto& Y = Y_alloc.A; diff --git a/test/test_tb2bd.cc b/test/test_tb2bd.cc index 29204bda2..c484b61f8 100644 --- a/test/test_tb2bd.cc +++ b/test/test_tb2bd.cc @@ -242,27 +242,16 @@ void test_tb2bd_work(Params& params, bool run) } } - print_matrix("D", 1, n, &Sigma[0], 1, params); - print_matrix("E", 1, n-1, &E[0], 1, params); + print_vector( "D", Sigma, params ); + print_vector( "E", E, params ); info = lapack::bdsqr(lapack::Uplo::Upper, n, 0, 0, 0, &Sigma[0], &E[0], dummy, 1, dummy, 1, dummy, 1); assert(info == 0); - if (verbose) { - printf( "%% first and last 20 rows of Sigma_ref and Sigma\n" ); - printf( "%9s %9s\n", "Sigma_ref", "Sigma" ); - for (int64_t i = 0; i < std::min(n, n); ++i) { - if (i < 20 || i >= std::min(n, n)-20) { - bool okay = std::abs( Sigma_ref[i] - Sigma[i] ) < tol; - printf( "%9.6f %9.6f%s\n", - Sigma_ref[i], Sigma[i], (okay ? "" : " !!") ); - } - else if (i == 20) { - printf( "--------------------\n" ); - } - } - printf( "\n" ); + if (mpi_rank == 0) { + print_vector( "Sigma ", Sigma, params ); + print_vector( "Sigma_ref", Sigma_ref, params ); } // Relative forward error: || Sigma - Sigma_ref || / || Sigma_ref ||. diff --git a/test/test_tbsm.cc b/test/test_tbsm.cc index 82044065d..94f48c126 100644 --- a/test/test_tbsm.cc +++ b/test/test_tbsm.cc @@ -192,7 +192,7 @@ void test_tbsm_work(Params& params, bool run) params.time() = time; //params.gflops() = gflop / time; - print_matrix("B2", B, params); + print_matrix( "B_out", B, params ); if (check || ref) { #ifdef SLATE_HAVE_SCALAPACK @@ -247,7 +247,7 @@ void test_tbsm_work(Params& params, bool run) &Bref_data[0], 1, 1, Bref_desc); time = barrier_get_wtime(MPI_COMM_WORLD) - time; - print_matrix("B2ref", Bref, params); + print_matrix( "Bref_out", Bref, params ); // local operation: error = Bref_data - B_data blas::axpy(Bref_data.size(), -1.0, &B_data[0], 1, &Bref_data[0], 1); diff --git a/test/test_trmm.cc b/test/test_trmm.cc index 71953a95c..9777298d4 100644 --- a/test/test_trmm.cc +++ b/test/test_trmm.cc @@ -6,6 +6,7 @@ #include "slate/slate.hh" #include "test.hh" #include "blas/flops.hh" +#include "print_matrix.hh" #include "matrix_utils.hh" #include "test_utils.hh" @@ -95,6 +96,9 @@ void test_trmm_work(Params& params, bool run) generate_matrix( params.matrix, A ); generate_matrix( params.matrixB, B ); + print_matrix( "A", A, params ); + print_matrix( "B", B, params ); + // If reference run is required, record norms to be used in the check/ref. real_t A_norm=0, B_orig_norm=0; if (ref) { @@ -178,6 +182,8 @@ void test_trmm_work(Params& params, bool run) params.time() = time; params.gflops() = gflop / time; + print_matrix( "B_out", B, params ); + if (check && ! ref) { auto& X = X_alloc.A; auto& Y = Y_alloc.A; diff --git a/test/test_trtri.cc b/test/test_trtri.cc index 53b1cfe38..b828787f6 100644 --- a/test/test_trtri.cc +++ b/test/test_trtri.cc @@ -144,7 +144,7 @@ void test_trtri_work(Params& params, bool run) params.time() = time; params.gflops() = gflop / time; - print_matrix( "Ainv", A, params ); + print_matrix( "A_inv", A, params ); } if (check) { diff --git a/test/test_unmqr.cc b/test/test_unmqr.cc index d30d55de2..9d01f2867 100644 --- a/test/test_unmqr.cc +++ b/test/test_unmqr.cc @@ -92,7 +92,7 @@ void test_unmqr_work(Params& params, bool run) slate::TriangularFactors T; - print_matrix("A", A, params); + print_matrix( "A", A, params ); // Keep copy of original matrix A. slate::Matrix Aref;