diff --git a/benchmarks/benchmarkHelpers.hpp b/benchmarks/benchmarkHelpers.hpp index 25074b5c..83082e1e 100644 --- a/benchmarks/benchmarkHelpers.hpp +++ b/benchmarks/benchmarkHelpers.hpp @@ -162,9 +162,10 @@ void initialize( ArraySlice< T, NDIM, USD, INDEX_TYPE > const slice, int & iter } ); } - +// The return type is RajaView< T, PERMUTATION >; it is deduced with auto because +// spelling it out causes an "unexpected function type" error on CUDA > 11.2. template< typename T, typename PERMUTATION > -RajaView< T, PERMUTATION > makeRajaView( ArrayT< T, PERMUTATION > const & array ) +auto makeRajaView( ArrayT< T, PERMUTATION > const & array ) { constexpr int NDIM = typeManipulation::getDimension< PERMUTATION >; std::array< INDEX_TYPE, NDIM > sizes; diff --git a/host-configs/LLNL/lassen-clang10-cuda11.cmake b/host-configs/LLNL/lassen-clang10-cuda11.cmake index e7c26bae..4205056a 100644 --- a/host-configs/LLNL/lassen-clang10-cuda11.cmake +++ b/host-configs/LLNL/lassen-clang10-cuda11.cmake @@ -14,7 +14,7 @@ set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") # Set up the tpls set(GEOSX_TPL_ROOT_DIR /usr/gapps/GEOSX/thirdPartyLibs CACHE PATH "") -set(GEOSX_TPL_DIR ${GEOSX_TPL_ROOT_DIR}/2022-11-12/install-${CONFIG_NAME}-release CACHE PATH "") +set(GEOSX_TPL_DIR ${GEOSX_TPL_ROOT_DIR}/2023-01-23/install-${CONFIG_NAME}-release CACHE PATH "") set(CAMP_DIR ${GEOSX_TPL_DIR}/raja CACHE PATH "") set(RAJA_DIR ${GEOSX_TPL_DIR}/raja CACHE PATH "") diff --git a/host-configs/LLNL/lassen-clang13-cuda11.cmake b/host-configs/LLNL/lassen-clang13-cuda11.cmake index 2bc75dcd..ae177390 100644 --- a/host-configs/LLNL/lassen-clang13-cuda11.cmake +++ b/host-configs/LLNL/lassen-clang13-cuda11.cmake @@ -28,7 +28,7 @@ set(CHAI_DIR ${GEOSX_TPL_DIR}/chai CACHE PATH "") set(ENABLE_CALIPER ON CACHE BOOL "") set(CALIPER_DIR ${GEOSX_TPL_DIR}/caliper CACHE PATH "") -set(ENABLE_ADDR2LINE ON CACHE BOOL "") +set(ENABLE_ADDR2LINE OFF CACHE BOOL "") # Cuda options set(ENABLE_CUDA ON CACHE BOOL "") diff --git a/src/ArrayOfArraysView.hpp 
b/src/ArrayOfArraysView.hpp index 706f2014..613be335 100644 --- a/src/ArrayOfArraysView.hpp +++ b/src/ArrayOfArraysView.hpp @@ -729,8 +729,15 @@ class ArrayOfArraysView // capacities + numSubArrays, // m_offsets.data() + 1 ); - RAJA::inclusive_scan< POLICY >( RAJA::make_span< INDEX_TYPE const * >( capacities, numSubArrays ), - RAJA::make_span< INDEX_TYPE * >( m_offsets.data()+1, numSubArrays ) ); + // RAJA::inclusive_scan< POLICY >( RAJA::make_span< INDEX_TYPE const * >( capacities, numSubArrays ), + // RAJA::make_span< INDEX_TYPE * >( m_offsets.data()+1, numSubArrays ) ); + + // Perform a serial prefix-sum over the capacities to fill in the offsets + // (RAJA's inclusive_scan produces garbage values with CUDA 11.2.2) + for( int i = 1; i <= numSubArrays; i++ ) + { + m_offsets[i] = capacities[i - 1] + m_offsets[i - 1]; + } }; resizeFromOffsetsImpl( numSubArrays, fillOffsets, buffers ... ); }