-
Notifications
You must be signed in to change notification settings - Fork 98
SPARSE 2::spmv
brian-kelley edited this page Jul 9, 2024
·
16 revisions
Header File: KokkosSparse_spmv.hpp
Usage:
KokkosSparse::spmv(mode,alpha,A,x,beta,y);
KokkosSparse::spmv(space,mode,alpha,A,x,beta,y);
KokkosSparse::spmv(&handle,mode,alpha,A,x,beta,y);
KokkosSparse::spmv(space,&handle,mode,alpha,A,x,beta,y);
Computes y := beta*y + alpha*Op(A)*x
where Op(A)
is determined by mode
(see below).
template<typename DeviceType, typename AMatrix, typename XVector, typename YVector>
struct KokkosSparse::SPMVHandle;
// Constructor
SPMVHandle(SPMVAlgorithm algo = SPMV_DEFAULT);
-
SPMV_DEFAULT
: best performance for repeated spmv calls with the same handle -
SPMV_FAST_SETUP
: best performance for a single spmv call- Always used internally by spmv interfaces without handle argument
-
SPMV_NATIVE
: use KokkosKernels implementation even when a TPL implementation is available -
SPMV_MERGE_PATH
: use the best known implementation for matrices with imbalanced entries per row (CrsMatrix only) -
SPMV_BSR_V42
: KokkosKernels 4.2 implementation (BsrMatrix only) -
SPMV_BSR_TC
: KokkosKernels implementation targeting NVIDIA Tensor Cores (BsrMatrix only, expert usage). Has additional requirements on A, x and y scalar types depending on GPU architecture. -
SPMV_BSR_V41
: KokkosKernels 4.1 implementation (BsrMatrix only).SPMV_BSR_V42
recommended for better performance.
// Default space instance, and no handle
template <class AlphaType, class AMatrix, class XVector, class BetaType, class YVector>
void
spmv(const char mode[],
const AlphaType& alpha,
const AMatrix& A,
const XVector& x,
const BetaType& beta,
const YVector& y);
// Space instance provided, but no handle
template <class ExecutionSpace, class AlphaType, class AMatrix, class XVector, class BetaType, class YVector>
void
spmv(const ExecutionSpace& space,
const char mode[],
const AlphaType& alpha,
const AMatrix& A,
const XVector& x,
const BetaType& beta,
const YVector& y);
// Default space instance, but handle provided
template <class Handle, class AlphaType, class AMatrix, class XVector, class BetaType, class YVector>
void
spmv(Handle* handle,
const char mode[],
const AlphaType& alpha,
const AMatrix& A,
const XVector& x,
const BetaType& beta,
const YVector& y);
// Both space instance and handle provided
template <class ExecutionSpace, class Handle, class AlphaType, class AMatrix, class XVector, class BetaType, class YVector>
void
spmv(const ExecutionSpace& space,
Handle* handle,
const char mode[],
const AlphaType& alpha,
const AMatrix& A,
const XVector& x,
const BetaType& beta,
const YVector& y);
- ExecutionSpace: A Kokkos execution space. Must be able to access the memory spaces of A, x, and y. Must match
Handle::ExecutionSpaceType
. - Handle: Specialization of
KokkosSparse::SPMVHandle
. - AlphaType: Type of coefficient alpha. Must be convertible to
YVector::value_type
. - AMatrix: A
KokkosSparse::CrsMatrix
, orKokkosSparse::Experimental::BsrMatrix
. Must be identical toHandle::AMatrixType
. - XVector: Type of x, must be a rank-1 or 2
Kokkos::View
. Must be identical toHandle::XVectorType
. - BetaType: Type of coefficient beta. Must be convertible to
YVector::value_type
. - YVector: Type of y, must be a rank-1 or 2
Kokkos::View
and its rank must match that ofXVector
. Must be identical toHandle::YVectorType
.
- space: The execution space instance on which to run the kernel. If none provided, the default-constructed instance is used.
- handle: a pointer to a
KokkosSparse::SPMVHandle
. On the first call tospmv
with a given handle instance, the handle's internal data will be initialized automatically. On all later calls tospmv
, this internal data will be reused. If no handle is provided, any internal data or matrix analysis will be created internally for thespmv
call and then discarded. - mode: selects the operator mode Op(A):
- "N": normal
- "T": transpose
- "C": conjugate non-transpose
- "H": conjugate transpose
- alpha: Scalar multiplier for the matrix A.
- A: The sparse matrix to apply. If handle has previously been passed to
spmv
, A must be identical to the A passed in to that first call. - x: A vector to multiply on the left by A.
- beta: Scalar multiplier for the vector y.
- y: Result vector.
- ExecutionSpace can access A, x, and y
- If handle is provided, then A is the same matrix that has been used in all previous spmv calls with this handle
- y is not const-valued
XVector::rank() == YVector::rank()
-
XVector::rank() == 1
orXVector::rank() == 2
y.extent(1) == x.extent(1)
-
y.extent(0)
andx.extent(0)
match the dimensions ofOp(A)
- for
BsrMatrix
A, the dimensions areA.numRows()*A.blockDim()
xA.numCols()*A.blockDim()
.
- for
- spmv is potentially asynchronous.
- If alpha is zero and x contains NaN and/or infinity on input, then the non-finite values resulting from
Op(A)*x
will not be written to y. - Likewise, if beta is zero and y contains NaN and/or infinity on input, then y will be zeroed out before computing
y := alpha*Op(A)*x
. - The handle can be reused:
- with different space arguments
- with different x/y arguments, even if the numbers of columns has changed
- with different mode arguments
example source location: example/wiki/sparse/KokkosSparse_wiki_spmv.cpp
#include "Kokkos_Core.hpp"
#include "KokkosKernels_default_types.hpp"
#include "KokkosSparse_CrsMatrix.hpp"
#include "KokkosSparse_spmv.hpp"
using Scalar = default_scalar;
using Ordinal = default_lno_t;
using Offset = default_size_type;
using Layout = default_layout;
int main(int argc, char* argv[]) {
Kokkos::initialize();
using device_type = typename Kokkos::Device<Kokkos::DefaultExecutionSpace,
typename Kokkos::DefaultExecutionSpace::memory_space>;
using matrix_type = typename KokkosSparse::CrsMatrix<Scalar, Ordinal, device_type, void, Offset>;
using graph_type = typename matrix_type::staticcrsgraph_type;
using row_map_type = typename graph_type::row_map_type;
using entries_type = typename graph_type::entries_type;
using values_type = typename matrix_type::values_type;
{
const Scalar SC_ONE = Kokkos::ArithTraits<Scalar>::one();
Ordinal numRows = 10;
// Build the row pointers and store numNNZ
typename row_map_type::non_const_type row_map("row pointers", numRows + 1);
typename row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(row_map);
for(Ordinal rowIdx = 1; rowIdx < numRows + 1; ++rowIdx) {
if( (rowIdx == 1) || (rowIdx == numRows) ){
row_map_h(rowIdx) = row_map_h(rowIdx - 1) + 2;
} else {
row_map_h(rowIdx) = row_map_h(rowIdx - 1) + 3;
}
}
const Offset numNNZ = row_map_h(numRows);
Kokkos::deep_copy(row_map, row_map_h);
typename entries_type::non_const_type entries("column indices", numNNZ);
typename entries_type::HostMirror entries_h = Kokkos::create_mirror_view(entries);
typename values_type::non_const_type values("values", numNNZ);
typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
for(Ordinal rowIdx = 0; rowIdx < numRows; ++rowIdx) {
if(rowIdx == 0) {
entries_h(0) = rowIdx;
entries_h(1) = rowIdx + 1;
values_h(0) = SC_ONE;
values_h(1) = -SC_ONE;
} else if(rowIdx == numRows - 1) {
entries_h(row_map_h(rowIdx)) = rowIdx - 1;
entries_h(row_map_h(rowIdx) + 1) = rowIdx;
values_h(row_map_h(rowIdx)) = -SC_ONE;
values_h(row_map_h(rowIdx) + 1) = SC_ONE;
} else {
entries_h(row_map_h(rowIdx)) = rowIdx - 1;
entries_h(row_map_h(rowIdx) + 1) = rowIdx;
entries_h(row_map_h(rowIdx) + 2) = rowIdx + 1;
values_h(row_map_h(rowIdx)) = -SC_ONE;
values_h(row_map_h(rowIdx) + 1) = SC_ONE + SC_ONE;
values_h(row_map_h(rowIdx) + 2) = -SC_ONE;
}
}
Kokkos::deep_copy(entries, entries_h);
Kokkos::deep_copy(values, values_h);
graph_type myGraph(entries, row_map);
matrix_type myMatrix("test matrix", numRows, values, myGraph);
const Scalar alpha = SC_ONE;
const Scalar beta = SC_ONE;
typename values_type::non_const_type x("lhs", numRows);
typename values_type::non_const_type y("rhs", numRows);
KokkosSparse::spmv("N", alpha, myMatrix, x, beta, y);
}
Kokkos::finalize();
return 0;
}