Use spectral decomposition as more accurate fallback from Cholesky #2930

Merged
Changes from 2 commits
149 changes: 120 additions & 29 deletions cpp/daal/src/algorithms/service_kernel_math.h
@@ -24,6 +24,8 @@
#ifndef __SERVICE_KERNEL_MATH_H__
#define __SERVICE_KERNEL_MATH_H__

#include <limits>

#include "services/daal_defines.h"
#include "services/env_detect.h"
#include "src/algorithms/service_error_handling.h"
@@ -660,6 +662,15 @@ bool solveEquationsSystemWithCholesky(FPType * a, FPType * b, size_t n, size_t nX
}
if (info != 0) return false;

/* Note: there can be cases in which the matrix is singular / rank-deficient, but due to numerical
inaccuracies, Cholesky still succeeds. In such cases, it might produce a solution successfully, but
it will not be the minimum-norm solution and is prone to producing excessively large values. Thus
it's preferable to fall back to a different type of solver that can handle those cases correctly. */
for (size_t ix = 0; ix < n; ix++)
{
if (a[ix * (n + 1)] < 1e-6) return false;
}

/* Solve L*L' * x = b */
if (sequential)
{
@@ -673,72 +684,152 @@
}

template <typename FPType, CpuType cpu>
bool solveEquationsSystemWithPLU(FPType * a, FPType * b, size_t n, size_t nX, bool sequential, bool extendFromSymmetric)
bool solveEquationsSystemWithSpectralDecomposition(FPType * a, FPType * b, size_t n, size_t nX, bool sequential)
{
if (extendFromSymmetric)
/* Storage for the eigenvalues.
Note: this allocates more space than the eigenvalues themselves require when nX > 1, because
the same buffer gets reused later on and needs the extra size. The additional entries will
not be filled with eigenvalues. */
TArrayScalable<FPType, cpu> eigenvalues(n * nX);
DAAL_CHECK_MALLOC(eigenvalues.get());

/* SYEV parameters */
char jobz = 'V';
char uplo = 'U';
DAAL_INT info;

/* Query the procedure for size of required buffer */
DAAL_INT lwork = -1;
FPType buffer_size;
if (sequential)
{
/* Extend symmetric matrix to generic through filling of upper triangle */
for (size_t i = 0; i < n; ++i)
{
for (size_t j = 0; j < i; ++j)
{
a[j * n + i] = a[i * n + j];
}
}
LapackInst<FPType, cpu>::xxsyev(&jobz, &uplo, (DAAL_INT *)&n, a, (DAAL_INT *)&n, eigenvalues.get(), &buffer_size, &lwork, &info);
}

/* GETRF and GETRS parameters */
char trans = 'N';
DAAL_INT info = 0;
else
{
LapackInst<FPType, cpu>::xsyev(&jobz, &uplo, (DAAL_INT *)&n, a, (DAAL_INT *)&n, eigenvalues.get(), &buffer_size, &lwork, &info);
}

TArrayScalable<DAAL_INT, cpu> ipiv(n);
DAAL_CHECK_MALLOC(ipiv.get());
if (info) return false;

/* Perform P*L*U factorization of A */
/* Check that buffer size will not overflow when passed to LAPACK */
if (static_cast<size_t>(buffer_size) > std::numeric_limits<DAAL_INT>::max()) return false;

/* Allocate work buffer as needed */
DAAL_INT work_buffer_size = static_cast<DAAL_INT>(buffer_size);
TArrayScalable<FPType, cpu> work_buffer(work_buffer_size);
DAAL_CHECK_MALLOC(work_buffer.get());

/* Perform Q*diag(l)*Q' factorization of A */
if (sequential)
{
LapackInst<FPType, cpu>::xxgetrf((DAAL_INT *)&n, (DAAL_INT *)&n, a, (DAAL_INT *)&n, ipiv.get(), &info);
LapackInst<FPType, cpu>::xxsyev(&jobz, &uplo, (DAAL_INT *)&n, a, (DAAL_INT *)&n, eigenvalues.get(), work_buffer.get(), &work_buffer_size,
&info);
}
else
{
LapackInst<FPType, cpu>::xgetrf((DAAL_INT *)&n, (DAAL_INT *)&n, a, (DAAL_INT *)&n, ipiv.get(), &info);
LapackInst<FPType, cpu>::xsyev(&jobz, &uplo, (DAAL_INT *)&n, a, (DAAL_INT *)&n, eigenvalues.get(), work_buffer.get(), &work_buffer_size,
&info);
}
if (info) return false;
Contributor:

What if the symmetric matrix decomposition fails to converge, for example due to rounding errors?
Isn't it necessary to fall back to a general SVD decomposition (like gesvd, which cannot fail)?

Or maybe it would be an even more stable approach to use gesvd instead of syev for the spectral decomposition from the beginning?

Contributor Author:

Yes, gesvd on $\mathbf{X}$ would be more accurate than syev on $\mathbf{X}^T \mathbf{X}$, but it would be slower. However, the normEq solver is based on $\mathbf{X}^T \mathbf{X}$ (I guess that's what the name implies, but I'm not 100% sure). gesvd could potentially be a fallback from QR, but that'd be a different PR.

I think scikit-learn-intelex only calls oneDAL's solver with normEq, BTW.

Contributor:

I meant not to apply gesvd to $X$, but rather to $X^T X$.
Due to factors like rounding errors, $X^T X$ might not be positive semi-definite and syev might fail to converge, and we would end up with the same issue we have now with Cholesky.
With SVD we can do:
$X^T X \beta = X^T y$
$X^T X = U \Sigma V^T$
$\beta = V \Sigma^{-1} U^T X^T y$

And there should be no possibility of failure.
Or maybe this is overly cautious.

Contributor:

Yes, it seems that sklearnex only uses the normal equations method now. We have the QR method implemented, but on CPUs only.
And it is rather inefficient to use normal equations first and, if that fails, apply QR to the whole $X$ matrix -> roughly double the cost in terms of performance.

Contributor Author:

I don't follow.

The eigendecomposition part doesn't require positive-definiteness - it would succeed even if the input had negative entries on the main diagonal. If, due to numerical inaccuracies, an eigenvalue that is close to zero in theory comes out with a negative sign in practice (indefinite or negative-definite matrices would have negative eigenvalues), it gets removed as part of the procedure anyway, so the end result would not include that component.

These "sy" routines take values only from one triangle of the input matrix, so there shouldn't be any issues with symmetry either if the matrix is filled in both triangles. I understand they should also be more accurate than gesvd when the input matrix is actually symmetric, but I'm not 100% sure about that.

About the QR part: sorry, to clarify - what I meant was that gesvd on $\mathbf{X}$ could be a fallback when geqrf on $\mathbf{X}$ fails. In those cases, $\mathbf{X}^T \mathbf{X}$ would not get computed.
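
For reference, the algebra behind dropping those components can be written out explicitly (a sketch in the notation of this thread; $Q_r$ and $\Lambda_r$ stand for the eigenvectors and eigenvalues that survive the threshold, and are not symbols used in the diff itself):

$X^T X = Q \Lambda Q^T$
$Q_{is} = Q_r \Lambda_r^{-1/2}$
$\beta = Q_{is} Q_{is}^T X^T y = Q_r \Lambda_r^{-1} Q_r^T X^T y$

This is the minimum-norm solution restricted to the retained eigencomponents, and it corresponds to the Qis = Q * diag(1 / sqrt(l)) scaling and the two matrix products in the new code.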

Contributor:

Yes, right. I forgot that positive-definiteness is not a problem for eigenvalue decomposition, only for Cholesky.
Sorry for the misleading info.
My comment was based on past experience with eigenvalue decomposition methods, but I forgot what exactly led to the failures. My point is that those methods can fail, and I'm trying to avoid that.

Probably it was not positive-definiteness, but numerical stability issues.
If the condition number of $X^T X$ is too small, i.e. all the eigenvalues are close, some eigenvalue decomposition methods can fail. Similar things might happen when the condition number is too big.

But I am not too familiar with which MKL decomposition routine is better in the case of small or large condition numbers [I thought gesvd was the best, but it looks like it's not].

Anyway, from your comment I see that for symmetric matrices the "sy" methods should be better than the "ge" ones. So the choice of syev looks reasonable now.

I also understand the point about QR and gesvd for decomposing $X$, but that is outside the scope of this PR.

Contributor Author (@david-cortes-intel, Oct 8, 2024):

Got it. It looks from the syevd docs that it is indeed possible for this routine to fail depending on the values in the inputs, although they don't explain when that could happen.

That being said, in case it makes this look less bad: if the eigendecomposition fails, it follows that other methods like SciPy's lstsq or scikit-learn's LinearRegression would also fail, since they are also based on spectral decomposition.
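
As a side note for readers following along, the same technique can be reproduced outside of oneDAL with the public LAPACKE/CBLAS interfaces. The sketch below is a hypothetical standalone illustration only (column-major storage; LAPACKE_dsyev and cblas_dgemv assumed from a standard LAPACKE/CBLAS installation; it is not the code in this diff, which goes through the internal x/xx wrappers):

// Hypothetical standalone sketch: solve A*x = b for a symmetric positive semi-definite A
// via eigendecomposition, discarding eigencomponents below eps * lambda_max (the same
// criterion gelsd applies to singular values by default).
#include <cfloat>
#include <cmath>
#include <vector>
#include <cblas.h>
#include <lapacke.h>

bool solveWithSpectralDecomposition(std::vector<double> a,        /* n*n, column-major; overwritten */
                                    const std::vector<double> &b, /* right-hand side, length n */
                                    std::vector<double> &x,       /* solution, length n */
                                    int n)
{
    std::vector<double> w(n); /* eigenvalues, returned in ascending order */
    if (LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', n, a.data(), n, w.data()) != 0) return false;
    if (w[n - 1] <= DBL_EPSILON) return false; /* numerically zero matrix */

    /* Count discarded components, keeping at least one */
    const double threshold = DBL_EPSILON * w[n - 1];
    int discarded = 0;
    while (discarded < n - 1 && w[discarded] <= threshold) ++discarded;

    /* Qis = Q_kept * diag(1 / sqrt(lambda)): scale the retained eigenvector columns */
    for (int col = discarded; col < n; ++col)
    {
        const double scale = std::sqrt(w[col]);
        for (int row = 0; row < n; ++row) a[col * n + row] /= scale;
    }

    /* x = Qis * (Qis^T * b), i.e. the pseudo-inverse of A applied to b */
    const int kept = n - discarded;
    const double *qis = a.data() + static_cast<size_t>(discarded) * n;
    std::vector<double> tmp(kept);
    cblas_dgemv(CblasColMajor, CblasTrans, n, kept, 1.0, qis, n, b.data(), 1, 0.0, tmp.data(), 1);
    x.assign(n, 0.0);
    cblas_dgemv(CblasColMajor, CblasNoTrans, n, kept, 1.0, qis, n, tmp.data(), 1, 0.0, x.data(), 1);
    return true;
}

With nX > 1 right-hand sides the two gemv calls would become gemm calls, exactly as in the diff.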


/* Components with small singular values get eliminated using the exact same logic as 'gelsd' with default parameters */
constexpr const FPType eps = std::numeric_limits<FPType>::epsilon();
if (eigenvalues[n - 1] <= eps) return false;
const double component_threshold = eps * eigenvalues[n - 1];
DAAL_INT num_discarded;
for (num_discarded = 0; num_discarded < static_cast<DAAL_INT>(n) - 1; num_discarded++)
{
if (eigenvalues[num_discarded] > component_threshold)
{
break;
}
}
if (info != 0) return false;

/* Solve P*L*U * x = b */
/* Create the square root of the inverse: Qis = Q * diag(1 / sqrt(l)) */
DAAL_INT one = 1;
for (size_t col = num_discarded; col < n; col++)
{
const FPType scale = std::sqrt(eigenvalues[col]);
if (sequential)
{
LapackInst<FPType, cpu>::xxrscl((DAAL_INT *)&n, &scale, a + col * n, &one);
}

else
{
LapackInst<FPType, cpu>::xrscl((DAAL_INT *)&n, &scale, a + col * n, &one);
}
}

/* Now calculate the actual solution: Qis * Qis' * B */
char trans_yes = 'T';
char trans_no = 'N';
FPType one_fp = 1;
FPType zero = 0;
DAAL_INT num_taken = static_cast<DAAL_INT>(n) - num_discarded;
a += static_cast<size_t>(num_discarded) * n;
if (sequential)
{
LapackInst<FPType, cpu>::xxgetrs(&trans, (DAAL_INT *)&n, (DAAL_INT *)&nX, a, (DAAL_INT *)&n, ipiv.get(), b, (DAAL_INT *)&n, &info);
if (nX == 1)
{
BlasInst<FPType, cpu>::xxgemv(&trans_yes, (DAAL_INT *)&n, &num_taken, &one_fp, a, (DAAL_INT *)&n, b, &one, &zero, eigenvalues.get(),
&one);
BlasInst<FPType, cpu>::xxgemv(&trans_no, (DAAL_INT *)&n, &num_taken, &one_fp, a, (DAAL_INT *)&n, eigenvalues.get(), &one, &zero, b, &one);
}

else
{
BlasInst<FPType, cpu>::xxgemm(&trans_yes, &trans_no, &num_taken, (DAAL_INT *)&nX, (DAAL_INT *)&n, &one_fp, a, (DAAL_INT *)&n, b,
(DAAL_INT *)&n, &zero, eigenvalues.get(), &num_taken);
BlasInst<FPType, cpu>::xxgemm(&trans_no, &trans_no, (DAAL_INT *)&n, (DAAL_INT *)&nX, &num_taken, &one_fp, a, (DAAL_INT *)&n,
eigenvalues.get(), &num_taken, &zero, b, (DAAL_INT *)&n);
}
}

else
{
LapackInst<FPType, cpu>::xgetrs(&trans, (DAAL_INT *)&n, (DAAL_INT *)&nX, a, (DAAL_INT *)&n, ipiv.get(), b, (DAAL_INT *)&n, &info);
if (nX == 1)
{
BlasInst<FPType, cpu>::xgemv(&trans_yes, (DAAL_INT *)&n, &num_taken, &one_fp, a, (DAAL_INT *)&n, b, &one, &zero, eigenvalues.get(), &one);
BlasInst<FPType, cpu>::xgemv(&trans_no, (DAAL_INT *)&n, &num_taken, &one_fp, a, (DAAL_INT *)&n, eigenvalues.get(), &one, &zero, b, &one);
}

else
{
BlasInst<FPType, cpu>::xgemm(&trans_yes, &trans_no, &num_taken, (DAAL_INT *)&nX, (DAAL_INT *)&n, &one_fp, a, (DAAL_INT *)&n, b,
(DAAL_INT *)&n, &zero, eigenvalues.get(), &num_taken);
BlasInst<FPType, cpu>::xgemm(&trans_no, &trans_no, (DAAL_INT *)&n, (DAAL_INT *)&nX, &num_taken, &one_fp, a, (DAAL_INT *)&n,
eigenvalues.get(), &num_taken, &zero, b, (DAAL_INT *)&n);
}
}
return (info == 0);

return true;
}

template <typename FPType, CpuType cpu>
bool solveSymmetricEquationsSystem(FPType * a, FPType * b, size_t n, size_t nX, bool sequential)
{
/* Copy data for fallback from Cholesky to PLU factorization */
/* Copy data for fallback from Cholesky to spectral decomposition */
TArrayScalable<FPType, cpu> aCopy(n * n);
TArrayScalable<FPType, cpu> bCopy(n);
TArrayScalable<FPType, cpu> bCopy(n * nX);
DAAL_CHECK_MALLOC(aCopy.get());
DAAL_CHECK_MALLOC(bCopy.get());

int copy_status = services::internal::daal_memcpy_s(aCopy.get(), n * n * sizeof(FPType), a, n * n * sizeof(FPType));
copy_status += services::internal::daal_memcpy_s(bCopy.get(), n * sizeof(FPType), b, n * sizeof(FPType));
copy_status += services::internal::daal_memcpy_s(bCopy.get(), n * nX * sizeof(FPType), b, n * nX * sizeof(FPType));

if (copy_status != 0) return false;

/* Try to solve with Cholesky factorization */
if (!solveEquationsSystemWithCholesky<FPType, cpu>(a, b, n, nX, sequential))
{
/* Fallback to PLU factorization */
bool status = solveEquationsSystemWithPLU<FPType, cpu>(aCopy.get(), bCopy.get(), n, nX, sequential, true);
/* Fall back to spectral decomposition */
bool status = solveEquationsSystemWithSpectralDecomposition<FPType, cpu>(aCopy.get(), bCopy.get(), n, nX, sequential);
if (status)
{
status = status && (services::internal::daal_memcpy_s(b, n * sizeof(FPType), bCopy.get(), n * sizeof(FPType)) == 0);
status = status && (services::internal::daal_memcpy_s(b, n * nX * sizeof(FPType), bCopy.get(), n * nX * sizeof(FPType)) == 0);
}
return status;
}
33 changes: 33 additions & 0 deletions cpp/daal/src/externals/service_lapack.h
@@ -21,6 +21,11 @@
//--
*/

/* Note: this file is not auto-generated. These 'x'/'xx' functions are added here manually on an
as-needed basis and are only used internally within the library, so their signatures might not
match LAPACK's in every detail, such as whether scalars are passed by pointer or by value, or
whether 'const' qualifiers are present. */

#ifndef __SERVICE_LAPACK_H__
#define __SERVICE_LAPACK_H__

@@ -193,6 +198,18 @@ struct Lapack
_impl<fpType, cpu>::xxsyevd(jobz, uplo, n, a, lda, w, work, lwork, iwork, liwork, info);
}

static void xsyev(const char * jobz, const char * uplo, const SizeType * n, fpType * a, const SizeType * lda, fpType * w, fpType * work,
SizeType * lwork, SizeType * info)
{
_impl<fpType, cpu>::xsyev(jobz, uplo, n, a, lda, w, work, lwork, info);
}

static void xxsyev(const char * jobz, const char * uplo, const SizeType * n, fpType * a, const SizeType * lda, fpType * w, fpType * work,
SizeType * lwork, SizeType * info)
{
_impl<fpType, cpu>::xxsyev(jobz, uplo, n, a, lda, w, work, lwork, info);
}

static void xormqr(char * side, char * trans, SizeType * m, SizeType * n, SizeType * k, fpType * a, SizeType * lda, fpType * tau, fpType * c,
SizeType * ldc, fpType * work, SizeType * lwork, SizeType * info)
{
@@ -204,6 +221,10 @@ struct Lapack
{
_impl<fpType, cpu>::xxormqr(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info);
}

static void xrscl(const SizeType * n, const fpType * sa, fpType * sx, const SizeType * incx) { _impl<fpType, cpu>::xrscl(n, sa, sx, incx); }

static void xxrscl(const SizeType * n, const fpType * sa, fpType * sx, const SizeType * incx) { _impl<fpType, cpu>::xxrscl(n, sa, sx, incx); }
};

template <typename fpType, CpuType cpu>
@@ -361,6 +382,18 @@ struct LapackAutoDispatch
DAAL_DISPATCH_LAPACK_BY_CPU(fpType, xxsyevd, jobz, uplo, n, a, lda, w, work, lwork, iwork, liwork, info);
}

static void xsyev(char * jobz, char * uplo, SizeType * n, fpType * a, SizeType * lda, fpType * w, fpType * work, SizeType * lwork,
SizeType * info)
{
DAAL_DISPATCH_LAPACK_BY_CPU(fpType, xsyev, jobz, uplo, n, a, lda, w, work, lwork, info);
}

static void xxsyev(char * jobz, char * uplo, SizeType * n, fpType * a, SizeType * lda, fpType * w, fpType * work, SizeType * lwork,
SizeType * info)
{
DAAL_DISPATCH_LAPACK_BY_CPU(fpType, xxsyev, jobz, uplo, n, a, lda, w, work, lwork, info);
}

static void xormqr(char * side, char * trans, SizeType * m, SizeType * n, SizeType * k, fpType * a, SizeType * lda, fpType * tau, fpType * c,
SizeType * ldc, fpType * work, SizeType * lwork, SizeType * info)
{
6 changes: 6 additions & 0 deletions cpp/daal/src/externals/service_lapack_declar_ref.h
@@ -82,10 +82,16 @@ extern "C"
extern void ssyevd_(char *, char *, DAAL_INT *, float *, DAAL_INT *, float *, float *, DAAL_INT *, DAAL_INT *, DAAL_INT *, DAAL_INT *);
extern void dsyevd_(char *, char *, DAAL_INT *, double *, DAAL_INT *, double *, double *, DAAL_INT *, DAAL_INT *, DAAL_INT *, DAAL_INT *);

extern void ssyev_(const char *, const char *, const DAAL_INT *, float *, const DAAL_INT *, float *, float *, DAAL_INT *, DAAL_INT *);
extern void dsyev_(const char *, const char *, const DAAL_INT *, double *, const DAAL_INT *, double *, double *, DAAL_INT *, DAAL_INT *);

extern void sormqr_(char *, char *, DAAL_INT *, DAAL_INT *, DAAL_INT *, float *, DAAL_INT *, float *, float *, DAAL_INT *, float *, DAAL_INT *,
DAAL_INT *);
extern void dormqr_(char *, char *, DAAL_INT *, DAAL_INT *, DAAL_INT *, double *, DAAL_INT *, double *, double *, DAAL_INT *, double *,
DAAL_INT *, DAAL_INT *);

extern void drscl_(const DAAL_INT *, const double *, double *, const DAAL_INT *);
extern void srscl_(const DAAL_INT *, const float *, float *, const DAAL_INT *);
}

} // namespace ref
52 changes: 52 additions & 0 deletions cpp/daal/src/externals/service_lapack_mkl.h
@@ -243,6 +243,20 @@ struct MklLapack<double, cpu>
mkl_set_num_threads_local(old_nthr);
}

static void xsyev(const char * jobz, const char * uplo, const DAAL_INT * n, double * a, const DAAL_INT * lda, double * w, double * work,
DAAL_INT * lwork, DAAL_INT * info)
{
__DAAL_MKLFN_CALL_LAPACK(dsyev, (jobz, uplo, (MKL_INT *)n, a, (MKL_INT *)lda, w, work, (MKL_INT *)lwork, (MKL_INT *)info));
}

static void xxsyev(const char * jobz, const char * uplo, const DAAL_INT * n, double * a, const DAAL_INT * lda, double * w, double * work,
DAAL_INT * lwork, DAAL_INT * info)
{
int old_nthr = mkl_set_num_threads_local(1);
__DAAL_MKLFN_CALL_LAPACK(dsyev, (jobz, uplo, (MKL_INT *)n, a, (MKL_INT *)lda, w, work, (MKL_INT *)lwork, (MKL_INT *)info));
mkl_set_num_threads_local(old_nthr);
}

static void xormqr(char * side, char * trans, DAAL_INT * m, DAAL_INT * n, DAAL_INT * k, double * a, DAAL_INT * lda, double * tau, double * c,
DAAL_INT * ldc, double * work, DAAL_INT * lwork, DAAL_INT * info)
{
@@ -258,6 +272,18 @@
(MKL_INT *)lwork, (MKL_INT *)info));
mkl_set_num_threads_local(old_nthr);
}

static void xrscl(const DAAL_INT * n, const double * sa, double * sx, const DAAL_INT * incx)
{
__DAAL_MKLFN_CALL_LAPACK(drscl, ((MKL_INT *)n, sa, sx, (MKL_INT *)incx));
}

static void xxrscl(const DAAL_INT * n, const double * sa, double * sx, const DAAL_INT * incx)
{
int old_nthr = mkl_set_num_threads_local(1);
__DAAL_MKLFN_CALL_LAPACK(drscl, ((MKL_INT *)n, sa, sx, (MKL_INT *)incx));
mkl_set_num_threads_local(old_nthr);
}
};

/*
@@ -461,6 +487,20 @@ struct MklLapack<float, cpu>
mkl_set_num_threads_local(old_nthr);
}

static void xsyev(const char * jobz, const char * uplo, const DAAL_INT * n, float * a, const DAAL_INT * lda, float * w, float * work,
DAAL_INT * lwork, DAAL_INT * info)
{
__DAAL_MKLFN_CALL_LAPACK(ssyev, (jobz, uplo, (MKL_INT *)n, a, (MKL_INT *)lda, w, work, (MKL_INT *)lwork, (MKL_INT *)info));
}

static void xxsyev(const char * jobz, const char * uplo, const DAAL_INT * n, float * a, const DAAL_INT * lda, float * w, float * work,
DAAL_INT * lwork, DAAL_INT * info)
{
int old_nthr = mkl_set_num_threads_local(1);
__DAAL_MKLFN_CALL_LAPACK(ssyev, (jobz, uplo, (MKL_INT *)n, a, (MKL_INT *)lda, w, work, (MKL_INT *)lwork, (MKL_INT *)info));
mkl_set_num_threads_local(old_nthr);
}

static void xormqr(char * side, char * trans, DAAL_INT * m, DAAL_INT * n, DAAL_INT * k, float * a, DAAL_INT * lda, float * tau, float * c,
DAAL_INT * ldc, float * work, DAAL_INT * lwork, DAAL_INT * info)
{
@@ -476,6 +516,18 @@
(MKL_INT *)lwork, (MKL_INT *)info));
mkl_set_num_threads_local(old_nthr);
}

static void xrscl(const DAAL_INT * n, const float * sa, float * sx, const DAAL_INT * incx)
{
__DAAL_MKLFN_CALL_LAPACK(srscl, ((MKL_INT *)n, sa, sx, (MKL_INT *)incx));
}

static void xxrscl(const DAAL_INT * n, const float * sa, float * sx, const DAAL_INT * incx)
{
int old_nthr = mkl_set_num_threads_local(1);
__DAAL_MKLFN_CALL_LAPACK(srscl, ((MKL_INT *)n, sa, sx, (MKL_INT *)incx));
mkl_set_num_threads_local(old_nthr);
}
};

} // namespace mkl