Skip to content

Commit

Permalink
implemented lapack geqp3, orgqr, ungqr + test
Browse files Browse the repository at this point in the history
  • Loading branch information
jasonkaye authored and Wentzell committed Mar 4, 2024
1 parent 3983204 commit a93d4f0
Show file tree
Hide file tree
Showing 7 changed files with 312 additions and 1 deletion.
3 changes: 3 additions & 0 deletions c++/nda/lapack.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@ namespace nda::lapack {

#include "lapack/gelss.hpp"
#include "lapack/gesvd.hpp"
#include "lapack/geqp3.hpp"
#include "lapack/getrf.hpp"
#include "lapack/getri.hpp"
#include "lapack/getrs.hpp"
#include "lapack/gtsv.hpp"
#include "lapack/orgqr.hpp"
#include "lapack/ungqr.hpp"
88 changes: 88 additions & 0 deletions c++/nda/lapack/geqp3.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Copyright (c) 2020-2022 Simons Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0.txt
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Authors: Olivier Parcollet, Nils Wentzell, Jason Kaye

#pragma once

#include "../lapack.hpp"
#include "nda/concepts.hpp"

namespace nda::lapack {

/**
* Computes a QR factorization with column pivoting of an M-by-N matrix A:
*
* A*P = Q*R
*
* using Level 3 BLAS.
*
* [in,out] a is real/complex array, dimension (LDA,N)
* On entry, the M-by-N matrix A. On exit, the upper triangle of a
* contains the min(M,N)-by-N upper triangular matrix R; the
* elements below the diagonal, together with the length min(M,N)
* vector tau, represent the unitary matrix Q as a product of
* min(M,N) elementary reflectors.
*
* [in,out] jpvt is integer array, dimension (N)
* On entry, if jpvt(j).ne.0, the j-th column of A is permuted to
* the front of A*P (a leading column); if jpvt(j)=0, the j-th
* column of A is a free column. On exit, if JPVT(j)=k, then the
* j-th column of A*P was the the k-th column of A.
*
* [out] tau is a real/complex array, dimension (min(M,N))
* The scalar factors of the elementary reflectors.
*
* [return] info is INTEGER
* = 0: successful exit.
* < 0: if INFO = -i, the i-th argument had an illegal value.
*
* @note If one wishes to carry out the column pivoted QR algorithm, the array
* \p jpvt must be initialized to zero; see the explanation above of the
* argument jpvt.
*/
template <MemoryMatrix A, MemoryVector JPVT, MemoryVector TAU>
requires(mem::on_host<A> and is_blas_lapack_v<get_value_t<A>> and have_same_value_type_v<A, TAU>
and mem::have_compatible_addr_space<A, JPVT, TAU>)
int geqp3(A &&a, JPVT &&jpvt, TAU &&tau) {
static_assert(has_F_layout<A>, "C order not implemented");
static_assert(std::is_same_v<get_value_t<JPVT>, int>, "Pivoting array must have elements of type int");
static_assert(mem::have_host_compatible_addr_space<A, JPVT, TAU>,
"geqp3 is only implemented on the CPU, but was provided non-host compatible array");

using T = get_value_t<A>;
auto [m, n] = a.shape();
auto rwork = array<double, 1>(2 * n);
EXPECTS(tau.size() >= std::min(m, n));

// Must be lapack compatible
EXPECTS(a.indexmap().min_stride() == 1);
EXPECTS(jpvt.indexmap().min_stride() == 1);
EXPECTS(tau.indexmap().min_stride() == 1);

// First call to get the optimal buffersize
T bufferSize_T{};
int info = 0;
lapack::f77::geqp3(m, n, a.data(), get_ld(a), jpvt.data(), tau.data(), &bufferSize_T, -1, rwork.data(), info);
int bufferSize = static_cast<int>(std::ceil(std::real(bufferSize_T)));

// Allocate work buffer and perform actual library call
nda::array<T, 1, C_layout, heap<mem::get_addr_space<A>>> work(bufferSize);
lapack::f77::geqp3(m, n, a.data(), get_ld(a), jpvt.data(), tau.data(), work.data(), bufferSize, rwork.data(), info);

if (info) NDA_RUNTIME_ERROR << "Error in geqp3 : info = " << info;
return info;
}

} // namespace nda::lapack
16 changes: 16 additions & 0 deletions c++/nda/lapack/interface/cxx_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,22 @@ namespace nda::lapack::f77 {
LAPACK_zgesvd(&JOBU, &JOBVT, &M, &N, A, &LDA, S, U, &LDU, VT, &LDVT, WORK, &LWORK, RWORK, &INFO);
}

void geqp3(int M, int N, double *A, int LDA, int *JPVT, double *TAU, double *WORK, int LWORK, [[maybe_unused]] double *RWORK, int &INFO) {
LAPACK_dgeqp3(&M, &N, A, &LDA, JPVT, TAU, WORK, &LWORK, &INFO);
}
void geqp3(int M, int N, std::complex<double> *A, int LDA, int *JPVT, std::complex<double> *TAU, std::complex<double> *WORK, int LWORK,
double *RWORK, int &INFO) {
LAPACK_zgeqp3(&M, &N, A, &LDA, JPVT, TAU, WORK, &LWORK, RWORK, &INFO);
}

void orgqr(int M, int N, int K, double *A, int LDA, double *TAU, double *WORK, int LWORK, int &INFO) {
LAPACK_dorgqr(&M, &N, &K, A, &LDA, TAU, WORK, &LWORK, &INFO);
}

void ungqr(int M, int N, int K, std::complex<double> *A, int LDA, std::complex<double> *TAU, std::complex<double> *WORK, int LWORK, int &INFO) {
LAPACK_zungqr(&M, &N, &K, A, &LDA, TAU, WORK, &LWORK, &INFO);
}

void getrf(int M, int N, double *A, int LDA, int *ipiv, int &info) { LAPACK_dgetrf(&M, &N, A, &LDA, ipiv, &info); }
void getrf(int M, int N, std::complex<double> *A, int LDA, int *ipiv, int &info) { LAPACK_zgetrf(&M, &N, A, &LDA, ipiv, &info); }

Expand Down
8 changes: 8 additions & 0 deletions c++/nda/lapack/interface/cxx_interface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ namespace nda::lapack::f77 {
void gesvd(char JOBU, char JOBVT, int M, int N, std::complex<double> *A, int LDA, double *S, std::complex<double> *U, int LDU,
std::complex<double> *VT, int LDVT, std::complex<double> *WORK, int LWORK, double *RWORK, int &INFO);

void geqp3(int M, int N, double *A, int LDA, int *JPVT, double *TAU, double *WORK, int LWORK, double *RWORK, int &INFO);
void geqp3(int M, int N, std::complex<double> *A, int LDA, int *JPVT, std::complex<double> *TAU, std::complex<double> *WORK, int LWORK,
double *RWORK, int &INFO);

void orgqr(int M, int N, int K, double *A, int LDA, double *TAU, double *WORK, int LWORK, int &INFO);

void ungqr(int M, int N, int K, std::complex<double> *A, int LDA, std::complex<double> *TAU, std::complex<double> *WORK, int LWORK, int &INFO);

void getrf(int M, int N, double *A, int LDA, int *ipiv, int &info);
void getrf(int M, int N, std::complex<double> *A, int LDA, int *ipiv, int &info);

Expand Down
78 changes: 78 additions & 0 deletions c++/nda/lapack/orgqr.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Copyright (c) 2020-2022 Simons Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0.txt
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Authors: Olivier Parcollet, Nils Wentzell, Jason Kaye

#pragma once

#include "../lapack.hpp"
#include "nda/concepts.hpp"

namespace nda::lapack {

/**
* Generates an M-by-N real matrix Q with orthonormal columns, which is
* defined as the first N columns of a product of K elementary reflectors of
* order M:
*
* Q = H(1) H(2) . . . H(k)
*
* as returned by GEQRF with real M-by-N matrix A.
*
* [in,out] a is real array, dimension (LDA,N)
* On entry, the i-th column must contain the vector which
* defines the elementary reflector H(i), for i = 1,2,...,k, as
* returned by GEQRF or GEQP3 in the first k columns of its array
* argument A.
* On exit, the M-by-N matrix Q.
*
* [in] tau is a real array, dimension (K)
* TAU(i) must contain the scalar factor of the elementary
* reflector H(i), as returned by GEQRF or GEQP3.
*
* [return] info is INTEGER
* = 0: successful exit.
* < 0: if INFO = -i, the i-th argument had an illegal value.
*/
template <MemoryMatrix A, MemoryVector TAU>
requires(mem::on_host<A> and std::is_same_v<double, get_value_t<A>> and have_same_value_type_v<A, TAU>
and mem::have_compatible_addr_space<A, TAU>)
int orgqr(A &&a, TAU &&tau) {
static_assert(has_F_layout<A>, "C order not implemented");
static_assert(mem::have_host_compatible_addr_space<A, TAU>,
"orgqr is only implemented on the CPU, but was provided but was provided non-host compatible array");

using T = get_value_t<A>;
auto [m, n] = a.shape();
auto k = tau.size();

// Must be lapack compatible
EXPECTS(a.indexmap().min_stride() == 1);
EXPECTS(tau.indexmap().min_stride() == 1);

// First call to get the optimal buffersize
T bufferSize_T{};
int info = 0;
lapack::f77::orgqr(m, std::min(m, n), k, a.data(), get_ld(a), tau.data(), &bufferSize_T, -1, info);
int bufferSize = static_cast<int>(std::ceil(std::real(bufferSize_T)));

// Allocate work buffer and perform actual library call
nda::array<T, 1, C_layout, heap<mem::get_addr_space<A>>> work(bufferSize);
lapack::f77::orgqr(m, std::min(m, n), k, a.data(), get_ld(a), tau.data(), work.data(), bufferSize, info);

if (info) NDA_RUNTIME_ERROR << "Error in orgqr : info = " << info;
return info;
}

} // namespace nda::lapack
77 changes: 77 additions & 0 deletions c++/nda/lapack/ungqr.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Copyright (c) 2020-2022 Simons Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0.txt
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Authors: Olivier Parcollet, Nils Wentzell, Jason Kaye

#pragma once

#include "../lapack.hpp"
#include "nda/concepts.hpp"

namespace nda::lapack {

/**
* Generates an M-by-N complex matrix Q with orthonormal columns, which is
* defined as the first N columns of a product of K elementary reflectors of
* order M:
*
* Q = H(1) H(2) . . . H(k)
*
* as returned by GEQRF with complex M-by-N matrix A.
*
* [in,out] a is complex array, dimension (LDA,N)
* On entry, the i-th column must contain the vector which
* defines the elementary reflector H(i), for i = 1,2,...,k, as
* returned by GEQRF or GEQP3 in the first k columns of its array
* argument A. On exit, the M-by-N matrix Q.
*
* [in] tau is a complex array, dimension (K)
* TAU(i) must contain the scalar factor of the elementary
* reflector H(i), as returned by GEQRF or GEQP3.
*
* [return] info is INTEGER
* = 0: successful exit.
* < 0: if INFO = -i, the i-th argument had an illegal value.
*/
template <MemoryMatrix A, MemoryVector TAU>
requires(mem::on_host<A> and std::is_same_v<std::complex<double>, get_value_t<A>> and have_same_value_type_v<A, TAU>
and mem::have_compatible_addr_space<A, TAU>)
int ungqr(A &&a, TAU &&tau) {
static_assert(has_F_layout<A>, "C order not implemented");
static_assert(mem::have_host_compatible_addr_space<A, TAU>,
"ungqr is only implemented on the CPU, but was provided but was provided non-host compatible array");

using T = get_value_t<A>;
auto [m, n] = a.shape();
auto k = tau.size();

// Must be lapack compatible
EXPECTS(a.indexmap().min_stride() == 1);
EXPECTS(tau.indexmap().min_stride() == 1);

// First call to get the optimal buffersize
T bufferSize_T{};
int info = 0;
lapack::f77::ungqr(m, std::min(m, n), k, a.data(), get_ld(a), tau.data(), &bufferSize_T, -1, info);
int bufferSize = static_cast<int>(std::ceil(std::real(bufferSize_T)));

// Allocate work buffer and perform actual library call
nda::array<T, 1, C_layout, heap<mem::get_addr_space<A>>> work(bufferSize);
lapack::f77::ungqr(m, std::min(m, n), k, a.data(), get_ld(a), tau.data(), work.data(), bufferSize, info);

if (info) NDA_RUNTIME_ERROR << "Error in ungqr : info = " << info;
return info;
}

} // namespace nda::lapack
43 changes: 42 additions & 1 deletion test/c++/nda_lapack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,52 @@ void test_gesvd() { //NOLINT
TEST(lapack, gesvd) { test_gesvd<double>(); } //NOLINT
TEST(lapack, zgesvd) { test_gesvd<dcomplex>(); } //NOLINT

// ==================================== geqp3 & orgqr/ungqr ====================================

template <typename value_t, bool wide = false>
void test_geqp3() { //NOLINT
using matrix_t = matrix<value_t, F_layout>;

auto A = matrix_t{{{1, 1, 1}, {3, 2, 4}, {5, 3, 2}, {2, 4, 5}, {4, 5, 3}}};
if (wide) A = matrix_t{transpose(A)};

auto [M, N] = A.shape();

auto jpvt = nda::zeros<int>(N);
auto tau = nda::vector<value_t>(std::min(M, N));

auto Q = matrix_t{A};
lapack::geqp3(Q, jpvt, tau);

// Compute A*P by permuting columns of A
auto AP = matrix_t{A};
for (int j = 0; j < N; ++j) { AP(_, j) = A(_, jpvt(j) - 1); }

// Extract upper triangular matrix R
auto R = nda::matrix<value_t, F_layout>::zeros(std::min(M, N), N);
for (int i = 0; i < std::min(M, N); ++i) {
for (int j = i; j < N; ++j) { R(i, j) = Q(i, j); }
}

// Extract matrix Q with orthonormal columns
if constexpr (std::is_same_v<value_t, double>) {
lapack::orgqr(Q, tau);
} else {
lapack::ungqr(Q, tau);
}

EXPECT_ARRAY_NEAR(AP, Q(_, range(std::min(M, N))) * R, 1e-14);
}
TEST(lapack, geqp3_tall) { test_geqp3<double>(); } //NOLINT
TEST(lapack, zgeqp3_tall) { test_geqp3<dcomplex>(); } //NOLINT
TEST(lapack, geqp3_wide) { test_geqp3<double, true>(); } //NOLINT
TEST(lapack, zgeqp3_wide) { test_geqp3<dcomplex, true>(); } //NOLINT

// =================================== gelss =======================================

template <typename value_t>
void test_gelss() {
// Cf. http://www.netlib.org/lapack/explore-html/d3/d77/example___d_g_e_l_s__colmajor_8c_source.html
// Cf. https://www.netlib.org/lapack/lapack-3.9.0/LAPACKE/example/example_DGELS_colmajor.c
auto A = matrix<value_t>{{1, 1, 1}, {2, 3, 4}, {3, 5, 2}, {4, 2, 5}, {5, 4, 3}};
auto B = matrix<value_t>{{-10, -3}, {12, 14}, {14, 12}, {16, 16}, {18, 16}};
auto Bvec = vector<value_t>{-10, 12, 14, 16, 18}; // For testing vector right hand side
Expand Down

0 comments on commit a93d4f0

Please sign in to comment.