diff --git a/c++/nda/lapack.hpp b/c++/nda/lapack.hpp index 7e74c4014..7186f1fc9 100644 --- a/c++/nda/lapack.hpp +++ b/c++/nda/lapack.hpp @@ -33,7 +33,10 @@ namespace nda::lapack { #include "lapack/gelss.hpp" #include "lapack/gesvd.hpp" +#include "lapack/geqp3.hpp" #include "lapack/getrf.hpp" #include "lapack/getri.hpp" #include "lapack/getrs.hpp" #include "lapack/gtsv.hpp" +#include "lapack/orgqr.hpp" +#include "lapack/ungqr.hpp" diff --git a/c++/nda/lapack/geqp3.hpp b/c++/nda/lapack/geqp3.hpp new file mode 100644 index 000000000..073aba430 --- /dev/null +++ b/c++/nda/lapack/geqp3.hpp @@ -0,0 +1,88 @@ +// Copyright (c) 2020-2022 Simons Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Authors: Olivier Parcollet, Nils Wentzell, Jason Kaye + +#pragma once + +#include "../lapack.hpp" +#include "nda/concepts.hpp" + +namespace nda::lapack { + + /** + * Computes a QR factorization with column pivoting of an M-by-N matrix A: + * + * A*P = Q*R + * + * using Level 3 BLAS. + * + * [in,out] a is real/complex array, dimension (LDA,N) + * On entry, the M-by-N matrix A. On exit, the upper triangle of a + * contains the min(M,N)-by-N upper triangular matrix R; the + * elements below the diagonal, together with the length min(M,N) + * vector tau, represent the unitary matrix Q as a product of + * min(M,N) elementary reflectors. + * + * [in,out] jpvt is integer array, dimension (N) + * On entry, if jpvt(j).ne.0, the j-th column of A is permuted to + * the front of A*P (a leading column); if jpvt(j)=0, the j-th + * column of A is a free column. On exit, if JPVT(j)=k, then the + * j-th column of A*P was the the k-th column of A. + * + * [out] tau is a real/complex array, dimension (min(M,N)) + * The scalar factors of the elementary reflectors. + * + * [return] info is INTEGER + * = 0: successful exit. + * < 0: if INFO = -i, the i-th argument had an illegal value. + * + * @note If one wishes to carry out the column pivoted QR algorithm, the array + * \p jpvt must be initialized to zero; see the explanation above of the + * argument jpvt. + */ + template + requires(mem::on_host and is_blas_lapack_v> and have_same_value_type_v + and mem::have_compatible_addr_space) + int geqp3(A &&a, JPVT &&jpvt, TAU &&tau) { + static_assert(has_F_layout, "C order not implemented"); + static_assert(std::is_same_v, int>, "Pivoting array must have elements of type int"); + static_assert(mem::have_host_compatible_addr_space, + "geqp3 is only implemented on the CPU, but was provided non-host compatible array"); + + using T = get_value_t; + auto [m, n] = a.shape(); + auto rwork = array(2 * n); + EXPECTS(tau.size() >= std::min(m, n)); + + // Must be lapack compatible + EXPECTS(a.indexmap().min_stride() == 1); + EXPECTS(jpvt.indexmap().min_stride() == 1); + EXPECTS(tau.indexmap().min_stride() == 1); + + // First call to get the optimal buffersize + T bufferSize_T{}; + int info = 0; + lapack::f77::geqp3(m, n, a.data(), get_ld(a), jpvt.data(), tau.data(), &bufferSize_T, -1, rwork.data(), info); + int bufferSize = static_cast(std::ceil(std::real(bufferSize_T))); + + // Allocate work buffer and perform actual library call + nda::array>> work(bufferSize); + lapack::f77::geqp3(m, n, a.data(), get_ld(a), jpvt.data(), tau.data(), work.data(), bufferSize, rwork.data(), info); + + if (info) NDA_RUNTIME_ERROR << "Error in geqp3 : info = " << info; + return info; + } + +} // namespace nda::lapack diff --git a/c++/nda/lapack/interface/cxx_interface.cpp b/c++/nda/lapack/interface/cxx_interface.cpp index 4b2b85664..b4ed40818 100644 --- a/c++/nda/lapack/interface/cxx_interface.cpp +++ b/c++/nda/lapack/interface/cxx_interface.cpp @@ -47,6 +47,22 @@ namespace nda::lapack::f77 { LAPACK_zgesvd(&JOBU, &JOBVT, &M, &N, A, &LDA, S, U, &LDU, VT, &LDVT, WORK, &LWORK, RWORK, &INFO); } + void geqp3(int M, int N, double *A, int LDA, int *JPVT, double *TAU, double *WORK, int LWORK, [[maybe_unused]] double *RWORK, int &INFO) { + LAPACK_dgeqp3(&M, &N, A, &LDA, JPVT, TAU, WORK, &LWORK, &INFO); + } + void geqp3(int M, int N, std::complex *A, int LDA, int *JPVT, std::complex *TAU, std::complex *WORK, int LWORK, + double *RWORK, int &INFO) { + LAPACK_zgeqp3(&M, &N, A, &LDA, JPVT, TAU, WORK, &LWORK, RWORK, &INFO); + } + + void orgqr(int M, int N, int K, double *A, int LDA, double *TAU, double *WORK, int LWORK, int &INFO) { + LAPACK_dorgqr(&M, &N, &K, A, &LDA, TAU, WORK, &LWORK, &INFO); + } + + void ungqr(int M, int N, int K, std::complex *A, int LDA, std::complex *TAU, std::complex *WORK, int LWORK, int &INFO) { + LAPACK_zungqr(&M, &N, &K, A, &LDA, TAU, WORK, &LWORK, &INFO); + } + void getrf(int M, int N, double *A, int LDA, int *ipiv, int &info) { LAPACK_dgetrf(&M, &N, A, &LDA, ipiv, &info); } void getrf(int M, int N, std::complex *A, int LDA, int *ipiv, int &info) { LAPACK_zgetrf(&M, &N, A, &LDA, ipiv, &info); } diff --git a/c++/nda/lapack/interface/cxx_interface.hpp b/c++/nda/lapack/interface/cxx_interface.hpp index 36e42373f..07b27cd01 100644 --- a/c++/nda/lapack/interface/cxx_interface.hpp +++ b/c++/nda/lapack/interface/cxx_interface.hpp @@ -34,6 +34,14 @@ namespace nda::lapack::f77 { void gesvd(char JOBU, char JOBVT, int M, int N, std::complex *A, int LDA, double *S, std::complex *U, int LDU, std::complex *VT, int LDVT, std::complex *WORK, int LWORK, double *RWORK, int &INFO); + void geqp3(int M, int N, double *A, int LDA, int *JPVT, double *TAU, double *WORK, int LWORK, double *RWORK, int &INFO); + void geqp3(int M, int N, std::complex *A, int LDA, int *JPVT, std::complex *TAU, std::complex *WORK, int LWORK, + double *RWORK, int &INFO); + + void orgqr(int M, int N, int K, double *A, int LDA, double *TAU, double *WORK, int LWORK, int &INFO); + + void ungqr(int M, int N, int K, std::complex *A, int LDA, std::complex *TAU, std::complex *WORK, int LWORK, int &INFO); + void getrf(int M, int N, double *A, int LDA, int *ipiv, int &info); void getrf(int M, int N, std::complex *A, int LDA, int *ipiv, int &info); diff --git a/c++/nda/lapack/orgqr.hpp b/c++/nda/lapack/orgqr.hpp new file mode 100644 index 000000000..51ba3908d --- /dev/null +++ b/c++/nda/lapack/orgqr.hpp @@ -0,0 +1,78 @@ +// Copyright (c) 2020-2022 Simons Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Authors: Olivier Parcollet, Nils Wentzell, Jason Kaye + +#pragma once + +#include "../lapack.hpp" +#include "nda/concepts.hpp" + +namespace nda::lapack { + + /** + * Generates an M-by-N real matrix Q with orthonormal columns, which is + * defined as the first N columns of a product of K elementary reflectors of + * order M: + * + * Q = H(1) H(2) . . . H(k) + * + * as returned by GEQRF with real M-by-N matrix A. + * + * [in,out] a is real array, dimension (LDA,N) + * On entry, the i-th column must contain the vector which + * defines the elementary reflector H(i), for i = 1,2,...,k, as + * returned by GEQRF or GEQP3 in the first k columns of its array + * argument A. + * On exit, the M-by-N matrix Q. + * + * [in] tau is a real array, dimension (K) + * TAU(i) must contain the scalar factor of the elementary + * reflector H(i), as returned by GEQRF or GEQP3. + * + * [return] info is INTEGER + * = 0: successful exit. + * < 0: if INFO = -i, the i-th argument had an illegal value. + */ + template + requires(mem::on_host and std::is_same_v> and have_same_value_type_v + and mem::have_compatible_addr_space) + int orgqr(A &&a, TAU &&tau) { + static_assert(has_F_layout, "C order not implemented"); + static_assert(mem::have_host_compatible_addr_space, + "orgqr is only implemented on the CPU, but was provided but was provided non-host compatible array"); + + using T = get_value_t; + auto [m, n] = a.shape(); + auto k = tau.size(); + + // Must be lapack compatible + EXPECTS(a.indexmap().min_stride() == 1); + EXPECTS(tau.indexmap().min_stride() == 1); + + // First call to get the optimal buffersize + T bufferSize_T{}; + int info = 0; + lapack::f77::orgqr(m, std::min(m, n), k, a.data(), get_ld(a), tau.data(), &bufferSize_T, -1, info); + int bufferSize = static_cast(std::ceil(std::real(bufferSize_T))); + + // Allocate work buffer and perform actual library call + nda::array>> work(bufferSize); + lapack::f77::orgqr(m, std::min(m, n), k, a.data(), get_ld(a), tau.data(), work.data(), bufferSize, info); + + if (info) NDA_RUNTIME_ERROR << "Error in orgqr : info = " << info; + return info; + } + +} // namespace nda::lapack diff --git a/c++/nda/lapack/ungqr.hpp b/c++/nda/lapack/ungqr.hpp new file mode 100644 index 000000000..ab8d91576 --- /dev/null +++ b/c++/nda/lapack/ungqr.hpp @@ -0,0 +1,77 @@ +// Copyright (c) 2020-2022 Simons Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Authors: Olivier Parcollet, Nils Wentzell, Jason Kaye + +#pragma once + +#include "../lapack.hpp" +#include "nda/concepts.hpp" + +namespace nda::lapack { + + /** + * Generates an M-by-N complex matrix Q with orthonormal columns, which is + * defined as the first N columns of a product of K elementary reflectors of + * order M: + * + * Q = H(1) H(2) . . . H(k) + * + * as returned by GEQRF with complex M-by-N matrix A. + * + * [in,out] a is complex array, dimension (LDA,N) + * On entry, the i-th column must contain the vector which + * defines the elementary reflector H(i), for i = 1,2,...,k, as + * returned by GEQRF or GEQP3 in the first k columns of its array + * argument A. On exit, the M-by-N matrix Q. + * + * [in] tau is a complex array, dimension (K) + * TAU(i) must contain the scalar factor of the elementary + * reflector H(i), as returned by GEQRF or GEQP3. + * + * [return] info is INTEGER + * = 0: successful exit. + * < 0: if INFO = -i, the i-th argument had an illegal value. + */ + template + requires(mem::on_host and std::is_same_v, get_value_t> and have_same_value_type_v + and mem::have_compatible_addr_space) + int ungqr(A &&a, TAU &&tau) { + static_assert(has_F_layout, "C order not implemented"); + static_assert(mem::have_host_compatible_addr_space, + "ungqr is only implemented on the CPU, but was provided but was provided non-host compatible array"); + + using T = get_value_t; + auto [m, n] = a.shape(); + auto k = tau.size(); + + // Must be lapack compatible + EXPECTS(a.indexmap().min_stride() == 1); + EXPECTS(tau.indexmap().min_stride() == 1); + + // First call to get the optimal buffersize + T bufferSize_T{}; + int info = 0; + lapack::f77::ungqr(m, std::min(m, n), k, a.data(), get_ld(a), tau.data(), &bufferSize_T, -1, info); + int bufferSize = static_cast(std::ceil(std::real(bufferSize_T))); + + // Allocate work buffer and perform actual library call + nda::array>> work(bufferSize); + lapack::f77::ungqr(m, std::min(m, n), k, a.data(), get_ld(a), tau.data(), work.data(), bufferSize, info); + + if (info) NDA_RUNTIME_ERROR << "Error in ungqr : info = " << info; + return info; + } + +} // namespace nda::lapack diff --git a/test/c++/nda_lapack.cpp b/test/c++/nda_lapack.cpp index b00efbb89..0445c1f7b 100644 --- a/test/c++/nda_lapack.cpp +++ b/test/c++/nda_lapack.cpp @@ -140,11 +140,52 @@ void test_gesvd() { //NOLINT TEST(lapack, gesvd) { test_gesvd(); } //NOLINT TEST(lapack, zgesvd) { test_gesvd(); } //NOLINT +// ==================================== geqp3 & orgqr/ungqr ==================================== + +template +void test_geqp3() { //NOLINT + using matrix_t = matrix; + + auto A = matrix_t{{{1, 1, 1}, {3, 2, 4}, {5, 3, 2}, {2, 4, 5}, {4, 5, 3}}}; + if (wide) A = matrix_t{transpose(A)}; + + auto [M, N] = A.shape(); + + auto jpvt = nda::zeros(N); + auto tau = nda::vector(std::min(M, N)); + + auto Q = matrix_t{A}; + lapack::geqp3(Q, jpvt, tau); + + // Compute A*P by permuting columns of A + auto AP = matrix_t{A}; + for (int j = 0; j < N; ++j) { AP(_, j) = A(_, jpvt(j) - 1); } + + // Extract upper triangular matrix R + auto R = nda::matrix::zeros(std::min(M, N), N); + for (int i = 0; i < std::min(M, N); ++i) { + for (int j = i; j < N; ++j) { R(i, j) = Q(i, j); } + } + + // Extract matrix Q with orthonormal columns + if constexpr (std::is_same_v) { + lapack::orgqr(Q, tau); + } else { + lapack::ungqr(Q, tau); + } + + EXPECT_ARRAY_NEAR(AP, Q(_, range(std::min(M, N))) * R, 1e-14); +} +TEST(lapack, geqp3_tall) { test_geqp3(); } //NOLINT +TEST(lapack, zgeqp3_tall) { test_geqp3(); } //NOLINT +TEST(lapack, geqp3_wide) { test_geqp3(); } //NOLINT +TEST(lapack, zgeqp3_wide) { test_geqp3(); } //NOLINT + // =================================== gelss ======================================= template void test_gelss() { - // Cf. http://www.netlib.org/lapack/explore-html/d3/d77/example___d_g_e_l_s__colmajor_8c_source.html + // Cf. https://www.netlib.org/lapack/lapack-3.9.0/LAPACKE/example/example_DGELS_colmajor.c auto A = matrix{{1, 1, 1}, {2, 3, 4}, {3, 5, 2}, {4, 2, 5}, {5, 4, 3}}; auto B = matrix{{-10, -3}, {12, 14}, {14, 12}, {16, 16}, {18, 16}}; auto Bvec = vector{-10, 12, 14, 16, 18}; // For testing vector right hand side