diff --git a/c++/nda/lapack.hpp b/c++/nda/lapack.hpp
index 7e74c4014..7186f1fc9 100644
--- a/c++/nda/lapack.hpp
+++ b/c++/nda/lapack.hpp
@@ -33,7 +33,10 @@ namespace nda::lapack {
 
 #include "lapack/gelss.hpp"
 #include "lapack/gesvd.hpp"
+#include "lapack/geqp3.hpp"
 #include "lapack/getrf.hpp"
 #include "lapack/getri.hpp"
 #include "lapack/getrs.hpp"
 #include "lapack/gtsv.hpp"
+#include "lapack/orgqr.hpp"
+#include "lapack/ungqr.hpp"
diff --git a/c++/nda/lapack/geqp3.hpp b/c++/nda/lapack/geqp3.hpp
new file mode 100644
index 000000000..073aba430
--- /dev/null
+++ b/c++/nda/lapack/geqp3.hpp
@@ -0,0 +1,88 @@
+// Copyright (c) 2020-2022 Simons Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0.txt
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Authors: Olivier Parcollet, Nils Wentzell, Jason Kaye
+
+#pragma once
+
+#include "../lapack.hpp"
+#include "nda/concepts.hpp"
+
+namespace nda::lapack {
+
+  /**
+   * Computes a QR factorization with column pivoting of an M-by-N matrix A:
+   *
+   * A*P = Q*R
+   *
+   * using Level 3 BLAS.
+   *
+   * [in,out]  a is real/complex array, dimension (LDA,N)
+   *           On entry, the M-by-N matrix A.  On exit, the upper triangle of a
+   *           contains the min(M,N)-by-N upper triangular matrix R; the
+   *           elements below the diagonal, together with the length min(M,N)
+   *           vector tau, represent the unitary matrix Q as a product of
+   *           min(M,N) elementary reflectors.
+   *
+   * [in,out]  jpvt is integer array, dimension (N)
+   *           On entry, if jpvt(j).ne.0, the j-th column of A is permuted to
+   *           the front of A*P (a leading column); if jpvt(j)=0, the j-th
+   *           column of A is a free column.  On exit, if JPVT(j)=k, then the
+   *           j-th column of A*P was the the k-th column of A.
+   *
+   * [out]     tau is a real/complex array, dimension (min(M,N))
+   *           The scalar factors of the elementary reflectors.
+   *
+   * [return]  info is INTEGER
+   *           = 0: successful exit. 
+   *           < 0: if INFO = -i, the i-th argument had an illegal value.
+   *
+   * @note If one wishes to carry out the column pivoted QR algorithm, the array
+   * \p jpvt must be initialized to zero; see the explanation above of the
+   * argument jpvt.
+   */
+  template <MemoryMatrix A, MemoryVector JPVT, MemoryVector TAU>
+    requires(mem::on_host<A> and is_blas_lapack_v<get_value_t<A>> and have_same_value_type_v<A, TAU>
+             and mem::have_compatible_addr_space<A, JPVT, TAU>)
+  int geqp3(A &&a, JPVT &&jpvt, TAU &&tau) {
+    static_assert(has_F_layout<A>, "C order not implemented");
+    static_assert(std::is_same_v<get_value_t<JPVT>, int>, "Pivoting array must have elements of type int");
+    static_assert(mem::have_host_compatible_addr_space<A, JPVT, TAU>,
+                  "geqp3 is only implemented on the CPU, but was provided non-host compatible array");
+
+    using T     = get_value_t<A>;
+    auto [m, n] = a.shape();
+    auto rwork  = array<double, 1>(2 * n);
+    EXPECTS(tau.size() >= std::min(m, n));
+
+    // Must be lapack compatible
+    EXPECTS(a.indexmap().min_stride() == 1);
+    EXPECTS(jpvt.indexmap().min_stride() == 1);
+    EXPECTS(tau.indexmap().min_stride() == 1);
+
+    // First call to get the optimal buffersize
+    T bufferSize_T{};
+    int info = 0;
+    lapack::f77::geqp3(m, n, a.data(), get_ld(a), jpvt.data(), tau.data(), &bufferSize_T, -1, rwork.data(), info);
+    int bufferSize = static_cast<int>(std::ceil(std::real(bufferSize_T)));
+
+    // Allocate work buffer and perform actual library call
+    nda::array<T, 1, C_layout, heap<mem::get_addr_space<A>>> work(bufferSize);
+    lapack::f77::geqp3(m, n, a.data(), get_ld(a), jpvt.data(), tau.data(), work.data(), bufferSize, rwork.data(), info);
+
+    if (info) NDA_RUNTIME_ERROR << "Error in geqp3 : info = " << info;
+    return info;
+  }
+
+} // namespace nda::lapack
diff --git a/c++/nda/lapack/interface/cxx_interface.cpp b/c++/nda/lapack/interface/cxx_interface.cpp
index 4b2b85664..b4ed40818 100644
--- a/c++/nda/lapack/interface/cxx_interface.cpp
+++ b/c++/nda/lapack/interface/cxx_interface.cpp
@@ -47,6 +47,22 @@ namespace nda::lapack::f77 {
     LAPACK_zgesvd(&JOBU, &JOBVT, &M, &N, A, &LDA, S, U, &LDU, VT, &LDVT, WORK, &LWORK, RWORK, &INFO);
   }
 
+  void geqp3(int M, int N, double *A, int LDA, int *JPVT, double *TAU, double *WORK, int LWORK, [[maybe_unused]] double *RWORK, int &INFO) {
+    LAPACK_dgeqp3(&M, &N, A, &LDA, JPVT, TAU, WORK, &LWORK, &INFO);
+  }
+  void geqp3(int M, int N, std::complex<double> *A, int LDA, int *JPVT, std::complex<double> *TAU, std::complex<double> *WORK, int LWORK,
+             double *RWORK, int &INFO) {
+    LAPACK_zgeqp3(&M, &N, A, &LDA, JPVT, TAU, WORK, &LWORK, RWORK, &INFO);
+  }
+
+  void orgqr(int M, int N, int K, double *A, int LDA, double *TAU, double *WORK, int LWORK, int &INFO) {
+    LAPACK_dorgqr(&M, &N, &K, A, &LDA, TAU, WORK, &LWORK, &INFO);
+  }
+
+  void ungqr(int M, int N, int K, std::complex<double> *A, int LDA, std::complex<double> *TAU, std::complex<double> *WORK, int LWORK, int &INFO) {
+    LAPACK_zungqr(&M, &N, &K, A, &LDA, TAU, WORK, &LWORK, &INFO);
+  }
+
   void getrf(int M, int N, double *A, int LDA, int *ipiv, int &info) { LAPACK_dgetrf(&M, &N, A, &LDA, ipiv, &info); }
   void getrf(int M, int N, std::complex<double> *A, int LDA, int *ipiv, int &info) { LAPACK_zgetrf(&M, &N, A, &LDA, ipiv, &info); }
 
diff --git a/c++/nda/lapack/interface/cxx_interface.hpp b/c++/nda/lapack/interface/cxx_interface.hpp
index 36e42373f..07b27cd01 100644
--- a/c++/nda/lapack/interface/cxx_interface.hpp
+++ b/c++/nda/lapack/interface/cxx_interface.hpp
@@ -34,6 +34,14 @@ namespace nda::lapack::f77 {
   void gesvd(char JOBU, char JOBVT, int M, int N, std::complex<double> *A, int LDA, double *S, std::complex<double> *U, int LDU,
              std::complex<double> *VT, int LDVT, std::complex<double> *WORK, int LWORK, double *RWORK, int &INFO);
 
+  void geqp3(int M, int N, double *A, int LDA, int *JPVT, double *TAU, double *WORK, int LWORK, double *RWORK, int &INFO);
+  void geqp3(int M, int N, std::complex<double> *A, int LDA, int *JPVT, std::complex<double> *TAU, std::complex<double> *WORK, int LWORK,
+             double *RWORK, int &INFO);
+
+  void orgqr(int M, int N, int K, double *A, int LDA, double *TAU, double *WORK, int LWORK, int &INFO);
+
+  void ungqr(int M, int N, int K, std::complex<double> *A, int LDA, std::complex<double> *TAU, std::complex<double> *WORK, int LWORK, int &INFO);
+
   void getrf(int M, int N, double *A, int LDA, int *ipiv, int &info);
   void getrf(int M, int N, std::complex<double> *A, int LDA, int *ipiv, int &info);
 
diff --git a/c++/nda/lapack/orgqr.hpp b/c++/nda/lapack/orgqr.hpp
new file mode 100644
index 000000000..51ba3908d
--- /dev/null
+++ b/c++/nda/lapack/orgqr.hpp
@@ -0,0 +1,78 @@
+// Copyright (c) 2020-2022 Simons Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0.txt
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Authors: Olivier Parcollet, Nils Wentzell, Jason Kaye
+
+#pragma once
+
+#include "../lapack.hpp"
+#include "nda/concepts.hpp"
+
+namespace nda::lapack {
+
+  /**
+   * Generates an M-by-N real matrix Q with orthonormal columns, which is
+   * defined as the first N columns of a product of K elementary reflectors of
+   * order M:
+   *
+   * Q  =  H(1) H(2) . . . H(k)
+   *
+   * as returned by GEQRF with real M-by-N matrix A.
+   *
+   * [in,out]  a is real array, dimension (LDA,N)
+   *           On entry, the i-th column must contain the vector which
+   *           defines the elementary reflector H(i), for i = 1,2,...,k, as
+   *           returned by GEQRF or GEQP3 in the first k columns of its array
+   *           argument A.
+   *           On exit, the M-by-N matrix Q.
+   *
+   * [in]      tau is a real array, dimension (K)
+   *           TAU(i) must contain the scalar factor of the elementary
+   *           reflector H(i), as returned by GEQRF or GEQP3.
+   *
+   * [return]  info is INTEGER
+   *           = 0: successful exit. 
+   *           < 0: if INFO = -i, the i-th argument had an illegal value.
+   */
+  template <MemoryMatrix A, MemoryVector TAU>
+    requires(mem::on_host<A> and std::is_same_v<double, get_value_t<A>> and have_same_value_type_v<A, TAU>
+             and mem::have_compatible_addr_space<A, TAU>)
+  int orgqr(A &&a, TAU &&tau) {
+    static_assert(has_F_layout<A>, "C order not implemented");
+    static_assert(mem::have_host_compatible_addr_space<A, TAU>,
+                  "orgqr is only implemented on the CPU, but was provided but was provided non-host compatible array");
+
+    using T     = get_value_t<A>;
+    auto [m, n] = a.shape();
+    auto k      = tau.size();
+
+    // Must be lapack compatible
+    EXPECTS(a.indexmap().min_stride() == 1);
+    EXPECTS(tau.indexmap().min_stride() == 1);
+
+    // First call to get the optimal buffersize
+    T bufferSize_T{};
+    int info = 0;
+    lapack::f77::orgqr(m, std::min(m, n), k, a.data(), get_ld(a), tau.data(), &bufferSize_T, -1, info);
+    int bufferSize = static_cast<int>(std::ceil(std::real(bufferSize_T)));
+
+    // Allocate work buffer and perform actual library call
+    nda::array<T, 1, C_layout, heap<mem::get_addr_space<A>>> work(bufferSize);
+    lapack::f77::orgqr(m, std::min(m, n), k, a.data(), get_ld(a), tau.data(), work.data(), bufferSize, info);
+
+    if (info) NDA_RUNTIME_ERROR << "Error in orgqr : info = " << info;
+    return info;
+  }
+
+} // namespace nda::lapack
diff --git a/c++/nda/lapack/ungqr.hpp b/c++/nda/lapack/ungqr.hpp
new file mode 100644
index 000000000..ab8d91576
--- /dev/null
+++ b/c++/nda/lapack/ungqr.hpp
@@ -0,0 +1,77 @@
+// Copyright (c) 2020-2022 Simons Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0.txt
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Authors: Olivier Parcollet, Nils Wentzell, Jason Kaye
+
+#pragma once
+
+#include "../lapack.hpp"
+#include "nda/concepts.hpp"
+
+namespace nda::lapack {
+
+  /**
+   * Generates an M-by-N complex matrix Q with orthonormal columns, which is
+   * defined as the first N columns of a product of K elementary reflectors of
+   * order M:
+   *
+   * Q  =  H(1) H(2) . . . H(k)
+   *
+   * as returned by GEQRF with complex M-by-N matrix A.
+   *
+   * [in,out]  a is complex array, dimension (LDA,N)
+   *           On entry, the i-th column must contain the vector which
+   *           defines the elementary reflector H(i), for i = 1,2,...,k, as
+   *           returned by GEQRF or GEQP3 in the first k columns of its array
+   *           argument A. On exit, the M-by-N matrix Q.
+   *
+   * [in]      tau is a complex array, dimension (K)
+   *           TAU(i) must contain the scalar factor of the elementary
+   *           reflector H(i), as returned by GEQRF or GEQP3.
+   *
+   * [return]  info is INTEGER
+   *           = 0: successful exit. 
+   *           < 0: if INFO = -i, the i-th argument had an illegal value.
+   */
+  template <MemoryMatrix A, MemoryVector TAU>
+    requires(mem::on_host<A> and std::is_same_v<std::complex<double>, get_value_t<A>> and have_same_value_type_v<A, TAU>
+             and mem::have_compatible_addr_space<A, TAU>)
+  int ungqr(A &&a, TAU &&tau) {
+    static_assert(has_F_layout<A>, "C order not implemented");
+    static_assert(mem::have_host_compatible_addr_space<A, TAU>,
+                  "ungqr is only implemented on the CPU, but was provided but was provided non-host compatible array");
+
+    using T     = get_value_t<A>;
+    auto [m, n] = a.shape();
+    auto k      = tau.size();
+
+    // Must be lapack compatible
+    EXPECTS(a.indexmap().min_stride() == 1);
+    EXPECTS(tau.indexmap().min_stride() == 1);
+
+    // First call to get the optimal buffersize
+    T bufferSize_T{};
+    int info = 0;
+    lapack::f77::ungqr(m, std::min(m, n), k, a.data(), get_ld(a), tau.data(), &bufferSize_T, -1, info);
+    int bufferSize = static_cast<int>(std::ceil(std::real(bufferSize_T)));
+
+    // Allocate work buffer and perform actual library call
+    nda::array<T, 1, C_layout, heap<mem::get_addr_space<A>>> work(bufferSize);
+    lapack::f77::ungqr(m, std::min(m, n), k, a.data(), get_ld(a), tau.data(), work.data(), bufferSize, info);
+
+    if (info) NDA_RUNTIME_ERROR << "Error in ungqr : info = " << info;
+    return info;
+  }
+
+} // namespace nda::lapack
diff --git a/test/c++/nda_lapack.cpp b/test/c++/nda_lapack.cpp
index b00efbb89..0445c1f7b 100644
--- a/test/c++/nda_lapack.cpp
+++ b/test/c++/nda_lapack.cpp
@@ -140,11 +140,52 @@ void test_gesvd() { //NOLINT
 TEST(lapack, gesvd) { test_gesvd<double>(); }    //NOLINT
 TEST(lapack, zgesvd) { test_gesvd<dcomplex>(); } //NOLINT
 
+// ==================================== geqp3 & orgqr/ungqr ====================================
+
+template <typename value_t, bool wide = false>
+void test_geqp3() { //NOLINT
+  using matrix_t = matrix<value_t, F_layout>;
+
+  auto A = matrix_t{{{1, 1, 1}, {3, 2, 4}, {5, 3, 2}, {2, 4, 5}, {4, 5, 3}}};
+  if (wide) A = matrix_t{transpose(A)};
+
+  auto [M, N] = A.shape();
+
+  auto jpvt = nda::zeros<int>(N);
+  auto tau  = nda::vector<value_t>(std::min(M, N));
+
+  auto Q = matrix_t{A};
+  lapack::geqp3(Q, jpvt, tau);
+
+  // Compute A*P by permuting columns of A
+  auto AP = matrix_t{A};
+  for (int j = 0; j < N; ++j) { AP(_, j) = A(_, jpvt(j) - 1); }
+
+  // Extract upper triangular matrix R
+  auto R = nda::matrix<value_t, F_layout>::zeros(std::min(M, N), N);
+  for (int i = 0; i < std::min(M, N); ++i) {
+    for (int j = i; j < N; ++j) { R(i, j) = Q(i, j); }
+  }
+
+  // Extract matrix Q with orthonormal columns
+  if constexpr (std::is_same_v<value_t, double>) {
+    lapack::orgqr(Q, tau);
+  } else {
+    lapack::ungqr(Q, tau);
+  }
+
+  EXPECT_ARRAY_NEAR(AP, Q(_, range(std::min(M, N))) * R, 1e-14);
+}
+TEST(lapack, geqp3_tall) { test_geqp3<double>(); }          //NOLINT
+TEST(lapack, zgeqp3_tall) { test_geqp3<dcomplex>(); }       //NOLINT
+TEST(lapack, geqp3_wide) { test_geqp3<double, true>(); }    //NOLINT
+TEST(lapack, zgeqp3_wide) { test_geqp3<dcomplex, true>(); } //NOLINT
+
 // =================================== gelss =======================================
 
 template <typename value_t>
 void test_gelss() {
-  // Cf. http://www.netlib.org/lapack/explore-html/d3/d77/example___d_g_e_l_s__colmajor_8c_source.html
+  // Cf. https://www.netlib.org/lapack/lapack-3.9.0/LAPACKE/example/example_DGELS_colmajor.c
   auto A    = matrix<value_t>{{1, 1, 1}, {2, 3, 4}, {3, 5, 2}, {4, 2, 5}, {5, 4, 3}};
   auto B    = matrix<value_t>{{-10, -3}, {12, 14}, {14, 12}, {16, 16}, {18, 16}};
   auto Bvec = vector<value_t>{-10, 12, 14, 16, 18}; // For testing vector right hand side