Mark more constexpr functions as device-available (#17545)

Contributes to #7795. Also contributes to rapidsai/build-planning#76. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Nghia Truong (https://github.com/ttnghia) - Yunsong Wang (https://github.com/PointKernel) - Bradley Dice (https://github.com/bdice) - David Wendt (https://github.com/davidwendt) URL: #17545
rapidsai · Dec 13, 2024 · 34e2045 · 34e2045
1 parent 1a67646
commit 34e2045
Show file tree

Hide file tree

Showing 32 changed files with 302 additions and 199 deletions.
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
@@ -35,6 +35,10 @@ rapids-mamba-retry install \
 
 export RAPIDS_DOCS_DIR="$(mktemp -d)"
 
+EXITCODE=0
+trap "EXITCODE=1" ERR
+set +e
+
 rapids-logger "Build CPP docs"
 pushd cpp/doxygen
 aws s3 cp s3://rapidsai-docs/librmm/html/${RAPIDS_VERSION_MAJOR_MINOR}/rmm.tag . || echo "Failed to download rmm Doxygen tag"
@@ -58,3 +62,5 @@ mv build/dirhtml/* "${RAPIDS_DOCS_DIR}/dask-cudf/html"
 popd
 
 RAPIDS_VERSION_NUMBER="${RAPIDS_VERSION_MAJOR_MINOR}" rapids-upload-docs
+
+exit ${EXITCODE}
diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh
@@ -33,11 +33,13 @@
 #include <rmm/cuda_stream_view.hpp>
 
 #include <cuda/std/optional>
+#include <cuda/std/type_traits>
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
 #include <thrust/pair.h>
 
 #include <algorithm>
+#include <type_traits>
 
 /**
  * @file column_device_view.cuh
@@ -56,8 +58,8 @@ namespace CUDF_EXPORT cudf {
  *
  */
 struct nullate {
-  struct YES : std::bool_constant<true> {};
-  struct NO : std::bool_constant<false> {};
+  struct YES : cuda::std::bool_constant<true> {};
+  struct NO : cuda::std::bool_constant<false> {};
   /**
    * @brief `nullate::DYNAMIC` defers the determination of nullability to run time rather than
    * compile time. The calling code is responsible for specifying whether or not nulls are
@@ -80,7 +82,7 @@ struct nullate {
      * @return `true` if nulls are expected in the operation in which this object is applied,
      * otherwise false
      */
-    constexpr operator bool() const noexcept { return value; }
+    CUDF_HOST_DEVICE constexpr operator bool() const noexcept { return value; }
     bool value;  ///< True if nulls are expected
   };
 };
@@ -319,14 +321,14 @@ class alignas(16) column_device_view_base {
   }
 
   template <typename C, typename T, typename = void>
-  struct has_element_accessor_impl : std::false_type {};
+  struct has_element_accessor_impl : cuda::std::false_type {};
 
   template <typename C, typename T>
   struct has_element_accessor_impl<
     C,
     T,
-    void_t<decltype(std::declval<C>().template element<T>(std::declval<size_type>()))>>
-    : std::true_type {};
+    void_t<decltype(cuda::std::declval<C>().template element<T>(cuda::std::declval<size_type>()))>>
+    : cuda::std::true_type {};
 };
 // @cond
 // Forward declaration
@@ -534,7 +536,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
    * @return `true` if `column_device_view::element<T>()` has a valid overload, `false` otherwise
    */
   template <typename T>
-  static constexpr bool has_element_accessor()
+  CUDF_HOST_DEVICE static constexpr bool has_element_accessor()
   {
     return has_element_accessor_impl<column_device_view, T>::value;
   }
@@ -1044,7 +1046,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view
    * @return `true` if `mutable_column_device_view::element<T>()` has a valid overload, `false`
    */
   template <typename T>
-  static constexpr bool has_element_accessor()
+  CUDF_HOST_DEVICE static constexpr bool has_element_accessor()
   {
     return has_element_accessor_impl<mutable_column_device_view, T>::value;
   }

diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh
@@ -36,7 +36,7 @@
 namespace cudf {
 namespace detail {
 template <typename T>
-constexpr bool is_product_supported()
+CUDF_HOST_DEVICE constexpr bool is_product_supported()
 {
   return is_numeric<T>();
 }

diff --git a/cpp/include/cudf/detail/utilities/cuda.cuh b/cpp/include/cudf/detail/utilities/cuda.cuh
@@ -74,9 +74,10 @@ class grid_1d {
    * @param num_threads_per_block The number of threads per block
    * @return thread_index_type The global thread index
    */
-  static constexpr thread_index_type global_thread_id(thread_index_type thread_id,
-                                                      thread_index_type block_id,
-                                                      thread_index_type num_threads_per_block)
+  __device__ static constexpr thread_index_type global_thread_id(
+    thread_index_type thread_id,
+    thread_index_type block_id,
+    thread_index_type num_threads_per_block)
   {
     return thread_id + block_id * num_threads_per_block;
   }
@@ -114,8 +115,8 @@ class grid_1d {
    * @param num_threads_per_block The number of threads per block
    * @return thread_index_type The global thread index
    */
-  static constexpr thread_index_type grid_stride(thread_index_type num_threads_per_block,
-                                                 thread_index_type num_blocks_per_grid)
+  __device__ static constexpr thread_index_type grid_stride(thread_index_type num_threads_per_block,
+                                                            thread_index_type num_blocks_per_grid)
   {
     return num_threads_per_block * num_blocks_per_grid;
   }

diff --git a/cpp/include/cudf/detail/utilities/device_operators.cuh b/cpp/include/cudf/detail/utilities/device_operators.cuh
@@ -29,6 +29,8 @@
 #include <cudf/utilities/error.hpp>
 #include <cudf/utilities/traits.hpp>
 
+#include <cuda/std/functional>
+
 #include <type_traits>
 
 namespace cudf {
@@ -42,7 +44,7 @@ template <typename LHS,
           std::enable_if_t<cudf::is_relationally_comparable<LHS, RHS>()>* = nullptr>
 CUDF_HOST_DEVICE inline auto min(LHS const& lhs, RHS const& rhs)
 {
-  return std::min(lhs, rhs);
+  return cuda::std::min(lhs, rhs);
 }
 
 /**
@@ -53,7 +55,7 @@ template <typename LHS,
           std::enable_if_t<cudf::is_relationally_comparable<LHS, RHS>()>* = nullptr>
 CUDF_HOST_DEVICE inline auto max(LHS const& lhs, RHS const& rhs)
 {
-  return std::max(lhs, rhs);
+  return cuda::std::max(lhs, rhs);
 }
 }  // namespace detail
 
@@ -68,20 +70,20 @@ struct DeviceSum {
   }
 
   template <typename T, std::enable_if_t<cudf::is_timestamp<T>()>* = nullptr>
-  static constexpr T identity()
+  CUDF_HOST_DEVICE static constexpr T identity()
   {
     return T{typename T::duration{0}};
   }
 
   template <typename T,
             std::enable_if_t<!cudf::is_timestamp<T>() && !cudf::is_fixed_point<T>()>* = nullptr>
-  static constexpr T identity()
+  CUDF_HOST_DEVICE static constexpr T identity()
   {
     return T{0};
   }
 
   template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
-  static constexpr T identity()
+  CUDF_HOST_DEVICE static constexpr T identity()
   {
 #ifndef __CUDA_ARCH__
     CUDF_FAIL("fixed_point does not yet support device operator identity");
@@ -109,7 +111,7 @@ struct DeviceCount {
   }
 
   template <typename T>
-  static constexpr T identity()
+  CUDF_HOST_DEVICE static constexpr T identity()
   {
     return T{};
   }
@@ -129,7 +131,7 @@ struct DeviceMin {
   template <typename T,
             std::enable_if_t<!std::is_same_v<T, cudf::string_view> && !cudf::is_dictionary<T>() &&
                              !cudf::is_fixed_point<T>()>* = nullptr>
-  static constexpr T identity()
+  CUDF_HOST_DEVICE static constexpr T identity()
   {
     // chrono types do not have std::numeric_limits specializations and should use T::max()
     // https://eel.is/c++draft/numeric.limits.general#6
@@ -143,7 +145,7 @@ struct DeviceMin {
   }
 
   template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
-  static constexpr T identity()
+  CUDF_HOST_DEVICE static constexpr T identity()
   {
 #ifndef __CUDA_ARCH__
     CUDF_FAIL("fixed_point does not yet support DeviceMin identity");
@@ -161,7 +163,7 @@ struct DeviceMin {
   }
 
   template <typename T, std::enable_if_t<cudf::is_dictionary<T>()>* = nullptr>
-  static constexpr T identity()
+  CUDF_HOST_DEVICE static constexpr T identity()
   {
     return static_cast<T>(T::max_value());
   }
@@ -181,7 +183,7 @@ struct DeviceMax {
   template <typename T,
             std::enable_if_t<!std::is_same_v<T, cudf::string_view> && !cudf::is_dictionary<T>() &&
                              !cudf::is_fixed_point<T>()>* = nullptr>
-  static constexpr T identity()
+  CUDF_HOST_DEVICE static constexpr T identity()
   {
     // chrono types do not have std::numeric_limits specializations and should use T::min()
     // https://eel.is/c++draft/numeric.limits.general#6
@@ -195,7 +197,7 @@ struct DeviceMax {
   }
 
   template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
-  static constexpr T identity()
+  CUDF_HOST_DEVICE static constexpr T identity()
   {
 #ifndef __CUDA_ARCH__
     CUDF_FAIL("fixed_point does not yet support DeviceMax identity");
@@ -212,7 +214,7 @@ struct DeviceMax {
   }
 
   template <typename T, std::enable_if_t<cudf::is_dictionary<T>()>* = nullptr>
-  static constexpr T identity()
+  CUDF_HOST_DEVICE static constexpr T identity()
   {
     return static_cast<T>(T::lowest_value());
   }
@@ -229,13 +231,13 @@ struct DeviceProduct {
   }
 
   template <typename T, std::enable_if_t<!cudf::is_fixed_point<T>()>* = nullptr>
-  static constexpr T identity()
+  CUDF_HOST_DEVICE static constexpr T identity()
   {
     return T{1};
   }
 
   template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
-  static constexpr T identity()
+  CUDF_HOST_DEVICE static constexpr T identity()
   {
 #ifndef __CUDA_ARCH__
     CUDF_FAIL("fixed_point does not yet support DeviceProduct identity");

diff --git a/cpp/include/cudf/detail/utilities/integer_utils.hpp b/cpp/include/cudf/detail/utilities/integer_utils.hpp
@@ -86,7 +86,7 @@ constexpr S round_down_safe(S number_to_round, S modulus) noexcept
  * `modulus` is positive and does not check for overflow.
  */
 template <typename S>
-constexpr S round_up_unsafe(S number_to_round, S modulus) noexcept
+CUDF_HOST_DEVICE constexpr S round_up_unsafe(S number_to_round, S modulus) noexcept
 {
   auto remainder = number_to_round % modulus;
   if (remainder == 0) { return number_to_round; }
@@ -187,7 +187,7 @@ constexpr bool is_a_power_of_two(I val) noexcept
  * @return Absolute value if value type is signed.
  */
 template <typename T>
-constexpr auto absolute_value(T value) -> T
+CUDF_HOST_DEVICE constexpr auto absolute_value(T value) -> T
 {
   if constexpr (cuda::std::is_signed<T>()) return numeric::detail::abs(value);
   return value;

diff --git a/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp b/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp
@@ -22,6 +22,7 @@
 #include <cuda/std/cmath>
 #include <cuda/std/limits>
 #include <cuda/std/type_traits>
+#include <cuda/std/utility>
 
 #include <cstring>
 
@@ -183,7 +184,7 @@ struct floating_converter {
    * @param integer_rep The bit-casted floating value to extract the exponent from
    * @return The stored base-2 exponent and significand, shifted for denormals
    */
-  CUDF_HOST_DEVICE inline static std::pair<IntegralType, int> get_significand_and_pow2(
+  CUDF_HOST_DEVICE inline static cuda::std::pair<IntegralType, int> get_significand_and_pow2(
     IntegralType integer_rep)
   {
     // Extract the significand
@@ -1008,7 +1009,7 @@ CUDF_HOST_DEVICE inline auto shift_to_binary_pospow(DecimalRep decimal_rep, int
   }
 
   // Our shifting_rep is now the integer mantissa, return it and the powers of 2
-  return std::pair{shifting_rep, pow2};
+  return cuda::std::pair{shifting_rep, pow2};
 }
 
 /**
@@ -1075,7 +1076,7 @@ CUDF_HOST_DEVICE inline auto shift_to_binary_negpow(DecimalRep decimal_rep, int
   }
 
   // Our shifting_rep is now the integer mantissa, return it and the powers of 2
-  return std::pair{shifting_rep, pow2};
+  return cuda::std::pair{shifting_rep, pow2};
 }
 
 /**

diff --git a/cpp/include/cudf/hashing/detail/hash_functions.cuh b/cpp/include/cudf/hashing/detail/hash_functions.cuh
@@ -18,7 +18,8 @@
 
 #include <cudf/utilities/traits.hpp>
 
-#include <limits>
+#include <cuda/std/cmath>
+#include <cuda/std/limits>
 
 namespace cudf::hashing::detail {
 
@@ -29,7 +30,7 @@ template <typename T>
 T __device__ inline normalize_nans(T const& key)
 {
   if constexpr (cudf::is_floating_point<T>()) {
-    if (std::isnan(key)) { return std::numeric_limits<T>::quiet_NaN(); }
+    if (cuda::std::isnan(key)) { return cuda::std::numeric_limits<T>::quiet_NaN(); }
   }
   return key;
 }

diff --git a/cpp/include/cudf/hashing/detail/hashing.hpp b/cpp/include/cudf/hashing/detail/hashing.hpp
@@ -82,7 +82,7 @@ std::unique_ptr<column> xxhash_64(table_view const& input,
  * @param rhs The second hash value
  * @return Combined hash value
  */
-constexpr uint32_t hash_combine(uint32_t lhs, uint32_t rhs)
+CUDF_HOST_DEVICE constexpr uint32_t hash_combine(uint32_t lhs, uint32_t rhs)
 {
   return lhs ^ (rhs + 0x9e37'79b9 + (lhs << 6) + (lhs >> 2));
 }