This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Merge branch 'main' into staging/1.7.0
wmaxey committed Nov 18, 2021
2 parents fdca6d7 + f443272 commit ce19c2a
Showing 7 changed files with 24 additions and 21 deletions.
4 changes: 4 additions & 0 deletions include/cuda/std/atomic
@@ -37,6 +37,10 @@
#undef ATOMIC_VAR_INIT
#endif //__CUDACC_RTC__

// pre-define lock free query for heterogeneous compatibility
#ifndef _LIBCUDACXX_ATOMIC_IS_LOCK_FREE
#define _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(__x) (__x <= 8)
#endif

#include "cassert"
#include "cstddef"
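The four added lines pre-define the lock-free predicate before any backend header is pulled in, so host and device compilation see the same answer: any atomic object of at most 8 bytes is reported lock-free. A minimal standalone sketch of that predicate (the function name and the static_asserts are illustrative, not part of the library):

    #include <cstddef>

    // Illustrative mirror of _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(__x), defined as (__x <= 8):
    // objects no larger than 8 bytes are treated as lock-free.
    constexpr bool lock_free_for_size(std::size_t size) { return size <= 8; }

    static_assert(lock_free_for_size(sizeof(int)),       "4-byte atomics report lock-free");
    static_assert(lock_free_for_size(sizeof(long long)), "8-byte atomics report lock-free");
    static_assert(!lock_free_for_size(16),               "16-byte objects do not");
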
2 changes: 0 additions & 2 deletions include/cuda/std/detail/libcxx/include/__config
@@ -1621,8 +1621,6 @@ extern "C" _LIBCUDACXX_FUNC_VIS void __sanitizer_annotate_contiguous_container(
# define _LIBCUDACXX_HAS_MSVC_ATOMIC_IMPL
#endif

#define _LIBCUDACXX_NO_RUNTIME_LOCK_FREE

// CUDA Atomics supersede host atomics in order to insert the host/device dispatch layer
#if defined(_LIBCUDACXX_COMPILER_NVCC) || defined(_LIBCUDACXX_COMPILER_NVRTC) || defined(_LIBCUDACXX_COMPILER_PGI)
# define _LIBCUDACXX_HAS_CUDA_ATOMIC_IMPL
5 changes: 2 additions & 3 deletions include/cuda/std/detail/libcxx/include/atomic
@@ -710,7 +710,6 @@ using __detail::__cxx_atomic_fetch_sub;
using __detail::__cxx_atomic_fetch_or;
using __detail::__cxx_atomic_fetch_and;
using __detail::__cxx_atomic_fetch_xor;
using __detail::__cxx_atomic_is_lock_free;

template <class _Tp>
_LIBCUDACXX_INLINE_VISIBILITY
@@ -1260,7 +1259,7 @@ struct __atomic_base {

_LIBCUDACXX_INLINE_VISIBILITY
bool is_lock_free() const volatile _NOEXCEPT
{return __cxx_atomic_is_lock_free(sizeof(_Tp));}
{return _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(sizeof(_Tp));}
_LIBCUDACXX_INLINE_VISIBILITY
bool is_lock_free() const _NOEXCEPT
{return static_cast<__atomic_base const volatile*>(this)->is_lock_free();}
@@ -1385,7 +1384,7 @@ struct __atomic_base_ref {

_LIBCUDACXX_INLINE_VISIBILITY
bool is_lock_free() const volatile _NOEXCEPT
{return __cxx_atomic_is_lock_free(sizeof(_Tp));}
{return _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(sizeof(_Tp));}
_LIBCUDACXX_INLINE_VISIBILITY
bool is_lock_free() const _NOEXCEPT
{return static_cast<__atomic_base_ref const volatile*>(this)->is_lock_free();}
@@ -12,7 +12,11 @@
#define _LIBCUDACXX_ATOMIC_BASE_H

#include "cxx_atomic.h"
#include <type_traits>

// Guard ifdef for lock free query in case it is assigned elsewhere (MSVC/CUDA)
#ifndef _LIBCUDACXX_ATOMIC_IS_LOCK_FREE
#define _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(__x) __atomic_is_lock_free(__x, 0)
#endif

_LIBCUDACXX_INLINE_VISIBILITY inline _LIBCUDACXX_CONSTEXPR int __cxx_atomic_order_to_int(memory_order __order) {
// Avoid switch statement to make this a constexpr.
@@ -185,13 +189,4 @@ inline auto __cxx_atomic_fetch_min(_Tp* __a, _Td __val,
return __expected;
}

inline constexpr
bool __cxx_atomic_is_lock_free(size_t __x) {
#if defined(_LIBCUDACXX_NO_RUNTIME_LOCK_FREE)
return __x <= 8;
#else
return __atomic_is_lock_free(__x, 0);
#endif
}

#endif // _LIBCUDACXX_ATOMIC_BASE_H
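When no platform layer has already defined the macro, the guarded fallback above delegates to the GCC/Clang builtin __atomic_is_lock_free(size, ptr), which consults the target's actual atomic support for the given size. A hedged sketch of that builtin in isolation (assumes a GCC-compatible host compiler; not libcudacxx code):

    #include <cstdio>

    int main() {
        // Passing a null pointer asks about a suitably aligned object of the given
        // size, which is how the fallback _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(__x) uses it.
        std::printf("4-byte lock-free: %d\n", (int)__atomic_is_lock_free(4, (void*)0));
        std::printf("8-byte lock-free: %d\n", (int)__atomic_is_lock_free(8, (void*)0));
        return 0;
    }
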
@@ -24,7 +24,9 @@ struct __cxx_atomic_base_impl {
_LIBCUDACXX_DISABLE_EXTENSION_WARNING _Atomic(_Tp) __a_value;
};

#define __cxx_atomic_is_lock_free(__s) __c11_atomic_is_lock_free(__s)
#ifndef _LIBCUDACXX_ATOMIC_IS_LOCK_FREE
#define _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(__x) __c11_atomic_is_lock_free(__x, 0)
#endif

_LIBCUDACXX_INLINE_VISIBILITY inline
void __cxx_atomic_thread_fence(memory_order __order) _NOEXCEPT {
@@ -45,6 +45,11 @@ inline __host__ __device__ int __stronger_order_cuda(int __a, int __b) {
return __xform[__a < __b ? __a : __b];
}

// pre-define lock free query for heterogeneous compatibility
#ifndef _LIBCUDACXX_ATOMIC_IS_LOCK_FREE
#define _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(__x) (__x <= 8)
#endif

// Wrap host atomic implementations into a sub-namespace
namespace __host {
#if defined(_LIBCUDACXX_COMPILER_MSVC)
@@ -62,11 +67,6 @@ namespace __host {
#include "atomic_cuda_generated.h"
#include "atomic_cuda_derived.h"

_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
bool __cxx_atomic_is_lock_free(size_t __x) {
return __x <= 8;
}

_LIBCUDACXX_INLINE_VISIBILITY
inline
void __cxx_atomic_thread_fence(memory_order __order) {
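On the CUDA dispatch path the same (__x <= 8) definition replaces the old inline __cxx_atomic_is_lock_free helper, so cuda::std::atomic<T>::is_lock_free() reports a size-based answer that is identical on host and device. A hedged usage sketch (assumes the file is compiled as CUDA C++ so that <cuda/std/atomic> is available):

    #include <cuda/std/atomic>
    #include <cstdio>

    int main() {
        cuda::std::atomic<int> narrow{0};       // 4 bytes, within the 8-byte bound
        cuda::std::atomic<long long> wide{0};   // 8 bytes, still within the bound

        // With _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(__x) expanding to (__x <= 8) on this
        // path, both queries are expected to return true.
        std::printf("atomic<int>       lock-free: %d\n", (int)narrow.is_lock_free());
        std::printf("atomic<long long> lock-free: %d\n", (int)wide.is_lock_free());
        return 0;
    }
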
@@ -25,6 +25,11 @@
#error Unsupported hardware
#endif // hardware

// MSVC Does not have compiler intrinsics for lock-free checking
#ifndef _LIBCUDACXX_ATOMIC_IS_LOCK_FREE
#define _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(__x) (__x <= 8)
#endif

inline int __stronger_order_msvc(int __a, int __b) {
int const __max = __a > __b ? __a : __b;
if(__max != __ATOMIC_RELEASE)
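MSVC offers no __atomic_is_lock_free-style intrinsic, so the MSVC backend also falls back to the 8-byte size bound. For comparison, the portable standard-library way to observe the same property (independent of this patch; on x86-64 MSVC it agrees with the bound):

    #include <atomic>
    #include <cstdio>

    int main() {
        // C++17: is_always_lock_free is a compile-time constant; the ATOMIC_*_LOCK_FREE
        // macros report the same information per type category.
        std::printf("atomic<long long> always lock-free: %d\n",
                    (int)std::atomic<long long>::is_always_lock_free);
        std::printf("ATOMIC_LLONG_LOCK_FREE: %d\n", (int)ATOMIC_LLONG_LOCK_FREE);
        return 0;
    }
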
