Merge internal changes from CTK 11.7 to libcudacxx.
wmaxey committed Jun 2, 2022
1 parent 768b2a3 commit 8b0c7eb
Showing 4 changed files with 20 additions and 25 deletions.
6 changes: 0 additions & 6 deletions .upstream-tests/utils/nvidia/nvrtc/nvrtc.sh
@@ -100,11 +100,6 @@ cudart_include_dir=$(
   | ${nvcc} -x cu - -M -E "${includes[@]}" \
   | grep -e ' /.*/cuda_pipeline_primitives\.h' -o \
   | xargs dirname)
-cg_include_dir=$(
-  echo '#include <cooperative_groups.h>' \
-  | ${nvcc} -x cu - -M -E "${includes[@]}" \
-  | grep -e ' /.*/cooperative_groups\.h' -o \
-  | xargs dirname)
 ext_include_dir=$(
   echo '#include <cuda/pipeline>' \
   | ${nvcc} -x cu - -M -E "${includes[@]}" -arch sm_70 -std=c++11 \
@@ -138,7 +133,6 @@ echo ' // BEGIN SCRIPT GENERATED OPTIONS' >> "${tempfile}"
 echo ' "-I'"${libcudacxxdir}/include"'",' >> "${tempfile}"
 echo ' "-I'"${libcudacxxdir}/test/support"'",' >> "${tempfile}"
 echo ' "-I'"${cudart_include_dir}"'",' >> "${tempfile}"
-echo ' "-I'"${cg_include_dir}"'",' >> "${tempfile}"
 echo ' "-I'"${ext_include_dir}"'",' >> "${tempfile}"
 echo ' "--pre-include='"${libcudacxxdir}/test/support/nvrtc_limit_macros.h"'",' >> "${tempfile}"
 echo ' "--device-int128",' >> "${tempfile}"
1 change: 1 addition & 0 deletions include/cuda/std/detail/__config
@@ -88,6 +88,7 @@
 #endif
 #define _LIBCUDACXX_HAS_EXTERNAL_ATOMIC_IMP
 #define _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(size, ptr) (size <= 8)
+#define _LIBCUDACXX_HAS_NO_CXX20_CHRONO_LITERALS

 #define _LIBCUDACXX_SYS_CLOCK_DURATION nanoseconds

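The newly defined _LIBCUDACXX_HAS_NO_CXX20_CHRONO_LITERALS marks configurations where the C++20 calendar literals (operator""y and operator""d) are not available. A hedged sketch of how library or test code typically consumes such a guard follows; the literals namespace is assumed to mirror libc++, and every name other than the macro itself is illustrative rather than taken from this commit.

```cpp
// Sketch only: fall back to explicit calendar constructors when the
// configuration macro says the C++20 chrono literals are unavailable.
#include <cuda/std/chrono>

__host__ __device__ constexpr cuda::std::chrono::year_month_day example_date() {
#if defined(_LIBCUDACXX_HAS_NO_CXX20_CHRONO_LITERALS)
    // Explicit constructors work regardless of literal support.
    return cuda::std::chrono::year_month_day{cuda::std::chrono::year{2022},
                                             cuda::std::chrono::month{6},
                                             cuda::std::chrono::day{2}};
#else
    // Assumes the literal operators live where libc++ puts them.
    using namespace cuda::std::literals::chrono_literals;
    return cuda::std::chrono::year_month_day{2022y, cuda::std::chrono::month{6}, 2d};
#endif
}
```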
4 changes: 2 additions & 2 deletions include/cuda/std/detail/libcxx/include/semaphore
@@ -105,7 +105,7 @@ class __atomic_semaphore_base
     _LIBCUDACXX_INLINE_VISIBILITY
     bool __acquire_slow_timed(chrono::nanoseconds const& __rel_time)
     {
-        return __libcpp_thread_poll_with_backoff([=]() {
+        return __libcpp_thread_poll_with_backoff([this]() {
             ptrdiff_t const __old = __count.load(memory_order_acquire);
             return __old != 0 && __fetch_sub_if_slow(__old);
         }, __rel_time);
@@ -180,7 +180,7 @@ class __atomic_semaphore_base<_Sco, 1> {
     _LIBCUDACXX_INLINE_VISIBILITY
     bool __acquire_slow_timed(chrono::nanoseconds const& __rel_time)
     {
-        return __libcpp_thread_poll_with_backoff([=]() {
+        return __libcpp_thread_poll_with_backoff([this]() {
             return try_acquire();
         }, __rel_time);
     }
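Both capture-list changes in this file are behavior-preserving: inside a member function, [=] reaches __count, __fetch_sub_if_slow, and try_acquire only by implicitly capturing this, and C++20 deprecates that implicit capture, so [this] now spells it out. A minimal standalone illustration (plain C++, not libcudacxx code):

```cpp
#include <atomic>
#include <cstdio>

struct counting_semaphore_sketch {
    std::atomic<int> count_{1};

    bool poll_once() {
        // Deprecated in C++20: `[=]` would capture `this` implicitly.
        // auto poll = [=]  { return count_.load(std::memory_order_acquire) != 0; };
        auto poll = [this] { return count_.load(std::memory_order_acquire) != 0; };
        return poll();
    }
};

int main() {
    counting_semaphore_sketch s;
    std::printf("count is nonzero: %d\n", s.poll_once());
}
```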
34 changes: 17 additions & 17 deletions include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda.h
@@ -72,7 +72,7 @@ inline
 void __cxx_atomic_thread_fence(memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      __atomic_thread_fence_cuda(__order, __thread_scope_system_tag());
+      __atomic_thread_fence_cuda(static_cast<__memory_order_underlying_t>(__order), __thread_scope_system_tag());
     ),
     NV_IS_HOST, (
       __host::__cxx_atomic_thread_fence(__order);
@@ -85,7 +85,7 @@ inline
 void __cxx_atomic_signal_fence(memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      __atomic_signal_fence_cuda(__order);
+      __atomic_signal_fence_cuda(static_cast<__memory_order_underlying_t>(__order));
     ),
     NV_IS_HOST, (
       __host::__cxx_atomic_signal_fence(__order);
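Every device-side change in this file has the same shape: the memory_order argument is now cast to __memory_order_underlying_t before it reaches the __atomic_*_cuda helpers, which take the order as the enumeration's underlying integer type. A plain ISO C++ sketch of that conversion; the helper and the stand-in "device" function are hypothetical names, not libcudacxx internals:

```cpp
#include <atomic>
#include <cstdio>
#include <type_traits>

// Stand-in for an intrinsic that expects the raw integer encoding of the order.
static void fence_with_raw_order(int raw_order) {
    std::printf("raw memory order value: %d\n", raw_order);
}

template <class Enum>
constexpr auto to_underlying_sketch(Enum e) noexcept {
    return static_cast<std::underlying_type_t<Enum>>(e);
}

int main() {
    // Mirrors static_cast<__memory_order_underlying_t>(__order) in the diff.
    fence_with_raw_order(to_underlying_sketch(std::memory_order_acquire));
    fence_with_raw_order(to_underlying_sketch(std::memory_order_seq_cst));
}
```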
@@ -181,7 +181,7 @@ __host__ __device__
   alignas(_Tp) auto __tmp = __val;
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      __atomic_store_n_cuda(__cxx_get_underlying_device_atomic(__a), __tmp, __order, __scope_tag<_Sco>());
+      __atomic_store_n_cuda(__cxx_get_underlying_device_atomic(__a), __tmp, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       __host::__cxx_atomic_store(&__a->__a_value, __tmp, __order);
@@ -194,7 +194,7 @@ __host__ __device__
 _Tp __cxx_atomic_load(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> const volatile* __a, memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_load_n_cuda(__cxx_get_underlying_device_atomic(__a), __order, __scope_tag<_Sco>());
+      return __atomic_load_n_cuda(__cxx_get_underlying_device_atomic(__a), static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_load(&__a->__a_value, __order);
@@ -208,7 +208,7 @@ __host__ __device__
   alignas(_Tp) auto __tmp = __val;
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_exchange_n_cuda(__cxx_get_underlying_device_atomic(__a), __tmp, __order, __scope_tag<_Sco>());
+      return __atomic_exchange_n_cuda(__cxx_get_underlying_device_atomic(__a), __tmp, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_exchange(&__a->__a_value, __tmp, __order);
@@ -224,7 +224,7 @@ __host__ __device__
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
       alignas(_Tp) auto __tmp_v = __val;
-      __result = __atomic_compare_exchange_cuda(__cxx_get_underlying_device_atomic(__a), &__tmp, &__tmp_v, false, __success, __failure, __scope_tag<_Sco>());
+      __result = __atomic_compare_exchange_cuda(__cxx_get_underlying_device_atomic(__a), &__tmp, &__tmp_v, false, static_cast<__memory_order_underlying_t>(__success), static_cast<__memory_order_underlying_t>(__failure), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       __result = __host::__cxx_atomic_compare_exchange_strong(&__a->__a_value, &__tmp, __val, __success, __failure);
@@ -242,7 +242,7 @@ __host__ __device__
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
       alignas(_Tp) auto __tmp_v = __val;
-      __result = __atomic_compare_exchange_cuda(__cxx_get_underlying_device_atomic(__a), &__tmp, &__tmp_v, true, __success, __failure, __scope_tag<_Sco>());
+      __result = __atomic_compare_exchange_cuda(__cxx_get_underlying_device_atomic(__a), &__tmp, &__tmp_v, true, static_cast<__memory_order_underlying_t>(__success), static_cast<__memory_order_underlying_t>(__failure), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       __result = __host::__cxx_atomic_compare_exchange_weak(&__a->__a_value, &__tmp, __val, __success, __failure);
@@ -257,7 +257,7 @@ __host__ __device__
 _Tp __cxx_atomic_fetch_add(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __delta, memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_add_cuda(__cxx_get_underlying_device_atomic(__a), __delta, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_add_cuda(__cxx_get_underlying_device_atomic(__a), __delta, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_fetch_add(&__a->__a_value, __delta, __order);
@@ -270,7 +270,7 @@ __host__ __device__
 _Tp* __cxx_atomic_fetch_add(__cxx_atomic_base_heterogeneous_impl<_Tp*, _Sco, _Ref> volatile* __a, ptrdiff_t __delta, memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_add_cuda(__cxx_get_underlying_device_atomic(__a), __delta, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_add_cuda(__cxx_get_underlying_device_atomic(__a), __delta, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_fetch_add(&__a->__a_value, __delta, __order);
@@ -283,7 +283,7 @@ __host__ __device__
 _Tp __cxx_atomic_fetch_sub(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __delta, memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_sub_cuda(__cxx_get_underlying_device_atomic(__a), __delta, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_sub_cuda(__cxx_get_underlying_device_atomic(__a), __delta, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_fetch_sub(&__a->__a_value, __delta, __order);
@@ -296,7 +296,7 @@ __host__ __device__
 _Tp* __cxx_atomic_fetch_sub(__cxx_atomic_base_heterogeneous_impl<_Tp*, _Sco, _Ref> volatile* __a, ptrdiff_t __delta, memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_sub_cuda(__cxx_get_underlying_device_atomic(__a), __delta, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_sub_cuda(__cxx_get_underlying_device_atomic(__a), __delta, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_fetch_sub(&__a->__a_value, __delta, __order);
@@ -309,7 +309,7 @@ __host__ __device__
 _Tp __cxx_atomic_fetch_and(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __pattern, memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_and_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_and_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_fetch_and(&__a->__a_value, __pattern, __order);
@@ -322,7 +322,7 @@ __host__ __device__
 _Tp __cxx_atomic_fetch_or(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __pattern, memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_or_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_or_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_fetch_or(&__a->__a_value, __pattern, __order);
@@ -335,7 +335,7 @@ __host__ __device__
 _Tp __cxx_atomic_fetch_xor(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __pattern, memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_xor_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_xor_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_fetch_xor(&__a->__a_value, __pattern, __order);
@@ -348,7 +348,7 @@ __host__ __device__
 _Tp __cxx_atomic_fetch_max(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Delta __val, memory_order __order) {
   NV_IF_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_max_cuda(__cxx_get_underlying_device_atomic(__a), __val, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_max_cuda(__cxx_get_underlying_device_atomic(__a), __val, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ), (
       return __host::__cxx_atomic_fetch_max(&__a->__a_value, __val, __order);
     )
@@ -360,7 +360,7 @@ __host__ __device__
 _Tp __cxx_atomic_fetch_min(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Delta __val, memory_order __order) {
   NV_IF_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_min_cuda(__cxx_get_underlying_device_atomic(__a), __val, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_min_cuda(__cxx_get_underlying_device_atomic(__a), __val, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ), (
       return __host::__cxx_atomic_fetch_min(&__a->__a_value, __val, __order);
     )
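The fetch_max and fetch_min overloads use the two-branch NV_IF_TARGET form rather than NV_DISPATCH_TARGET, but the dispatch idiom is the same: one __host__ __device__ body compiles to either the device intrinsics or the __host:: implementation. A hedged sketch of the idiom, assuming nvcc and the <nv/target> header that ships with libcu++:

```cpp
#include <nv/target>
#include <cstdio>
#include <cuda_runtime.h>

// One body, two compiled branches: device builds keep the first block,
// host builds keep the second.
__host__ __device__ void report_target() {
    NV_IF_TARGET(
        NV_IS_DEVICE, (
            printf("device branch\n");
        ), (
            printf("host branch\n");
        )
    )
}

__global__ void report_kernel() { report_target(); }

int main() {
    report_target();              // prints the host branch
    report_kernel<<<1, 1>>>();    // prints the device branch
    cudaDeviceSynchronize();
    return 0;
}
```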
@@ -428,7 +428,7 @@ __host__ __device__ inline bool __cxx_atomic_compare_exchange_weak(__cxx_atomic_
   auto const __actual = __cxx_small_from_32<_Tp>(__temp);
   if(!__ret) {
     if(0 == __cuda_memcmp(&__actual, __expected, sizeof(_Tp)))
-      __cxx_atomic_fetch_and(&__a->__a_value, (1u << (8*sizeof(_Tp))) - 1, memory_order::memory_order_relaxed);
+      __cxx_atomic_fetch_and(&__a->__a_value, (1u << (8*sizeof(_Tp))) - 1, memory_order_relaxed);
     else
       *__expected = __actual;
   }
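The final hunk only changes the spelling of the relaxed order, from memory_order::memory_order_relaxed to the unqualified memory_order_relaxed, presumably so the call stays valid if memory_order is ever a scoped enumeration. The surrounding mask expression is worth a worked example: (1u << (8*sizeof(_Tp))) - 1 selects the low bytes of the 32-bit word that backs a small atomic. The helper name below is hypothetical, plain C++:

```cpp
#include <cstdint>
#include <cstdio>

template <class T>
constexpr unsigned low_byte_mask() {
    static_assert(sizeof(T) < sizeof(unsigned), "only sub-word types are emulated this way");
    return (1u << (8 * sizeof(T))) - 1u;
}

int main() {
    std::printf("1-byte mask: %#x\n", low_byte_mask<std::uint8_t>());  // 0xff
    std::printf("2-byte mask: %#x\n", low_byte_mask<std::uint16_t>()); // 0xffff
}
```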
