From 5afb0d46fd6b77548a976d9103489f1c9579e5a1 Mon Sep 17 00:00:00 2001 From: Hartmut Kaiser Date: Sat, 2 Sep 2023 10:20:25 -0500 Subject: [PATCH 01/10] Adding overloads taking a source_location to all APIs that spawn new HPX threads --- library/CMakeLists.txt | 16 +- library/include/chplx/begin.hpp | 27 +- library/include/chplx/cobegin.hpp | 31 +- library/include/chplx/coforall_loop.hpp | 338 ++++++++++-------- .../chplx/detail/generate_annotation.hpp | 16 + library/include/chplx/forall_loop.hpp | 294 ++++++++------- library/include/chplx/util.hpp | 20 +- library/src/detail/generate_annotation.cpp | 26 ++ library/src/locale.cpp | 1 + 9 files changed, 473 insertions(+), 296 deletions(-) create mode 100644 library/include/chplx/detail/generate_annotation.hpp create mode 100644 library/src/detail/generate_annotation.cpp diff --git a/library/CMakeLists.txt b/library/CMakeLists.txt index ac4d2b2a..7b47db45 100644 --- a/library/CMakeLists.txt +++ b/library/CMakeLists.txt @@ -40,7 +40,10 @@ find_package(fmt REQUIRED CONFIG) find_package(HPX REQUIRED CONFIG) # create library target -set(chplx_library_detail_headers include/chplx/detail/iterator_generator.hpp) +set(chplx_library_detail_headers + include/chplx/detail/generate_annotation.hpp + include/chplx/detail/iterator_generator.hpp +) source_group("Header Files/chplx/detail" FILES ${chplx_library_detail_headers}) set(chplx_library_domain_headers include/chplx/domains/base_rectangular.hpp @@ -79,6 +82,11 @@ set(chplx_library_headers ) source_group("Header Files/chplx" FILES ${chplx_library_headers}) +set(chplx_library_detail_sources + src/detail/generate_annotation.cpp +) +source_group("Source Files/detail" FILES ${chplx_library_detail_sources}) + set(chplx_library_domain_sources src/domains/dmap.cpp) source_group("Source Files/domains" FILES ${chplx_library_domain_sources}) @@ -95,7 +103,11 @@ if(MSVC) set(chplx_library_sources ${chplx_library_sources} src/setenv.c) endif() -add_library(library STATIC 
${chplx_library_sources} ${chplx_library_headers}) +add_library(library STATIC + ${chplx_library_sources} + ${chplx_library_detail_sources} + ${chplx_library_headers} +) if(NOT WIN32 AND ${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang" diff --git a/library/include/chplx/begin.hpp b/library/include/chplx/begin.hpp index 597f6064..95ec4a5a 100644 --- a/library/include/chplx/begin.hpp +++ b/library/include/chplx/begin.hpp @@ -6,21 +6,34 @@ #pragma once +#include #include #include #include +#include +#include #include #include namespace chplx { -template void begin(F &&f, Args &&...args) { + template + void begin(hpx::source_location const& location, F&& f, Args&&... args) + { + hpx::parallel::execution::post(hpx::execution::par.executor(), + hpx::annotated_function( + std::forward(f), detail::generate_annotation(location)), + detail::task_intent>::call( + std::forward(args))...); + } - hpx::parallel::execution::post(hpx::execution::par.executor(), - std::forward(f), - detail::task_intent>::call( - std::forward(args))...); -} -} // namespace chplx + template + requires(!std::is_same_v, hpx::source_location>) + void begin(F&& f, Args&&... args) + { + begin(HPX_CURRENT_SOURCE_LOCATION(), std::forward(f), + std::forward(args)...); + } +} // namespace chplx diff --git a/library/include/chplx/cobegin.hpp b/library/include/chplx/cobegin.hpp index beefb6c2..82a6d1f9 100644 --- a/library/include/chplx/cobegin.hpp +++ b/library/include/chplx/cobegin.hpp @@ -6,20 +6,35 @@ #pragma once +#include + #include #include +#include #include namespace chplx { -template void cobegin(F &&f, Fs &&...fs) { + template + void cobegin(hpx::source_location const& location, F&& f, Fs&&... 
fs) + { + auto exec = hpx::execution::par.executor(); + hpx::experimental::task_group g; + + auto annotation = detail::generate_annotation(location); - auto exec = hpx::execution::par.executor(); - hpx::experimental::task_group g; + g.run(exec, hpx::annotated_function(std::forward(f), annotation)); + (g.run(exec, hpx::annotated_function(std::forward(fs), annotation)), + ...); - g.run(exec, std::forward(f)); - (g.run(exec, std::forward(fs)), ...); + g.wait(); + } - g.wait(); -} -} // namespace chplx + template + requires(!std::is_same_v, hpx::source_location>) + void cobegin(F&& f, Fs&&... fs) + { + cobegin(HPX_CURRENT_SOURCE_LOCATION(), std::forward(f), + std::forward(fs)...); + } +} // namespace chplx diff --git a/library/include/chplx/coforall_loop.hpp b/library/include/chplx/coforall_loop.hpp index c9d24b5f..a07130b0 100644 --- a/library/include/chplx/coforall_loop.hpp +++ b/library/include/chplx/coforall_loop.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,7 @@ #include #include #include +#include #include #include @@ -27,154 +29,190 @@ namespace chplx { -//----------------------------------------------------------------------------- -// coforall loop for tuples -namespace detail { -template -void coforall(Tuple &t, F &&f, Args &&...args) { - - auto policy = hpx::parallel::util::adapt_sharing_mode( - hpx::execution::par, - hpx::threads::thread_sharing_hint::do_not_combine_tasks); - - using base_tuple = typename Tuple::base_type; - if constexpr (std::tuple_size_v != 0) { - if constexpr (Tuple::isHomogenous()) { - - hpx::wait_all(hpx::parallel::execution::bulk_async_execute( - policy.executor(), - [&](auto val, auto &&...fargs) { - f(val, std::forward(fargs)...); - }, - HomogenousTupleRange(t.base()), - detail::task_intent>::call( - std::forward(args))...)); - } else { - - using table = detail::forLoopTable< - Tuple, std::decay_t, - std::make_index_sequence>, Args...>; - - 
hpx::wait_all(hpx::parallel::execution::bulk_async_execute( - policy.executor(), - [&](std::size_t i, auto &&...fargs) { - table::lookupTable[i](t, f, std::forward(fargs)...); - }, - t.size(), - detail::task_intent>::call( - std::forward(args))...)); + //----------------------------------------------------------------------------- + // coforall loop for tuples + namespace detail { + template + void coforall(Tuple& t, F&& f, Args&&... args) + { + auto policy = + hpx::parallel::util::adapt_sharing_mode(hpx::execution::par, + hpx::threads::thread_sharing_hint::do_not_combine_tasks); + + using base_tuple = typename Tuple::base_type; + if constexpr (std::tuple_size_v != 0) + { + if constexpr (Tuple::isHomogenous()) + { + hpx::wait_all(hpx::parallel::execution::bulk_async_execute( + policy.executor(), + [&](auto val, auto&&... fargs) { + f(val, std::forward(fargs)...); + }, + HomogenousTupleRange(t.base()), + detail::task_intent>::call( + std::forward(args))...)); + } + else + { + using table = detail::forLoopTable, + std::make_index_sequence>, + Args...>; + + hpx::wait_all(hpx::parallel::execution::bulk_async_execute( + policy.executor(), + [&](std::size_t i, auto&&... fargs) { + table::lookupTable[i]( + t, f, std::forward(fargs)...); + }, + t.size(), + detail::task_intent>::call( + std::forward(args))...)); + } + } + } + } // namespace detail + + template + void coforall(hpx::source_location const& location, Tuple& t, F&& f, + Args&&... 
args) + { + detail::coforall(t, + hpx::annotated_function( + std::forward(f), detail::generate_annotation(location)), + std::forward(args)...); } - } -} -} // namespace detail - -template -void coforall(Tuple &t, F &&f, Args &&...args) { - - detail::coforall(t, std::forward(f), std::forward(args)...); -} - -template -void coforall(Tuple const &t, F &&f, Args &&...args) { - - detail::coforall(t, std::forward(f), std::forward(args)...); -} - -//----------------------------------------------------------------------------- -// coforall loop for ranges -template -void coforall(Range const &r, F &&f, - Args &&...args) { - - auto policy = hpx::parallel::util::adapt_sharing_mode( - hpx::execution::par, - hpx::threads::thread_sharing_hint::do_not_combine_tasks); - - hpx::wait_all(hpx::parallel::execution::bulk_async_execute( - policy.executor(), - [&](std::size_t idx, auto &&...fargs) { - return f(r.orderToIndex(idx), std::forward(fargs)...); - }, - r.size(), - detail::task_intent>::call( - std::forward(args))...)); -} - -//----------------------------------------------------------------------------- -// coforall loop for domain -template -void coforall(Domain const &d, F &&f, Args &&...args) { - - auto policy = hpx::parallel::util::adapt_sharing_mode( - hpx::execution::par, - hpx::threads::thread_sharing_hint::do_not_combine_tasks); - - hpx::wait_all(hpx::parallel::execution::bulk_async_execute( - policy.executor(), - [&](std::size_t idx, auto &&...fargs) { - return f(d.orderToIndex(idx), std::forward(fargs)...); - }, - d.size(), - detail::task_intent>::call( - std::forward(args))...)); -} - -//----------------------------------------------------------------------------- -// coforall loop for associative domain -template -void coforall(AssocDomain const &d, F &&f, Args &&...args) { - - auto policy = hpx::parallel::util::adapt_sharing_mode( - hpx::execution::par, - hpx::threads::thread_sharing_hint::do_not_combine_tasks); - - 
hpx::wait_all(hpx::parallel::execution::bulk_async_execute( - policy.executor(), - [&](std::size_t idx, auto &&...fargs) { - return f(d.orderToIndex(idx), std::forward(fargs)...); - }, - d.size(), - detail::task_intent>::call( - std::forward(args))...)); -} - -//----------------------------------------------------------------------------- -// forall loop for zippered iteration -template -void coforall(detail::ZipRange const &zr, F &&f, Args &&...args) { - - auto policy = hpx::parallel::util::adapt_sharing_mode( - hpx::execution::par, - hpx::threads::thread_sharing_hint::do_not_combine_tasks); - - hpx::wait_all(hpx::parallel::execution::bulk_async_execute( - policy.executor(), - [&](std::size_t idx, auto &&...fargs) { - return f(zr.orderToIndex(idx), std::forward(fargs)...); - }, - zr.size(), - detail::task_intent>::call( - std::forward(args))...)); -} - -//----------------------------------------------------------------------------- -// forall loop for array iteration -template -void coforall(Array const &a, F &&f, Args &&...args) { - - auto policy = hpx::parallel::util::adapt_sharing_mode( - hpx::execution::par, - hpx::threads::thread_sharing_hint::do_not_combine_tasks); - - hpx::wait_all(hpx::parallel::execution::bulk_async_execute( - policy.executor(), - [&](std::size_t idx, auto &&...fargs) { - return f(a[idx], std::forward(fargs)...); - }, - a.size(), - detail::task_intent>::call( - std::forward(args))...)); -} - -} // namespace chplx + + template + void coforall(hpx::source_location const& location, Tuple const& t, + F&& f, Args&&... args) + { + detail::coforall(t, + hpx::annotated_function( + std::forward(f), detail::generate_annotation(location)), + std::forward(args)...); + } + + //----------------------------------------------------------------------------- + // coforall loop for ranges + template + void coforall(hpx::source_location const& location, + Range const& r, F&& f, Args&&... 
args) + { + auto policy = + hpx::parallel::util::adapt_sharing_mode(hpx::execution::par, + hpx::threads::thread_sharing_hint::do_not_combine_tasks); + auto wrapped = + hpx::annotated_function(f, detail::generate_annotation(location)); + + hpx::wait_all(hpx::parallel::execution::bulk_async_execute( + policy.executor(), + [&](std::size_t idx, auto&&... fargs) { + return wrapped(r.orderToIndex(idx), + std::forward(fargs)...); + }, + r.size(), + detail::task_intent>::call( + std::forward(args))...)); + } + + //----------------------------------------------------------------------------- + // coforall loop for domain + template + void coforall(hpx::source_location const& location, + Domain const& d, F&& f, Args&&... args) + { + auto policy = + hpx::parallel::util::adapt_sharing_mode(hpx::execution::par, + hpx::threads::thread_sharing_hint::do_not_combine_tasks); + auto wrapped = + hpx::annotated_function(f, detail::generate_annotation(location)); + + hpx::wait_all(hpx::parallel::execution::bulk_async_execute( + policy.executor(), + [&](std::size_t idx, auto&&... fargs) { + return wrapped(d.orderToIndex(idx), + std::forward(fargs)...); + }, + d.size(), + detail::task_intent>::call( + std::forward(args))...)); + } + + //----------------------------------------------------------------------------- + // coforall loop for associative domain + template + void coforall(hpx::source_location const& location, AssocDomain const& d, + F&& f, Args&&... args) + { + auto policy = + hpx::parallel::util::adapt_sharing_mode(hpx::execution::par, + hpx::threads::thread_sharing_hint::do_not_combine_tasks); + auto wrapped = + hpx::annotated_function(f, detail::generate_annotation(location)); + + hpx::wait_all(hpx::parallel::execution::bulk_async_execute( + policy.executor(), + [&](std::size_t idx, auto&&... 
fargs) { + return wrapped(d.orderToIndex(idx), + std::forward(fargs)...); + }, + d.size(), + detail::task_intent>::call( + std::forward(args))...)); + } + + //----------------------------------------------------------------------------- + // forall loop for zippered iteration + template + void coforall(hpx::source_location const& location, + detail::ZipRange const& zr, F&& f, Args&&... args) + { + auto policy = + hpx::parallel::util::adapt_sharing_mode(hpx::execution::par, + hpx::threads::thread_sharing_hint::do_not_combine_tasks); + auto wrapped = + hpx::annotated_function(f, detail::generate_annotation(location)); + + hpx::wait_all(hpx::parallel::execution::bulk_async_execute( + policy.executor(), + [&](std::size_t idx, auto&&... fargs) { + return wrapped(zr.orderToIndex(idx), + std::forward(fargs)...); + }, + zr.size(), + detail::task_intent>::call( + std::forward(args))...)); + } + + //----------------------------------------------------------------------------- + // forall loop for array iteration + template + void coforall(hpx::source_location const& location, + Array const& a, F&& f, Args&&... args) + { + auto policy = + hpx::parallel::util::adapt_sharing_mode(hpx::execution::par, + hpx::threads::thread_sharing_hint::do_not_combine_tasks); + auto wrapped = + hpx::annotated_function(f, detail::generate_annotation(location)); + + hpx::wait_all(hpx::parallel::execution::bulk_async_execute( + policy.executor(), + [&](std::size_t idx, auto&&... fargs) { + return wrapped(a[idx], std::forward(fargs)...); + }, + a.size(), + detail::task_intent>::call( + std::forward(args))...)); + } + + template + requires(!std::is_same_v, hpx::source_location>) + void coforall(Target&& t, F&& f, Args&&... 
args) + { + coforall(HPX_CURRENT_SOURCE_LOCATION(), std::forward(t), + std::forward(f), std::forward(args)...); + } +} // namespace chplx diff --git a/library/include/chplx/detail/generate_annotation.hpp b/library/include/chplx/detail/generate_annotation.hpp new file mode 100644 index 00000000..d9b22865 --- /dev/null +++ b/library/include/chplx/detail/generate_annotation.hpp @@ -0,0 +1,16 @@ +// Copyright (c) 2023 Hartmut Kaiser +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#include + +#include + +namespace chplx::detail { + + std::string generate_annotation(hpx::source_location const& location); +} // namespace chplx::detail diff --git a/library/include/chplx/forall_loop.hpp b/library/include/chplx/forall_loop.hpp index 4c450de9..beac4300 100644 --- a/library/include/chplx/forall_loop.hpp +++ b/library/include/chplx/forall_loop.hpp @@ -24,124 +24,178 @@ namespace chplx { -//----------------------------------------------------------------------------- -// forall loop for tuples -namespace detail { - -template -void forall(Tuple &t, F &&f, Args &&...args) { - - using base_tuple = typename Tuple::base_type; - if constexpr (std::tuple_size_v != 0) { - - if constexpr (Tuple::isHomogenous()) { - - hpx::ranges::for_each( - hpx::execution::par, HomogenousTupleRange(t.base()), - [&, ... fargs = detail::task_intent>::call( - std::forward(args))](Arg &&value) { - f(std::forward(value), - hpx::util::decay_unwrap::call(fargs)...); - }); - } else { - - using table = detail::forLoopTable< - Tuple, F, std::make_index_sequence>, - Args...>; - - hpx::experimental::for_loop( - hpx::execution::par, 0, t.size(), - [&, ... 
fargs = detail::task_intent>::call( - std::forward(args))](std::size_t i) mutable { - table::lookupTable[i]( - t, f, hpx::util::decay_unwrap::call(fargs)...); - }); + //----------------------------------------------------------------------------- + // forall loop for tuples + namespace detail { + + template + void forall(Tuple& t, F&& f, Args&&... args) + { + using base_tuple = typename Tuple::base_type; + if constexpr (std::tuple_size_v != 0) + { + if constexpr (Tuple::isHomogenous()) + { + hpx::ranges::for_each(hpx::execution::par, + HomogenousTupleRange(t.base()), + [&, + ... fargs = + detail::task_intent>::call( + std::forward(args))]( + Arg&& value) { + f(std::forward(value), + hpx::util::decay_unwrap::call( + fargs)...); + }); + } + else + { + using table = detail::forLoopTable>, + Args...>; + + hpx::experimental::for_loop(hpx::execution::par, 0, + t.size(), + [&, + ... fargs = + detail::task_intent>::call( + std::forward(args))]( + std::size_t i) mutable { + table::lookupTable[i](t, f, + hpx::util::decay_unwrap::call( + fargs)...); + }); + } + } + } + } // namespace detail + + template + void forall(hpx::source_location const& location, Tuple& t, F&& f, + Args&&... args) + { + detail::forall(t, + hpx::annotated_function( + std::forward(f), detail::generate_annotation(location)), + std::forward(args)...); } - } -} -} // namespace detail - -template -void forall(Tuple &t, F &&f, Args &&...args) { - - detail::forall(t, std::forward(f), std::forward(args)...); -} - -template -void forall(Tuple const &t, F &&f, Args &&...args) { - - detail::forall(t, std::forward(f), std::forward(args)...); -} - -//----------------------------------------------------------------------------- -// forall loop for ranges -template -void forall(Range const &r, F &&f, Args &&...args) { - - hpx::ranges::experimental::for_loop( - hpx::execution::par, detail::IteratorGenerator(r), - [&, ... 
fargs = detail::task_intent>::call( - std::forward(args))](Arg &&value) mutable { - f(std::forward(value), - hpx::util::decay_unwrap::call(fargs)...); - }); -} - -//----------------------------------------------------------------------------- -// forall loop for domain -template -void forall(Domain const &d, F &&f, Args &&...args) { - - hpx::ranges::experimental::for_loop( - hpx::execution::par, detail::IteratorGenerator(d), - [&, ... fargs = detail::task_intent>::call( - std::forward(args))](Arg &&value) mutable { - f(std::forward(value), - hpx::util::decay_unwrap::call(fargs)...); - }); -} - -//----------------------------------------------------------------------------- -// forall loop for associative domains -template -void forall(AssocDomain const &d, F &&f, Args &&...args) { - - hpx::ranges::experimental::for_loop( - hpx::execution::par, detail::IteratorGenerator(d, 0, d.size()), - [&, ... fargs = detail::task_intent>::call( - std::forward(args))](Arg &&value) mutable { - f(std::forward(value), - hpx::util::decay_unwrap::call(fargs)...); - }); -} - -//----------------------------------------------------------------------------- -// forall loop for zippered iteration -template -void forall(detail::ZipRange const &zr, F &&f, Args &&...args) { - - hpx::ranges::experimental::for_loop( - hpx::execution::par, detail::IteratorGenerator(zr), - [&, ... fargs = detail::task_intent>::call( - std::forward(args))](Arg &&value) mutable { - f(std::forward(value), - hpx::util::decay_unwrap::call(fargs)...); - }); -} - -//----------------------------------------------------------------------------- -// forall loop for array iteration -template -void forall(Array const &a, F &&f, Args &&...args) { - - hpx::ranges::experimental::for_loop( - hpx::execution::par, detail::IteratorGenerator(a), - [&, ... 
fargs = detail::task_intent>::call( - std::forward(args))](Arg &&value) mutable { - f(std::forward(value), - hpx::util::decay_unwrap::call(fargs)...); - }); -} - -} // namespace chplx + + template + void forall(hpx::source_location const& location, Tuple const& t, + F&& f, Args&&... args) + { + detail::forall(t, + hpx::annotated_function( + std::forward(f), detail::generate_annotation(location)), + std::forward(args)...); + } + + //----------------------------------------------------------------------------- + // forall loop for ranges + template + void forall(hpx::source_location const& location, + Range const& r, F&& f, Args&&... args) + { + auto wrapped = + hpx::annotated_function(f, detail::generate_annotation(location)); + + hpx::ranges::experimental::for_loop(hpx::execution::par, + detail::IteratorGenerator(r), + [&, + ... fargs = detail::task_intent>::call( + std::forward(args))]( + Arg&& value) mutable { + wrapped(std::forward(value), + hpx::util::decay_unwrap::call(fargs)...); + }); + } + + //----------------------------------------------------------------------------- + // forall loop for domain + template + void forall(hpx::source_location const& location, + Domain const& d, F&& f, Args&&... args) + { + auto wrapped = + hpx::annotated_function(f, detail::generate_annotation(location)); + + hpx::ranges::experimental::for_loop(hpx::execution::par, + detail::IteratorGenerator(d), + [&, + ... fargs = detail::task_intent>::call( + std::forward(args))]( + Arg&& value) mutable { + wrapped(std::forward(value), + hpx::util::decay_unwrap::call(fargs)...); + }); + } + + //----------------------------------------------------------------------------- + // forall loop for associative domains + template + void forall(hpx::source_location const& location, AssocDomain const& d, + F&& f, Args&&... 
args) + { + auto wrapped = + hpx::annotated_function(f, detail::generate_annotation(location)); + + hpx::ranges::experimental::for_loop(hpx::execution::par, + detail::IteratorGenerator(d, 0, d.size()), + [&, + ... fargs = detail::task_intent>::call( + std::forward(args))]( + Arg&& value) mutable { + wrapped(std::forward(value), + hpx::util::decay_unwrap::call(fargs)...); + }); + } + + //----------------------------------------------------------------------------- + // forall loop for zippered iteration + template + void forall(hpx::source_location const& location, + detail::ZipRange const& zr, F&& f, Args&&... args) + { + auto wrapped = + hpx::annotated_function(f, detail::generate_annotation(location)); + + hpx::ranges::experimental::for_loop(hpx::execution::par, + detail::IteratorGenerator(zr), + [&, + ... fargs = detail::task_intent>::call( + std::forward(args))]( + Arg&& value) mutable { + wrapped(std::forward(value), + hpx::util::decay_unwrap::call(fargs)...); + }); + } + + //----------------------------------------------------------------------------- + // forall loop for array iteration + template + void forall(hpx::source_location const& location, Array const& a, + F&& f, Args&&... args) + { + auto wrapped = + hpx::annotated_function(f, detail::generate_annotation(location)); + + hpx::ranges::experimental::for_loop(hpx::execution::par, + detail::IteratorGenerator(a), + [&, + ... fargs = detail::task_intent>::call( + std::forward(args))]( + Arg&& value) mutable { + wrapped(std::forward(value), + hpx::util::decay_unwrap::call(fargs)...); + }); + } + + template + requires(!std::is_same_v, hpx::source_location>) + void forall(Target&& t, F&& f, Args&&... 
args) + { + forall(HPX_CURRENT_SOURCE_LOCATION(), std::forward(t), + std::forward(f), std::forward(args)...); + } +} // namespace chplx diff --git a/library/include/chplx/util.hpp b/library/include/chplx/util.hpp index df2343e6..cc6bdd65 100644 --- a/library/include/chplx/util.hpp +++ b/library/include/chplx/util.hpp @@ -10,12 +10,14 @@ namespace chplx { -// register startup/shutdown code for a module -template struct registerModule { - - registerModule() { - hpx::register_pre_startup_function(&Module::__construct); - hpx::register_pre_shutdown_function(&Module::__destruct); - } -}; -} // namespace chplx + // register startup/shutdown code for a module + template + struct registerModule + { + registerModule() + { + hpx::register_pre_startup_function(&Module::__construct); + hpx::register_pre_shutdown_function(&Module::__destruct); + } + }; +} // namespace chplx diff --git a/library/src/detail/generate_annotation.cpp b/library/src/detail/generate_annotation.cpp new file mode 100644 index 00000000..e07ac7a1 --- /dev/null +++ b/library/src/detail/generate_annotation.cpp @@ -0,0 +1,26 @@ +// Copyright (c) 2023 Hartmut Kaiser +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include + +#include +#include + +namespace chplx::detail { + + std::string generate_annotation(hpx::source_location const& location) + { +#if defined(HPX_HAVE_THREAD_DESCRIPTION) + std::filesystem::path p(location.file_name()); + return hpx::util::format("%s(%d): %s", p.filename(), location.line(), + location.function_name()); +#else + return {}; +#endif + } +} // namespace chplx::detail diff --git a/library/src/locale.cpp b/library/src/locale.cpp index 5eb0edf4..1a9d160c 100644 --- a/library/src/locale.cpp +++ b/library/src/locale.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include #include From 09c922cc8a11c457862adda7a14941282691187d Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Sat, 2 Sep 2023 22:53:00 -0400 Subject: [PATCH 02/10] updated codegenerator --- backend/src/codegenvisitor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/src/codegenvisitor.cpp b/backend/src/codegenvisitor.cpp index fdd5db39..6a076453 100644 --- a/backend/src/codegenvisitor.cpp +++ b/backend/src/codegenvisitor.cpp @@ -569,7 +569,7 @@ struct StatementVisitor { //auto const& rk = std::get>(node->indexSet.kind); //auto & indices = rk->args; - os << "chplx::forLoop(chplx::Range{"; + os << "chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{"; if(node->indexSet.size() == 1) { ExprVisitor ev{os}; std::visit(ev, node->indexSet[0]); @@ -625,7 +625,7 @@ struct StatementVisitor { //auto const& rk = std::get>(node->indexSet.kind); //auto & indices = rk->args; - os << "chplx::forall(chplx::Range{"; + os << "chplx::forall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{"; if(node->indexSet.size() == 1) { ExprVisitor ev{os}; @@ -680,7 +680,7 @@ struct StatementVisitor { //auto const& rk = std::get>(node->indexSet.kind); //auto & indices = rk->args; - os << "chplx::coforall(chplx::Range{"; + os << 
"chplx::coforall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{"; /* if(std::holds_alternative(indices[0].kind)) { os << int_kind::value(indices[0].literal[0]); From 7c8d9ee31c2ca5fe705afca81fbccda20b63eb1a Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Sat, 2 Sep 2023 23:16:15 -0400 Subject: [PATCH 03/10] udpated good files --- backend/test/forall/forall.cpp.good | 30 ++++++++++++++--------------- backend/test/heat/heat.cpp.good | 8 ++++---- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/backend/test/forall/forall.cpp.good b/backend/test/forall/forall.cpp.good index 1472552c..03d62c4b 100644 --- a/backend/test/forall/forall.cpp.good +++ b/backend/test/forall/forall.cpp.good @@ -13,12 +13,12 @@ namespace forall { #line 39 "forall.chpl" chplx::Array> B(chplx::Range(0, 9)); #line 40 "forall.chpl" - chplx::forLoop(chplx::Range{0, 9}, [&](auto i) { + chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 9}, [&](auto i) { #line 41 "forall.chpl" B(i) = 1.000000; }); #line 43 "forall.chpl" - chplx::forall(chplx::Range{0, 9}, [&](auto i) { + chplx::forall(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 9}, [&](auto i) { #line 44 "forall.chpl" B(i) = 1.000000; }); @@ -31,73 +31,73 @@ namespace forall { #line 10 "forall.chpl" chplx::Array> A(chplx::Range(0, N)); #line 12 "forall.chpl" - chplx::forLoop(chplx::Range{0, 10}, [&](auto i) { + chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 10}, [&](auto i) { #line 13 "forall.chpl" std::cout << i << std::endl; }); #line 16 "forall.chpl" - chplx::forall(chplx::Range{0, 10}, [&](auto i) { + chplx::forall(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 10}, [&](auto i) { #line 17 "forall.chpl" std::cout << i << std::endl; }); #line 20 "forall.chpl" - chplx::forLoop(chplx::Range{0, N}, [&](auto i) { + chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, N}, [&](auto i) { #line 21 "forall.chpl" std::cout << i << std::endl; }); #line 24 "forall.chpl" - chplx::forall(chplx::Range{0, N}, [&](auto i) { 
+ chplx::forall(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, N}, [&](auto i) { #line 25 "forall.chpl" std::cout << i << std::endl; }); #line 28 "forall.chpl" - chplx::forLoop(chplx::Range{0, N + 1}, [&](auto i) { + chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, N + 1}, [&](auto i) { #line 29 "forall.chpl" A(i) = 1.000000; #line 30 "forall.chpl" std::cout << i << std::endl; }); #line 33 "forall.chpl" - chplx::forall(chplx::Range{0, N + 1}, [&](auto i) { + chplx::forall(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, N + 1}, [&](auto i) { #line 34 "forall.chpl" A(i) = 1.000000; #line 35 "forall.chpl" std::cout << i << std::endl; }); #line 48 "forall.chpl" - chplx::forLoop(chplx::Range{0, 9}, [&](auto i) { + chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 9}, [&](auto i) { #line 49 "forall.chpl" A(i) = i; }); #line 51 "forall.chpl" - chplx::forall(chplx::Range{0, 9}, [&](auto i) { + chplx::forall(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 9}, [&](auto i) { #line 52 "forall.chpl" A(i) = i; }); #line 54 "forall.chpl" chplx::Array> B(chplx::Range(0, 2)); #line 56 "forall.chpl" - chplx::coforall(chplx::Range{0, 2}, [&](auto tid) { + chplx::coforall(HPX_CURRENT_SOURCE_LOCATION(),HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 2}, [&](auto tid) { #line 57 "forall.chpl" B(tid) = tid; }); #line 60 "forall.chpl" - chplx::forLoop(chplx::Range{0, 2}, [&](auto i) { + chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 2}, [&](auto i) { #line 61 "forall.chpl" std::cout << B(i) << std::endl; }); #line 63 "forall.chpl" chplx::Array> C(chplx::Range(0, 2)); #line 64 "forall.chpl" - chplx::coforall(chplx::Range{0, 2}, [&](auto tid) { + chplx::coforall(HPX_CURRENT_SOURCE_LOCATION(),HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 2}, [&](auto tid) { #line 65 "forall.chpl" C(tid) = tid; }); #line 67 "forall.chpl" - chplx::forLoop(chplx::Range{0, 2}, [&](auto i) { + chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 2}, [&](auto i) { #line 68 
"forall.chpl" std::cout << C(i) << std::endl; }); } -} // namespace forall \ No newline at end of file +} // namespace forall diff --git a/backend/test/heat/heat.cpp.good b/backend/test/heat/heat.cpp.good index 212e1ab9..65c94746 100644 --- a/backend/test/heat/heat.cpp.good +++ b/backend/test/heat/heat.cpp.good @@ -14,7 +14,7 @@ namespace heat { #line 27 "heat.chpl" auto NX = nx + 1; #line 28 "heat.chpl" - chplx::forall(chplx::Range{1, NX - 1}, [&](auto i) { + chplx::forall(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{1, NX - 1}, [&](auto i) { #line 30 "heat.chpl" d2(i) = d(i) + (((dt * k) / (dx * dx)) * ((d(1 + i) + d(1 - i)) - (2 * d(i)))); @@ -49,7 +49,7 @@ namespace heat { #line 41 "heat.chpl" chplx::Array> data2(chplx::Range(0, NX)); #line 43 "heat.chpl" - chplx::forall(chplx::Range{0, NX}, [&](auto i) { + chplx::forall(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, NX}, [&](auto i) { #line 44 "heat.chpl" data(i) = 1 + (((i - 1) + nx) % nx); #line 45 "heat.chpl" @@ -58,7 +58,7 @@ namespace heat { #line 52 "heat.chpl" hpx::chrono::high_resolution_timer t; #line 54 "heat.chpl" - chplx::forLoop(chplx::Range{1, nt}, [&](auto t) { + chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{1, nt}, [&](auto t) { #line 55 "heat.chpl" update(data, data2); }); @@ -70,4 +70,4 @@ namespace heat { << "," << elapsed << ",0"; } -} // namespace heat \ No newline at end of file +} // namespace heat From de01aad0475afe9433859c8dddca3111cccd4b54 Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Sun, 3 Sep 2023 07:28:48 -0400 Subject: [PATCH 04/10] rollback on forLoop --- backend/test/forall/forall.cpp.good | 14 +++++++------- backend/test/heat/heat.cpp.good | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/backend/test/forall/forall.cpp.good b/backend/test/forall/forall.cpp.good index 03d62c4b..f2e71fa0 100644 --- a/backend/test/forall/forall.cpp.good +++ b/backend/test/forall/forall.cpp.good @@ -13,7 +13,7 @@ namespace forall { #line 39 "forall.chpl" chplx::Array> 
B(chplx::Range(0, 9)); #line 40 "forall.chpl" - chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 9}, [&](auto i) { + chplx::forLoop(chplx::Range{0, 9}, [&](auto i) { #line 41 "forall.chpl" B(i) = 1.000000; }); @@ -31,7 +31,7 @@ namespace forall { #line 10 "forall.chpl" chplx::Array> A(chplx::Range(0, N)); #line 12 "forall.chpl" - chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 10}, [&](auto i) { + chplx::forLoop(chplx::Range{0, 10}, [&](auto i) { #line 13 "forall.chpl" std::cout << i << std::endl; }); @@ -41,7 +41,7 @@ namespace forall { std::cout << i << std::endl; }); #line 20 "forall.chpl" - chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, N}, [&](auto i) { + chplx::forLoop(chplx::Range{0, N}, [&](auto i) { #line 21 "forall.chpl" std::cout << i << std::endl; }); @@ -51,7 +51,7 @@ namespace forall { std::cout << i << std::endl; }); #line 28 "forall.chpl" - chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, N + 1}, [&](auto i) { + chplx::forLoop(chplx::Range{0, N + 1}, [&](auto i) { #line 29 "forall.chpl" A(i) = 1.000000; #line 30 "forall.chpl" @@ -65,7 +65,7 @@ namespace forall { std::cout << i << std::endl; }); #line 48 "forall.chpl" - chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 9}, [&](auto i) { + chplx::forLoop(chplx::Range{0, 9}, [&](auto i) { #line 49 "forall.chpl" A(i) = i; }); @@ -82,7 +82,7 @@ namespace forall { B(tid) = tid; }); #line 60 "forall.chpl" - chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 2}, [&](auto i) { + chplx::forLoop(chplx::Range{0, 2}, [&](auto i) { #line 61 "forall.chpl" std::cout << B(i) << std::endl; }); @@ -94,7 +94,7 @@ namespace forall { C(tid) = tid; }); #line 67 "forall.chpl" - chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 2}, [&](auto i) { + chplx::forLoop(chplx::Range{0, 2}, [&](auto i) { #line 68 "forall.chpl" std::cout << C(i) << std::endl; }); diff --git a/backend/test/heat/heat.cpp.good b/backend/test/heat/heat.cpp.good index 
65c94746..41b3ac1d 100644 --- a/backend/test/heat/heat.cpp.good +++ b/backend/test/heat/heat.cpp.good @@ -58,7 +58,7 @@ namespace heat { #line 52 "heat.chpl" hpx::chrono::high_resolution_timer t; #line 54 "heat.chpl" - chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{1, nt}, [&](auto t) { + chplx::forLoop(chplx::Range{1, nt}, [&](auto t) { #line 55 "heat.chpl" update(data, data2); }); From 6de029caa147b76068c58f3408f7f19d722c657d Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Sun, 3 Sep 2023 07:44:38 -0400 Subject: [PATCH 05/10] rollingback codegen modification on --- backend/src/codegenvisitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/codegenvisitor.cpp b/backend/src/codegenvisitor.cpp index 6a076453..5f4d6b8c 100644 --- a/backend/src/codegenvisitor.cpp +++ b/backend/src/codegenvisitor.cpp @@ -569,7 +569,7 @@ struct StatementVisitor { //auto const& rk = std::get>(node->indexSet.kind); //auto & indices = rk->args; - os << "chplx::forLoop(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{"; + os << "chplx::forLoop(chplx::Range{"; if(node->indexSet.size() == 1) { ExprVisitor ev{os}; std::visit(ev, node->indexSet[0]); From b42d933a7df415cb22058e37b2d2ee13250263f3 Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Sun, 3 Sep 2023 11:12:42 -0400 Subject: [PATCH 06/10] spacing issue in validation file --- backend/test/forall/forall.cpp.good | 14 +++++++------- backend/test/heat/heat.cpp.good | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/backend/test/forall/forall.cpp.good b/backend/test/forall/forall.cpp.good index f2e71fa0..e95f67ba 100644 --- a/backend/test/forall/forall.cpp.good +++ b/backend/test/forall/forall.cpp.good @@ -18,7 +18,7 @@ namespace forall { B(i) = 1.000000; }); #line 43 "forall.chpl" - chplx::forall(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 9}, [&](auto i) { + chplx::forall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 9}, [&](auto i) { #line 44 "forall.chpl" B(i) = 1.000000; }); @@ 
-36,7 +36,7 @@ namespace forall { std::cout << i << std::endl; }); #line 16 "forall.chpl" - chplx::forall(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 10}, [&](auto i) { + chplx::forall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 10}, [&](auto i) { #line 17 "forall.chpl" std::cout << i << std::endl; }); @@ -46,7 +46,7 @@ namespace forall { std::cout << i << std::endl; }); #line 24 "forall.chpl" - chplx::forall(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, N}, [&](auto i) { + chplx::forall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, N}, [&](auto i) { #line 25 "forall.chpl" std::cout << i << std::endl; }); @@ -58,7 +58,7 @@ namespace forall { std::cout << i << std::endl; }); #line 33 "forall.chpl" - chplx::forall(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, N + 1}, [&](auto i) { + chplx::forall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, N + 1}, [&](auto i) { #line 34 "forall.chpl" A(i) = 1.000000; #line 35 "forall.chpl" @@ -70,14 +70,14 @@ namespace forall { A(i) = i; }); #line 51 "forall.chpl" - chplx::forall(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 9}, [&](auto i) { + chplx::forall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 9}, [&](auto i) { #line 52 "forall.chpl" A(i) = i; }); #line 54 "forall.chpl" chplx::Array> B(chplx::Range(0, 2)); #line 56 "forall.chpl" - chplx::coforall(HPX_CURRENT_SOURCE_LOCATION(),HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 2}, [&](auto tid) { + chplx::coforall(HPX_CURRENT_SOURCE_LOCATION(),HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 2}, [&](auto tid) { #line 57 "forall.chpl" B(tid) = tid; }); @@ -89,7 +89,7 @@ namespace forall { #line 63 "forall.chpl" chplx::Array> C(chplx::Range(0, 2)); #line 64 "forall.chpl" - chplx::coforall(HPX_CURRENT_SOURCE_LOCATION(),HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, 2}, [&](auto tid) { + chplx::coforall(HPX_CURRENT_SOURCE_LOCATION(),HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 2}, [&](auto tid) { #line 65 "forall.chpl" C(tid) = tid; }); diff --git 
a/backend/test/heat/heat.cpp.good b/backend/test/heat/heat.cpp.good index 41b3ac1d..f7c07a15 100644 --- a/backend/test/heat/heat.cpp.good +++ b/backend/test/heat/heat.cpp.good @@ -14,7 +14,7 @@ namespace heat { #line 27 "heat.chpl" auto NX = nx + 1; #line 28 "heat.chpl" - chplx::forall(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{1, NX - 1}, [&](auto i) { + chplx::forall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{1, NX - 1}, [&](auto i) { #line 30 "heat.chpl" d2(i) = d(i) + (((dt * k) / (dx * dx)) * ((d(1 + i) + d(1 - i)) - (2 * d(i)))); @@ -49,7 +49,7 @@ namespace heat { #line 41 "heat.chpl" chplx::Array> data2(chplx::Range(0, NX)); #line 43 "heat.chpl" - chplx::forall(HPX_CURRENT_SOURCE_LOCATION(),chplx::Range{0, NX}, [&](auto i) { + chplx::forall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, NX}, [&](auto i) { #line 44 "heat.chpl" data(i) = 1 + (((i - 1) + nx) % nx); #line 45 "heat.chpl" From 2cfebd3417f68319b9bf0f0650964917f466b084 Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Sun, 3 Sep 2023 15:56:40 -0400 Subject: [PATCH 07/10] fixed overaggressive replace regex --- backend/test/forall/forall.cpp.good | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/test/forall/forall.cpp.good b/backend/test/forall/forall.cpp.good index e95f67ba..88f8b583 100644 --- a/backend/test/forall/forall.cpp.good +++ b/backend/test/forall/forall.cpp.good @@ -77,7 +77,7 @@ namespace forall { #line 54 "forall.chpl" chplx::Array> B(chplx::Range(0, 2)); #line 56 "forall.chpl" - chplx::coforall(HPX_CURRENT_SOURCE_LOCATION(),HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 2}, [&](auto tid) { + chplx::coforall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 2}, [&](auto tid) { #line 57 "forall.chpl" B(tid) = tid; }); @@ -89,7 +89,7 @@ namespace forall { #line 63 "forall.chpl" chplx::Array> C(chplx::Range(0, 2)); #line 64 "forall.chpl" - chplx::coforall(HPX_CURRENT_SOURCE_LOCATION(),HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 2}, [&](auto tid) { + 
chplx::coforall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 2}, [&](auto tid) { #line 65 "forall.chpl" C(tid) = tid; }); From 704732249e03f2f691504e2dbcd42f54eb6aadbc Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Sun, 3 Sep 2023 22:44:47 -0400 Subject: [PATCH 08/10] fixed formatting w/clang-format --- backend/test/forall/forall.cpp.good | 53 ++++++++++++++++------------- backend/test/heat/heat.cpp.good | 21 +++++++----- 2 files changed, 42 insertions(+), 32 deletions(-) diff --git a/backend/test/forall/forall.cpp.good b/backend/test/forall/forall.cpp.good index 88f8b583..63fb4813 100644 --- a/backend/test/forall/forall.cpp.good +++ b/backend/test/forall/forall.cpp.good @@ -18,10 +18,11 @@ namespace forall { B(i) = 1.000000; }); #line 43 "forall.chpl" - chplx::forall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 9}, [&](auto i) { + chplx::forall( + HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 9}, [&](auto i) { #line 44 "forall.chpl" - B(i) = 1.000000; - }); + B(i) = 1.000000; + }); }; void __thisModule::__main() @@ -36,20 +37,22 @@ namespace forall { std::cout << i << std::endl; }); #line 16 "forall.chpl" - chplx::forall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 10}, [&](auto i) { + chplx::forall( + HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 10}, [&](auto i) { #line 17 "forall.chpl" - std::cout << i << std::endl; - }); + std::cout << i << std::endl; + }); #line 20 "forall.chpl" chplx::forLoop(chplx::Range{0, N}, [&](auto i) { #line 21 "forall.chpl" std::cout << i << std::endl; }); #line 24 "forall.chpl" - chplx::forall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, N}, [&](auto i) { + chplx::forall( + HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, N}, [&](auto i) { #line 25 "forall.chpl" - std::cout << i << std::endl; - }); + std::cout << i << std::endl; + }); #line 28 "forall.chpl" chplx::forLoop(chplx::Range{0, N + 1}, [&](auto i) { #line 29 "forall.chpl" @@ -58,29 +61,32 @@ namespace forall { std::cout << i << std::endl; }); #line 33 "forall.chpl" 
- chplx::forall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, N + 1}, [&](auto i) { + chplx::forall( + HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, N + 1}, [&](auto i) { #line 34 "forall.chpl" - A(i) = 1.000000; + A(i) = 1.000000; #line 35 "forall.chpl" - std::cout << i << std::endl; - }); + std::cout << i << std::endl; + }); #line 48 "forall.chpl" chplx::forLoop(chplx::Range{0, 9}, [&](auto i) { #line 49 "forall.chpl" A(i) = i; }); #line 51 "forall.chpl" - chplx::forall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 9}, [&](auto i) { + chplx::forall( + HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 9}, [&](auto i) { #line 52 "forall.chpl" - A(i) = i; - }); + A(i) = i; + }); #line 54 "forall.chpl" chplx::Array> B(chplx::Range(0, 2)); #line 56 "forall.chpl" - chplx::coforall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 2}, [&](auto tid) { + chplx::coforall( + HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 2}, [&](auto tid) { #line 57 "forall.chpl" - B(tid) = tid; - }); + B(tid) = tid; + }); #line 60 "forall.chpl" chplx::forLoop(chplx::Range{0, 2}, [&](auto i) { #line 61 "forall.chpl" @@ -89,10 +95,11 @@ namespace forall { #line 63 "forall.chpl" chplx::Array> C(chplx::Range(0, 2)); #line 64 "forall.chpl" - chplx::coforall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 2}, [&](auto tid) { + chplx::coforall( + HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, 2}, [&](auto tid) { #line 65 "forall.chpl" - C(tid) = tid; - }); + C(tid) = tid; + }); #line 67 "forall.chpl" chplx::forLoop(chplx::Range{0, 2}, [&](auto i) { #line 68 "forall.chpl" @@ -100,4 +107,4 @@ namespace forall { }); } -} // namespace forall +} // namespace forall \ No newline at end of file diff --git a/backend/test/heat/heat.cpp.good b/backend/test/heat/heat.cpp.good index f7c07a15..b3a03360 100644 --- a/backend/test/heat/heat.cpp.good +++ b/backend/test/heat/heat.cpp.good @@ -14,11 +14,13 @@ namespace heat { #line 27 "heat.chpl" auto NX = nx + 1; #line 28 "heat.chpl" - 
chplx::forall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{1, NX - 1}, [&](auto i) { + chplx::forall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{1, NX - 1}, + [&](auto i) { #line 30 "heat.chpl" - d2(i) = d(i) + - (((dt * k) / (dx * dx)) * ((d(1 + i) + d(1 - i)) - (2 * d(i)))); - }); + d2(i) = d(i) + + (((dt * k) / (dx * dx)) * + ((d(1 + i) + d(1 - i)) - (2 * d(i)))); + }); #line 32 "heat.chpl" d2(0) = d2(1 - NX); #line 33 "heat.chpl" @@ -49,12 +51,13 @@ namespace heat { #line 41 "heat.chpl" chplx::Array> data2(chplx::Range(0, NX)); #line 43 "heat.chpl" - chplx::forall(HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, NX}, [&](auto i) { + chplx::forall( + HPX_CURRENT_SOURCE_LOCATION(), chplx::Range{0, NX}, [&](auto i) { #line 44 "heat.chpl" - data(i) = 1 + (((i - 1) + nx) % nx); + data(i) = 1 + (((i - 1) + nx) % nx); #line 45 "heat.chpl" - data2(i) = 0; - }); + data2(i) = 0; + }); #line 52 "heat.chpl" hpx::chrono::high_resolution_timer t; #line 54 "heat.chpl" @@ -70,4 +73,4 @@ namespace heat { << "," << elapsed << ",0"; } -} // namespace heat +} // namespace heat \ No newline at end of file From 73dddc2435cbde3c051c9913800cb0a7349774e1 Mon Sep 17 00:00:00 2001 From: Shreyas Atre Date: Tue, 16 Jan 2024 15:01:12 +0530 Subject: [PATCH 09/10] Benchmarking final stream and gups --- backend/test/CMakeLists.txt | 1 + backend/test/gups.chpl | 119 +++++++++++ backend/test/gups_cpp.chpl | 121 +++++++++++ backend/test/stream.chpl | 76 +++++++ backend/test/stream_cpp.chpl | 65 ++++++ bench-gups-run.sh | 13 ++ bench-gups.bash | 236 ++++++++++++++++++++++ bench-run.sh | 13 ++ bench-triad-run.sh | 13 ++ bench-triad.bash | 236 ++++++++++++++++++++++ bench.bash | 254 ++++++++++++++++++++++++ frontend/BUILD_VERSION | 2 +- frontend/lib/util/git-version.cpp | 2 +- publications/fall2023/fall2023paper.def | 59 +++--- publications/fall2023/gups.chpl | 119 +++++++++++ publications/fall2023/heat.chpl.bkp | 70 +++++++ publications/fall2023/stream.chpl | 76 +++++++ 17 files changed, 1448 
insertions(+), 27 deletions(-) create mode 100644 backend/test/gups.chpl create mode 100644 backend/test/gups_cpp.chpl create mode 100644 backend/test/stream.chpl create mode 100644 backend/test/stream_cpp.chpl create mode 100644 bench-gups-run.sh create mode 100644 bench-gups.bash create mode 100644 bench-run.sh create mode 100644 bench-triad-run.sh create mode 100644 bench-triad.bash create mode 100644 bench.bash create mode 100644 publications/fall2023/gups.chpl create mode 100644 publications/fall2023/heat.chpl.bkp create mode 100644 publications/fall2023/stream.chpl diff --git a/backend/test/CMakeLists.txt b/backend/test/CMakeLists.txt index 73c929f9..fdcc4b0e 100644 --- a/backend/test/CMakeLists.txt +++ b/backend/test/CMakeLists.txt @@ -26,6 +26,7 @@ set(tests fnc forall tup + stream ) foreach(test ${tests}) diff --git a/backend/test/gups.chpl b/backend/test/gups.chpl new file mode 100644 index 00000000..e4659e81 --- /dev/null +++ b/backend/test/gups.chpl @@ -0,0 +1,119 @@ +// test/studies/hpcc/RA/ra-randstream-hpcc06.chpl +// test/studies/hpcc/common/probSize-hpcc06.chpl + +use CTypes; +use Time; + +extern proc getenv(name : c_string) : c_string; + +proc computeProblemSize(numArrays : int, physicalMemoryBytes : int, memRatio : int, returnLog2 : bool) { + var totalMem : int = physicalMemoryBytes; + var memoryTarget : int = totalMem / memRatio; + var numBytesPerType : int = c_sizeof(int) : int; + var bytesPerIndex : int = numArrays * numBytesPerType; + var numIndices : int = memoryTarget / bytesPerIndex; + + var lgProblemSize : int = log2(numIndices); + + if (returnLog2) { + numIndices = 2**lgProblemSize; + if (numIndices * bytesPerIndex <= memoryTarget) { + numIndices *= 2; + lgProblemSize += 1; + } + } + + return if returnLog2 then lgProblemSize else numIndices; +} + +proc getNextRandom(x : int) : int { + var poly = 0x7; + var hirandbit = 0x1 << (64-1); + return (x << 1) ^ (if (x & hirandbit) then poly else 0); +} + +proc computeM2Values(m2 : [] int, count : 
int) { + var nextval = 0x1; + for i in 0..count { + m2[i] = nextval; + nextval = getNextRandom(nextval); + nextval = getNextRandom(nextval); + } +} + +proc getNthRandom(N : int, m2 : [] int, m2count :int) { + var period = 0x7fffffff/7 : int; + + var n = N % period : int; + + var ran = 0x2; + if(n <= 0){ + n = 1; + } + var i = log2(n); + var val = 0; + var J = 0; + for j in 0..i { + J = i-j; + for k in 0..m2count { + if ((ran >> j) & 1) then val ^= m2(j); + } + ran = val; + if ((n >> J) & 1) then ran = getNextRandom(ran); + } + + return ran; +} + +proc RAStream(vals : [] int, vals_idx : int, numvals : int, start : int, m2 : [] int, m2count : int) { + var val = getNthRandom(start, m2, m2count); + var base = vals_idx * numvals; + var idx = 0; + for i in 0..numvals { + idx = base + i; + idx = idx % numvals; + val = getNextRandom(val); + vals[idx] = val; + } +} + +param randWidth = 64; +param physicalMemory = 16437; +var memRatio = 4; +var numTables = 1; + +var m2 : [0..randWidth] int; +computeM2Values(m2, randWidth); + +var N_U = 0; +var n = 0; +n = computeProblemSize(numTables, physicalMemory, memRatio, true); +N_U = 2**(n+2); + +var z = 0; +z = N_U * N_U; +var randval: [0..z] int; + +var m = 0; +m = 2**(n); +var indexMask = m - 1; + +var T : [0..m] int; + +for i in 0..m { + T[i] = i; +} + +var timer : stopwatch; + +timer.start(); +forall block in 0..N_U { + RAStream(randval, block, N_U, 0, m2, randWidth); + for r in 0..N_U { + T ( (randval ( (block * N_U + r) % z ) & indexMask) % m ) ^= r; + } +} +timer.stop(); + +writeln(getenv('CHPL_RT_NUM_THREADS_PER_LOCALE'.c_str()):string,",", timer.elapsed()); + diff --git a/backend/test/gups_cpp.chpl b/backend/test/gups_cpp.chpl new file mode 100644 index 00000000..9c703a0c --- /dev/null +++ b/backend/test/gups_cpp.chpl @@ -0,0 +1,121 @@ +// test/studies/hpcc/RA/ra-randstream-hpcc06.chpl + +proc computeProblemSize(numArrays : int, physicalMemoryBytes : int, memRatio : int, returnLog2 : bool) : int { + var totalMem = 
physicalMemoryBytes; + var memoryTarget = totalMem / memRatio; + var numBytesPerType = 0; + inlinecxx("{} = sizeof(std::int64_t);", numBytesPerType); + var bytesPerIndex = numArrays * numBytesPerType; + var numIndices = memoryTarget / bytesPerIndex; + + var lgProblemSize = 0; + inlinecxx("{} = std::log2(numIndices);", lgProblemSize); + + if (returnLog2) { + //numIndices = 2**lgProblemSize; + inlinecxx("{} = std::pow(2,{});", numIndices, lgProblemSize); + inlinecxx("if({} * {} <= {})", numIndices, bytesPerIndex, memoryTarget); + inlinecxx("{} *= 2;", numIndices); + inlinecxx("if({} * {} <= {})", numIndices, bytesPerIndex, memoryTarget); + inlinecxx("{} += 1;", lgProblemSize); + } + var retval : int = 0; + inlinecxx("{} = {} ? {} : {};", retval, returnLog2, lgProblemSize, numIndices); + return retval; +} + +proc getNextRandom(x : int) : int { + var poly = 0; + inlinecxx("{} = 0x7;", poly); + var hirandbit = 0; + inlinecxx("{} = 0x1 << (64-1);", hirandbit); + inlinecxx("{} = ({} << 1) ^ ( ({} & {}) ? 
{} : 0);", x, x, x, hirandbit, poly); + return x; +} + +proc computeM2Values(m2 : [] int, count : int) :bool { + var nextval = 0; + inlinecxx("{} = 0x1;", nextval); + for i in 0..count { + m2[i] = nextval; + nextval = getNextRandom(nextval); + nextval = getNextRandom(nextval); + } + + return true; +} + +proc getNthRandom(N : int, m2 : [] int, m2count : int) { + var period = 0x7fffffff/7 ; + + var n = N % period ; + + inlinecxx("if ({} <= {} )",n,0); + inlinecxx("{} = 1;", n); + var ran = 0x2; + var i = 0; + inlinecxx("{} = std::log2(n);", i); + var val = 0; + var J = 0; + for j in 0..i { + J = i-j; + for k in 0..m2count { + inlinecxx("if (({} >> {}) & 1) {} ^= {} [ {} ];", ran, k, val, m2, k); + } + ran = val; + inlinecxx("if(({} >> {}) & 1) {} = getNextRandom({});", n, J, ran, ran); + } + + return ran; +} + +proc RAStream(vals : [] int, vals_idx : int, numvals : int, start : int, m2 : [] int, m2count : int) { + var val = getNthRandom(start, m2, m2count); + var base = vals_idx * numvals; + var idx = 0; + for i in 0..numvals { + idx = base + i; + idx = idx % numvals; + val = getNextRandom(val); + vals[idx] = val; + } +} + +param randWidth = 64; +param physicalMemory = 16437; +var memRatio = 4; +var numTables = 1; + +var m2 : [0..randWidth] int; +var val = computeM2Values(m2, randWidth); + +var N_U = 0; +var n = 1; +n = computeProblemSize(numTables, physicalMemory, memRatio, true); +inlinecxx("{} = std::pow(2, {}+2);", N_U, n); + +var z = 0; +inlinecxx("{} = {} * {};", z, N_U, N_U); +var randval : [0..z] int; + +var m = 0; +inlinecxx("{} = std::pow(2, {});", m, n); +var indexMask = m - 1; + +var T : [0..m] int; + +for i in 0..m { + T[i] = i; +} + +inlinecxx("hpx::chrono::high_resolution_timer gups;"); + +forall block in 0..N_U { + RAStream(randval, block, N_U, 0, m2, randWidth); + for r in 0..N_U { + inlinecxx("{} [ ({} [ ({} * {} + {}) % {} ] & {}) % {} ] ^= {};", T, randval, block, N_U, r, z, indexMask, m, r); + } +} + +inlinecxx("auto elapsed = gups.elapsed();"); 
+inlinecxx("std::cout << hpx::resource::get_num_threads() << \",\" << elapsed << std::endl;"); diff --git a/backend/test/stream.chpl b/backend/test/stream.chpl new file mode 100644 index 00000000..a3c7a39e --- /dev/null +++ b/backend/test/stream.chpl @@ -0,0 +1,76 @@ +use CTypes; + +use Time; + +extern proc getenv(name : c_string) : c_string; +extern proc sizeof(type T): uint(32); +extern const RAND_MAX: c_int; +extern proc rand(): c_int; + +extern proc srand(seed: c_uint); + +srand(0); + +proc randindex() { + return rand()%5; +} + +proc streamBenchmark(n: int) { + const iterations = 10; + + writeln("STREAM Benchmark (Chapel - Embarrassingly Parallel)"); + writeln("==================================================="); + + var A:[1..n] real; + var B:[1..n] real; + var C:[1..n] real; + var D:[1..n] real; + + // Initialize arrays + forall i in 1..n { + A[i] = randindex(); + B[i] = randindex(); + C[i] = randindex(); + D[i] = 0.0; + } + + var copy_t: stopwatch; + copy_t.start(); + + // Benchmark Copy + forall it in 1..iterations do { + C[it] = A[it]; + } + copy_t.stop(); + writeln("Copy: ",getenv('CHPL_RT_NUM_THREADS_PER_LOCALE'.c_str()):string, ",",copy_t.elapsed()); + + var scale_t: stopwatch; + scale_t.start(); + // Benchmark Scale + forall it in 1..iterations do{ + C[it] = 3.0 * A[it]; + } + writeln("Scale: ",getenv('CHPL_RT_NUM_THREADS_PER_LOCALE'.c_str()):string, ",", scale_t.elapsed()); + + var add_t: stopwatch; + add_t.start(); + + // Benchmark Add + forall it in 1..iterations do{ + C[it] = A[it] + B[it]; + } + writeln("Add: ",getenv('CHPL_RT_NUM_THREADS_PER_LOCALE'.c_str()):string, ",", add_t.elapsed()); + + var triad_t:stopwatch(); + triad_t.start(); + + // Benchmark Triad + forall it in 1..iterations do{ + D[it] = A[it] + 3.0 * B[it]; + } + writeln("Triad: ",getenv('CHPL_RT_NUM_THREADS_PER_LOCALE'.c_str()):string, ",", triad_t.elapsed()); +} + +// Example usage with an array size of 1000000 +const arraySize = 1000000; +streamBenchmark(arraySize); diff --git 
a/backend/test/stream_cpp.chpl b/backend/test/stream_cpp.chpl new file mode 100644 index 00000000..2e61b9eb --- /dev/null +++ b/backend/test/stream_cpp.chpl @@ -0,0 +1,65 @@ +proc randindex() { + //inlinecxx("#include "); + //return rand()%5; + var ret = 0; + inlinecxx("{} = rand()%5;", ret); + return ret; +} + + +proc streamBenchmark() { + const iterations = 10; + +const arraySize = 1000000; + //writeln("STREAM Benchmark (Chapel - Embarrassingly Parallel)"); + //writeln("==================================================="); + + var A:[1..arraySize] real; + var B:[1..arraySize] real; + var C:[1..arraySize] real; + var D:[1..arraySize] real; + + // Initialize arrays + forall i in 1..arraySize { + A[i] = randindex(); + B[i] = randindex(); + C[i] = randindex(); + D[i] = 0.0; + } + + + // Benchmark Copy +inlinecxx("hpx::chrono::high_resolution_timer cpy;"); + forall it in 1..iterations do { + C[it] = A[it]; + } +inlinecxx("auto elapsed = cpy.elapsed();"); +inlinecxx("std::cout << \"Copy: \" << hpx::resource::get_num_threads() << \",\" << elapsed << \"\\n\";"); + + // Benchmark Scale +inlinecxx("hpx::chrono::high_resolution_timer scale;"); + forall it in 1..iterations do{ + C[it] = 3.0 * A[it]; + } +inlinecxx("elapsed = scale.elapsed();"); +inlinecxx("std::cout << \"Scale: \" << hpx::resource::get_num_threads() << \",\" << elapsed << \"\\n\";"); + + // Benchmark Add +inlinecxx("hpx::chrono::high_resolution_timer add;"); + forall it in 1..iterations do{ + C[it] = A[it] + B[it]; + } +inlinecxx("elapsed = add.elapsed();"); +inlinecxx("std::cout << \"Add: \" << hpx::resource::get_num_threads() << \",\" << elapsed << \"\\n\";"); + + // Benchmark Triad +inlinecxx("hpx::chrono::high_resolution_timer triad;"); + forall it in 1..iterations do{ + D[it] = A[it] + 3.0 * B[it]; + } +inlinecxx("elapsed = triad.elapsed();"); +inlinecxx("std::cout << \"Triad: \" << hpx::resource::get_num_threads() << \",\" << elapsed << \"\\n\";"); +} + +// Example usage with an array size of 1000000 
+streamBenchmark(); diff --git a/bench-gups-run.sh b/bench-gups-run.sh new file mode 100644 index 00000000..fb97107c --- /dev/null +++ b/bench-gups-run.sh @@ -0,0 +1,13 @@ +#!/bin/bash +#SBATCH --time=24:00:00 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=64 +#SBATCH --exclusive +#SBATCH --partition=cuda-A100-intel +#SBATCH -o slurm-%j.out-%N # optional, name of the stdout, using the job number (%j) and the hostname of the node (%N) +#SBATCH -e slurm-%j.err-%N # optional, name of the stderr, using job and hostname values + + +singularity run --bind $(pwd):/root/chplx -i ../singularity-images/noapexHPX.sif /bin/bash -c "cd /root/chplx && bash bench-gups.bash -v" + diff --git a/bench-gups.bash b/bench-gups.bash new file mode 100644 index 00000000..7cf1d98e --- /dev/null +++ b/bench-gups.bash @@ -0,0 +1,236 @@ +#!/bin/bash + +set -e + +export PATH="/opt/bin:$PATH" +TEMP_HEAT_DIR="/tmp/chplx-heat-$$" +CURRENT_DIR="$(pwd)" +export CC=clang-15 +export CXX=clang++-15 +export APEX_DISABLE=1 # disable apex by default +VERBOSE=0 +APEX_INCL=0 +APEX_CMD="/opt/bin/apex_exec --apex:otf2 --apex:csv --apex:gtrace --apex:taskgraph --apex:postprocess --apex:source --apex:cpuinfo --apex:meminfo --apex:status" +APEX_STORE_DIR="$HOME"/apex.$$ + +build_chplx_no_flags() { + mkdir -p build + cd build + rm -rf * + cmake .. -DCHPL_HOME="$CURRENT_DIR"/frontend -DCMAKE_CXX_FLAGS="-undefined dynamic_lookup" -G Ninja 2>/dev/null 1>/dev/null #TODO: Generalize + ninja >/dev/null + cd "$CURRENT_DIR" +} + +build_chplx_debug_flag() { + mkdir -p build + cd build + rm -rf * + cmake .. -DCHPL_HOME="$CURRENT_DIR"/frontend -DCMAKE_CXX_FLAGS="-undefined dynamic_lookup" -DCMAKE_BUILD_TYPE=Debug -G Ninja 2>/dev/null 1>/dev/null #TODO: Generalize + ninja >/dev/null + cd "$CURRENT_DIR" +} + +build_chplx_reldebug_flag() { + mkdir -p build + cd build + rm -rf * + cmake .. 
-DCHPL_HOME="$CURRENT_DIR"/frontend -DCMAKE_CXX_FLAGS="-undefined dynamic_lookup" -DCMAKE_BUILD_TYPE=RelWithDebInfo -G Ninja 2>/dev/null 1>/dev/null #TODO: Generalize + ninja >/dev/null + cd "$CURRENT_DIR" +} + +build_chplx_rel_flag() { + mkdir -p build + cd build + rm -rf * + chpl "$CURRENT_DIR"/publications/fall2023/gups.chpl -o "$CURRENT_DIR"/build/gups_chapel >/dev/null + cmake .. -DCHPL_HOME="$CURRENT_DIR"/frontend -DCMAKE_CXX_FLAGS="-undefined dynamic_lookup -DCHPLX_NO_SOURCE_LOCATION" -DCMAKE_BUILD_TYPE=Release -G Ninja 2>/dev/null 1>/dev/null #TODO: Generalize + ninja >/dev/null + + mkdir -p gups_chplx + ./backend/chplx -f ../backend/test/gups_cpp.chpl -o gups_chplx + cd gups_chplx + mkdir -p build + cd build + cmake .. -G Ninja -DChplx_DIR="$CURRENT_DIR"/build/library/lib/cmake/Chplx -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-DCHPLX_NO_SOURCE_LOCATION" 2>/dev/null 1>/dev/null + ninja >/dev/null + cd "$CURRENT_DIR" +} + +build_heat_release() { + cd "$TEMP_HEAT_DIR" + mkdir -p build + cd build + rm -rf * + cmake .. -DChplx_DIR="$CURRENT_DIR"/build/library/lib/cmake/Chplx -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-DCHPLX_NO_SOURCE_LOCATION" -G Ninja 2>/dev/null 1>/dev/null #TODO: generalize + ninja >/dev/null + + chpl "$CURRENT_DIR"/publications/fall2023/heat.chpl --fast -o "$CURRENT_DIR"/build/heat_chapel >/dev/null + + echo "Release builds (Chplx with Release build type and heat with release build type) amd chpl with --fast" +} + +build_heat_relwithdebinfo() { + cd "$TEMP_HEAT_DIR" + mkdir -p build + cd build + rm -rf * + cmake .. 
-DChplx_DIR="$CURRENT_DIR"/build/library/lib/cmake/Chplx -DCMAKE_BUILD_TYPE=RelWithDebInfo -G Ninja 2>/dev/null 1>/dev/null #TODO: generalize + ninja >/dev/null + + chpl "$CURRENT_DIR"/publications/fall2023/heat.chpl --fast -o "$CURRENT_DIR"/build/heat_chapel >/dev/null + + echo "Chplx Release with Debug info and chpl with --fast" +} + +build_heat_no_flags() { + cd "$TEMP_HEAT_DIR" + mkdir -p build + cd build + rm -rf * + cmake .. -DChplx_DIR="$CURRENT_DIR"/build/library/lib/cmake/Chplx -G Ninja 2>/dev/null 1>/dev/null #TODO: generalize + ninja >/dev/null + + chpl "$CURRENT_DIR"/publications/fall2023/heat.chpl -o "$CURRENT_DIR"/build/heat_chapel >/dev/null + + echo "No flags to both" +} + +build_heat_no_flags_chpl_baseline() { + cd "$TEMP_HEAT_DIR" + mkdir -p build + cd build + rm -rf * + cmake .. -DChplx_DIR="$CURRENT_DIR"/build/library/lib/cmake/Chplx -DCMAKE_BUILD_TYPE=Debug -G Ninja 2>/dev/null 1>/dev/null #TODO: generalize + ninja >/dev/null + + chpl "$CURRENT_DIR"/publications/fall2023/heat.chpl --baseline -o "$CURRENT_DIR"/build/heat_chapel >/dev/null + + echo "HPX Debug and chapel with --baseline" +} + +calc() { awk "BEGIN{print $*}"; } + +doit_hpx() { + echo "Benchmarking ChplX" + local _dir_meta_name_="$1" + local _prefix_="" + if [ $APEX_INCL -eq 1 ]; then + _prefix_="${APEX_CMD}" + fi + for i in {4..64}; do + sum=0.000000 + mkdir -p "$APEX_STORE_DIR/$_dir_meta_name_/proc$i" + for j in {1..5}; do + local opt=$($_prefix_ $CURRENT_DIR/build/gups_chplx/build/gups_cpp --hpx:threads=$i) + #echo $opt + sum=$(calc "$sum + $(echo $opt | awk -F',' '{print $2}')") + #echo "$opt" >"$APEX_STORE_DIR/$_dir_meta_name_/proc$i"/runlog + done + local _procs_=$(echo $opt | awk -F',' '{print $1}') + echo "$_procs_,$(calc $sum / 5)" + #echo $opt + if [ $APEX_INCL -eq 1 ]; then + cp -r $TEMP_HEAT_DIR/build/OTF2_archive "$APEX_STORE_DIR/$_dir_meta_name_/proc$i" + fi + done +} + +doit_chapel() { + echo "Benchmarking Chapel" + for i in {4..64}; do + sum=0.000000 + for j in 
{1..5}; do + local opt=$(CHPL_RT_NUM_THREADS_PER_LOCALE_QUIET=yes MAX_LOGICAL=$i MAX_PHYSICAL=$i CHPL_RT_NUM_THREADS_PER_LOCALE=$i "$CURRENT_DIR"/build/gups_chapel) + #echo $opt + sum=$(calc "$sum + $(echo $opt | awk -F',' '{print $2}')") + done + local _procs_=$(echo $opt | awk -F',' '{print $1}') + echo "$_procs_,$(calc $sum / 5)" + #echo $opt + done +} + +checks() { + if [ -d ./.git ]; then + echo "This $(pwd) is assumed to be the parent directory of the chplx project" + else + echo ".git not found at $(pwd)" + echo "exiting!" + exit 1 + fi + + if git status | grep -q "paper_patches"; then + echo "paper_patches is the correct branch" + else + echo "Checkout the paper_patches branch and try again!" + exit 1 + fi +} + +main() { + # make temporary directories + mkdir -p "$TEMP_HEAT_DIR" + + + cd "$CURRENT_DIR" + + build_chplx_rel_flag + doit_hpx "release" + doit_chapel + cd "$CURRENT_DIR" + + + echo "Benchmarking done" +} + +usage() { + echo "Usage: $0 -v (verbose)" 1>&2 + echo "Usage: $0 -v (verbose) -a (apex)" 1>&2 + echo "Usage: $0 -v (verbose) -A ()" 1>&2 + exit 1 +} + +if ! 
command -v chpl >/dev/null; then + echo "chpl needs to be installed or not in PATH" + exit 1 +fi + +while getopts "vaA:" o; do + case "${o}" in + v) + VERBOSE=1 + ;; + A) + APEX_INCL=1 + if [ "${OPTARG}" != "" ]; then + if [ -d "${OPTARG}" ]; then + APEX_STORE_DIR="${OPTARG}" + else + echo "${OPTARG} directory is not present" + exit 1 + fi + fi + export APEX_DISABLE=0 + echo "Apex data will be stored in $APEX_STORE_DIR" + ;; + a) + export APEX_DISABLE=0 + APEX_INCL=1 + echo "Apex data will be stored in $APEX_STORE_DIR" + ;; + *) + echo "Unknown argument: ${OPTARG}" + exit 1 + ;; + esac +done +shift $((OPTIND - 1)) + +if [ $VERBOSE == 1 ]; then + echo "Verbose mode" + set -x +fi + +main diff --git a/bench-run.sh b/bench-run.sh new file mode 100644 index 00000000..76f80dd2 --- /dev/null +++ b/bench-run.sh @@ -0,0 +1,13 @@ +#!/bin/bash +#SBATCH --time=24:00:00 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=64 +#SBATCH --exclusive +#SBATCH --partition=cuda-A100-intel +#SBATCH -o slurm-%j.out-%N # optional, name of the stdout, using the job number (%j) and the hostname of the node (%N) +#SBATCH -e slurm-%j.err-%N # optional, name of the stderr, using job and hostname values + + +singularity run --bind $(pwd):/root/chplx -i ../singularity-images/noapexHPX.sif /bin/bash -c "cd /root/chplx && bash bench.bash -v" + diff --git a/bench-triad-run.sh b/bench-triad-run.sh new file mode 100644 index 00000000..4fc2843b --- /dev/null +++ b/bench-triad-run.sh @@ -0,0 +1,13 @@ +#!/bin/bash +#SBATCH --time=24:00:00 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=64 +#SBATCH --exclusive +#SBATCH --partition=cuda-A100-intel +#SBATCH -o slurm-%j.out-%N # optional, name of the stdout, using the job number (%j) and the hostname of the node (%N) +#SBATCH -e slurm-%j.err-%N # optional, name of the stderr, using job and hostname values + + +singularity run --bind $(pwd):/root/chplx -i ../singularity-images/noapexHPX.sif /bin/bash -c "cd 
/root/chplx && bash bench-triad.bash -v" + diff --git a/bench-triad.bash b/bench-triad.bash new file mode 100644 index 00000000..06abddda --- /dev/null +++ b/bench-triad.bash @@ -0,0 +1,236 @@ +#!/bin/bash + +set -e + +export PATH="/opt/bin:$PATH" +TEMP_HEAT_DIR="/tmp/chplx-heat-$$" +CURRENT_DIR="$(pwd)" +export CC=clang-15 +export CXX=clang++-15 +export APEX_DISABLE=1 # disable apex by default +VERBOSE=0 +APEX_INCL=0 +APEX_CMD="/opt/bin/apex_exec --apex:otf2 --apex:csv --apex:gtrace --apex:taskgraph --apex:postprocess --apex:source --apex:cpuinfo --apex:meminfo --apex:status" +APEX_STORE_DIR="$HOME"/apex.$$ + +build_chplx_no_flags() { + mkdir -p build + cd build + rm -rf * + cmake .. -DCHPL_HOME="$CURRENT_DIR"/frontend -DCMAKE_CXX_FLAGS="-undefined dynamic_lookup" -G Ninja 2>/dev/null 1>/dev/null #TODO: Generalize + ninja >/dev/null + cd "$CURRENT_DIR" +} + +build_chplx_debug_flag() { + mkdir -p build + cd build + rm -rf * + cmake .. -DCHPL_HOME="$CURRENT_DIR"/frontend -DCMAKE_CXX_FLAGS="-undefined dynamic_lookup" -DCMAKE_BUILD_TYPE=Debug -G Ninja 2>/dev/null 1>/dev/null #TODO: Generalize + ninja >/dev/null + cd "$CURRENT_DIR" +} + +build_chplx_reldebug_flag() { + mkdir -p build + cd build + rm -rf * + cmake .. -DCHPL_HOME="$CURRENT_DIR"/frontend -DCMAKE_CXX_FLAGS="-undefined dynamic_lookup" -DCMAKE_BUILD_TYPE=RelWithDebInfo -G Ninja 2>/dev/null 1>/dev/null #TODO: Generalize + ninja >/dev/null + cd "$CURRENT_DIR" +} + +build_chplx_rel_flag() { + mkdir -p build + cd build + rm -rf * + chpl "$CURRENT_DIR"/publications/fall2023/stream.chpl -o "$CURRENT_DIR"/build/stream_chapel >/dev/null + cmake .. -DCHPL_HOME="$CURRENT_DIR"/frontend -DCMAKE_CXX_FLAGS="-undefined dynamic_lookup -DCHPLX_NO_SOURCE_LOCATION" -DCMAKE_BUILD_TYPE=Release -G Ninja 2>/dev/null 1>/dev/null #TODO: Generalize + ninja >/dev/null + + mkdir -p stream_chplx + ./backend/chplx -f ../backend/test/stream_cpp.chpl -o stream_chplx + cd stream_chplx + mkdir -p build + cd build + cmake .. 
-G Ninja -DChplx_DIR="$CURRENT_DIR"/build/library/lib/cmake/Chplx -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-DCHPLX_NO_SOURCE_LOCATION" 2>/dev/null 1>/dev/null + ninja >/dev/null + cd "$CURRENT_DIR" +} + +build_heat_release() { + cd "$TEMP_HEAT_DIR" + mkdir -p build + cd build + rm -rf * + cmake .. -DChplx_DIR="$CURRENT_DIR"/build/library/lib/cmake/Chplx -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-DCHPLX_NO_SOURCE_LOCATION" -G Ninja 2>/dev/null 1>/dev/null #TODO: generalize + ninja >/dev/null + + chpl "$CURRENT_DIR"/publications/fall2023/heat.chpl --fast -o "$CURRENT_DIR"/build/heat_chapel >/dev/null + + echo "Release builds (Chplx with Release build type and heat with release build type) amd chpl with --fast" +} + +build_heat_relwithdebinfo() { + cd "$TEMP_HEAT_DIR" + mkdir -p build + cd build + rm -rf * + cmake .. -DChplx_DIR="$CURRENT_DIR"/build/library/lib/cmake/Chplx -DCMAKE_BUILD_TYPE=RelWithDebInfo -G Ninja 2>/dev/null 1>/dev/null #TODO: generalize + ninja >/dev/null + + chpl "$CURRENT_DIR"/publications/fall2023/heat.chpl --fast -o "$CURRENT_DIR"/build/heat_chapel >/dev/null + + echo "Chplx Release with Debug info and chpl with --fast" +} + +build_heat_no_flags() { + cd "$TEMP_HEAT_DIR" + mkdir -p build + cd build + rm -rf * + cmake .. -DChplx_DIR="$CURRENT_DIR"/build/library/lib/cmake/Chplx -G Ninja 2>/dev/null 1>/dev/null #TODO: generalize + ninja >/dev/null + + chpl "$CURRENT_DIR"/publications/fall2023/heat.chpl -o "$CURRENT_DIR"/build/heat_chapel >/dev/null + + echo "No flags to both" +} + +build_heat_no_flags_chpl_baseline() { + cd "$TEMP_HEAT_DIR" + mkdir -p build + cd build + rm -rf * + cmake .. 
-DChplx_DIR="$CURRENT_DIR"/build/library/lib/cmake/Chplx -DCMAKE_BUILD_TYPE=Debug -G Ninja 2>/dev/null 1>/dev/null #TODO: generalize + ninja >/dev/null + + chpl "$CURRENT_DIR"/publications/fall2023/heat.chpl --baseline -o "$CURRENT_DIR"/build/heat_chapel >/dev/null + + echo "HPX Debug and chapel with --baseline" +} + +calc() { awk "BEGIN{print $*}"; } + +doit_hpx() { + echo "Benchmarking ChplX" + local _dir_meta_name_="$1" + local _prefix_="" + if [ $APEX_INCL -eq 1 ]; then + _prefix_="${APEX_CMD}" + fi + for i in {4..64}; do + sum=0.000000 + mkdir -p "$APEX_STORE_DIR/$_dir_meta_name_/proc$i" + for j in {1..5}; do + local opt=$($_prefix_ $CURRENT_DIR/build/stream_chplx/build/stream_cpp --hpx:threads=$i) + #echo $opt + #sum=$(calc "$sum + $(echo $opt | awk -F',' '{print $7}')") + #echo "$opt" >"$APEX_STORE_DIR/$_dir_meta_name_/proc$i"/runlog + done + #local _procs_=$(echo $opt | awk -F',' '{print $4}') + #echo "$_procs_,$(calc $sum / 5)" + echo $opt + if [ $APEX_INCL -eq 1 ]; then + cp -r $TEMP_HEAT_DIR/build/OTF2_archive "$APEX_STORE_DIR/$_dir_meta_name_/proc$i" + fi + done +} + +doit_chapel() { + echo "Benchmarking Chapel" + for i in {4..64}; do + sum=0.000000 + for j in {1..5}; do + local opt=$(CHPL_RT_NUM_THREADS_PER_LOCALE_QUIET=yes MAX_LOGICAL=$i MAX_PHYSICAL=$i CHPL_RT_NUM_THREADS_PER_LOCALE=$i "$CURRENT_DIR"/build/stream_chapel) + #echo $opt + #sum=$(calc "$sum + $(echo $opt | awk -F',' '{print $7}')") + done + #local _procs_=$(echo $opt | awk -F',' '{print $4}') + #echo "$_procs_,$(calc $sum / 5)" + echo $opt + done +} + +checks() { + if [ -d ./.git ]; then + echo "This $(pwd) is assumed to be the parent directory of the chplx project" + else + echo ".git not found at $(pwd)" + echo "exiting!" + exit 1 + fi + + if git status | grep -q "paper_patches"; then + echo "paper_patches is the correct branch" + else + echo "Checkout the paper_patches branch and try again!" 
+ exit 1 + fi +} + +main() { + # make temporary directories + mkdir -p "$TEMP_HEAT_DIR" + + + cd "$CURRENT_DIR" + + build_chplx_rel_flag + doit_hpx "release" + doit_chapel + cd "$CURRENT_DIR" + + + echo "Benchmarking done" +} + +usage() { + echo "Usage: $0 -v (verbose)" 1>&2 + echo "Usage: $0 -v (verbose) -a (apex)" 1>&2 + echo "Usage: $0 -v (verbose) -A ()" 1>&2 + exit 1 +} + +if ! command -v chpl >/dev/null; then + echo "chpl needs to be installed or not in PATH" + exit 1 +fi + +while getopts "vaA:" o; do + case "${o}" in + v) + VERBOSE=1 + ;; + A) + APEX_INCL=1 + if [ "${OPTARG}" != "" ]; then + if [ -d "${OPTARG}" ]; then + APEX_STORE_DIR="${OPTARG}" + else + echo "${OPTARG} directory is not present" + exit 1 + fi + fi + export APEX_DISABLE=0 + echo "Apex data will be stored in $APEX_STORE_DIR" + ;; + a) + export APEX_DISABLE=0 + APEX_INCL=1 + echo "Apex data will be stored in $APEX_STORE_DIR" + ;; + *) + echo "Unknown argument: ${OPTARG}" + exit 1 + ;; + esac +done +shift $((OPTIND - 1)) + +if [ $VERBOSE == 1 ]; then + echo "Verbose mode" + set -x +fi + +main diff --git a/bench.bash b/bench.bash new file mode 100644 index 00000000..5011f814 --- /dev/null +++ b/bench.bash @@ -0,0 +1,254 @@ +#!/bin/bash + +set -e + +export PATH="/opt/chapel/bin/linux64-x86_64:$PATH" +TEMP_HEAT_DIR="/tmp/chplx-heat-$$" +CURRENT_DIR="$(pwd)" +export CC=clang-15 +export CXX=clang++-15 +export APEX_DISABLE=1 # disable apex by default +VERBOSE=0 +APEX_INCL=0 +APEX_CMD="/opt/bin/apex_exec --apex:otf2 --apex:csv --apex:gtrace --apex:taskgraph --apex:postprocess --apex:source --apex:cpuinfo --apex:meminfo --apex:status" +APEX_STORE_DIR="$HOME"/apex.$$ + +build_chplx_no_flags() { + mkdir -p build + cd build + rm -rf * + cmake .. -DCHPL_HOME="$CURRENT_DIR"/frontend -DCMAKE_CXX_FLAGS="-undefined dynamic_lookup" -G Ninja 2>/dev/null 1>/dev/null #TODO: Generalize + ninja >/dev/null + cd "$CURRENT_DIR" +} + +build_chplx_debug_flag() { + mkdir -p build + cd build + rm -rf * + cmake .. 
-DCHPL_HOME="$CURRENT_DIR"/frontend -DCMAKE_CXX_FLAGS="-undefined dynamic_lookup" -DCMAKE_BUILD_TYPE=Debug -G Ninja 2>/dev/null 1>/dev/null #TODO: Generalize + ninja >/dev/null + cd "$CURRENT_DIR" +} + +build_chplx_reldebug_flag() { + mkdir -p build + cd build + rm -rf * + cmake .. -DCHPL_HOME="$CURRENT_DIR"/frontend -DCMAKE_CXX_FLAGS="-undefined dynamic_lookup" -DCMAKE_BUILD_TYPE=RelWithDebInfo -G Ninja 2>/dev/null 1>/dev/null #TODO: Generalize + ninja >/dev/null + cd "$CURRENT_DIR" +} + +build_chplx_rel_flag() { + mkdir -p build + cd build + rm -rf * + cmake .. -DCHPL_HOME="$CURRENT_DIR"/frontend -DCMAKE_CXX_FLAGS="-undefined dynamic_lookup -DCHPLX_NO_SOURCE_LOCATION" -DCMAKE_BUILD_TYPE=Release -G Ninja 2>/dev/null 1>/dev/null #TODO: Generalize + ninja >/dev/null + cd "$CURRENT_DIR" +} + +build_heat_release() { + cd "$TEMP_HEAT_DIR" + mkdir -p build + cd build + rm -rf * + cmake .. -DChplx_DIR="$CURRENT_DIR"/build/library/lib/cmake/Chplx -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-DCHPLX_NO_SOURCE_LOCATION" -G Ninja 2>/dev/null 1>/dev/null #TODO: generalize + ninja >/dev/null + + chpl "$CURRENT_DIR"/publications/fall2023/heat.chpl --fast -o "$CURRENT_DIR"/build/heat_chapel >/dev/null + + echo "Release builds (Chplx with Release build type and heat with release build type) amd chpl with --fast" +} + +build_heat_relwithdebinfo() { + cd "$TEMP_HEAT_DIR" + mkdir -p build + cd build + rm -rf * + cmake .. -DChplx_DIR="$CURRENT_DIR"/build/library/lib/cmake/Chplx -DCMAKE_BUILD_TYPE=RelWithDebInfo -G Ninja 2>/dev/null 1>/dev/null #TODO: generalize + ninja >/dev/null + + chpl "$CURRENT_DIR"/publications/fall2023/heat.chpl --fast -o "$CURRENT_DIR"/build/heat_chapel >/dev/null + + echo "Chplx Release with Debug info and chpl with --fast" +} + +build_heat_no_flags() { + cd "$TEMP_HEAT_DIR" + mkdir -p build + cd build + rm -rf * + cmake .. 
-DChplx_DIR="$CURRENT_DIR"/build/library/lib/cmake/Chplx -G Ninja 2>/dev/null 1>/dev/null #TODO: generalize + ninja >/dev/null + + chpl "$CURRENT_DIR"/publications/fall2023/heat.chpl -o "$CURRENT_DIR"/build/heat_chapel >/dev/null + + echo "No flags to both" +} + +build_heat_no_flags_chpl_baseline() { + cd "$TEMP_HEAT_DIR" + mkdir -p build + cd build + rm -rf * + cmake .. -DChplx_DIR="$CURRENT_DIR"/build/library/lib/cmake/Chplx -DCMAKE_BUILD_TYPE=Debug -G Ninja 2>/dev/null 1>/dev/null #TODO: generalize + ninja >/dev/null + + chpl "$CURRENT_DIR"/publications/fall2023/heat.chpl --baseline -o "$CURRENT_DIR"/build/heat_chapel >/dev/null + + echo "HPX Debug and chapel with --baseline" +} + +calc() { awk "BEGIN{print $*}"; } + +doit_hpx() { + echo "Benchmarking HPX" + local _dir_meta_name_="$1" + local _prefix_="" + if [ $APEX_INCL -eq 1 ]; then + _prefix_="${APEX_CMD}" + fi + for i in {4..64}; do + sum=0.000000 + mkdir -p "$APEX_STORE_DIR/$_dir_meta_name_/proc$i" + for j in {1..5}; do + local opt=$($_prefix_ $TEMP_HEAT_DIR/build/heat --hpx:threads=$i) + sum=$(calc "$sum + $(echo $opt | awk -F',' '{print $7}')") + echo "$opt" >"$APEX_STORE_DIR/$_dir_meta_name_/proc$i"/runlog + done + local _procs_=$(echo $opt | awk -F',' '{print $4}') + echo "$_procs_,$(calc $sum / 5)" + if [ $APEX_INCL -eq 1 ]; then + cp -r $TEMP_HEAT_DIR/build/OTF2_archive "$APEX_STORE_DIR/$_dir_meta_name_/proc$i" + fi + done +} + +doit_chapel() { + echo "Benchmarking Chapel" + for i in {4..64}; do + sum=0.000000 + for j in {1..5}; do + local opt=$(CHPL_RT_NUM_THREADS_PER_LOCALE_QUIET=yes MAX_LOGICAL=$i MAX_PHYSICAL=$i CHPL_RT_NUM_THREADS_PER_LOCALE=$i "$CURRENT_DIR"/build/heat_chapel) + sum=$(calc "$sum + $(echo $opt | awk -F',' '{print $7}')") + done + local _procs_=$(echo $opt | awk -F',' '{print $4}') + echo "$_procs_,$(calc $sum / 5)" + done +} + +checks() { + if [ -d ./.git ]; then + echo "This $(pwd) is assumed to be the parent directory of the chplx project" + else + echo ".git not found at 
$(pwd)" + echo "exiting!" + exit 1 + fi + + if git status | grep -q "paper_patches"; then + echo "paper_patches is the correct branch" + else + echo "Checkout the paper_patches branch and try again!" + exit 1 + fi +} + +main() { + # make temporary directories + mkdir -p "$TEMP_HEAT_DIR" + mkdir -p "$APEX_STORE_DIR" + + # copy files without the .good in filename + cd backend/test/heat + for _FILENAME_ in *; do + if [ -f "./$_FILENAME_" ]; then + cp "$_FILENAME_" "$TEMP_HEAT_DIR/${_FILENAME_/.good/}" + fi + done + cd "$CURRENT_DIR" + + build_chplx_rel_flag + build_heat_release + doit_hpx "release" + doit_chapel + cd "$CURRENT_DIR" + + echo + + build_chplx_no_flags + build_heat_no_flags + doit_hpx "no_flags" + doit_chapel + cd "$CURRENT_DIR" + + echo + + build_chplx_debug_flag + build_heat_no_flags_chpl_baseline + doit_hpx "debug" + doit_chapel + cd "$CURRENT_DIR" + + echo + + build_chplx_reldebug_flag + build_heat_relwithdebinfo + doit_hpx "reldebug" + doit_chapel + cd "$CURRENT_DIR" + + echo "Benchmarking done" +} + +usage() { + echo "Usage: $0 -v (verbose)" 1>&2 + echo "Usage: $0 -v (verbose) -a (apex)" 1>&2 + echo "Usage: $0 -v (verbose) -A ()" 1>&2 + exit 1 +} + +if ! 
command -v chpl >/dev/null; then + echo "chpl needs to be installed or not in PATH" + exit 1 +fi + +while getopts "vaA:" o; do + case "${o}" in + v) + VERBOSE=1 + ;; + A) + APEX_INCL=1 + if [ "${OPTARG}" != "" ]; then + if [ -d "${OPTARG}" ]; then + APEX_STORE_DIR="${OPTARG}" + else + echo "${OPTARG} directory is not present" + exit 1 + fi + fi + export APEX_DISABLE=0 + echo "Apex data will be stored in $APEX_STORE_DIR" + ;; + a) + export APEX_DISABLE=0 + APEX_INCL=1 + echo "Apex data will be stored in $APEX_STORE_DIR" + ;; + *) + echo "Unknown argument: ${OPTARG}" + exit 1 + ;; + esac +done +shift $((OPTIND - 1)) + +if [ $VERBOSE == 1 ]; then + echo "Verbose mode" + set -x +fi + +main diff --git a/frontend/BUILD_VERSION b/frontend/BUILD_VERSION index 8b137891..c3d47a0d 100644 --- a/frontend/BUILD_VERSION +++ b/frontend/BUILD_VERSION @@ -1 +1 @@ - +"70473224" diff --git a/frontend/lib/util/git-version.cpp b/frontend/lib/util/git-version.cpp index 59d231ac..93b045da 100644 --- a/frontend/lib/util/git-version.cpp +++ b/frontend/lib/util/git-version.cpp @@ -1,3 +1,3 @@ namespace chpl { - const char* GIT_SHA = "xxxxxxxxxx"; + const char* GIT_SHA = "70473224"; } diff --git a/publications/fall2023/fall2023paper.def b/publications/fall2023/fall2023paper.def index 8b186ad7..3fdd90b4 100644 --- a/publications/fall2023/fall2023paper.def +++ b/publications/fall2023/fall2023paper.def @@ -130,50 +130,59 @@ tar -jxf grcov.tar.bz2 && \ mv grcov /usr/bin && \ rm grcov.tar.bz2 -wget https://perftools.pages.jsc.fz-juelich.de/cicd/otf2/tags/otf2-3.0.3/otf2-3.0.3.tar.gz -tar xzf otf2-3.0.3.tar.gz -cd otf2-3.0.3/ -./configure --prefix=$PWD/../opt CC=clang-15 CXX=clang++-15 && make -j && make install -cd .. +#wget https://perftools.pages.jsc.fz-juelich.de/cicd/otf2/tags/otf2-3.0.3/otf2-3.0.3.tar.gz +#tar xzf otf2-3.0.3.tar.gz +#cd otf2-3.0.3/ +#./configure --prefix=$PWD/../opt CC=clang-15 CXX=clang++-15 && make -j && make install +#cd .. 
-wget https://github.com/UO-OACISS/apex/archive/refs/tags/v2.6.3.tar.gz -tar xzf v2.6.3.tar.gz -cd apex-2.6.3 -mkdir build -cd build -cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$PWD/../../opt -DCMAKE_C_COMPILER=clang-15 -DCMAKE_CXX_COMPILER=clang++-15 -DOTF2_INCLUDE_DIR=$PWD/../../opt/include -DOTF2_LIBRARY=$PWD/../../opt/lib/libotf2.a .. && make -j && make install -cd ../.. +#wget https://github.com/UO-OACISS/apex/archive/refs/tags/v2.6.3.tar.gz +#tar xzf v2.6.3.tar.gz +#cd apex-2.6.3 +#mkdir build +#cd build +#cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/opt -DCMAKE_C_COMPILER=clang-15 -DCMAKE_CXX_COMPILER=clang++-15 -DOTF2_INCLUDE_DIR=$PWD/../../opt/include -DOTF2_LIBRARY=$PWD/../../opt/lib/libotf2.a .. && make -j && make install +#cd ../.. wget https://github.com/STEllAR-GROUP/hpx/archive/refs/tags/v1.9.1.tar.gz tar xzf v1.9.1.tar.gz cd hpx-1.9.1 mkdir build cd build -cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$PWD/../../opt -DCMAKE_CXX_COMPILER=clang++-15 -DHPX_WITH_MALLOC=tcmalloc -DHPX_WITH_APEX=ON -DAPEX_WITH_OTF2=ON -DAPEX_ROOT=$PWD/../../apex-2.6.3 -DHPX_WITH_CXX_STANDARD=20 -DHPX_WITH_FETCH_ASIO=ON -DHPX_WITH_TESTS=OFF -DHPX_WITH_EXAMPLES=ON -DCMAKE_CXX_FLAGS=-stdlib=libc++ .. && make -j && make install -cd ../../opt/bin +cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/opt -DCMAKE_CXX_COMPILER=clang++-15 -DHPX_WITH_MALLOC=tcmalloc -DHPX_WITH_APEX=OFF -DAPEX_WITH_OTF2=OFF -DHPX_WITH_CXX_STANDARD=20 -DHPX_WITH_FETCH_ASIO=ON -DHPX_WITH_TESTS=OFF -DHPX_WITH_EXAMPLES=OFF -DCMAKE_CXX_FLAGS=-stdlib=libc++ .. && make -j && make install +cd /opt/bin export LD_LIBRARY_PATH=$PWD/../lib:$LD_LIBRARY_PATH export PATH=$PWD:$PATH export PKG_CONFIG_PATH=$PWD/../lib/pkgconfig:$PKG_CONFIG_PATH -./apex_exec --apex:otf2 --apex:csv --apex:taskgraph --apex:scatter --apex:postprocess --apex:source --apex:cpuinfo --apex:meminfo --apex:status fibonacci -cd ../.. 
+# ./apex_exec --apex:otf2 --apex:csv --apex:taskgraph --apex:scatter --apex:postprocess --apex:source --apex:cpuinfo --apex:meminfo --apex:status fibonacci +cd /opt git clone https://github.com/fmtlib/fmt.git -mkdir fmt/build -cd fmt/build -cmake -DCMAKE_CXX_COMPILER=clang++-15 -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=$PWD/../../opt .. && make -j && make install -cd ../.. +mkdir -p fmt/build +cd fmt +git checkout 10.0.0 +cd build +cmake -DCMAKE_CXX_COMPILER=clang++-15 -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=TRUE .. && make -j && make install +cd /opt + +git clone --branch release/1.33 https://github.com/chapel-lang/chapel.git +cd chapel +export CHPL_HOME=$(realpath .) +./configure --prefix=/opt +make -j +make install + -export PATH=$PWD/opt/bin:$PWD/opt/chapel/bin:$PATH git clone --branch paper_patches https://github.com/ct-clmsn/chplx -mkdir chplx/build +mkdir -p chplx/build cd chplx/build -cmake -DCMAKE_CXX_COMPILER=clang++-15 -DHPX_DIR=$PWD/../../opt/lib/cmake/HPX -DLLVM_ROOT=/usr/lib/llvm-15/lib/cmake/llvm -DCLANG_ROOT=/usr/lib/llvm-15/lib/cmake/clang -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=$PWD/../../opt -DCMAKE_CXX_EXTENSIONS=OFF -DCMAKE_C_COMPILER=clang-15 .. -make -j4 +cmake -DCMAKE_CXX_COMPILER=clang++-15 -DHPX_DIR=/opt/lib/cmake/HPX -DLLVM_ROOT=/usr/lib/llvm-15/lib/cmake/llvm -DCLANG_ROOT=/usr/lib/llvm-15/lib/cmake/clang -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=/opt -DCMAKE_CXX_EXTENSIONS=OFF -DCMAKE_C_COMPILER=clang-15 .. +make -j cd backend ./chplx -o heat -d -E -f ../../publications/fall2023/heateqn.chpl mkdir heat/build cd heat/build cmake -DChplx_DIR=$PWD/../../../library/lib/cmake/Chplx -DCMAKE_CXX_COMPILER=clang++-15 -DCMAKE_C_COMPILER=clang-15 -DHPX_DIR=$PWD/../../../../../opt/lib/cmake/HPX -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_EXTENSIONS=OFF .. 
make -$PWD/../../../../../opt/bin/apex_exec --apex:otf2 --apex:csv --apex:taskgraph --apex:scatter --apex:postprocess --apex:source --apex:cpuinfo --apex:meminfo --apex:status ./heateqn +# $PWD/../../../../../opt/bin/apex_exec --apex:otf2 --apex:csv --apex:taskgraph --apex:scatter --apex:postprocess --apex:source --apex:cpuinfo --apex:meminfo --apex:status ./heateqn diff --git a/publications/fall2023/gups.chpl b/publications/fall2023/gups.chpl new file mode 100644 index 00000000..e4659e81 --- /dev/null +++ b/publications/fall2023/gups.chpl @@ -0,0 +1,119 @@ +// test/studies/hpcc/RA/ra-randstream-hpcc06.chpl +// test/studies/hpcc/common/probSize-hpcc06.chpl + +use CTypes; +use Time; + +extern proc getenv(name : c_string) : c_string; + +proc computeProblemSize(numArrays : int, physicalMemoryBytes : int, memRatio : int, returnLog2 : bool) { + var totalMem : int = physicalMemoryBytes; + var memoryTarget : int = totalMem / memRatio; + var numBytesPerType : int = c_sizeof(int) : int; + var bytesPerIndex : int = numArrays * numBytesPerType; + var numIndices : int = memoryTarget / bytesPerIndex; + + var lgProblemSize : int = log2(numIndices); + + if (returnLog2) { + numIndices = 2**lgProblemSize; + if (numIndices * bytesPerIndex <= memoryTarget) { + numIndices *= 2; + lgProblemSize += 1; + } + } + + return if returnLog2 then lgProblemSize else numIndices; +} + +proc getNextRandom(x : int) : int { + var poly = 0x7; + var hirandbit = 0x1 << (64-1); + return (x << 1) ^ (if (x & hirandbit) then poly else 0); +} + +proc computeM2Values(m2 : [] int, count : int) { + var nextval = 0x1; + for i in 0..count { + m2[i] = nextval; + nextval = getNextRandom(nextval); + nextval = getNextRandom(nextval); + } +} + +proc getNthRandom(N : int, m2 : [] int, m2count :int) { + var period = 0x7fffffff/7 : int; + + var n = N % period : int; + + var ran = 0x2; + if(n <= 0){ + n = 1; + } + var i = log2(n); + var val = 0; + var J = 0; + for j in 0..i { + J = i-j; + for k in 0..m2count { + if ((ran 
>> j) & 1) then val ^= m2(j); + } + ran = val; + if ((n >> i) & 1) then getNextRandom(ran); + } + + return ran; +} + +proc RAStream(vals : [] int, vals_idx : int, numvals : int, start : int, m2 : [] int, m2count : int) { + var val = getNthRandom(start, m2, m2count); + var base = vals_idx * numvals; + var idx = 0; + for i in 0..numvals { + idx = base + i; + idx = idx % numvals; + val = getNextRandom(val); + vals[idx] = val; + } +} + +param randWidth = 64; +param physicalMemory = 16437; +var memRatio = 4; +var numTables = 1; + +var m2 : [0..randWidth] int; +computeM2Values(m2, randWidth); + +var N_U = 0; +var n = 0; +n = computeProblemSize(numTables, physicalMemory, memRatio, true); +N_U = 2**(n+2); + +var z = 0; +z = N_U * N_U; +var randval: [0..z] int; + +var m = 0; +m = 2**(n); +var indexMask = m - 1; + +var T : [0..m] int; + +for i in 0..m { + T[i] = i; +} + +var timer : stopwatch; + +timer.start(); +forall block in 0..N_U { + RAStream(randval, block, N_U, 0, m2, randWidth); + for r in 0..N_U { + T ( (randval ( (block * N_U + r) % z ) & indexMask) % m ) ^= r; + } +} +timer.stop(); + +writeln(getenv('CHPL_RT_NUM_THREADS_PER_LOCALE'.c_str()):string,",", timer.elapsed()); + diff --git a/publications/fall2023/heat.chpl.bkp b/publications/fall2023/heat.chpl.bkp new file mode 100644 index 00000000..041a84e6 --- /dev/null +++ b/publications/fall2023/heat.chpl.bkp @@ -0,0 +1,70 @@ +// Copyright (c) 2023 AUTHORS +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Notes on running: +// (1) Use CHPL_RT_NUM_THREADS_PER_LOCALE to set the desired parallelism. +// (2) This program does not support positional args. +// (3) Use --nx=YYY to set the number of cells to YYY. +// (4) use --nt=YYY to set the number of time steps to YYY. 
+// CHPL_RT_NUM_THREADS_PER_LOCALE=6 ./heat --nx=10_000_000 + +use Time; + +extern proc getenv(name : c_string) : c_string; +config const ghosts: int = 1; +config const k: real = 0.4; +config const dt: real = 1.0; +config const dx: real = 1.0; + +config const nx: int = 1000000; +config const nt: int = 100; +config const threads: int = 1; + +proc update(d : []real, d2 : []real) { + const NX : int = nx + 1; + forall i in 1..NX-1 do { + //for i in 1..NX-1 do { + d2[i] = d[i] + dt*k/(dx*dx)*(d[i+1] + d[i-1] - 2*d[i]); + } + d2[0] = d2[NX-1]; + d2[NX] = d2[1]; +} + +//proc main() { + + const NX : int = nx + 1; + + var data: [0..NX] real; + var data2: [0..NX] real; + + forall i in 0..NX do { + data[i] = 1 + (i-1 + nx) % nx; + data2[i] = 0; + } + + + var t: stopwatch; + t.start(); + //inlinecxx("hpx::chrono::high_resolution_timer t;"); + + for t in 1..nt do { + update(data, data2); +// data <=> data2; + } + + + t.stop(); + /* if ( data.size < 20 ) { + writeln(data); + } + */ + writeln("chapelng,",nx,",",nt,",",getenv('CHPL_RT_NUM_THREADS_PER_LOCALE'.c_str()):string,",",dt,",",dx,",",t.elapsed(),",0"); + + + //inlinecxx("const auto elapsed = t.elapsed();"); + //inlinecxx("std::cout << \"chapelng,\" << {} << \",\" << {} << \",\" << hpx::resource::get_num_threads() << \",\" << {} << \",\" << {} << \",\" << elapsed << \",0\";", nx, nt, dt, dx); + +//} diff --git a/publications/fall2023/stream.chpl b/publications/fall2023/stream.chpl new file mode 100644 index 00000000..a3c7a39e --- /dev/null +++ b/publications/fall2023/stream.chpl @@ -0,0 +1,76 @@ +use CTypes; + +use Time; + +extern proc getenv(name : c_string) : c_string; +extern proc sizeof(type T): uint(32); +extern const RAND_MAX: c_int; +extern proc rand(): c_int; + +extern proc srand(seed: c_uint); + +srand(0); + +proc randindex() { + return rand()%5; +} + +proc streamBenchmark(n: int) { + const iterations = 10; + + writeln("STREAM Benchmark (Chapel - Embarrassingly Parallel)"); + 
writeln("==================================================="); + + var A:[1..n] real; + var B:[1..n] real; + var C:[1..n] real; + var D:[1..n] real; + + // Initialize arrays + forall i in 1..n { + A[i] = randindex(); + B[i] = randindex(); + C[i] = randindex(); + D[i] = 0.0; + } + + var copy_t: stopwatch; + copy_t.start(); + + // Benchmark Copy + forall it in 1..iterations do { + C[it] = A[it]; + } + copy_t.stop(); + writeln("Copy: ",getenv('CHPL_RT_NUM_THREADS_PER_LOCALE'.c_str()):string, ",",copy_t.elapsed()); + + var scale_t: stopwatch; + scale_t.start(); + // Benchmark Scale + forall it in 1..iterations do{ + C[it] = 3.0 * A[it]; + } + writeln("Scale: ",getenv('CHPL_RT_NUM_THREADS_PER_LOCALE'.c_str()):string, ",", scale_t.elapsed()); + + var add_t: stopwatch; + add_t.start(); + + // Benchmark Add + forall it in 1..iterations do{ + C[it] = A[it] + B[it]; + } + writeln("Add: ",getenv('CHPL_RT_NUM_THREADS_PER_LOCALE'.c_str()):string, ",", add_t.elapsed()); + + var triad_t:stopwatch(); + triad_t.start(); + + // Benchmark Triad + forall it in 1..iterations do{ + D[it] = A[it] + 3.0 * B[it]; + } + writeln("Triad: ",getenv('CHPL_RT_NUM_THREADS_PER_LOCALE'.c_str()):string, ",", triad_t.elapsed()); +} + +// Example usage with an array size of 1000000 +const arraySize = 1000000; +streamBenchmark(arraySize); From c7d7fec5c195b7c4c715b1df80f8e7fea9f6c857 Mon Sep 17 00:00:00 2001 From: Shreyas Atre Date: Thu, 18 Jan 2024 22:07:15 +0530 Subject: [PATCH 10/10] Update benchmarking programmes Signed-off-by: Shreyas Atre --- backend/test/gups.chpl | 68 ++++++++++------------------ backend/test/gups_cpp.chpl | 80 +++++++++++++++------------------ publications/fall2023/gups.chpl | 68 ++++++++++------------------ 3 files changed, 81 insertions(+), 135 deletions(-) diff --git a/backend/test/gups.chpl b/backend/test/gups.chpl index e4659e81..faca086e 100644 --- a/backend/test/gups.chpl +++ b/backend/test/gups.chpl @@ -1,19 +1,17 @@ -// test/studies/hpcc/RA/ra-randstream-hpcc06.chpl 
-// test/studies/hpcc/common/probSize-hpcc06.chpl - -use CTypes; use Time; +use Math; +use CTypes; extern proc getenv(name : c_string) : c_string; proc computeProblemSize(numArrays : int, physicalMemoryBytes : int, memRatio : int, returnLog2 : bool) { - var totalMem : int = physicalMemoryBytes; - var memoryTarget : int = totalMem / memRatio; + var totalMem = physicalMemoryBytes; + var memoryTarget = totalMem / memRatio; var numBytesPerType : int = c_sizeof(int) : int; - var bytesPerIndex : int = numArrays * numBytesPerType; + var bytesPerIndex :int = numArrays * numBytesPerType; var numIndices : int = memoryTarget / bytesPerIndex; - var lgProblemSize : int = log2(numIndices); + var lgProblemSize : int = ceil(log2(numIndices:real)) : int; if (returnLog2) { numIndices = 2**lgProblemSize; @@ -42,43 +40,31 @@ proc computeM2Values(m2 : [] int, count : int) { } proc getNthRandom(N : int, m2 : [] int, m2count :int) { - var period = 0x7fffffff/7 : int; - - var n = N % period : int; - var ran = 0x2; - if(n <= 0){ - n = 1; - } - var i = log2(n); + var i :int = ceil(log2(N)) : int; var val = 0; var J = 0; for j in 0..i { J = i-j; - for k in 0..m2count { - if ((ran >> j) & 1) then val ^= m2(j); + for k in 1..m2count { + if ((ran >> (k-1)) & 1) then val ^= m2(k-1); } ran = val; - if ((n >> i) & 1) then getNextRandom(ran); + if ((N >> J) & 1) then getNextRandom(ran); } - return ran; } -proc RAStream(vals : [] int, vals_idx : int, numvals : int, start : int, m2 : [] int, m2count : int) { - var val = getNthRandom(start, m2, m2count); - var base = vals_idx * numvals; - var idx = 0; +proc RAStream(vals : [] int, numvals : int, m2 : [] int, m2count : int) { + var val = getNthRandom(2, m2, m2count); for i in 0..numvals { - idx = base + i; - idx = idx % numvals; val = getNextRandom(val); - vals[idx] = val; + vals[i] = val; } } param randWidth = 64; -param physicalMemory = 16437; +param physicalMemory = 17179869184; //1024; var memRatio = 4; var numTables = 1; @@ -87,33 +73,25 @@ 
computeM2Values(m2, randWidth); var N_U = 0; var n = 0; -n = computeProblemSize(numTables, physicalMemory, memRatio, true); -N_U = 2**(n+2); +n = computeProblemSize(numTables, physicalMemory, memRatio, false); +N_U = n+2; -var z = 0; -z = N_U * N_U; +var z = N_U; var randval: [0..z] int; -var m = 0; -m = 2**(n); -var indexMask = m - 1; - -var T : [0..m] int; +var indexMask = z - 1; +var T : [0..z] int; -for i in 0..m { +for i in 0..z { T[i] = i; } var timer : stopwatch; timer.start(); -forall block in 0..N_U { - RAStream(randval, block, N_U, 0, m2, randWidth); - for r in 0..N_U { - T ( (randval ( (block * N_U + r) % z ) & indexMask) % m ) ^= r; - } +RAStream(randval, z, m2, randWidth); +forall r in 0..z { + T ( randval ( r ) & indexMask ) ^= randval(r); } timer.stop(); - writeln(getenv('CHPL_RT_NUM_THREADS_PER_LOCALE'.c_str()):string,",", timer.elapsed()); - diff --git a/backend/test/gups_cpp.chpl b/backend/test/gups_cpp.chpl index 9c703a0c..4ae59119 100644 --- a/backend/test/gups_cpp.chpl +++ b/backend/test/gups_cpp.chpl @@ -1,4 +1,11 @@ -// test/studies/hpcc/RA/ra-randstream-hpcc06.chpl +proc getNextRandom(x : int) : int { + var poly = 0; + inlinecxx("{} = 0x7;", poly); + var hirandbit = 0; + inlinecxx("{} = 0x1 << (64-1);", hirandbit); + inlinecxx("{} = ({} << 1) ^ ( ({} & {}) ? {} : 0);", x, x, x, hirandbit, poly); + return x; +} proc computeProblemSize(numArrays : int, physicalMemoryBytes : int, memRatio : int, returnLog2 : bool) : int { var totalMem = physicalMemoryBytes; @@ -19,19 +26,12 @@ proc computeProblemSize(numArrays : int, physicalMemoryBytes : int, memRatio : i inlinecxx("if({} * {} <= {})", numIndices, bytesPerIndex, memoryTarget); inlinecxx("{} += 1;", lgProblemSize); } - var retval : int = 0; + + var retval : int = 0; inlinecxx("{} = {} ? 
{} : {};", retval, returnLog2, lgProblemSize, numIndices); return retval; } -proc getNextRandom(x : int) : int { - var poly = 0; - inlinecxx("{} = 0x7;", poly); - var hirandbit = 0; - inlinecxx("{} = 0x1 << (64-1);", hirandbit); - inlinecxx("{} = ({} << 1) ^ ( ({} & {}) ? {} : 0);", x, x, x, hirandbit, poly); - return x; -} proc computeM2Values(m2 : [] int, count : int) :bool { var nextval = 0; @@ -46,76 +46,66 @@ proc computeM2Values(m2 : [] int, count : int) :bool { } proc getNthRandom(N : int, m2 : [] int, m2count : int) { - var period = 0x7fffffff/7 ; - - var n = N % period ; - - inlinecxx("if ({} <= {} )",n,0); - inlinecxx("{} = 1;", n); - var ran = 0x2; + var ran = 0; + inlinecxx("{} = 0x2;", ran); var i = 0; - inlinecxx("{} = std::log2(n);", i); + inlinecxx("{} = std::ceil(std::log2(static_cast({})));", i, N); var val = 0; var J = 0; for j in 0..i { J = i-j; - for k in 0..m2count { - inlinecxx("if (({} >> {}) & 1) {} ^= {} [ {} ];", ran, k, val, m2, k); + for k in 1..m2count { + inlinecxx("if (({} >> ({}-1)) & 1) {} ^= {} [ ({}-1) ];", ran, k, val, m2, k); } ran = val; - inlinecxx("if(({} >> {}) & 1) {} = getNextRandom({});", n, J, ran, ran); + inlinecxx("if(({} >> {}) & 1) {} = getNextRandom({});", N, J, ran, ran); } return ran; } -proc RAStream(vals : [] int, vals_idx : int, numvals : int, start : int, m2 : [] int, m2count : int) { - var val = getNthRandom(start, m2, m2count); - var base = vals_idx * numvals; - var idx = 0; +proc RAStream(vals : [] int, numvals : int, m2 : [] int, m2count : int) { + + var val = getNthRandom(2, m2, m2count); for i in 0..numvals { - idx = base + i; - idx = idx % numvals; val = getNextRandom(val); - vals[idx] = val; + vals[i] = val; } } param randWidth = 64; -param physicalMemory = 16437; -var memRatio = 4; -var numTables = 1; +var physicalMemory = 0; +inlinecxx("{} = 17179869184l;", physicalMemory); +param memRatio = 4; +param numTables = 1; var m2 : [0..randWidth] int; var val = computeM2Values(m2, randWidth); var N_U = 0; 
var n = 1; -n = computeProblemSize(numTables, physicalMemory, memRatio, true); -inlinecxx("{} = std::pow(2, {}+2);", N_U, n); +n = computeProblemSize(numTables, physicalMemory, memRatio, false); +inlinecxx("{} = {}+2;", N_U, n); -var z = 0; -inlinecxx("{} = {} * {};", z, N_U, N_U); +var z : int; +z = N_U; var randval : [0..z] int; -var m = 0; -inlinecxx("{} = std::pow(2, {});", m, n); -var indexMask = m - 1; +var indexMask = z - 1; -var T : [0..m] int; +var T : [0..z] int; -for i in 0..m { +for i in 0..z { T[i] = i; } inlinecxx("hpx::chrono::high_resolution_timer gups;"); -forall block in 0..N_U { - RAStream(randval, block, N_U, 0, m2, randWidth); - for r in 0..N_U { - inlinecxx("{} [ ({} [ ({} * {} + {}) % {} ] & {}) % {} ] ^= {};", T, randval, block, N_U, r, z, indexMask, m, r); - } +RAStream(randval, z, m2, randWidth); +forall r in 0..z { + inlinecxx("{} [ {} [ {} ] & {} ] ^= {} [ {} ];", T, randval, r, indexMask, randval, r); } inlinecxx("auto elapsed = gups.elapsed();"); + inlinecxx("std::cout << hpx::resource::get_num_threads() << \",\" << elapsed << std::endl;"); diff --git a/publications/fall2023/gups.chpl b/publications/fall2023/gups.chpl index e4659e81..faca086e 100644 --- a/publications/fall2023/gups.chpl +++ b/publications/fall2023/gups.chpl @@ -1,19 +1,17 @@ -// test/studies/hpcc/RA/ra-randstream-hpcc06.chpl -// test/studies/hpcc/common/probSize-hpcc06.chpl - -use CTypes; use Time; +use Math; +use CTypes; extern proc getenv(name : c_string) : c_string; proc computeProblemSize(numArrays : int, physicalMemoryBytes : int, memRatio : int, returnLog2 : bool) { - var totalMem : int = physicalMemoryBytes; - var memoryTarget : int = totalMem / memRatio; + var totalMem = physicalMemoryBytes; + var memoryTarget = totalMem / memRatio; var numBytesPerType : int = c_sizeof(int) : int; - var bytesPerIndex : int = numArrays * numBytesPerType; + var bytesPerIndex :int = numArrays * numBytesPerType; var numIndices : int = memoryTarget / bytesPerIndex; - var 
lgProblemSize : int = log2(numIndices); + var lgProblemSize : int = ceil(log2(numIndices:real)) : int; if (returnLog2) { numIndices = 2**lgProblemSize; @@ -42,43 +40,31 @@ proc computeM2Values(m2 : [] int, count : int) { } proc getNthRandom(N : int, m2 : [] int, m2count :int) { - var period = 0x7fffffff/7 : int; - - var n = N % period : int; - var ran = 0x2; - if(n <= 0){ - n = 1; - } - var i = log2(n); + var i :int = ceil(log2(N)) : int; var val = 0; var J = 0; for j in 0..i { J = i-j; - for k in 0..m2count { - if ((ran >> j) & 1) then val ^= m2(j); + for k in 1..m2count { + if ((ran >> (k-1)) & 1) then val ^= m2(k-1); } ran = val; - if ((n >> i) & 1) then getNextRandom(ran); + if ((N >> J) & 1) then getNextRandom(ran); } - return ran; } -proc RAStream(vals : [] int, vals_idx : int, numvals : int, start : int, m2 : [] int, m2count : int) { - var val = getNthRandom(start, m2, m2count); - var base = vals_idx * numvals; - var idx = 0; +proc RAStream(vals : [] int, numvals : int, m2 : [] int, m2count : int) { + var val = getNthRandom(2, m2, m2count); for i in 0..numvals { - idx = base + i; - idx = idx % numvals; val = getNextRandom(val); - vals[idx] = val; + vals[i] = val; } } param randWidth = 64; -param physicalMemory = 16437; +param physicalMemory = 17179869184; //1024; var memRatio = 4; var numTables = 1; @@ -87,33 +73,25 @@ computeM2Values(m2, randWidth); var N_U = 0; var n = 0; -n = computeProblemSize(numTables, physicalMemory, memRatio, true); -N_U = 2**(n+2); +n = computeProblemSize(numTables, physicalMemory, memRatio, false); +N_U = n+2; -var z = 0; -z = N_U * N_U; +var z = N_U; var randval: [0..z] int; -var m = 0; -m = 2**(n); -var indexMask = m - 1; - -var T : [0..m] int; +var indexMask = z - 1; +var T : [0..z] int; -for i in 0..m { +for i in 0..z { T[i] = i; } var timer : stopwatch; timer.start(); -forall block in 0..N_U { - RAStream(randval, block, N_U, 0, m2, randWidth); - for r in 0..N_U { - T ( (randval ( (block * N_U + r) % z ) & indexMask) % m ) ^= 
r; - } +RAStream(randval, z, m2, randWidth); +forall r in 0..z { + T ( randval ( r ) & indexMask ) ^= randval(r); } timer.stop(); - writeln(getenv('CHPL_RT_NUM_THREADS_PER_LOCALE'.c_str()):string,",", timer.elapsed()); -