From 3a9df7a2041c00aaeb0ece97141eb1861b8f1c03 Mon Sep 17 00:00:00 2001 From: Nick Sarkauskas Date: Tue, 8 Oct 2024 13:46:06 -0700 Subject: [PATCH] TL/UCP: Check for ucp_memh_pack, disable sliding win --- config/m4/ucx.m4 | 9 +++++++++ contrib/doca_urom_ucc_plugin/Makefile.am | 2 ++ src/components/cl/doca_urom/Makefile.am | 4 ++++ src/components/tl/ucp/Makefile.am | 4 ++++ .../tl/ucp/allreduce/allreduce_sliding_window.c | 4 ++++ src/components/tl/ucp/tl_ucp_coll.c | 4 ++++ src/components/tl/ucp/tl_ucp_coll.h | 2 ++ 7 files changed, 29 insertions(+) diff --git a/config/m4/ucx.m4 b/config/m4/ucx.m4 index 97fd547881..40b12b7fd4 100644 --- a/config/m4/ucx.m4 +++ b/config/m4/ucx.m4 @@ -67,6 +67,14 @@ AS_IF([test "x$ucx_checked" != "xyes"],[ ], []) + AC_CHECK_LIB([ucp], [ucp_memh_pack], + [ + ucp_memh_happy="yes" + ], + [ + ucp_memh_happy="no" + ],[-luct -lucm -lucp]) + AS_IF([test "x$ucx_happy" = "xyes"], [ AC_COMPUTE_INT(ucx_major, [UCP_API_MAJOR], [#include <ucp/api/ucp_version.h>], @@ -160,5 +168,6 @@ AS_IF([test "x$ucx_checked" != "xyes"],[ ucx_checked=yes AM_CONDITIONAL([HAVE_UCX], [test "x$ucx_happy" != xno]) + AM_CONDITIONAL([HAVE_UCP_MEMH_PACK], [test "x$ucp_memh_happy" != xno]) ]) ]) diff --git a/contrib/doca_urom_ucc_plugin/Makefile.am b/contrib/doca_urom_ucc_plugin/Makefile.am index c3976f488c..399efe9739 100644 --- a/contrib/doca_urom_ucc_plugin/Makefile.am +++ b/contrib/doca_urom_ucc_plugin/Makefile.am @@ -2,6 +2,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# +if HAVE_UCP_MEMH_PACK if HAVE_DOCA_UROM sources = \ @@ -20,3 +21,4 @@ libucc_doca_urom_plugin_la_LDFLAGS = -version-info $(SOVERSION) --as-needed $(U libucc_doca_urom_plugin_la_LIBADD = $(UCX_LIBADD) $(DOCA_UROM_LIBADD) $(UCC_TOP_BUILDDIR)/src/libucc.la endif +endif diff --git a/src/components/cl/doca_urom/Makefile.am b/src/components/cl/doca_urom/Makefile.am index 5422019384..7ebe48f341 100644 --- a/src/components/cl/doca_urom/Makefile.am +++ b/src/components/cl/doca_urom/Makefile.am @@ -2,6 +2,8 @@ # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # +if HAVE_UCP_MEMH_PACK + sources = \ cl_doca_urom.h \ cl_doca_urom.c \ @@ -22,3 +24,5 @@ libucc_cl_doca_urom_la_LDFLAGS = -version-info $(SOVERSION) --as-needed $(DOCA_ libucc_cl_doca_urom_la_LIBADD = $(DOCA_UROM_LIBADD) $(UCC_TOP_BUILDDIR)/src/libucc.la include $(top_srcdir)/config/module.am + +endif \ No newline at end of file diff --git a/src/components/tl/ucp/Makefile.am b/src/components/tl/ucp/Makefile.am index b196479893..5cbd8abf36 100644 --- a/src/components/tl/ucp/Makefile.am +++ b/src/components/tl/ucp/Makefile.am @@ -42,9 +42,11 @@ allreduce = \ allreduce/allreduce.c \ allreduce/allreduce_knomial.c \ allreduce/allreduce_sra_knomial.c \ +if HAVE_UCP_MEMH_PACK allreduce/allreduce_sliding_window.h \ allreduce/allreduce_sliding_window.c \ allreduce/allreduce_sliding_window_setup.c \ +endif allreduce/allreduce_dbt.c barrier = \ @@ -113,8 +115,10 @@ sources = \ tl_ucp_ep.c \ tl_ucp_coll.c \ tl_ucp_service_coll.c \ +if HAVE_UCP_MEMH_PACK tl_ucp_dpu_offload.h \ tl_ucp_dpu_offload.c \ +endif $(allgather) \ $(allgatherv) \ $(alltoall) \ diff --git a/src/components/tl/ucp/allreduce/allreduce_sliding_window.c b/src/components/tl/ucp/allreduce/allreduce_sliding_window.c index a1ba003bd3..2f767acda5 100644 --- a/src/components/tl/ucp/allreduce/allreduce_sliding_window.c +++ b/src/components/tl/ucp/allreduce/allreduce_sliding_window.c @@ -4,6 +4,8 @@ * See file LICENSE for terms. 
*/ +#ifdef HAVE_UCP_MEMH_PACK + #include "allreduce.h" #include "allreduce_sliding_window.h" #include "../allgather/allgather.h" @@ -655,3 +657,5 @@ ucc_tl_ucp_allreduce_sliding_window_init(ucc_base_coll_args_t *coll_args, ucc_tl_ucp_put_schedule(schedule); return status; } + +#endif diff --git a/src/components/tl/ucp/tl_ucp_coll.c b/src/components/tl/ucp/tl_ucp_coll.c index 88e22e6957..e3c379c00c 100644 --- a/src/components/tl/ucp/tl_ucp_coll.c +++ b/src/components/tl/ucp/tl_ucp_coll.c @@ -285,7 +285,11 @@ ucc_status_t ucc_tl_ucp_alg_id_to_init(int alg_id, const char *alg_id_str, *init = ucc_tl_ucp_allreduce_dbt_init; break; case UCC_TL_UCP_ALLREDUCE_ALG_SLIDING_WINDOW: +#ifdef HAVE_UCP_MEMH_PACK *init = ucc_tl_ucp_allreduce_sliding_window_init; +#else + status = UCC_ERR_NOT_SUPPORTED; +#endif break; default: status = UCC_ERR_INVALID_PARAM; diff --git a/src/components/tl/ucp/tl_ucp_coll.h b/src/components/tl/ucp/tl_ucp_coll.h index 848b113b13..61a136d5c9 100644 --- a/src/components/tl/ucp/tl_ucp_coll.h +++ b/src/components/tl/ucp/tl_ucp_coll.h @@ -129,6 +129,7 @@ typedef struct ucc_tl_ucp_task { ucc_ee_executor_task_t *etask; ucc_ee_executor_t *executor; } allreduce_kn; +#ifdef HAVE_UCP_MEMH_PACK struct { ucc_tl_ucp_allreduce_sw_pipeline *pipe; ucs_status_ptr_t *put_requests; @@ -137,6 +138,7 @@ typedef struct ucc_tl_ucp_task { ucc_ee_executor_task_t *reduce_task; ucc_tl_ucp_dpu_offload_buf_info_t *bufs; } allreduce_sliding_window; +#endif struct { int phase; ucc_knomial_pattern_t p;