diff --git a/config/hwloc_internal.m4 b/config/hwloc_internal.m4 index 7a097b92e0..5b85c420d0 100644 --- a/config/hwloc_internal.m4 +++ b/config/hwloc_internal.m4 @@ -338,6 +338,34 @@ EOF AC_CHECK_FUNCS([clock_gettime]) ]) + # Check for ptrace support + hwloc_have_ptrace=1 + AC_CHECK_HEADERS([sys/ptrace.h],, [hwloc_have_ptrace=0]) + AC_CHECK_FUNCS([ptrace],, [hwloc_have_ptrace=0]) + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + #include "sys/ptrace.h" + int main(void){ + return ptrace(PTRACE_SEIZE, + -1, + 0, + (void*)(PTRACE_O_TRACECLONE|PTRACE_O_TRACEFORK)); + } + ]])],, [hwloc_have_ptrace=0]) + AM_CONDITIONAL([HWLOC_HAVE_PTRACE],[test $hwloc_have_ptrace -eq 1]) + AC_DEFINE_UNQUOTED([HWLOC_HAVE_PTRACE], [$hwloc_have_ptrace], [Whether ptrace is present and supports PTRACE_SEIZE or not]) + + # Check if syscall gettid is available. + hwloc_have_sys_gettid=1 + AC_CHECK_HEADERS([sys/syscall.h],, [hwloc_have_sys_gettid=0]) + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + #include "sys/syscall.h" + #ifndef SYS_gettid + #error "syscall SYS_gettid not found" + #endif + int main(void){ return syscall(SYS_gettid) > 0;} + ]])],,[hwloc_have_sys_gettid=0]) + AC_DEFINE_UNQUOTED([HWLOC_HAVE_SYS_GETTID], [$hwloc_have_sys_gettid], [Whether syscall header is present and SYS_gettid macro is defined or not]) + # Only generate this if we're building the utilities # Even the netloc library Makefile is here because # we don't embed libnetloc yet, it's useless without tools @@ -368,7 +396,11 @@ AC_DEFUN([HWLOC_SETUP_TESTS],[ ### EOF - AC_CHECK_LIB([pthread], [pthread_self], [hwloc_have_pthread=yes]) + # Check thread support. + AC_CHECK_LIB([pthread], [pthread_self], [hwloc_have_pthread=1], [hwloc_have_pthread=0]) + AC_DEFINE_UNQUOTED([HWLOC_HAVE_PTHREAD], [$hwloc_have_pthread], [Whether we have the pthread library or not]) + AM_CONDITIONAL([HWLOC_HAVE_PTHREAD], [test $hwloc_have_pthread -eq 1]) AC_CHECK_LIB([pthread], [pthread_self], [hwloc_have_pthread=yes]) + AC_OPENMP HWLOC_PKG_CHECK_MODULES([NUMA], [numa], [numa_available], [numa.h], [hwloc_have_linux_libnuma=yes], diff --git a/contrib/ci.inria.fr/Jenkinsfile-basic b/contrib/ci.inria.fr/Jenkinsfile-basic index 65fd4cf280..07970cd98c 100644 --- a/contrib/ci.inria.fr/Jenkinsfile-basic +++ b/contrib/ci.inria.fr/Jenkinsfile-basic @@ -40,7 +40,6 @@ pipeline { stash includes: "job-1-visualstudio.bat", name: 'script-msvc' } archiveArtifacts artifacts: tarballgz+","+tarballbz2+",doc/doxygen-doc/hwloc-a4.pdf", fingerprint: true, onlyIfSuccessful: true - deleteDir() } } } @@ -63,7 +62,6 @@ pipeline { unstash 'script-unix-check' sh 'chmod 755 job-1-check.sh && ./job-1-check.sh '+tarballgz if (env.KEEP_WORKING_DIRECTORY != 'true') - deleteDir() } } } else { diff --git a/utils/hwloc/Makefile.am b/utils/hwloc/Makefile.am index f9b03d1ff7..39f0d47cb8 100644 --- a/utils/hwloc/Makefile.am +++ b/utils/hwloc/Makefile.am @@ -167,3 +167,24 @@ endif HWLOC_HAVE_LINUX distclean-local: rm -f $(nodist_man_MANS) + +# Build hwloc-thread-bind +if HWLOC_HAVE_PTRACE +if HWLOC_HAVE_PTHREAD +AM_LDFLAGS+=-lpthread +endif #HWLOC_HAVE_PHREAD +noinst_LTLIBRARIES+=libhwloc-thread-bind.la +libhwloc_thread_bind_la_SOURCES=hwloc-thread-bind-utils.c hwloc-thread-bind.h +LIBDADD=$(HWLOC_top_builddir)/hwloc/libhwloc.la + +bin_PROGRAMS += hwloc-thread-bind +hwloc_thread_bind_SOURCES = \ + hwloc-thread-bind.c \ + hwloc-thread-bind.h +LDADD+=$(HWLOC_top_builddir)/hwloc/libhwloc.la +LDADD+=libhwloc-thread-bind.la +check_PROGRAMS=test-hwloc-thread-bind +if !HWLOC_HAVE_MINGW32 +TESTS+=test-hwloc-thread-bind +endif #!HWLOC_HAVE_MINGW32 +endif #HWLOC_HAVE_PTRACE diff --git a/utils/hwloc/hwloc-thread-bind-utils.c b/utils/hwloc/hwloc-thread-bind-utils.c new file mode 100644 index 0000000000..34a990d8be --- /dev/null +++ b/utils/hwloc/hwloc-thread-bind-utils.c @@ -0,0 +1,343 @@ +#include "misc.h" +#include "hwloc-thread-bind.h" +#include "hwloc/helper.h" +#include "hwloc-calc.h" + +#include +#include +#include +#include "private/autogen/config.h" +#if HWLOC_HAVE_PTRACE +#include +#endif + +extern int logical; // Whether indexing is logical +extern int verbose; // Whether to verbose print. + +/** Maximum len of string containing a hwloc_obj logical index **/ +#define STR_OBJ_MAX 32 + +/**********************************************************************/ +/* enum structure */ +/**********************************************************************/ + +struct cpuaffinity_enum{ + /** The topology used to build enum **/ + hwloc_topology_t topology; + /** Index of current hwloc_obj in enumeration **/ + unsigned current; + /** Number of hwloc_obj in enumeration **/ + unsigned n; + /** + * Maximum number of hwloc_obj storable. + * This the number of HWLOC_OBJ_PU in topology. + **/ + unsigned nmax; + /** + * Array of processing units. + * These are the pointers from + * an existing topology. Topology must not + * be destroyed until the enum is. + **/ + hwloc_obj_t *obj; +}; + +struct cpuaffinity_enum * +cpuaffinity_enum_alloc(hwloc_topology_t topology) +{ + unsigned i, nmax = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU); + + struct cpuaffinity_enum *obj = malloc(sizeof *obj); + + if (obj == NULL) + return NULL; + + obj->topology = topology; + obj->obj = malloc(nmax * sizeof(*obj->obj)); + if (obj->obj == NULL) { + free(obj); + return NULL; + } + + for (i = 0; i < nmax; i++) + obj->obj[i] = NULL; + obj->n = 0; + obj->nmax = nmax; + obj->current = 0; + + return obj; +} + +void +cpuaffinity_enum_free(struct cpuaffinity_enum *obj) +{ + free(obj->obj); + free(obj); +} + +size_t +cpuaffinity_enum_size(struct cpuaffinity_enum *obj) +{ + return obj->n; +} + +int +cpuaffinity_enum_append(struct cpuaffinity_enum *e, hwloc_obj_t obj) +{ + unsigned i; + if (e == NULL) + goto out_einval; + + if (e->n == e->nmax) + goto out_edom; + + if (obj == NULL || (e->obj[0] && obj->type != e->obj[0]->type)) + goto out_einval; + + for (i = 0; i < e->n; i++) + if (e->obj[i]->logical_index == obj->logical_index) + goto out_einval; + + e->obj[e->n++] = obj; + return 0; + out_einval: + errno = EINVAL; + return -1; + out_edom: + errno = EDOM; + return -1; +} + +hwloc_obj_t +cpuaffinity_enum_next(struct cpuaffinity_enum *e) +{ + hwloc_obj_t obj = e->obj[e->current]; + e->current = (e->current + 1) % e->n; + return obj; +} + +hwloc_obj_t +cpuaffinity_enum_get(struct cpuaffinity_enum * e, + const size_t i) +{ + if(e == NULL) + return NULL; + return e->obj[i % e->n]; +} + +static int cpuaffinity_obj_snprintf(char* str, + const size_t len, + const char *sep, + const hwloc_obj_t obj, + const int cpuset, + const int taskset) +{ + char* c = str; + int index = logical ? obj->logical_index : obj->os_index; + if(taskset) + c+=hwloc_bitmap_taskset_snprintf(str, len, obj->cpuset); + else if(cpuset) + c+=hwloc_bitmap_snprintf(str, len, obj->cpuset); + else + c += snprintf(c, len, "%d", index); + c += snprintf(c, c-str+len, "%s", sep); + return c-str; +} + +void cpuaffinity_enum_print(const struct cpuaffinity_enum *e, + const char *sep, + const int cpuset, + const int taskset, + const int reverse, + unsigned num) +{ + size_t len = e->n * (strlen(sep) + STR_OBJ_MAX); + char *c, *enum_str = malloc(len); + int i, start, end; + num = num == 0 ? e->n : num; + num = num > e->n ? e->n : num; + + if (enum_str == NULL) { + errno = ENOMEM; + return; + } + + memset(enum_str, 0, len); + c = enum_str; + + start = reverse ? num-1 : 0; + end = reverse ? 0 : num-1; + for (i = start; i != end; reverse ? i-- : i++){ + c+=cpuaffinity_obj_snprintf(c, + len + enum_str - c, + sep, + e->obj[i], + cpuset, + taskset); + } + c+=cpuaffinity_obj_snprintf(c, + len + enum_str - c, + "", + e->obj[i], + cpuset, + taskset); + printf("%s\n", enum_str); + free(enum_str); +} + +hwloc_obj_t +cpuaffinity_bind_thread(struct cpuaffinity_enum * objs, + const pid_t tid) +{ + hwloc_obj_t obj = cpuaffinity_enum_next(objs); + + if(hwloc_set_proc_cpubind(objs->topology, + tid, + obj->cpuset, + HWLOC_CPUBIND_THREAD) == -1){ + perror("hwloc_set_cpubind"); + return NULL; + } + return obj; +} + +#if HWLOC_HAVE_PTRACE +int +cpuaffinity_attach(const pid_t pid, + struct cpuaffinity_enum *e, + const int repeat, + const int stopped) +{ + const struct hwloc_topology_support * support = + hwloc_topology_get_support(e->topology); + if(!support->cpubind->set_thread_cpubind){ + fprintf(stderr, + "cpuaffinity_attach: set_thread_cpubind not supported.\n"); + return -1; + } + + /* Wait for child to stop */ + if(!stopped) + kill(pid, SIGSTOP); + waitpid(pid, NULL, WUNTRACED); + + /* attach and set options to trace threads creation and process exit */ + if(ptrace(PTRACE_SEIZE, + pid, + NULL, + (void*)(PTRACE_O_TRACECLONE|PTRACE_O_TRACEFORK)) == -1){ + perror("PTRACE_SEIZE"); + return -1; + } + + /* Resume stopped child */ + kill(pid, SIGCONT); + + /* wait childrens until process exits */ + do{ + int status; + pid_t child = waitpid(-1, &status, __WALL); + if(WIFEXITED(status) && child == pid){ return WEXITSTATUS(status);} + if(WIFSIGNALED(status) && child == pid){break;} + + /* Child Stopped */ + if(WIFSTOPPED(status)){ + int sig = WSTOPSIG(status); + if(sig == SIGTRAP){ + /*Get ptrace event that triggered the stop*/ + int event = status >> 8; + unsigned long eventmsg; + + if(ptrace(PTRACE_GETEVENTMSG, + child, + NULL, + (void*)(&eventmsg)) == -1){ + perror("PTRACE_GETEVENTMSG"); + continue; + } + if(event == (SIGTRAP|(PTRACE_EVENT_FORK<<8)) || + event == (SIGTRAP|(PTRACE_EVENT_VFORK<<8)) || + event == (SIGTRAP|(PTRACE_EVENT_CLONE<<8))){ + if(e->current < e->n || repeat){ + if(verbose){ + hwloc_obj_t loc = e->obj[e->current]; + printf("Binding thread %lu to %s:%d\n", + eventmsg, + hwloc_obj_type_string(loc->type), + logical ? loc->logical_index + : loc->os_index); + } + cpuaffinity_bind_thread(e, eventmsg); + } + } + } + /* Resume stopped child */ + if(ptrace(PTRACE_CONT, child, NULL, NULL) == -1) { + perror("PTRACE_CONT(interrupt)"); + } + } + } while(1); + return 0; +} +#endif // HWLOC_HAVE_PTRACE + +hwloc_cpuset_t +hwloc_process_location(hwloc_topology_t topology, + const char* str) +{ + int err; + hwloc_bitmap_t cpuset; + + cpuset = hwloc_bitmap_alloc(); + if(cpuset == NULL){ + perror("hwloc_bitmap_alloc"); + exit(1); + } + + struct hwloc_calc_location_context_s lcontext = { + .topology = topology, + .topodepth = hwloc_topology_get_depth(topology), + .only_hbm = -1, + .logical = logical, + .verbose = 0 + }; + struct hwloc_calc_set_context_s scontext = { + .nodeset_input = 0, + .nodeset_output = 0, + .output_set = cpuset, + }; + + err = hwloc_calc_process_location_as_set(&lcontext, + &scontext, + str); + + if(err < 0){ + fprintf(stderr, + "Obj %s is not recognized or does not contain a cpuset.\n", + str); + return NULL; + } + + return cpuset; +} + +int +restrict_topology(hwloc_topology_t topology, + const char *restrict_str) +{ + int err = 0; + hwloc_bitmap_t restrict_cpuset; + + restrict_cpuset = hwloc_process_location(topology, restrict_str); + if(restrict_cpuset == NULL) + return -1; + + hwloc_topology_restrict(topology, + restrict_cpuset, + HWLOC_RESTRICT_FLAG_REMOVE_CPULESS); + if(err != 0) + perror("hwloc_topology_restrict"); + + hwloc_bitmap_free(restrict_cpuset); + return err; +} diff --git a/utils/hwloc/hwloc-thread-bind.c b/utils/hwloc/hwloc-thread-bind.c new file mode 100644 index 0000000000..422336488c --- /dev/null +++ b/utils/hwloc/hwloc-thread-bind.c @@ -0,0 +1,242 @@ +/*************************************************************************** + * Copyright 2019 UChicago Argonne, LLC. + * Author: Nicolas Denoyelle + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING in top-level directory. + ****************************************************************************/ + +#include "misc.h" +#include "hwloc-calc.h" +#include "hwloc-thread-bind.h" + +#include +#include +#include +#include + +//Options with default values. +static char * topology_input = NULL; +static char * input_format = NULL; +static char * restrict_topo = NULL; +static int round_robin = 0; +static char **locations_str = NULL; +static char **user_argv = NULL; +int logical = 0; // Whether indexing is logical +int verbose = 0; // Whether to verbose print. + +// Structure used for accessing threads binding locations. +static hwloc_topology_t topology; +static struct cpuaffinity_enum *binding; + +static void hwloc_process_locations(void) +{ + char **loc_str; + hwloc_cpuset_t location_cpuset; + hwloc_obj_t location; + for(loc_str = locations_str; loc_str < user_argv-1; loc_str++){ + location_cpuset = hwloc_process_location(topology, *loc_str); + if(location_cpuset == NULL) + exit(1); + location = hwloc_get_first_largest_obj_inside_cpuset(topology, + location_cpuset); + hwloc_bitmap_free(location_cpuset); + if(location == NULL) + exit(1); + if(cpuaffinity_enum_append(binding, location) != 0) + exit(1); + } +} + +/***************************************************************************/ +/* Main */ +/***************************************************************************/ + +void usage(const char *callname __hwloc_attribute_unused, FILE *where) +{ + fprintf(where, "hwloc-thread-bind is a utility to bind threads in the order of their creation to specific cpusets.\n\n"); + + fprintf(where, "Usage: hwloc-thread-bind [options] ... -- \n"); + fprintf(where, " may be a space-separated list of cpusets or objects\n"); + fprintf(where, " as supported by the hwloc-bind utility, e.g:\n"); + hwloc_calc_locations_usage(where); + + fprintf(where, "Binding options:\n"); + fprintf(where, " -r --round-robin If more threads are created than locations provided, then round-robin on locations to bind extra threads instead of not binding them.\n"); + + fprintf(where, "Formatting options:\n"); + fprintf(where, " -l --logical Use logical object indexes (default)\n"); + fprintf(where, " -p --physical Use physical object indexes\n"); + fprintf(where, " --restrict Restrict the topology to processors listed in \n"); + hwloc_utils_input_format_usage(where, 10); + + fprintf(where, "Miscellaneous options:\n"); + fprintf(where, " -v --verbose Show verbose messages\n"); + fprintf(where, " --version Report version and exit\n"); +} + +static int match_opt(const int i, + int argc, + char **argv, + const char *short_opt, + const char *long_opt, + const int num_args) +{ + if(i > argc){ + fprintf(stderr, "Option %s does not require argument\n", argv[i]); + exit(1); + } + + if(strcmp(argv[i], short_opt) && strcmp(argv[i],long_opt)) + return 0; + + if(strlen(argv[i]) != strlen(short_opt) && + strlen(argv[i]) != strlen(long_opt)) + return 0; + + if(i+num_args >= argc){ + fprintf(stderr, "Option %s requires %d %s.\n", + argv[i], + num_args, + num_args > 1 ? "arguments" : "argument"); + exit(1); + } + + return 1; +} + +static void parse_options(int argc, char **argv) +{ + int i = 0; + while(++i < argc){ + if(match_opt(i, argc, argv, "-h", "--help", 0)){ + usage(argv[0], stdout); + exit(1); + } + else if(match_opt(i, argc, argv, "!Do not match short option!", "--version", 0)){ + printf("%s %s\n", argv[0], HWLOC_VERSION); + exit(EXIT_SUCCESS); + } + else if(match_opt(i, argc, argv, "-l", "--logical", 0)) + logical = 1; + else if(match_opt(i, argc, argv, "-p", "--physical", 0)) + logical = 0; + else if(match_opt(i, argc, argv, "-v", "--verbose", 0)) + verbose = 1; + else if(match_opt(i, argc, argv, "-r", "--round-robin", 0)) + round_robin = 1; + else if(match_opt(i, argc, argv, "!Do not match short option!", "--restrict", 1)) + restrict_topo = argv[++i]; + else if(match_opt(i, argc, argv, "!Do not match short option!", "--", 1)){ + user_argv = &(argv[++i]); + break; + } + else if (locations_str == NULL) + locations_str = &(argv[i]); + } + + // Check that required options are provided. + if(user_argv == NULL){ + fprintf(stderr, "Must provide a command line to run.\n"); + exit(1); + } + if(locations_str == NULL){ + fprintf(stderr, "Must provide a list of cpuset where to bind threads.\n"); + exit(1); + } +} + +int main(int argc, char **argv) +{ + int err = 0; + enum hwloc_utils_input_format format; + pid_t pid; + + hwloc_utils_check_api_version(argv[0]); + parse_options(argc, argv); + + // Build topology + if (hwloc_topology_init(&topology)) { + perror("hwloc_topology_init"); + return -1; + } + + if(topology_input != NULL){ + format = input_format != NULL ? + hwloc_utils_parse_input_format(input_format, + argv[0]) : + hwloc_utils_autodetect_input_format(topology_input, 0); + if (hwloc_utils_enable_input_format(topology, + HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM, + topology_input, + &format, 0, + argv[0]) != EXIT_SUCCESS) + { + err = -1; + goto out_with_topology; + } + } + + if (hwloc_topology_load(topology) != 0) { + perror("hwloc_topology_load"); + err = -1; + goto out_with_topology; + } + + // Restrict topology + if(restrict_topo != NULL && restrict_topology(topology, restrict_topo) < 0){ + err = -1; + goto out_with_topology; + } + + + // Parse locations + binding = cpuaffinity_enum_alloc(topology); + if(binding == NULL){ + err = -1; + goto out_with_topology; + } + hwloc_process_locations(); + + // Bind program threads + pid = fork(); + + /* Tracee */ + if(pid == 0) { + // Stop child itself, it will be resumed by cpuaffinity_attach() or + // killed if the call fails. + kill(getpid(), SIGSTOP); + // Start command when parent resume this child. + if(execvp(user_argv[0], user_argv) == -1){ + perror("execvp"); + return -1; + } + return 0; + } + + /* Tracer code */ + else if(pid > 0) { + // Attach + err = cpuaffinity_attach(pid, binding, round_robin, 1); + if(err == -1){ + // Could not attach to child. Kill it then. + kill(pid, SIGKILL); + waitpid(pid, NULL, 0); + } else { + wait(&err); + if(WIFEXITED(err)) + err = WEXITSTATUS(err); + } + } + + /* Fork error */ + else { + perror("fork"); + err = -1; + } + + cpuaffinity_enum_free(binding); + out_with_topology: + hwloc_topology_destroy(topology); + + return err; +} diff --git a/utils/hwloc/hwloc-thread-bind.h b/utils/hwloc/hwloc-thread-bind.h new file mode 100644 index 0000000000..20e67cc431 --- /dev/null +++ b/utils/hwloc/hwloc-thread-bind.h @@ -0,0 +1,119 @@ +/*************************************************************************** + * Copyright 2019 UChicago Argonne, LLC. + * Author: Nicolas Denoyelle + * SPDX-License-Identifier: BSD-3-Clause +****************************************************************************/ + +#ifndef HWLOC_TBIND_H +#define HWLOC_TBIND_H + +#include "hwloc.h" + +/** + * Structure holding an ordered enumeration of + * all objects (hwloc_obj) of the same type. + * This structure is further used for binding threads + * in the order of their creation. Enumerations can be + * initialized by manually appending topology objects + * to it or with helper functions. + **/ +struct cpuaffinity_enum; + +/** + * Allocate an enumeration of topology objects. + * @param topology: The topology where objects originate. + * enumeration will fit at most the number of leaves. + **/ +struct cpuaffinity_enum *cpuaffinity_enum_alloc(hwloc_topology_t topology); + +/** + * Free enumeration of processing units. + * @param obj: The object to free. + **/ +void cpuaffinity_enum_free(struct cpuaffinity_enum * obj); + +/** + * Add a processing unit at the end of enumeration. + * @param e: The enumeration to which append hwloc_obj. + * @param obj: The hwloc_obj to append. + * @return -1 on error with errno set to; + * - EINVAL if obj already exists in enumeration, or on argument + * is NULL. + * - EDOM if the enumeration is full. + * @return 0 on success. + **/ +int cpuaffinity_enum_append(struct cpuaffinity_enum * e, hwloc_obj_t obj); + +/** + * Get enumeration length. + **/ +size_t cpuaffinity_enum_size(struct cpuaffinity_enum *obj); + +/** + * Print indexes of hwloc_obj to stdout. + * @param e: The enumeration to print. Printing can be shorten by setting + * field "n" to a smaller number desired number of elements. + * @param sep: The character separator to print between hwloc_obj indexes. + * @param num: The number of index to print. + * If 0, all objects index are printed. + **/ +void cpuaffinity_enum_print(const struct cpuaffinity_enum *e, + const char *sep, + const int cpuset, + const int taskset, + const int reverse, + unsigned num); + +/** + * Get next object in enumeration. If end of enumeration has been reach, + * then first object is returned. + * @param e: The object enumeration to iterate. + * @return The next obj in enumeration. + **/ +hwloc_obj_t cpuaffinity_enum_next(struct cpuaffinity_enum * e); + +/** + * Get next an object in enumeration. If index is out of bound, a modulo + * on index is done. + * @param e: The enumeration of objects. + * @param i: The index of object. + * @return An object in enumeration. + **/ +hwloc_obj_t cpuaffinity_enum_get(struct cpuaffinity_enum * e, + const size_t i); + +/** + * Bind a thread on next topology object. + * @param objs: The list of hwloc objects to use. + * @param tid: The system id of the thread to bind. + * @return The object on which thread is bound. + **/ +hwloc_obj_t cpuaffinity_bind_thread(struct cpuaffinity_enum * objs, + const pid_t tid); + +/** + * Bind the next threads spawned by a process with a particular cpuaffinity. + * @param pid: The pid of the process to bind. + * @param objs: The cpu affinity containing a list of object where to + * consecutively bind threads. + * @param repeat: If more thread are created than objects in + * cpuaffinity_enum, and repeat is set, then next will, + * bound in a round robin fashion of objs instead of not + * not beeing bound. + * @param stopped: A boolean telling the process has been sent a SIGSTOP + * signal prior to call to cpuaffinity_attach(). + * @return -1 on failure, pid exit status on success, 0 if pid has terminated + * because of a signal. + **/ +int cpuaffinity_attach(const pid_t pid, + struct cpuaffinity_enum * objs, + const int repeat, + const int stopped); + +hwloc_cpuset_t hwloc_process_location(hwloc_topology_t topology, + const char* str); + +int restrict_topology(hwloc_topology_t topology, + const char *restrict_str); + +#endif diff --git a/utils/hwloc/test-hwloc-thread-bind.c b/utils/hwloc/test-hwloc-thread-bind.c new file mode 100644 index 0000000000..41e49f622e --- /dev/null +++ b/utils/hwloc/test-hwloc-thread-bind.c @@ -0,0 +1,299 @@ +/*************************************************************************** + * Copyright 2019 UChicago Argonne, LLC. + * Author: Nicolas Denoyelle + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING in top-level directory. + ****************************************************************************/ + +#include +#include +#include +#include +#include +#include "private/autogen/config.h" +#if HWLOC_HAVE_SYS_GETTID +#include +#endif +#ifdef _OPENMP +#include +#endif +#if HWLOC_HAVE_PTHREAD +#include +#endif +#include "hwloc-thread-bind.h" + +int logical = 1; +int verbose = 1; +static hwloc_topology_t topology; +static struct cpuaffinity_enum *binding; + +static inline hwloc_obj_t topology_leaf(void) +{ + int depth = hwloc_topology_get_depth(topology); + return hwloc_get_obj_by_depth(topology, depth-1, 0); +} + +static void hwloc_test_topology_load(void) +{ + hwloc_obj_t PU = NULL; + int depth; + + if (hwloc_topology_init(&topology)) { + perror("hwloc_topology_init"); + goto error; + } + if (hwloc_topology_load(topology) != 0) { + perror("hwloc_topology_load"); + goto error_with_topology; + } + + depth = hwloc_topology_get_depth(topology) - 1; + binding = cpuaffinity_enum_alloc(topology); + if(binding == NULL){ + goto error_with_topology; + } + while((PU = hwloc_get_next_obj_by_depth(topology, depth, PU)) != NULL){ + if(cpuaffinity_enum_append(binding, PU) != 0) + goto error_with_binding; + } + return; + + error_with_binding: + cpuaffinity_enum_free(binding); + error_with_topology: + hwloc_topology_destroy(topology); + error: + exit(1); +} + +static hwloc_obj_t +cpuaffinity_get_binding(const pid_t tid) +{ + int depth = hwloc_topology_get_depth(topology); + hwloc_bitmap_t checkset = hwloc_bitmap_alloc(); + hwloc_obj_t bound, ret = NULL; + + if(hwloc_get_proc_cpubind(topology, + tid, + checkset, + HWLOC_CPUBIND_THREAD) == -1){ + perror("get_cpubind"); + goto check_cpubind_exit; + } + + bound = hwloc_get_obj_by_depth(topology, depth-1, 0); + while(bound != NULL && + !hwloc_bitmap_isincluded(bound->cpuset, checkset)){ + bound = bound->next_cousin; + } + while(bound != NULL && bound->parent != NULL && + hwloc_bitmap_isincluded(bound->parent->cpuset, checkset)){ + bound = bound->parent; + } + ret = bound; + + check_cpubind_exit: + hwloc_bitmap_free(checkset); + return ret; +} + +static int cpuaffinity_check(const hwloc_obj_t target, const pid_t tid) +{ + int ret = 0; + hwloc_bitmap_t checkset = hwloc_bitmap_alloc(); + + hwloc_obj_t bound = cpuaffinity_get_binding(tid); + if(bound == NULL){ + fprintf(stderr, "Binding outside of topology\n"); + goto check_cpubind_exit; + } + if(!hwloc_bitmap_isequal(bound->cpuset, target->cpuset)){ + fprintf(stderr, "Binding on %s:%d instead of %s:%d\n", + hwloc_obj_type_string(bound->type), + bound->logical_index, + hwloc_obj_type_string(target->type), + target->logical_index); + } else { + ret = 1; + } + + check_cpubind_exit: + hwloc_bitmap_free(checkset); + return ret; +} + +#if HWLOC_HAVE_SYS_GETTID +#ifdef _OPENMP +static int check_strategy_openmp(int prebind) +{ + hwloc_obj_t target; + unsigned num_threads; + int err = 0; + hwloc_obj_t leaf = topology_leaf(); + + num_threads = hwloc_get_nbobjs_by_type(topology, leaf->type); + + if(prebind){ +#pragma omp parallel num_threads(num_threads) shared(err, binding) private(target) + { + pid_t pid = syscall(SYS_gettid); + target = cpuaffinity_enum_get(binding, omp_get_thread_num()); + hwloc_set_proc_cpubind(topology, + pid, + target->cpuset, + HWLOC_CPUBIND_THREAD); + err += !cpuaffinity_check(target, + pid); + } + if(err != 0) + goto out; + } + +#pragma omp parallel num_threads(num_threads) shared(binding, err) private(target) + { + pid_t pid = syscall(SYS_gettid); + target = cpuaffinity_enum_get(binding, omp_get_thread_num()); + err += !cpuaffinity_check(target, pid); + } + + out: + return err == 0; +} +#endif + +#if HWLOC_HAVE_PTHREAD + +struct pthread_arg { + int prebind; + hwloc_obj_t target; +}; + +static void* pthread_check(void* arg) +{ + int ret = 0; + int tid = syscall(SYS_gettid); + struct pthread_arg *parg = arg; + if(parg->prebind) + hwloc_set_proc_cpubind(topology, + tid, + parg->target->cpuset, + HWLOC_CPUBIND_THREAD); + // Return 1 if ok + ret = cpuaffinity_check(parg->target, tid); + return (void*)(intptr_t)ret; +} + +static int check_strategy_pthread(int prebind) +{ + int i, err = 0, num_threads; + intptr_t ret = 0; + hwloc_thread_t tid; + hwloc_obj_t leaf = topology_leaf(); + struct pthread_arg parg = { + .prebind = prebind, + .target = NULL, + }; + + num_threads = hwloc_get_nbobjs_by_type(topology, + leaf->type); + for(i=0; i= 0); + + /* Tracee */ + if(pid == 0) { + // Stop child itself, it will be resumed by ptrace or + // killed if ptrace fails. + kill(getpid(), SIGSTOP); + // On resume do check. Return 0 if check succeeded. + int status = check_fn(0); + exit(!status); + } + /* Tracer code */ + else if(pid > 0){ + int out; + // Attach and continue execution. + out = cpuaffinity_attach(pid, binding, 1, 1); + if(out < 0){ + kill(pid, SIGKILL); + waitpid(pid, NULL, 0); + assert(0); + } + assert(out == 0); + } +} + +static void test_parallel(int (*check_fn)(int)) +{ + // Check function return 1 if everything went as exepected + assert(check_fn(1)); +} +#endif //HWLOC_HAVE_SYS_GETTID + +static void test_sequential(void) +{ + pid_t pid = getpid(); + hwloc_obj_t obj; + size_t i; + + for(i=0; icpubind || + !support->cpubind->set_thread_cpubind || + !support->cpubind->get_thread_cpubind) + return 0; + + test_sequential(); + + // Test binding is done as expected. +#if HWLOC_HAVE_SYS_GETTID +#if HWLOC_HAVE_PTRACE +#ifdef _OPENMP + test_parallel(check_strategy_openmp); + //test_attach(check_strategy_openmp); +#endif // _OPENMP +#if HWLOC_HAVE_PTHREAD + test_parallel(check_strategy_pthread); + test_attach(check_strategy_pthread); +#endif // HWLOC_HAVE_PTHREAD +#endif // HWLOC_HAVE_PTRACE +#endif // HWLOC_HAVE_SYS_GETTID + + hwloc_topology_destroy(topology); + return 0; +} +