diff --git a/ldms/src/sampler/slurm/slurm_sampler.c b/ldms/src/sampler/slurm/slurm_sampler.c index 3968f51f9..7d0a3bccd 100644 --- a/ldms/src/sampler/slurm/slurm_sampler.c +++ b/ldms/src/sampler/slurm/slurm_sampler.c @@ -681,6 +681,24 @@ static void handle_job_init(job_data_t job, json_entity_t e) ldms_metric_array_set_u32(job_set, task_exit_status_idx + job->job_slot, i, 0); } + out: + ldms_transaction_end(job_set); +} + +static void handle_step_init(job_data_t job, json_entity_t e) +{ + int int_v; + json_entity_t attr, data, dict; + + data = json_attr_find(e, "data"); + if (!data) { + msglog(LDMSD_LERROR, "slurm_sampler: Missing 'data' attribute " + "in 'init' event.\n"); + return; + } + dict = json_attr_value(data); + + ldms_transaction_begin(job_set); attr = json_attr_find(dict, "job_user"); if (attr) { json_entity_t user_name = json_attr_value(attr); @@ -716,24 +734,6 @@ static void handle_job_init(job_data_t job, json_entity_t e) json_value_str(job_tag)->str); break; } - out: - ldms_transaction_end(job_set); -} - -static void handle_step_init(job_data_t job, json_entity_t e) -{ - int int_v; - json_entity_t attr, data, dict; - - data = json_attr_find(e, "data"); - if (!data) { - msglog(LDMSD_LERROR, "slurm_sampler: Missing 'data' attribute " - "in 'init' event.\n"); - return; - } - dict = json_attr_value(data); - - ldms_transaction_begin(job_set); attr = json_attr_find(dict, "nnodes"); if (attr) { int_v = json_value_int(json_attr_value(attr)); @@ -746,6 +746,12 @@ static void handle_step_init(job_data_t job, json_entity_t e) ldms_metric_array_set_u32(job_set, task_count_idx, job->job_slot, int_v); } + attr = json_attr_find(dict, "step_id"); + if (attr) { + int_v = json_value_int(json_attr_value(attr)); + ldms_metric_array_set_u64(job_set, app_id_idx, job->job_slot, int_v); + } + attr = json_attr_find(dict, "uid"); if (attr) { int_v = json_value_int(json_attr_value(attr)); diff --git a/ldms/src/sampler/spank/slurm_notifier.c b/ldms/src/sampler/spank/slurm_notifier.c index ae400e6cd..d4036b594 100644 --- a/ldms/src/sampler/spank/slurm_notifier.c +++ b/ldms/src/sampler/spank/slurm_notifier.c @@ -152,8 +152,6 @@ static void msglog(const char *format, ...) * * "data" : { * "job_id" : // S_JOB_ID - * "job_name" : // getenv("SLURM_JOB_NAME") - * "job_user" : // getenv("SLURM_JOB_USER") * "nodeid" : // S_JOB_NODEID * "uid" : // S_JOB_UID * "gid" : // S_JOB_GID @@ -171,7 +169,14 @@ static void msglog(const char *format, ...) * "nodeid" : // S_JOB_NODEID * "step_id" : // S_JOB_STEPID * "alloc_mb" : // S_STEP_ALLOC_MEM - * "subscriber_data" : // getenv("SUBSCRIBER_DATA") + * "subscriber_data" : // getenv("SUBSCRIBER_DATA") + * "job_name" : // getenv("SLURM_JOB_NAME") + * "job_user" : // getenv("SLURM_JOB_USER") + * "ncpus" : // S_JOB_NCPUS + * "nnodes" : // S_JOB_NNODES + * "alloc_mb" : // S_JOB_ALLOC_MEM + * "local_tasks" : // S_JOB_LOCAL_TASK_COUNT + * "total_tasks" : // S_JOB_TOTAL_TASK_COUNT * } * * Step Exit Event ("step_exit") - End of Job Step @@ -655,9 +660,7 @@ char *__context_str(spank_t sh, const char *func) jbuf_t make_job_init_data(spank_t sh) { - char name[80]; jbuf_t jb; - spank_err_t err; jb = jbuf_new(); if (!jb) goto out_1; jb = jbuf_append_str(jb, "{"); if (!jb) goto out_1; @@ -667,19 +670,6 @@ jbuf_t make_job_init_data(spank_t sh) jb = jbuf_append_attr(jb, "context", "\"%s\",", context_str(sh)); if (!jb) goto out_1; jb = jbuf_append_attr(jb, "data", "{"); if (!jb) goto out_1; jb = _append_item_u32(sh, jb, "job_id", S_JOB_ID, ','); if (!jb) goto out_1; - - name[0] = '\0'; - err = spank_getenv(sh, "SLURM_JOB_NAME", name, sizeof(name)); - if (err) - name[0] = '\0'; - jb = jbuf_append_attr(jb, "job_name", "\"%s\",", name); if (!jb) goto out_1; - - name[0] = '\0'; - err = spank_getenv(sh, "SLURM_JOB_USER", name, sizeof(name)); - if (err) - name[0] = '\0'; - jb = jbuf_append_attr(jb, "job_user", "\"%s\",", name); if (!jb) goto out_1; - jb = _append_item_u32(sh, jb, "nodeid", S_JOB_NODEID, ','); if (!jb) goto out_1; jb = _append_item_u32(sh, jb, "uid", S_JOB_UID, ','); if (!jb) goto out_1; jb = _append_item_u32(sh, jb, "gid", S_JOB_GID, ','); if (!jb) goto out_1; @@ -712,7 +702,7 @@ jbuf_t make_job_exit_data(spank_t sh) jbuf_t make_step_init_data(spank_t sh) { jbuf_t jb; - char subscriber_data[PATH_MAX]; + char env[PATH_MAX]; spank_err_t err; jb = jbuf_new(); if (!jb) goto out_1; @@ -722,17 +712,29 @@ jbuf_t make_step_init_data(spank_t sh) jb = jbuf_append_attr(jb, "timestamp", "%d,", time(NULL)); if (!jb) goto out_1; jb = jbuf_append_attr(jb, "context", "\"%s\",", context_str(sh)); if (!jb) goto out_1; jb = jbuf_append_attr(jb, "data", "{"); if (!jb) goto out_1; - subscriber_data[0] = '\0'; - err = spank_getenv(sh, "SUBSCRIBER_DATA", subscriber_data, sizeof(subscriber_data)); + env[0] = '\0'; + err = spank_getenv(sh, "SUBSCRIBER_DATA", env, sizeof(env)); if (err) - strcpy(subscriber_data, "{}"); - DEBUG2("SUBSCRIBER_DATA '%s'.\n", subscriber_data); - if (json_verify_string(subscriber_data)) { + strcpy(env, "{}"); + DEBUG2("SUBSCRIBER_DATA '%s'.\n", env); + if (json_verify_string(env)) { DEBUG2("subscriber_data '%s' is not valid JSON and is being " - "ignored.\n", subscriber_data); - strcpy(subscriber_data, "{}"); + "ignored.\n", env); + strcpy(env, "{}"); } - jb = jbuf_append_attr(jb, "subscriber_data", "%s,", subscriber_data); if (!jb) goto out_1; + jb = jbuf_append_attr(jb, "subscriber_data", "%s,", env); if (!jb) goto out_1; + env[0] = '\0'; + err = spank_getenv(sh, "SLURM_JOB_NAME", env, sizeof(env)); + if (err) + env[0] = '\0'; + jb = jbuf_append_attr(jb, "job_name", "\"%s\",", env); if (!jb) goto out_1; + + env[0] = '\0'; + err = spank_getenv(sh, "SLURM_JOB_USER", env, sizeof(env)); + if (err) + env[0] = '\0'; + jb = jbuf_append_attr(jb, "job_user", "\"%s\",", env); if (!jb) goto out_1; + jb = _append_item_u32(sh, jb, "job_id", S_JOB_ID, ','); if (!jb) goto out_1; jb = _append_item_u32(sh, jb, "nodeid", S_JOB_NODEID, ','); if (!jb) goto out_1; jb = _append_item_u32(sh, jb, "step_id", S_JOB_STEPID, ','); if (!jb) goto out_1;