Skip to content

Commit

Permalink
Add a configuration command to reset all statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
nichamon committed Dec 15, 2023
1 parent 549d4fa commit 9c07275
Show file tree
Hide file tree
Showing 7 changed files with 199 additions and 8 deletions.
21 changes: 21 additions & 0 deletions ldms/python/ldmsd/ldmsd_communicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import time
import json
import errno
from pickle import NONE

#:Dictionary contains the cmd_id, required attribute list
#:and optional attribute list of each ldmsd commands. For example,
Expand Down Expand Up @@ -160,6 +161,7 @@
'metric_sets_default_authz': {'req_attr':[], 'opt_attr': ['uid', 'gid', 'perm']},
'set_sec_mod' : {'req_attr': ['regex'], 'opt_attr': ['uid', 'gid', 'perm']},
'log_status' : {'req_attr' : [], 'opt_attr' : ['name']},
'stats_reset' : {'req_attr' : [], 'opt_attr' : ['list']},
##### Failover. #####
'failover_config': {
'req_attr': [
Expand Down Expand Up @@ -529,6 +531,7 @@ class LDMSD_Request(object):
SET_DEFAULT_AUTHZ = 0x600 + 17
SET_SEC_MOD = 0x600 + 19
LOG_STATUS = 0x600 + 20
STATS_RESET = 0x600 + 21

FAILOVER_CONFIG = 0x700
FAILOVER_PEERCFG_START = 0x700 + 1
Expand Down Expand Up @@ -1774,6 +1777,24 @@ def log_level(self, level, name = None, regex = None):
self.close()
return errno.ENOTCONN, str(e)

def stats_reset(self, s = None):
    """
    Ask ldmsd to reset its statistics counters

    Parameters:
    - s: The string sent as the STRING attribute of the STATS_RESET
         request. When it is None or empty, the request is sent without
         any attribute.

    Returns:
    A tuple of (errcode, msg) taken from the response. If sending or
    receiving raises, the connection is closed and
    (errno.ENOTCONN, <exception text>) is returned instead.
    """
    attrs = []
    if s is not None and len(s) > 0:
        attrs.append(LDMSD_Req_Attr(attr_id = LDMSD_Req_Attr.STRING, value = s))
    request = LDMSD_Request(command_id = LDMSD_Request.STATS_RESET, attrs = attrs)

    try:
        request.send(self)
        reply = request.receive(self)
        return reply['errcode'], reply['msg']
    except Exception as err:
        # Same failure convention as the other request methods: drop the
        # connection and report ENOTCONN with the exception text.
        self.close()
        return errno.ENOTCONN, str(err)

def getCfgCntr(self):
req = LDMSD_Request(command_id=LDMSD_Request.CFG_CNTR)
try:
Expand Down
20 changes: 20 additions & 0 deletions ldms/python/ldmsd/ldmsd_controller
Original file line number Diff line number Diff line change
Expand Up @@ -2728,6 +2728,26 @@ class LdmsdCmdParser(cmd.Cmd):
"log level as the default logger (ldmsd). When the default log "
"level changes, their log levels change accordingly.")

def complete_stats_reset(self, text, line, begidx, endidx):
    """
    Completion hook for the stats_reset command

    Delegates to the attribute-list completer with the command name and
    the text being completed; line, begidx and endidx are not used.
    """
    candidates = self.__complete_attr_list('stats_reset', text)
    return candidates

def do_stats_reset(self, arg):
    """
    Reset the statistics counters
    Parameters:
    [list=]  A comma-separated list of statistics to be reset.
             If omitted, all of the statistics below are reset.
                 thread - reset the thread statistics.
                 xprt   - reset the transport statistics.
                 update - reset the update time statistics and skipped and over-sampled counters.
                 store  - reset the store time statistics.
                 stream - reset the stream and stream client statistics
    """
    arg = self.handle_args('stats_reset', arg)
    if arg is None:
        # NOTE(review): assumes handle_args() returns None once it has
        # reported an argument error -- confirm against its definition.
        # Without this guard the subscript below raises TypeError.
        return
    rc, msg = self.comm.stats_reset(s = arg['list'])
    if rc:
        # Surface the reason from the daemon/communicator instead of
        # discarding it.
        detail = f": {msg}" if msg else ""
        print(f"Failed to reset the statistics{detail}")

def do_option(self, arg):
"""
ONLY SUPPORTED IN CONFIGURATION FILES
Expand Down
8 changes: 7 additions & 1 deletion ldms/src/core/ldms.h
Original file line number Diff line number Diff line change
Expand Up @@ -1339,7 +1339,8 @@ char *ldms_stream_stats_tq_to_str(struct ldms_stream_stats_tq_s *tq);
*
* \param match The stream name or a regular expression.
* \param is_regex 1 if \c match is a regular expression; otherwise, 0.
* \param is_reset 1 means to reset the streams' statistics
* \param is_reset 0 means not to reset the statistics.
* A non-zero value means to reset the statistics.
*
* \retval str The string describing the stats.
*
Expand Down Expand Up @@ -1388,6 +1389,11 @@ char *ldms_stream_client_stats_tq_to_str(struct ldms_stream_client_stats_tq_s *t
*/
char *ldms_stream_client_stats_str(int is_reset);

/**
* \brief Reset the statistics of streams and their clients
*
* Re-initializes the receive counters of every stream and of every
* per-source statistics entry, and the transmit and drop counters of
* every stream client and client-stream entry.
*
* The function takes no argument and returns no value.
*/
void ldms_stream_n_client_stats_reset();

/** \} */

/**
Expand Down
56 changes: 56 additions & 0 deletions ldms/src/core/ldms_stream.c
Original file line number Diff line number Diff line change
Expand Up @@ -1739,6 +1739,62 @@ char *ldms_stream_stats_tq_to_str(struct ldms_stream_stats_tq_s *tq)
return ret;
}

/*
 * Re-initialize the statistics counters of all streams, their per-source
 * statistics, and all stream clients (regex and non-regex).
 */
void ldms_stream_n_client_stats_reset()
{
	ldms_stream_client_t client;
	struct ldms_stream_client_entry_s *ent;
	struct ldms_stream_s *stream;
	struct ldms_stream_src_stats_s *src_stats;
	struct rbn *stream_node, *src_node;

	/*
	 * Only read locks are taken, so a counter may race with a concurrent
	 * updater. This is tolerated because the reset never modifies the
	 * structure of the tree or of the lists.
	 */
	__STREAM_RDLOCK();

	/* Pass 1: regex clients and each of their per-stream entries. */
	TAILQ_FOREACH(client, &__regex_client_tq, entry) {
		pthread_rwlock_rdlock(&client->rwlock);
		LDMS_STREAM_COUNTERS_INIT(&client->tx);
		LDMS_STREAM_COUNTERS_INIT(&client->drops);
		TAILQ_FOREACH(ent, &client->stream_tq, client_stream_entry) {
			LDMS_STREAM_COUNTERS_INIT(&ent->tx);
			LDMS_STREAM_COUNTERS_INIT(&ent->drops);
		}
		pthread_rwlock_unlock(&client->rwlock);
	}

	/* Pass 2: every stream, its sources, and its non-regex clients. */
	RBT_FOREACH(stream_node, &__stream_rbt) {
		stream = container_of(stream_node, struct ldms_stream_s, rbn);
		pthread_rwlock_rdlock(&stream->rwlock);

		/* Per-source receive counters */
		RBT_FOREACH(src_node, &stream->src_stats_rbt) {
			src_stats = container_of(src_node,
					struct ldms_stream_src_stats_s, rbn);
			LDMS_STREAM_COUNTERS_INIT(&src_stats->rx);
		}

		TAILQ_FOREACH(ent, &stream->client_tq, stream_client_entry) {
			client = ent->client;
			/*
			 * Skip entries without a client, and regex clients,
			 * which were already handled in pass 1.
			 */
			if (!client || client->is_regex)
				continue;

			LDMS_STREAM_COUNTERS_INIT(&ent->tx);
			LDMS_STREAM_COUNTERS_INIT(&ent->drops);
			LDMS_STREAM_COUNTERS_INIT(&client->tx);
			LDMS_STREAM_COUNTERS_INIT(&client->drops);
		}

		/* The stream's own receive counters */
		LDMS_STREAM_COUNTERS_INIT(&stream->rx);
		pthread_rwlock_unlock(&stream->rwlock);
	}

	__STREAM_UNLOCK();
}

char *ldms_stream_stats_str(const char *match, int is_regex, int is_reset)
{
struct ldms_stream_stats_tq_s *tq = NULL;
Expand Down
16 changes: 9 additions & 7 deletions ldms/src/core/ldms_xprt.c
Original file line number Diff line number Diff line change
Expand Up @@ -220,13 +220,15 @@ void ldms_xprt_rate_data(struct ldms_xprt_rate_data *data, int reset)
struct timespec now;
double dur_s;
(void)clock_gettime(CLOCK_REALTIME, &now);
dur_s = ldms_timespec_diff_s(&xprt_start, &now);
data->connect_rate_s = (double)xprt_connect_count / dur_s;
data->connect_request_rate_s = (double)xprt_connect_request_count / dur_s;
data->disconnect_rate_s = (double)xprt_disconnect_count / dur_s;
data->reject_rate_s = (double)xprt_reject_count / dur_s;
data->auth_fail_rate_s = (double)xprt_auth_fail_count / dur_s;
data->duration = dur_s;
if (data) {
dur_s = ldms_timespec_diff_s(&xprt_start, &now);
data->connect_rate_s = (double)xprt_connect_count / dur_s;
data->connect_request_rate_s = (double)xprt_connect_request_count / dur_s;
data->disconnect_rate_s = (double)xprt_disconnect_count / dur_s;
data->reject_rate_s = (double)xprt_reject_count / dur_s;
data->auth_fail_rate_s = (double)xprt_auth_fail_count / dur_s;
data->duration = dur_s;
}
if (reset) {
struct ldms_xprt *x;
pthread_mutex_lock(&xprt_list_lock);
Expand Down
85 changes: 85 additions & 0 deletions ldms/src/ldmsd/ldmsd_request.c
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ static int prdcr_hint_tree_status_handler(ldmsd_req_ctxt_t reqc);
static int update_time_stats_handler(ldmsd_req_ctxt_t reqc);
static int set_sec_mod_handler(ldmsd_req_ctxt_t reqc);
static int log_status_handler(ldmsd_req_ctxt_t reqc);
static int stats_reset_handler(ldmsd_req_ctxt_t reqc);

/* these are implemented in ldmsd_failover.c */
int failover_config_handler(ldmsd_req_ctxt_t req_ctxt);
Expand Down Expand Up @@ -510,6 +511,9 @@ static struct request_handler_entry request_handler[] = {
[LDMSD_LOG_STATUS_REQ] = {
LDMSD_LOG_STATUS_REQ, log_status_handler, XUG
},
[LDMSD_STATS_RESET_REQ] = {
LDMSD_STATS_RESET_REQ, stats_reset_handler, XALL
},

/* Transport Stats Request */
[LDMSD_XPRT_STATS_REQ] = {
Expand Down Expand Up @@ -8788,3 +8792,84 @@ static int store_time_stats_handler(ldmsd_req_ctxt_t reqc)
json_entity_free(strgp_dict);
return rc;
}

/*
 * Reset the update and/or store statistics of every producer set.
 *
 * \param now        Timestamp recorded as the new start time of each
 *                   statistic that is reset.
 * \param is_update  Non-zero to reset the update statistics and the
 *                   oversampled and skipped-update counters.
 * \param is_store   Non-zero to reset the store statistics.
 *
 * Each statistic is cleared and re-stamped independently, so resetting
 * one kind leaves the other untouched, and a reset statistic starts at
 * \c now rather than at a zeroed timestamp.
 */
static void __prdset_stats_reset(struct timespec *now, int is_update, int is_store)
{
	ldmsd_prdcr_t prdcr;
	ldmsd_prdcr_set_t prdset;
	struct rbn *rbn;

	/* Nothing requested: avoid walking and locking every producer. */
	if (!is_update && !is_store)
		return;

	for (prdcr = ldmsd_prdcr_first(); prdcr; prdcr = ldmsd_prdcr_next(prdcr)) {
		ldmsd_prdcr_lock(prdcr);
		RBT_FOREACH(rbn, &prdcr->set_tree) {
			prdset = container_of(rbn, struct ldmsd_prdcr_set, rbn);
			if (is_update) {
				memset(&prdset->updt_stat, 0, sizeof(prdset->updt_stat));
				prdset->updt_stat.start = *now;
				prdset->oversampled_cnt = prdset->skipped_upd_cnt = 0;
			}
			if (is_store) {
				memset(&prdset->store_stat, 0, sizeof(prdset->store_stat));
				/* Stamp after the memset so the start time survives. */
				prdset->store_stat.start = *now;
			}
		}
		ldmsd_prdcr_unlock(prdcr);
	}
}

/*
 * Handler of LDMSD_STATS_RESET_REQ.
 *
 * The optional LDMSD_ATTR_STRING attribute is a comma-separated list of
 * the statistics to reset. Names are matched case-insensitively:
 * "update", "store", "thread", "xprt", "stream". Unrecognized names are
 * ignored. When the attribute is absent, all statistics are reset.
 *
 * A response is always sent to the requester.
 *
 * \retval 0       Success
 * \retval ENOMEM  The attribute value could not be duplicated
 */
static int stats_reset_handler(ldmsd_req_ctxt_t reqc)
{
	struct timespec now;
	int rc = 0;
	char *s;
	char *tmp = NULL;	/* scratch copy consumed by strtok_r() */
	char *tok, *ptr;
	int is_update;
	int is_store;
	int is_thread;
	int is_xprt;
	int is_stream;
	is_update = is_store = is_thread = is_xprt = is_stream = 0;

	s = ldmsd_req_attr_str_value_get_by_id(reqc, LDMSD_ATTR_STRING);
	if (s) {
		tmp = strdup(s);
		if (!tmp) {
			ovis_log(config_log, OVIS_LCRIT, "Memory allocation failure\n");
			(void) Snprintf(&reqc->line_buf, &reqc->line_len, "Memory allocation failed.");
			rc = ENOMEM;
			goto out;
		}

		for (tok = strtok_r(tmp, ",", &ptr); tok;
				tok = strtok_r(NULL, ",", &ptr)) {
			if (0 == strcasecmp(tok, "update"))
				is_update = 1;
			else if (0 == strcasecmp(tok, "store"))
				is_store = 1;
			else if (0 == strcasecmp(tok, "thread"))
				is_thread = 1;
			else if (0 == strcasecmp(tok, "xprt"))
				is_xprt = 1;
			else if (0 == strcasecmp(tok, "stream"))
				is_stream = 1;
		}
	} else {
		/* No list given: reset everything. */
		is_update = is_store = is_thread = is_xprt = is_stream = 1;
	}

	clock_gettime(CLOCK_REALTIME, &now);
	if (is_thread)
		zap_thrstat_reset_all();

	if (is_xprt)
		ldms_xprt_rate_data(NULL, 1);

	/* Walk the producer sets only when there is something to reset. */
	if (is_update || is_store)
		__prdset_stats_reset(&now, is_update, is_store);

	if (is_stream)
		ldms_stream_n_client_stats_reset();
out:
	/* free(NULL) is a no-op, so both frees are safe on every path. */
	free(tmp);
	free(s);
	ldmsd_send_req_response(reqc, reqc->line_buf);
	return rc;
}
1 change: 1 addition & 0 deletions ldms/src/ldmsd/ldmsd_request.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ enum ldmsd_request {
LDMSD_CMDLINE_OPTIONS_SET_REQ,
LDMSD_SET_SEC_MOD_REQ,
LDMSD_LOG_STATUS_REQ,
LDMSD_STATS_RESET_REQ,

/* failover requests by user */
LDMSD_FAILOVER_CONFIG_REQ = 0x700, /* "failover_config" user command */
Expand Down

0 comments on commit 9c07275

Please sign in to comment.