Skip to content

Commit

Permalink
Make ldmsd continue startup on producer's hostname resolution failure
Browse files Browse the repository at this point in the history
Change producer's hostname resolution failure at config time from an
error to a warning, allowing ldmsd to continue starting up. The hostname
will be resolved again before establishing the connection and retried at
the reconnect interval if the resolution fails.
  • Loading branch information
nichamon committed Oct 31, 2024
1 parent f81bfeb commit 26ed761
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 5 deletions.
5 changes: 5 additions & 0 deletions ldms/src/ldmsd/ldmsd.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,11 @@ typedef struct ldmsd_prdcr {
* quick lookup by the logic that handles update schedule.
*/
struct rbt hint_set_tree;

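/**
* Number of consecutive failed hostname resolution attempts. Reset to
* zero when the hostname is resolved. Used to log the resolution error
* only on the first failure of a run.
*/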
int not_resolved_cnt;
} *ldmsd_prdcr_t;

struct ldmsd_strgp;
Expand Down
19 changes: 14 additions & 5 deletions ldms/src/ldmsd/ldmsd_prdcr.c
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,17 @@ static void prdcr_connect(ldmsd_prdcr_t prdcr)
{
int ret;

if (prdcr_resolve(prdcr->host_name, prdcr->port_no, &prdcr->ss, &prdcr->ss_len)) {
if (0 == prdcr->not_resolved_cnt) {
ldmsd_log(LDMSD_LERROR, "Producer '%s' connection failed. Unable to resolve hostname '%s:%u'.\n",
prdcr->obj.name, prdcr->host_name,(unsigned) prdcr->port_no);
}
prdcr->not_resolved_cnt++;
return;
} else {
prdcr->not_resolved_cnt = 0;
}

switch (prdcr->type) {
case LDMSD_PRDCR_TYPE_ACTIVE:
case LDMSD_PRDCR_TYPE_ADVERTISER:
Expand Down Expand Up @@ -1006,11 +1017,9 @@ ldmsd_prdcr_new_with_auth(const char *name, const char *xprt_name,
}
}

if (prdcr_resolve(host_name, port_no, &prdcr->ss, &prdcr->ss_len)) {
errno = EAFNOSUPPORT;
ldmsd_log(LDMSD_LERROR, "ldmsd_prdcr_new: %s:%u not resolved.\n",
host_name,(unsigned) port_no);
goto out;
if (prdcr_resolve(prdcr->host_name, prdcr->port_no, &prdcr->ss, &prdcr->ss_len)) {
ldmsd_log(LDMSD_LWARNING, "Producer '%s': %s:%u not resolved.\n",
prdcr->obj.name, prdcr->host_name,(unsigned) prdcr->port_no);
}

if (!auth)
Expand Down

0 comments on commit 26ed761

Please sign in to comment.