From eca5335b4ebd46be749da41575b2896a552d1fd1 Mon Sep 17 00:00:00 2001 From: nick Date: Fri, 4 Oct 2024 11:00:23 -0500 Subject: [PATCH 1/3] YAML file parsing for LDMSD --- ldms/python/ldmsd/Makefile.am | 4 +- ldms/python/ldmsd/hostlist.py | 414 +++++++++++++ ldms/python/ldmsd/ldmsd_yaml_parser | 916 ++++++++++++++++++++++++++++ ldms/python/ldmsd/parser_util.py | 176 ++++++ ldms/src/ldmsd/ldmsd.c | 56 +- ldms/src/ldmsd/ldmsd.h | 4 + ldms/src/ldmsd/ldmsd_config.c | 249 +++++++- 7 files changed, 1813 insertions(+), 6 deletions(-) create mode 100644 ldms/python/ldmsd/hostlist.py create mode 100755 ldms/python/ldmsd/ldmsd_yaml_parser create mode 100644 ldms/python/ldmsd/parser_util.py diff --git a/ldms/python/ldmsd/Makefile.am b/ldms/python/ldmsd/Makefile.am index f4dd661dd..bdcfaf1c9 100644 --- a/ldms/python/ldmsd/Makefile.am +++ b/ldms/python/ldmsd/Makefile.am @@ -1,3 +1,3 @@ pkgpythondir=${pythondir}/ldmsd -pkgpython_PYTHON = __init__.py ldmsd_setup.py ldmsd_util.py ldmsd_communicator.py ldmsd_config.py -dist_bin_SCRIPTS = ldmsd_controller +pkgpython_PYTHON = __init__.py ldmsd_setup.py ldmsd_util.py ldmsd_communicator.py ldmsd_config.py parser_util.py hostlist.py +dist_bin_SCRIPTS = ldmsd_controller ldmsd_yaml_parser diff --git a/ldms/python/ldmsd/hostlist.py b/ldms/python/ldmsd/hostlist.py new file mode 100644 index 000000000..5797fccd5 --- /dev/null +++ b/ldms/python/ldmsd/hostlist.py @@ -0,0 +1,414 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Hostlist library +# +# Copyright (C) 2008-2018 +# Kent Engström , +# Thomas Bellman , +# Pär Lindfors and +# Torbjörn Lönnemark , +# National Supercomputer Centre +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. + +"""Handle hostlist expressions. + +This module provides operations to expand and collect hostlist +expressions. + +The hostlist expression syntax is the same as in several programs +developed at LLNL (https://computing.llnl.gov/linux/). However in +corner cases the behaviour of this module have not been compared for +compatibility with pdsh/dshbak/SLURM et al. +""" + +__version__ = "1.20" + +import re +import itertools + +# Replace range with xrange on Python 2, do nothing on Python 3 (where xrange +# does not exist, and range returns an iterator) +try: + range = xrange +except: + pass + +# Exception used for error reporting to the caller +class BadHostlist(Exception): pass + +# Configuration to guard against ridiculously long expanded lists +MAX_SIZE = 100000 + +# Hostlist expansion + +def expand_hostlist(hostlist, allow_duplicates=False, sort=False): + """Expand a hostlist expression string to a Python list. + + Example: expand_hostlist("n[9-11],d[01-02]") ==> + ['n9', 'n10', 'n11', 'd01', 'd02'] + + Unless allow_duplicates is true, duplicates will be purged + from the results. If sort is true, the output will be sorted. 
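+
+    Example: expand_hostlist("n[1-3],n[2-4]") ==>
+             ['n1', 'n2', 'n3', 'n4']  (duplicates removed, order kept)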
+ """ + + results = [] + bracket_level = 0 + part = "" + for c in hostlist + ",": + if c == "," and bracket_level == 0: + # Comma at top level, split! + if part: results.extend(expand_part(part)) + part = "" + bad_part = False + else: + part += c + + if c == "[": bracket_level += 1 + elif c == "]": bracket_level -= 1 + + if bracket_level > 1: + raise BadHostlist("nested brackets") + elif bracket_level < 0: + raise BadHostlist("unbalanced brackets") + + if bracket_level > 0: + raise BadHostlist("unbalanced brackets") + + if not allow_duplicates: + results = remove_duplicates(results) + if sort: + results = numerically_sorted(results) + return results + +def expand_part(s): + """Expand a part (e.g. "x[1-2]y[1-3][1-3]") (no outer level commas).""" + + # Base case: the empty part expand to the singleton list of "" + if s == "": + return [""] + + # Split into: + # 1) prefix string (may be empty) + # 2) rangelist in brackets (may be missing) + # 3) the rest + + m = re.match(r'([^,\[]*)(\[[^\]]*\])?(.*)', s) + (prefix, rangelist, rest) = m.group(1,2,3) + + # Expand the rest first (here is where we recurse!) + rest_expanded = expand_part(rest) + + # Expand our own part + if not rangelist: + # If there is no rangelist, our own contribution is the prefix only + us_expanded = [prefix] + else: + # Otherwise expand the rangelist (adding the prefix before) + us_expanded = expand_rangelist(prefix, rangelist[1:-1]) + + # Combine our list with the list from the expansion of the rest + # (but guard against too large results first) + if len(us_expanded) * len(rest_expanded) > MAX_SIZE: + raise BadHostlist("results too large") + + return [us_part + rest_part + for us_part in us_expanded + for rest_part in rest_expanded] + +def expand_rangelist(prefix, rangelist): + """ Expand a rangelist (e.g. "1-10,14"), putting a prefix before.""" + + # Split at commas and expand each range separately + results = [] + for range_ in rangelist.split(","): + results.extend(expand_range(prefix, range_)) + return results + +def expand_range(prefix, range_): + """ Expand a range (e.g. 1-10 or 14), putting a prefix before.""" + + # Check for a single number first + m = re.match(r'^[0-9]+$', range_) + if m: + return ["%s%s" % (prefix, range_)] + + # Otherwise split low-high + m = re.match(r'^([0-9]+)-([0-9]+)$', range_) + if not m: + raise BadHostlist("bad range") + + (s_low, s_high) = m.group(1,2) + low = int(s_low) + high = int(s_high) + width = len(s_low) + + if high < low: + raise BadHostlist("start > stop") + elif high - low > MAX_SIZE: + raise BadHostlist("range too large") + + results = [] + for i in range(low, high+1): + results.append("%s%0*d" % (prefix, width, i)) + return results + +def remove_duplicates(l): + """Remove duplicates from a list (but keep the order).""" + seen = set() + results = [] + for e in l: + if e not in seen: + results.append(e) + seen.add(e) + return results + +# Hostlist collection + +def collect_hostlist(hosts, silently_discard_bad = False): + """Collect a hostlist string from a Python list of hosts. + + We start grouping from the rightmost numerical part. + Duplicates are removed. + + A bad hostname raises an exception (unless silently_discard_bad + is true causing the bad hostname to be silently discarded instead). + """ + + # Split hostlist into a list of (host, "") for the iterative part. + # (Also check for bad node names now) + # The idea is to move already collected numerical parts from the + # left side (seen by each loop) to the right side (just copied). 
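+    # For example, ("n1", "") becomes ("n", "[1-2]") after one pass,
+    # and ("", "n[1-2]") after the next.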
+ + left_right = [] + for host in hosts: + # We remove leading and trailing whitespace first, and skip empty lines + host = host.strip() + if host == "": continue + + # We cannot accept a host containing any of the three special + # characters in the hostlist syntax (comma and flat brackets) + if re.search(r'[][,]', host): + if silently_discard_bad: + continue + else: + raise BadHostlist("forbidden character") + + left_right.append((host, "")) + + # Call the iterative function until it says it's done + looping = True + while looping: + left_right, looping = collect_hostlist_1(left_right) + return ",".join([left + right for left, right in left_right]) + +def collect_hostlist_1(left_right): + """Collect a hostlist string from a list of hosts (left+right). + + The input is a list of tuples (left, right). The left part + is analyzed, while the right part is just passed along + (it can contain already collected range expressions). + """ + + # Scan the list of hosts (left+right) and build two things: + # *) a set of all hosts seen (used later) + # *) a list where each host entry is preprocessed for correct sorting + + sortlist = [] + remaining = set() + for left, right in left_right: + host = left + right + remaining.add(host) + + # Match the left part into parts + m = re.match(r'^(.*?)([0-9]+)?([^0-9]*)$', left) + (prefix, num_str, suffix) = m.group(1,2,3) + + # Add the right part unprocessed to the suffix. + # This ensures than an already computed range expression + # in the right part is not analyzed again. + suffix = suffix + right + + if num_str is None: + # A left part with no numeric part at all gets special treatment! + # The regexp matches with the whole string as the suffix, + # with nothing in the prefix or numeric parts. + # We do not want that, so we move it to the prefix and put + # None as a special marker where the suffix should be. + assert prefix == "" + sortlist.append(((host, None), None, None, host)) + else: + # A left part with at least an numeric part + # (we care about the rightmost numeric part) + num_int = int(num_str) + num_width = len(num_str) # This width includes leading zeroes + sortlist.append(((prefix, suffix), num_int, num_width, host)) + + # Sort lexicographically, first on prefix, then on suffix, then on + # num_int (numerically), then... + # This determines the order of the final result. + + sortlist.sort() + + # We are ready to collect the result parts as a list of new (left, + # right) tuples. + + results = [] + needs_another_loop = False + + # Now group entries with the same prefix+suffix combination (the + # key is the first element in the sortlist) to loop over them and + # then to loop over the list of hosts sharing the same + # prefix+suffix combination. + + for ((prefix, suffix), group) in itertools.groupby(sortlist, + key=lambda x:x[0]): + + if suffix is None: + # Special case: a host with no numeric part + results.append(("", prefix)) # Move everything to the right part + remaining.remove(prefix) + else: + # The general case. We prepare to collect a list of + # ranges expressed as (low, high, width) for later + # formatting. + range_list = [] + + for ((prefix2, suffix2), num_int, num_width, host) in group: + if host not in remaining: + # Below, we will loop internally to enumate a whole range + # at a time. We then remove the covered hosts from the set. + # Therefore, skip the host here if it is gone from the set. 
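+                    # (e.g. "n2" when the range scan that started at
+                    # "n1" has already consumed it)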
+ continue + assert num_int is not None + + # Scan for a range starting at the current host + low = num_int + while True: + host = "%s%0*d%s" % (prefix, num_width, num_int, suffix) + if host in remaining: + remaining.remove(host) + num_int += 1 + else: + break + high = num_int - 1 + assert high >= low + range_list.append((low, high, num_width)) + + # We have a list of ranges to format. We make sure + # we move our handled numerical part to the right to + # stop it from being processed again. + needs_another_loop = True + if len(range_list) == 1 and range_list[0][0] == range_list[0][1]: + # Special case to make sure that n1 is not shown as n[1] etc + results.append((prefix, + "%0*d%s" % + (range_list[0][2], range_list[0][0], suffix))) + else: + # General case where high > low + results.append((prefix, "[" + \ + ",".join([format_range(l, h, w) + for l, h, w in range_list]) + \ + "]" + suffix)) + + # At this point, the set of remaining hosts should be empty and we + # are ready to return the result, together with the flag that says + # if we need to loop again (we do if we have added something to a + # left part). + assert not remaining + return results, needs_another_loop + +def format_range(low, high, width): + """Format a range from low to high inclusively, with a certain width.""" + + if low == high: + return "%0*d" % (width, low) + else: + return "%0*d-%0*d" % (width, low, width, high) + +# Sort a list of hosts numerically + +def numerically_sorted(l): + """Sort a list of hosts numerically. + + E.g. sorted order should be n1, n2, n10; not n1, n10, n2. + """ + + return sorted(l, key=numeric_sort_key) + +numeric_sort_key_regexp = re.compile("([0-9]+)|([^0-9]+)") +def numeric_sort_key(x): + """Compose a sorting key to compare strings "numerically": + + We split numerical (integer) and non-numerical parts into a list, + making sure that the numerical parts are converted to Python ints, + and then sort on the lists. Thus, if we sort x10y and x9z8, we will + compare ["x", 10, "y"] with ["x", 9, "x", "8"] and return x9z8 + before x10y". + + Python 3 complication: We cannot compare int and str, so while we can + compare x10y and x9z8, we cannot compare x10y and 9z8. Kludge: insert + a blank string first if the list would otherwise start with an integer. + This will give the same ordering as before, as integers seem to compare + smaller than strings in Python 2. + """ + + keylist = [int(i_ni[0]) if i_ni[0] else i_ni[1] + for i_ni in numeric_sort_key_regexp.findall(x)] + if keylist and isinstance(keylist[0], int): + keylist.insert(0, "") + return keylist + +# Parse SLURM_TASKS_PER_NODE into a list of task numbers +# +# Description from the SLURM sbatch man page: +# Number of tasks to be initiated on each node. Values +# are comma separated and in the same order as +# SLURM_NODELIST. If two or more consecutive nodes are +# to have the same task count, that count is followed by +# "(x#)" where "#" is the repetition count. For example, +# "SLURM_TASKS_PER_NODE=2(x3),1" indicates that the first +# three nodes will each execute three tasks and the +# fourth node will execute one task. 
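+#
+# For example, parse_slurm_tasks_per_node("2(x3),1") returns [2, 2, 2, 1].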
+ +def parse_slurm_tasks_per_node(s): + res = [] + for part in s.split(","): + m = re.match(r'^([0-9]+)(\(x([0-9]+)\))?$', part) + if m: + tasks = int(m.group(1)) + repetitions = m.group(3) + if repetitions is None: + repetitions = 1 + else: + repetitions = int(repetitions) + if repetitions > MAX_SIZE: + raise BadHostlist("task list repetitions too large") + for i in range(repetitions): + res.append(tasks) + else: + raise BadHostlist("bad task list syntax") + return res + +# +# Keep this part to tell users where the command line interface went +# + +if __name__ == '__main__': + import os, sys + sys.stderr.write("The command line utility has been moved to a separate 'hostlist' program.\n") + sys.exit(os.EX_USAGE) diff --git a/ldms/python/ldmsd/ldmsd_yaml_parser b/ldms/python/ldmsd/ldmsd_yaml_parser new file mode 100755 index 000000000..90d5c70c7 --- /dev/null +++ b/ldms/python/ldmsd/ldmsd_yaml_parser @@ -0,0 +1,916 @@ +#!/usr/bin/env python3 +import os, sys +import errno +import yaml +import argparse +import json +import subprocess +import socket +import time +import itertools as it +from collections.abc import Mapping, Sequence +from ldmsd.parser_util import * + +class ClusterCtrl(object): + def emit_value(self, path, value): + try: + res = client.put(path, str(value)) + except Exception as e: + print("Error {0} setting {1} : {2}".format(str(e), path, str(value))) + + def check_key(self, key): + # Handle forward slash in keys e.g. endpoints containing "/" in the name + try: + if '/' in key: + print(f'Error: "/" is not a supported character in key name {key}') + sys.exit(1) + return key + except Exception as e: + print(str(e)) + + def walk(self, obj, path=''): + if obj is None: + if path.split("/")[-1] in CORE_ATTRS: + print(f'{path.split("/")[-1]} not present in ldms yaml configuration file.\nContinuing..') + elif isinstance(obj, Mapping): + for key in obj: + safe_key = self.check_key(key) + self.walk(obj[key], '{0}/{1}'.format(path, safe_key)) + elif isinstance(obj, Sequence): + if isinstance(obj, (str, bytearray)): + self.emit_value(path, obj) + else: + item = 0 + for v in obj: + # we want keys to be returned in numerical order which requires z-fill + self.walk(v, path + '/{0:06}'.format(item)) + item += 1 + elif obj: + self.emit_value(path, obj) + + def build_daemons(self, config): + """Generate a daemon spec list from YAML config + + Builds a dictionary of endpoint definitions. The 'config' is a + list of daemon specifications. Each daemon specification contains + 'names', 'host', and 'endpoint' attributes. All attributes are + expanded per the slurm hostlist rules. The length of the + expanded name-list, must equal the length of the expanded + host-list. + + Example: + + daemons: + - names : "agg-[1-10]" + hosts : "node[1-10]" + endpoints : + - names : "node-[1-10]-[10002] + ports : "[10002]" + maestro_comm : True + xprt : sock + auth : + name : munge + plugin : munge + + results in the following host-spec dictionary: + + { + "agg-[[1-3]-[10002]" : { + "agg-1" : { + "endpoints": { + "node-1-10002" : { "host" : "node-1", "port" : 10002 }, + "node-2-10002" : { "host" : "node-2", "port" : 10002 }, + "node-3-10002" : { "host" : "node-3", "port" : 10002 }, + ... + } + } + } + + """ + ep_dict = {} + node_config = config['daemons'] + if type(node_config) is not list: + print(f'{LDMS_YAML_ERR}') + print(f'daemons {LIST_ERR}') + print(f'e.g. 
daemons:') + print(f' - names : &l1-agg "l1-aggs-[1-8]"') + print(f' hosts : &l1-agg-hosts "node-[1-8]"') + sys.exit() + for spec in node_config: + check_required([ 'names', 'endpoints', 'hosts' ], + spec, '"daemons" entry') + hosts = expand_names(spec['hosts']) + dnames = expand_names(spec['names']) + hostnames = hosts + if len(dnames) != len(hostnames): + hosts = [ [host]*(len(dnames)//len(hostnames)) for host in hostnames ] + hosts = list(it.chain.from_iterable(hosts)) + ep_names = [] + ep_ports = [] + if type(spec['endpoints']) is not list: + print(f'{LDMS_YAML_ERR}') + print(f'endpoints {LIST_ERR}') + print(f'e.g endpoints :') + print(f' - names : &l1-agg-endpoints "node-[1-8]-[10101]"') + print(f' ports : &agg-ports "[10101]"') + print(f' maestro_comm : True') + print(f' xprt : sock') + print(f' auth :') + print(f' name : munge1') + print(f' plugin : munge') + sys.exit() + for endpoints in spec['endpoints']: + check_required(['names','ports'], + endpoints, '"endpoints" entry') + cur_epnames = expand_names(endpoints['names']) + ep_names.append(cur_epnames) + cur_ports = expand_names(endpoints['ports']) + _ports = cur_ports + if len(cur_ports) != len(cur_epnames): + cur_ports = [ _ports for i in range(0, len(cur_epnames)//len(_ports)) ] + cur_ports = list(it.chain.from_iterable(cur_ports)) + ep_ports.append(cur_ports) + ep_dict[spec['names']] = {} + env = check_opt('environment', spec) + for dname, host in zip(dnames, hosts): + ep_dict[spec['names']][dname] = {} + ep_dict[spec['names']][dname]['addr'] = host + ep_dict[spec['names']][dname]['environment'] = env + ep_dict[spec['names']][dname]['endpoints'] = {} + for ep_, ep_port, ep in zip(ep_names, ep_ports, spec['endpoints']): + port = ep_port.pop(0) + ep_name = ep_.pop(0) + xprt = check_opt('xprt', ep) + auth_name = check_opt('auth', ep) + auth_conf = check_opt('conf', ep) + plugin = check_opt('plugin', ep['auth']) + maestro_comm = parse_yaml_bool(check_opt('maestro_comm', ep)) + h = { + 'name' : ep_name, + 'port' : port, + 'xprt' : xprt, + 'maestro_comm' : maestro_comm, + 'auth' : { 'name' : auth_name, 'conf' : auth_conf, 'plugin' : plugin } + } + ep_dict[spec['names']][dname]['endpoints'][ep_name] = h + ep_dict[spec['names']][dname]['addr'] = host + if len(ep_dict[spec['names']]) == 0: + print(f'Error processing regex of hostnames {spec["hosts"]} and daemons {spec["names"]}.'\ + f'Number of hosts must be a multiple of daemons with appropriate ports or equivalent to length of daemons.\n'\ + f'Regex {spec["hosts"]} translates to {len(hostnames)} hosts\n'\ + f'Regex {spec["names"]} translates to {len(dnames)} daemons\n') + sys.exit() + return ep_dict + + def build_aggregators(self, config): + aggregators = {} + if 'aggregators' not in config: + return aggregators + agg_conf = config['aggregators'] + if type(agg_conf) is not list: + print(f'{LDMS_YAML_ERR}') + print(f'aggregators {LIST_ERR}') + print(f'e.g. aggregators:') + print(f' - daemons: "l1-aggregators"') + print(f' peers :') + print(f' - daemons : "samplers"') + print(f' ... 
: ...') + return aggregators + for agg_spec in agg_conf: + check_required([ 'daemons' ], + agg_spec, '"aggregators" entry') + names = expand_names(agg_spec['daemons']) + group = agg_spec['daemons'] + plugins = check_opt('plugins', agg_spec) + if plugins: + if plugins is not list: + print(f'Error: "plugins" must be a list of plugin instance names"\n') + for plugin in plugins: + check_plugin_config(plugin, self.plugins) + daemons_ = None + for daemons in config['daemons']: + if group == daemons['names']: + daemons_ = daemons + if daemons_ is None: + raise ValueError(f"No daemons matched matched daemon key {group}") + if group not in aggregators: + aggregators[group] = {} + subscribe = check_opt('subscribe', agg_spec) + if subscribe: + for stream in subscribe: + check_required([ 'stream', 'regex' ], stream, "stream specification") + for name in names: + aggregators[group][name] = { 'state' : 'stopped' } # 'running', 'error' + if subscribe: + aggregators[group][name]['subscribe'] = subscribe + if plugins: + aggregators[group][name]['plugins'] = plugins + return aggregators + + def build_producers(self, config): + """ + Return a dictionary keyed by the group name. Each dictionary + entry is a list of producers in that group. + """ + producers = {} + for agg in config.get('aggregators', []): + if 'peers' not in agg: + continue + if type(agg['peers']) is not list: + print(f'{LDMS_YAML_ERR}') + print(f'peers {LIST_ERR}') + print(f'e.g. peers:') + print(f' - daemons: "samplers"') + print(f' endpoints : "sampler-endpoints"') + print(f' ... : ...') + continue + for prod in agg['peers']: + check_required([ 'endpoints', 'updaters', + 'reconnect', 'type', ], + prod, '"peers" entry') + # Use endpoints for producer names and remove names attribute? + if prod['daemons'] not in self.daemons: + dmn_grps = prod['daemons'].split(',') + eps = prod['endpoints'].split(',') + else: + dmn_grps = [ prod['daemons'] ] + eps = [ prod['endpoints'] ] + for daemons, endpoints in zip(dmn_grps, eps): + names = expand_names(endpoints) + endpoints = expand_names(endpoints) + group = agg['daemons'] + smplr_dmns = expand_names(daemons) + if group not in producers: + producers[group] = {} + + upd_spec = prod['updaters'] + # Expand and generate all the producers + typ = prod['type'] + reconnect = check_intrvl_str(prod['reconnect']) + ports_per_dmn = len(endpoints) / len(smplr_dmns) + ppd = ports_per_dmn + try: + for name in names: + if ppd > 1: + smplr_dmn = smplr_dmns[0] + ppd -= 1 + else: + smplr_dmn = smplr_dmns.pop(0) + ppd = ports_per_dmn + endpoint = endpoints.pop(0) + prod = { + 'daemon' : smplr_dmn, + 'dmn_grp' : daemons, + 'name' : name, + 'endpoint' : endpoint, + 'type' : typ, + 'group' : group, + 'reconnect' : reconnect, + 'updaters' : upd_spec + } + producers[group][endpoint] = prod + except: + print(f'Error building producer config:\n'\ + f'Please ensure "endpoints" is configured to the correct number of ports specified.') + return producers + + def build_updaters(self, config): + """ + Return a dictionary based on the aggregator. Each dictionary + entry is a list of updaters in that group. + """ + updaters = {} + updtr_cnt = 0 + for agg in config.get('aggregators', []): + if 'peers' not in agg: + continue + for prod in agg['peers']: + if type(prod['updaters']) is not list: + print(f'Error parsing ldms_config yaml file') + print(f'Updater spec must be a list of dictionaries, specified with "-" designator in the ldms_config yaml file') + print(f'e.g. 
updaters:') + print(f' - mode : pull') + print(f' interval : "1.0s"') + print(f' sets :') + print(f' - regex : ".*"') + print(f' field : inst') + continue + for updtr_spec in prod['updaters']: + check_required([ 'interval', 'sets', ], + updtr_spec, '"updaters" entry') + group = agg['daemons'] + if group not in updaters: + updaters[group] = {} + grp_updaters = updaters[group] + updtr_name = f'updtr_{updtr_cnt}' + if updtr_name in grp_updaters: + raise ValueError(f"Duplicate updater name '{updtr_name}''. "\ + f"An updater name must be unique within the group") + updtr = { + 'name' : updtr_name, + 'interval' : check_intrvl_str(updtr_spec['interval']), + 'group' : agg['daemons'], + 'sets' : updtr_spec['sets'], + 'producers' : [{ 'regex' : '.*' }] + } + if 'offset' in updtr_spec: + updtr['offset'] = check_intrvl_str(updtr_spec['offset']) + if 'mode' in updtr_spec: + updtr['mode'] = updtr_spec['mode'] + else: + updtr['mode'] = 'pull' + grp_updaters[updtr_name] = updtr + updtr_cnt += 1 + return updaters + + def build_stores(self, config): + """ + Return a dictionary keyed by the group name. Each dictionary + entry is a list of stores in that group. + """ + if 'stores' not in config: + return None + stores = {} + if type(config['stores']) is not dict: + print(f'{LDMS_YAML_ERR}') + print(f'store {DICT_ERR}') + print(f'e.g. stores:') + print(f' sos-meminfo :') + print(f' daemons : "l1-aggregators"') + print(f' container : ldms_data') + print(f' ... : ...') + return None + for store in config['stores']: + store_spec = config['stores'][store] + check_required([ 'plugin', 'container' ], + store_spec, '"store" entry') + decomp = check_opt('decomp', store_spec) + decomposition = check_opt('decomposition', store_spec) + if not decomp and not decomposition: + check_required(['schema'], store_spec, '"store" entry') + schema = check_opt('schema', store_spec) + regex = check_opt('regex', store_spec) + if decomp and not schema and not regex: + raise ValueError("Decomposition plugin configuration requires either" + " 'schema' or 'regex' attribute'") + group = store_spec['daemons'] + if group not in stores: + stores[group] = {} + grp_stores = stores[group] + if store in grp_stores: + raise ValueError(f"Duplicate store name '{store}'. " + "A store name must be unique within the group") + check_opt('flush', store_spec) + check_plugin_config(store_spec['plugin'], self.plugins) + grp_stores[store] = store_spec + return stores + + def build_samplers(self, config): + """ + Generate samplers from YAML config. + Return a dictionary keyed by the samplers group name. Each dictionary + entry is a single ldms daemon's sampler configuration. + """ + if 'samplers' not in config: + return None + smplrs = {} + if type(config['samplers']) is not list: + print(f'{LDMS_YAML_ERR}') + print(f'samplers {LIST_ERR}') + print(f'e.g. samplers:') + print(f' - daemons : "samplers"') + print(f' plugins :') + print(f' - name : meminfo') + print(f' interval : "1.0s"') + print(f' offset : "0s"') + print(f' config :') + print(f' - schema : meminfo') + print(f' component_id : "10001"') + print(f' producer : "node-1"') + print(f' perm : "0777"') + return None + for smplr_spec in config['samplers']: + check_required([ 'daemons', 'plugins' ], + smplr_spec, '"sampler" entry') + for plugin in smplr_spec['plugins']: + check_plugin_config(plugin, self.plugins) + smplrs[smplr_spec['daemons']] = smplr_spec + return smplrs + + def build_plugins(self, config): + """ + Generate plugins to load from a YAML config. 
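+        An illustrative top-level 'plugins' section (the instance and
+        schema names below are hypothetical):
+
+            plugins:
+              meminfo1 :
+                name     : meminfo
+                interval : "1.0s"
+                config   :
+                  - schema : meminfo
+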
+ Return a dictionary keyed by the plugin's group name. Each dictionary entry + is a single plugin's configuration. + """ + if 'plugins' not in config: + return None + plugins = {} + plugn_spec = config['plugins'] + for plugn in plugn_spec: + if plugn in plugins: + raise ValueError(f'Duplicate plugin name "{plugin_name}". ' + f'Plugin must be unique within a group.') + check_plugin_config(plugn, plugn_spec) + plugins[plugn] = plugn_spec[plugn] + return plugins + + def __init__(self, client, name, cluster_config, args): + """ + """ + self.client = client + self.name = name + self.args = args + self.cluster_config = cluster_config + self.daemons = self.build_daemons(cluster_config) + self.plugins = self.build_plugins(cluster_config) + self.aggregators = self.build_aggregators(cluster_config) + self.producers = self.build_producers(cluster_config) + self.updaters = self.build_updaters(cluster_config) + self.stores = self.build_stores(cluster_config) + self.samplers = self.build_samplers(cluster_config) + + def commit(self): + pass + + def save_config(self): + try: + self.client.delete_prefix('/' + self.name) + self.walk(self.daemons, '/' + self.name + '/daemons') + self.walk(self.aggregators, '/' + self.name + '/aggregators') + self.walk(self.producers, '/' + self.name + '/producers') + self.walk(self.updaters, '/' + self.name + '/updaters') + self.walk(self.stores, '/' + self.name + '/stores') + self.walk(self.samplers, '/' + self.name + '/samplers') + self.walk(self.plugins, '/' + self.name + '/plugins') + self.client.put('/'+self.name+'/last_updated', str(time.time())) + except Exception as e: + a, b, c = sys.exc_info() + print(str(e)+' '+str(c.tb_lineno)) + return 1 + + def local_mode(self, local_path): + # Local mode uses hostname to help identify which daemon(s) to start + hostname = socket.gethostname() + local_list = {} + fd = None + match_host = False + for dmn_grp in self.daemons: + for dmn in self.daemons[dmn_grp]: + auth_list = {} + if hostname == self.daemons[dmn_grp][dmn]['addr']: + match_host = True + local_list[dmn] = self.daemons[dmn_grp][dmn] + local_list[dmn]['dmn_grp'] = dmn_grp + if dmn_grp in self.aggregators and dmn in self.aggregators[dmn_grp]: + try: + fd = open(f'{local_path}/{dmn}.conf', 'w+') + dstr, auth_list = self.write_listeners(dstr, dmn_grp, dmn, auth_list) + dstr, auth_list = self.write_producers(dstr, dmn_grp, dmn, auth_list) + dstr = self.write_stream_subscribe(dstr, dmn_grp, dmn) + dstr = self.write_agg_plugins(dstr, dmn_grp, dmn) + dstr = self.write_updaters(dstr, dmn_grp) + dstr = self.write_stores(dstr, dmn_grp) + fd.write(dstr) + except Exception as e: + print(f'Error {e}: writing ldms configuration files') + if fd: + fd.close() + if dmn_grp in self.samplers and dmn in self.samplers[dmn_grp]: + fd = open(f'{local_path}/{dmn}.conf', 'w+') + # TO DO: Refactor sampler config architecture to more easily reference appropriate groups + dstr = '' + dstr = self.write_samplers(dstr, dmn_grp) + dstr, auth_list = self.write_listeners(dstr, dmn_grp, dmn, auth_list) + fd.write(dstr) + if fd: + fd.close() + print(f'Starting {dmn}') + start_args = self.ldmsd_arg_list(local_path, dmn_grp, dmn) + local_list[dmn] = subprocess.Popen(start_args, stdin=subprocess.PIPE, stdout=subprocess.PIPE) + if match_host is False: + print(f'{hostname} does not match any daemon hosts in the ldms configuration file') + for dmn in local_list: + local_list[dmn].wait() + + def ldmsd_arg_list(self, local_path, dmn_grp, dmn): + start_list = [ 'ldmsd' ] + for ep in 
self.daemons[dmn_grp][dmn]['endpoints']: + if self.daemons[dmn_grp][dmn]['endpoints'][ep]['maestro_comm'] is True: + ep_ = self.daemons[dmn_grp][dmn]['endpoints'][ep] + start_list.append('-x') + start_list.append(f'{ep_["xprt"]}:{ep_["port"]}') + auth = check_opt('auth', ep_) + if auth: + auth_plugin = check_opt('plugin', ep_['auth']) + auth_opt = check_opt('conf', ep_) + start_list.append('-a') + start_list.append(auth_plugin) + if auth_opt: + if len(auth_opt.split('=')) < 2: + auth_opt = f'conf={auth_opt}' + start_list.append('-A') + start_list.append(auth_opt) + start_list.append('-c') + start_list.append(f'{local_path}/{dmn}.conf') + start_list.append('-r') + start_list.append(f'{local_path}/{dmn}.pid') + start_list.append('-l') + start_list.append(f'{local_path}/{dmn}.log') + start_list.append(f'-F') + return start_list + + def write_listeners(self, dstr, dmn_grp, dmn_name, auth_list={}): + for endp in self.daemons[dmn_grp][dmn_name]['endpoints']: + ep = self.daemons[dmn_grp][dmn_name]['endpoints'][endp] + auth, plugin, auth_opt = check_auth(ep) + if auth: + if auth not in auth_list: + auth_list[auth] = { 'conf' : auth_opt } + dstr += f'auth_add name={auth}' + dstr = self.write_opt_attr(dstr, 'plugin', plugin, endline=False) + dstr = self.write_opt_attr(dstr, 'conf', auth_opt) + dstr += f'listen xprt={ep["xprt"]} port={ep["port"]}' + dstr = self.write_opt_attr(dstr, 'auth', auth, endline=False) + dstr = self.write_opt_attr(dstr, 'conf', auth_opt) + return dstr, auth_list + + def write_opt_attr(self, dstr, attr, val, endline=True): + # Include leading space + if val is not None: + dstr += f' {attr}={val}' + if endline: + dstr += f'\n' + return dstr + + def write_producers(self, dstr, group_name, dmn, auth_list): + if group_name in self.producers: + ''' Balance samplers across aggregators ''' + ppd = -(len(self.producers[group_name]) // -len(self.aggregators[group_name].keys())) + rem = len(self.producers[group_name]) % len(self.aggregators[group_name].keys()) + prdcrs = list(self.producers[group_name].keys()) + aggs = list(self.daemons[group_name].keys()) + agg_idx = int(aggs.index(dmn)) + prdcr_idx = int(ppd * agg_idx) + prod_group = prdcrs[prdcr_idx:prdcr_idx+ppd] + i = 0 + auth = None + for ep in prod_group: + producer = self.producers[group_name][ep] + auth = check_opt('auth', self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]) + auth_opt = check_opt('conf', self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]) + if auth not in auth_list: + auth_list[auth] = { 'conf' : auth_opt } + plugin = check_opt('plugin', self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]['auth']) + if plugin is None: + print(f'Please specify auth plugin type for producer "{producer["daemon"]}" with auth name "{auth}"\n'\ + 'configuration file generation will continue, but auth will likely be denied.\n') + plugin = auth + dstr += f'auth_add name={auth} plugin={plugin}' + dstr = self.write_opt_attr(dstr, 'conf', auth_list[auth]['conf']) + for ep in prod_group: + regex = False + producer = self.producers[group_name][ep] + pname = producer['name'] + port = self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]['port'] + xprt = self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]['xprt'] + hostname = self.daemons[producer['dmn_grp']][producer['daemon']]['addr'] + auth = check_opt('auth', self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]) + ptype = producer['type'] + reconnect = producer['reconnect'] + 
dstr += f'prdcr_add name={pname} '\ + f'host={hostname} '\ + f'port={port} '\ + f'xprt={xprt} '\ + f'type={ptype} '\ + f'reconnect={reconnect}' + dstr = self.write_opt_attr(dstr, 'auth', auth) + last_sampler = pname + if 'regex' in producer: + regex = True + dstr += f'prdcr_start_regex regex={producer["regex"]}\n' + if not regex: + dstr += f'prdcr_start_regex regex=.*\n' + return dstr, auth_list + + def write_env(self, dstr, grp, dname): + if grp not in self.daemons: + return 1 + if dname not in self.daemons[grp]: + return 1 + if check_opt('environment', self.daemons[grp][dname]): + if type(self.daemons[grp][dname]['environment']) is not dict: + print(f'Error: Environment variables must be a yaml key:value dictionary\n') + sys.exit() + for attr in self.daemons[grp][dname]['environment']: + dstr += f'env {attr}={self.daemons[grp][dname]["environment"]}\n' + return dstr + + def write_sampler(self, dstr, smplr_grp, sname): + if smplr_grp not in self.samplers: + return dstr + dstr = self.write_env(dstr, smplr_grp, sname) + dstr, auth_list = self.write_listeners(dstr, smplr_grp, sname) + for plugin in self.samplers[smplr_grp]['plugins']: + plugn = self.plugins[plugin] + dstr += f'load name={plugn["name"]}\n' + for cfg_ in plugn['config']: + if type(cfg_) is dict: + hostname = socket.gethostname() + cfg_args = {} + prod = check_opt('producer', cfg_) + inst = check_opt('instance', cfg_) + if not prod: + cfg_args['producer'] = f'{hostname}' + if not inst: + cfg_args['instance'] = f'{hostname}/{plugn["name"]}' + for attr in cfg_: + if attr == 'name' or attr == 'interval': + continue + cfg_args[attr] = cfg_[attr] + cfg_str = parse_to_cfg_str(cfg_args) + else: + cfg_str = cfg_ + + interval = check_intrvl_str(plugn['interval']) + dstr += f'config name={plugn["name"]} {cfg_str}\n' + dstr += f'start name={plugn["name"]} interval={interval}' + offset = check_opt('offset', plugn) + dstr = self.write_opt_attr(dstr, 'offset', offset) + return dstr + + def write_samplers(self, dstr, smplr_group): + for inst_name in self.samplers[smplr_group]['plugins']: + plugin = self.plugins[inst_name] + sname = plugin['name'] + dstr += f'load name={sname}\n' + for cfg_ in plugin['config']: + if type(cfg_) is dict: + hostname = socket.gethostname() + if args.local: + cfg_args = { 'producer' : f'{hostname}', + 'instance' : f'{hostname}/{plugin["name"]}', + 'component_id' : '${LDMS_COMPONENT_ID}' } + else: + cfg_args = {} + prod = check_opt('producer', cfg_) + inst = check_opt('instance', cfg_) + if not prod: + cfg_args['producer'] = '{hostname}' + if not inst: + cfg_args['instance'] = '{hostname}/{plugin["name"]}' + for attr in cfg_: + if attr == 'name' or attr == 'interval': + continue + cfg_args[attr] = cfg_[attr] + cfg_str = parse_to_cfg_str(cfg_args) + else: + cfg_str = cfg_ + + interval = check_intrvl_str(plugin['interval']) + dstr += f'config name={sname} {cfg_str}\n' + dstr += f'start name={sname} interval={interval}' + offset = check_opt('offset', plugin) + dstr = self.write_opt_attr(dstr, 'offset', offset) + return dstr + + def write_stream_subscribe(self, dstr, group_name, agg): + subscribe = check_opt('subscribe', self.aggregators[group_name][agg]) + if subscribe: + for stream in subscribe: + regex = check_opt('regex', stream) + if regex is None: + regex = '.*' + dstr += f'prdcr_subscribe stream={stream["stream"]} '\ + f'regex={regex}\n' + return dstr + + def write_aggregator(self, dstr, group_name, dmn): + # Agg config + try: + ''' "Balance" agg configuration if all samplers are included in each aggregator ''' + 
if group_name not in self.aggregators: + return dstr + auth_list = {} + dstr, auth_list = self.write_listeners(dstr, group_name, dmn, auth_list) + dstr, auth_list = self.write_producers(dstr, group_name, dmn, auth_list) + dstr = self.write_stream_subscribe(dstr, group_name, dmn) + dstr = self.write_agg_plugins(dstr, group_name, dmn) + dstr = self.write_updaters(dstr, group_name) + dstr = self.write_stores(dstr, group_name) + return dstr + except Exception as e: + ea, eb, ec = sys.exc_info() + print('Agg config Error: '+str(e)+' Line:'+str(ec.tb_lineno)) + raise ValueError + + def write_agg_plugins(self, dstr, group_name, agg): + # Write independent plugin configuration for group + plugins = check_opt('plugins', self.aggregators[group_name][agg]) + if plugins is not None: + for plugn in plugins: + plugin = self.plugins[plugn] + dstr += f'load name={plugin["name"]}\n' + for cfg_ in plugin["config"]: + if type(cfg_) is dict: + cfg_str = parse_to_cfg_str(plugin["config"]) + else: + cfg_str = cfg_ + dstr += f'config name={plugin["name"]} {cfg_str}\n' + return dstr + + def write_updaters(self, dstr, group_name): + if group_name in self.updaters: + updtr_group = self.updaters[group_name] + for updtr in updtr_group: + interval = check_intrvl_str(updtr_group[updtr]['interval']) + updtr_str = f'updtr_add name={updtr_group[updtr]["name"]}' + if 'mode' in updtr_group[updtr]: + mode = updtr_group[updtr]['mode'] + else: + mode = 'pull' + # Check mode + if mode == 'push': + updtr_str = f'{updtr_str} push=True' + elif mode == 'onchange': + updtr_str = f'{updtr_str} push=onchange' + elif mode == 'auto_interval' or 'auto': + updtr_str = f'{updtr_str} auto_interval=True' + dstr += f'{updtr_str} '\ + f'interval={interval}' + offset = check_opt('offset', updtr_group[updtr]) + dstr = self.write_opt_attr(dstr, 'offset', offset) + for prod in updtr_group[updtr]['producers']: + dstr += f'updtr_prdcr_add name={updtr_group[updtr]["name"]} '\ + f'regex={prod["regex"]}\n' + dstr += f'updtr_start name={updtr_group[updtr]["name"]}\n' + return dstr + + def write_stores(self, dstr, group_name): + if group_name in self.stores: + store_group = self.stores[group_name] + loaded_plugins = [] + for store in store_group: + if store_group[store]['plugin'] not in loaded_plugins: + if store_group[store]['plugin'] not in self.plugins: + print(f'Error: Storage policy plugin reference {store_group[store]["plugin"]} '\ + f'is not defined in the top level "plugins" dictionary"\n' + f'Continuing...\n') + continue + plugin = self.plugins[store_group[store]['plugin']] + dstr += f'load name={plugin["name"]}\n' + for cfg_ in plugin['config']: + if type(cfg_) is dict: + cfg_str = parse_to_cfg_str(cfg_) + else: + cfg_str = cfg_ + dstr += f'config name={plugin["name"]} '\ + f'{cfg_str}\n' + loaded_plugins.append(store_group[store]['plugin']) + strgp_add = f'strgp_add name={store} plugin={plugin["name"]} ' + strgp_add += f'container={store_group[store]["container"]} ' + strgp_add += f'schema={store_group[store]["schema"]}' + dstr += strgp_add + flush = check_opt('flush', store_group[store]) + dstr = self.write_opt_attr(dstr, 'flush', flush) + dstr += f'strgp_start name={store}\n' + return dstr + + def daemon_config(self, path, dname): + """ + Write a specific daemon's V4 configuration to file. 
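+        Returns the configuration as a NUL-terminated string, or 1 if
+        dname does not match any daemon in the YAML configuration.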
+ """ + dmn = None + grp = None + for dmn_grp in self.daemons: + if dname in self.daemons[dmn_grp]: + dmn = self.daemons[dmn_grp][dname] + grp = dmn_grp + break + if dmn is None: + print(f'Error: {dname} does not exist in YAML configuration file {path}\n') + return 1 + dstr = '' + dstr = self.write_sampler(dstr, grp, dname) + dstr = self.write_aggregator(dstr, grp, dname) + return f'{dstr}\0' + + def config_v4(self, path): + """ + Read the group configuration from ETCD and generate a version 4 LDMSD configuration + This configuration assumes that the environemnt variables COMPONENT_ID, HOSTNAME + all exist on the machines relevant to the ldmsd cluster. + """ + for group_name in self.daemons: + # Sampler config + if self.samplers != None: + try: + # TO DO: Refactor sampler config architecture to more easily reference appropriate groups + if group_name in self.samplers: + fd = open(f'{path}/{group_name}-samplers.conf', 'w+') + dstr = '' + dstr = self.write_samplers(dstr, group_name) + for dmn_name in self.daemons[group_name]: + dstr, auth_list = self.write_listeners(dstr, group_name, dmn_name) + fd.write(dstr) + if fd: + fd.close() + except Exception as e: + a, b, d = sys.exc_info() + print(f'Error generating sampler configuration: {str(e)} {str(d.tb_lineno)}') + sys.exit() + else: + print(f'"samplers" not found in configuration file. Skipping...') + + # Write aggregators in daemon group + if group_name in self.aggregators: + for dmn in self.aggregators[group_name]: + fd = open(f'{path}/{dmn}.conf', 'w+') + dstr = '' + dstr = self.write_aggregator(dstr, group_name, dmn) + fd.write(dstr) + fd.close() + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="LDMS Monitoring Cluster Configuration") + parser.add_argument("--ldms_config", metavar="FILE", required=True, + help="The ldmsd load balance domain configuration file. " + "This will not start the maestro " + "load balancer") + parser.add_argument("--cluster", metavar="FILE", + help="The name of the etcd cluster configuration file") + parser.add_argument("--prefix", metavar="STRING", + help="The prefix for the dumped aggregator configurations", + default="unknown") + local = parser.add_mutually_exclusive_group() + local.add_argument("--local", action='store_true', default=False, help="Start maestro_ctrl in local mode") + parser.add_argument("--local_path", metavar="STRING", + help="The path for the generated local configuration files", + default=os.path.expanduser('~'), required=False) + parser.add_argument("--generate-config-path", metavar="STRING", required=False, + default=False) + parser.add_argument("--daemon_name", metavar="STRING", required=False, + default=False, + help="Daemon name to generate configuration from YAML file") + parser.add_argument("--version", metavar="VERSION", + help="The OVIS version for the output syntax (4 or 5), default is 4", + default=4) + parser.add_argument("--debug", action="store_true", + help="Enable debug information") + args = parser.parse_args() + if not args.debug: + import sys + sys.tracebacklimit=0 + config_fp = open(args.ldms_config) + conf_spec = yaml.safe_load(config_fp) + + if args.cluster: + # All keys in the DB are prefixed with the prefix name. So we can + # have multiple monitoring hosted by the same consensus cluster. + import etcd3 + if not args.prefix: + print(f'"prefix" is required when using etcd') + # Load the cluster configuration file. 
This configures the daemons + # that support the key/value configuration database + etcd_fp = open(args.cluster) + etcd_spec = yaml.safe_load(etcd_fp) + + etcd_hosts = () + for h in etcd_spec['members']: + etcd_hosts += (( h['host'], h['port'] ),) + + # Use the 1st host for now + client = etcd3.client(host=etcd_hosts[0][0], port=etcd_hosts[0][1], + grpc_options=[ ('grpc.max_send_message_length',16*1024*1024), + ('grpc.max_receive_message_length',16*1024*1024)]) + else: + client = None + args.prefix = None + + cluster = ClusterCtrl(client, args.prefix, conf_spec, args) + + if args.daemon_name: + ldmsd_cfg_str = cluster.daemon_config(args.ldms_config, args.daemon_name.rstrip('0')) + print(f'{ldmsd_cfg_str}\0') + sys.exit(0) + if args.local: + cluster.local_mode(args.local_path) + + if args.generate_config_path: + cluster.config_v4(args.generate_config_path) + print("LDMSD v4 config files generated") + sys.exit(0) + + # Replace existing configuration if etcd cluster specified + if args.cluster: + rc = cluster.save_config() + if rc: + print("Error saving ldms cluster configuration to etcd cluster.") + sys.exit(0) + print("LDMS cluster configuration saved to etcd cluster.") + + if not args.cluster and not args.prefix and not args.local and not args.generate_config_path: + print(f'No action detected. Exiting...') + + sys.exit(0) diff --git a/ldms/python/ldmsd/parser_util.py b/ldms/python/ldmsd/parser_util.py new file mode 100644 index 000000000..50768d6a6 --- /dev/null +++ b/ldms/python/ldmsd/parser_util.py @@ -0,0 +1,176 @@ +import collections +import ldmsd.hostlist as hostlist + +AUTH_ATTRS = [ + 'auth', + 'conf' +] + +CORE_ATTRS = [ + 'daemons', + 'aggregators', + 'samplers', + 'stores' +] + +DEFAULT_ATTR_VAL = { + 'maestro_comm' : False, + 'xprt' : 'sock', + 'interval' : 1000000, + 'auth' : 'none', + 'mode' : 'static' +} + +INT_ATTRS = [ + 'interval', + 'offset', + 'reconnect', + 'flush' +] + +unit_strs = { + 'ms' : 1000, + 'us' : 1, + 'm' : 60000000, + 's' : 1000000, + 'h' : 3600000000, + 'd' : 86400000000 +} +LDMS_YAML_ERR = 'Error parsing ldms_config yaml file' +LIST_ERR = 'spec must be a list of dictionaries, specified with "-" in the ldms_config yaml file' +DICT_ERR = 'spec must be a dictionary, with keys referencing a plugin instance name' + +def check_intrvl_str(interval_s): + """Check the format of the interval string is correct + + A time-interval string is an integer or float followed by a + unit-string. A unit-string is any of the following: + + 'us' - microseconds + 'ms' - milliseconds + 's' - seconds + 'm' - minutes + 'h' - hours + 'd' - days + + Unit strings are not case-sensitive. + + Examples: + '1.5s' - 1.5 seconds + '1.5S' - 1.5 seconds + '2s' - 2 seconds + """ + error_str = f"{interval_s} is not a valid time-interval string\n"\ + f"'Only a single unit-string is allowed. e.g. 
'50s40us' is not a valid entry."\ + f"Examples of acceptable format:\n"\ + f"'1.5s' - 1.5 seconds\n"\ + f"'1.5S' - 1.5 seconds\n"\ + f"'2us' - 2 microseconds\n"\ + f"'3m' - 3 minutes\n"\ + f"'1h' - 1 hour\n"\ + f"'1d' - 1 day\n"\ + f"\n" + if type(interval_s) == int or type(interval_s) == float: + return interval_s + if type(interval_s) != str: + raise ValueError(f"{error_str}") + interval_s = interval_s.lower() + unit = next((unit for unit in unit_strs if unit in interval_s), None) + if unit: + if interval_s.split(unit)[1] != '': + raise ValueError(f"{error_str}") + ival_s = interval_s.split(unit)[0] + else: + ival_s = interval_s + try: + ival_s = float(ival_s) * unit_strs[unit] + except Exception as e: + raise ValueError(f"{interval_s} is not a valid time-interval string") + return int(ival_s) + +def check_opt(attr, spec): + # Check for optional argument and return None if not present + if attr in AUTH_ATTRS: + if attr == 'auth': + attr = 'name' + if 'auth' in spec: + spec = spec['auth'] + if attr in spec: + if attr in INT_ATTRS: + return check_intrvl_str(spec[attr]) + return spec[attr] + else: + if attr in DEFAULT_ATTR_VAL: + return DEFAULT_ATTR_VAL[attr] + else: + return None + +def check_required(attr_list, container, container_name): + """Verify that each name in attr_list is in the container""" + for name in attr_list: + if name not in container: + raise ValueError("The '{0}' attribute is required in a {1}". + format(name, container_name)) + +def fmt_cmd_args(comm, cmd, spec): + cfg_args = {} + cmd_attr_list = comm.get_cmd_attr_list(cmd) + for key in spec: + if key in cmd_attr_list['req'] or key in cmd_attr_list['opt']: + if key == 'plugin': + cfg_args[key] = spec[key]['name'] + continue + cfg_args[key] = spec[key] + if not all(key in spec for key in cmd_attr_list['req']): + print(f'The attribute(s) {set(cmd_attr_list["req"]) - spec.keys()} are required by {cmd}') + raise ValueError() + return cfg_args + +def NUM_STR(obj): + return str(obj) if type(obj) in [ int, float ] else obj + +def expand_names(name_spec): + if type(name_spec) != str and isinstance(name_spec, collections.abc.Sequence): + names = [] + for name in name_spec: + names += hostlist.expand_hostlist(NUM_STR(name)) + else: + names = hostlist.expand_hostlist(NUM_STR(name_spec)) + return names + +def check_auth(auth_spec): + name = check_opt('auth', auth_spec) + if not name: + return None, None, None + plugin = check_opt('plugin', auth_spec['auth']) + auth_opt = check_opt('conf', auth_spec) + return name, plugin, auth_opt + +def check_plugin_config(plugn, plugin_spec): + if plugn not in plugin_spec: + raise ValueError(f'Configuration for plugin instance "{plugn}"\n'\ + f'is not defined in the top level "plugins" dictionary"') + plugin = plugin_spec[plugn] + check_required([ 'name' ], plugin, f'"plugin" entry. 
Error in "'+ plugn +'" configuration') + check_required(['config'], plugin, '"plugin" entry') + if type(plugin['config']) is not list: + raise ValueError('"config" must be a list of configuration commands') + for cfg in plugin['config']: + if type(cfg) is not dict and type(cfg) is not str: + raise ValueError('"config" list members must be a dictionary or a string') + return plugin + +def parse_to_cfg_str(cfg_obj): + cfg_str = '' + for key in cfg_obj: + if key not in INT_ATTRS: + if len(cfg_str) > 1: + cfg_str += ' ' + cfg_str += key + '=' + str(cfg_obj[key]) + return cfg_str + +def parse_yaml_bool(bool_): + if bool_ is True or bool_ == 'true' or bool_ == 'True': + return True + else: + return False diff --git a/ldms/src/ldmsd/ldmsd.c b/ldms/src/ldmsd/ldmsd.c index 69cfb97ee..6c02c0a7a 100644 --- a/ldms/src/ldmsd/ldmsd.c +++ b/ldms/src/ldmsd/ldmsd.c @@ -97,7 +97,7 @@ #define LDMSD_LOGFILE "/var/log/ldmsd.log" #define LDMSD_PIDFILE_FMT "/var/run/%s.pid" -const char *short_opts = "B:l:s:x:P:m:Fkr:v:Vc:u:a:A:n:tL:"; +const char *short_opts = "B:l:s:x:P:m:Fkr:v:Vc:y:u:a:A:n:tL:"; struct option long_opts[] = { { "default_auth_args", required_argument, 0, 'A' }, @@ -655,6 +655,7 @@ void usage_hint(char *argv[],char *hint) printf(" -P COUNT, --worker_threads COUNT Count of event threads to start.\n"); printf(" Configuration Options\n"); printf(" -c PATH The path to configuration file (optional, default: ).\n"); + printf(" -y PATH Path to YAML configuration file (optional, default: ).\n"); printf(" -V Print LDMS version and exit.\n"); printf(" Deprecated options\n"); printf(" -H DEPRECATED.\n"); @@ -2053,6 +2054,12 @@ int ldmsd_process_cmd_line_arg(char opt, char *value) * Handle separately in the main() function. */ break; + case 'y': + /* + * Must be specified at the command line. + * Handle separately in the main() function. 
+ */ + break; case 'a': /* auth name */ if (auth_name) { @@ -2229,6 +2236,9 @@ int main(int argc, char *argv[]) case 'c': /* Handle below */ break; + case 'y': + /* Handle below */ + break; default: ret = ldmsd_process_cmd_line_arg(op, optarg); if (ret) { @@ -2278,10 +2288,26 @@ int main(int argc, char *argv[]) opterr = 0; optind = 0; struct ldmsd_str_list cfgfile_list; + struct ldmsd_str_list yamlfile_list; + TAILQ_INIT(&yamlfile_list); TAILQ_INIT(&cfgfile_list); struct ldmsd_str_ent *cpath; + struct ldmsd_str_ent *ypath; + struct ldmsd_str_ent *config_str; + char *resp; while ((op = getopt_long(argc, argv, short_opts, long_opts, NULL)) != -1) { switch (op) { + case 'y': + ypath = ldmsd_str_ent_new(optarg); + TAILQ_INSERT_TAIL(&yamlfile_list, ypath, entry); + while ((ypath = TAILQ_FIRST(&yamlfile_list))) { + resp = process_yaml_config_file(ypath->str, myname); + TAILQ_REMOVE(&yamlfile_list, ypath, entry); + ldmsd_str_ent_free(ypath); + } + config_str = ldmsd_str_ent_new(resp); + TAILQ_INSERT_TAIL(&yamlfile_list, config_str, entry); + break; case 'c': cpath = ldmsd_str_ent_new(optarg); TAILQ_INSERT_TAIL(&cfgfile_list, cpath, entry); @@ -2290,6 +2316,20 @@ int main(int argc, char *argv[]) } int lln; + while ((config_str = TAILQ_FIRST(&yamlfile_list))) { + lln = -1; + ret = process_config_str(config_str->str, &lln, 1); + if (ret) { + char errstr[128]; + snprintf(errstr, sizeof(errstr), + "Error %d processing configuration file '%s'", + ret, config_str->str); + ldmsd_str_list_destroy(&yamlfile_list); + cleanup(ret, errstr); + } + TAILQ_REMOVE(&yamlfile_list, config_str, entry); + ldmsd_str_ent_free(config_str); + } while ((cpath = TAILQ_FIRST(&cfgfile_list))) { lln = -1; ret = process_config_file(cpath->str, &lln, 1); @@ -2483,6 +2523,18 @@ int main(int argc, char *argv[]) } ldmsd_log(LDMSD_LINFO, "Processing the config file '%s' is done.\n", optarg); break; + case 'y': + has_config_file = 1; + ret = process_config_str(resp, &lln, 1); + if (ret) { + char errstr[128]; + snprintf(errstr, sizeof(errstr), + "Error %d processing configuration string '%s'", + ret, resp); + cleanup(ret, errstr); + } + free(resp); + break; } } @@ -2511,7 +2563,7 @@ int main(int argc, char *argv[]) _listen = (ldmsd_listen_t) ldmsd_cfgobj_first(LDMSD_CFGOBJ_LISTEN); if (!_listen && !has_config_file) { ldmsd_log(LDMSD_LCRITICAL, - "A config file (-c) or listening port (-x) is required." + "A config file, (-c) or (-y), or listening port (-x) is required." " Specify at least one of these. ... exiting\n"); cleanup(101, "no config files nor listening ports"); } diff --git a/ldms/src/ldmsd/ldmsd.h b/ldms/src/ldmsd/ldmsd.h index 137648357..b4a64e212 100644 --- a/ldms/src/ldmsd/ldmsd.h +++ b/ldms/src/ldmsd/ldmsd.h @@ -854,6 +854,10 @@ char *ldmsd_set_info_origin_enum2str(enum ldmsd_set_origin_type type); int process_config_file(const char *path, int *lineno, int trust); +int process_config_str(char *cfg_str, int *lineno, int trust); + +char *process_yaml_config_file(const char *path, const char *dname); + #define LDMSD_MAX_PLUGIN_NAME_LEN 64 #define LDMSD_CFG_FILE_XPRT_MAX_REC 8192 struct attr_value_list; diff --git a/ldms/src/ldmsd/ldmsd_config.c b/ldms/src/ldmsd/ldmsd_config.c index 52074b685..e07ac61a7 100644 --- a/ldms/src/ldmsd/ldmsd_config.c +++ b/ldms/src/ldmsd/ldmsd_config.c @@ -699,7 +699,7 @@ int __process_config_file(const char *path, int *lno, int trust, (0 == strncmp(line, "prdcr_start", 11))) { if (strstr(line, "interval")) { ldmsd_log(LDMSD_LWARNING, - "'interval' is begin deprecated. 
" + "'interval' is being deprecated. " "Please use 'reconnect' with 'prdcr_add' or 'prdcr_start*' " "in the future.\n"); } @@ -771,7 +771,236 @@ int __process_config_file(const char *path, int *lno, int trust, return rc; } -int __req_deferred_start_regex(ldmsd_req_ctxt_t reqc, ldmsd_cfgobj_type_t type) +static +int __process_config_str(char *cfg_str, int *lno, int trust, + int (*req_filter)(ldmsd_cfg_xprt_t, ldmsd_req_hdr_t, void *), + void *ctxt) +{ + static uint32_t msg_no = 0; + int rc = 0; + int lineno = 0; + char *buff = NULL; + char *line = NULL; + char *tmp; + size_t line_sz = 0; + char *comment; + ssize_t off = 0; + ssize_t cnt; + size_t buf_len = 0; + struct ldmsd_cfg_xprt_s xprt; + ldmsd_req_hdr_t request = NULL; + struct ldmsd_req_array *req_array = NULL; + if (!cfg_str) + return EINVAL; + line = malloc(LDMSD_CFG_FILE_XPRT_MAX_REC); + if (!line) { + rc = errno; + ldmsd_log(LDMSD_LERROR, "Out of memory\n"); + goto cleanup; + } + line_sz = LDMSD_CFG_FILE_XPRT_MAX_REC; + xprt.type = LDMSD_CFG_TYPE_FILE; + xprt.file.cfgfile_id = __get_cfgfile_id(); + xprt.send_fn = log_response_fn; + xprt.max_msg = LDMSD_CFG_FILE_XPRT_MAX_REC; + xprt.trust = trust; + xprt.rsp_err = 0; + xprt.cleanup_fn = NULL; + +next_line: + errno = 0; + if (buff) { + memset(buff, 0, buf_len); + buff = strtok(NULL, "\n"); + } else + buff = strtok(cfg_str, "\n"); + if (!buff) + goto cleanup; + buf_len = sizeof(buff); + cnt = strlen(buff); + + lineno++; + tmp = buff; + comment = find_comment(tmp); + + if (comment) + *comment = '\0'; + + /* Get rid of trailing spaces */ + while (cnt && isspace(tmp[cnt-1])) + cnt--; + + if (!buff) { + /* empty string */ + goto parse; + } + + tmp[cnt] = '\0'; + + /* Get rid of leading spaces */ + while (isspace(*tmp)) { + tmp++; + cnt--; + } + + if (!cnt) { + /* empty buffer */ + goto parse; + } + + if (tmp[cnt-1] == '\\') { + if (cnt == 1) + goto parse; + } + + if (cnt + off > line_sz) { + char *nline = realloc(line, ((cnt + off)/line_sz + 1) * line_sz); + if (!nline) { + rc = errno; + ldmsd_log(LDMSD_LERROR, "Out of memory\n"); + goto cleanup; + } + line = nline; + line_sz = ((cnt + off)/line_sz + 1) * line_sz; + } + off += snprintf(&line[off], line_sz, "%s", tmp); + + /* attempt to merge multiple lines together */ + if (off > 0 && line[off-1] == '\\') { + line[off-1] = ' '; + goto next_line; + } + +parse: + if (!off) + goto next_line; + + if (ldmsd_is_initialized()) { + if ((0 == strncmp(line, "prdcr_add", 9)) || + (0 == strncmp(line, "prdcr_start", 11))) { + if (strstr(line, "interval")) { + ldmsd_log(LDMSD_LWARNING, + "'interval' is being deprecated. " + "Please use 'reconnect' with 'prdcr_add' or 'prdcr_start*' " + "in the future.\n"); + } + } + } + + req_array = ldmsd_parse_config_str(line, msg_no, xprt.max_msg, ldmsd_log); + if (!req_array) { + rc = errno; + ldmsd_log(LDMSD_LERROR, "Process config string error at line %d " + "(%s). %s\n", lineno, cfg_str, STRERROR(rc)); + goto cleanup; + } + + request = __aggregate_records(req_array); + if (!request) { + rc = errno; + goto cleanup; + } + ldmsd_req_array_free(req_array); + req_array = NULL; + + if (!ldmsd_is_initialized()) { + /* Process only the priority commands, e.g., cmd-line options */ + if (!is_req_id_priority(ntohl(request->req_id))) + goto next_req; + } else { + /* Process non-priority commands, e.g., cfgobj config commands */ + if (is_req_id_priority(ntohl(request->req_id))) + goto next_req; + } + + /* + * Make sure that LDMSD will create large enough buffer to receive + * the config data. 
+
+char *__process_yaml_config_file(const char *path, const char *dname)
+{
+	FILE *fp;
+	char command[256];
+	char cstr[256];
+	size_t cap = 256;
+	size_t len = 0;
+	char *cfg_str = malloc(cap);
+	if (!cfg_str) {
+		ldmsd_log(LDMSD_LERROR, "Out of memory\n");
+		return NULL;
+	}
+	cfg_str[0] = '\0';
+	snprintf(command, sizeof(command),
+		 "ldmsd_yaml_parser --ldms_config %s --daemon_name %s 2>&1",
+		 path, dname);
+	fp = popen(command, "r");
+	if (!fp) {
+		ldmsd_log(LDMSD_LERROR, "Error running ldmsd_yaml_parser\n");
+		free(cfg_str);
+		return NULL;
+	}
+	while (fgets(cstr, sizeof(cstr), fp) != NULL) {
+		size_t cnt = strlen(cstr);
+		printf("%s", cstr);
+		/* Grow the buffer to hold the new chunk plus the terminator */
+		while (len + cnt + 1 > cap) {
+			char *nstr = realloc(cfg_str, cap * 2);
+			if (!nstr) {
+				ldmsd_log(LDMSD_LERROR, "Out of memory\n");
+				free(cfg_str);
+				pclose(fp);
+				return NULL;
+			}
+			cfg_str = nstr;
+			cap *= 2;
+		}
+		memcpy(&cfg_str[len], cstr, cnt + 1);
+		len += cnt;
+	}
+	pclose(fp);
+	return cfg_str;
+}
+
+int __req_deferred_start_regex(ldmsd_req_hdr_t req, ldmsd_cfgobj_type_t type)
 {
 	regex_t regex = {0};
 	ldmsd_cfgobj_t obj;
@@ -999,6 +1228,22 @@ int process_config_file(const char *path, int *lno, int trust)
 	return rc;
 }
 
+int process_config_str(char *config_str, int *lno, int trust)
+{
+	int rc;
+	char *cfg_str = strdup(config_str);
+	if (!cfg_str)
+		return ENOMEM;
+	rc = __process_config_str(cfg_str, lno, trust,
+			__req_filter_failover, &ldmsd_use_failover);
+	return rc;
+}
+
+char *process_yaml_config_file(const char *path, const char *dname)
+{
+	char *cstr;
+	cstr = __process_yaml_config_file(path, dname);
+	return cstr;
+}
+
 static inline void __log_sent_req(ldmsd_cfg_xprt_t xprt, ldmsd_req_hdr_t req)
 {
 	if (!ldmsd_req_debug) /* defined in ldmsd_request.c */

From e87da418cf1c5748df8bf5e1d1ddfbd4c9aec680 Mon Sep 17 00:00:00 2001
From: nick
Date: Tue, 8 Oct 2024 11:30:47 -0500
Subject: [PATCH 2/3] Decouple and rename ClusterCtrl class from
 ldmsd_yaml_parser

Rename ClusterCtrl to YamlCfg
Move YamlCfg to parser_util.py
Remove maestro-specific functionality from YamlCfg
---
 ldms/python/ldmsd/ldmsd_yaml_parser | 875 +---------------------------
 ldms/python/ldmsd/parser_util.py    | 762 +++++++++++++++++++++++-
 ldms/src/ldmsd/ldmsd_config.c       |   2 +-
 3 files changed, 748 insertions(+), 891 deletions(-)

diff --git a/ldms/python/ldmsd/ldmsd_yaml_parser b/ldms/python/ldmsd/ldmsd_yaml_parser
index 90d5c70c7..80fb91143 100755
--- a/ldms/python/ldmsd/ldmsd_yaml_parser
+++ b/ldms/python/ldmsd/ldmsd_yaml_parser
@@ -1,836 +1,10 @@
 #!/usr/bin/env python3
 import os, sys
-import errno
 import yaml
+import errno
 import argparse
-import json
-import subprocess
-import socket
-import time
-import itertools as it
-from collections.abc import Mapping, Sequence
 from
ldmsd.parser_util import * -class ClusterCtrl(object): - def emit_value(self, path, value): - try: - res = client.put(path, str(value)) - except Exception as e: - print("Error {0} setting {1} : {2}".format(str(e), path, str(value))) - - def check_key(self, key): - # Handle forward slash in keys e.g. endpoints containing "/" in the name - try: - if '/' in key: - print(f'Error: "/" is not a supported character in key name {key}') - sys.exit(1) - return key - except Exception as e: - print(str(e)) - - def walk(self, obj, path=''): - if obj is None: - if path.split("/")[-1] in CORE_ATTRS: - print(f'{path.split("/")[-1]} not present in ldms yaml configuration file.\nContinuing..') - elif isinstance(obj, Mapping): - for key in obj: - safe_key = self.check_key(key) - self.walk(obj[key], '{0}/{1}'.format(path, safe_key)) - elif isinstance(obj, Sequence): - if isinstance(obj, (str, bytearray)): - self.emit_value(path, obj) - else: - item = 0 - for v in obj: - # we want keys to be returned in numerical order which requires z-fill - self.walk(v, path + '/{0:06}'.format(item)) - item += 1 - elif obj: - self.emit_value(path, obj) - - def build_daemons(self, config): - """Generate a daemon spec list from YAML config - - Builds a dictionary of endpoint definitions. The 'config' is a - list of daemon specifications. Each daemon specification contains - 'names', 'host', and 'endpoint' attributes. All attributes are - expanded per the slurm hostlist rules. The length of the - expanded name-list, must equal the length of the expanded - host-list. - - Example: - - daemons: - - names : "agg-[1-10]" - hosts : "node[1-10]" - endpoints : - - names : "node-[1-10]-[10002] - ports : "[10002]" - maestro_comm : True - xprt : sock - auth : - name : munge - plugin : munge - - results in the following host-spec dictionary: - - { - "agg-[[1-3]-[10002]" : { - "agg-1" : { - "endpoints": { - "node-1-10002" : { "host" : "node-1", "port" : 10002 }, - "node-2-10002" : { "host" : "node-2", "port" : 10002 }, - "node-3-10002" : { "host" : "node-3", "port" : 10002 }, - ... - } - } - } - - """ - ep_dict = {} - node_config = config['daemons'] - if type(node_config) is not list: - print(f'{LDMS_YAML_ERR}') - print(f'daemons {LIST_ERR}') - print(f'e.g. 
daemons:') - print(f' - names : &l1-agg "l1-aggs-[1-8]"') - print(f' hosts : &l1-agg-hosts "node-[1-8]"') - sys.exit() - for spec in node_config: - check_required([ 'names', 'endpoints', 'hosts' ], - spec, '"daemons" entry') - hosts = expand_names(spec['hosts']) - dnames = expand_names(spec['names']) - hostnames = hosts - if len(dnames) != len(hostnames): - hosts = [ [host]*(len(dnames)//len(hostnames)) for host in hostnames ] - hosts = list(it.chain.from_iterable(hosts)) - ep_names = [] - ep_ports = [] - if type(spec['endpoints']) is not list: - print(f'{LDMS_YAML_ERR}') - print(f'endpoints {LIST_ERR}') - print(f'e.g endpoints :') - print(f' - names : &l1-agg-endpoints "node-[1-8]-[10101]"') - print(f' ports : &agg-ports "[10101]"') - print(f' maestro_comm : True') - print(f' xprt : sock') - print(f' auth :') - print(f' name : munge1') - print(f' plugin : munge') - sys.exit() - for endpoints in spec['endpoints']: - check_required(['names','ports'], - endpoints, '"endpoints" entry') - cur_epnames = expand_names(endpoints['names']) - ep_names.append(cur_epnames) - cur_ports = expand_names(endpoints['ports']) - _ports = cur_ports - if len(cur_ports) != len(cur_epnames): - cur_ports = [ _ports for i in range(0, len(cur_epnames)//len(_ports)) ] - cur_ports = list(it.chain.from_iterable(cur_ports)) - ep_ports.append(cur_ports) - ep_dict[spec['names']] = {} - env = check_opt('environment', spec) - for dname, host in zip(dnames, hosts): - ep_dict[spec['names']][dname] = {} - ep_dict[spec['names']][dname]['addr'] = host - ep_dict[spec['names']][dname]['environment'] = env - ep_dict[spec['names']][dname]['endpoints'] = {} - for ep_, ep_port, ep in zip(ep_names, ep_ports, spec['endpoints']): - port = ep_port.pop(0) - ep_name = ep_.pop(0) - xprt = check_opt('xprt', ep) - auth_name = check_opt('auth', ep) - auth_conf = check_opt('conf', ep) - plugin = check_opt('plugin', ep['auth']) - maestro_comm = parse_yaml_bool(check_opt('maestro_comm', ep)) - h = { - 'name' : ep_name, - 'port' : port, - 'xprt' : xprt, - 'maestro_comm' : maestro_comm, - 'auth' : { 'name' : auth_name, 'conf' : auth_conf, 'plugin' : plugin } - } - ep_dict[spec['names']][dname]['endpoints'][ep_name] = h - ep_dict[spec['names']][dname]['addr'] = host - if len(ep_dict[spec['names']]) == 0: - print(f'Error processing regex of hostnames {spec["hosts"]} and daemons {spec["names"]}.'\ - f'Number of hosts must be a multiple of daemons with appropriate ports or equivalent to length of daemons.\n'\ - f'Regex {spec["hosts"]} translates to {len(hostnames)} hosts\n'\ - f'Regex {spec["names"]} translates to {len(dnames)} daemons\n') - sys.exit() - return ep_dict - - def build_aggregators(self, config): - aggregators = {} - if 'aggregators' not in config: - return aggregators - agg_conf = config['aggregators'] - if type(agg_conf) is not list: - print(f'{LDMS_YAML_ERR}') - print(f'aggregators {LIST_ERR}') - print(f'e.g. aggregators:') - print(f' - daemons: "l1-aggregators"') - print(f' peers :') - print(f' - daemons : "samplers"') - print(f' ... 
: ...') - return aggregators - for agg_spec in agg_conf: - check_required([ 'daemons' ], - agg_spec, '"aggregators" entry') - names = expand_names(agg_spec['daemons']) - group = agg_spec['daemons'] - plugins = check_opt('plugins', agg_spec) - if plugins: - if plugins is not list: - print(f'Error: "plugins" must be a list of plugin instance names"\n') - for plugin in plugins: - check_plugin_config(plugin, self.plugins) - daemons_ = None - for daemons in config['daemons']: - if group == daemons['names']: - daemons_ = daemons - if daemons_ is None: - raise ValueError(f"No daemons matched matched daemon key {group}") - if group not in aggregators: - aggregators[group] = {} - subscribe = check_opt('subscribe', agg_spec) - if subscribe: - for stream in subscribe: - check_required([ 'stream', 'regex' ], stream, "stream specification") - for name in names: - aggregators[group][name] = { 'state' : 'stopped' } # 'running', 'error' - if subscribe: - aggregators[group][name]['subscribe'] = subscribe - if plugins: - aggregators[group][name]['plugins'] = plugins - return aggregators - - def build_producers(self, config): - """ - Return a dictionary keyed by the group name. Each dictionary - entry is a list of producers in that group. - """ - producers = {} - for agg in config.get('aggregators', []): - if 'peers' not in agg: - continue - if type(agg['peers']) is not list: - print(f'{LDMS_YAML_ERR}') - print(f'peers {LIST_ERR}') - print(f'e.g. peers:') - print(f' - daemons: "samplers"') - print(f' endpoints : "sampler-endpoints"') - print(f' ... : ...') - continue - for prod in agg['peers']: - check_required([ 'endpoints', 'updaters', - 'reconnect', 'type', ], - prod, '"peers" entry') - # Use endpoints for producer names and remove names attribute? - if prod['daemons'] not in self.daemons: - dmn_grps = prod['daemons'].split(',') - eps = prod['endpoints'].split(',') - else: - dmn_grps = [ prod['daemons'] ] - eps = [ prod['endpoints'] ] - for daemons, endpoints in zip(dmn_grps, eps): - names = expand_names(endpoints) - endpoints = expand_names(endpoints) - group = agg['daemons'] - smplr_dmns = expand_names(daemons) - if group not in producers: - producers[group] = {} - - upd_spec = prod['updaters'] - # Expand and generate all the producers - typ = prod['type'] - reconnect = check_intrvl_str(prod['reconnect']) - ports_per_dmn = len(endpoints) / len(smplr_dmns) - ppd = ports_per_dmn - try: - for name in names: - if ppd > 1: - smplr_dmn = smplr_dmns[0] - ppd -= 1 - else: - smplr_dmn = smplr_dmns.pop(0) - ppd = ports_per_dmn - endpoint = endpoints.pop(0) - prod = { - 'daemon' : smplr_dmn, - 'dmn_grp' : daemons, - 'name' : name, - 'endpoint' : endpoint, - 'type' : typ, - 'group' : group, - 'reconnect' : reconnect, - 'updaters' : upd_spec - } - producers[group][endpoint] = prod - except: - print(f'Error building producer config:\n'\ - f'Please ensure "endpoints" is configured to the correct number of ports specified.') - return producers - - def build_updaters(self, config): - """ - Return a dictionary based on the aggregator. Each dictionary - entry is a list of updaters in that group. - """ - updaters = {} - updtr_cnt = 0 - for agg in config.get('aggregators', []): - if 'peers' not in agg: - continue - for prod in agg['peers']: - if type(prod['updaters']) is not list: - print(f'Error parsing ldms_config yaml file') - print(f'Updater spec must be a list of dictionaries, specified with "-" designator in the ldms_config yaml file') - print(f'e.g. 
updaters:') - print(f' - mode : pull') - print(f' interval : "1.0s"') - print(f' sets :') - print(f' - regex : ".*"') - print(f' field : inst') - continue - for updtr_spec in prod['updaters']: - check_required([ 'interval', 'sets', ], - updtr_spec, '"updaters" entry') - group = agg['daemons'] - if group not in updaters: - updaters[group] = {} - grp_updaters = updaters[group] - updtr_name = f'updtr_{updtr_cnt}' - if updtr_name in grp_updaters: - raise ValueError(f"Duplicate updater name '{updtr_name}''. "\ - f"An updater name must be unique within the group") - updtr = { - 'name' : updtr_name, - 'interval' : check_intrvl_str(updtr_spec['interval']), - 'group' : agg['daemons'], - 'sets' : updtr_spec['sets'], - 'producers' : [{ 'regex' : '.*' }] - } - if 'offset' in updtr_spec: - updtr['offset'] = check_intrvl_str(updtr_spec['offset']) - if 'mode' in updtr_spec: - updtr['mode'] = updtr_spec['mode'] - else: - updtr['mode'] = 'pull' - grp_updaters[updtr_name] = updtr - updtr_cnt += 1 - return updaters - - def build_stores(self, config): - """ - Return a dictionary keyed by the group name. Each dictionary - entry is a list of stores in that group. - """ - if 'stores' not in config: - return None - stores = {} - if type(config['stores']) is not dict: - print(f'{LDMS_YAML_ERR}') - print(f'store {DICT_ERR}') - print(f'e.g. stores:') - print(f' sos-meminfo :') - print(f' daemons : "l1-aggregators"') - print(f' container : ldms_data') - print(f' ... : ...') - return None - for store in config['stores']: - store_spec = config['stores'][store] - check_required([ 'plugin', 'container' ], - store_spec, '"store" entry') - decomp = check_opt('decomp', store_spec) - decomposition = check_opt('decomposition', store_spec) - if not decomp and not decomposition: - check_required(['schema'], store_spec, '"store" entry') - schema = check_opt('schema', store_spec) - regex = check_opt('regex', store_spec) - if decomp and not schema and not regex: - raise ValueError("Decomposition plugin configuration requires either" - " 'schema' or 'regex' attribute'") - group = store_spec['daemons'] - if group not in stores: - stores[group] = {} - grp_stores = stores[group] - if store in grp_stores: - raise ValueError(f"Duplicate store name '{store}'. " - "A store name must be unique within the group") - check_opt('flush', store_spec) - check_plugin_config(store_spec['plugin'], self.plugins) - grp_stores[store] = store_spec - return stores - - def build_samplers(self, config): - """ - Generate samplers from YAML config. - Return a dictionary keyed by the samplers group name. Each dictionary - entry is a single ldms daemon's sampler configuration. - """ - if 'samplers' not in config: - return None - smplrs = {} - if type(config['samplers']) is not list: - print(f'{LDMS_YAML_ERR}') - print(f'samplers {LIST_ERR}') - print(f'e.g. samplers:') - print(f' - daemons : "samplers"') - print(f' plugins :') - print(f' - name : meminfo') - print(f' interval : "1.0s"') - print(f' offset : "0s"') - print(f' config :') - print(f' - schema : meminfo') - print(f' component_id : "10001"') - print(f' producer : "node-1"') - print(f' perm : "0777"') - return None - for smplr_spec in config['samplers']: - check_required([ 'daemons', 'plugins' ], - smplr_spec, '"sampler" entry') - for plugin in smplr_spec['plugins']: - check_plugin_config(plugin, self.plugins) - smplrs[smplr_spec['daemons']] = smplr_spec - return smplrs - - def build_plugins(self, config): - """ - Generate plugins to load from a YAML config. 
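For reference, the top-level plugins map consumed here pairs an instance name
with the plugin it loads; a small, hypothetical instance parsed with
yaml.safe_load (the instance name meminfo1 is illustrative):

    import yaml

    doc = yaml.safe_load("""
    plugins:
      meminfo1:            # instance name referenced by samplers/stores
        name: meminfo      # plugin to load
        interval: "1.0s"
        config:
          - schema: meminfo
    """)
    assert doc['plugins']['meminfo1']['name'] == 'meminfo'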
- Return a dictionary keyed by the plugin's group name. Each dictionary entry - is a single plugin's configuration. - """ - if 'plugins' not in config: - return None - plugins = {} - plugn_spec = config['plugins'] - for plugn in plugn_spec: - if plugn in plugins: - raise ValueError(f'Duplicate plugin name "{plugin_name}". ' - f'Plugin must be unique within a group.') - check_plugin_config(plugn, plugn_spec) - plugins[plugn] = plugn_spec[plugn] - return plugins - - def __init__(self, client, name, cluster_config, args): - """ - """ - self.client = client - self.name = name - self.args = args - self.cluster_config = cluster_config - self.daemons = self.build_daemons(cluster_config) - self.plugins = self.build_plugins(cluster_config) - self.aggregators = self.build_aggregators(cluster_config) - self.producers = self.build_producers(cluster_config) - self.updaters = self.build_updaters(cluster_config) - self.stores = self.build_stores(cluster_config) - self.samplers = self.build_samplers(cluster_config) - - def commit(self): - pass - - def save_config(self): - try: - self.client.delete_prefix('/' + self.name) - self.walk(self.daemons, '/' + self.name + '/daemons') - self.walk(self.aggregators, '/' + self.name + '/aggregators') - self.walk(self.producers, '/' + self.name + '/producers') - self.walk(self.updaters, '/' + self.name + '/updaters') - self.walk(self.stores, '/' + self.name + '/stores') - self.walk(self.samplers, '/' + self.name + '/samplers') - self.walk(self.plugins, '/' + self.name + '/plugins') - self.client.put('/'+self.name+'/last_updated', str(time.time())) - except Exception as e: - a, b, c = sys.exc_info() - print(str(e)+' '+str(c.tb_lineno)) - return 1 - - def local_mode(self, local_path): - # Local mode uses hostname to help identify which daemon(s) to start - hostname = socket.gethostname() - local_list = {} - fd = None - match_host = False - for dmn_grp in self.daemons: - for dmn in self.daemons[dmn_grp]: - auth_list = {} - if hostname == self.daemons[dmn_grp][dmn]['addr']: - match_host = True - local_list[dmn] = self.daemons[dmn_grp][dmn] - local_list[dmn]['dmn_grp'] = dmn_grp - if dmn_grp in self.aggregators and dmn in self.aggregators[dmn_grp]: - try: - fd = open(f'{local_path}/{dmn}.conf', 'w+') - dstr, auth_list = self.write_listeners(dstr, dmn_grp, dmn, auth_list) - dstr, auth_list = self.write_producers(dstr, dmn_grp, dmn, auth_list) - dstr = self.write_stream_subscribe(dstr, dmn_grp, dmn) - dstr = self.write_agg_plugins(dstr, dmn_grp, dmn) - dstr = self.write_updaters(dstr, dmn_grp) - dstr = self.write_stores(dstr, dmn_grp) - fd.write(dstr) - except Exception as e: - print(f'Error {e}: writing ldms configuration files') - if fd: - fd.close() - if dmn_grp in self.samplers and dmn in self.samplers[dmn_grp]: - fd = open(f'{local_path}/{dmn}.conf', 'w+') - # TO DO: Refactor sampler config architecture to more easily reference appropriate groups - dstr = '' - dstr = self.write_samplers(dstr, dmn_grp) - dstr, auth_list = self.write_listeners(dstr, dmn_grp, dmn, auth_list) - fd.write(dstr) - if fd: - fd.close() - print(f'Starting {dmn}') - start_args = self.ldmsd_arg_list(local_path, dmn_grp, dmn) - local_list[dmn] = subprocess.Popen(start_args, stdin=subprocess.PIPE, stdout=subprocess.PIPE) - if match_host is False: - print(f'{hostname} does not match any daemon hosts in the ldms configuration file') - for dmn in local_list: - local_list[dmn].wait() - - def ldmsd_arg_list(self, local_path, dmn_grp, dmn): - start_list = [ 'ldmsd' ] - for ep in 
self.daemons[dmn_grp][dmn]['endpoints']: - if self.daemons[dmn_grp][dmn]['endpoints'][ep]['maestro_comm'] is True: - ep_ = self.daemons[dmn_grp][dmn]['endpoints'][ep] - start_list.append('-x') - start_list.append(f'{ep_["xprt"]}:{ep_["port"]}') - auth = check_opt('auth', ep_) - if auth: - auth_plugin = check_opt('plugin', ep_['auth']) - auth_opt = check_opt('conf', ep_) - start_list.append('-a') - start_list.append(auth_plugin) - if auth_opt: - if len(auth_opt.split('=')) < 2: - auth_opt = f'conf={auth_opt}' - start_list.append('-A') - start_list.append(auth_opt) - start_list.append('-c') - start_list.append(f'{local_path}/{dmn}.conf') - start_list.append('-r') - start_list.append(f'{local_path}/{dmn}.pid') - start_list.append('-l') - start_list.append(f'{local_path}/{dmn}.log') - start_list.append(f'-F') - return start_list - - def write_listeners(self, dstr, dmn_grp, dmn_name, auth_list={}): - for endp in self.daemons[dmn_grp][dmn_name]['endpoints']: - ep = self.daemons[dmn_grp][dmn_name]['endpoints'][endp] - auth, plugin, auth_opt = check_auth(ep) - if auth: - if auth not in auth_list: - auth_list[auth] = { 'conf' : auth_opt } - dstr += f'auth_add name={auth}' - dstr = self.write_opt_attr(dstr, 'plugin', plugin, endline=False) - dstr = self.write_opt_attr(dstr, 'conf', auth_opt) - dstr += f'listen xprt={ep["xprt"]} port={ep["port"]}' - dstr = self.write_opt_attr(dstr, 'auth', auth, endline=False) - dstr = self.write_opt_attr(dstr, 'conf', auth_opt) - return dstr, auth_list - - def write_opt_attr(self, dstr, attr, val, endline=True): - # Include leading space - if val is not None: - dstr += f' {attr}={val}' - if endline: - dstr += f'\n' - return dstr - - def write_producers(self, dstr, group_name, dmn, auth_list): - if group_name in self.producers: - ''' Balance samplers across aggregators ''' - ppd = -(len(self.producers[group_name]) // -len(self.aggregators[group_name].keys())) - rem = len(self.producers[group_name]) % len(self.aggregators[group_name].keys()) - prdcrs = list(self.producers[group_name].keys()) - aggs = list(self.daemons[group_name].keys()) - agg_idx = int(aggs.index(dmn)) - prdcr_idx = int(ppd * agg_idx) - prod_group = prdcrs[prdcr_idx:prdcr_idx+ppd] - i = 0 - auth = None - for ep in prod_group: - producer = self.producers[group_name][ep] - auth = check_opt('auth', self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]) - auth_opt = check_opt('conf', self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]) - if auth not in auth_list: - auth_list[auth] = { 'conf' : auth_opt } - plugin = check_opt('plugin', self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]['auth']) - if plugin is None: - print(f'Please specify auth plugin type for producer "{producer["daemon"]}" with auth name "{auth}"\n'\ - 'configuration file generation will continue, but auth will likely be denied.\n') - plugin = auth - dstr += f'auth_add name={auth} plugin={plugin}' - dstr = self.write_opt_attr(dstr, 'conf', auth_list[auth]['conf']) - for ep in prod_group: - regex = False - producer = self.producers[group_name][ep] - pname = producer['name'] - port = self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]['port'] - xprt = self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]['xprt'] - hostname = self.daemons[producer['dmn_grp']][producer['daemon']]['addr'] - auth = check_opt('auth', self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]) - ptype = producer['type'] - reconnect = producer['reconnect'] - 
dstr += f'prdcr_add name={pname} '\ - f'host={hostname} '\ - f'port={port} '\ - f'xprt={xprt} '\ - f'type={ptype} '\ - f'reconnect={reconnect}' - dstr = self.write_opt_attr(dstr, 'auth', auth) - last_sampler = pname - if 'regex' in producer: - regex = True - dstr += f'prdcr_start_regex regex={producer["regex"]}\n' - if not regex: - dstr += f'prdcr_start_regex regex=.*\n' - return dstr, auth_list - - def write_env(self, dstr, grp, dname): - if grp not in self.daemons: - return 1 - if dname not in self.daemons[grp]: - return 1 - if check_opt('environment', self.daemons[grp][dname]): - if type(self.daemons[grp][dname]['environment']) is not dict: - print(f'Error: Environment variables must be a yaml key:value dictionary\n') - sys.exit() - for attr in self.daemons[grp][dname]['environment']: - dstr += f'env {attr}={self.daemons[grp][dname]["environment"]}\n' - return dstr - - def write_sampler(self, dstr, smplr_grp, sname): - if smplr_grp not in self.samplers: - return dstr - dstr = self.write_env(dstr, smplr_grp, sname) - dstr, auth_list = self.write_listeners(dstr, smplr_grp, sname) - for plugin in self.samplers[smplr_grp]['plugins']: - plugn = self.plugins[plugin] - dstr += f'load name={plugn["name"]}\n' - for cfg_ in plugn['config']: - if type(cfg_) is dict: - hostname = socket.gethostname() - cfg_args = {} - prod = check_opt('producer', cfg_) - inst = check_opt('instance', cfg_) - if not prod: - cfg_args['producer'] = f'{hostname}' - if not inst: - cfg_args['instance'] = f'{hostname}/{plugn["name"]}' - for attr in cfg_: - if attr == 'name' or attr == 'interval': - continue - cfg_args[attr] = cfg_[attr] - cfg_str = parse_to_cfg_str(cfg_args) - else: - cfg_str = cfg_ - - interval = check_intrvl_str(plugn['interval']) - dstr += f'config name={plugn["name"]} {cfg_str}\n' - dstr += f'start name={plugn["name"]} interval={interval}' - offset = check_opt('offset', plugn) - dstr = self.write_opt_attr(dstr, 'offset', offset) - return dstr - - def write_samplers(self, dstr, smplr_group): - for inst_name in self.samplers[smplr_group]['plugins']: - plugin = self.plugins[inst_name] - sname = plugin['name'] - dstr += f'load name={sname}\n' - for cfg_ in plugin['config']: - if type(cfg_) is dict: - hostname = socket.gethostname() - if args.local: - cfg_args = { 'producer' : f'{hostname}', - 'instance' : f'{hostname}/{plugin["name"]}', - 'component_id' : '${LDMS_COMPONENT_ID}' } - else: - cfg_args = {} - prod = check_opt('producer', cfg_) - inst = check_opt('instance', cfg_) - if not prod: - cfg_args['producer'] = '{hostname}' - if not inst: - cfg_args['instance'] = '{hostname}/{plugin["name"]}' - for attr in cfg_: - if attr == 'name' or attr == 'interval': - continue - cfg_args[attr] = cfg_[attr] - cfg_str = parse_to_cfg_str(cfg_args) - else: - cfg_str = cfg_ - - interval = check_intrvl_str(plugin['interval']) - dstr += f'config name={sname} {cfg_str}\n' - dstr += f'start name={sname} interval={interval}' - offset = check_opt('offset', plugin) - dstr = self.write_opt_attr(dstr, 'offset', offset) - return dstr - - def write_stream_subscribe(self, dstr, group_name, agg): - subscribe = check_opt('subscribe', self.aggregators[group_name][agg]) - if subscribe: - for stream in subscribe: - regex = check_opt('regex', stream) - if regex is None: - regex = '.*' - dstr += f'prdcr_subscribe stream={stream["stream"]} '\ - f'regex={regex}\n' - return dstr - - def write_aggregator(self, dstr, group_name, dmn): - # Agg config - try: - ''' "Balance" agg configuration if all samplers are included in each aggregator ''' - 
if group_name not in self.aggregators: - return dstr - auth_list = {} - dstr, auth_list = self.write_listeners(dstr, group_name, dmn, auth_list) - dstr, auth_list = self.write_producers(dstr, group_name, dmn, auth_list) - dstr = self.write_stream_subscribe(dstr, group_name, dmn) - dstr = self.write_agg_plugins(dstr, group_name, dmn) - dstr = self.write_updaters(dstr, group_name) - dstr = self.write_stores(dstr, group_name) - return dstr - except Exception as e: - ea, eb, ec = sys.exc_info() - print('Agg config Error: '+str(e)+' Line:'+str(ec.tb_lineno)) - raise ValueError - - def write_agg_plugins(self, dstr, group_name, agg): - # Write independent plugin configuration for group - plugins = check_opt('plugins', self.aggregators[group_name][agg]) - if plugins is not None: - for plugn in plugins: - plugin = self.plugins[plugn] - dstr += f'load name={plugin["name"]}\n' - for cfg_ in plugin["config"]: - if type(cfg_) is dict: - cfg_str = parse_to_cfg_str(plugin["config"]) - else: - cfg_str = cfg_ - dstr += f'config name={plugin["name"]} {cfg_str}\n' - return dstr - - def write_updaters(self, dstr, group_name): - if group_name in self.updaters: - updtr_group = self.updaters[group_name] - for updtr in updtr_group: - interval = check_intrvl_str(updtr_group[updtr]['interval']) - updtr_str = f'updtr_add name={updtr_group[updtr]["name"]}' - if 'mode' in updtr_group[updtr]: - mode = updtr_group[updtr]['mode'] - else: - mode = 'pull' - # Check mode - if mode == 'push': - updtr_str = f'{updtr_str} push=True' - elif mode == 'onchange': - updtr_str = f'{updtr_str} push=onchange' - elif mode == 'auto_interval' or 'auto': - updtr_str = f'{updtr_str} auto_interval=True' - dstr += f'{updtr_str} '\ - f'interval={interval}' - offset = check_opt('offset', updtr_group[updtr]) - dstr = self.write_opt_attr(dstr, 'offset', offset) - for prod in updtr_group[updtr]['producers']: - dstr += f'updtr_prdcr_add name={updtr_group[updtr]["name"]} '\ - f'regex={prod["regex"]}\n' - dstr += f'updtr_start name={updtr_group[updtr]["name"]}\n' - return dstr - - def write_stores(self, dstr, group_name): - if group_name in self.stores: - store_group = self.stores[group_name] - loaded_plugins = [] - for store in store_group: - if store_group[store]['plugin'] not in loaded_plugins: - if store_group[store]['plugin'] not in self.plugins: - print(f'Error: Storage policy plugin reference {store_group[store]["plugin"]} '\ - f'is not defined in the top level "plugins" dictionary"\n' - f'Continuing...\n') - continue - plugin = self.plugins[store_group[store]['plugin']] - dstr += f'load name={plugin["name"]}\n' - for cfg_ in plugin['config']: - if type(cfg_) is dict: - cfg_str = parse_to_cfg_str(cfg_) - else: - cfg_str = cfg_ - dstr += f'config name={plugin["name"]} '\ - f'{cfg_str}\n' - loaded_plugins.append(store_group[store]['plugin']) - strgp_add = f'strgp_add name={store} plugin={plugin["name"]} ' - strgp_add += f'container={store_group[store]["container"]} ' - strgp_add += f'schema={store_group[store]["schema"]}' - dstr += strgp_add - flush = check_opt('flush', store_group[store]) - dstr = self.write_opt_attr(dstr, 'flush', flush) - dstr += f'strgp_start name={store}\n' - return dstr - - def daemon_config(self, path, dname): - """ - Write a specific daemon's V4 configuration to file. 
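For reference, a single stores entry expands to load/config/strgp_add/strgp_start
commands in the generated v4 text; a hedged sketch in which the store name,
plugin config arguments, and container are hypothetical:

    # Hypothetical store spec and, roughly, the v4 commands it produces:
    store_spec = {'daemons': 'l1-aggregators', 'plugin': 'store_sos',
                  'container': 'ldms_data', 'schema': 'meminfo'}
    emitted = (
        'load name=store_sos\n'
        'config name=store_sos path=/var/ldms/sos\n'  # from the plugin's config list
        'strgp_add name=sos-meminfo plugin=store_sos '
        'container=ldms_data schema=meminfo\n'
        'strgp_start name=sos-meminfo\n'
    )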
- """ - dmn = None - grp = None - for dmn_grp in self.daemons: - if dname in self.daemons[dmn_grp]: - dmn = self.daemons[dmn_grp][dname] - grp = dmn_grp - break - if dmn is None: - print(f'Error: {dname} does not exist in YAML configuration file {path}\n') - return 1 - dstr = '' - dstr = self.write_sampler(dstr, grp, dname) - dstr = self.write_aggregator(dstr, grp, dname) - return f'{dstr}\0' - - def config_v4(self, path): - """ - Read the group configuration from ETCD and generate a version 4 LDMSD configuration - This configuration assumes that the environemnt variables COMPONENT_ID, HOSTNAME - all exist on the machines relevant to the ldmsd cluster. - """ - for group_name in self.daemons: - # Sampler config - if self.samplers != None: - try: - # TO DO: Refactor sampler config architecture to more easily reference appropriate groups - if group_name in self.samplers: - fd = open(f'{path}/{group_name}-samplers.conf', 'w+') - dstr = '' - dstr = self.write_samplers(dstr, group_name) - for dmn_name in self.daemons[group_name]: - dstr, auth_list = self.write_listeners(dstr, group_name, dmn_name) - fd.write(dstr) - if fd: - fd.close() - except Exception as e: - a, b, d = sys.exc_info() - print(f'Error generating sampler configuration: {str(e)} {str(d.tb_lineno)}') - sys.exit() - else: - print(f'"samplers" not found in configuration file. Skipping...') - - # Write aggregators in daemon group - if group_name in self.aggregators: - for dmn in self.aggregators[group_name]: - fd = open(f'{path}/{dmn}.conf', 'w+') - dstr = '' - dstr = self.write_aggregator(dstr, group_name, dmn) - fd.write(dstr) - fd.close() - if __name__ == "__main__": parser = argparse.ArgumentParser( description="LDMS Monitoring Cluster Configuration") @@ -838,16 +12,6 @@ if __name__ == "__main__": help="The ldmsd load balance domain configuration file. " "This will not start the maestro " "load balancer") - parser.add_argument("--cluster", metavar="FILE", - help="The name of the etcd cluster configuration file") - parser.add_argument("--prefix", metavar="STRING", - help="The prefix for the dumped aggregator configurations", - default="unknown") - local = parser.add_mutually_exclusive_group() - local.add_argument("--local", action='store_true', default=False, help="Start maestro_ctrl in local mode") - parser.add_argument("--local_path", metavar="STRING", - help="The path for the generated local configuration files", - default=os.path.expanduser('~'), required=False) parser.add_argument("--generate-config-path", metavar="STRING", required=False, default=False) parser.add_argument("--daemon_name", metavar="STRING", required=False, @@ -865,52 +29,19 @@ if __name__ == "__main__": config_fp = open(args.ldms_config) conf_spec = yaml.safe_load(config_fp) - if args.cluster: - # All keys in the DB are prefixed with the prefix name. So we can - # have multiple monitoring hosted by the same consensus cluster. - import etcd3 - if not args.prefix: - print(f'"prefix" is required when using etcd') - # Load the cluster configuration file. 
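The cluster file read here is a small YAML document naming the etcd members; a
hypothetical example and the (host, port) tuple the surrounding code derives
from it:

    import yaml

    etcd_spec = yaml.safe_load("""
    members:
      - host: etcd-1     # hypothetical member hosts/ports
        port: 2379
      - host: etcd-2
        port: 2379
    """)
    etcd_hosts = tuple((m['host'], m['port']) for m in etcd_spec['members'])
    # Only etcd_hosts[0] is used when the etcd3 client is created.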
This configures the daemons - # that support the key/value configuration database - etcd_fp = open(args.cluster) - etcd_spec = yaml.safe_load(etcd_fp) - - etcd_hosts = () - for h in etcd_spec['members']: - etcd_hosts += (( h['host'], h['port'] ),) - - # Use the 1st host for now - client = etcd3.client(host=etcd_hosts[0][0], port=etcd_hosts[0][1], - grpc_options=[ ('grpc.max_send_message_length',16*1024*1024), - ('grpc.max_receive_message_length',16*1024*1024)]) - else: - client = None - args.prefix = None - - cluster = ClusterCtrl(client, args.prefix, conf_spec, args) + cluster = YamlCfg(None, None, conf_spec, args) if args.daemon_name: ldmsd_cfg_str = cluster.daemon_config(args.ldms_config, args.daemon_name.rstrip('0')) print(f'{ldmsd_cfg_str}\0') sys.exit(0) - if args.local: - cluster.local_mode(args.local_path) if args.generate_config_path: cluster.config_v4(args.generate_config_path) print("LDMSD v4 config files generated") sys.exit(0) - # Replace existing configuration if etcd cluster specified - if args.cluster: - rc = cluster.save_config() - if rc: - print("Error saving ldms cluster configuration to etcd cluster.") - sys.exit(0) - print("LDMS cluster configuration saved to etcd cluster.") - - if not args.cluster and not args.prefix and not args.local and not args.generate_config_path: + if not args.generate_config_path and not args.ldms_config and not args.daemon_name: print(f'No action detected. Exiting...') sys.exit(0) diff --git a/ldms/python/ldmsd/parser_util.py b/ldms/python/ldmsd/parser_util.py index 50768d6a6..fe74b9908 100644 --- a/ldms/python/ldmsd/parser_util.py +++ b/ldms/python/ldmsd/parser_util.py @@ -1,3 +1,10 @@ +import os +import errno +import json +import subprocess +import socket +import time +import itertools as it import collections import ldmsd.hostlist as hostlist @@ -10,6 +17,7 @@ 'daemons', 'aggregators', 'samplers', + 'plugins', 'stores' ] @@ -80,12 +88,12 @@ def check_intrvl_str(interval_s): if interval_s.split(unit)[1] != '': raise ValueError(f"{error_str}") ival_s = interval_s.split(unit)[0] + try: + ival_s = float(ival_s) * unit_strs[unit] + except Exception as e: + raise ValueError(f"{interval_s} is not a valid time-interval string") else: ival_s = interval_s - try: - ival_s = float(ival_s) * unit_strs[unit] - except Exception as e: - raise ValueError(f"{interval_s} is not a valid time-interval string") return int(ival_s) def check_opt(attr, spec): @@ -112,20 +120,6 @@ def check_required(attr_list, container, container_name): raise ValueError("The '{0}' attribute is required in a {1}". format(name, container_name)) -def fmt_cmd_args(comm, cmd, spec): - cfg_args = {} - cmd_attr_list = comm.get_cmd_attr_list(cmd) - for key in spec: - if key in cmd_attr_list['req'] or key in cmd_attr_list['opt']: - if key == 'plugin': - cfg_args[key] = spec[key]['name'] - continue - cfg_args[key] = spec[key] - if not all(key in spec for key in cmd_attr_list['req']): - print(f'The attribute(s) {set(cmd_attr_list["req"]) - spec.keys()} are required by {cmd}') - raise ValueError() - return cfg_args - def NUM_STR(obj): return str(obj) if type(obj) in [ int, float ] else obj @@ -174,3 +168,735 @@ def parse_yaml_bool(bool_): return True else: return False + +class YamlCfg(object): + def build_daemons(self, config): + """Generate a daemon spec list from YAML config + + Builds a dictionary of endpoint definitions. The 'config' is a + list of daemon specifications. Each daemon specification contains + 'names', 'host', and 'endpoint' attributes. 
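A small sketch of the expansion rule described here, using the bundled hostlist
module (the names are illustrative):

    from ldmsd.hostlist import expand_hostlist

    dnames = expand_hostlist("agg-[1-4]")   # ['agg-1', 'agg-2', 'agg-3', 'agg-4']
    hosts = expand_hostlist("node[1-4]")    # ['node1', 'node2', 'node3', 'node4']
    pairs = list(zip(dnames, hosts))        # daemon name -> host address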
All attributes are + expanded per the slurm hostlist rules. The length of the + expanded name-list, must equal the length of the expanded + host-list. + + Example: + + daemons: + - names : "agg-[1-10]" + hosts : "node[1-10]" + endpoints : + - names : "node-[1-10]-[10002] + ports : "[10002]" + maestro_comm : True + xprt : sock + auth : + name : munge + plugin : munge + + results in the following host-spec dictionary: + + { + "agg-[[1-3]-[10002]" : { + "agg-1" : { + "endpoints": { + "node-1-10002" : { "host" : "node-1", "port" : 10002 }, + "node-2-10002" : { "host" : "node-2", "port" : 10002 }, + "node-3-10002" : { "host" : "node-3", "port" : 10002 }, + ... + } + } + } + + """ + ep_dict = {} + node_config = config['daemons'] + if type(node_config) is not list: + print(f'{LDMS_YAML_ERR}') + print(f'daemons {LIST_ERR}') + print(f'e.g. daemons:') + print(f' - names : &l1-agg "l1-aggs-[1-8]"') + print(f' hosts : &l1-agg-hosts "node-[1-8]"') + sys.exit() + for spec in node_config: + check_required([ 'names', 'endpoints', 'hosts' ], + spec, '"daemons" entry') + hosts = expand_names(spec['hosts']) + dnames = expand_names(spec['names']) + hostnames = hosts + if len(dnames) != len(hostnames): + hosts = [ [host]*(len(dnames)//len(hostnames)) for host in hostnames ] + hosts = list(it.chain.from_iterable(hosts)) + ep_names = [] + ep_ports = [] + if type(spec['endpoints']) is not list: + print(f'{LDMS_YAML_ERR}') + print(f'endpoints {LIST_ERR}') + print(f'e.g endpoints :') + print(f' - names : &l1-agg-endpoints "node-[1-8]-[10101]"') + print(f' ports : &agg-ports "[10101]"') + print(f' maestro_comm : True') + print(f' xprt : sock') + print(f' auth :') + print(f' name : munge1') + print(f' plugin : munge') + sys.exit() + for endpoints in spec['endpoints']: + check_required(['names','ports'], + endpoints, '"endpoints" entry') + cur_epnames = expand_names(endpoints['names']) + ep_names.append(cur_epnames) + cur_ports = expand_names(endpoints['ports']) + _ports = cur_ports + if len(cur_ports) != len(cur_epnames): + cur_ports = [ _ports for i in range(0, len(cur_epnames)//len(_ports)) ] + cur_ports = list(it.chain.from_iterable(cur_ports)) + ep_ports.append(cur_ports) + ep_dict[spec['names']] = {} + env = check_opt('environment', spec) + for dname, host in zip(dnames, hosts): + ep_dict[spec['names']][dname] = {} + ep_dict[spec['names']][dname]['addr'] = host + ep_dict[spec['names']][dname]['environment'] = env + ep_dict[spec['names']][dname]['endpoints'] = {} + for ep_, ep_port, ep in zip(ep_names, ep_ports, spec['endpoints']): + port = ep_port.pop(0) + ep_name = ep_.pop(0) + xprt = check_opt('xprt', ep) + auth_name = check_opt('auth', ep) + auth_conf = check_opt('conf', ep) + plugin = check_opt('plugin', ep['auth']) + maestro_comm = parse_yaml_bool(check_opt('maestro_comm', ep)) + h = { + 'name' : ep_name, + 'port' : port, + 'xprt' : xprt, + 'maestro_comm' : maestro_comm, + 'auth' : { 'name' : auth_name, 'conf' : auth_conf, 'plugin' : plugin } + } + ep_dict[spec['names']][dname]['endpoints'][ep_name] = h + ep_dict[spec['names']][dname]['addr'] = host + if len(ep_dict[spec['names']]) == 0: + print(f'Error processing regex of hostnames {spec["hosts"]} and daemons {spec["names"]}.'\ + f'Number of hosts must be a multiple of daemons with appropriate ports or equivalent to length of daemons.\n'\ + f'Regex {spec["hosts"]} translates to {len(hostnames)} hosts\n'\ + f'Regex {spec["names"]} translates to {len(dnames)} daemons\n') + sys.exit() + return ep_dict + + def build_aggregators(self, config): + aggregators = 
{} + if 'aggregators' not in config: + return aggregators + agg_conf = config['aggregators'] + if type(agg_conf) is not list: + print(f'{LDMS_YAML_ERR}') + print(f'aggregators {LIST_ERR}') + print(f'e.g. aggregators:') + print(f' - daemons: "l1-aggregators"') + print(f' peers :') + print(f' - daemons : "samplers"') + print(f' ... : ...') + return aggregators + for agg_spec in agg_conf: + check_required([ 'daemons' ], + agg_spec, '"aggregators" entry') + names = expand_names(agg_spec['daemons']) + group = agg_spec['daemons'] + plugins = check_opt('plugins', agg_spec) + if plugins: + if plugins is not list: + print(f'Error: "plugins" must be a list of plugin instance names"\n') + for plugin in plugins: + check_plugin_config(plugin, self.plugins) + daemons_ = None + for daemons in config['daemons']: + if group == daemons['names']: + daemons_ = daemons + if daemons_ is None: + raise ValueError(f"No daemons matched matched daemon key {group}") + if group not in aggregators: + aggregators[group] = {} + subscribe = check_opt('subscribe', agg_spec) + if subscribe: + for stream in subscribe: + check_required([ 'stream', 'regex' ], stream, "stream specification") + for name in names: + aggregators[group][name] = { 'state' : 'stopped' } # 'running', 'error' + if subscribe: + aggregators[group][name]['subscribe'] = subscribe + if plugins: + aggregators[group][name]['plugins'] = plugins + return aggregators + + def build_producers(self, config): + """ + Return a dictionary keyed by the group name. Each dictionary + entry is a list of producers in that group. + """ + producers = {} + for agg in config.get('aggregators', []): + if 'peers' not in agg: + continue + if type(agg['peers']) is not list: + print(f'{LDMS_YAML_ERR}') + print(f'peers {LIST_ERR}') + print(f'e.g. peers:') + print(f' - daemons: "samplers"') + print(f' endpoints : "sampler-endpoints"') + print(f' ... : ...') + continue + for prod in agg['peers']: + check_required([ 'endpoints', 'updaters', + 'reconnect', 'type', ], + prod, '"peers" entry') + # Use endpoints for producer names and remove names attribute? + if prod['daemons'] not in self.daemons: + dmn_grps = prod['daemons'].split(',') + eps = prod['endpoints'].split(',') + else: + dmn_grps = [ prod['daemons'] ] + eps = [ prod['endpoints'] ] + for daemons, endpoints in zip(dmn_grps, eps): + names = expand_names(endpoints) + endpoints = expand_names(endpoints) + group = agg['daemons'] + smplr_dmns = expand_names(daemons) + if group not in producers: + producers[group] = {} + + upd_spec = prod['updaters'] + # Expand and generate all the producers + typ = prod['type'] + reconnect = check_intrvl_str(prod['reconnect']) + ports_per_dmn = len(endpoints) / len(smplr_dmns) + ppd = ports_per_dmn + try: + for name in names: + if ppd > 1: + smplr_dmn = smplr_dmns[0] + ppd -= 1 + else: + smplr_dmn = smplr_dmns.pop(0) + ppd = ports_per_dmn + endpoint = endpoints.pop(0) + prod = { + 'daemon' : smplr_dmn, + 'dmn_grp' : daemons, + 'name' : name, + 'endpoint' : endpoint, + 'type' : typ, + 'group' : group, + 'reconnect' : reconnect, + 'updaters' : upd_spec + } + producers[group][endpoint] = prod + except: + print(f'Error building producer config:\n'\ + f'Please ensure "endpoints" is configured to the correct number of ports specified.') + return producers + + def build_updaters(self, config): + """ + Return a dictionary based on the aggregator. Each dictionary + entry is a list of updaters in that group. 
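A hedged sketch of the record one updater spec becomes, assuming the usual
microsecond interval units and the 'pull' default mode (the group key 'l1-aggs'
is hypothetical):

    updtr_spec = {'interval': '1.0s',
                  'sets': [{'regex': '.*', 'field': 'inst'}]}
    updtr = {
        'name': 'updtr_0',                  # updtr_<counter>, unique per group
        'interval': 1000000,                # check_intrvl_str('1.0s'), in usec
        'group': 'l1-aggs',                 # the aggregator 'daemons' key
        'sets': updtr_spec['sets'],
        'mode': 'pull',                     # default when 'mode' is omitted
        'producers': [{'regex': '.*'}],
    }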
+ """ + updaters = {} + updtr_cnt = 0 + for agg in config.get('aggregators', []): + if 'peers' not in agg: + continue + for prod in agg['peers']: + if type(prod['updaters']) is not list: + print(f'Error parsing ldms_config yaml file') + print(f'Updater spec must be a list of dictionaries, specified with "-" designator in the ldms_config yaml file') + print(f'e.g. updaters:') + print(f' - mode : pull') + print(f' interval : "1.0s"') + print(f' sets :') + print(f' - regex : ".*"') + print(f' field : inst') + continue + for updtr_spec in prod['updaters']: + check_required([ 'interval', 'sets', ], + updtr_spec, '"updaters" entry') + group = agg['daemons'] + if group not in updaters: + updaters[group] = {} + grp_updaters = updaters[group] + updtr_name = f'updtr_{updtr_cnt}' + if updtr_name in grp_updaters: + raise ValueError(f"Duplicate updater name '{updtr_name}''. "\ + f"An updater name must be unique within the group") + updtr = { + 'name' : updtr_name, + 'interval' : check_intrvl_str(updtr_spec['interval']), + 'group' : agg['daemons'], + 'sets' : updtr_spec['sets'], + 'producers' : [{ 'regex' : '.*' }] + } + if 'offset' in updtr_spec: + updtr['offset'] = check_intrvl_str(updtr_spec['offset']) + if 'mode' in updtr_spec: + updtr['mode'] = updtr_spec['mode'] + else: + updtr['mode'] = 'pull' + grp_updaters[updtr_name] = updtr + updtr_cnt += 1 + return updaters + + def build_stores(self, config): + """ + Return a dictionary keyed by the group name. Each dictionary + entry is a list of stores in that group. + """ + if 'stores' not in config: + return None + stores = {} + if type(config['stores']) is not dict: + print(f'{LDMS_YAML_ERR}') + print(f'store {DICT_ERR}') + print(f'e.g. stores:') + print(f' sos-meminfo :') + print(f' daemons : "l1-aggregators"') + print(f' container : ldms_data') + print(f' ... : ...') + return None + for store in config['stores']: + store_spec = config['stores'][store] + store_spec['name'] = store + check_required([ 'plugin', 'container' ], + store_spec, '"store" entry') + decomp = check_opt('decomp', store_spec) + decomposition = check_opt('decomposition', store_spec) + if not decomp and not decomposition: + check_required(['schema'], store_spec, '"store" entry') + schema = check_opt('schema', store_spec) + regex = check_opt('regex', store_spec) + if decomp and not schema and not regex: + raise ValueError("Decomposition plugin configuration requires either" + " 'schema' or 'regex' attribute'") + group = store_spec['daemons'] + if group not in stores: + stores[group] = {} + grp_stores = stores[group] + if store in grp_stores: + raise ValueError(f"Duplicate store name '{store}'. " + "A store name must be unique within the group") + check_opt('flush', store_spec) + check_plugin_config(store_spec['plugin'], self.plugins) + grp_stores[store] = store_spec + return stores + + def build_samplers(self, config): + """ + Generate samplers from YAML config. + Return a dictionary keyed by the samplers group name. Each dictionary + entry is a single ldms daemon's sampler configuration. + """ + if 'samplers' not in config: + return None + smplrs = {} + if type(config['samplers']) is not list: + print(f'{LDMS_YAML_ERR}') + print(f'samplers {LIST_ERR}') + print(f'e.g. 
samplers:') + print(f' - daemons : "samplers"') + print(f' plugins :') + print(f' - name : meminfo') + print(f' interval : "1.0s"') + print(f' offset : "0s"') + print(f' config :') + print(f' - schema : meminfo') + print(f' component_id : "10001"') + print(f' producer : "node-1"') + print(f' perm : "0777"') + return None + for smplr_spec in config['samplers']: + check_required([ 'daemons', 'plugins' ], + smplr_spec, '"sampler" entry') + for plugin in smplr_spec['plugins']: + check_plugin_config(plugin, self.plugins) + smplrs[smplr_spec['daemons']] = smplr_spec + return smplrs + + def build_plugins(self, config): + """ + Generate plugins to load from a YAML config. + Return a dictionary keyed by the plugin's group name. Each dictionary entry + is a single plugin's configuration. + """ + if 'plugins' not in config: + return None + if type(config['plugins']) is not dict: + print(f'{LDMS_YAML_ERR}') + print(f'store {DICT_ERR}') + print(f'e.g. plugins:') + print(f' meminfo1 :') + print(f' name : meminfo') + print(f' interval : 1.0s') + print(f' config : [ { schema : meminfo }, { ... : ... } ]') + + plugins = {} + plugn_spec = config['plugins'] + for plugn in plugn_spec: + if plugn in plugins: + raise ValueError(f'Duplicate plugin name "{plugin_name}". ' + f'Plugin must be unique within a group.') + check_plugin_config(plugn, plugn_spec) + plugins[plugn] = plugn_spec[plugn] + return plugins + + def __init__(self, client, name, cluster_config, args): + """ + Build configuration groups out of the YAML configuration + """ + self.client = client + self.name = name + self.args = args + self.cluster_config = cluster_config + self.daemons = self.build_daemons(cluster_config) + self.plugins = self.build_plugins(cluster_config) + self.aggregators = self.build_aggregators(cluster_config) + self.producers = self.build_producers(cluster_config) + self.updaters = self.build_updaters(cluster_config) + self.stores = self.build_stores(cluster_config) + self.samplers = self.build_samplers(cluster_config) + + def ldmsd_arg_list(self, local_path, dmn_grp, dmn): + start_list = [ 'ldmsd' ] + for ep in self.daemons[dmn_grp][dmn]['endpoints']: + if self.daemons[dmn_grp][dmn]['endpoints'][ep]['maestro_comm'] is True: + ep_ = self.daemons[dmn_grp][dmn]['endpoints'][ep] + start_list.append('-x') + start_list.append(f'{ep_["xprt"]}:{ep_["port"]}') + auth = check_opt('auth', ep_) + if auth: + auth_plugin = check_opt('plugin', ep_['auth']) + auth_opt = check_opt('conf', ep_) + start_list.append('-a') + start_list.append(auth_plugin) + if auth_opt: + if len(auth_opt.split('=')) < 2: + auth_opt = f'conf={auth_opt}' + start_list.append('-A') + start_list.append(auth_opt) + start_list.append('-c') + start_list.append(f'{local_path}/{dmn}.conf') + start_list.append('-r') + start_list.append(f'{local_path}/{dmn}.pid') + start_list.append('-l') + start_list.append(f'{local_path}/{dmn}.log') + start_list.append(f'-F') + return start_list + + def write_listeners(self, dstr, dmn_grp, dmn_name, auth_list={}): + for endp in self.daemons[dmn_grp][dmn_name]['endpoints']: + ep = self.daemons[dmn_grp][dmn_name]['endpoints'][endp] + auth, plugin, auth_opt = check_auth(ep) + if auth: + if auth not in auth_list: + auth_list[auth] = { 'conf' : auth_opt } + dstr += f'auth_add name={auth}' + dstr = self.write_opt_attr(dstr, 'plugin', plugin, endline=False) + dstr = self.write_opt_attr(dstr, 'conf', auth_opt) + dstr += f'listen xprt={ep["xprt"]} port={ep["port"]}' + dstr = self.write_opt_attr(dstr, 'auth', auth, endline=False) + dstr = 
self.write_opt_attr(dstr, 'conf', auth_opt) + return dstr, auth_list + + def write_opt_attr(self, dstr, attr, val, endline=True): + # Include leading space + if val is not None: + dstr += f' {attr}={val}' + if endline: + dstr += f'\n' + return dstr + + def write_producers(self, dstr, group_name, dmn, auth_list): + if group_name in self.producers: + ''' Balance samplers across aggregators ''' + ppd = -(len(self.producers[group_name]) // -len(self.aggregators[group_name].keys())) + rem = len(self.producers[group_name]) % len(self.aggregators[group_name].keys()) + prdcrs = list(self.producers[group_name].keys()) + aggs = list(self.daemons[group_name].keys()) + agg_idx = int(aggs.index(dmn)) + prdcr_idx = int(ppd * agg_idx) + prod_group = prdcrs[prdcr_idx:prdcr_idx+ppd] + i = 0 + auth = None + for ep in prod_group: + producer = self.producers[group_name][ep] + auth = check_opt('auth', self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]) + auth_opt = check_opt('conf', self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]) + if auth not in auth_list: + auth_list[auth] = { 'conf' : auth_opt } + plugin = check_opt('plugin', self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]['auth']) + if plugin is None: + print(f'Please specify auth plugin type for producer "{producer["daemon"]}" with auth name "{auth}"\n'\ + 'configuration file generation will continue, but auth will likely be denied.\n') + plugin = auth + dstr += f'auth_add name={auth} plugin={plugin}' + dstr = self.write_opt_attr(dstr, 'conf', auth_list[auth]['conf']) + for ep in prod_group: + regex = False + producer = self.producers[group_name][ep] + pname = producer['name'] + port = self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]['port'] + xprt = self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]['xprt'] + hostname = self.daemons[producer['dmn_grp']][producer['daemon']]['addr'] + auth = check_opt('auth', self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]) + ptype = producer['type'] + reconnect = producer['reconnect'] + dstr += f'prdcr_add name={pname} '\ + f'host={hostname} '\ + f'port={port} '\ + f'xprt={xprt} '\ + f'type={ptype} '\ + f'reconnect={reconnect}' + dstr = self.write_opt_attr(dstr, 'auth', auth) + last_sampler = pname + if 'regex' in producer: + regex = True + dstr += f'prdcr_start_regex regex={producer["regex"]}\n' + if not regex: + dstr += f'prdcr_start_regex regex=.*\n' + return dstr, auth_list + + def write_env(self, dstr, grp, dname): + if grp not in self.daemons: + return 1 + if dname not in self.daemons[grp]: + return 1 + if check_opt('environment', self.daemons[grp][dname]): + if type(self.daemons[grp][dname]['environment']) is not dict: + print(f'Error: Environment variables must be a yaml key:value dictionary\n') + sys.exit() + for attr in self.daemons[grp][dname]['environment']: + dstr += f'env {attr}={self.daemons[grp][dname]["environment"]}\n' + return dstr + + def write_sampler(self, dstr, smplr_grp, sname): + if smplr_grp not in self.samplers: + return dstr + dstr = self.write_env(dstr, smplr_grp, sname) + dstr, auth_list = self.write_listeners(dstr, smplr_grp, sname) + for plugin in self.samplers[smplr_grp]['plugins']: + plugn = self.plugins[plugin] + dstr += f'load name={plugn["name"]}\n' + for cfg_ in plugn['config']: + if type(cfg_) is dict: + hostname = socket.gethostname() + cfg_args = {} + prod = check_opt('producer', cfg_) + inst = check_opt('instance', cfg_) + if not prod: + 
cfg_args['producer'] = f'{hostname}' + if not inst: + cfg_args['instance'] = f'{hostname}/{plugn["name"]}' + for attr in cfg_: + if attr == 'name' or attr == 'interval': + continue + cfg_args[attr] = cfg_[attr] + cfg_str = parse_to_cfg_str(cfg_args) + else: + cfg_str = cfg_ + + interval = check_intrvl_str(plugn['interval']) + dstr += f'config name={plugn["name"]} {cfg_str}\n' + dstr += f'start name={plugn["name"]} interval={interval}' + offset = check_opt('offset', plugn) + dstr = self.write_opt_attr(dstr, 'offset', offset) + return dstr + + def write_samplers(self, dstr, smplr_group): + for inst_name in self.samplers[smplr_group]['plugins']: + plugin = self.plugins[inst_name] + sname = plugin['name'] + dstr += f'load name={sname}\n' + for cfg_ in plugin['config']: + if type(cfg_) is dict: + hostname = socket.gethostname() + if args.local: + cfg_args = { 'producer' : f'{hostname}', + 'instance' : f'{hostname}/{plugin["name"]}', + 'component_id' : '${LDMS_COMPONENT_ID}' } + else: + cfg_args = {} + prod = check_opt('producer', cfg_) + inst = check_opt('instance', cfg_) + if not prod: + cfg_args['producer'] = '{hostname}' + if not inst: + cfg_args['instance'] = '{hostname}/{plugin["name"]}' + for attr in cfg_: + if attr == 'name' or attr == 'interval': + continue + cfg_args[attr] = cfg_[attr] + cfg_str = parse_to_cfg_str(cfg_args) + else: + cfg_str = cfg_ + + interval = check_intrvl_str(plugin['interval']) + dstr += f'config name={sname} {cfg_str}\n' + dstr += f'start name={sname} interval={interval}' + offset = check_opt('offset', plugin) + dstr = self.write_opt_attr(dstr, 'offset', offset) + return dstr + + def write_stream_subscribe(self, dstr, group_name, agg): + subscribe = check_opt('subscribe', self.aggregators[group_name][agg]) + if subscribe: + for stream in subscribe: + regex = check_opt('regex', stream) + if regex is None: + regex = '.*' + dstr += f'prdcr_subscribe stream={stream["stream"]} '\ + f'regex={regex}\n' + return dstr + + def write_aggregator(self, dstr, group_name, dmn): + # Agg config + try: + ''' "Balance" agg configuration if all samplers are included in each aggregator ''' + if group_name not in self.aggregators: + return dstr + auth_list = {} + dstr, auth_list = self.write_listeners(dstr, group_name, dmn, auth_list) + dstr, auth_list = self.write_producers(dstr, group_name, dmn, auth_list) + dstr = self.write_stream_subscribe(dstr, group_name, dmn) + dstr = self.write_agg_plugins(dstr, group_name, dmn) + dstr = self.write_updaters(dstr, group_name) + dstr = self.write_stores(dstr, group_name) + return dstr + except Exception as e: + ea, eb, ec = sys.exc_info() + print('Agg config Error: '+str(e)+' Line:'+str(ec.tb_lineno)) + raise ValueError + + def write_agg_plugins(self, dstr, group_name, agg): + # Write independent plugin configuration for group + plugins = check_opt('plugins', self.aggregators[group_name][agg]) + if plugins is not None: + for plugn in plugins: + plugin = self.plugins[plugn] + dstr += f'load name={plugin["name"]}\n' + for cfg_ in plugin["config"]: + if type(cfg_) is dict: + cfg_str = parse_to_cfg_str(plugin["config"]) + else: + cfg_str = cfg_ + dstr += f'config name={plugin["name"]} {cfg_str}\n' + return dstr + + def write_updaters(self, dstr, group_name): + if group_name in self.updaters: + updtr_group = self.updaters[group_name] + for updtr in updtr_group: + interval = check_intrvl_str(updtr_group[updtr]['interval']) + updtr_str = f'updtr_add name={updtr_group[updtr]["name"]}' + if 'mode' in updtr_group[updtr]: + mode = 
+
+    def write_updaters(self, dstr, group_name):
+        if group_name in self.updaters:
+            updtr_group = self.updaters[group_name]
+            for updtr in updtr_group:
+                interval = check_intrvl_str(updtr_group[updtr]['interval'])
+                updtr_str = f'updtr_add name={updtr_group[updtr]["name"]}'
+                if 'mode' in updtr_group[updtr]:
+                    mode = updtr_group[updtr]['mode']
+                else:
+                    mode = 'pull'
+                # Check mode
+                if mode == 'push':
+                    updtr_str = f'{updtr_str} push=True'
+                elif mode == 'onchange':
+                    updtr_str = f'{updtr_str} push=onchange'
+                elif mode == 'auto_interval' or mode == 'auto':
+                    updtr_str = f'{updtr_str} auto_interval=True'
+                dstr += f'{updtr_str} '\
+                        f'interval={interval}'
+                offset = check_opt('offset', updtr_group[updtr])
+                dstr = self.write_opt_attr(dstr, 'offset', offset)
+                for prod in updtr_group[updtr]['producers']:
+                    dstr += f'updtr_prdcr_add name={updtr_group[updtr]["name"]} '\
+                            f'regex={prod["regex"]}\n'
+                dstr += f'updtr_start name={updtr_group[updtr]["name"]}\n'
+        return dstr
+
+    def write_stores(self, dstr, group_name):
+        if group_name in self.stores:
+            store_group = self.stores[group_name]
+            loaded_plugins = []
+            for store in store_group:
+                if store_group[store]['plugin'] not in loaded_plugins:
+                    if store_group[store]['plugin'] not in self.plugins:
+                        print(f'Error: Storage policy plugin reference {store_group[store]["plugin"]} '\
+                              f'is not defined in the top level "plugins" dictionary\n'
+                              f'Continuing...\n')
+                        continue
+                    plugin = self.plugins[store_group[store]['plugin']]
+                    dstr += f'load name={plugin["name"]}\n'
+                    for cfg_ in plugin['config']:
+                        if type(cfg_) is dict:
+                            cfg_str = parse_to_cfg_str(cfg_)
+                        else:
+                            cfg_str = cfg_
+                        dstr += f'config name={plugin["name"]} '\
+                                f'{cfg_str}\n'
+                    loaded_plugins.append(store_group[store]['plugin'])
+                strgp_add = f'strgp_add name={store} plugin={plugin["name"]} '
+                strgp_add += f'container={store_group[store]["container"]} '
+                strgp_add += f'schema={store_group[store]["schema"]}'
+                dstr += strgp_add
+                flush = check_opt('flush', store_group[store])
+                dstr = self.write_opt_attr(dstr, 'flush', flush)
+                dstr += f'strgp_start name={store}\n'
+        return dstr
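+    # For illustration (hypothetical names): a store entry "meminfo-sos" using
+    # plugin "store_sos" with container "ldms_data" and schema "meminfo"
+    # renders roughly as:
+    #   load name=store_sos
+    #   config name=store_sos <plugin config attributes>
+    #   strgp_add name=meminfo-sos plugin=store_sos container=ldms_data schema=meminfo
+    #   strgp_start name=meminfo-sos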
+
+    def daemon_config(self, path, dname):
+        """
+        Write a specific daemon's V4 configuration to file.
+        """
+        dmn = None
+        grp = None
+        for dmn_grp in self.daemons:
+            if dname in self.daemons[dmn_grp]:
+                dmn = self.daemons[dmn_grp][dname]
+                grp = dmn_grp
+                break
+        if dmn is None:
+            print(f'Error: {dname} does not exist in YAML configuration file {path}\n')
+            return 1
+        dstr = ''
+        dstr = self.write_sampler(dstr, grp, dname)
+        dstr = self.write_aggregator(dstr, grp, dname)
+        return f'{dstr}\0'
+
+    def config_v4(self, path):
+        """
+        Read the group configuration from the YAML file and generate a
+        version 4 LDMSD configuration. This configuration assumes that the
+        environment variables COMPONENT_ID and HOSTNAME exist on the
+        machines relevant to the ldmsd cluster.
+        """
+        for group_name in self.daemons:
+            # Sampler config
+            if self.samplers is not None:
+                try:
+                    # TO DO: Refactor sampler config architecture to more easily reference appropriate groups
+                    if group_name in self.samplers:
+                        fd = open(f'{path}/{group_name}-samplers.conf', 'w+')
+                        dstr = ''
+                        dstr = self.write_samplers(dstr, group_name)
+                        for dmn_name in self.daemons[group_name]:
+                            dstr, auth_list = self.write_listeners(dstr, group_name, dmn_name)
+                        fd.write(dstr)
+                        if fd:
+                            fd.close()
+                except Exception as e:
+                    a, b, d = sys.exc_info()
+                    print(f'Error generating sampler configuration: {str(e)} {str(d.tb_lineno)}')
+                    sys.exit()
+            else:
+                print(f'"samplers" not found in configuration file. Skipping...')
+
+            # Write aggregators in daemon group
+            if group_name in self.aggregators:
+                for dmn in self.aggregators[group_name]:
+                    fd = open(f'{path}/{dmn}.conf', 'w+')
+                    dstr = ''
+                    dstr = self.write_aggregator(dstr, group_name, dmn)
+                    fd.write(dstr)
+                    fd.close()
diff --git a/ldms/src/ldmsd/ldmsd_config.c b/ldms/src/ldmsd/ldmsd_config.c
index e07ac61a7..f41a172d0 100644
--- a/ldms/src/ldmsd/ldmsd_config.c
+++ b/ldms/src/ldmsd/ldmsd_config.c
@@ -1000,7 +1000,7 @@ char *__process_yaml_config_file(const char *path, const char *dname)
 	return config_str;
 }
 
-int __req_deferred_start_regex(ldmsd_req_hdr_t req, ldmsd_cfgobj_type_t type)
+int __req_deferred_start_regex(ldmsd_req_ctxt_t reqc, ldmsd_cfgobj_type_t type)
 {
 	regex_t regex = {0};
 	ldmsd_cfgobj_t obj;

From 2b0c6585e5494dd29f50d09f5fe2411723f5e253 Mon Sep 17 00:00:00 2001
From: nick
Date: Tue, 29 Oct 2024 16:11:42 -0500
Subject: [PATCH 3/3] Add advertiser and producer listener support to YAML

Support for advertisers and producer listeners in the YAML configuration file
Support for multiple YAML files on the ldmsd command line
Update error handling in parser_util.py
Update error handling in __process_yaml_config_file
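For illustration, a hypothetical "advertise"/"prdcr_listen" pair might look
like the following (attribute names follow the check_required/check_opt calls
in parser_util.py below; values are examples only, and "names" must expand to
the same count as "daemons"):

    samplers:
      - daemons : "node-[1-4]"
        advertise :
          names : "ad-[1-4]"
          hosts : "agg-host"
          xprt : sock
          port : 10444
          reconnect : 10s
    aggregators:
      - daemons : "l1-aggregators"
        prdcr_listen :
          - name : "computes"
            regex : "node.*"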
Error in "'+ plugn +'" configuration') check_required(['config'], plugin, '"plugin" entry') if type(plugin['config']) is not list: - raise ValueError('"config" must be a list of configuration commands') + print('"config" must be a list of configuration commands') + sys.exit(22) for cfg in plugin['config']: if type(cfg) is not dict and type(cfg) is not str: - raise ValueError('"config" list members must be a dictionary or a string') + print('"config" list members must be a dictionary or a string') + sys.exit(22) return plugin def parse_to_cfg_str(cfg_obj): @@ -284,6 +291,57 @@ def build_daemons(self, config): sys.exit() return ep_dict + def build_advertisers(self, spec): + if 'advertise' not in spec: + return + ad_grp = spec['advertise'] + check_required(['names', 'hosts', 'xprt', 'port', 'reconnect'], + ad_grp, '"advertise" entry') + names = expand_names(ad_grp['names']) + dmns = expand_names(spec['daemons']) + if len(names) != len(dmns): + print(f'Please provide a regex for "names" that is equal to the number of daemons'\ + 'to advertise from\n') + sys.exit() + auth_name, plugin, auth_opt = check_auth(ad_grp) + perm = check_opt('perm', ad_grp) + rail = check_opt('rail', ad_grp) + credits = check_opt('credits', ad_grp) + rx_rate = check_opt('rx_rate', ad_grp) + ad_list = expand_names(spec['daemons']) + self.advertisers[spec['daemons']] = {'names' : ad_grp['names'], + 'hosts' : ad_grp['hosts'], + 'xprt' : ad_grp['xprt'], + 'port' : ad_grp['port'], + 'reconnect' : ad_grp['reconnect'], + 'auth' : { 'name' : auth_name, + 'conf' : auth_opt, + 'plugin' : plugin }, + 'perm' : perm, + 'rail' : rail, + 'credits' : credits, + 'rx_rate' : rx_rate, + 'ad_list' : ad_list + } + + def build_prdcr_listeners(self, spec): + if 'prdcr_listen' not in spec: + return + for pl in spec['prdcr_listen']: + check_required(['name'], pl, + '"prdcr_listen" entry') + node_listen = {} + regex = check_opt('regex', pl) + rail = check_opt('rail', pl) + credits = check_opt('credits', pl) + rx_rate = check_opt('rx_rate', pl) + node_listen[pl['name']] = { 'rail' : rail, + 'credits' : credits, + 'rx_rate' : rx_rate, + 'regex' : regex + } + self.prdcr_listeners[spec['daemons']] = node_listen + def build_aggregators(self, config): aggregators = {} if 'aggregators' not in config: @@ -291,13 +349,13 @@ def build_aggregators(self, config): agg_conf = config['aggregators'] if type(agg_conf) is not list: print(f'{LDMS_YAML_ERR}') - print(f'aggregators {LIST_ERR}') - print(f'e.g. aggregators:') - print(f' - daemons: "l1-aggregators"') - print(f' peers :') - print(f' - daemons : "samplers"') - print(f' ... : ...') - return aggregators + print(f' aggregators {LIST_ERR}\n') + print(f'e.g. aggregators:\n') + print(f' - daemons: "l1-aggregators"\n') + print(f' peers :\n') + print(f' - daemons : "samplers"\n') + print(f' ... 
 
     def build_aggregators(self, config):
         aggregators = {}
         if 'aggregators' not in config:
@@ -291,13 +349,13 @@ def build_aggregators(self, config):
         agg_conf = config['aggregators']
         if type(agg_conf) is not list:
             print(f'{LDMS_YAML_ERR}')
-            print(f'aggregators {LIST_ERR}')
-            print(f'e.g. aggregators:')
-            print(f'       - daemons: "l1-aggregators"')
-            print(f'         peers :')
-            print(f'           - daemons : "samplers"')
-            print(f'             ... : ...')
-            return aggregators
+            print(f'aggregators {LIST_ERR}\n')
+            print(f'e.g. aggregators:\n')
+            print(f'       - daemons: "l1-aggregators"\n')
+            print(f'         peers :\n')
+            print(f'           - daemons : "samplers"\n')
+            print(f'             ... : ...\n')
+            sys.exit(22)
         for agg_spec in agg_conf:
             check_required([ 'daemons' ],
                            agg_spec, '"aggregators" entry')
@@ -314,7 +372,8 @@ def build_aggregators(self, config):
                 if group == daemons['names']:
                     daemons_ = daemons
             if daemons_ is None:
-                raise ValueError(f"No daemons matched matched daemon key {group}")
+                print(f"No daemons matched daemon key {group}")
+                sys.exit(22)
             if group not in aggregators:
                 aggregators[group] = {}
             subscribe = check_opt('subscribe', agg_spec)
@@ -323,6 +382,8 @@ def build_aggregators(self, config):
                     check_required([ 'stream', 'regex' ], stream, "stream specification")
             for name in names:
                 aggregators[group][name] = { 'state' : 'stopped' } # 'running', 'error'
+            self.build_advertisers(agg_spec)
+            self.build_prdcr_listeners(agg_spec)
             if subscribe:
                 aggregators[group][name]['subscribe'] = subscribe
             if plugins:
@@ -345,7 +406,7 @@ def build_producers(self, config):
                 print(f'       - daemons: "samplers"')
                 print(f'         endpoints : "sampler-endpoints"')
                 print(f'         ... : ...')
-                continue
+                sys.exit(22)
             for prod in agg['peers']:
                 check_required([ 'endpoints', 'updaters',
                                  'reconnect', 'type', ],
@@ -394,6 +455,7 @@ def build_producers(self, config):
             except:
                 print(f'Error building producer config:\n'\
                       f'Please ensure "endpoints" is configured to the correct number of ports specified.')
+                sys.exit(22)
         return producers
 
     def build_updaters(self, config):
@@ -416,7 +478,7 @@ def build_updaters(self, config):
                 print(f'          sets :')
                 print(f'            - regex : ".*"')
                 print(f'              field : inst')
-                continue
+                sys.exit(22)
             for updtr_spec in prod['updaters']:
                 check_required([ 'interval', 'sets', ],
                                updtr_spec, '"updaters" entry')
@@ -426,8 +488,9 @@ def build_updaters(self, config):
             grp_updaters = updaters[group]
             updtr_name = f'updtr_{updtr_cnt}'
             if updtr_name in grp_updaters:
-                raise ValueError(f"Duplicate updater name '{updtr_name}''. "\
-                                 f"An updater name must be unique within the group")
+                print(f"Duplicate updater name '{updtr_name}'. "\
+                      f"An updater name must be unique within the group")
+                sys.exit(22)
             updtr = {
                 'name' : updtr_name,
                 'interval' : check_intrvl_str(updtr_spec['interval']),
@@ -461,7 +524,7 @@ def build_stores(self, config):
             print(f'        daemons : "l1-aggregators"')
             print(f'        container : ldms_data')
             print(f'        ... : ...')
-            return None
+            sys.exit(22)
         for store in config['stores']:
             store_spec = config['stores'][store]
             store_spec['name'] = store
@@ -474,15 +537,17 @@ def build_stores(self, config):
             schema = check_opt('schema', store_spec)
             regex = check_opt('regex', store_spec)
             if decomp and not schema and not regex:
-                raise ValueError("Decomposition plugin configuration requires either"
-                                 " 'schema' or 'regex' attribute'")
+                print("Decomposition plugin configuration requires either"
+                      " 'schema' or 'regex' attribute")
+                sys.exit(22)
             group = store_spec['daemons']
             if group not in stores:
                 stores[group] = {}
             grp_stores = stores[group]
             if store in grp_stores:
-                raise ValueError(f"Duplicate store name '{store}'. "
-                                 "A store name must be unique within the group")
+                print(f"Duplicate store name '{store}'. "
+                      f"A store name must be unique within the group")
+                sys.exit(22)
" + f"A store name must be unique within the group") + sys.exit(22) check_opt('flush', store_spec) check_plugin_config(store_spec['plugin'], self.plugins) grp_stores[store] = store_spec @@ -511,10 +576,11 @@ def build_samplers(self, config): print(f' component_id : "10001"') print(f' producer : "node-1"') print(f' perm : "0777"') - return None + sys.exit(22) for smplr_spec in config['samplers']: check_required([ 'daemons', 'plugins' ], smplr_spec, '"sampler" entry') + self.build_advertisers(smplr_spec) for plugin in smplr_spec['plugins']: check_plugin_config(plugin, self.plugins) smplrs[smplr_spec['daemons']] = smplr_spec @@ -536,13 +602,14 @@ def build_plugins(self, config): print(f' name : meminfo') print(f' interval : 1.0s') print(f' config : [ { schema : meminfo }, { ... : ... } ]') - + sys.exit(22) plugins = {} plugn_spec = config['plugins'] for plugn in plugn_spec: if plugn in plugins: - raise ValueError(f'Duplicate plugin name "{plugin_name}". ' - f'Plugin must be unique within a group.') + print(f'Duplicate plugin name "{plugin_name}". ' + f'Plugin must be unique within a group.') + sys.exit(22) check_plugin_config(plugn, plugn_spec) plugins[plugn] = plugn_spec[plugn] return plugins @@ -555,6 +622,8 @@ def __init__(self, client, name, cluster_config, args): self.name = name self.args = args self.cluster_config = cluster_config + self.advertisers = {} + self.prdcr_listeners = {} self.daemons = self.build_daemons(cluster_config) self.plugins = self.build_plugins(cluster_config) self.aggregators = self.build_aggregators(cluster_config) @@ -590,6 +659,38 @@ def ldmsd_arg_list(self, local_path, dmn_grp, dmn): start_list.append(f'-F') return start_list + def write_advertisers(self, dstr, dmn_grp, dname, auth_list): + if dmn_grp not in self.advertisers: + return dstr, auth_list + ad_grp = self.advertisers[dmn_grp] + for host in expand_names(self.advertisers[dmn_grp]['hosts']): + auth, plugin, auth_opt = check_auth(ad_grp) + if auth not in auth_list: + auth_list[auth] = { 'conf' : auth_opt } + dstr += f'auth_add name={auth}' + dstr = self.write_opt_attr(dstr, 'plugin', plugin, endline=False) + dstr = self.write_opt_attr(dstr, 'conf', auth_opt) + dstr += f'advertiser_add name={dname}-{host} host={host} xprt={ad_grp["xprt"]} port={ad_grp["port"]} '\ + f'reconnect={ad_grp["reconnect"]}' + perm = check_opt('perm', ad_grp) + dstr = self.write_opt_attr(dstr, 'auth', auth, endline=False) + dstr = self.write_opt_attr(dstr, 'perm', perm, endline=True) + dstr += f'advertiser_start name={dname}-{host}\n' + return dstr, auth_list + + def write_prdcr_listeners(self, dstr, dmn_grp): + if dmn_grp not in self.prdcr_listeners: + return dstr + plisten = self.prdcr_listeners[dmn_grp] + for pl in plisten: + dstr += f'prdcr_listen_add name={pl} reconnect={plisten[pl]["reconnect"]}' + dstart = check_opt('disable_start', plisten[pl]) + regex = check_opt('regex', plisten[pl]) + dstr = self.write_opt_attr(dstr, 'disable_start', dstart, endline=False) + dstr = self.write_opt_attr(dstr, 'regex', regex, endline=True) + dstr += f'prdcr_listen_start name={pl}\n' + return dstr + def write_listeners(self, dstr, dmn_grp, dmn_name, auth_list={}): for endp in self.daemons[dmn_grp][dmn_name]['endpoints']: ep = self.daemons[dmn_grp][dmn_name]['endpoints'][endp] @@ -633,8 +734,8 @@ def write_producers(self, dstr, group_name, dmn, auth_list): auth_list[auth] = { 'conf' : auth_opt } plugin = check_opt('plugin', self.daemons[producer['dmn_grp']][producer['daemon']]['endpoints'][ep]['auth']) if plugin is None: - print(f'Please 
-                        print(f'Please specify auth plugin type for producer "{producer["daemon"]}" with auth name "{auth}"\n'\
-                              'configuration file generation will continue, but auth will likely be denied.\n')
+                        #print(f'Please specify auth plugin type for producer "{producer["daemon"]}" with auth name "{auth}"\n'\
+                        #      'configuration file generation will continue, but auth will likely be denied.\n')
                         plugin = auth
                     dstr += f'auth_add name={auth} plugin={plugin}'
                     dstr = self.write_opt_attr(dstr, 'conf', auth_list[auth]['conf'])
@@ -661,7 +762,7 @@ def write_producers(self, dstr, group_name, dmn, auth_list):
                     dstr += f'prdcr_start_regex regex={producer["regex"]}\n'
             if not regex:
                 dstr += f'prdcr_start_regex regex=.*\n'
-            return dstr, auth_list
+        return dstr, auth_list
 
     def write_env(self, dstr, grp, dname):
         if grp not in self.daemons:
@@ -671,7 +772,7 @@ def write_env(self, dstr, grp, dname):
         if check_opt('environment', self.daemons[grp][dname]):
             if type(self.daemons[grp][dname]['environment']) is not dict:
                 print(f'Error: Environment variables must be a yaml key:value dictionary\n')
-                sys.exit()
+                sys.exit(22)
             for attr in self.daemons[grp][dname]['environment']:
                 dstr += f'env {attr}={self.daemons[grp][dname]["environment"][attr]}\n'
         return dstr
@@ -681,6 +782,7 @@ def write_sampler(self, dstr, smplr_grp, sname):
             return dstr
         dstr = self.write_env(dstr, smplr_grp, sname)
         dstr, auth_list = self.write_listeners(dstr, smplr_grp, sname)
+        dstr, auth_list = self.write_advertisers(dstr, smplr_grp, sname, auth_list)
         for plugin in self.samplers[smplr_grp]['plugins']:
             plugn = self.plugins[plugin]
             dstr += f'load name={plugn["name"]}\n'
@@ -764,6 +866,7 @@ def write_aggregator(self, dstr, group_name, dmn):
             auth_list = {}
             dstr, auth_list = self.write_listeners(dstr, group_name, dmn, auth_list)
             dstr, auth_list = self.write_producers(dstr, group_name, dmn, auth_list)
+            dstr = self.write_prdcr_listeners(dstr, group_name)
             dstr = self.write_stream_subscribe(dstr, group_name, dmn)
             dstr = self.write_agg_plugins(dstr, group_name, dmn)
             dstr = self.write_updaters(dstr, group_name)
@@ -772,7 +875,7 @@ def write_aggregator(self, dstr, group_name, dmn):
         except Exception as e:
             ea, eb, ec = sys.exc_info()
             print('Agg config Error: '+str(e)+' Line:'+str(ec.tb_lineno))
-            raise ValueError
+            sys.exit(22)
 
     def write_agg_plugins(self, dstr, group_name, agg):
         # Write independent plugin configuration for group
@@ -824,9 +927,8 @@ def write_stores(self, dstr, group_name):
                 if store_group[store]['plugin'] not in loaded_plugins:
                     if store_group[store]['plugin'] not in self.plugins:
                         print(f'Error: Storage policy plugin reference {store_group[store]["plugin"]} '\
-                              f'is not defined in the top level "plugins" dictionary\n'
-                              f'Continuing...\n')
-                        continue
+                              f'is not defined in the top level "plugins" dictionary\n')
+                        sys.exit(22)
                     plugin = self.plugins[store_group[store]['plugin']]
                     dstr += f'load name={plugin["name"]}\n'
                     for cfg_ in plugin['config']:
@@ -859,7 +961,7 @@ def daemon_config(self, path, dname):
             break
         if dmn is None:
             print(f'Error: {dname} does not exist in YAML configuration file {path}\n')
-            return 1
+            sys.exit(22)
         dstr = ''
         dstr = self.write_sampler(dstr, grp, dname)
         dstr = self.write_aggregator(dstr, grp, dname)
diff --git a/ldms/src/ldmsd/ldmsd.c b/ldms/src/ldmsd/ldmsd.c
index 6c02c0a7a..2494632b5 100644
--- a/ldms/src/ldmsd/ldmsd.c
+++ b/ldms/src/ldmsd/ldmsd.c
@@ -2292,21 +2292,17 @@ int main(int argc, char *argv[])
 	TAILQ_INIT(&yamlfile_list);
 	TAILQ_INIT(&cfgfile_list);
 	struct ldmsd_str_ent *cpath;
-	struct ldmsd_str_ent *ypath;
-	struct ldmsd_str_ent *config_str;
+	struct ldmsd_str_ent *conf_str;
 	char *resp;
 	while ((op = getopt_long(argc, argv,
 				 short_opts, long_opts, NULL)) != -1) {
 		switch (op) {
 		case 'y':
-			ypath = ldmsd_str_ent_new(optarg);
-			TAILQ_INSERT_TAIL(&yamlfile_list, ypath, entry);
-			while ((ypath = TAILQ_FIRST(&yamlfile_list))) {
-				resp = process_yaml_config_file(ypath->str, myname);
-				TAILQ_REMOVE(&yamlfile_list, ypath, entry);
-				ldmsd_str_ent_free(ypath);
-			}
-			config_str = ldmsd_str_ent_new(resp);
-			TAILQ_INSERT_TAIL(&yamlfile_list, config_str, entry);
+			resp = process_yaml_config_file(optarg, myname);
+			if (!resp)
+				cleanup(22, "Error processing YAML configuration file");
+			conf_str = ldmsd_str_ent_new(resp);
+			free(resp);
+			TAILQ_INSERT_TAIL(&yamlfile_list, conf_str, entry);
 			break;
 		case 'c':
 			cpath = ldmsd_str_ent_new(optarg);
@@ -2316,19 +2312,17 @@ int main(int argc, char *argv[])
 	}
 
 	int lln;
-	while ((config_str = TAILQ_FIRST(&yamlfile_list))) {
+	TAILQ_FOREACH(conf_str, &yamlfile_list, entry) {
 		lln = -1;
-		ret = process_config_str(config_str->str, &lln, 1);
+		ret = process_config_str(conf_str->str, &lln, 1);
 		if (ret) {
 			char errstr[128];
 			snprintf(errstr, sizeof(errstr),
 				 "Error %d processing configuration file '%s'",
-				 ret, config_str->str);
+				 ret, conf_str->str);
 			ldmsd_str_list_destroy(&yamlfile_list);
 			cleanup(ret, errstr);
 		}
-		TAILQ_REMOVE(&yamlfile_list, config_str, entry);
-		ldmsd_str_ent_free(config_str);
 	}
 	while ((cpath = TAILQ_FIRST(&cfgfile_list))) {
 		lln = -1;
@@ -2525,15 +2519,20 @@ int main(int argc, char *argv[])
 			break;
 		case 'y':
 			has_config_file = 1;
-			ret = process_config_str(resp, &lln, 1);
-			if (ret) {
-				char errstr[128];
-				snprintf(errstr, sizeof(errstr),
-					 "Error %d processing configuration string '%s'",
-					 ret, resp);
-				cleanup(ret, errstr);
+			while ((conf_str = TAILQ_FIRST(&yamlfile_list))) {
+				lln = -1;
+				ret = process_config_str(conf_str->str, &lln, 1);
+				if (ret) {
+					char errstr[128];
+					snprintf(errstr, sizeof(errstr),
+						 "Error %d processing configuration file '%s'",
+						 ret, conf_str->str);
+					ldmsd_str_list_destroy(&yamlfile_list);
+					cleanup(ret, errstr);
+				}
+				TAILQ_REMOVE(&yamlfile_list, conf_str, entry);
+				ldmsd_str_ent_free(conf_str);
 			}
-			free(resp);
 			break;
 		}
 	}
diff --git a/ldms/src/ldmsd/ldmsd_config.c b/ldms/src/ldmsd/ldmsd_config.c
index f41a172d0..4e971ef0a 100644
--- a/ldms/src/ldmsd/ldmsd_config.c
+++ b/ldms/src/ldmsd/ldmsd_config.c
@@ -773,10 +773,8 @@ int __process_config_file(const char *path, int *lno, int trust,
 
 static
 int __process_config_str(char *cfg_str, int *lno, int trust,
-			 int (*req_filter)(ldmsd_cfg_xprt_t, ldmsd_req_hdr_t, void *),
-			 void *ctxt)
+			 req_filter_fn req_filter, void *ctxt)
 {
-	static uint32_t msg_no = 0;
 	int rc = 0;
 	int lineno = 0;
 	char *buff = NULL;
@@ -800,7 +798,6 @@ int __process_config_str(char *cfg_str, int *lno, int trust,
 	}
 	line_sz = LDMSD_CFG_FILE_XPRT_MAX_REC;
 	xprt.type = LDMSD_CFG_TYPE_FILE;
-	xprt.file.cfgfile_id = __get_cfgfile_id();
 	xprt.send_fn = log_response_fn;
 	xprt.max_msg = LDMSD_CFG_FILE_XPRT_MAX_REC;
 	xprt.trust = trust;
@@ -887,11 +884,11 @@ int __process_config_str(char *cfg_str, int *lno, int trust,
 		}
 	}
 
-	req_array = ldmsd_parse_config_str(line, msg_no, xprt.max_msg, ldmsd_log);
+	req_array = ldmsd_parse_config_str(line, lineno, xprt.max_msg, ldmsd_log);
 	if (!req_array) {
 		rc = errno;
-		ldmsd_log(LDMSD_LERROR, "Process config string error at line %d "
-			  "(%s). %s\n", lineno, cfg_str, STRERROR(rc));
+		ldmsd_log(LDMSD_LERROR, "Error processing config line '%s': %s\n",
+			  line, STRERROR(rc));
 		goto cleanup;
 	}
@@ -920,27 +917,7 @@ int __process_config_str(char *cfg_str, int *lno, int trust,
 		if (xprt.max_msg < ntohl(request->rec_len))
 			xprt.max_msg = ntohl(request->rec_len);
 
-		if (req_filter) {
-			rc = req_filter(&xprt, request, ctxt);
-			/* rc = 0, filter OK */
-			if (rc == 0) {
-				__dlog(DLOG_CFGOK, "# deferring line %d (%s): %s\n",
-				       lineno, cfg_str, line);
-				goto next_req;
-			}
-			/* rc == errno */
-			if (rc > 0) {
-				ldmsd_log(LDMSD_LERROR,
-					  "Configuration error at "
-					  "line %d (%s)\n", lineno, cfg_str);
-				goto cleanup;
-			} else {
-				/* rc < 0, filter not applied */
-				rc = 0;
-			}
-		}
-
-		rc = ldmsd_process_config_request(&xprt, request);
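+		/* The deferred-start filtering that previously lived here is
+		 * now performed inside ldmsd_process_config_request(), which
+		 * receives req_filter and ctxt directly. */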
%s\n", line, STRERROR(rc)); goto cleanup; } @@ -920,27 +917,7 @@ int __process_config_str(char *cfg_str, int *lno, int trust, if (xprt.max_msg < ntohl(request->rec_len)) xprt.max_msg = ntohl(request->rec_len); - if (req_filter) { - rc = req_filter(&xprt, request, ctxt); - /* rc = 0, filter OK */ - if (rc == 0) { - __dlog(DLOG_CFGOK, "# deferring line %d (%s): %s\n", - lineno, cfg_str, line); - goto next_req; - } - /* rc == errno */ - if (rc > 0) { - ldmsd_log(LDMSD_LERROR, - "Configuration error at " - "line %d (%s)\n", lineno, cfg_str); - goto cleanup; - } else { - /* rc < 0, filter not applied */ - rc = 0; - } - } - - rc = ldmsd_process_config_request(&xprt, request); + rc = ldmsd_process_config_request(&xprt, request, req_filter, ctxt); if (rc || xprt.rsp_err) { if (!rc) rc = xprt.rsp_err; @@ -951,13 +928,10 @@ int __process_config_str(char *cfg_str, int *lno, int trust, next_req: free(request); request = NULL; - msg_no += 1; off = 0; goto next_line; cleanup: - if (cfg_str) - free(cfg_str); if (buff) free(buff); if (line) @@ -975,29 +949,34 @@ char *__process_yaml_config_file(const char *path, const char *dname) FILE *fp; char command[256]; char cstr[256]; - char *cfg_str = malloc(256); + char *cfg_str = malloc(512); + *cfg_str = '\0'; snprintf(command, sizeof(command), "ldmsd_yaml_parser --ldms_config %s --daemon_name %s 2>&1", path, dname); fp = popen(command, "r"); if (!fp) printf("Error in yaml_parser\n"); - int lineno = 0; size_t char_cnt = 0; + int status; while (fgets(cstr, sizeof(cstr), fp) != NULL) { - printf("%s", cstr); - char_cnt += sizeof(cstr); - if (char_cnt >= 1024) - cfg_str = (char *)realloc(cfg_str, char_cnt - 256); - if (lineno) - strcat(cfg_str, cstr); - else - snprintf(cfg_str, sizeof(cstr), cstr); - lineno++; + char_cnt += strlen(cstr); + if (char_cnt > 512) { + cfg_str = (char *)realloc(cfg_str, char_cnt+1); + if (cfg_str == NULL) { + goto err; + } + } + strcat(cfg_str, cstr); } - pclose(fp); - char *config_str = strdup(cfg_str); - if (cfg_str) + status = pclose(fp); + if (status) + goto err; + return cfg_str; +err: + if (cfg_str) { + printf("ERROR: %s", cfg_str); free(cfg_str); - return config_str; + } + return NULL; } int __req_deferred_start_regex(ldmsd_req_ctxt_t reqc, ldmsd_cfgobj_type_t type) @@ -1234,6 +1213,7 @@ int process_config_str(char *config_str, int *lno, int trust) char *cfg_str = strdup(config_str); rc = __process_config_str(cfg_str, lno, trust, __req_filter_failover, &ldmsd_use_failover); + free(cfg_str); return rc; }