Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Netlink add jobid 4.4.x #1385

Merged
merged 3 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 133 additions & 0 deletions ldms/scripts/examples/linux_proc_sampler.job
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Name of the sampler plugin under test.
plugname=linux_proc_sampler
export plugname

# Schema name that dstat generates for this option set; computed once by the
# ldms_dstat_schema_name helper and published under both variable names.
dsname=$(ldms_dstat_schema_name mmalloc=1 io=1 fd=1 stat=1 auto-schema=1)
export dsname
dstat_schema=$dsname
export dstat_schema

# Daemon logging settings and extra ldmsd arguments.
# NOTE(review): presumably consumed by the sourcing test harness when it
# launches the daemons -- confirm against the harness.
LDMSD_LOG_LEVEL=ERROR
LDMSD_LOG_TIME_SEC=1
LDMSD_EXTRA="-m 128m"
export LDMSD_LOG_LEVEL LDMSD_LOG_TIME_SEC LDMSD_EXTRA

#######################################
# Sleep for a while as the invoking user, reporting progress on stdout.
# The sleep is run through runuser(1) as $USER.
# Globals:   bypass (read) - when "1" the sleep is skipped entirely
#            USER   (read) - account that runuser switches to
# Arguments: $1 - duration, handed unchanged to sleep(1)
# Outputs:   "skipping sleep", or "sleep <duration> ...done", on stdout
# Returns:   0 when bypassed; otherwise the status of the final echo
#######################################
SUSLEEP() {
  if [ "${bypass:-}" = "1" ]; then
    echo skipping sleep
    return 0
  fi
  # printf instead of "echo -n": same text, no dependence on echo flag handling.
  printf 'sleep %s ...' "$1"
  # Quoted so a user name or duration is always passed as exactly one argument.
  runuser -u "$USER" sleep "$1"
  echo done
}

# Base port number for this test.
# NOTE(review): presumably the harness derives the daemon ports from this
# value; the notifier further down is hard-wired to 61061 -- confirm.
portbase=61060

# Patterns for environment variables to leave out; this file is the one named
# by "env_exclude" in metrics.input. The heredoc delimiter is quoted because
# the body is literal pattern text and must never be shell-expanded.
# A stray line holding only a double quote used to end this list; it has been
# dropped because it acted as a pattern of its own.
cat << 'EOF' > "$LDMSD_RUN/exclude_env"
^COLORTERM
^DBU.*
^DESKTOP_SESSION
^DISPLAY
^GDM.*
^GNO.*
^GUESTFISH.*
^XDG.*
^LS_COLORS
^SESSION_MANAGER
^SSH.*
^XAU.*
^BASH_FUNC_m
EOF

# Syscall map; referenced by "syscalls" in metrics.input.
ldms-gen-syscall-map > "$LDMSD_RUN/syscalls.map"
# Write the JSON settings file that the sampler config passes as
# cfg_file=${LDMSD_RUN}/metrics.input. The heredoc delimiter is unquoted on
# purpose: ${LDMSD_RUN} is expanded by the shell, while \t and the "$" in
# "^/proc$" reach the file unchanged.
cat << EOF > $LDMSD_RUN/metrics.input
{ "stream" : "slurm",
"argv_sep":"\t",
"syscalls": "${LDMSD_RUN}/syscalls.map",
"argv_msg": 1,
"log_send": 1,
"env_msg": 1,
"env_exclude": "${LDMSD_RUN}/exclude_env",
"fd_msg": 1,
"fd_exclude": [
"^/dev/",
"^/run/",
"^/var/",
"^/etc/",
"^/sys/",
"^/tmp/",
"^/proc/",
"^/proc$",
"^/ram/tmp/",
"^/usr/lib",
"^/usr/share/",
"^/opt/ness",
"^/ram/opt/ness",
"^/ram/var/",
"/.nfs0"
],
"published_pid_dir" : "${LDMSD_RUN}/ldms-netlink-tracked",
"metrics" : [
"status_real_user",
"status_eff_user",
"status_real_group",
"status_eff_group",
"stat_pid" ,
"stat_state",
"stat_rss",
"stat_utime",
"stat_stime",
"io_read_b",
"io_write_b",
"syscall_name"
]
}
EOF
# Remove JSON logs (the notifier below writes $LOGDIR/json.log) left over from
# an earlier run.
rm -f "$LOGDIR"/json*.log

# Seed the tracking directory with entries named 80..100.
# NOTE(review): presumably stale entries for the notifier's --purge-track-dir
# option to clean up -- confirm.
# The directory is created first; nothing earlier in this script creates it,
# and touch cannot create missing parent directories.
mkdir -p "${LDMSD_RUN}/ldms-netlink-tracked"
for pi in {80..100}; do
  touch "${LDMSD_RUN}/ldms-netlink-tracked/$pi"
done

# -f: the notifier log does not exist on a fresh run; that is not an error.
/bin/rm -f "$LOGDIR/nl.log"

# JOBDATA is provided by the sourcing harness (not defined in this file).
# The notifier below reads $TESTDIR/job.data.1 via --jobid-file.
JOBDATA "$TESTDIR/job.data" 1

# Ready-made valgrind command prefixes. Nothing in this script uses them;
# NOTE(review): presumably meant to be put in front of the notifier command by
# hand when debugging -- they are multi-word, so expand them unquoted.
drd="valgrind -v --tool=drd --log-file=$LOGDIR/vg.netlink.drd.txt --trace-cond=yes --trace-fork-join=yes"
memcheck="valgrind -v --leak-check=full --track-origins=yes --trace-children=yes --log-file=$LOGDIR/vg.netlink.memcheck.txt --keep-debuginfo=yes --malloc-fill=3b"
#${BUILDDIR}/sbin/ldms-netlink-notifier --port=61061 --auth=none --reconnect=1 -D 30 -r -j $LOGDIR/json.log --exclude-dir-path= --exclude-short-path= --exclude-programs --track-dir=${LDMSD_RUN}/ldms-netlink-tracked --purge-track-dir &

# Launch the notifier in the background. Paths and the hostname are quoted so
# each is passed as a single argument; the empty --exclude-*= values are kept
# exactly as given.
"${BUILDDIR}/sbin/ldms-netlink-notifier" \
  --port=61061 --auth=none --reconnect=1 -D 30 -r \
  -j "$LOGDIR/json.log" \
  --exclude-dir-path= --exclude-short-path= --exclude-programs \
  --track-dir="${LDMSD_RUN}/ldms-netlink-tracked" \
  -x -e exec,clone,exit \
  -L "$LOGDIR/nl.log" \
  --heartbeat 1 -v 3 \
  --ProducerName="$(hostname)" \
  --purge-track-dir --format 2 \
  --jobid-file="$TESTDIR/job.data.1" &

# uncomment next one to test duplicate handling
#${BUILDDIR}/sbin/ldms-netlink-notifier --port=61061 --auth=none --reconnect=1 -D 30 -r -j $LOGDIR/json2.log --exclude-dir-path= --exclude-short-path= --exclude-programs &

# Valgrind options. NOTE(review): presumably read by the harness while vgon is
# in effect -- confirm. The drd variant was a dead assignment (overwritten on
# the very next line), so it is kept only as a commented alternative.
#VGARGS="--tool=drd --trace-cond=yes --trace-fork-join=yes"
VGARGS="--leak-check=full --track-origins=yes --trace-children=yes --show-leak-kinds=definite --time-stamp=yes --keep-debuginfo=yes --malloc-fill=3b"
# Daemon start / poll / stop sequence. LDMSD, vgon, vgoff, JOBDATA, MESSAGE,
# LDMS_LS and KILL_LDMSD are not defined in this file; they come from the
# harness that sources it. Statement order matters here.
# NOTE(review): presumably "LDMSD N" starts daemon N using the companion
# config file linux_proc_sampler.job.N -- confirm against the harness.
# vgon is disabled, so valgrind is not switched on for any daemon below.
#vgon
LDMSD -p prolog.jobid 1
vgoff
LDMSD 2
LDMSD 3
vgoff
# Each JOBDATA call below is followed by a pause (SUSLEEP).
JOBDATA $TESTDIR/job.data 1
SUSLEEP 2
# The "host 1" banner is still printed although its LDMS_LS call is disabled.
MESSAGE ldms_ls on host 1:
#LDMS_LS 1 -v
MESSAGE ldms_ls on host 2:
JOBDATA $TESTDIR/job.data 1
SUSLEEP 1
LDMS_LS 2 -v
JOBDATA $TESTDIR/job.data 1
SUSLEEP 5
# Disabled: dump the stream clients of the daemon on port 61061.
#MESSAGE stream_client_dump on sampler daemon 1
#for lc in $(seq 1); do
#ldmsd_controller --auth none --port 61061 --cmd stream_client_dump
# SUSLEEP 1
#done
JOBDATA $TESTDIR/job.data 1
SUSLEEP 5
# Single-iteration loop; raise the seq bound to repeat the refresh/pause cycle.
for lc in $(seq 1); do
#LDMS_LS 1 -v
JOBDATA $TESTDIR/job.data 1
SUSLEEP 2
done
JOBDATA $TESTDIR/job.data 1
# Final, longest pause before the daemons are stopped.
SUSLEEP 20
# Daemons are named in the reverse of the order they were started above.
KILL_LDMSD 3 2 1
# Check the expected outputs. file_created and rollover_created are provided
# by the sourcing harness (not defined in this file). Paths are quoted so each
# check receives exactly one argument.

# One file per schema in the "node" container: the sampler's schema (named
# after the plugin in the .job.1 config) and the dstat schema.
for schema in "$plugname" "$dsname"; do
  file_created "$STOREDIR/node/$schema"
done

# One .DAT file in the "blobs" container for each stream given to
# blob_stream_writer in the .job.2 config.
for blob in \
  linux_proc_sampler_argv \
  linux_proc_sampler_files \
  linux_proc_sampler_env \
  slurm; do
  rollover_created "$STOREDIR/blobs/$blob.DAT"
done
7 changes: 7 additions & 0 deletions ldms/scripts/examples/linux_proc_sampler.job.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Sampler daemon configuration: load the plugin named by ${plugname}, configure
# it from the JSON file ${LDMSD_RUN}/metrics.input, and start sampling.
# ${i} appears in the producer, instance and component_id values.
# NOTE(review): presumably ${i} is the daemon number substituted by the test
# harness -- confirm.
load name=${plugname}
config name=${plugname} producer=localhost${i} schema=${plugname} instance=localhost${i}/${plugname} component_id=${i} perm=0644 cfg_file=${LDMSD_RUN}/metrics.input
start name=${plugname} interval=1000000 offset=0

# Disabled: also run the dstat sampler on this daemon.
# load name=dstat
# config name=dstat producer=localhost${i} instance=localhost${i}/${dstat_schema} component_id=${i} mmalloc=1 io=1 fd=1 auto-schema=1 stat=1 perm=0777
# start name=dstat interval=1000000 offset=0
25 changes: 25 additions & 0 deletions ldms/scripts/examples/linux_proc_sampler.job.2
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Configuration for the daemon that collects from producer localhost1: it
# writes the subscribed streams as blobs, samples dstat locally, and stores
# linux_proc_sampler sets as CSV.

# blobs must be allowed by writer plugin and prdcr_subscribe by daemon
load name=blob_stream_writer plugin=blob_stream_writer
config name=blob_stream_writer path=${STOREDIR} container=blobs stream=slurm stream=linux_proc_sampler_env stream=linux_proc_sampler_argv types=1 stream=linux_proc_sampler_files

# Local dstat sampler. The option set matches the one used to compute
# ${dstat_schema} in the job script. A stray ")" after stat=1 was removed, and
# perm is written with a leading 0 like the perm=0644 in the sampler config.
load name=dstat
config name=dstat producer=localhost${i} instance=localhost${i}/${dstat_schema} component_id=${i} mmalloc=1 io=1 fd=1 auto-schema=1 stat=1 perm=0777
start name=dstat interval=1000000 offset=0

# Connect to producer localhost1 on ${port1} and subscribe to every stream
# that blob_stream_writer is configured to write.
prdcr_add name=localhost1 host=${HOST} type=active xprt=${XPRT} port=${port1} interval=2000000
prdcr_subscribe regex=.* stream=slurm
prdcr_subscribe regex=.* stream=linux_proc_sampler_argv
prdcr_subscribe regex=.* stream=linux_proc_sampler_env
prdcr_subscribe regex=.* stream=linux_proc_sampler_files
prdcr_start name=localhost1

# Update sets from all producers.
updtr_add name=allhosts interval=1000000 offset=100000
updtr_prdcr_add name=allhosts regex=.*
updtr_start name=allhosts

# Store sets with schema linux_proc_sampler as CSV in the "node" container.
load name=store_csv
config name=store_csv path=${STOREDIR} altheader=0

strgp_add name=store_${testname} plugin=store_csv schema=linux_proc_sampler container=node
strgp_prdcr_add name=store_${testname} regex=.*
strgp_start name=store_${testname}
13 changes: 13 additions & 0 deletions ldms/scripts/examples/linux_proc_sampler.job.3
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Second-level collection: connect to producer localhost2 on ${port2}, update
# its sets, and store sets with schema ${dstat_schema} as CSV.
# NOTE(review): presumably ${port2} is the listening port of the daemon
# configured by linux_proc_sampler.job.2 -- confirm against the harness.
prdcr_add name=localhost2 host=${HOST} type=active xprt=${XPRT} port=${port2} interval=2000000
prdcr_start name=localhost2

# Update sets from all producers; the offset here is 200000, versus 100000 in
# the .job.2 updater.
updtr_add name=allhosts interval=1000000 offset=200000
updtr_prdcr_add name=allhosts regex=.*
updtr_start name=allhosts

# CSV store rooted at ${STOREDIR}.
load name=store_csv
config name=store_csv path=${STOREDIR} altheader=0

# Store policy: sets with schema ${dstat_schema}, from any producer, go into
# the "node" container.
strgp_add name=store_dstat plugin=store_csv schema=${dstat_schema} container=node
strgp_prdcr_add name=store_dstat regex=.*
strgp_start name=store_dstat
Loading
Loading