
WIP add benchmark benchmark-cluster-query-pathological-queries #1915

Closed
wants to merge 2 commits
62 changes: 62 additions & 0 deletions benchmarks/cluster-query-pathological-queries/lib.sh
@@ -0,0 +1,62 @@

# 11 series at a time x 1h each, 50Hz
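# (no -rate flag is passed, so vegeta uses its default of 50 requests/second, hence the 50Hz)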
function same11 () {
echo -e 'GET http://localhost:6061/render?target=some.id.of.a.metric.123*&from=-1h\nX-Org-Id: 1\n\n' | vegeta attack -duration 60s | vegeta report
}

# 1 series at a time out of the same set of 11, 50Hz
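# (vegeta loops over the 11 distinct targets in order, so each series is hit roughly 50/11 ≈ 4.5 times per second)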
function distinct11 () {
for i in 123 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239; do
echo -e "GET http://localhost:6061/render?target=some.id.of.a.metric.$i*&from=-1h\nX-Org-Id: 1\n\n"
done | vegeta attack -duration 60s | vegeta report
}

# 2k series x 1440 points each fetched = 2.88M fetched; the MDP default of 800 consolidates this ~2:1 to ~720 points/series, i.e. ~1.44M returned total
# comes out of RAM+cassandra, rate 0.2Hz
function pathological_all () {
echo -e 'GET http://localhost:6061/render?target=some.id.of.a.metric.*&from=-24h\nX-Org-Id: 1\n\n' | vegeta attack -duration 60s -rate '1/5s' | vegeta report
}

# 2k series x 1440 points each fetched = 2.88M fetched+returned
# comes out of RAM+cassandra, rate 0.2Hz
function pathological_all_no_mdp () {
echo -e 'GET http://localhost:6061/render?target=some.id.of.a.metric.*&maxDataPoints=1500&from=-24h\nX-Org-Id: 1\n\n' | vegeta attack -duration 60s -rate '1/5s' | vegeta report
}

# 2k series x 6 points each = 12k fetched+returned
# comes out of RAM
# <100ms response each on dieter's laptop
# rate 0.5Hz
function pathological_all_tiny () {
echo -e 'GET http://localhost:6061/render?target=some.id.of.a.metric.*&from=-60s\nX-Org-Id: 1\n\n' | vegeta attack -duration 60s -rate '1/2s' | vegeta report
}


# any series, over any time range of x hours within the last 24 hours (on avg 12h)
# rate 0.5Hz
function anySeriesAnyTimeRange () {
everySeriesEveryTimeRange | sort -R | vegeta attack -duration 60s -rate '1/2s' | vegeta report
}

# every one of the 2k series, with all time ranges of x hours within the last 24 hours (on avg 12h)
function everySeriesEveryTimeRange () {
for m in {1..2000}; do
# increment timeranges in 1h ranges
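# for each series: from = start hours ago (1..24), until = end hours ago (0..start-1, where 0 means now), so window lengths range from 1h up to start hours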
for start in {1..24}; do
maxEnd=$((start-1))
for end in `seq 0 $maxEnd`; do
if [ "$end" -eq 0 ]; then
until=now
else
until="-${end}h"
fi
echo -e "GET http://localhost:6061/render?target=some.id.of.a.metric.$m*&from=-${start}h&until=$until\nX-Org-Id: 1\n\n"
done
done
done
}
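
# to sanity-check a few of the generated targets without attacking (a quick preview only):
#   everySeriesEveryTimeRange | head -3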

# could not get mt-index-cat to work... should be something like:
# ./build/mt-index-cat -addr http://localhost:6061 -from 60min -regex 'some\.id\.of\.a\.metric\.9....' cass -hosts localhost:9042 -schema-file scripts/config/schema-idx-cassandra.toml 'GET http://localhost:6061/render?target={{.Name | patternCustom 100 "1rccw" }}&from=-1h\nX-Org-Id: 1\n\n'


47 changes: 47 additions & 0 deletions benchmarks/cluster-query-pathological-queries/run.sh
@@ -0,0 +1,47 @@
#!/bin/bash

# Find the directory we exist within
DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
cd ${DIR}/
source ../lib/util.sh
source lib.sh

docker_dir=../../docker/docker-cluster-query
cp storage-schemas.conf $docker_dir


echo "waiting until an even 2-minute timestamp to provide a fair starting point"
wait_time 120
cd $docker_dir
docker-compose up --force-recreate -V -d
sleep 60
log "starting backfill..."
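# backfill 24h of history at a 10s interval for 2000 metrics per org (--mpo), replayed at 250x speed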
mt-fakemetrics backfill --kafka-mdm-addr localhost:9092 --offset 24h --period 10s --speedup 250 --mpo 2000
# it looks like the backfill may write up to 4 minutes into the future, since the first 4 minutes of the feed result in out-of-order drops. oh well...
log "backfill done. starting realtime feed..."
mt-fakemetrics feed --kafka-mdm-addr localhost:9092 --period 10s --mpo 2000 &

cd ../..

log "query 11 series over and over again, 50Hz"
same11

log "query 11 distinct series over and over again, 50Hz"
distinct11

log "pathological queries: one that queries ALL data (with MDP consolidation)"
pathological_all

log "pathological queries: one that queries ALL data (without MDP consolidation)"
pathological_all_no_mdp

log "pathological queries: ALL series but tiny responses"
pathological_all_tiny

log "anySeriesAnyTimeRange"
anySeriesAnyTimeRange

log "pathological-all + distinct11 simultaneously"
pathological_all &
distinct11

3 changes: 3 additions & 0 deletions benchmarks/cluster-query-pathological-queries/storage-schemas.conf
@@ -0,0 +1,3 @@
[default]
pattern = .*
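# assuming metrictank's retention format (interval:ttl:chunkspan:numchunks): keep raw 1s data for 6h in 2min chunks, plus a 1min rollup for 35d in 10min chunks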
retentions = 1s:6h:2min:2,1min:35d:10min:1
23 changes: 23 additions & 0 deletions benchmarks/lib/util.sh
@@ -0,0 +1,23 @@
#!/bin/bash

log () {
echo "$(date +'%F %T') $1"
}

# converts a number of nanoseconds to a duration specification for `sleep`
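# e.g. ns_to_sleep 500000000 prints 0.500000000 (half a second)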
ns_to_sleep() {
local ns=$1
printf "%010d\n" $ns | sed 's/\(.\{9\}\)$/.\1/'
}

# sleep until the next round timestamp (snap), plus an optional offset
# e.g. wait_time 60 7 will sleep until 7 seconds after the next whole minute
# obviously this isn't accurate to the ns, due to the overhead of executing commands, etc
# on Dieter's laptop there is typically 5.7 to 5.8 millis of overhead, hence the compensation
wait_time() {
local snap=$(($1 * 1000000000))
local offset=$((${2:-0} * 1000000000))
now=$(date +%s%N)
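# time left until the next multiple of snap (shifted by offset), compensating for ~5.7ms of command overhead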
sleep_ns=$(($snap - (($now + 5700000 - $offset) % $snap)))
sleep $(ns_to_sleep $sleep_ns)
}