From aea3361c8e2a40a1814671b4fdd70b5837878bdf Mon Sep 17 00:00:00 2001 From: Blair Currey <12960453+BlairCurrey@users.noreply.github.com> Date: Fri, 9 Aug 2024 12:58:09 -0400 Subject: [PATCH] feat(localenv): span metrics generation (#2849) * feat(localenv): add span metric generation - adds configuration that generates span metrics from tempo traces - can see new `traces_spanmetrics_latency_bucket` etc. in local grafana dashboard * feat(localenv): add gql resolver metric * chore(localenv): give panel title --- localenv/telemetry/docker-compose.yml | 5 +- .../provisioning/dashboards/example.json | 77 ++++++++++++++++++- localenv/telemetry/tempo.yaml | 10 +++ 3 files changed, 88 insertions(+), 4 deletions(-) diff --git a/localenv/telemetry/docker-compose.yml b/localenv/telemetry/docker-compose.yml index b44dbe7990..227f7b2d9e 100644 --- a/localenv/telemetry/docker-compose.yml +++ b/localenv/telemetry/docker-compose.yml @@ -25,7 +25,10 @@ services: prometheus: image: prom/prometheus:latest - command: "--config.file=/etc/prometheus/prometheus.yaml --log.level=debug" + command: + - --config.file=/etc/prometheus/prometheus.yaml + - --log.level=debug + - --web.enable-remote-write-receiver networks: - rafiki volumes: diff --git a/localenv/telemetry/grafana/provisioning/dashboards/example.json b/localenv/telemetry/grafana/provisioning/dashboards/example.json index 160184e20a..a4a59f4abd 100644 --- a/localenv/telemetry/grafana/provisioning/dashboards/example.json +++ b/localenv/telemetry/grafana/provisioning/dashboards/example.json @@ -175,7 +175,7 @@ } ] }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.1.3", "targets": [ { "datasource": { @@ -319,6 +319,77 @@ "title": "Transaction Count", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", +
"steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 6, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.1.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(traces_spanmetrics_latency_bucket{span_name=~\"^(mutation|query).*\"}[$__rate_interval])) by (le, span_name))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Graphql Resolver Duration (95th Percentile)", + "type": "bargauge" + }, { "datasource": { "type": "prometheus", @@ -370,7 +441,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.1.3", "targets": [ { "datasource": { @@ -465,7 +536,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.1.3", "targets": [ { "datasource": { diff --git a/localenv/telemetry/tempo.yaml b/localenv/telemetry/tempo.yaml index d8ade9376f..f382a2423f 100644 --- a/localenv/telemetry/tempo.yaml +++ b/localenv/telemetry/tempo.yaml @@ -15,3 +15,13 @@ storage: path: /var/tempo/blocks wal: path: /var/tempo/wal + +metrics_generator: + storage: + path: /tmp/tempo/wal + remote_write: + - url: http://prometheus:9090/api/v1/write + send_exemplars: true + +overrides: + metrics_generator_processors: [span-metrics]