diff --git a/packages/node/docker/monitoring/README.md b/packages/node/docker/monitoring/README.md new file mode 100644 index 0000000000..3a777cf565 --- /dev/null +++ b/packages/node/docker/monitoring/README.md @@ -0,0 +1,30 @@ + +# Monitoring + +This directory contains a prometheus and grafana setup for monitoring a subql node. + +## Subql Node Dashboards + +### Bootstrap + +The docker compose file starts a subql node that indexes data into postgres, uses prometheus to collect metrics, and uses grafana to display them. + +To get started, run this command to launch the containers: +`$ docker compose up` + + +Once each container is up and running, we can access the web interface of each service. + +- `localhost:3000`: subql node (access the path `/metrics` to view current indicators) +- `localhost:5432`: accessible for postgres connections +- `localhost:9090`: prometheus web ui +- `localhost:3001`: grafana dashboard + +### View dashboard + +Open a browser to access `localhost:3001/dashboards` and select `Subql Node Dashboard` to browse the monitoring indicators of the subql node. +![subql node dashboard](imgs/subql-node-dashboard.png?raw=true) + +### License + +Apache-2.0 \ No newline at end of file diff --git a/packages/node/docker/monitoring/config/db/init.sql b/packages/node/docker/monitoring/config/db/init.sql new file mode 100644 index 0000000000..c30efd1224 --- /dev/null +++ b/packages/node/docker/monitoring/config/db/init.sql @@ -0,0 +1 @@ +CREATE EXTENSION IF NOT EXISTS btree_gist; diff --git a/packages/node/docker/monitoring/config/grafana/dashboards/subql-node.json b/packages/node/docker/monitoring/config/grafana/dashboards/subql-node.json new file mode 100644 index 0000000000..784bd945e5 --- /dev/null +++ b/packages/node/docker/monitoring/config/grafana/dashboards/subql-node.json @@ -0,0 +1,757 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", 
+ "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "subql node prometheus metrics", + "editable": true, + "gnetId": 11159, + "graphTooltip": 0, + "id": 188, + "iteration": 1682040212874, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "subql_indexer_target_block_height{instance=~\"$instance\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Finalized Height - {{instance}} ", + "refId": "D" + }, + { + "expr": "subql_indexer_best_block_height{instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Best Height - {{instance}} ", + "refId": "A" + }, + { + "expr": "subql_indexer_processing_block_height{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Processing Height - {{instance}} ", + "refId": "B" + }, + { + "expr": "subql_indexer_processed_block_count{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Processed Height - {{instance}} ", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Indexing Detail", + "tooltip": { + "shared": true, + "sort": 0, + 
"value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 6 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "subql_indexer_block_queue_size{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Queue Size - {{instance}} ", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Processing Queue Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } 
+ }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 6 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "subql_indexer_blocknumber_queue_size{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Block Number Queue Size - {{instance}} ", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Block Number Queue Size ", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 6 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": 
true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "subql_indexer_store_cache_threshold{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Cache Threshold - {{instance}} ", + "refId": "A" + }, + { + "expr": "subql_indexer_store_cache_records_size{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Cache Records Size - {{instance}} ", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Store Cache Detail", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 14 + }, + "id": 16, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + 
"nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "#F2495C", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "subql_indexer_api_connected{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Connection Status", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 4, + "y": 14 + }, + "hiddenSeries": false, + "id": 15, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "subql_indexer_skip_dictionary_count{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Skip Count - {{instance}} ", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Skip Dictionary Count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": 
"time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 18 + }, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "#F2495C", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "subql_indexer_using_dictionary{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Using the dictionary", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "refresh": false, + "schemaVersion": 21, + "style": "dark", + "tags": [ + "subql" + ], + "templating": { + "list": [ + { + "allValue": "", + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, + 
"datasource": "Prometheus", + "definition": "label_values(subql_indexer_best_block_height, instance)", + "hide": 0, + "includeAll": true, + "label": "instance", + "multi": false, + "name": "instance", + "options": [], + "query": "label_values(subql_indexer_best_block_height, instance)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Subql Node Dashboard", + "uid": "u00GnJPVz", + "version": 8 + } \ No newline at end of file diff --git a/packages/node/docker/monitoring/config/grafana/provisioning/dashboards/subql-node.yaml b/packages/node/docker/monitoring/config/grafana/provisioning/dashboards/subql-node.yaml new file mode 100644 index 0000000000..e0b51484e9 --- /dev/null +++ b/packages/node/docker/monitoring/config/grafana/provisioning/dashboards/subql-node.yaml @@ -0,0 +1,13 @@ +apiVersion: 1 + +providers: + - name: 'subql-node' + orgId: 1 + folder: '' + folderUid: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: false + options: + path: /var/lib/grafana/dashboards diff --git a/packages/node/docker/monitoring/config/grafana/provisioning/datasources/prometheus.yml b/packages/node/docker/monitoring/config/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 0000000000..00e36c4f16 --- /dev/null +++ b/packages/node/docker/monitoring/config/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,9 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + httpMethod: POST + url: http://prometheus:9090 + isDefault: true diff --git 
a/packages/node/docker/monitoring/config/prometheus.yml b/packages/node/docker/monitoring/config/prometheus.yml new file mode 100644 index 0000000000..1db756cfb8 --- /dev/null +++ b/packages/node/docker/monitoring/config/prometheus.yml @@ -0,0 +1,16 @@ +global: + scrape_interval: 10s + evaluation_interval: 10s + +scrape_configs: + - job_name: 'my-subql-node' + metrics_path: '/metrics' + # Insecure mode: + scheme: 'http' + # Secure mode: + # scheme: 'https' + tls_config: + insecure_skip_verify: true + + static_configs: + - targets: ['subquery-node:3000'] diff --git a/packages/node/docker/monitoring/docker-compose.yml b/packages/node/docker/monitoring/docker-compose.yml new file mode 100644 index 0000000000..7f5dd68e1f --- /dev/null +++ b/packages/node/docker/monitoring/docker-compose.yml @@ -0,0 +1,60 @@ +version: '3' + +services: + postgres: + image: postgres + ports: + - 5432:5432 + volumes: + - .data/postgres:/var/lib/postgresql/data + - ./config/db/init.sql:/docker-entrypoint-initdb.d/init.sql + environment: + POSTGRES_PASSWORD: postgres + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 5s + timeout: 5s + retries: 5 + grafana: + image: grafana/grafana + environment: + - "GF_AUTH_ANONYMOUS_ENABLED=true" + - "GF_AUTH_ANONYMOUS_ORG_ROLE=Admin" + - "GF_AUTH_ANONYMOUS_HIDE_VERSION=true" + - "GF_SERVER_HTTP_PORT=3001" + volumes: + # Mount provisioning configuration + - "./config/grafana/provisioning:/etc/grafana/provisioning" + # # Mount dashboards + - "./config/grafana/dashboards:/var/lib/grafana/dashboards" + ports: [ "3001:3001" ] + prometheus: + image: prom/prometheus + # Mount prometheus configuration + volumes: [ "./config/prometheus.yml:/etc/prometheus/prometheus.yml" ] + ports: [ "9090:9090" ] + subquery-node: + image: onfinality/subql-node + depends_on: + postgres: + condition: service_healthy + restart: always + environment: + DB_USER: postgres + DB_PASS: postgres + DB_DATABASE: postgres + DB_HOST: postgres + DB_PORT: 5432 + ports: [ 
"3000:3000" ] + volumes: + - ./:/app + command: + - -f=ipfs://QmV27Y8tRFMmAhioFutj1KX5EnEMVhybWuVFkGBLaWToA8 + - --db-schema=app + - --network-endpoint=wss://kusama-rpc.polkadot.io/ + - --disable-historical=false + healthcheck: + test: ["CMD", "curl", "-f", "http://subquery-node:3000/ready"] + interval: 3s + timeout: 5s + retries: 10 diff --git a/packages/node/docker/monitoring/imgs/subql-node-dashboard.png b/packages/node/docker/monitoring/imgs/subql-node-dashboard.png new file mode 100644 index 0000000000..82064ad81e Binary files /dev/null and b/packages/node/docker/monitoring/imgs/subql-node-dashboard.png differ