Skip to content

Commit

Permalink
separate more openstack canary alerts
Browse files Browse the repository at this point in the history
  • Loading branch information
artherd42 committed Jul 20, 2018
1 parent 0ed50bc commit db53d3a
Show file tree
Hide file tree
Showing 8 changed files with 335 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,49 @@ groups:
description: '{{ $labels.check }} API is flapping for 30 minutes.'
summary: '{{ $labels.check }} API flapping'

# Warning: the arc canary gauge has equalled 1 (the value this rule reports
# as "down") continuously for one hour.
- alert: OpenstackArcCanaryDown
  # Exact-match selector: only series labelled service="arc" are evaluated.
  expr: blackbox_canary_status_gauge{service="arc"} == 1
  for: 1h
  labels:
    severity: warning
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is down for 1 hour. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is down for 1 hour. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is down'

# Info: the arc canary gauge has equalled 0.5 (the value this rule reports
# as "timing out") continuously for one hour.
- alert: OpenstackArcCanaryTimeout
  # Exact-match selector: only series labelled service="arc" are evaluated.
  expr: blackbox_canary_status_gauge{service="arc"} == 0.5
  for: 1h
  labels:
    severity: info
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out for 1 hour. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out for 1 hour. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out'

# Warning: the arc canary gauge changed value more than 8 times within the
# trailing 2h window. There is no `for:` hold, so the alert fires as soon as
# the threshold is exceeded.
- alert: OpenstackArcCanaryFlapping
  # Exact-match selector: only series labelled service="arc" are evaluated.
  expr: changes(blackbox_canary_status_gauge{service="arc"}[2h]) > 8
  labels:
    severity: warning
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping for 2 hours. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping for 2 hours. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping'
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ groups:
summary: Blackbox datapath test

- alert: OpenstackCanaryHealthCheckFailing
expr: blackbox_canary_status_gauge{service!="nova", service!="neutron"} == 1
expr: blackbox_canary_status_gauge{service!="nova", service!="neutron", service!="cinder", service!="manila", service!="lbaas", service!="swift", service!="designate", service!="arc", service!="hermes"} == 1
for: 1h
labels:
severity: warning
Expand All @@ -96,7 +96,7 @@ groups:
summary: Blackbox canary test

- alert: OpenstackCanaryHealthCheckFlapping
expr: changes(blackbox_canary_status_gauge{service!="nova", service!="neutron"}[2h]) > 8
expr: changes(blackbox_canary_status_gauge{service!="nova", service!="neutron", service!="cinder", service!="manila", service!="lbaas", service!="swift", service!="designate", service!="arc", service!="hermes"}[2h]) > 8
labels:
severity: warning
tier: openstack
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,51 @@ groups:
playbook: 'docs/devops/alert/{{ $labels.service }}/#{{ $labels.check }}'
annotations:
description: '{{ $labels.check }} API is flapping for 30 minutes.'
summary: '{{ $labels.check }} API flapping'
summary: '{{ $labels.check }} API flapping'

# Warning: the cinder canary gauge has equalled 1 (the value this rule
# reports as "down") continuously for one hour.
- alert: OpenstackCinderCanaryDown
  # Exact-match selector: only series labelled service="cinder" are evaluated.
  expr: blackbox_canary_status_gauge{service="cinder"} == 1
  for: 1h
  labels:
    severity: warning
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is down for 1 hour. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is down for 1 hour. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is down'

# Info: the cinder canary gauge has equalled 0.5 (the value this rule
# reports as "timing out") continuously for one hour.
- alert: OpenstackCinderCanaryTimeout
  # Exact-match selector: only series labelled service="cinder" are evaluated.
  expr: blackbox_canary_status_gauge{service="cinder"} == 0.5
  for: 1h
  labels:
    severity: info
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out for 1 hour. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out for 1 hour. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out'

# Warning: the cinder canary gauge changed value more than 8 times within
# the trailing 2h window. There is no `for:` hold, so the alert fires as soon
# as the threshold is exceeded.
- alert: OpenstackCinderCanaryFlapping
  # Exact-match selector: only series labelled service="cinder" are evaluated.
  expr: changes(blackbox_canary_status_gauge{service="cinder"}[2h]) > 8
  labels:
    severity: warning
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping for 2 hours. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping for 2 hours. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping'
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,49 @@ groups:
description: 'Datapath {{ $labels.service }} {{ $labels.check }} is flapping for 30 minutes. See Sentry for details'
summary: 'Datapath {{ $labels.service }} {{ $labels.check }} is flapping'

# Warning: the designate canary gauge has equalled 1 (the value this rule
# reports as "down") continuously for one hour.
- alert: OpenstackDesignateCanaryDown
  # Exact-match selector: only series labelled service="designate" are evaluated.
  expr: blackbox_canary_status_gauge{service="designate"} == 1
  for: 1h
  labels:
    severity: warning
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is down for 1 hour. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is down for 1 hour. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is down'

# Info: the designate canary gauge has equalled 0.5 (the value this rule
# reports as "timing out") continuously for one hour.
- alert: OpenstackDesignateCanaryTimeout
  # Exact-match selector: only series labelled service="designate" are evaluated.
  expr: blackbox_canary_status_gauge{service="designate"} == 0.5
  for: 1h
  labels:
    severity: info
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out for 1 hour. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out for 1 hour. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out'

# Warning: the designate canary gauge changed value more than 8 times within
# the trailing 2h window. There is no `for:` hold, so the alert fires as soon
# as the threshold is exceeded.
- alert: OpenstackDesignateCanaryFlapping
  # Exact-match selector: only series labelled service="designate" are evaluated.
  expr: changes(blackbox_canary_status_gauge{service="designate"}[2h]) > 8
  labels:
    severity: warning
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping for 2 hours. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping for 2 hours. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping'
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,51 @@ groups:
playbook: 'docs/devops/alert/{{ $labels.service }}/#{{ $labels.check }}'
annotations:
description: '{{ $labels.check }} API is flapping for 30 minutes.'
summary: '{{ $labels.check }} API flapping'
summary: '{{ $labels.check }} API flapping'

# Warning: the hermes canary gauge has equalled 1 (the value this rule
# reports as "down") continuously for one hour.
- alert: OpenstackHermesCanaryDown
  # Exact-match selector: only series labelled service="hermes" are evaluated.
  expr: blackbox_canary_status_gauge{service="hermes"} == 1
  for: 1h
  labels:
    severity: warning
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is down for 1 hour. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is down for 1 hour. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is down'

# Info: the hermes canary gauge has equalled 0.5 (the value this rule
# reports as "timing out") continuously for one hour.
- alert: OpenstackHermesCanaryTimeout
  # Exact-match selector: only series labelled service="hermes" are evaluated.
  expr: blackbox_canary_status_gauge{service="hermes"} == 0.5
  for: 1h
  labels:
    severity: info
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out for 1 hour. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out for 1 hour. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out'

# Warning: the hermes canary gauge changed value more than 8 times within
# the trailing 2h window. There is no `for:` hold, so the alert fires as soon
# as the threshold is exceeded.
- alert: OpenstackHermesCanaryFlapping
  # Exact-match selector: only series labelled service="hermes" are evaluated.
  expr: changes(blackbox_canary_status_gauge{service="hermes"}[2h]) > 8
  labels:
    severity: warning
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping for 2 hours. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping for 2 hours. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping'
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,51 @@ groups:
playbook: 'docs/devops/alert/{{ $labels.service }}/#{{ $labels.check }}'
annotations:
description: 'Datapath {{ $labels.service }} {{ $labels.check }} is flapping for 30 minutes. See Sentry for details'
summary: 'Datapath {{ $labels.service }} {{ $labels.check }} is flapping'
summary: 'Datapath {{ $labels.service }} {{ $labels.check }} is flapping'

# Warning: the lbaas canary gauge has equalled 1 (the value this rule
# reports as "down") continuously for one hour.
- alert: OpenstackLbaasCanaryDown
  # Exact-match selector: only series labelled service="lbaas" are evaluated.
  expr: blackbox_canary_status_gauge{service="lbaas"} == 1
  for: 1h
  labels:
    severity: warning
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is down for 1 hour. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is down for 1 hour. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is down'

# Info: the lbaas canary gauge has equalled 0.5 (the value this rule
# reports as "timing out") continuously for one hour.
- alert: OpenstackLbaasCanaryTimeout
  # Exact-match selector: only series labelled service="lbaas" are evaluated.
  expr: blackbox_canary_status_gauge{service="lbaas"} == 0.5
  for: 1h
  labels:
    severity: info
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out for 1 hour. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out for 1 hour. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out'

# Warning: the lbaas canary gauge changed value more than 8 times within
# the trailing 2h window. There is no `for:` hold, so the alert fires as soon
# as the threshold is exceeded.
- alert: OpenstackLbaasCanaryFlapping
  # Exact-match selector: only series labelled service="lbaas" are evaluated.
  expr: changes(blackbox_canary_status_gauge{service="lbaas"}[2h]) > 8
  labels:
    severity: warning
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping for 2 hours. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping for 2 hours. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping'
Original file line number Diff line number Diff line change
Expand Up @@ -134,4 +134,52 @@ groups:
playbook: 'docs/devops/alert/{{ $labels.service }}/#{{ $labels.check }}'
annotations:
description: 'Datapath {{ $labels.service }} {{ $labels.check }} is flapping for 30 minutes. See Sentry for details'
summary: 'Datapath {{ $labels.service }} {{ $labels.check }} is flapping'
summary: 'Datapath {{ $labels.service }} {{ $labels.check }} is flapping'

# Warning: the manila canary gauge has equalled 1 (the value this rule
# reports as "down") continuously for one hour.
- alert: OpenstackManilaCanaryDown
  # Exact-match selector: only series labelled service="manila" are evaluated.
  expr: blackbox_canary_status_gauge{service="manila"} == 1
  for: 1h
  labels:
    severity: warning
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is down for 1 hour. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is down for 1 hour. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is down'

# Info: the manila canary gauge has equalled 0.5 (the value this rule
# reports as "timing out") continuously for one hour.
- alert: OpenstackManilaCanaryTimeout
  # Exact-match selector: only series labelled service="manila" are evaluated.
  expr: blackbox_canary_status_gauge{service="manila"} == 0.5
  for: 1h
  labels:
    severity: info
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out for 1 hour. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out for 1 hour. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out'

# Warning: the manila canary gauge changed value more than 8 times within
# the trailing 2h window. There is no `for:` hold, so the alert fires as soon
# as the threshold is exceeded.
- alert: OpenstackManilaCanaryFlapping
  # Exact-match selector: only series labelled service="manila" are evaluated.
  expr: changes(blackbox_canary_status_gauge{service="manila"}[2h]) > 8
  labels:
    severity: warning
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping for 2 hours. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping for 2 hours. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping'

Original file line number Diff line number Diff line change
Expand Up @@ -245,4 +245,51 @@ groups:
playbook: 'docs/devops/alert/{{ $labels.service }}/#{{ $labels.check }}'
annotations:
description: '{{ $labels.check }} API is flapping for 30 minutes.'
summary: '{{ $labels.check }} API flapping'
summary: '{{ $labels.check }} API flapping'

# Warning: the swift canary gauge has equalled 1 (the value this rule
# reports as "down") continuously for one hour.
- alert: OpenstackSwiftCanaryDown
  # Exact-match selector: only series labelled service="swift" are evaluated.
  expr: blackbox_canary_status_gauge{service="swift"} == 1
  for: 1h
  labels:
    severity: warning
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is down for 1 hour. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is down for 1 hour. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is down'

# Info: the swift canary gauge has equalled 0.5 (the value this rule
# reports as "timing out") continuously for one hour.
- alert: OpenstackSwiftCanaryTimeout
  # Exact-match selector: only series labelled service="swift" are evaluated.
  expr: blackbox_canary_status_gauge{service="swift"} == 0.5
  for: 1h
  labels:
    severity: info
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out for 1 hour. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out for 1 hour. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is timing out'

# Warning: the swift canary gauge changed value more than 8 times within
# the trailing 2h window. There is no `for:` hold, so the alert fires as soon
# as the threshold is exceeded.
- alert: OpenstackSwiftCanaryFlapping
  # Exact-match selector: only series labelled service="swift" are evaluated.
  expr: changes(blackbox_canary_status_gauge{service="swift"}[2h]) > 8
  labels:
    severity: warning
    tier: openstack
    service: '{{ $labels.service }}'
    context: '{{ $labels.service }}'
    dashboard: ccloud-health-canary-details
    meta: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping for 2 hours. See Sentry for details'
    sentry: 'blackbox/?query=test_{{ $labels.check }}'
    playbook: 'docs/devops/alert/{{ $labels.service }}'
  annotations:
    description: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping for 2 hours. See Sentry for details'
    summary: 'Canary {{ $labels.service }} {{ $labels.check }} is flapping'

0 comments on commit db53d3a

Please sign in to comment.