diff --git a/component/networkpolicy.libsonnet b/component/networkpolicy.libsonnet index 43ce83f..1b86010 100644 --- a/component/networkpolicy.libsonnet +++ b/component/networkpolicy.libsonnet @@ -5,6 +5,8 @@ local kube = import 'lib/kube.libjsonnet'; local inv = kap.inventory(); local params = inv.parameters.openshift4_monitoring; +local cilium_cluster = std.member(inv.applications, 'cilium'); + [ kube.NetworkPolicy('alertmanager-allow-web') { spec: { @@ -82,4 +84,32 @@ local params = inv.parameters.openshift4_monitoring; }, }, }, -] +] + if cilium_cluster then [ + // allow all traffic from the cluster nodes, so that the HAproxy ingress can + // do healthchecks for routes in the openshift-monitoring namespace. + { + apiVersion: 'cilium.io/v2', + kind: 'CiliumNetworkPolicy', + metadata: { + annotations: { + 'syn.tools/description': ||| + Note that this policy isn't named allow-from-cluster-nodes, even + though its content is identical to ensure that Espejo doesn't delete + the policy. + |||, + }, + name: 'allow-from-cluster-nodes-custom', + }, + spec: { + endpointSelector: {}, + ingress: [ + { + fromEntities: [ + 'host', + 'remote-node', + ], + }, + ], + }, + }, +] else [] diff --git a/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 1df7499..ee419e5 100644 --- a/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -1077,13 +1077,14 @@ spec: syn_component: openshift4-monitoring - alert: SYN_KubeAggregatedAPIErrors annotations: - description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace - }} has reported errors. It has appeared unavailable {{ $value | humanize - }} times averaged over the past 10m. + description: Kubernetes aggregated API {{ $labels.instance }}/{{ $labels.name + }} has reported {{ $labels.reason }} errors on cluster {{ $labels.cluster + }}. summary: Kubernetes aggregated API has reported errors. syn_component: openshift4-monitoring expr: | - sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4 + sum by(cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[1m])) > 0 + for: 10m labels: severity: warning syn: 'true' diff --git a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 1df7499..ee419e5 100644 --- a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -1077,13 +1077,14 @@ spec: syn_component: openshift4-monitoring - alert: SYN_KubeAggregatedAPIErrors annotations: - description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace - }} has reported errors. It has appeared unavailable {{ $value | humanize - }} times averaged over the past 10m. + description: Kubernetes aggregated API {{ $labels.instance }}/{{ $labels.name + }} has reported {{ $labels.reason }} errors on cluster {{ $labels.cluster + }}. summary: Kubernetes aggregated API has reported errors. syn_component: openshift4-monitoring expr: | - sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4 + sum by(cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[1m])) > 0 + for: 10m labels: severity: warning syn: 'true' diff --git a/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 64aef6c..38d4328 100644 --- a/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -1086,14 +1086,15 @@ spec: syn_component: openshift4-monitoring - alert: SYN_KubeAggregatedAPIErrors annotations: - description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace - }} has reported errors. It has appeared unavailable {{ $value | humanize - }} times averaged over the past 10m. + description: Kubernetes aggregated API {{ $labels.instance }}/{{ $labels.name + }} has reported {{ $labels.reason }} errors on cluster {{ $labels.cluster + }}. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubeAggregatedAPIErrors.md summary: Kubernetes aggregated API has reported errors. syn_component: openshift4-monitoring expr: | - sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4 + sum by(cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[1m])) > 0 + for: 10m labels: severity: warning syn: 'true' diff --git a/tests/golden/ovn-kubernetes/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/ovn-kubernetes/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 6abfa06..04b636b 100644 --- a/tests/golden/ovn-kubernetes/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/ovn-kubernetes/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -1228,13 +1228,14 @@ spec: syn_component: openshift4-monitoring - alert: SYN_KubeAggregatedAPIErrors annotations: - description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace - }} has reported errors. It has appeared unavailable {{ $value | humanize - }} times averaged over the past 10m. + description: Kubernetes aggregated API {{ $labels.instance }}/{{ $labels.name + }} has reported {{ $labels.reason }} errors on cluster {{ $labels.cluster + }}. summary: Kubernetes aggregated API has reported errors. syn_component: openshift4-monitoring expr: | - sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4 + sum by(cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[1m])) > 0 + for: 10m labels: severity: warning syn: 'true' diff --git a/tests/golden/release-4.14/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/release-4.14/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 13a95e1..c8e16f0 100644 --- a/tests/golden/release-4.14/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/release-4.14/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -1058,13 +1058,14 @@ spec: syn_component: openshift4-monitoring - alert: SYN_KubeAggregatedAPIErrors annotations: - description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace - }} has reported errors. It has appeared unavailable {{ $value | humanize - }} times averaged over the past 10m. + description: Kubernetes aggregated API {{ $labels.instance }}/{{ $labels.name + }} has reported {{ $labels.reason }} errors on cluster {{ $labels.cluster + }}. summary: Kubernetes aggregated API has reported errors. syn_component: openshift4-monitoring expr: | - sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4 + sum by(cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[1m])) > 0 + for: 10m labels: severity: warning syn: 'true' diff --git a/tests/golden/release-4.15/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/release-4.15/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 1df7499..ee419e5 100644 --- a/tests/golden/release-4.15/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/release-4.15/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -1077,13 +1077,14 @@ spec: syn_component: openshift4-monitoring - alert: SYN_KubeAggregatedAPIErrors annotations: - description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace - }} has reported errors. It has appeared unavailable {{ $value | humanize - }} times averaged over the past 10m. + description: Kubernetes aggregated API {{ $labels.instance }}/{{ $labels.name + }} has reported {{ $labels.reason }} errors on cluster {{ $labels.cluster + }}. summary: Kubernetes aggregated API has reported errors. syn_component: openshift4-monitoring expr: | - sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4 + sum by(cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[1m])) > 0 + for: 10m labels: severity: warning syn: 'true' diff --git a/tests/golden/release-4.16/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/release-4.16/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index ef3122d..9e58f20 100644 --- a/tests/golden/release-4.16/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/release-4.16/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -1083,14 +1083,15 @@ spec: syn_component: openshift4-monitoring - alert: SYN_KubeAggregatedAPIErrors annotations: - description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace - }} has reported errors. It has appeared unavailable {{ $value | humanize - }} times averaged over the past 10m. + description: Kubernetes aggregated API {{ $labels.instance }}/{{ $labels.name + }} has reported {{ $labels.reason }} errors on cluster {{ $labels.cluster + }}. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubeAggregatedAPIErrors.md summary: Kubernetes aggregated API has reported errors. syn_component: openshift4-monitoring expr: | - sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4 + sum by(cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[1m])) > 0 + for: 10m labels: severity: warning syn: 'true' diff --git a/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 1df7499..ee419e5 100644 --- a/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -1077,13 +1077,14 @@ spec: syn_component: openshift4-monitoring - alert: SYN_KubeAggregatedAPIErrors annotations: - description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace - }} has reported errors. It has appeared unavailable {{ $value | humanize - }} times averaged over the past 10m. + description: Kubernetes aggregated API {{ $labels.instance }}/{{ $labels.name + }} has reported {{ $labels.reason }} errors on cluster {{ $labels.cluster + }}. summary: Kubernetes aggregated API has reported errors. syn_component: openshift4-monitoring expr: | - sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4 + sum by(cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[1m])) > 0 + for: 10m labels: severity: warning syn: 'true' diff --git a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 573eb60..9a87a3a 100644 --- a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -1132,13 +1132,14 @@ spec: syn_team: clumsy-donkeys - alert: SYN_KubeAggregatedAPIErrors annotations: - description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace - }} has reported errors. It has appeared unavailable {{ $value | humanize - }} times averaged over the past 10m. + description: Kubernetes aggregated API {{ $labels.instance }}/{{ $labels.name + }} has reported {{ $labels.reason }} errors on cluster {{ $labels.cluster + }}. summary: Kubernetes aggregated API has reported errors. syn_component: openshift4-monitoring expr: | - sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4 + sum by(cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[1m])) > 0 + for: 10m labels: severity: warning syn: 'true' diff --git a/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/20_networkpolicy.yaml b/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/20_networkpolicy.yaml index e7e81d7..b400430 100644 --- a/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/20_networkpolicy.yaml +++ b/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/20_networkpolicy.yaml @@ -60,3 +60,20 @@ spec: - alertmanager policyTypes: - Ingress +--- +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + annotations: + syn.tools/description: | + Note that this policy isn't named allow-from-cluster-nodes, even + though its content is identical to ensure that Espejo doesn't delete + the policy. + name: allow-from-cluster-nodes-custom + namespace: openshift-monitoring +spec: + endpointSelector: {} + ingress: + - fromEntities: + - host + - remote-node diff --git a/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/20_user_workload_networkpolicy.yaml b/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/20_user_workload_networkpolicy.yaml index ad57295..5ba5074 100644 --- a/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/20_user_workload_networkpolicy.yaml +++ b/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/20_user_workload_networkpolicy.yaml @@ -60,3 +60,20 @@ spec: - alertmanager policyTypes: - Ingress +--- +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + annotations: + syn.tools/description: | + Note that this policy isn't named allow-from-cluster-nodes, even + though its content is identical to ensure that Espejo doesn't delete + the policy. + name: allow-from-cluster-nodes-custom + namespace: openshift-user-workload-monitoring +spec: + endpointSelector: {} + ingress: + - fromEntities: + - host + - remote-node diff --git a/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index d2790e5..cf47410 100644 --- a/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -1077,13 +1077,14 @@ spec: syn_component: openshift4-monitoring - alert: SYN_KubeAggregatedAPIErrors annotations: - description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace - }} has reported errors. It has appeared unavailable {{ $value | humanize - }} times averaged over the past 10m. + description: Kubernetes aggregated API {{ $labels.instance }}/{{ $labels.name + }} has reported {{ $labels.reason }} errors on cluster {{ $labels.cluster + }}. summary: Kubernetes aggregated API has reported errors. syn_component: openshift4-monitoring expr: | - sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4 + sum by(cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[1m])) > 0 + for: 10m labels: severity: warning syn: 'true' diff --git a/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 710272c..7a8b957 100644 --- a/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -1077,13 +1077,14 @@ spec: syn_component: openshift4-monitoring - alert: SYN_KubeAggregatedAPIErrors annotations: - description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace - }} has reported errors. It has appeared unavailable {{ $value | humanize - }} times averaged over the past 10m. + description: Kubernetes aggregated API {{ $labels.instance }}/{{ $labels.name + }} has reported {{ $labels.reason }} errors on cluster {{ $labels.cluster + }}. summary: Kubernetes aggregated API has reported errors. syn_component: openshift4-monitoring expr: | - sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4 + sum by(cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[1m])) > 0 + for: 10m labels: severity: warning syn: 'true' diff --git a/tests/user-workload-monitoring.yml b/tests/user-workload-monitoring.yml index 053ae10..ce69401 100644 --- a/tests/user-workload-monitoring.yml +++ b/tests/user-workload-monitoring.yml @@ -1,3 +1,5 @@ +applications: + - cilium parameters: kapitan: dependencies: