feat(judge): add ability to mute metric result from total score compu…

…tation (#819) Co-authored-by: Justin Field <[email protected]>
spinnaker · Dec 4, 2020 · 410bde5 · 410bde5
1 parent d87366d
commit 410bde5
Show file tree

Hide file tree

Showing 11 changed files with 151 additions and 11 deletions.
diff --git a/docs/canary-config.md b/docs/canary-config.md
@@ -77,8 +77,9 @@ Describes how to judge a metric, see the [Netflix Automated Canary Analysis Judg
 - `nanStrategy` (enum[string], optional) - How to handle NaN values which can occur if the metric does not return data for a particular time interval.
   - `remove` (default) - Use when you expect a metric to always have data and you want the NaNs removed from your data set (usage metrics).
   - `replace` - Use when you expect a metric to return no data in certain use cases and you want the NaNs replaced with zeros (for example: count metrics, if no errors happened, then metric will return no data for that time interval).
-- `critical` **true** (boolean, optional) - Use to fail the entire canary if this metric fails (recommended for important metrics that signal service outages or severe problems).
-- `mustHaveData` **true** (boolean, optional) - Use to fail a metric if data is missing.
+- `critical` **false** (boolean, optional) - Use to fail the entire canary if this metric fails (recommended for important metrics that signal service outages or severe problems).
+- `muted` **false** (boolean, optional) - Use to mute metric result in a total score computation.
+- `mustHaveData` **false** (boolean, optional) - Use to fail a metric if data is missing.
 - `effectSize` ([EffectSize](#effectsize), optional) - Controls how much different the metric needs to be to fail or fail critically.
 - `outliers` ([Outliers](#outliers), optional) - Controls how to classify and handle outliers.
 
@@ -90,11 +91,28 @@ See the [Netflix Automated Canary Analysis Judge] and [Mann Whitney Classifier]
 
 ### Properties
 
+- `measure` **meanRatio** (enum[string], optional) - Specifies how effect size is measured.
+  - `cles` - [Common Language Effect Size](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test#Common_language_effect_size)
+  - `meanRatio` - Ratio of means
+
+For `meanRatio` measure:
+
 - `allowedIncrease` **1.1** (number, optional) - Defaults to 1. The multiplier increase that must be met for the metric to fail. This example makes the metric fail when the metric has increased 10% from the baseline.
 - `allowedDecrease` **0.90** (number, optional) - Defaults to 1. The multiplier decrease that must be met for the metric to fail. This example makes the metric fail when the metric has decreased 10% from the baseline.
 - `criticalIncrease` **5.0** (number, optional) - Defaults to 1. The multiplier increase that must be met for the metric to be a critical failure and fail the entire analysis with a score of 0. This example make the canary fail critically if there is a 5x increase.
 - `criticalDecrease` **0.5** (number, optional) - Defaults to 1. The multiplier decrease that must be met for the metric to be a critical failure and fail the entire analysis with a score of 0. This example make the canary fail critically if there is a 50% decrease.
 
+For `cles` measure:
+
+- `allowedIncrease` (number, optional) - Defaults to 0.5. 
+- `allowedDecrease` (number, optional) - Defaults to 0.5.
+- `criticalIncrease` (number, optional) - Defaults to 0.5.
+- `criticalDecrease` (number, optional) - Defaults to 0.5.
+
+The CLES reports the probability that a value from one group will be greater than a value from the other group. 
+A value of 0.50 indicates that the two groups are stochastically equal. 
+A value of 1 indicates that the first group shows complete stochastic domination over the other group, and a value of 0 indicates the complete stochastic domination by the second group.
+
 ## Outliers
 
 Controls how to classify and handle outliers.

diff --git a/kayenta-core/src/main/java/com/netflix/kayenta/canary/results/CanaryAnalysisResult.java b/kayenta-core/src/main/java/com/netflix/kayenta/canary/results/CanaryAnalysisResult.java
@@ -52,4 +52,6 @@ public class CanaryAnalysisResult {
   @NotNull @Getter private Map<String, Object> resultMetadata;
 
   @Getter @Builder.Default private boolean critical = false;
+
+  @Getter @Builder.Default private boolean muted = false;
 }
diff --git a/kayenta-graphite/kayenta-graphite.gradle b/kayenta-graphite/kayenta-graphite.gradle
@@ -180,7 +180,7 @@ dependencies {
     integrationTestCompile project(':kayenta-web')
 
     // Apache 2.0 https://github.com/rest-assured/rest-assured/blob/master/LICENSE
-    integrationTestCompile 'io.rest-assured:rest-assured:3.1.1'
+    integrationTestCompile 'io.rest-assured:rest-assured:4.3.2'
     integrationTestAnnotationProcessor platform("com.netflix.spinnaker.orca:orca-bom:$orcaVersion")
     integrationTestAnnotationProcessor "org.projectlombok:lombok"
     integrationTestCompileOnly "org.projectlombok:lombok"

diff --git a/kayenta-integration-tests/kayenta-integration-tests.gradle b/kayenta-integration-tests/kayenta-integration-tests.gradle
@@ -1,13 +1,13 @@
 dependencies {
 
     testCompile project(":kayenta-web")
-    testCompile 'io.rest-assured:rest-assured:3.1.1'
-    testCompile 'org.awaitility:awaitility:3.1.6'
+    testCompile 'io.rest-assured:rest-assured:4.3.2'
+    testCompile 'org.awaitility:awaitility:4.0.3'
     testCompile 'io.micrometer:micrometer-registry-prometheus'
     testCompile 'io.micrometer:micrometer-registry-graphite'
     testCompile 'org.springframework.cloud:spring-cloud-starter:2.1.2.RELEASE'// needed for bootstrap phase when all embedded containers are setup
-    testCompile 'com.playtika.testcontainers:embedded-redis:1.36'
-    testCompile 'com.playtika.testcontainers:embedded-minio:1.36'
+    testCompile 'com.playtika.testcontainers:embedded-redis:1.85'
+    testCompile 'com.playtika.testcontainers:embedded-minio:1.85'
 }
 
 test.testLogging {

diff --git a/kayenta-integration-tests/src/test/java/com/netflix/kayenta/metrics/RandomProvider.java b/kayenta-integration-tests/src/test/java/com/netflix/kayenta/metrics/RandomProvider.java
@@ -32,6 +32,9 @@ public long getLong(int lowerBound, int upperBound) {
   }
 
   private int getRandom(int lowerBound, int upperBound) {
+    if (lowerBound == upperBound) {
+      return lowerBound;
+    }
     return random.nextInt(upperBound - lowerBound) + lowerBound;
   }
 }
diff --git a/...est/java/com/netflix/kayenta/tests/standalone/PrometheusStandaloneCanaryAnalysisTest.java b/...est/java/com/netflix/kayenta/tests/standalone/PrometheusStandaloneCanaryAnalysisTest.java
@@ -45,7 +45,8 @@ public void canaryAnalysisIsSuccessful() {
         .body("canaryAnalysisExecutionResult.didPassThresholds", is(true))
         .body(
             "canaryAnalysisExecutionResult.canaryScoreMessage",
-            is("Final canary score 100.0 met or exceeded the pass score threshold."));
+            is("Final canary score 100.0 met or exceeded the pass score threshold."))
+        .body(metricResultPath("CPU usage for service") + ".classification", is("Pass"));
   }
 
   @Test
@@ -66,6 +67,33 @@ public void canaryAnalysisIsFailed() {
         .body("canaryAnalysisExecutionResult.didPassThresholds", is(false))
         .body(
             "canaryAnalysisExecutionResult.canaryScoreMessage",
-            is("Final canary score 0.0 is not above the marginal score threshold."));
+            is("Final canary score 0.0 is not above the marginal score threshold."))
+        .body(metricResultPath("CPU usage for service") + ".classification", is("High"));
+  }
+
+  @Test
+  public void mutedMetricsAreNotTakenIntoFinalScore() {
+    String canaryAnalysisExecutionId =
+        steps.createCanaryAnalysis(
+            "muted-metric-analysis-case",
+            "prometheus-account",
+            "minio-store-account",
+            "canary-configs/prometheus/muted-metric.json");
+
+    ValidatableResponse response =
+        steps.waitUntilCanaryAnalysisCompleted(canaryAnalysisExecutionId);
+
+    response
+        .body("executionStatus", is("SUCCEEDED"))
+        .body("canaryAnalysisExecutionResult.hasWarnings", is(false))
+        .body("canaryAnalysisExecutionResult.didPassThresholds", is(true))
+        .body(metricResultPath("Failing metric") + ".classification", is("High"))
+        .body(metricResultPath("Successful metric") + ".classification", is("Pass"));
+  }
+
+  private String metricResultPath(String metricName) {
+    return "canaryAnalysisExecutionResult.canaryExecutionResults[0].result.judgeResult.results.find { it.name == '"
+        + metricName
+        + "' }";
   }
 }
diff --git a/kayenta-integration-tests/src/test/resources/application-cases.yml b/kayenta-integration-tests/src/test/resources/application-cases.yml
@@ -37,3 +37,30 @@ canary-analysis.cases:
           lower-bound: 20
           upper-bound: 23
           type: gauge
+
+  muted-metric-analysis-case:
+    lifetime-duration-minutes: 1
+    analysis-interval-minutes: 1
+    namespace: 'prod-namespace-3'
+    control:
+      scope: 'myapp-prod-control-3'
+      metrics:
+        - name: 'integration.failing.metric'
+          lower-bound: 40
+          upper-bound: 45
+          type: gauge
+        - name: 'integration.successful.metric'
+          lower-bound: 40
+          upper-bound: 45
+          type: gauge
+    experiment:
+      scope: 'myapp-prod-canary-3'
+      metrics:
+        - name: 'integration.failing.metric'
+          lower-bound: 1000
+          upper-bound: 1000
+          type: gauge
+        - name: 'integration.successful.metric'
+          lower-bound: 40
+          upper-bound: 45
+          type: gauge
diff --git a/kayenta-integration-tests/src/test/resources/canary-configs/prometheus/muted-metric.json b/kayenta-integration-tests/src/test/resources/canary-configs/prometheus/muted-metric.json
@@ -0,0 +1,59 @@
+{
+  "name": "integration-test-canary-config",
+  "description": "A very simple config for integration testing",
+  "judge": {
+    "judgeConfigurations": {},
+    "name": "NetflixACAJudge-v1.0"
+  },
+  "metrics": [
+    {
+      "name": "Failing metric",
+      "query": {
+        "type": "prometheus",
+        "metricName": "integration_failing_metric",
+        "customFilterTemplate": "standard-template"
+      },
+      "analysisConfigurations": {
+        "canary": {
+          "critical": true,
+          "muted": true,
+          "direction": "increase"
+        }
+      },
+      "groups": [
+        "pod-group"
+      ],
+      "scopeName": "default"
+    },
+    {
+      "name": "Successful metric",
+      "query": {
+        "type": "prometheus",
+        "metricName": "integration_successful_metric",
+        "customFilterTemplate": "standard-template"
+      },
+      "analysisConfigurations": {
+        "canary": {
+          "critical": true,
+          "direction": "increase"
+        }
+      },
+      "groups": [
+        "pod-group"
+      ],
+      "scopeName": "default"
+    }
+  ],
+  "templates": {
+    "standard-template": "namespace='${namespace}', scope='${scope}'"
+  },
+  "classifier": {
+    "groupWeights": {
+      "pod-group": 100
+    },
+    "scoreThresholds": {
+      "marginal": 50,
+      "pass": 75
+    }
+  }
+}
diff --git a/kayenta-judge/src/main/scala/com/netflix/kayenta/judge/NetflixACAJudge.scala b/kayenta-judge/src/main/scala/com/netflix/kayenta/judge/NetflixACAJudge.scala
@@ -113,6 +113,7 @@ class NetflixACAJudge extends CanaryJudge with StrictLogging {
 
     val isCriticalMetric = MapUtils.getAsBooleanWithDefault(false, metricConfig.getAnalysisConfigurations, "canary", "critical")
     val isDataRequired = MapUtils.getAsBooleanWithDefault(false, metricConfig.getAnalysisConfigurations, "canary", "mustHaveData")
+    val isMutedMetric = MapUtils.getAsBooleanWithDefault(false, metricConfig.getAnalysisConfigurations, "canary", "muted")
 
     //Effect Size Parameters
     val effectSizeMeasure = MapUtils.getAsStringWithDefault("meanRatio", metricConfig.getAnalysisConfigurations, "canary", "effectSize", "measure")
@@ -149,6 +150,7 @@ class NetflixACAJudge extends CanaryJudge with StrictLogging {
       .id(metric.getId)
       .tags(metric.getTags)
       .groups(metricConfig.getGroups)
+      .muted(isMutedMetric)
       .experimentMetadata(Map("stats" -> experimentStats.toMap.asJava.asInstanceOf[Object]).asJava)
       .controlMetadata(Map("stats" -> controlStats.toMap.asJava.asInstanceOf[Object]).asJava)
 

diff --git a/kayenta-judge/src/main/scala/com/netflix/kayenta/judge/scorers/ScoringHelper.scala b/kayenta-judge/src/main/scala/com/netflix/kayenta/judge/scorers/ScoringHelper.scala
@@ -35,6 +35,7 @@ class ScoringHelper(judgeName: String) {
   def score(canaryConfig: CanaryConfig,
             scoreThresholds: CanaryClassifierThresholdsConfig,
             metricResults: List[CanaryAnalysisResult]): CanaryJudgeResult = {
+    val notMutedMetricResults = metricResults.filter(!_.isMuted)
     //Get the group weights from the canary configuration
     val groupWeights = Option(canaryConfig.getClassifier.getGroupWeights) match {
       case Some(groups) => groups.asScala.mapValues(_.toDouble).toMap
@@ -43,7 +44,7 @@ class ScoringHelper(judgeName: String) {
 
     //Calculate the summary and group scores based on the metric results
     val weightedSumScorer = new WeightedSumScorer(groupWeights)
-    val scores = weightedSumScorer.score(metricResults)
+    val scores = weightedSumScorer.score(notMutedMetricResults)
 
     //Classify the summary score
     val scoreClassifier = new ThresholdScoreClassifier(scoreThresholds.getPass, scoreThresholds.getMarginal)

diff --git a/kayenta-signalfx/kayenta-signalfx.gradle b/kayenta-signalfx/kayenta-signalfx.gradle
@@ -122,7 +122,7 @@ dependencies {
   integrationTestCompile project(':kayenta-web')
 
   // Apache 2.0 https://github.com/rest-assured/rest-assured/blob/master/LICENSE
-  integrationTestCompile 'io.rest-assured:rest-assured:3.1.1'
+  integrationTestCompile 'io.rest-assured:rest-assured:4.3.2'
 
   integrationTestAnnotationProcessor platform("com.netflix.spinnaker.orca:orca-bom:$orcaVersion")
   integrationTestAnnotationProcessor "org.projectlombok:lombok"