diff --git a/pkg/metricscollector/opentelemetry.go b/pkg/metricscollector/opentelemetry.go
index db64edde67d..d35ab2c8eb4 100644
--- a/pkg/metricscollector/opentelemetry.go
+++ b/pkg/metricscollector/opentelemetry.go
@@ -39,6 +39,7 @@ var (
 	otelScalerMetricsLatencyVals          []OtelMetricFloat64Val
 	otelScalerMetricsLatencyValDeprecated []OtelMetricFloat64Val
 	otelInternalLoopLatencyVals           []OtelMetricFloat64Val
+	otelInternalLoopLatencyHistogram      api.Float64Histogram
 	otelInternalLoopLatencyValDeprecated  []OtelMetricFloat64Val
 	otelBuildInfoVal                      OtelMetricInt64Val
@@ -170,6 +171,7 @@ func initMeters() {
 	if err != nil {
 		otLog.Error(err, msg)
 	}
+
 	_, err = meter.Float64ObservableGauge(
 		"keda.internal.scale.loop.latency.seconds",
 		api.WithDescription("Internal latency of ScaledObject/ScaledJob loop execution"),
@@ -180,6 +182,16 @@ func initMeters() {
 		otLog.Error(err, msg)
 	}
 
+	// Histograms are synchronous instruments, so keep the handle for Record calls.
+	otelInternalLoopLatencyHistogram, err = meter.Float64Histogram(
+		"keda.internal.scale.loop.latency.seconds.bucket",
+		api.WithDescription("Internal latency of ScaledObject/ScaledJob loop execution"),
+		api.WithUnit("s"),
+	)
+	if err != nil {
+		otLog.Error(err, msg)
+	}
+
 	_, err = meter.Float64ObservableGauge(
 		"keda.scaler.active",
 		api.WithDescription("Indicates whether a scaler is active (1), or not (0)"),
@@ -324,6 +335,9 @@ func (o *OtelMetrics) RecordScalableObjectLatency(namespace string, name string,
 	otelInternalLoopLatencyD.val = float64(value.Milliseconds())
 	otelInternalLoopLatencyD.measurementOption = opt
 	otelInternalLoopLatencyValDeprecated = append(otelInternalLoopLatencyValDeprecated, otelInternalLoopLatencyD)
+
+	// Record the latency in seconds directly; histograms have no observable callback.
+	otelInternalLoopLatencyHistogram.Record(context.Background(), value.Seconds(), opt)
 }
 
 func ScalerActiveCallback(_ context.Context, obsrv api.Float64Observer) error {
diff --git a/pkg/metricscollector/prommetrics.go b/pkg/metricscollector/prommetrics.go
index a5b9bc6c5b8..ea3c8cc5174 100644
--- a/pkg/metricscollector/prommetrics.go
+++ b/pkg/metricscollector/prommetrics.go
@@ -210,6 +210,16 @@ var (
 		[]string{"namespace", "type", "resource"},
 	)
 
+	internalLoopLatencyHistogram = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Namespace: DefaultPromMetricsNamespace,
+			Subsystem: "internal_scale_loop",
+			Name:      "latency_seconds_bucket",
+			Help:      "Total deviation (in seconds) between the expected execution time and the actual execution time for the scaling loop. Represented as a histogram.",
+		},
+		[]string{"namespace", "type", "resource"},
+	)
+
 	// Total emitted cloudevents.
 	cloudeventEmitted = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
@@ -242,6 +252,7 @@ func NewPromMetrics() *PromMetrics {
 	metrics.Registry.MustRegister(scalerMetricsLatency)
 	metrics.Registry.MustRegister(internalLoopLatencyDeprecated)
 	metrics.Registry.MustRegister(internalLoopLatency)
+	metrics.Registry.MustRegister(internalLoopLatencyHistogram)
 	metrics.Registry.MustRegister(scalerActive)
 	metrics.Registry.MustRegister(scalerErrorsDeprecated)
 	metrics.Registry.MustRegister(scalerErrors)
@@ -284,6 +295,7 @@ func (p *PromMetrics) RecordScalerLatency(namespace string, scaledResource strin
 func (p *PromMetrics) RecordScalableObjectLatency(namespace string, name string, isScaledObject bool, value time.Duration) {
 	internalLoopLatency.WithLabelValues(namespace, getResourceType(isScaledObject), name).Set(value.Seconds())
 	internalLoopLatencyDeprecated.WithLabelValues(namespace, getResourceType(isScaledObject), name).Set(float64(value.Milliseconds()))
+	internalLoopLatencyHistogram.WithLabelValues(namespace, getResourceType(isScaledObject), name).Observe(value.Seconds())
 }
 
 // RecordScalerActive create a measurement of the activity of the scaler
diff --git a/tests/sequential/opentelemetry_metrics/opentelemetry_metrics_test.go b/tests/sequential/opentelemetry_metrics/opentelemetry_metrics_test.go
index be9fafdc46e..8095a39c6ca 100644
--- a/tests/sequential/opentelemetry_metrics/opentelemetry_metrics_test.go
+++ b/tests/sequential/opentelemetry_metrics/opentelemetry_metrics_test.go
@@ -904,6 +904,37 @@ func testScalableObjectMetrics(t *testing.T) {
 			}
 		}
 		assert.Equal(t, true, found)
+
+		val, ok = family["keda_internal_scale_loop_latency_seconds_bucket"]
+		assert.True(t, ok, "keda_internal_scale_loop_latency_seconds_bucket not available")
+		if ok {
+			var found bool
+			metrics := val.GetMetric()
+
+			// check scaledobject loop
+			found = false
+			for _, metric := range metrics {
+				labels := metric.GetLabel()
+				for _, label := range labels {
+					if *label.Name == labelType && *label.Value == "scaledobject" {
+						found = true
+					}
+				}
+			}
+			assert.Equal(t, true, found)
+
+			// check scaledjob loop
+			found = false
+			for _, metric := range metrics {
+				labels := metric.GetLabel()
+				for _, label := range labels {
+					if *label.Name == labelType && *label.Value == "scaledjob" {
+						found = true
+					}
+				}
+			}
+			assert.Equal(t, true, found)
+		}
 	}
 }
diff --git a/tests/sequential/prometheus_metrics/prometheus_metrics_test.go b/tests/sequential/prometheus_metrics/prometheus_metrics_test.go
index 9593d13d5cf..a73f91b4622 100644
--- a/tests/sequential/prometheus_metrics/prometheus_metrics_test.go
+++ b/tests/sequential/prometheus_metrics/prometheus_metrics_test.go
@@ -873,6 +873,36 @@ func testScalableObjectMetrics(t *testing.T) {
 			}
 		}
 		assert.Equal(t, true, found)
 
+		// check scaledjob loop
+		found = false
+		for _, metric := range metrics {
+			labels := metric.GetLabel()
+			for _, label := range labels {
+				if *label.Name == labelType && *label.Value == "scaledjob" {
+					found = true
+				}
+			}
+		}
+		assert.Equal(t, true, found)
+	} else {
+		t.Errorf("scaledobject metric not available")
+	}
+	if val, ok := family["keda_internal_scale_loop_latency_seconds_bucket"]; ok {
+		var found bool
+		metrics := val.GetMetric()
+
+		// check scaledobject loop
+		found = false
+		for _, metric := range metrics {
+			labels := metric.GetLabel()
+			for _, label := range labels {
+				if *label.Name == labelType && *label.Value == "scaledobject" {
+					found = true
+				}
+			}
+		}
+		assert.Equal(t, true, found)
+
+		// check scaledjob loop
+		found = false
+		for _, metric := range metrics {
+			labels := metric.GetLabel()
+			for _, label := range labels {
+				if *label.Name == labelType && *label.Value == "scaledjob" {
+					found = true
+				}
+			}
+		}
+		assert.Equal(t, true, found)
+	}
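Note on the Prometheus side: the `HistogramOpts` in this patch set no `Buckets`, so client_golang falls back to `prometheus.DefBuckets` (0.005s through 10s), which is a reasonable default for a seconds-scaled latency metric. The sketch below is illustrative only and not part of the patch; the metric name `latency_seconds_example` and the label values are hypothetical, shown here to make the bucket choice explicit.

```go
package main

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

// exampleLoopLatency is illustrative only; the metric name is
// hypothetical and does not ship with the patch above.
var exampleLoopLatency = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Namespace: "keda",
		Subsystem: "internal_scale_loop",
		Name:      "latency_seconds_example",
		Help:      "Example loop latency in seconds with explicit buckets.",
		// Omit Buckets entirely and client_golang substitutes
		// prometheus.DefBuckets (0.005s .. 10s), which is what the
		// patched HistogramOpts relies on.
		Buckets: []float64{0.01, 0.05, 0.1, 0.5, 1, 5, 10},
	},
	[]string{"namespace", "type", "resource"},
)

func main() {
	prometheus.MustRegister(exampleLoopLatency)
	exampleLoopLatency.WithLabelValues("default", "scaledobject", "demo").
		Observe((250 * time.Millisecond).Seconds())
}
```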
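On the OpenTelemetry side, `Float64Histogram` accepts no bucket configuration at creation time: boundaries come from the SDK's default explicit-bucket aggregation (0, 5, 10, 25, ... 10000, oriented toward millisecond values) unless a View overrides them, which matters here because the instrument records seconds. A minimal sketch of such a View, assuming SDK v1.17+ where `AggregationExplicitBucketHistogram` lives in `go.opentelemetry.io/otel/sdk/metric`; the boundary values and the `newMeterProvider` helper are illustrative, not KEDA code:

```go
package example

import (
	sdkmetric "go.opentelemetry.io/otel/sdk/metric"
)

// newMeterProvider wires a View that overrides the SDK's default
// histogram boundaries for the loop-latency instrument. The reader
// (e.g. a periodic OTLP reader) is supplied by the caller.
func newMeterProvider(reader sdkmetric.Reader) *sdkmetric.MeterProvider {
	return sdkmetric.NewMeterProvider(
		sdkmetric.WithReader(reader),
		sdkmetric.WithView(sdkmetric.NewView(
			// Match the instrument created in initMeters above.
			sdkmetric.Instrument{Name: "keda.internal.scale.loop.latency.seconds.bucket"},
			sdkmetric.Stream{
				// Seconds-scaled boundaries; values are illustrative.
				Aggregation: sdkmetric.AggregationExplicitBucketHistogram{
					Boundaries: []float64{0.01, 0.05, 0.1, 0.5, 1, 5, 10},
				},
			},
		)),
	)
}
```

Newer releases of the metric API (v1.20+, if available in the module graph) also expose `WithExplicitBucketBoundaries` as an instrument option, which would let `initMeters` set the boundaries inline instead of through a View.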