Merge pull request #3795 from jwcesign/fix-metrics-size
fix: reduce the number of metrics by merging entries
karmada-bot authored Jul 17, 2023
2 parents 9a03a10 + 4f41da2 commit bc2c443
Showing 6 changed files with 21 additions and 23 deletions.
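Background for the change: every distinct label combination on a metric becomes its own exported entry, so labelling the latency histograms by apiVersion/kind/name/namespace made the metric count grow with the number of resource templates and bindings, while keeping only the result label caps it at two entries per metric. The sketch below is a minimal, standalone illustration of that effect with the Prometheus Go client; it is not part of this commit, and the metric names and the count of 1000 objects are made up for the example.

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	// Old shape: one histogram child per namespace/name/result combination.
	perObject := prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name: "example_sync_work_duration_seconds_per_object",
	}, []string{"namespace", "name", "result"})

	// New shape: children only per result value ("success"/"error").
	perResult := prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name: "example_sync_work_duration_seconds_per_result",
	}, []string{"result"})

	for i := 0; i < 1000; i++ {
		name := fmt.Sprintf("binding-%d", i)
		perObject.WithLabelValues("default", name, "success").Observe(0.01)
		perResult.WithLabelValues("success").Observe(0.01)
	}

	// 1000 exported entries vs. 1, for the same observations.
	fmt.Println(testutil.CollectAndCount(perObject)) // 1000
	fmt.Println(testutil.CollectAndCount(perResult)) // 1
}
```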
2 changes: 1 addition & 1 deletion pkg/controllers/binding/binding_controller.go
@@ -111,7 +111,7 @@ func (c *ResourceBindingController) syncBinding(binding *workv1alpha2.ResourceBi
}
start := time.Now()
err = ensureWork(c.Client, c.ResourceInterpreter, workload, c.OverrideManager, binding, apiextensionsv1.NamespaceScoped)
-metrics.ObserveSyncWorkLatency(binding.ObjectMeta, err, start)
+metrics.ObserveSyncWorkLatency(err, start)
if err != nil {
klog.Errorf("Failed to transform resourceBinding(%s/%s) to works. Error: %v.",
binding.GetNamespace(), binding.GetName(), err)
2 changes: 1 addition & 1 deletion pkg/controllers/binding/cluster_resource_binding_controller.go
@@ -111,7 +111,7 @@ func (c *ClusterResourceBindingController) syncBinding(binding *workv1alpha2.Clu

start := time.Now()
err = ensureWork(c.Client, c.ResourceInterpreter, workload, c.OverrideManager, binding, apiextensionsv1.ClusterScoped)
-metrics.ObserveSyncWorkLatency(binding.ObjectMeta, err, start)
+metrics.ObserveSyncWorkLatency(err, start)
if err != nil {
klog.Errorf("Failed to transform clusterResourceBinding(%s) to works. Error: %v.", binding.GetName(), err)
c.EventRecorder.Event(binding, corev1.EventTypeWarning, events.EventReasonSyncWorkFailed, err.Error())
2 changes: 1 addition & 1 deletion pkg/controllers/execution/execution_controller.go
@@ -114,7 +114,7 @@ func (c *Controller) SetupWithManager(mgr controllerruntime.Manager) error {
func (c *Controller) syncWork(clusterName string, work *workv1alpha1.Work) (controllerruntime.Result, error) {
start := time.Now()
err := c.syncToClusters(clusterName, work)
-metrics.ObserveSyncWorkloadLatency(work.ObjectMeta, err, start)
+metrics.ObserveSyncWorkloadLatency(err, start)
if err != nil {
msg := fmt.Sprintf("Failed to sync work(%s) to cluster(%s): %v", work.Name, clusterName, err)
klog.Errorf(msg)
4 changes: 2 additions & 2 deletions pkg/detector/detector.go
@@ -378,7 +378,7 @@ func (d *ResourceDetector) ApplyPolicy(object *unstructured.Unstructured, object
klog.Infof("Applying policy(%s/%s) for object: %s", policy.Namespace, policy.Name, objectKey)
var operationResult controllerutil.OperationResult
defer func() {
-metrics.ObserveApplyPolicyAttemptAndLatency(object, policy.ObjectMeta, err, start)
+metrics.ObserveApplyPolicyAttemptAndLatency(err, start)
if err != nil {
d.EventRecorder.Eventf(object, corev1.EventTypeWarning, events.EventReasonApplyPolicyFailed, "Apply policy(%s/%s) failed: %v", policy.Namespace, policy.Name, err)
} else if operationResult != controllerutil.OperationResultNone {
@@ -451,7 +451,7 @@ func (d *ResourceDetector) ApplyClusterPolicy(object *unstructured.Unstructured,
klog.Infof("Applying cluster policy(%s) for object: %s", policy.Name, objectKey)
var operationResult controllerutil.OperationResult
defer func() {
-metrics.ObserveApplyPolicyAttemptAndLatency(object, policy.ObjectMeta, err, start)
+metrics.ObserveApplyPolicyAttemptAndLatency(err, start)
if err != nil {
d.EventRecorder.Eventf(object, corev1.EventTypeWarning, events.EventReasonApplyPolicyFailed, "Apply cluster policy(%s) failed: %v", policy.Name, err)
} else if operationResult != controllerutil.OperationResultNone {
4 changes: 2 additions & 2 deletions pkg/detector/policy.go
@@ -53,7 +53,7 @@ func (d *ResourceDetector) propagateResource(object *unstructured.Unstructured,
return fmt.Errorf("waiting for dependent overrides")
}
d.RemoveWaiting(objectKey)
-metrics.ObserveFindMatchedPolicyLatency(object, start)
+metrics.ObserveFindMatchedPolicyLatency(start)
return d.ApplyPolicy(object, objectKey, propagationPolicy)
}

@@ -70,7 +70,7 @@ func (d *ResourceDetector) propagateResource(object *unstructured.Unstructured,
return fmt.Errorf("waiting for dependent overrides")
}
d.RemoveWaiting(objectKey)
-metrics.ObserveFindMatchedPolicyLatency(object, start)
+metrics.ObserveFindMatchedPolicyLatency(start)
return d.ApplyClusterPolicy(object, objectKey, clusterPolicy)
}

30 changes: 14 additions & 16 deletions pkg/metrics/resource.go
@@ -4,8 +4,6 @@ import (
"time"

"github.com/prometheus/client_golang/prometheus"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"

utilmetrics "github.com/karmada-io/karmada/pkg/util/metrics"
)
@@ -23,51 +21,51 @@ var (
Name: resourceMatchPolicyDurationMetricsName,
Help: "Duration in seconds to find a matched propagation policy for the resource template.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 12),
}, []string{"apiVersion", "kind", "name", "namespace"})
}, []string{})

applyPolicyDurationHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: resourceApplyPolicyDurationMetricsName,
Help: "Duration in seconds to apply a propagation policy for the resource template. By the result, 'error' means a resource template failed to apply the policy. Otherwise 'success'.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 12),
}, []string{"apiVersion", "kind", "name", "namespace", "result"})
}, []string{"result"})

policyApplyAttempts = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: policyApplyAttemptsMetricsName,
Help: "Number of attempts to be applied for a propagation policy. By the result, 'error' means a resource template failed to apply the policy. Otherwise 'success'.",
}, []string{"namespace", "name", "result"})
}, []string{"result"})

syncWorkDurationHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: syncWorkDurationMetricsName,
Help: "Duration in seconds to sync works for a binding object. By the result, 'error' means a binding failed to sync works. Otherwise 'success'.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 12),
}, []string{"namespace", "name", "result"})
}, []string{"result"})

syncWorkloadDurationHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: syncWorkloadDurationMetricsName,
Help: "Duration in seconds to sync the workload to a target cluster. By the result, 'error' means a work failed to sync workloads. Otherwise 'success'.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 12),
}, []string{"namespace", "name", "result"})
}, []string{"result"})
)

// ObserveFindMatchedPolicyLatency records the duration for the resource finding a matched policy.
-func ObserveFindMatchedPolicyLatency(object *unstructured.Unstructured, start time.Time) {
-findMatchedPolicyDurationHistogram.WithLabelValues(object.GetAPIVersion(), object.GetKind(), object.GetName(), object.GetNamespace()).Observe(utilmetrics.DurationInSeconds(start))
+func ObserveFindMatchedPolicyLatency(start time.Time) {
+findMatchedPolicyDurationHistogram.WithLabelValues().Observe(utilmetrics.DurationInSeconds(start))
}

// ObserveApplyPolicyAttemptAndLatency records the duration for the resource applying a policy and a applying attempt for the policy.
-func ObserveApplyPolicyAttemptAndLatency(object *unstructured.Unstructured, policyMetaData metav1.ObjectMeta, err error, start time.Time) {
-applyPolicyDurationHistogram.WithLabelValues(object.GetAPIVersion(), object.GetKind(), object.GetName(), object.GetNamespace(), utilmetrics.GetResultByError(err)).Observe(utilmetrics.DurationInSeconds(start))
-policyApplyAttempts.WithLabelValues(policyMetaData.Namespace, policyMetaData.Name, utilmetrics.GetResultByError(err)).Inc()
+func ObserveApplyPolicyAttemptAndLatency(err error, start time.Time) {
+applyPolicyDurationHistogram.WithLabelValues(utilmetrics.GetResultByError(err)).Observe(utilmetrics.DurationInSeconds(start))
+policyApplyAttempts.WithLabelValues(utilmetrics.GetResultByError(err)).Inc()
}

// ObserveSyncWorkLatency records the duration to sync works for a binding object.
-func ObserveSyncWorkLatency(bindingMetaData metav1.ObjectMeta, err error, start time.Time) {
-syncWorkDurationHistogram.WithLabelValues(bindingMetaData.Namespace, bindingMetaData.Name, utilmetrics.GetResultByError(err)).Observe(utilmetrics.DurationInSeconds(start))
+func ObserveSyncWorkLatency(err error, start time.Time) {
+syncWorkDurationHistogram.WithLabelValues(utilmetrics.GetResultByError(err)).Observe(utilmetrics.DurationInSeconds(start))
}

// ObserveSyncWorkloadLatency records the duration to sync the workload to a target cluster.
-func ObserveSyncWorkloadLatency(workMetadata metav1.ObjectMeta, err error, start time.Time) {
-syncWorkloadDurationHistogram.WithLabelValues(workMetadata.Namespace, workMetadata.Name, utilmetrics.GetResultByError(err)).Observe(utilmetrics.DurationInSeconds(start))
+func ObserveSyncWorkloadLatency(err error, start time.Time) {
+syncWorkloadDurationHistogram.WithLabelValues(utilmetrics.GetResultByError(err)).Observe(utilmetrics.DurationInSeconds(start))
}

// ResourceCollectors returns the collectors about resources.
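For reference, after this change the helpers take only the error and the start time. The sketch below is a hypothetical caller mirroring the controller changes above; only the import path and the helper signatures come from this diff, while the main wiring and the simulated error are illustrative, and it assumes the github.com/karmada-io/karmada module is available on the module path. The result label value is derived from the error as the metric help strings describe ("error" for a non-nil error, otherwise "success").

```go
package main

import (
	"errors"
	"time"

	"github.com/karmada-io/karmada/pkg/metrics"
)

func main() {
	start := time.Now()

	// Simulated outcome; in the controllers this comes from the real sync call.
	syncErr := errors.New("simulated sync failure")

	metrics.ObserveFindMatchedPolicyLatency(start)          // no labels
	metrics.ObserveApplyPolicyAttemptAndLatency(nil, start) // result="success"
	metrics.ObserveSyncWorkLatency(syncErr, start)          // result="error"
	metrics.ObserveSyncWorkloadLatency(nil, start)          // result="success"
}
```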
