Skip to content

Commit

Permalink
Add central ServiceMonitor to operator's namespace (#81)
Browse files Browse the repository at this point in the history
* Create both local and central ServiceMonitor

* Change match labels and namespaces watched by ServiceMonitors

* Fix tests

* Get central ServiceMonitor and create/update as needed

Remove the boolean flag
  • Loading branch information
ruivieira authored Aug 3, 2023
1 parent 2c935c5 commit e7e9b60
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 75 deletions.
156 changes: 156 additions & 0 deletions controllers/monitor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
package controllers

import (
"context"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
trustyaiopendatahubiov1alpha1 "github.com/trustyai-explainability/trustyai-service-operator/api/v1alpha1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/log"
)

// generateServiceMonitorSpecCentral builds the central ServiceMonitor object.
//
// The object is named serviceMonitorName and placed in deploymentNamespace.
// Its namespace selector is set to Any, and it selects Services labelled
// "app.kubernetes.io/part-of" = serviceType. The single endpoint scrapes
// "/q/metrics" over http every 4s and keeps only metrics whose name matches
// "trustyai_.*".
func generateServiceMonitorSpecCentral(deploymentNamespace string) *monitoringv1.ServiceMonitor {
	// Drop every scraped series whose metric name is not a trustyai_ one.
	keepTrustyAIMetrics := &monitoringv1.RelabelConfig{
		Action:       "keep",
		Regex:        "trustyai_.*",
		SourceLabels: []monitoringv1.LabelName{"__name__"},
	}

	// Scrape configuration shared by every selected Service.
	endpoint := monitoringv1.Endpoint{
		Interval:    "4s",
		Path:        "/q/metrics",
		HonorLabels: true,
		Scheme:      "http",
		Params: map[string][]string{
			"match[]": {
				`{__name__= "trustyai_spd"}`,
				`{__name__= "trustyai_dir"}`,
			},
		},
		MetricRelabelConfigs: []*monitoringv1.RelabelConfig{keepTrustyAIMetrics},
	}

	meta := metav1.ObjectMeta{
		Name:      serviceMonitorName,
		Namespace: deploymentNamespace,
		Labels: map[string]string{
			"modelmesh-service": "modelmesh-serving",
		},
	}

	spec := monitoringv1.ServiceMonitorSpec{
		// Central monitor: look for matching Services in all namespaces.
		NamespaceSelector: monitoringv1.NamespaceSelector{Any: true},
		Endpoints:         []monitoringv1.Endpoint{endpoint},
		Selector: metav1.LabelSelector{
			MatchLabels: map[string]string{
				"app.kubernetes.io/part-of": serviceType,
			},
		},
	}

	return &monitoringv1.ServiceMonitor{ObjectMeta: meta, Spec: spec}
}

// ensureCentralServiceMonitor makes sure the central ServiceMonitor exists in
// the reconciler's namespace (r.Namespace), creating it when it is missing.
//
// An existing ServiceMonitor is left untouched: its spec is neither compared
// nor updated. The object is not tied to a single TrustyAIService, so another
// reconcile may create it between the Get and the Create below (or the Get may
// be served from a not-yet-updated cache); an AlreadyExists error from Create
// is therefore treated as success.
//
// Returns nil when the ServiceMonitor exists or was created, otherwise the
// Get/Create error (which is also logged).
func (r *TrustyAIServiceReconciler) ensureCentralServiceMonitor(ctx context.Context) error {
	serviceMonitor := generateServiceMonitorSpecCentral(r.Namespace)
	logger := log.FromContext(ctx)

	// Check if this ServiceMonitor already exists
	found := &monitoringv1.ServiceMonitor{}
	err := r.Get(ctx, types.NamespacedName{Name: serviceMonitor.Name, Namespace: serviceMonitor.Namespace}, found)
	if err == nil {
		// Already present, nothing to do.
		return nil
	}
	if !errors.IsNotFound(err) {
		logger.Error(err, "Failed to get central ServiceMonitor", "ServiceMonitor.Namespace", serviceMonitor.Namespace, "ServiceMonitor.Name", serviceMonitor.Name)
		return err
	}

	logger.Info("Creating a new central ServiceMonitor", "ServiceMonitor.Namespace", serviceMonitor.Namespace, "ServiceMonitor.Name", serviceMonitor.Name)
	err = r.Create(ctx, serviceMonitor)
	if err != nil && !errors.IsAlreadyExists(err) {
		logger.Error(err, "Failed to create central ServiceMonitor", "ServiceMonitor.Namespace", serviceMonitor.Namespace, "ServiceMonitor.Name", serviceMonitor.Name)
		return err
	}

	return nil
}

// generateServiceMonitorSpecLocal builds the ServiceMonitor object for a local
// (single-namespace) monitor.
//
// The object is named serviceName and placed in deploymentNamespace. Its
// namespace selector is restricted to deploymentNamespace, and it selects
// Services labelled "app.kubernetes.io/part-of" = serviceType. The single
// endpoint scrapes "/q/metrics" over http every 4s and keeps only metrics
// whose name matches "trustyai_.*".
//
// NOTE(review): the label selector does not reference serviceName, so it
// matches every part-of=serviceType Service in the namespace — confirm this is
// intended when several TrustyAIService instances share a namespace.
func generateServiceMonitorSpecLocal(deploymentNamespace string, serviceName string) *monitoringv1.ServiceMonitor {
	// Drop every scraped series whose metric name is not a trustyai_ one.
	keepTrustyAIMetrics := &monitoringv1.RelabelConfig{
		Action:       "keep",
		Regex:        "trustyai_.*",
		SourceLabels: []monitoringv1.LabelName{"__name__"},
	}

	// Scrape configuration shared by every selected Service.
	endpoint := monitoringv1.Endpoint{
		Interval:    "4s",
		Path:        "/q/metrics",
		HonorLabels: true,
		Scheme:      "http",
		Params: map[string][]string{
			"match[]": {
				`{__name__= "trustyai_spd"}`,
				`{__name__= "trustyai_dir"}`,
			},
		},
		MetricRelabelConfigs: []*monitoringv1.RelabelConfig{keepTrustyAIMetrics},
	}

	meta := metav1.ObjectMeta{
		Name:      serviceName,
		Namespace: deploymentNamespace,
		Labels: map[string]string{
			"modelmesh-service": "modelmesh-serving",
		},
	}

	spec := monitoringv1.ServiceMonitorSpec{
		// Local monitor: only look inside the deployment's own namespace.
		NamespaceSelector: monitoringv1.NamespaceSelector{
			MatchNames: []string{deploymentNamespace},
		},
		Endpoints: []monitoringv1.Endpoint{endpoint},
		Selector: metav1.LabelSelector{
			MatchLabels: map[string]string{
				"app.kubernetes.io/part-of": serviceType,
			},
		},
	}

	return &monitoringv1.ServiceMonitor{ObjectMeta: meta, Spec: spec}
}

// ensureLocalServiceMonitor makes sure the local ServiceMonitor for cr exists,
// creating it when it is missing.
//
// The ServiceMonitor is named after cr, lives in cr's namespace, and has cr
// set as its controller owner reference. An existing ServiceMonitor is left
// untouched: its spec is neither compared nor updated. An AlreadyExists error
// from Create is treated as success, since it means the object appeared
// between the Get and the Create (or the Get was served from a
// not-yet-updated cache).
//
// Returns nil when the ServiceMonitor exists or was created, otherwise the
// owner-reference, Get or Create error (Get/Create errors are also logged).
func (r *TrustyAIServiceReconciler) ensureLocalServiceMonitor(cr *trustyaiopendatahubiov1alpha1.TrustyAIService, ctx context.Context) error {
	serviceMonitor := generateServiceMonitorSpecLocal(cr.Namespace, cr.Name)
	logger := log.FromContext(ctx)

	// Set TrustyAIService instance as the owner and controller
	if err := ctrl.SetControllerReference(cr, serviceMonitor, r.Scheme); err != nil {
		return err
	}

	// Check if the ServiceMonitor already exists
	found := &monitoringv1.ServiceMonitor{}
	err := r.Get(ctx, types.NamespacedName{Name: serviceMonitor.Name, Namespace: serviceMonitor.Namespace}, found)
	if err == nil {
		// Already present, nothing to do.
		return nil
	}
	if !errors.IsNotFound(err) {
		logger.Error(err, "Failed to get local ServiceMonitor", "ServiceMonitor.Namespace", serviceMonitor.Namespace, "ServiceMonitor.Name", serviceMonitor.Name)
		return err
	}

	logger.Info("Creating a new local ServiceMonitor", "ServiceMonitor.Namespace", serviceMonitor.Namespace, "ServiceMonitor.Name", serviceMonitor.Name)
	err = r.Create(ctx, serviceMonitor)
	if err != nil && !errors.IsAlreadyExists(err) {
		logger.Error(err, "Failed to create local ServiceMonitor", "ServiceMonitor.Namespace", serviceMonitor.Namespace, "ServiceMonitor.Name", serviceMonitor.Name)
		return err
	}

	return nil
}
4 changes: 2 additions & 2 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ var _ = Describe("TrustyAI operator", func() {
Expect(deployment.Labels["app"]).Should(Equal(name))
Expect(deployment.Labels["app.kubernetes.io/name"]).Should(Equal(name))
Expect(deployment.Labels["app.kubernetes.io/instance"]).Should(Equal(name))
Expect(deployment.Labels["app.kubernetes.io/part-of"]).Should(Equal(name))
Expect(deployment.Labels["app.kubernetes.io/part-of"]).Should(Equal(serviceType))
Expect(deployment.Labels["app.kubernetes.io/version"]).Should(Equal("0.1.0"))

Expect(deployment.Spec.Template.Spec.Containers[0].Image).Should(Equal("quay.io/trustyai/trustyai-service:latest"))
Expand Down Expand Up @@ -298,7 +298,7 @@ var _ = Describe("TrustyAI operator", func() {
Expect(deployment.Labels["app"]).Should(Equal(name))
Expect(deployment.Labels["app.kubernetes.io/name"]).Should(Equal(name))
Expect(deployment.Labels["app.kubernetes.io/instance"]).Should(Equal(name))
Expect(deployment.Labels["app.kubernetes.io/part-of"]).Should(Equal(name))
Expect(deployment.Labels["app.kubernetes.io/part-of"]).Should(Equal(serviceType))
Expect(deployment.Labels["app.kubernetes.io/version"]).Should(Equal("0.1.0"))

Expect(deployment.Spec.Template.Spec.Containers[0].Image).Should(Equal("quay.io/trustyai/trustyai-service:latest"))
Expand Down
83 changes: 10 additions & 73 deletions controllers/trustyaiservice_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
goerrors "errors"
"fmt"
kserveapi "github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
trustyaiopendatahubiov1alpha1 "github.com/trustyai-explainability/trustyai-service-operator/api/v1alpha1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -49,6 +48,7 @@ const (
modelMeshLabelKey = "modelmesh-service"
modelMeshLabelValue = "modelmesh-serving"
volumeMountName = "volume"
serviceType = "trustyai-service"
)

// TrustyAIServiceReconciler reconciles a TrustyAIService object
Expand Down Expand Up @@ -80,7 +80,7 @@ func getCommonLabels(serviceName string) map[string]string {
"app": serviceName,
"app.kubernetes.io/name": serviceName,
"app.kubernetes.io/instance": serviceName,
"app.kubernetes.io/part-of": serviceName,
"app.kubernetes.io/part-of": serviceType,
"app.kubernetes.io/version": "0.1.0",
}
}
Expand Down Expand Up @@ -218,8 +218,14 @@ func (r *TrustyAIServiceReconciler) Reconcile(ctx context.Context, req ctrl.Requ
}
}

// Service Monitor
err = r.reconcileServiceMonitor(instance, ctx)
// Local Service Monitor
err = r.ensureLocalServiceMonitor(instance, ctx)
if err != nil {
return ctrl.Result{}, err
}

// Central Service Monitor
err = r.ensureCentralServiceMonitor(ctx)
if err != nil {
return RequeueWithError(err)
}
Expand Down Expand Up @@ -276,75 +282,6 @@ func (r *TrustyAIServiceReconciler) reconcileService(cr *trustyaiopendatahubiov1
return service, nil
}

// reconcileServiceMonitor makes sure a ServiceMonitor named serviceMonitorName
// exists in cr's namespace, creating it when it is missing.
//
// The ServiceMonitor is restricted to cr's namespace, selects Services
// labelled "app.kubernetes.io/name" = cr.Name, and has cr set as its
// controller owner reference. An existing ServiceMonitor is left untouched.
//
// Returns nil when the ServiceMonitor exists or was created, otherwise the
// owner-reference, Get or Create error (Get/Create errors are also logged).
func (r *TrustyAIServiceReconciler) reconcileServiceMonitor(cr *trustyaiopendatahubiov1alpha1.TrustyAIService, ctx context.Context) error {

	serviceMonitor := &monitoringv1.ServiceMonitor{
		ObjectMeta: metav1.ObjectMeta{
			Name:      serviceMonitorName,
			Namespace: cr.Namespace,
			Labels: map[string]string{
				"modelmesh-service": "modelmesh-serving",
			},
		},
		Spec: monitoringv1.ServiceMonitorSpec{
			NamespaceSelector: monitoringv1.NamespaceSelector{
				MatchNames: []string{cr.Namespace},
			},
			Endpoints: []monitoringv1.Endpoint{
				{
					Interval:    "4s",
					Path:        "/q/metrics",
					HonorLabels: true,
					Scheme:      "http",
					Params: map[string][]string{
						"match[]": {
							`{__name__= "trustyai_spd"}`,
							`{__name__= "trustyai_dir"}`,
						},
					},
					// Keep only series whose metric name matches trustyai_.*
					MetricRelabelConfigs: []*monitoringv1.RelabelConfig{
						{
							Action:       "keep",
							Regex:        "trustyai_.*",
							SourceLabels: []monitoringv1.LabelName{"__name__"},
						},
					},
				},
			},
			Selector: metav1.LabelSelector{
				MatchLabels: map[string]string{
					"app.kubernetes.io/name": cr.Name,
				},
			},
		},
	}

	// Set TrustyAIService instance as the owner and controller
	err := ctrl.SetControllerReference(cr, serviceMonitor, r.Scheme)
	if err != nil {
		return err
	}

	// Check if this ServiceMonitor already exists
	found := &monitoringv1.ServiceMonitor{}
	err = r.Get(ctx, types.NamespacedName{Name: serviceMonitor.Name, Namespace: serviceMonitor.Namespace}, found)
	if err != nil {
		if errors.IsNotFound(err) {
			log.FromContext(ctx).Info("Creating a new ServiceMonitor", "ServiceMonitor.Namespace", serviceMonitor.Namespace, "ServiceMonitor.Name", serviceMonitor.Name)
			err = r.Create(ctx, serviceMonitor)
			if err != nil {
				// The failure here is the Create call, not the lookup.
				log.FromContext(ctx).Error(err, "Failed to create ServiceMonitor", "ServiceMonitor.Namespace", serviceMonitor.Namespace, "ServiceMonitor.Name", serviceMonitor.Name)
				return err
			}
		} else {
			// The failure here is the Get call (any error other than NotFound).
			log.FromContext(ctx).Error(err, "Failed to get ServiceMonitor", "ServiceMonitor.Namespace", serviceMonitor.Namespace, "ServiceMonitor.Name", serviceMonitor.Name)
			return err
		}
	}

	return nil
}

// SetupWithManager sets up the controller with the Manager.
func (r *TrustyAIServiceReconciler) SetupWithManager(mgr ctrl.Manager) error {
// Watch ServingRuntime objects (not managed by this controller)
Expand Down

0 comments on commit e7e9b60

Please sign in to comment.