Skip to content

Commit

Permalink
fix: fixed diagnosis info in pool draining (#551)
Browse files Browse the repository at this point in the history
Signed-off-by: Aylei <[email protected]>
  • Loading branch information
aylei authored Sep 26, 2024
1 parent d28d6ed commit f26606e
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 11 deletions.
25 changes: 15 additions & 10 deletions pkg/controllers/cnstore/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,20 +211,25 @@ func (c *withCNSet) OnPreparingStop(ctx *recon.Context[*corev1.Pod]) error {
return c.completeDraining(ctx)
}
if time.Since(startTime) > storeDrainTakesLongDuration {
ctx.Log.Info("store draining takes too long, collect diagnostic info", "uuid", uid)
if err := ctx.Patch(pod, func() error {
if pod.Annotations == nil {
pod.Annotations = map[string]string{}
}
pod.Annotations[diagnosDrainingAnno] = "y"
return nil
}); err != nil {
ctx.Log.Error(err, "error patching diagnos draining anno")
}
c.diagnosisDraining(ctx, uid)
}
return recon.ErrReSync("wait for CN store draining", retryInterval)
}

// diagnosisDraining flags the pod held by ctx for draining diagnostics.
// It logs that draining of the store identified by uid is taking too long and
// then patches the pod so that the diagnosDrainingAnno annotation is set to "y".
// A failed patch is only logged; no error is returned to the caller.
func (c *Controller) diagnosisDraining(ctx *recon.Context[*corev1.Pod], uid string) {
	ctx.Log.Info("store draining takes too long, collect diagnostic info", "uuid", uid)

	target := ctx.Obj
	// Mutation handed to ctx.Patch: make sure the annotation map exists
	// before writing the marker, since assigning into a nil map panics.
	markForDiagnosis := func() error {
		if target.Annotations == nil {
			target.Annotations = map[string]string{}
		}
		target.Annotations[diagnosDrainingAnno] = "y"
		return nil
	}

	err := ctx.Patch(target, markForDiagnosis)
	if err == nil {
		return
	}
	ctx.Log.Error(err, "error patching diagnos draining anno")
}

func (c *withCNSet) handleConnectionDraining(ctx *recon.Context[*corev1.Pod], uid string, timeout context.Context, h *mocli.ClientSet) (bool, error) {
pod := ctx.Obj
ctx.Log.Info("set CN store draining", "uuid", uid)
Expand Down
6 changes: 5 additions & 1 deletion pkg/controllers/cnstore/pooling.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,15 @@ package cnstore

import (
"context"
"time"

"github.com/go-errors/errors"
recon "github.com/matrixorigin/controller-runtime/pkg/reconciler"
"github.com/matrixorigin/matrixone-operator/api/core/v1alpha1"
"github.com/matrixorigin/matrixone-operator/pkg/controllers/common"
"github.com/matrixorigin/matrixone-operator/pkg/mocli"
"github.com/openkruise/kruise-api/apps/pub"
corev1 "k8s.io/api/core/v1"
"time"
)

func (c *withCNSet) poolingCNReconcile(ctx *recon.Context[*corev1.Pod]) error {
Expand Down Expand Up @@ -55,6 +56,9 @@ func (c *withCNSet) poolingCNReconcile(ctx *recon.Context[*corev1.Pod]) error {
// handle graceful logic in OnPreparingStop()
return evictPoolPodGracefully(ctx, pod)
}
if time.Since(parsed) > storeDrainTakesLongDuration {
c.diagnosisDraining(ctx, uid)
}
if time.Since(parsed) < c.cn.Spec.ScalingConfig.GetMinDelayDuration() {
return recon.ErrReSync("wait min delay duration", retryInterval)
}
Expand Down

0 comments on commit f26606e

Please sign in to comment.