Skip to content

Commit

Permalink
Merge branch 'fix--batch-requeue-and-pg-stat' of github.com:hatchet-d…
Browse files Browse the repository at this point in the history
…ev/hatchet into fix--batch-requeue-and-pg-stat
  • Loading branch information
abelanger5 committed Aug 1, 2024
2 parents 0a1f55f + a5e249e commit bb51f40
Showing 1 changed file with 34 additions and 9 deletions.
43 changes: 34 additions & 9 deletions internal/services/controllers/jobs/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ type JobsControllerImpl struct {
a *hatcheterrors.Wrapped
partitionId string

requeueMutexes map[string]*sync.Mutex
requeueMutexes map[string]*sync.Mutex
reassignMutexes map[string]*sync.Mutex
timeoutMutexes map[string]*sync.Mutex
}

type JobsControllerOpt func(*JobsControllerOpts)
Expand Down Expand Up @@ -134,14 +136,16 @@ func New(fs ...JobsControllerOpt) (*JobsControllerImpl, error) {
a.WithData(map[string]interface{}{"service": "jobs-controller"})

return &JobsControllerImpl{
mq: opts.mq,
l: opts.l,
repo: opts.repo,
dv: opts.dv,
s: s,
a: a,
partitionId: opts.partitionId,
requeueMutexes: make(map[string]*sync.Mutex),
mq: opts.mq,
l: opts.l,
repo: opts.repo,
dv: opts.dv,
s: s,
a: a,
partitionId: opts.partitionId,
requeueMutexes: make(map[string]*sync.Mutex),
reassignMutexes: make(map[string]*sync.Mutex),
timeoutMutexes: make(map[string]*sync.Mutex),
}, nil
}

Expand Down Expand Up @@ -795,6 +799,17 @@ func (jc *JobsControllerImpl) runStepRunReassign(ctx context.Context, startedAt
// runStepRunReassignTenant looks for step runs that have been assigned to a worker but have not started,
// or have been running but the worker has become inactive.
func (ec *JobsControllerImpl) runStepRunReassignTenant(ctx context.Context, tenantId string) error {
// we want only one requeue running at a time for a tenant
if ec.reassignMutexes[tenantId] == nil {
ec.reassignMutexes[tenantId] = &sync.Mutex{}
}

if !ec.reassignMutexes[tenantId].TryLock() {
return nil
}

defer ec.reassignMutexes[tenantId].Unlock()

ctx, span := telemetry.NewSpan(ctx, "handle-step-run-reassign")
defer span.End()

Expand Down Expand Up @@ -888,6 +903,16 @@ func (jc *JobsControllerImpl) runStepRunTimeout(ctx context.Context) func() {

// runStepRunTimeoutTenant looks for step runs that are timed out in the tenant.
func (ec *JobsControllerImpl) runStepRunTimeoutTenant(ctx context.Context, tenantId string) error {
if ec.timeoutMutexes[tenantId] == nil {
ec.timeoutMutexes[tenantId] = &sync.Mutex{}
}

if !ec.timeoutMutexes[tenantId].TryLock() {
return nil
}

defer ec.timeoutMutexes[tenantId].Unlock()

ctx, span := telemetry.NewSpan(ctx, "handle-step-run-timeout")
defer span.End()

Expand Down

0 comments on commit bb51f40

Please sign in to comment.