Skip to content

Commit

Permalink
[core] OCTRL-911 Transitions should not be performed concurrently
Browse files Browse the repository at this point in the history
  • Loading branch information
knopers8 committed Jul 31, 2024
1 parent bf6af3b commit d193466
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 5 deletions.
24 changes: 19 additions & 5 deletions core/environment/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ var log = logger.New(logrus.StandardLogger(), "env")
type Environment struct {
Mu sync.RWMutex
once sync.Once
transitionMutex sync.RWMutex
Sm *fsm.FSM
name string
id uid.ID
Expand Down Expand Up @@ -955,6 +956,13 @@ func (env *Environment) runTasksAsHooks(hooksToTrigger task.Tasks) (errorMap map
}

func (env *Environment) TryTransition(t Transition) (err error) {
if !env.transitionMutex.TryLock() {
log.WithField("partition", env.id.String()).
Warnf("environment transition attempt delayed: transition '%s' in progress. waiting for completion or failure", env.currentTransition)
env.transitionMutex.Lock()
}
defer env.transitionMutex.Unlock()

the.EventWriterWithTopic(topic.Environment).WriteEvent(&pb.Ev_EnvironmentEvent{
EnvironmentId: env.id.String(),
State: env.Sm.Current(),
Expand Down Expand Up @@ -1171,11 +1179,17 @@ func (env *Environment) subscribeToWfState(taskman *task.Manager) {
Warn("one of the critical tasks went into ERROR state, transitioning the environment into ERROR")
err := env.TryTransition(NewGoErrorTransition(taskman))
if err != nil {
log.WithField("partition", env.id).
WithError(err).
WithField("level", infologger.IL_Devel).
Warn("could not transition gently to ERROR, forcing it")
env.setState(wfState.String())
if env.Sm.Current() == "ERROR" {
log.WithField("partition", env.id).
WithField("level", infologger.IL_Devel).
Info("skipped requested transition to ERROR: environment already in ERROR state")
} else {
log.WithField("partition", env.id).
WithError(err).
WithField("level", infologger.IL_Devel).
Warn("could not transition gently to ERROR, forcing it")
env.setState(wfState.String())
}
}
toStop := env.Workflow().GetTasks().Filtered(func(t *task.Task) bool {
t.SetSafeToStop(true)
Expand Down
7 changes: 7 additions & 0 deletions core/environment/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,13 @@ func (envs *Manager) TeardownEnvironment(environmentId uid.ID, force bool) error
return err
}

if !env.transitionMutex.TryLock() {
log.WithField("partition", environmentId.String()).
Warnf("environment teardown attempt delayed: transition '%s' in progress. waiting for completion or failure", env.currentTransition)
env.transitionMutex.Lock()
}
defer env.transitionMutex.Unlock()

if env.CurrentState() != "STANDBY" && env.CurrentState() != "DEPLOYED" && !force {
return errors.New(fmt.Sprintf("cannot teardown environment in state %s", env.CurrentState()))
}
Expand Down

0 comments on commit d193466

Please sign in to comment.