Skip to content

Commit

Permalink
Fixed panic when creating host with huaweicloud stack (#356)
Browse files Browse the repository at this point in the history
- in some circumstances (notably when FlexibleEngine does not create host in a reasonable amount of time), tried to delete a server from nil pointer

Co-authored-by: Vincent Planchenault <[email protected]>
  • Loading branch information
Vincent Planchenault and Vincent Planchenault authored Jul 9, 2022
1 parent 891e482 commit 365e28f
Showing 1 changed file with 33 additions and 37 deletions.
70 changes: 33 additions & 37 deletions lib/server/iaas/stacks/huaweicloud/compute.go
Original file line number Diff line number Diff line change
Expand Up @@ -597,11 +597,7 @@ func (s stack) CreateHost(ctx context.Context, request abstract.HostRequest) (ho
if server != nil && server.ID != "" {
derr := servers.Delete(s.ComputeClient, server.ID).ExtractErr()
if derr != nil {
_ = xerr.AddConsequence(
fail.Wrap(
derr, "cleaning up on failure, failed to delete host",
),
)
_ = xerr.AddConsequence(fail.Wrap(derr, "cleaning up on failure, failed to delete host"))
}
}
}
Expand All @@ -623,45 +619,46 @@ func (s stack) CreateHost(ctx context.Context, request abstract.HostRequest) (ho
}
}

creationZone, zoneErr := s.GetAvailabilityZoneOfServer(server.ID)
ahc.ID = server.ID
ahc.Name = server.Name

creationZone, zoneErr := s.GetAvailabilityZoneOfServer(ahc.ID)
if zoneErr != nil {
logrus.Tracef("Host successfully created but cannot confirm Availability Zone: %s", zoneErr)
logrus.Tracef("Host '%s' successfully created but cannot confirm Availability Zone: %s", server.Name, zoneErr)
} else {
logrus.Tracef("Host successfully created in requested Availability Zone '%s'", creationZone)
if creationZone != srvOpts.AvailabilityZone {
if srvOpts.AvailabilityZone != "" {
logrus.Warnf(
"Host created in the WRONG availability zone: requested '%s' and got instead '%s'",
srvOpts.AvailabilityZone, creationZone,
)
}
logrus.Tracef("Host '%s' successfully created in requested Availability Zone '%s'", server.Name, creationZone)
if creationZone != srvOpts.AvailabilityZone && srvOpts.AvailabilityZone != "" {
logrus.Warnf("Host '%s' created in the WRONG availability zone: requested '%s' and got instead '%s'", server.Name, srvOpts.AvailabilityZone, creationZone)
}
}

defer func() {
if innerXErr != nil {
derr := servers.Delete(s.ComputeClient, server.ID).ExtractErr()
if innerXErr != nil && ahc.ID != "" {
derr := servers.Delete(s.ComputeClient, ahc.ID).ExtractErr()
if derr != nil {
logrus.Errorf("cleaning up on failure, failed to delete host: %s", derr.Error())
} else {
ahc.ID = ""
ahc.Name = ""
}
}
}()

ahc.ID = server.ID
ahc.Name = server.Name

// Wait that host is ready, not just that the build is started
//FIXME: timings.HostOperationTimeout() may not be sufficient time to wait when hosts are created in parallel...
// at least with it's current default value of 2 minutes and at least for flexibleengine provider
// We should think of a way to increase this timing based on number of hosts are created
server, innerXErr = s.WaitHostState(ctx, ahc, hoststate.Started, timings.HostOperationTimeout())
if innerXErr != nil {
switch innerXErr.(type) {
case *fail.ErrNotAvailable:
if server != nil {
ahc.ID = server.ID
ahc.Name = server.Name
// ahc.ID = server.ID
// ahc.Name = server.Name
ahc.LastState = hoststate.Error
}

return fail.Wrap(innerXErr, "host '%s' is in Error state", request.ResourceName)

default:
return innerXErr
}
Expand All @@ -671,27 +668,15 @@ func (s stack) CreateHost(ctx context.Context, request abstract.HostRequest) (ho
timings.NormalDelay(),
timings.HostLongOperationTimeout(),
)
if retryErr != nil {
switch retryErr.(type) {
case *retry.ErrStopRetry: // here it should never happen
return nil, userData, fail.Wrap(fail.Cause(retryErr), "stopping retries")
case *retry.ErrTimeout:
return nil, userData, fail.Wrap(fail.Cause(retryErr), "timeout")
default:
return nil, userData, retryErr
}
}

// Starting from here, delete host if exiting with error
defer func() {
if ferr != nil {
if ferr != nil && ahc.ID != "" {
derr := s.DeleteHost(ctx, ahc.ID)
if derr != nil {
switch derr.(type) {
case *fail.ErrNotFound:
logrus.Errorf(
"Cleaning up on failure, failed to delete host '%s', resource not found: '%v'", ahc.Name, derr,
)
logrus.Errorf("Cleaning up on failure, failed to delete host '%s', resource not found: '%v'", ahc.Name, derr)
case *fail.ErrTimeout:
logrus.Errorf("Cleaning up on failure, failed to delete host '%s', timeout: '%v'", ahc.Name, derr)
default:
Expand All @@ -702,6 +687,17 @@ func (s stack) CreateHost(ctx context.Context, request abstract.HostRequest) (ho
}
}()

if retryErr != nil {
switch retryErr.(type) {
case *retry.ErrStopRetry: // here it should never happen
return nil, userData, fail.Wrap(fail.Cause(retryErr), "stopping retries")
case *retry.ErrTimeout:
return nil, userData, fail.Wrap(fail.Cause(retryErr), "timeout")
default:
return nil, userData, retryErr
}
}

host, xerr = s.complementHost(ctx, ahc, server)
if xerr != nil {
return nil, nil, xerr
Expand Down

0 comments on commit 365e28f

Please sign in to comment.