From 365e28f91cb0b95e1706fcac62b01942ae651369 Mon Sep 17 00:00:00 2001 From: Vincent Planchenault Date: Sat, 9 Jul 2022 07:45:29 +0200 Subject: [PATCH] Fixed panic when creating host with huaweicloud stack (#356) - in some circumstances (notably when FlexibleEngine does not create host in a reasonable amount of time), tried to delete a server from nil pointer Co-authored-by: Vincent Planchenault --- lib/server/iaas/stacks/huaweicloud/compute.go | 70 +++++++++---------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/lib/server/iaas/stacks/huaweicloud/compute.go b/lib/server/iaas/stacks/huaweicloud/compute.go index b3596f78e..2036b190d 100644 --- a/lib/server/iaas/stacks/huaweicloud/compute.go +++ b/lib/server/iaas/stacks/huaweicloud/compute.go @@ -597,11 +597,7 @@ func (s stack) CreateHost(ctx context.Context, request abstract.HostRequest) (ho if server != nil && server.ID != "" { derr := servers.Delete(s.ComputeClient, server.ID).ExtractErr() if derr != nil { - _ = xerr.AddConsequence( - fail.Wrap( - derr, "cleaning up on failure, failed to delete host", - ), - ) + _ = xerr.AddConsequence(fail.Wrap(derr, "cleaning up on failure, failed to delete host")) } } } @@ -623,45 +619,46 @@ func (s stack) CreateHost(ctx context.Context, request abstract.HostRequest) (ho } } - creationZone, zoneErr := s.GetAvailabilityZoneOfServer(server.ID) + ahc.ID = server.ID + ahc.Name = server.Name + + creationZone, zoneErr := s.GetAvailabilityZoneOfServer(ahc.ID) if zoneErr != nil { - logrus.Tracef("Host successfully created but cannot confirm Availability Zone: %s", zoneErr) + logrus.Tracef("Host '%s' successfully created but cannot confirm Availability Zone: %s", server.Name, zoneErr) } else { - logrus.Tracef("Host successfully created in requested Availability Zone '%s'", creationZone) - if creationZone != srvOpts.AvailabilityZone { - if srvOpts.AvailabilityZone != "" { - logrus.Warnf( - "Host created in the WRONG availability zone: requested '%s' and got instead '%s'", - srvOpts.AvailabilityZone, creationZone, - ) - } + logrus.Tracef("Host '%s' successfully created in requested Availability Zone '%s'", server.Name, creationZone) + if creationZone != srvOpts.AvailabilityZone && srvOpts.AvailabilityZone != "" { + logrus.Warnf("Host '%s' created in the WRONG availability zone: requested '%s' and got instead '%s'", server.Name, srvOpts.AvailabilityZone, creationZone) } } defer func() { - if innerXErr != nil { - derr := servers.Delete(s.ComputeClient, server.ID).ExtractErr() + if innerXErr != nil && ahc.ID != "" { + derr := servers.Delete(s.ComputeClient, ahc.ID).ExtractErr() if derr != nil { logrus.Errorf("cleaning up on failure, failed to delete host: %s", derr.Error()) + } else { + ahc.ID = "" + ahc.Name = "" } } }() - ahc.ID = server.ID - ahc.Name = server.Name - // Wait that host is ready, not just that the build is started + //FIXME: timings.HostOperationTimeout() may not be sufficient time to wait when hosts are created in parallel... + // at least with it's current default value of 2 minutes and at least for flexibleengine provider + // We should think of a way to increase this timing based on number of hosts are created server, innerXErr = s.WaitHostState(ctx, ahc, hoststate.Started, timings.HostOperationTimeout()) if innerXErr != nil { switch innerXErr.(type) { case *fail.ErrNotAvailable: if server != nil { - ahc.ID = server.ID - ahc.Name = server.Name + // ahc.ID = server.ID + // ahc.Name = server.Name ahc.LastState = hoststate.Error } - return fail.Wrap(innerXErr, "host '%s' is in Error state", request.ResourceName) + default: return innerXErr } @@ -671,27 +668,15 @@ func (s stack) CreateHost(ctx context.Context, request abstract.HostRequest) (ho timings.NormalDelay(), timings.HostLongOperationTimeout(), ) - if retryErr != nil { - switch retryErr.(type) { - case *retry.ErrStopRetry: // here it should never happen - return nil, userData, fail.Wrap(fail.Cause(retryErr), "stopping retries") - case *retry.ErrTimeout: - return nil, userData, fail.Wrap(fail.Cause(retryErr), "timeout") - default: - return nil, userData, retryErr - } - } // Starting from here, delete host if exiting with error defer func() { - if ferr != nil { + if ferr != nil && ahc.ID != "" { derr := s.DeleteHost(ctx, ahc.ID) if derr != nil { switch derr.(type) { case *fail.ErrNotFound: - logrus.Errorf( - "Cleaning up on failure, failed to delete host '%s', resource not found: '%v'", ahc.Name, derr, - ) + logrus.Errorf("Cleaning up on failure, failed to delete host '%s', resource not found: '%v'", ahc.Name, derr) case *fail.ErrTimeout: logrus.Errorf("Cleaning up on failure, failed to delete host '%s', timeout: '%v'", ahc.Name, derr) default: @@ -702,6 +687,17 @@ func (s stack) CreateHost(ctx context.Context, request abstract.HostRequest) (ho } }() + if retryErr != nil { + switch retryErr.(type) { + case *retry.ErrStopRetry: // here it should never happen + return nil, userData, fail.Wrap(fail.Cause(retryErr), "stopping retries") + case *retry.ErrTimeout: + return nil, userData, fail.Wrap(fail.Cause(retryErr), "timeout") + default: + return nil, userData, retryErr + } + } + host, xerr = s.complementHost(ctx, ahc, server) if xerr != nil { return nil, nil, xerr