From 094843d19a579f6d31caff99d16df4889af511ef Mon Sep 17 00:00:00 2001 From: Praveen Rewar <8457124+praveenrewar@users.noreply.github.com> Date: Mon, 19 Sep 2022 13:24:55 +0530 Subject: [PATCH] Apply as many changes as possible with errors at end (#532) - Changes that depend on failing changes shouldn't be applied - exit-early-on-apply-error flag can be used to exit as soon as an error is encountered while applying changes (default true) - exit-early-on-wait-error flag can be used to exit as soon as an error is encountered while waiting for changes (default true) --- pkg/kapp/clusterapply/applying_changes.go | 25 +- pkg/kapp/clusterapply/cluster_change.go | 2 +- pkg/kapp/clusterapply/cluster_change_set.go | 27 ++- pkg/kapp/clusterapply/waiting_changes.go | 27 ++- pkg/kapp/cmd/app/apply_flags.go | 4 + test/e2e/apply_wait_error_test.go | 253 ++++++++++++++++++++ test/e2e/formatted_error_test.go | 2 +- 7 files changed, 317 insertions(+), 23 deletions(-) create mode 100644 test/e2e/apply_wait_error_test.go diff --git a/pkg/kapp/clusterapply/applying_changes.go b/pkg/kapp/clusterapply/applying_changes.go index 25bda01ad..b240deecf 100644 --- a/pkg/kapp/clusterapply/applying_changes.go +++ b/pkg/kapp/clusterapply/applying_changes.go @@ -23,10 +23,11 @@ type ApplyingChanges struct { applied map[*ctldgraph.Change]struct{} clusterChangeFactory ClusterChangeFactory ui UI + exitOnError bool } -func NewApplyingChanges(numTotal int, opts ApplyingChangesOpts, clusterChangeFactory ClusterChangeFactory, ui UI) *ApplyingChanges { - return &ApplyingChanges{numTotal, opts, map[*ctldgraph.Change]struct{}{}, clusterChangeFactory, ui} +func NewApplyingChanges(numTotal int, opts ApplyingChangesOpts, clusterChangeFactory ClusterChangeFactory, ui UI, exitOnError bool) *ApplyingChanges { + return &ApplyingChanges{numTotal, opts, map[*ctldgraph.Change]struct{}{}, clusterChangeFactory, ui, exitOnError} } type applyResult struct { @@ -37,14 +38,16 @@ type applyResult struct { Err error } -func (c *ApplyingChanges) Apply(allChanges []*ctldgraph.Change) ([]WaitingChange, error) { +func (c *ApplyingChanges) Apply(allChanges []*ctldgraph.Change) ([]WaitingChange, []string, error) { startTime := time.Now() + var unsuccessfulChangeDesc []string + for { nonAppliedChanges := c.nonAppliedChanges(allChanges) if len(nonAppliedChanges) == 0 { // Do not print applying message if no changes - return nil, nil + return nil, unsuccessfulChangeDesc, nil } c.ui.NotifySection("applying %d changes %s", len(nonAppliedChanges), c.stats()) @@ -88,10 +91,14 @@ func (c *ApplyingChanges) Apply(allChanges []*ctldgraph.Change) ([]WaitingChange if result.Err != nil { lastErr = result.Err - if result.Retryable { - continue + if !result.Retryable { + if c.exitOnError { + return nil, nil, result.Err + } + unsuccessfulChangeDesc = append(unsuccessfulChangeDesc, result.Err.Error()) + c.markApplied(result.Change) } - return nil, result.Err + continue } c.markApplied(result.Change) @@ -99,11 +106,11 @@ func (c *ApplyingChanges) Apply(allChanges []*ctldgraph.Change) ([]WaitingChange } if len(appliedChanges) > 0 { - return appliedChanges, nil + return appliedChanges, unsuccessfulChangeDesc, nil } if time.Now().Sub(startTime) > c.opts.Timeout { - return nil, fmt.Errorf("Timed out waiting after %s: Last error: %w", c.opts.Timeout, lastErr) + return nil, unsuccessfulChangeDesc, fmt.Errorf("Timed out waiting after %s: Last error: %s", c.opts.Timeout, lastErr) } time.Sleep(c.opts.CheckInterval) diff --git a/pkg/kapp/clusterapply/cluster_change.go b/pkg/kapp/clusterapply/cluster_change.go index 84852677b..19b1921af 100644 --- a/pkg/kapp/clusterapply/cluster_change.go +++ b/pkg/kapp/clusterapply/cluster_change.go @@ -280,7 +280,7 @@ func (c *ClusterChange) applyErr(err error) error { } } - return fmt.Errorf("Applying %s: %s%s", c.ApplyDescription(), + return fmt.Errorf("%s: %s%s", c.ApplyDescription(), uierrs.NewSemiStructuredError(err), hintMsg) } diff --git a/pkg/kapp/clusterapply/cluster_change_set.go b/pkg/kapp/clusterapply/cluster_change_set.go index 48d2c81cd..f60261253 100644 --- a/pkg/kapp/clusterapply/cluster_change_set.go +++ b/pkg/kapp/clusterapply/cluster_change_set.go @@ -5,7 +5,9 @@ package clusterapply import ( "fmt" + "strings" + uierrs "github.com/cppforlife/go-cli-ui/errors" ctlconf "github.com/vmware-tanzu/carvel-kapp/pkg/kapp/config" ctldiff "github.com/vmware-tanzu/carvel-kapp/pkg/kapp/diff" ctldgraph "github.com/vmware-tanzu/carvel-kapp/pkg/kapp/diffgraph" @@ -15,6 +17,9 @@ import ( type ClusterChangeSetOpts struct { ApplyingChangesOpts WaitingChangesOpts + + ExitEarlyOnApplyError bool + ExitEarlyOnWaitError bool } type ClusterChangeSet struct { @@ -95,18 +100,30 @@ func (c ClusterChangeSet) Apply(changesGraph *ctldgraph.ChangeGraph) error { blockedChanges := ctldgraph.NewBlockedChanges(changesGraph) applyingChanges := NewApplyingChanges( - expectedNumChanges, c.opts.ApplyingChangesOpts, c.clusterChangeFactory, c.ui) - waitingChanges := NewWaitingChanges(expectedNumChanges, c.opts.WaitingChangesOpts, c.ui) + expectedNumChanges, c.opts.ApplyingChangesOpts, c.clusterChangeFactory, c.ui, c.opts.ExitEarlyOnApplyError) + waitingChanges := NewWaitingChanges(expectedNumChanges, c.opts.WaitingChangesOpts, c.ui, c.opts.ExitEarlyOnWaitError) + + var unsuccessfulChanges []string for { - appliedChanges, err := applyingChanges.Apply(blockedChanges.Unblocked()) + appliedChanges, unsuccessfulChangeDesc, err := applyingChanges.Apply(blockedChanges.Unblocked()) if err != nil { return err } + unsuccessfulChanges = append(unsuccessfulChanges, unsuccessfulChangeDesc...) + waitingChanges.Track(appliedChanges) if waitingChanges.IsEmpty() { + if len(unsuccessfulChanges) == 1 { + return fmt.Errorf("%s", unsuccessfulChanges[0]) + } + + if len(unsuccessfulChanges) > 0 { + return uierrs.NewSemiStructuredError(fmt.Errorf("[%s]", strings.Join(unsuccessfulChanges, ", "))) + } + err := applyingChanges.Complete() if err != nil { c.ui.Notify([]string{fmt.Sprintf("Blocked changes:\n%s\n", blockedChanges.WhyBlocked(blockedChanges.Blocked()))}) @@ -116,11 +133,13 @@ func (c ClusterChangeSet) Apply(changesGraph *ctldgraph.ChangeGraph) error { return waitingChanges.Complete() } - doneChanges, err := waitingChanges.WaitForAny() + doneChanges, unsuccessfulChangeDesc, err := waitingChanges.WaitForAny() if err != nil { return err } + unsuccessfulChanges = append(unsuccessfulChanges, unsuccessfulChangeDesc...) + for _, change := range doneChanges { blockedChanges.Unblock(change.Graph) } diff --git a/pkg/kapp/clusterapply/waiting_changes.go b/pkg/kapp/clusterapply/waiting_changes.go index 3cedd8430..18695caac 100644 --- a/pkg/kapp/clusterapply/waiting_changes.go +++ b/pkg/kapp/clusterapply/waiting_changes.go @@ -27,6 +27,7 @@ type WaitingChanges struct { trackedChanges []WaitingChange opts WaitingChangesOpts ui UI + exitOnError bool } type WaitingChange struct { @@ -35,8 +36,8 @@ type WaitingChange struct { startTime time.Time } -func NewWaitingChanges(numTotal int, opts WaitingChangesOpts, ui UI) *WaitingChanges { - return &WaitingChanges{numTotal, 0, nil, opts, ui} +func NewWaitingChanges(numTotal int, opts WaitingChangesOpts, ui UI, exitOnError bool) *WaitingChanges { + return &WaitingChanges{numTotal, 0, nil, opts, ui, exitOnError} } func (c *WaitingChanges) Track(changes []WaitingChange) { @@ -54,7 +55,7 @@ type waitResult struct { Err error } -func (c *WaitingChanges) WaitForAny() ([]WaitingChange, error) { +func (c *WaitingChanges) WaitForAny() ([]WaitingChange, []string, error) { startTime := time.Now() for { @@ -83,6 +84,7 @@ func (c *WaitingChanges) WaitForAny() ([]WaitingChange, error) { var newInProgressChanges []WaitingChange var doneChanges []WaitingChange + var unsuccessfulChangeDesc []string for i := 0; i < len(c.trackedChanges); i++ { result := <-waitCh @@ -92,7 +94,12 @@ func (c *WaitingChanges) WaitForAny() ([]WaitingChange, error) { c.ui.Notify(descMsgs) if err != nil { - return nil, fmt.Errorf("%s: Errored: %w", desc, err) + err = fmt.Errorf("%s: Errored: %w", desc, err) + if c.exitOnError { + return nil, nil, err + } + unsuccessfulChangeDesc = append(unsuccessfulChangeDesc, err.Error()) + continue } if state.Done { c.numWaited++ @@ -111,7 +118,11 @@ func (c *WaitingChanges) WaitForAny() ([]WaitingChange, error) { if len(state.Message) > 0 { msg += " (" + state.Message + ")" } - return nil, fmt.Errorf("%s: Finished unsuccessfully%s", desc, msg) + err := fmt.Errorf("%s: Finished unsuccessfully%s", desc, msg) + if c.exitOnError { + return nil, nil, err + } + unsuccessfulChangeDesc = append(unsuccessfulChangeDesc, err.Error()) case state.Done && state.Successful: doneChanges = append(doneChanges, change) @@ -120,8 +131,8 @@ func (c *WaitingChanges) WaitForAny() ([]WaitingChange, error) { c.trackedChanges = newInProgressChanges - if len(c.trackedChanges) == 0 || len(doneChanges) > 0 { - return doneChanges, nil + if len(c.trackedChanges) == 0 || len(doneChanges) > 0 || len(unsuccessfulChangeDesc) > 0 { + return doneChanges, unsuccessfulChangeDesc, nil } if time.Now().Sub(startTime) > c.opts.Timeout { @@ -129,7 +140,7 @@ func (c *WaitingChanges) WaitForAny() ([]WaitingChange, error) { for _, change := range c.trackedChanges { trackedResourcesDesc = append(trackedResourcesDesc, change.Cluster.Resource().Description()) } - return nil, uierrs.NewSemiStructuredError(fmt.Errorf("Timed out waiting after %s for resources: [%s]", c.opts.Timeout, strings.Join(trackedResourcesDesc, ", "))) + return nil, unsuccessfulChangeDesc, uierrs.NewSemiStructuredError(fmt.Errorf("Timed out waiting after %s for resources: [%s]", c.opts.Timeout, strings.Join(trackedResourcesDesc, ", "))) } time.Sleep(c.opts.CheckInterval) diff --git a/pkg/kapp/cmd/app/apply_flags.go b/pkg/kapp/cmd/app/apply_flags.go index ce4865a35..ecbd50293 100644 --- a/pkg/kapp/cmd/app/apply_flags.go +++ b/pkg/kapp/cmd/app/apply_flags.go @@ -50,6 +50,8 @@ func (s *ApplyFlags) SetWithDefaults(prefix string, defaults ApplyFlags, cmd *co cmd.Flags().StringVar(&s.AddOrUpdateChangeOpts.DefaultUpdateStrategy, prefix+"apply-default-update-strategy", defaults.AddOrUpdateChangeOpts.DefaultUpdateStrategy, "Change default update strategy") + cmd.Flags().BoolVar(&s.ExitEarlyOnApplyError, prefix+"exit-early-on-apply-error", true, "Exit quickly on apply failure") + cmd.Flags().BoolVar(&s.Wait, prefix+"wait", defaults.Wait, "Set to wait for changes to be applied") cmd.Flags().BoolVar(&s.WaitIgnored, prefix+"wait-ignored", defaults.WaitIgnored, "Set to wait for ignored changes to be applied") @@ -63,6 +65,8 @@ func (s *ApplyFlags) SetWithDefaults(prefix string, defaults ApplyFlags, cmd *co 5, "Maximum number of concurrent wait operations") cmd.Flags().BoolVar(&s.ExitStatus, prefix+"apply-exit-status", false, "Return specific exit status based on number of changes") + + cmd.Flags().BoolVar(&s.ExitEarlyOnWaitError, prefix+"exit-early-on-wait-error", true, "Exit quickly on wait failure") } func mustParseDuration(str string) time.Duration { diff --git a/test/e2e/apply_wait_error_test.go b/test/e2e/apply_wait_error_test.go new file mode 100644 index 000000000..a4fc2a971 --- /dev/null +++ b/test/e2e/apply_wait_error_test.go @@ -0,0 +1,253 @@ +// Copyright 2022 VMware, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package e2e + +import ( + "fmt" + "strings" + "testing" + + uitest "github.com/cppforlife/go-cli-ui/ui/test" + "github.com/stretchr/testify/require" +) + +func TestApplyWaitErrors(t *testing.T) { + env := BuildEnv(t) + logger := Logger{} + kapp := Kapp{t, env.Namespace, env.KappBinaryPath, logger} + + yaml1 := ` +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: my-job-fail + annotations: + kapp.k14s.io/change-group: "job-fail" +spec: + template: + metadata: + name: my-job-fail + spec: + restartPolicy: Never + containers: + - name: my-job-fail + image: busybox + command: [ "sh", "-c", "exit 1" ] + backoffLimit: 0 +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: my-job-fail-2 + annotations: + kapp.k14s.io/change-group: "job-fail" +spec: + template: + metadata: + name: my-job-fail-2 + spec: + restartPolicy: Never + containers: + - name: my-job-fail-2 + image: busybox + command: [ "sh", "-c", "exit 1" ] + backoffLimit: 0 +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: my-job-succeed + annotations: + kapp.k14s.io/change-group: "job-succeed" + kapp.k14s.io/change-rule: "upsert before upserting service-not-deployed" +spec: + template: + metadata: + name: my-job-succeed + spec: + restartPolicy: Never + containers: + - name: my-job-succeed + image: busybox + command: [ "sh", "-c", "exit 0" ] + backoffLimit: 0 +--- +apiVersion: v1 +kind: Service +metadata: + name: service-succeed + annotations: + kapp.k14s.io/change-rule: "upsert after upserting job-succeed" +spec: + ports: + - port: 80 +--- +apiVersion: v1 +kind: Service +metadata: + name: service-fail + annotations: + kapp.k14s.io/change-group: "service-fail" + kapp.k14s.io/change-rule: "upsert after upserting job-succeed" +--- +apiVersion: v1 +kind: Service +metadata: + name: service-not-deployed + annotations: + kapp.k14s.io/change-group: "service-not-deployed" + kapp.k14s.io/change-rule: "upsert after upserting job-fail" +spec: + ports: + - port: 80 +--- +apiVersion: v1 +kind: Service +metadata: + name: service-not-deployed-2 + annotations: + kapp.k14s.io/change-rule: "upsert after upserting service-fail" +spec: + ports: + - port: 80 +` + + name := "test-multiple-errors" + cleanUp := func() { + kapp.Run([]string{"delete", "-a", name}) + } + + cleanUp() + defer cleanUp() + + logger.Section("deploy with multiple errors and exit early on error set to false", func() { + _, err := kapp.RunWithOpts([]string{"deploy", "-f", "-", "-a", name, "--exit-early-on-apply-error=false", "--exit-early-on-wait-error=false"}, + RunOpts{StdinReader: strings.NewReader(yaml1), AllowError: true}) + + expectedErrs := []string{ + `kapp: Error:`, + fmt.Sprintf(`- waiting on reconcile job/my-job-fail-2 (batch/v1) namespace: %s: Finished unsuccessfully (Failed with reason BackoffLimitExceeded: Job has reached the specified backoff limit)`, env.Namespace), + fmt.Sprintf(`- waiting on reconcile job/my-job-fail (batch/v1) namespace: %s: Finished unsuccessfully (Failed with reason BackoffLimitExceeded: Job has reached the specified backoff limit)`, env.Namespace), + fmt.Sprintf(`- create service/service-fail (v1) namespace: %s: Creating resource service/service-fail (v1) namespace: kapp-test: API server says: Service "service-fail" is invalid: spec.ports: Required value (reason: Invalid)`, env.Namespace), + } + + for _, expectedErr := range expectedErrs { + require.Containsf(t, err.Error(), expectedErr, "Expected to see expected err in output, but did not") + } + + out := kapp.Run([]string{"inspect", "-a", name, "--filter-kind", "Service", "--json"}) + + expectedResources := []map[string]string{{ + "age": "", + "kind": "Service", + "name": "service-succeed", + "namespace": env.Namespace, + "owner": "kapp", + "reconcile_info": "", + "reconcile_state": "ok", + }} + + inspectOut := uitest.JSONUIFromBytes(t, []byte(out)) + + require.Exactlyf(t, expectedResources, replaceAge(inspectOut.Tables[0].Rows), "Expected to see correct changes") + }) +} + +func TestExitEarlyOnApplyErrorFlag(t *testing.T) { + env := BuildEnv(t) + logger := Logger{} + kapp := Kapp{t, env.Namespace, env.KappBinaryPath, logger} + + yaml1 := ` +--- +apiVersion: v1 +kind: Service +metadata: + name: service-fail +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: my-job-succeed + annotations: + kapp.k14s.io/change-group: "job-succeed" +spec: + template: + metadata: + name: my-job-succeed + spec: + restartPolicy: Never + containers: + - name: my-job-succeed + image: busybox + command: [ "sh", "-c", "exit 0" ] + backoffLimit: 0 +--- +apiVersion: v1 +kind: Service +metadata: + name: service-succeed + annotations: + kapp.k14s.io/change-rule: "upsert after upserting job-succeed" +spec: + ports: + - port: 80 +` + + name := "test-exit-early-on-apply-error" + cleanUp := func() { + kapp.Run([]string{"delete", "-a", name}) + } + + cleanUp() + defer cleanUp() + + expectedErr := strings.ReplaceAll(`kapp: Error: create service/service-fail (v1) namespace: : + Creating resource service/service-fail (v1) namespace: : + API server says: + Service "service-fail" is invalid: spec.ports: + Required value (reason: Invalid) +`, "", env.Namespace) + + logger.Section("deploy with exit-early-on-apply-error=false", func() { + _, err := kapp.RunWithOpts([]string{"deploy", "-f", "-", "-a", name, "--exit-early-on-apply-error=false"}, + RunOpts{StdinReader: strings.NewReader(yaml1), AllowError: true}) + + require.Containsf(t, err.Error(), expectedErr, "Expected to see expected err in output, but did not") + + out := kapp.Run([]string{"inspect", "-a", name, "--json", "--filter-kind", "Service"}) + + expectedResources := []map[string]string{{ + "age": "", + "kind": "Service", + "name": "service-succeed", + "namespace": env.Namespace, + "owner": "kapp", + "reconcile_info": "", + "reconcile_state": "ok", + }} + + inspectOut := uitest.JSONUIFromBytes(t, []byte(out)) + + require.Exactlyf(t, expectedResources, replaceAge(inspectOut.Tables[0].Rows), "Expected to see correct changes") + }) + + cleanUp() + + logger.Section("deploy with exit-early-on-apply-error=true", func() { + _, err := kapp.RunWithOpts([]string{"deploy", "-f", "-", "-a", name}, + RunOpts{StdinReader: strings.NewReader(yaml1), AllowError: true}) + + require.Containsf(t, err.Error(), expectedErr, "Expected to see expected err in output, but did not") + + out := kapp.Run([]string{"inspect", "-a", name, "--json", "--filter-kind", "Service"}) + + expectedResources := []map[string]string{} + + inspectOut := uitest.JSONUIFromBytes(t, []byte(out)) + + require.Exactlyf(t, expectedResources, replaceAge(inspectOut.Tables[0].Rows), "Expected to see correct changes") + }) +} diff --git a/test/e2e/formatted_error_test.go b/test/e2e/formatted_error_test.go index 345c77f0c..7dfe27782 100644 --- a/test/e2e/formatted_error_test.go +++ b/test/e2e/formatted_error_test.go @@ -50,7 +50,7 @@ spec: logger.Section("deploy with errors", func() { expectedErr := strings.TrimSpace(` -kapp: Error: Applying create job/successful-job (batch/v1) namespace: default: +kapp: Error: create job/successful-job (batch/v1) namespace: default: Creating resource job/successful-job (batch/v1) namespace: default: API server says: Job.batch "successful-job" is invalid: