diff --git a/controllers/quay/features.go b/controllers/quay/features.go index b8511b6f9..ac33acb9b 100644 --- a/controllers/quay/features.go +++ b/controllers/quay/features.go @@ -408,7 +408,7 @@ func (r *QuayRegistryReconciler) checkMonitoringAvailable( // checkPostgresVersion returns the image name used by the currently deployed postgres version func (r *QuayRegistryReconciler) checkNeedsPostgresUpgradeForComponent( ctx context.Context, qctx *quaycontext.QuayRegistryContext, quay *v1.QuayRegistry, component v1.ComponentKind, -) error { +) (err error, scaledDown bool) { componentInfo := map[v1.ComponentKind]struct { deploymentSuffix string upgradeField *bool @@ -419,7 +419,7 @@ func (r *QuayRegistryReconciler) checkNeedsPostgresUpgradeForComponent( info, ok := componentInfo[component] if !ok { - return fmt.Errorf("invalid component kind: %s", component) + return fmt.Errorf("invalid component kind: %s", component), false } deploymentName := fmt.Sprintf("%s-%s", quay.GetName(), info.deploymentSuffix) @@ -435,7 +435,7 @@ func (r *QuayRegistryReconciler) checkNeedsPostgresUpgradeForComponent( postgresDeployment, ); err != nil { r.Log.Info(fmt.Sprintf("%s deployment not found, skipping", component)) - return nil + return nil, true } deployedImageName := postgresDeployment.Spec.Template.Spec.Containers[0].Image @@ -458,9 +458,45 @@ func (r *QuayRegistryReconciler) checkNeedsPostgresUpgradeForComponent( *info.upgradeField = true } else { r.Log.Info(fmt.Sprintf("%s does not need to perform an upgrade", component)) + return nil, true } - return nil + // at this point we have determined that these postgres deployments need to be upgraded and can set them to 0 replicas + // so that the upgrade job can run with no interference + r.Log.Info(fmt.Sprintf("scaling down %s deployment", component)) + postgresDeployment.Spec.Replicas = &[]int32{0}[0] + postgresDeployment.Spec.Template.Spec.TerminationGracePeriodSeconds = &[]int64{600}[0] + if err := r.Client.Update(ctx, 
postgresDeployment); err != nil { + r.Log.Error(err, "unable to update postgres deployment replicas") + } + // now we wait to ensure that the deployment has scaled down before we proceed + + terminatingPods := []corev1.Pod{} + podList := &corev1.PodList{} + labelSelector, err := metav1.LabelSelectorAsSelector(postgresDeployment.Spec.Selector) + if err != nil { + r.Log.Error(err, "unable to get label selector for postgres deployment") + } + err = r.Client.List(ctx, podList, &client.ListOptions{ + LabelSelector: labelSelector, + }) + if err != nil { + r.Log.Error(err, "unable to list pods for postgres deployment") + return err, false + } + + for _, pod := range podList.Items { + if pod.Status.Phase == corev1.PodRunning { + terminatingPods = append(terminatingPods, pod) + } + } + + if len(terminatingPods) > 0 { + r.Log.Info(fmt.Sprintf("Found %d pods in terminating status", len(terminatingPods))) + return nil, false + } + + return nil, true } func extractImageName(imageName string) string { diff --git a/controllers/quay/quayregistry_controller.go b/controllers/quay/quayregistry_controller.go index 2008af692..0eebb56f7 100644 --- a/controllers/quay/quayregistry_controller.go +++ b/controllers/quay/quayregistry_controller.go @@ -553,7 +553,7 @@ func (r *QuayRegistryReconciler) Reconcile(ctx context.Context, req ctrl.Request // Populate the QuayContext with whether or not the QuayRegistry needs an upgrade if v1.ComponentIsManaged(updatedQuay.Spec.Components, v1.ComponentPostgres) { - err := r.checkNeedsPostgresUpgradeForComponent(ctx, quayContext, updatedQuay, v1.ComponentPostgres) + err, scaledDown := r.checkNeedsPostgresUpgradeForComponent(ctx, quayContext, updatedQuay, v1.ComponentPostgres) if err != nil { return r.reconcileWithCondition( ctx, @@ -564,11 +564,14 @@ func (r *QuayRegistryReconciler) Reconcile(ctx context.Context, req ctrl.Request fmt.Sprintf("error checking for pg upgrade: %s", err), ) } + if !scaledDown { + return r.Requeue, nil + } } // Populate the 
QuayContext with whether or not the QuayRegistry needs an upgrade if v1.ComponentIsManaged(updatedQuay.Spec.Components, v1.ComponentClairPostgres) { - err := r.checkNeedsPostgresUpgradeForComponent(ctx, quayContext, updatedQuay, v1.ComponentClairPostgres) + err, scaledDown := r.checkNeedsPostgresUpgradeForComponent(ctx, quayContext, updatedQuay, v1.ComponentClairPostgres) if err != nil { return r.reconcileWithCondition( ctx, @@ -579,6 +582,9 @@ func (r *QuayRegistryReconciler) Reconcile(ctx context.Context, req ctrl.Request fmt.Sprintf("error checking for pg upgrade: %s", err), ) } + if !scaledDown { + return r.Requeue, nil + } } if err := r.checkBuildManagerAvailable(quayContext, cbundle); err != nil { diff --git a/kustomize/components/clairpgupgrade/base/clair-pg-old.deployment.yaml b/kustomize/components/clairpgupgrade/base/clair-pg-old.deployment.yaml index 5514631de..53340a3b8 100644 --- a/kustomize/components/clairpgupgrade/base/clair-pg-old.deployment.yaml +++ b/kustomize/components/clairpgupgrade/base/clair-pg-old.deployment.yaml @@ -18,7 +18,7 @@ spec: labels: quay-component: clair-postgres-old spec: - terminationGracePeriodSeconds: 180 + terminationGracePeriodSeconds: 600 serviceAccountName: clair-postgres volumes: - name: clair-postgres-conf-sample @@ -27,15 +27,6 @@ spec: - name: postgres-data persistentVolumeClaim: claimName: clair-postgres-13 - initContainers: - - name: check-postgres-scale-down - image: quay.io/sclorg/postgresql-13-c9s:latest - command: - - /bin/sh - - -c - - | - echo "Waiting for 30 seconds before starting the main container..." 
- sleep 30 containers: - name: postgres image: quay.io/sclorg/postgresql-13-c9s:latest diff --git a/kustomize/components/clairpgupgrade/base/clair-pg-upgrade.job.yaml b/kustomize/components/clairpgupgrade/base/clair-pg-upgrade.job.yaml index 19be19445..49aed104d 100644 --- a/kustomize/components/clairpgupgrade/base/clair-pg-upgrade.job.yaml +++ b/kustomize/components/clairpgupgrade/base/clair-pg-upgrade.job.yaml @@ -10,7 +10,7 @@ spec: template: spec: restartPolicy: OnFailure - terminationGracePeriodSeconds: 180 + terminationGracePeriodSeconds: 600 serviceAccountName: clair-postgres volumes: - name: clair-postgres-conf-sample diff --git a/kustomize/components/clairpostgres/postgres.deployment.yaml b/kustomize/components/clairpostgres/postgres.deployment.yaml index e31c093c4..53a8c5922 100644 --- a/kustomize/components/clairpostgres/postgres.deployment.yaml +++ b/kustomize/components/clairpostgres/postgres.deployment.yaml @@ -18,7 +18,7 @@ spec: labels: quay-component: clair-postgres spec: - terminationGracePeriodSeconds: 180 + terminationGracePeriodSeconds: 600 serviceAccountName: clair-postgres volumes: - name: clair-postgres-conf-sample diff --git a/kustomize/components/pgupgrade/quay-pg-old.deployment.yaml b/kustomize/components/pgupgrade/quay-pg-old.deployment.yaml index 0f8c34951..e106cf82d 100644 --- a/kustomize/components/pgupgrade/quay-pg-old.deployment.yaml +++ b/kustomize/components/pgupgrade/quay-pg-old.deployment.yaml @@ -18,7 +18,7 @@ spec: labels: quay-component: postgres spec: - terminationGracePeriodSeconds: 180 + terminationGracePeriodSeconds: 600 serviceAccountName: quay-database volumes: - name: postgres-conf-sample diff --git a/kustomize/components/pgupgrade/quay-pg-upgrade.job.yaml b/kustomize/components/pgupgrade/quay-pg-upgrade.job.yaml index 1b5e6dd9e..7a80e121e 100644 --- a/kustomize/components/pgupgrade/quay-pg-upgrade.job.yaml +++ b/kustomize/components/pgupgrade/quay-pg-upgrade.job.yaml @@ -10,7 +10,7 @@ spec: template: spec: 
restartPolicy: OnFailure - terminationGracePeriodSeconds: 180 + terminationGracePeriodSeconds: 600 serviceAccountName: quay-database volumes: - name: postgres-conf-sample @@ -55,10 +55,9 @@ spec: cpu: 500m memory: 2Gi command: - - "/bin/sh" - - "-c" + - "/bin/sh" + - "-c" args: - - > - run-postgresql --version || (echo "postgres migration command failed, cleaning up..." && rm -rf /var/lib/pgsql/data/* && exit 1) + - > + run-postgresql --version || (echo "postgres migration command failed, cleaning up..." && rm -rf /var/lib/pgsql/data/* && exit 1) backoffLimit: 50 - diff --git a/kustomize/components/postgres/postgres.deployment.yaml b/kustomize/components/postgres/postgres.deployment.yaml index 6f18ad244..650ff9576 100644 --- a/kustomize/components/postgres/postgres.deployment.yaml +++ b/kustomize/components/postgres/postgres.deployment.yaml @@ -18,7 +18,7 @@ spec: labels: quay-component: postgres spec: - terminationGracePeriodSeconds: 180 + terminationGracePeriodSeconds: 600 serviceAccountName: quay-database volumes: - name: postgres-conf-sample