Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[redhat-3.13] pgupgrade: Ensure that old pg deployments are terminated before upgrade job is run (PROJQUAY-8092) #992

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 40 additions & 4 deletions controllers/quay/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ func (r *QuayRegistryReconciler) checkMonitoringAvailable(
// checkNeedsPostgresUpgradeForComponent checks whether the deployed postgres image for the given component needs an upgrade
func (r *QuayRegistryReconciler) checkNeedsPostgresUpgradeForComponent(
ctx context.Context, qctx *quaycontext.QuayRegistryContext, quay *v1.QuayRegistry, component v1.ComponentKind,
) error {
) (err error, scaledDown bool) {
componentInfo := map[v1.ComponentKind]struct {
deploymentSuffix string
upgradeField *bool
Expand All @@ -419,7 +419,7 @@ func (r *QuayRegistryReconciler) checkNeedsPostgresUpgradeForComponent(

info, ok := componentInfo[component]
if !ok {
return fmt.Errorf("invalid component kind: %s", component)
return fmt.Errorf("invalid component kind: %s", component), false
}

deploymentName := fmt.Sprintf("%s-%s", quay.GetName(), info.deploymentSuffix)
Expand All @@ -435,7 +435,7 @@ func (r *QuayRegistryReconciler) checkNeedsPostgresUpgradeForComponent(
postgresDeployment,
); err != nil {
r.Log.Info(fmt.Sprintf("%s deployment not found, skipping", component))
return nil
return nil, true
}

deployedImageName := postgresDeployment.Spec.Template.Spec.Containers[0].Image
Expand All @@ -458,9 +458,45 @@ func (r *QuayRegistryReconciler) checkNeedsPostgresUpgradeForComponent(
*info.upgradeField = true
} else {
r.Log.Info(fmt.Sprintf("%s does not need to perform an upgrade", component))
return nil, true
}

return nil
// at this point we have determined that these postgres deployments need to be upgraded and can set them to 0 replicas
// so that the upgrade job can run with no interference
r.Log.Info(fmt.Sprintf("scaling down %s deployment", component))
postgresDeployment.Spec.Replicas = &[]int32{0}[0]
postgresDeployment.Spec.Template.Spec.TerminationGracePeriodSeconds = &[]int64{600}[0]
if err := r.Client.Update(ctx, postgresDeployment); err != nil {
r.Log.Error(err, "unable to update postgres deployment replicas")
}
// now check whether the deployment's pods have stopped; if any are still running we return false so the caller can requeue

terminatingPods := []corev1.Pod{}
podList := &corev1.PodList{}
labelSelector, err := metav1.LabelSelectorAsSelector(postgresDeployment.Spec.Selector)
if err != nil {
r.Log.Error(err, "unable to get label selector for postgres deployment")
}
err = r.Client.List(ctx, podList, &client.ListOptions{
LabelSelector: labelSelector,
})
if err != nil {
r.Log.Error(err, "unable to list pods for postgres deployment")
return err, false
}

for _, pod := range podList.Items {
if pod.Status.Phase == corev1.PodRunning {
terminatingPods = append(terminatingPods, pod)
}
}

if len(terminatingPods) > 0 {
r.Log.Info(fmt.Sprintf("Found %d pods in terminating status", len(terminatingPods)))
return nil, false
}

return nil, true
}

func extractImageName(imageName string) string {
Expand Down
10 changes: 8 additions & 2 deletions controllers/quay/quayregistry_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ func (r *QuayRegistryReconciler) Reconcile(ctx context.Context, req ctrl.Request

// Populate the QuayContext with whether or not the QuayRegistry needs an upgrade
if v1.ComponentIsManaged(updatedQuay.Spec.Components, v1.ComponentPostgres) {
err := r.checkNeedsPostgresUpgradeForComponent(ctx, quayContext, updatedQuay, v1.ComponentPostgres)
err, scaledDown := r.checkNeedsPostgresUpgradeForComponent(ctx, quayContext, updatedQuay, v1.ComponentPostgres)
if err != nil {
return r.reconcileWithCondition(
ctx,
Expand All @@ -564,11 +564,14 @@ func (r *QuayRegistryReconciler) Reconcile(ctx context.Context, req ctrl.Request
fmt.Sprintf("error checking for pg upgrade: %s", err),
)
}
if !scaledDown {
return r.Requeue, nil
}
}

// Populate the QuayContext with whether or not the QuayRegistry needs an upgrade
if v1.ComponentIsManaged(updatedQuay.Spec.Components, v1.ComponentClairPostgres) {
err := r.checkNeedsPostgresUpgradeForComponent(ctx, quayContext, updatedQuay, v1.ComponentClairPostgres)
err, scaledDown := r.checkNeedsPostgresUpgradeForComponent(ctx, quayContext, updatedQuay, v1.ComponentClairPostgres)
if err != nil {
return r.reconcileWithCondition(
ctx,
Expand All @@ -579,6 +582,9 @@ func (r *QuayRegistryReconciler) Reconcile(ctx context.Context, req ctrl.Request
fmt.Sprintf("error checking for pg upgrade: %s", err),
)
}
if !scaledDown {
return r.Requeue, nil
}
}

if err := r.checkBuildManagerAvailable(quayContext, cbundle); err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
labels:
quay-component: clair-postgres-old
spec:
terminationGracePeriodSeconds: 180
terminationGracePeriodSeconds: 600
serviceAccountName: clair-postgres
volumes:
- name: clair-postgres-conf-sample
Expand All @@ -27,15 +27,6 @@ spec:
- name: postgres-data
persistentVolumeClaim:
claimName: clair-postgres-13
initContainers:
- name: check-postgres-scale-down
image: quay.io/sclorg/postgresql-13-c9s:latest
command:
- /bin/sh
- -c
- |
echo "Waiting for 30 seconds before starting the main container..."
sleep 30
containers:
- name: postgres
image: quay.io/sclorg/postgresql-13-c9s:latest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ spec:
template:
spec:
restartPolicy: OnFailure
terminationGracePeriodSeconds: 180
terminationGracePeriodSeconds: 600
serviceAccountName: clair-postgres
volumes:
- name: clair-postgres-conf-sample
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
labels:
quay-component: clair-postgres
spec:
terminationGracePeriodSeconds: 180
terminationGracePeriodSeconds: 600
serviceAccountName: clair-postgres
volumes:
- name: clair-postgres-conf-sample
Expand Down
2 changes: 1 addition & 1 deletion kustomize/components/pgupgrade/quay-pg-old.deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
labels:
quay-component: postgres
spec:
terminationGracePeriodSeconds: 180
terminationGracePeriodSeconds: 600
serviceAccountName: quay-database
volumes:
- name: postgres-conf-sample
Expand Down
11 changes: 5 additions & 6 deletions kustomize/components/pgupgrade/quay-pg-upgrade.job.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ spec:
template:
spec:
restartPolicy: OnFailure
terminationGracePeriodSeconds: 180
terminationGracePeriodSeconds: 600
serviceAccountName: quay-database
volumes:
- name: postgres-conf-sample
Expand Down Expand Up @@ -55,10 +55,9 @@ spec:
cpu: 500m
memory: 2Gi
command:
- "/bin/sh"
- "-c"
- "/bin/sh"
- "-c"
args:
- >
run-postgresql --version || (echo "postgres migration command failed, cleaning up..." && rm -rf /var/lib/pgsql/data/* && exit 1)
- >
run-postgresql --version || (echo "postgres migration command failed, cleaning up..." && rm -rf /var/lib/pgsql/data/* && exit 1)
backoffLimit: 50

2 changes: 1 addition & 1 deletion kustomize/components/postgres/postgres.deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
labels:
quay-component: postgres
spec:
terminationGracePeriodSeconds: 180
terminationGracePeriodSeconds: 600
serviceAccountName: quay-database
volumes:
- name: postgres-conf-sample
Expand Down
Loading