gyfora commented on code in PR #165: URL: https://github.com/apache/flink-kubernetes-operator/pull/165#discussion_r850340937
########## flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/reconciler/deployment/ApplicationReconciler.java: ########## @@ -102,36 +111,84 @@ public void reconcile(FlinkDeployment flinkApp, Context context, Configuration e } if (currentJobState == JobState.SUSPENDED && desiredJobState == JobState.RUNNING) { if (upgradeMode == UpgradeMode.STATELESS) { - deployFlinkJob(flinkApp, effectiveConfig, Optional.empty()); - } else if (upgradeMode == UpgradeMode.LAST_STATE - || upgradeMode == UpgradeMode.SAVEPOINT) { - restoreFromLastSavepoint(flinkApp, effectiveConfig); + deployFlinkJob(currentJobSpec, status, effectiveConfig, Optional.empty()); + } else { + restoreFromLastSavepoint(currentJobSpec, status, effectiveConfig); } stateAfterReconcile = JobState.RUNNING; } - IngressUtils.updateIngressRules(flinkApp, effectiveConfig, kubernetesClient); + IngressUtils.updateIngressRules( + deployMeta, currentDeploySpec, effectiveConfig, kubernetesClient); ReconciliationUtils.updateForSpecReconciliationSuccess(flinkApp, stateAfterReconcile); - } else if (SavepointUtils.shouldTriggerSavepoint(flinkApp) && isJobRunning(flinkApp)) { + } else if (ReconciliationUtils.shouldRollBack(reconciliationStatus, effectiveConfig)) { + rollbackApplication(flinkApp); + } else if (SavepointUtils.shouldTriggerSavepoint(currentJobSpec, status) + && isJobRunning(status)) { triggerSavepoint(flinkApp, effectiveConfig); ReconciliationUtils.updateSavepointReconciliationSuccess(flinkApp); + } else { + LOG.info("Deployment is fully reconciled, nothing to do."); } } + private void rollbackApplication(FlinkDeployment flinkApp) throws Exception { + ReconciliationStatus reconciliationStatus = flinkApp.getStatus().getReconciliationStatus(); + + if (reconciliationStatus.getState() != ReconciliationStatus.State.ROLLING_BACK) { + LOG.warn("Preparing to roll back to last stable spec."); + if (flinkApp.getStatus().getError() == null) { + flinkApp.getStatus() + .setError( + "Deployment is not 
ready within the configured timeout, rolling-back."); + } + reconciliationStatus.setState(ReconciliationStatus.State.ROLLING_BACK); + return; + } + + LOG.warn("Executing roll-back operation"); + + FlinkDeploymentSpec rollbackSpec = reconciliationStatus.deserializeLastStableSpec(); + Configuration rollbackConfig = + FlinkUtils.getEffectiveConfig(flinkApp.getMetadata(), rollbackSpec, defaultConfig); + + UpgradeMode upgradeMode = flinkApp.getSpec().getJob().getUpgradeMode(); + + suspendJob( + flinkApp, + upgradeMode == UpgradeMode.STATELESS + ? UpgradeMode.STATELESS + : UpgradeMode.LAST_STATE, + rollbackConfig); + deployFlinkJob( + rollbackSpec.getJob(), + flinkApp.getStatus(), + rollbackConfig, + Optional.ofNullable( Review Comment: Good catch. In that case we should always pass `Optional.empty()` here. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@flink.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org