Skip to content

Commit

Permalink
Add retry to post-processing job start
Browse files (browse the repository at this point in the history)
  • Loading branch information
lathanh committed Nov 29, 2023
1 parent e034567 commit 4375922
Showing 1 changed file with 32 additions and 13 deletions.
45 changes: 32 additions & 13 deletions buildstockbatch/gcp/gcp.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -974,19 +974,38 @@ def start_combine_results_job_on_cloud(self, results_dir, do_timeseries=True):
)

# Start the job!
try:
jobs_client.run_job(name=self.postprocessing_job_name)
logger.info(
"Post-processing Cloud Run job started! "
f"See status at: {self.postprocessing_job_console_url}. "
"You will need to run this script with --clean to clean up the GCP "
"environment after post-processing is complete."
)
except:
logger.warning(
"Post-processing Cloud Run job failed to start. You may try starting it "
f"at the console: {self.postprocessing_job_console_url}"
)
attempts_remaining = 3
while True:
try:
jobs_client.run_job(name=self.postprocessing_job_name)
logger.info(
"Post-processing Cloud Run job started! "
f"See status at: {self.postprocessing_job_console_url}. "
"You will need to run this script with --clean to clean up the GCP "
"environment after post-processing is complete."
)
break
except:
attempts_remaining -= 1
if attempts_remaining > 0:
# retry after delay
logger.warning(
"Post-processing Cloud Run job failed to start. "
f"{attempts_remaining} attempt(s) remaining. "
"Will retry in 1 second...",
exc_info=logger.isEnabledFor(logging.DEBUG),
)
time.sleep(1)
continue

# no attempts remaining
logger.warning(
"Post-processing Cloud Run job failed to start after three attempts. "
"You may want to investigate why and try starting it at the console: "
f"{self.postprocessing_job_console_url}",
exc_info=True,
)
break

def clean_postprocessing_job(self):
jobs_client = run_v2.JobsClient()
Expand Down

0 comments on commit 4375922

Please sign in to comment.