jaketf commented on a change in pull request #9590: URL: https://github.com/apache/airflow/pull/9590#discussion_r452557394
########## File path: airflow/providers/google/cloud/operators/bigquery.py ########## @@ -1692,32 +1693,48 @@ def prepare_template(self) -> None: with open(self.configuration, 'r') as file: self.configuration = json.loads(file.read()) + def _submit_job(self, hook: BigQueryHook, job_id: str): + # Submit a new job + job = hook.insert_job( + configuration=self.configuration, + project_id=self.project_id, + location=self.location, + job_id=job_id, + ) + # Start the job and wait for it to complete and get the result. + job.result() + return job + def execute(self, context: Any): hook = BigQueryHook( gcp_conn_id=self.gcp_conn_id, delegate_to=self.delegate_to, ) - job_id = self.job_id or f"airflow_{self.task_id}_{int(time())}" + exec_date = re.sub("\:|\-|\+", "_", context['execution_date'].isoformat()) + job_id = self.job_id or f"airflow_{self.dag_id}_{self.task_id}_{exec_date}_" + try: - job = hook.insert_job( - configuration=self.configuration, - project_id=self.project_id, - location=self.location, - job_id=job_id, - ) - # Start the job and wait for it to complete and get the result. - job.result() + # Submit a new job + job = self._submit_job(hook, job_id) except Conflict: + # If the job already exists retrieve it Review comment: What does this do to the behavior for re-running a DAG for an execution date? Would this not by default reattach to the originally succeeded job (rather than the expected behavior of re-running the job)? Should this be configurable? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org