TobKed commented on a change in pull request #8553:
URL: https://github.com/apache/airflow/pull/8553#discussion_r508222123
##########
File path: airflow/providers/google/cloud/operators/dataflow.py
##########
@@ -406,6 +406,88 @@ def on_kill(self) -> None:
         self.hook.cancel_job(job_id=self.job_id, project_id=self.project_id)
 
 
+class DataflowStartSqlJobOperator(BaseOperator):
+    """
+    Starts a Dataflow SQL query.
+
+    :param job_name: The unique name to assign to the Cloud Dataflow job.
+    :type job_name: str
+    :param query: The SQL query to execute.
+    :type query: str
+    :param options: Job parameters to be executed. It can be a dictionary with the following keys.
+
+        For more information, look at the
+        `gcloud beta dataflow sql query
+        <https://cloud.google.com/sdk/gcloud/reference/beta/dataflow/sql/query>`__
+        command reference.
+
+    :type options: dict
+    :param location: The location of the Dataflow job (for example europe-west1)
+    :type location: str
+    :param project_id: The ID of the GCP project that owns the job.
+        If set to ``None`` or missing, the default project_id from the GCP connection is used.
+    :type project_id: Optional[str]
+    :param gcp_conn_id: The connection ID to use connecting to Google Cloud
+        Platform.
+    :type gcp_conn_id: str
+    :param delegate_to: The account to impersonate, if any.
+        For this to work, the service account making the request must have
+        domain-wide delegation enabled.
+    :type delegate_to: str
+    """
+
+    template_fields = ['job_name', 'query', 'options', 'location', 'project_id', 'gcp_conn_id']
+
+    @apply_defaults
+    def __init__(
+        self,
+        job_name: str,
+        query: str,
+        options: Dict[str, Any],
+        location: str = DEFAULT_DATAFLOW_LOCATION,
+        project_id: Optional[str] = None,
+        gcp_conn_id: str = 'google_cloud_default',
+        delegate_to: Optional[str] = None,
+        *args,
+        **kwargs
+    ) -> None:
+        super().__init__(*args, **kwargs)
+        self.job_name = job_name
+        self.query = query
+        self.options = options
+        self.location = location
+        self.project_id = project_id
+        self.gcp_conn_id = gcp_conn_id
+        self.delegate_to = delegate_to
+        self.job_id = None
+        self.hook: Optional[DataflowHook] = None
+
+    def execute(self, context):
+        self.hook = DataflowHook(
+            gcp_conn_id=self.gcp_conn_id,
+            delegate_to=self.delegate_to,
+        )
+
+        def set_current_job_id(job_id):
+            self.job_id = job_id
+
+        job = self.hook.start_sql_job(
+            job_name=self.job_name,
+            query=self.query,
+            options=self.options,
+            location=self.location,
+            project_id=self.project_id,
+            on_new_job_id_callback=set_current_job_id,
+        )
+
+        return job
+
+    def on_kill(self) -> None:
+        self.log.info("On kill.")
+        if self.job_id:
+            self.hook.cancel_job(job_id=self.job_id, project_id=self.project_id)

Review comment:
   I fixed it in the hook.
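For readers following the thread, below is a minimal, hypothetical sketch of how the operator added in this diff might be used inside a DAG. The import path follows the file being changed; the `options` keys are assumptions modelled on the flags of `gcloud beta dataflow sql query`, since the keys actually accepted depend on `DataflowHook.start_sql_job`, which is not shown in this snippet.

```python
# A usage sketch, not part of the PR: starting a Dataflow SQL job from a DAG.
from airflow import DAG
from airflow.providers.google.cloud.operators.dataflow import DataflowStartSqlJobOperator
from airflow.utils.dates import days_ago

with DAG(
    dag_id="example_dataflow_sql",          # hypothetical DAG id
    start_date=days_ago(1),
    schedule_interval=None,
) as dag:
    start_sql_job = DataflowStartSqlJobOperator(
        task_id="start_sql_job",
        job_name="example-dataflow-sql-job",  # unique Dataflow job name
        query="""
            SELECT sales_region, COUNT(*) AS num_rows
            FROM bigquery.table.`my-project`.`my_dataset`.`my_table`
            GROUP BY sales_region
        """,
        options={
            # Assumed keys, mirroring `gcloud beta dataflow sql query` output flags.
            "bigquery-project": "my-project",
            "bigquery-dataset": "my_dataset",
            "bigquery-table": "query_results",
        },
        location="europe-west1",
        project_id="my-project",
    )
```

Note that `on_kill` only cancels the job when `job_id` has been populated, which happens through the `on_new_job_id_callback` passed to `start_sql_job`, so a task started this way can be cancelled if it is killed mid-run.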