mik-laj commented on a change in pull request #5539: [AIRFLOW-4811] Implement 
GCP DLP' Hook and Operators
URL: https://github.com/apache/airflow/pull/5539#discussion_r303131994
 
 

 ##########
 File path: airflow/contrib/hooks/gcp_dlp_hook.py
 ##########
 @@ -0,0 +1,1362 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+This module contains a CloudDLPHook
+which allows you to connect to GCP Cloud DLP service.
+"""
+
+import time
+from google.cloud.dlp_v2 import DlpServiceClient
+from google.cloud.dlp_v2.types import DlpJob
+
+from airflow import AirflowException
+from airflow.contrib.hooks.gcp_api_base_hook import GoogleCloudBaseHook
+
+
+# pylint: disable=R0904, C0302
+class CloudDLPHook(GoogleCloudBaseHook):
+    """
+    Hook for Google Cloud Data Loss Prevention (DLP) APIs.
+    Cloud DLP allows clients to detect the presence of Personally Identifiable
+    Information (PII) and other privacy-sensitive data in user-supplied,
+    unstructured data streams, like text blocks or images. The service also
+    includes methods for sensitive data redaction and scheduling of data scans
+    on Google Cloud Platform based data sets.
+
+    :param gcp_conn_id: The connection ID to use when fetching connection info.
+    :type gcp_conn_id: str
+    :param delegate_to: The account to impersonate, if any.
+        For this to work, the service account making the request must have
+        domain-wide delegation enabled.
+    :type delegate_to: str
+    """
+
+    def __init__(self,
+                 gcp_conn_id="google_cloud_default",
+                 delegate_to=None):
+        super().__init__(gcp_conn_id, delegate_to)
+        self._client = None
+
+    def get_conn(self):
+        """
+        Provides a client for interacting with the Cloud DLP API.
+
+        :return: GCP Cloud DLP API Client
+        :rtype: google.cloud.dlp_v2.DlpServiceClient
+        """
+        if not self._client:
+            self._client = 
DlpServiceClient(credentials=self._get_credentials())
+        return self._client
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def cancel_dlp_job(
+        self, name, retry=None, timeout=None, metadata=None
+    ):
+        """
+        Starts asynchronous cancellation on a long-running DlpJob.
+
+        :param name: The name of the DlpJob resource to be cancelled.
+        :type name: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        """
+
+        client = self.get_conn()
+
+        if not name:
+            raise AirflowException(
+                "Please provide the name of the DlpJob resource to be 
cancelled."
+            )
+
+        client.cancel_dlp_job(
+            name=name, retry=retry, timeout=timeout, metadata=metadata
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def create_deidentify_template(
+        self,
+        parent,
+        deidentify_template=None,
+        template_id=None,
+        retry=None,
+        timeout=None,
+        metadata=None,
+    ):
+        """
+        Creates a DeidentifyTemplate for re-using frequently used 
configuration for
+        de-identifying content, images, and storage.
+
+        :param parent: The parent resource name.
+        :type parent: str
+        :param deidentify_template: (Optional) The DeidentifyTemplate to 
create.
+        :type deidentify_template: dict or 
google.cloud.dlp_v2.types.DeidentifyTemplate
+        :param template_id: (Optional) The template ID.
+        :type template_id: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :rtype: google.cloud.dlp_v2.types.DeidentifyTemplate
+        """
+
+        client = self.get_conn()
+
+        if not parent:
+            raise AirflowException("Please provide the parent resource name.")
+
+        return client.create_deidentify_template(
+            parent=parent,
+            deidentify_template=deidentify_template,
+            template_id=template_id,
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def create_dlp_job(
+        self,
+        parent,
+        inspect_job=None,
+        risk_job=None,
+        job_id=None,
+        retry=None,
+        timeout=None,
+        metadata=None,
+        wait_until_finished=True,
+        polling_interval_in_seconds=60
+    ):
+        """
+        Creates a new job to inspect storage or calculate risk metrics.
+
+        :param parent: The parent resource name.
+        :type parent: str
+        :param inspect_job: (Optional) The configuration for the inspect job.
+        :type inspect_job: dict or google.cloud.dlp_v2.types.InspectJobConfig
+        :param risk_job: (Optional) The configuration for the risk job.
+        :type risk_job: dict or google.cloud.dlp_v2.types.RiskAnalysisJobConfig
+        :param job_id: (Optional) The job ID.
+        :type job_id: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :param wait_until_finished: (Optional) If true, it will keep polling 
the job state
+            until it is set to DONE.
+        :type wait_until_finished: bool
+        :param polling_interval_in_seconds: (Optional) The number of seconds 
to wait
+            before calling another request. This field will only be used when
+            wait_until_finished is set to True.
+        :type polling_interval_in_seconds: int
+        :rtype: google.cloud.dlp_v2.types.DlpJob
+        """
+
+        client = self.get_conn()
+
+        if not parent:
+            raise AirflowException("Please provide the parent resource name.")
+
+        job = client.create_dlp_job(
+            parent=parent,
+            inspect_job=inspect_job,
+            risk_job=risk_job,
+            job_id=job_id,
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+
+        while wait_until_finished:
+            job = self.get_dlp_job(name=job.name)
+            state_message = 'DLP job {} state: {}.'.format(
+                job.name,
+                DlpJob.JobState.Name(job.state)
+            )
+
+            self.log.info(state_message)
+
+            if job.state == DlpJob.JobState.DONE:
+                wait_until_finished = False
+            elif job.state in [DlpJob.JobState.PENDING,
+                               DlpJob.JobState.RUNNING,
+                               DlpJob.JobState.JOB_STATE_UNSPECIFIED]:
+                time.sleep(polling_interval_in_seconds)
+            else:
+                raise AirflowException(state_message)
+        return job
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def create_inspect_template(
+        self,
+        parent,
+        inspect_template=None,
+        template_id=None,
+        retry=None,
+        timeout=None,
+        metadata=None,
+    ):
+        """
+        Creates an InspectTemplate for re-using frequently used configuration 
for
+        inspecting content, images, and storage.
+
+        :param parent: The parent resource name.
+        :type parent: str
+        :param inspect_template: (Optional) The InspectTemplate to create.
+        :type inspect_template: dict or 
google.cloud.dlp_v2.types.InspectTemplate
+        :param template_id: (Optional) The template ID.
+        :type template_id: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :rtype: google.cloud.dlp_v2.types.InspectTemplate
+        """
+
+        client = self.get_conn()
+
+        if not parent:
+            raise AirflowException("Please provide the parent resource name.")
+
+        return client.create_inspect_template(
+            parent=parent,
+            inspect_template=inspect_template,
+            template_id=template_id,
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def create_job_trigger(
+        self,
+        parent,
+        job_trigger=None,
+        trigger_id=None,
+        retry=None,
+        timeout=None,
+        metadata=None,
+    ):
+        """
+        Creates a job trigger to run DLP actions such as scanning storage for 
sensitive
+        information on a set schedule.
+
+        :param parent: The parent resource name.
+        :type parent: str
+        :param job_trigger: (Optional) The JobTrigger to create.
+        :type job_trigger: dict or google.cloud.dlp_v2.types.JobTrigger
+        :param trigger_id: (Optional) The JobTrigger ID.
+        :type trigger_id: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :rtype: google.cloud.dlp_v2.types.JobTrigger
+        """
+
+        client = self.get_conn()
+
+        if not parent:
+            raise AirflowException("Please provide the parent resource name.")
+
+        return client.create_job_trigger(
+            parent=parent,
+            job_trigger=job_trigger,
+            trigger_id=trigger_id,
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def create_stored_info_type(
+        self,
+        parent,
+        config=None,
+        stored_info_type_id=None,
+        retry=None,
+        timeout=None,
+        metadata=None,
+    ):
+        """
+        Creates a pre-built stored infoType to be used for inspection.
+
+        :param parent: The parent resource name.
+        :type parent: str
+        :param config: (Optional) The config for the StoredInfoType.
+        :type config: dict or google.cloud.dlp_v2.types.StoredInfoTypeConfig
+        :param stored_info_type_id: (Optional) The StoredInfoType ID.
+        :type stored_info_type_id: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :rtype: google.cloud.dlp_v2.types.StoredInfoType
+        """
+
+        client = self.get_conn()
+
+        if not parent:
+            raise AirflowException("Please provide the parent resource name.")
+
+        return client.create_stored_info_type(
+            parent=parent,
+            config=config,
+            stored_info_type_id=stored_info_type_id,
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def deidentify_content(
+        self,
+        parent,
+        deidentify_config=None,
+        inspect_config=None,
+        item=None,
+        inspect_template_name=None,
+        deidentify_template_name=None,
+        retry=None,
+        timeout=None,
+        metadata=None,
+    ):
+        """
+        De-identifies potentially sensitive info from a ContentItem. This 
method has limits
+        on input size and output size.
+
+        :param parent: The parent resource name.
+        :type parent: str
+        :param deidentify_config: (Optional) Configuration for the 
de-identification of the
+            content item. Items specified here will override the template 
referenced by the
+            deidentify_template_name argument.
+        :type deidentify_config: dict or 
google.cloud.dlp_v2.types.DeidentifyConfig
+        :param inspect_config: (Optional) Configuration for the inspector. 
Items specified
+            here will override the template referenced by the 
inspect_template_name argument.
+        :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig
+        :param item: (Optional) The item to de-identify. Will be treated as 
text.
+        :type item: dict or google.cloud.dlp_v2.types.ContentItem
+        :param inspect_template_name: (Optional) Optional template to use. Any 
configuration
+            directly specified in inspect_config will override those set in 
the template.
+        :type inspect_template_name: str
+        :param deidentify_template_name: (Optional) Optional template to use. 
Any
+            configuration directly specified in deidentify_config will 
override those set
+            in the template.
+        :type deidentify_template_name: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :rtype: google.cloud.dlp_v2.types.DeidentifyContentResponse
+        """
+
+        client = self.get_conn()
+
+        if not parent:
+            raise AirflowException("Please provide the parent resource name.")
+
+        return client.deidentify_content(
+            parent=parent,
+            deidentify_config=deidentify_config,
+            inspect_config=inspect_config,
+            item=item,
+            inspect_template_name=inspect_template_name,
+            deidentify_template_name=deidentify_template_name,
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def delete_deidentify_template(
+        self, name, retry=None, timeout=None, metadata=None
+    ):
+        """
+        Deletes a DeidentifyTemplate.
+
+        :param name: Resource name of the organization and deidentify template 
to be deleted.
+        :type name: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        """
+
+        client = self.get_conn()
+
+        if not name:
+            raise AirflowException(
+                "Please provide the deidentifyTemplate name to be deleted."
+            )
+
+        client.delete_deidentify_template(
+            name=name, retry=retry, timeout=timeout, metadata=metadata
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def delete_dlp_job(
+        self, name, retry=None, timeout=None, metadata=None
+    ):
+        """
+        Deletes a long-running DlpJob. This method indicates that the client 
is no longer
+        interested in the DlpJob result. The job will be cancelled if possible.
+
+        :param name: The name of the DlpJob resource to be deleted.
+        :type name: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        """
+
+        client = self.get_conn()
+
+        if not name:
+            raise AirflowException(
+                "Please provide the name of the DlpJob resource to be deleted."
+            )
+
+        client.delete_dlp_job(
+            name=name, retry=retry, timeout=timeout, metadata=metadata
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def delete_inspect_template(self, name, retry=None, timeout=None, 
metadata=None):
+        """
+        Deletes an InspectTemplate.
+
+        :param name: Resource name of the organization and inspectTemplate to 
be deleted.
+        :type name: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        """
+
+        client = self.get_conn()
+
+        if not name:
+            raise AirflowException(
+                "Please provide the inspectTemplate name to be deleted."
+            )
+
+        client.delete_inspect_template(
+            name=name, retry=retry, timeout=timeout, metadata=metadata
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def delete_job_trigger(self, name, retry=None, timeout=None, 
metadata=None):
+        """
+        Deletes a job trigger.
+
+        :param name: Resource name of the project and the triggeredJob.
+        :type name: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        """
+
+        client = self.get_conn()
+
+        if not name:
+            raise AirflowException(
+                "Please provide the triggeredJob name to be deleted."
+            )
+
+        client.delete_job_trigger(
+            name=name, retry=retry, timeout=timeout, metadata=metadata
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def delete_stored_info_type(self, name, retry=None, timeout=None, 
metadata=None):
+        """
+        Deletes a stored infoType.
+
+        :param name: Resource name of the organization and storedInfoType to 
be deleted.
+        :type name: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        """
+
+        client = self.get_conn()
+
+        if not name:
+            raise AirflowException(
+                "Please provide the storedInfoType name to be deleted."
+            )
+
+        client.delete_stored_info_type(
+            name=name, retry=retry, timeout=timeout, metadata=metadata
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def get_deidentify_template(self, name, retry=None, timeout=None, 
metadata=None):
+        """
+        Gets a DeidentifyTemplate.
+
+        :param name: Resource name of the organization and deidentify template 
to be read.
+        :type name: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :rtype: google.cloud.dlp_v2.types.DeidentifyTemplate
+        """
+
+        client = self.get_conn()
+
+        if not name:
+            raise AirflowException("Please provide the deidentifyTemplate 
name.")
+
+        return client.get_deidentify_template(
+            name=name, retry=retry, timeout=timeout, metadata=metadata
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def get_dlp_job(self, name, retry=None, timeout=None, metadata=None):
+        """
+        Gets the latest state of a long-running DlpJob.
+
+        :param name: The name of the DlpJob resource.
+        :type name: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :rtype: google.cloud.dlp_v2.types.DlpJob
+        """
+
+        client = self.get_conn()
+
+        if not name:
+            raise AirflowException("Please provide the name of the DlpJob 
resource.")
+
+        return client.get_dlp_job(
+            name=name, retry=retry, timeout=timeout, metadata=metadata
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def get_inspect_template(self, name, retry=None, timeout=None, 
metadata=None):
+        """
+        Gets an InspectTemplate.
+
+        :param name: Resource name of the organization and inspectTemplate to 
be read.
+        :type name: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :rtype: google.cloud.dlp_v2.types.InspectTemplate
+        """
+
+        client = self.get_conn()
+
+        if not name:
+            raise AirflowException("Please provide the inspectTemplate name.")
+
+        return client.get_inspect_template(
+            name=name, retry=retry, timeout=timeout, metadata=metadata
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def get_job_trigger(self, name, retry=None, timeout=None, metadata=None):
+        """
+        Gets a job trigger.
+
+        :param name: Resource name of the project and the triggeredJob.
+        :type name: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :rtype: google.cloud.dlp_v2.types.JobTrigger
+        """
+
+        client = self.get_conn()
+
+        if not name:
+            raise AirflowException("Please provide the triggeredJob name.")
+
+        return client.get_job_trigger(
+            name=name, retry=retry, timeout=timeout, metadata=metadata
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def get_stored_info_type(self, name, retry=None, timeout=None, 
metadata=None):
+        """
+        Gets a stored infoType.
+
+        :param name: Resource name of the organization and storedInfoType to 
be read.
+        :type name: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :rtype: google.cloud.dlp_v2.types.StoredInfoType
+        """
+
+        client = self.get_conn()
+
+        if not name:
+            raise AirflowException("Please provide the storedInfoType name.")
+
+        return client.get_stored_info_type(
+            name=name, retry=retry, timeout=timeout, metadata=metadata
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def inspect_content(
+        self,
+        parent,
+        inspect_config=None,
+        item=None,
+        inspect_template_name=None,
+        retry=None,
+        timeout=None,
+        metadata=None,
+    ):
+        """
+        Finds potentially sensitive info in content. This method has limits on 
input size,
+        processing time, and output size.
+
+        :param parent: The parent resource name.
+        :type parent: str
+        :param inspect_config: (Optional) Configuration for the inspector. 
Items specified
+            here will override the template referenced by the 
inspect_template_name argument.
+        :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig
+        :param item: (Optional) The item to de-identify. Will be treated as 
text.
+        :type item: dict or google.cloud.dlp_v2.types.ContentItem
+        :param inspect_template_name: (Optional) Optional template to use. Any 
configuration
+            directly specified in inspect_config will override those set in 
the template.
+        :type inspect_template_name: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :rtype: google.cloud.dlp_v2.types.InspectContentResponse
+        """
+
+        client = self.get_conn()
+
+        if not parent:
+            raise AirflowException("Please provide the parent resource name.")
+
+        return client.inspect_content(
+            parent=parent,
+            inspect_config=inspect_config,
+            item=item,
+            inspect_template_name=inspect_template_name,
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def list_deidentify_templates(
+        self,
+        parent,
+        page_size=None,
+        order_by=None,
+        retry=None,
+        timeout=None,
+        metadata=None,
+    ):
+        """
+        Lists DeidentifyTemplates.
+
+        :param parent: The parent resource name.
+        :type parent: str
+        :param page_size: (Optional) The maximum number of resources contained 
in the
+            underlying API response.
+        :type page_size: int
+        :param order_by: (Optional) Optional comma separated list of fields to 
order by,
+            followed by asc or desc postfix.
+        :type order_by: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :rtype: list[google.cloud.dlp_v2.types.DeidentifyTemplate]
+        """
+
+        client = self.get_conn()
+
+        if not parent:
+            raise AirflowException("Please provide the parent resource name.")
+
+        results = client.list_deidentify_templates(
+            parent=parent,
+            page_size=page_size,
+            order_by=order_by,
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+
+        return list(results)
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def list_dlp_jobs(
+        self,
+        parent,
+        results_filter=None,
+        page_size=None,
+        job_type=None,
+        order_by=None,
+        retry=None,
+        timeout=None,
+        metadata=None,
+    ):
+        """
+        Lists DlpJobs that match the specified filter in the request.
+
+        :param parent: The parent resource name.
+        :type parent: str
+        :param results_filter: (Optional) Filter used to specify a subset of 
results.
+        :type results_filter: str
+        :param page_size: (Optional) The maximum number of resources contained 
in the
+            underlying API response.
+        :type page_size: int
+        :param job_type: (Optional) The type of job.
+        :type job_type: str
+        :param order_by: (Optional) Optional comma separated list of fields to 
order by,
+            followed by asc or desc postfix.
+        :type order_by: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :rtype: list[google.cloud.dlp_v2.types.DlpJob]
+        """
+
+        client = self.get_conn()
+
+        if not parent:
+            raise AirflowException("Please provide the parent resource name.")
+
+        results = client.list_dlp_jobs(
+            parent=parent,
+            filter_=results_filter,
+            page_size=page_size,
+            type_=job_type,
+            order_by=order_by,
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return list(results)
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def list_info_types(
+        self,
+        language_code=None,
+        results_filter=None,
+        retry=None,
+        timeout=None,
+        metadata=None,
+    ):
+        """
+        Returns a list of the sensitive information types that the DLP API 
supports.
+
+        :param language_code: (Optional) Optional BCP-47 language code for 
localized infoType
+            friendly names. If omitted, or if localized strings are not 
available, en-US
+            strings will be returned.
+        :type language_code: str
+        :param results_filter: (Optional) Filter used to specify a subset of 
results.
+        :type results_filter: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :rtype: google.cloud.dlp_v2.types.ListInfoTypesResponse
+        """
+
+        client = self.get_conn()
+
+        return client.list_info_types(
+            language_code=language_code,
+            filter_=results_filter,
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def list_inspect_templates(
+        self,
+        parent,
+        page_size=None,
+        order_by=None,
+        retry=None,
+        timeout=None,
+        metadata=None,
+    ):
+        """
+        Lists InspectTemplates.
+
+        :param parent: The parent resource name.
+        :type parent: str
+        :param page_size: (Optional) The maximum number of resources contained 
in the
+            underlying API response.
+        :type page_size: int
+        :param order_by: (Optional) Optional comma separated list of fields to 
order by,
+            followed by asc or desc postfix.
+        :type order_by: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :rtype: list[google.cloud.dlp_v2.types.InspectTemplate]
+        """
+
+        client = self.get_conn()
+
+        if not parent:
+            raise AirflowException("Please provide the parent resource name.")
+
+        results = client.list_inspect_templates(
+            parent=parent,
+            page_size=page_size,
+            order_by=order_by,
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return list(results)
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def list_job_triggers(
+        self,
+        parent,
+        page_size=None,
+        order_by=None,
+        results_filter=None,
+        retry=None,
+        timeout=None,
+        metadata=None,
+    ):
+        """
+        Lists job triggers.
+
+        :param parent: The parent resource name.
+        :type parent: str
+        :param page_size: (Optional) The maximum number of resources contained 
in the
+            underlying API response.
+        :type page_size: int
+        :param order_by: (Optional) Optional comma separated list of fields to 
order by,
+            followed by asc or desc postfix.
+        :type order_by: str
+        :param results_filter: (Optional) Filter used to specify a subset of 
results.
+        :type results_filter: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :rtype: list[google.cloud.dlp_v2.types.JobTrigger]
+        """
+
+        client = self.get_conn()
+
+        if not parent:
+            raise AirflowException("Please provide the parent resource name.")
+
+        results = client.list_job_triggers(
+            parent=parent,
+            page_size=page_size,
+            order_by=order_by,
+            filter_=results_filter,
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return list(results)
+
+    def list_stored_info_types(
+        self,
+        parent,
+        page_size=None,
+        order_by=None,
+        retry=None,
+        timeout=None,
+        metadata=None,
+    ):
+        """
+        Lists stored infoTypes.
+
+        :param parent: The parent resource name.
+        :type parent: str
+        :param page_size: (Optional) The maximum number of resources contained 
in the
+            underlying API response.
+        :type page_size: int
+        :param order_by: (Optional) Optional comma separated list of fields to 
order by,
+            followed by asc or desc postfix.
+        :type order_by: str
+        :param retry: (Optional) A retry object used to retry requests.
+            If None is specified, requests will not be retried.
+        :type retry: google.api_core.retry.Retry
+        :param timeout: (Optional) The amount of time, in seconds, to wait for 
the request
+            to complete. Note that if retry is specified, the timeout applies 
to each
+            individual attempt.
+        :type timeout: float
+        :param metadata: (Optional) Additional metadata that is provided to 
the method.
+        :type metadata: sequence[tuple[str, str]]]
+        :rtype: list[google.cloud.dlp_v2.types.StoredInfoType]
+        """
+
+        client = self.get_conn()
+
+        if not parent:
+            raise AirflowException("Please provide the parent resource name.")
+
+        results = client.list_stored_info_types(
+            parent=parent,
+            page_size=page_size,
+            order_by=order_by,
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+        return list(results)
+
+    @GoogleCloudBaseHook.catch_http_exception
+    def redact_image(
+        self,
+        parent,
+        inspect_config=None,
+        image_redaction_configs=None,
+        include_findings=None,
+        byte_item=None,
+        retry=None,
+        timeout=None,
+        metadata=None,
+    ):
+        """
+        Redacts potentially sensitive info from an image. This method has 
limits on
+        input size, processing time, and output size.
+
+        :param parent: The parent resource name.
+        :type parent: str
 
 Review comment:
   It is worth adding the possibility of defining the project ID using the 
connection configuration.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to