This is an automated email from the ASF dual-hosted git repository.

eladkal pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new b7a0983b66 docs for `DataprocSubmitJobOperator` (Presto job) (#32798)
b7a0983b66 is described below

commit b7a0983b668ba2d5c817b812daec68943a3d2bc2
Author: max <[email protected]>
AuthorDate: Wed Jul 26 08:51:01 2023 +0200

    docs for `DataprocSubmitJobOperator` (Presto job) (#32798)
---
 .../operators/cloud/dataproc.rst                   |  20 ++++
 .../cloud/dataproc/example_dataproc_presto.py      | 119 +++++++++++++++++++++
 2 files changed, 139 insertions(+)

diff --git a/docs/apache-airflow-providers-google/operators/cloud/dataproc.rst b/docs/apache-airflow-providers-google/operators/cloud/dataproc.rst
index 8144e0bfe4..d13227c135 100644
--- a/docs/apache-airflow-providers-google/operators/cloud/dataproc.rst
+++ b/docs/apache-airflow-providers-google/operators/cloud/dataproc.rst
@@ -75,6 +75,18 @@ With this configuration we can create the cluster:
     :start-after: [START how_to_cloud_dataproc_create_cluster_operator_in_gke]
     :end-before: [END how_to_cloud_dataproc_create_cluster_operator_in_gke]
 
+You can also create a Dataproc cluster with the optional Presto component.
+To do so, please use the following configuration.
+Note that the default image might not support the chosen optional component.
+If that is the case, please specify a compatible ``image_version``, which you can find in the
+`documentation <https://cloud.google.com/dataproc/docs/concepts/components/overview#available_optional_components>`__.
+
+.. exampleinclude:: /../../tests/system/providers/google/cloud/dataproc/example_dataproc_presto.py
+    :language: python
+    :dedent: 0
+    :start-after: [START how_to_cloud_dataproc_create_cluster]
+    :end-before: [END how_to_cloud_dataproc_create_cluster]
+
 You can use deferrable mode for this action in order to run the operator asynchronously:
 
 .. exampleinclude:: /../../tests/system/providers/google/cloud/dataproc/example_dataproc_cluster_deferrable.py
@@ -239,6 +251,14 @@ Example of the configuration for a SparkR:
     :start-after: [START how_to_cloud_dataproc_sparkr_config]
     :end-before: [END how_to_cloud_dataproc_sparkr_config]
 
+Example of the configuration for a Presto Job:
+
+.. exampleinclude:: /../../tests/system/providers/google/cloud/dataproc/example_dataproc_presto.py
+    :language: python
+    :dedent: 0
+    :start-after: [START how_to_cloud_dataproc_presto_config]
+    :end-before: [END how_to_cloud_dataproc_presto_config]
+
 Working with workflows templates
 --------------------------------
 
diff --git a/tests/system/providers/google/cloud/dataproc/example_dataproc_presto.py b/tests/system/providers/google/cloud/dataproc/example_dataproc_presto.py
new file mode 100644
index 0000000000..48e3d46614
--- /dev/null
+++ b/tests/system/providers/google/cloud/dataproc/example_dataproc_presto.py
@@ -0,0 +1,119 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Example Airflow DAG for DataprocSubmitJobOperator with presto job.
+"""
+from __future__ import annotations
+
+import os
+from datetime import datetime
+
+from airflow import models
+from airflow.providers.google.cloud.operators.dataproc import (
+    DataprocCreateClusterOperator,
+    DataprocDeleteClusterOperator,
+    DataprocSubmitJobOperator,
+)
+from airflow.utils.trigger_rule import TriggerRule
+
+ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
+DAG_ID = "dataproc_presto"
+PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT")
+
+CLUSTER_NAME = f"cluster-{ENV_ID}-{DAG_ID}".replace("_", "-")
+REGION = "europe-west1"
+
+# Cluster definition
+# [START how_to_cloud_dataproc_create_cluster]
+CLUSTER_CONFIG = {
+    "master_config": {
+        "num_instances": 1,
+        "machine_type_uri": "n1-standard-4",
+        "disk_config": {"boot_disk_type": "pd-standard", "boot_disk_size_gb": 
1024},
+    },
+    "worker_config": {
+        "num_instances": 2,
+        "machine_type_uri": "n1-standard-4",
+        "disk_config": {"boot_disk_type": "pd-standard", "boot_disk_size_gb": 
1024},
+    },
+    "software_config": {
+        "optional_components": [
+            "PRESTO",
+        ],
+        "image_version": "2.0",
+    },
+}
+# [END how_to_cloud_dataproc_create_cluster]
+
+# Jobs definitions
+# [START how_to_cloud_dataproc_presto_config]
+PRESTO_JOB = {
+    "reference": {"project_id": PROJECT_ID},
+    "placement": {"cluster_name": CLUSTER_NAME},
+    "presto_job": {"query_list": {"queries": ["SHOW CATALOGS"]}},
+}
+# [END how_to_cloud_dataproc_presto_config]
+
+
+with models.DAG(
+    DAG_ID,
+    schedule="@once",
+    start_date=datetime(2021, 1, 1),
+    catchup=False,
+    tags=["example", "dataproc", "presto"],
+) as dag:
+    create_cluster = DataprocCreateClusterOperator(
+        task_id="create_cluster",
+        project_id=PROJECT_ID,
+        cluster_config=CLUSTER_CONFIG,
+        region=REGION,
+        cluster_name=CLUSTER_NAME,
+    )
+
+    presto_task = DataprocSubmitJobOperator(
+        task_id="presto_task", job=PRESTO_JOB, region=REGION, 
project_id=PROJECT_ID
+    )
+
+    delete_cluster = DataprocDeleteClusterOperator(
+        task_id="delete_cluster",
+        project_id=PROJECT_ID,
+        cluster_name=CLUSTER_NAME,
+        region=REGION,
+        trigger_rule=TriggerRule.ALL_DONE,
+    )
+
+    (
+        # TEST SETUP
+        create_cluster
+        # TEST BODY
+        >> presto_task
+        # TEST TEARDOWN
+        >> delete_cluster
+    )
+
+    from tests.system.utils.watcher import watcher
+
+    # This test needs watcher in order to properly mark success/failure
+    # when "teardown" task with trigger rule is part of the DAG
+    list(dag.tasks) >> watcher()
+
+
+from tests.system.utils import get_test_run  # noqa: E402
+
+# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
+test_run = get_test_run(dag)

Reply via email to