This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 394820b201d Validate Dataproc batch labels max 63 characters (#55196)
394820b201d is described below

commit 394820b201de080d03d3e2a11fa8d77a10fc82d4
Author: Chris Nauroth <[email protected]>
AuthorDate: Wed Sep 3 18:07:02 2025 -0700

    Validate Dataproc batch labels max 63 characters (#55196)
    
    As per the
    [Dataproc documentation](https://cloud.google.com/dataproc/docs/guides/creating-managing-labels),
    the maximum length for a label value is 63 characters.
    `DataprocCreateBatchOperator` performs validation to prevent setting
    longer labels. However, the regex currently allows 64 characters, which
    causes a failure when trying to create the batch. Update the regex and
    unit tests to enforce a max of 63.
---
 .../providers/google/cloud/operators/dataproc.py   |  2 +-
 .../unit/google/cloud/operators/test_dataproc.py   | 23 +++++++++++++++++++++-
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/providers/google/src/airflow/providers/google/cloud/operators/dataproc.py b/providers/google/src/airflow/providers/google/cloud/operators/dataproc.py
index 72a5fda7ed8..3dcf64dbeee 100644
--- a/providers/google/src/airflow/providers/google/cloud/operators/dataproc.py
+++ b/providers/google/src/airflow/providers/google/cloud/operators/dataproc.py
@@ -2573,7 +2573,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
         dag_id = re.sub(r"[.\s]", "_", self.dag_id.lower())
         task_id = re.sub(r"[.\s]", "_", self.task_id.lower())
 
-        labels_regex = re.compile(r"^[a-z][\w-]{0,63}$")
+        labels_regex = re.compile(r"^[a-z][\w-]{0,62}$")
         if not labels_regex.match(dag_id) or not labels_regex.match(task_id):
             return
 
diff --git a/providers/google/tests/unit/google/cloud/operators/test_dataproc.py b/providers/google/tests/unit/google/cloud/operators/test_dataproc.py
index b631716e6fa..d71be1318c4 100644
--- a/providers/google/tests/unit/google/cloud/operators/test_dataproc.py
+++ b/providers/google/tests/unit/google/cloud/operators/test_dataproc.py
@@ -3874,6 +3874,27 @@ class TestDataprocCreateBatchOperator:
 
         TestDataprocCreateBatchOperator.__assert_batch_create(mock_hook, expected_batch)
 
+    @mock.patch(DATAPROC_PATH.format("Batch.to_dict"))
+    @mock.patch(DATAPROC_PATH.format("DataprocHook"))
+    def test_create_batch_asdict_taskid_max_length_labels_updated(self, mock_hook, to_dict_mock):
+        long_task_id = "a" * 63
+        expected_batch = {
+            **BATCH,
+            "labels": {
+                "airflow-dag-id": TEST_DAG_ID,
+                "airflow-dag-display-name": TEST_DAG_ID,
+                "airflow-task-id": long_task_id,
+            },
+        }
+        DataprocCreateBatchOperator(
+            task_id=long_task_id,
+            dag=DAG(dag_id=TEST_DAG_ID),
+            batch=BATCH,
+            region=GCP_REGION,
+        ).execute(context=EXAMPLE_CONTEXT)
+
+        TestDataprocCreateBatchOperator.__assert_batch_create(mock_hook, expected_batch)
+
     @mock.patch(DATAPROC_PATH.format("Batch.to_dict"))
     @mock.patch(DATAPROC_PATH.format("DataprocHook"))
     def test_create_batch_invalid_taskid_labels_ignored(self, mock_hook, to_dict_mock):
@@ -3890,7 +3911,7 @@ class TestDataprocCreateBatchOperator:
     @mock.patch(DATAPROC_PATH.format("DataprocHook"))
     def test_create_batch_long_taskid_labels_ignored(self, mock_hook, to_dict_mock):
         DataprocCreateBatchOperator(
-            task_id="a" * 65,
+            task_id="a" * 64,
             dag=DAG(dag_id=TEST_DAG_ID),
             batch=BATCH,
             region=GCP_REGION,

Reply via email to