SameerMesiah97 commented on code in PR #61878:
URL: https://github.com/apache/airflow/pull/61878#discussion_r2810118932


##########
airflow-core/src/airflow/serialization/helpers.py:
##########
@@ -31,6 +31,94 @@
     from airflow.timetables.base import Timetable as CoreTimetable
 
 
+def _truncate_rendered_value(rendered: str, max_length: int) -> str:
+    if max_length <= 0:
+        return ""
+
+    prefix = "Truncated. You can change this behaviour in 
[core]max_templated_field_length. "
+    suffix = "..."
+    value = str(rendered)
+
+    # Always prioritize showing the truncation message first
+    trunc_only = f"{prefix}{suffix}"
+    trunc_only_len = len(trunc_only)
+
+    # If max_length is too small to even show the message, return it anyway
+    # (message takes priority over the constraint)
+    if max_length < trunc_only_len:
+        return trunc_only
+
+    # Check if value already has outer quotes - if so, preserve them and don't 
add extra quotes
+    has_outer_quotes = (value.startswith('"') and value.endswith('"')) or (
+        value.startswith("'") and value.endswith("'")
+    )
+
+    if has_outer_quotes:
+        # Value already has quotes - preserve the opening quote, truncate 
inner content
+        quote_char = value[0]
+        inner_value = value[1:-1]  # Strip outer quotes
+        # Calculate overhead: prefix + opening quote + suffix (no closing 
quote)
+        overhead = len(prefix) + 1 + len(suffix)
+        available = max_length - overhead
+
+        MIN_CONTENT_LENGTH = 7
+        if available < MIN_CONTENT_LENGTH:
+            return trunc_only
+
+        # Get content and trim trailing spaces
+        content = inner_value[:available].rstrip()
+
+        # Build result with opening quote, content, and suffix (no closing 
quote)
+        result = f"{prefix}{quote_char}{content}{suffix}"
+
+        # Ensure result < max_length, with a buffer when possible
+        # For values with outer quotes, use a larger buffer (3) since we don't 
add closing quotes
+        target_length = max_length - 3
+        while len(result) > target_length and len(content) > 0:
+            content = content[:-1].rstrip()
+            result = f"{prefix}{quote_char}{content}{suffix}"

Review Comment:
   Why not add trailing quotes here but then add both leading and trailing 
quotes further down? I think we should include leading and trailing quotes in 
both scenarios to make it simpler. 



##########
airflow-core/src/airflow/serialization/helpers.py:
##########
@@ -31,6 +31,94 @@
     from airflow.timetables.base import Timetable as CoreTimetable
 
 
+def _truncate_rendered_value(rendered: str, max_length: int) -> str:
+    if max_length <= 0:
+        return ""
+
+    prefix = "Truncated. You can change this behaviour in 
[core]max_templated_field_length. "
+    suffix = "..."
+    value = str(rendered)
+
+    # Always prioritize showing the truncation message first
+    trunc_only = f"{prefix}{suffix}"
+    trunc_only_len = len(trunc_only)
+
+    # If max_length is too small to even show the message, return it anyway
+    # (message takes priority over the constraint)
+    if max_length < trunc_only_len:
+        return trunc_only
+
+    # Check if value already has outer quotes - if so, preserve them and don't 
add extra quotes
+    has_outer_quotes = (value.startswith('"') and value.endswith('"')) or (
+        value.startswith("'") and value.endswith("'")
+    )
+
+    if has_outer_quotes:
+        # Value already has quotes - preserve the opening quote, truncate 
inner content
+        quote_char = value[0]
+        inner_value = value[1:-1]  # Strip outer quotes
+        # Calculate overhead: prefix + opening quote + suffix (no closing 
quote)
+        overhead = len(prefix) + 1 + len(suffix)
+        available = max_length - overhead
+
+        MIN_CONTENT_LENGTH = 7
+        if available < MIN_CONTENT_LENGTH:
+            return trunc_only
+
+        # Get content and trim trailing spaces
+        content = inner_value[:available].rstrip()
+
+        # Build result with opening quote, content, and suffix (no closing 
quote)
+        result = f"{prefix}{quote_char}{content}{suffix}"
+
+        # Ensure result < max_length, with a buffer when possible
+        # For values with outer quotes, use a larger buffer (3) since we don't 
add closing quotes
+        target_length = max_length - 3
+        while len(result) > target_length and len(content) > 0:
+            content = content[:-1].rstrip()
+            result = f"{prefix}{quote_char}{content}{suffix}"
+
+        return result
+    # Value doesn't have outer quotes - add quotes around content
+    # Choose quote character: use double quotes if value contains single 
quotes,
+    # otherwise use single quotes
+    if "'" in value and '"' not in value:
+        quote_char = '"'
+    else:
+        quote_char = "'"
+
+    # Calculate overhead: prefix + quotes around content + suffix
+    # Format: prefix + quote_char + content + quote_char + suffix
+    overhead = len(prefix) + 2 + len(suffix)  # 2 for the quotes around content
+    available = max_length - overhead
+
+    # Only show content if there's meaningful space for it
+    # Require at least enough space to show a few characters of content 
meaningfully
+    # This prevents showing just 1-2 characters which isn't very useful
+    MIN_CONTENT_LENGTH = 7
+    if available < MIN_CONTENT_LENGTH:
+        return trunc_only
+
+    # Get content and trim trailing spaces
+    content = value[:available].rstrip()
+
+    # Build the result and ensure it doesn't exceed max_length
+    result = f"{prefix}{quote_char}{content}{quote_char}{suffix}"
+
+    # Trim content to ensure result < max_length, with a small buffer when 
possible
+    # Trim until result is at least 1 char under max_length to leave a buffer
+    target_length = max_length - 1
+    while len(result) > target_length and len(content) > 0:
+        content = content[:-1].rstrip()
+        result = f"{prefix}{quote_char}{content}{quote_char}{suffix}"
+
+    return result
+
+
+def _safe_truncate_rendered_value(rendered: Any, max_length: int) -> str:
+    return _truncate_rendered_value(str(rendered), max_length)

Review Comment:
   What purpose does `_safe_truncate_rendered_value` serve here? It looks like 
you are just calling `_truncate_rendered_value`, whilst casting `rendered` as 
`str`. But are you not handling that type conversion in 
`_truncate_rendered_value`? Why would `rendered` be anything other than `str`?



##########
airflow-core/src/airflow/serialization/helpers.py:
##########
@@ -31,6 +31,94 @@
     from airflow.timetables.base import Timetable as CoreTimetable
 
 
+def _truncate_rendered_value(rendered: str, max_length: int) -> str:
+    if max_length <= 0:
+        return ""
+
+    prefix = "Truncated. You can change this behaviour in 
[core]max_templated_field_length. "
+    suffix = "..."
+    value = str(rendered)

Review Comment:
   Why are you casting this to `str` here if `rendered` is of type `str`?



##########
airflow-core/src/airflow/serialization/helpers.py:
##########
@@ -31,6 +31,94 @@
     from airflow.timetables.base import Timetable as CoreTimetable
 
 
+def _truncate_rendered_value(rendered: str, max_length: int) -> str:
+    if max_length <= 0:
+        return ""
+
+    prefix = "Truncated. You can change this behaviour in 
[core]max_templated_field_length. "
+    suffix = "..."
+    value = str(rendered)
+
+    # Always prioritize showing the truncation message first
+    trunc_only = f"{prefix}{suffix}"
+    trunc_only_len = len(trunc_only)
+
+    # If max_length is too small to even show the message, return it anyway
+    # (message takes priority over the constraint)
+    if max_length < trunc_only_len:

Review Comment:
   This is more of a stylistic nit but you can do `len(trunc_only)` instead of 
`trunc_only_len`. You don't need `trunc_only_len` to be explicit here. 



##########
airflow-core/src/airflow/serialization/helpers.py:
##########
@@ -31,6 +31,94 @@
     from airflow.timetables.base import Timetable as CoreTimetable
 
 
+def _truncate_rendered_value(rendered: str, max_length: int) -> str:
+    if max_length <= 0:
+        return ""
+
+    prefix = "Truncated. You can change this behaviour in 
[core]max_templated_field_length. "
+    suffix = "..."
+    value = str(rendered)
+
+    # Always prioritize showing the truncation message first
+    trunc_only = f"{prefix}{suffix}"
+    trunc_only_len = len(trunc_only)
+
+    # If max_length is too small to even show the message, return it anyway
+    # (message takes priority over the constraint)
+    if max_length < trunc_only_len:
+        return trunc_only
+
+    # Check if value already has outer quotes - if so, preserve them and don't 
add extra quotes
+    has_outer_quotes = (value.startswith('"') and value.endswith('"')) or (
+        value.startswith("'") and value.endswith("'")
+    )
+
+    if has_outer_quotes:
+        # Value already has quotes - preserve the opening quote, truncate 
inner content
+        quote_char = value[0]
+        inner_value = value[1:-1]  # Strip outer quotes
+        # Calculate overhead: prefix + opening quote + suffix (no closing 
quote)
+        overhead = len(prefix) + 1 + len(suffix)
+        available = max_length - overhead
+
+        MIN_CONTENT_LENGTH = 7
+        if available < MIN_CONTENT_LENGTH:
+            return trunc_only
+
+        # Get content and trim trailing spaces
+        content = inner_value[:available].rstrip()
+
+        # Build result with opening quote, content, and suffix (no closing 
quote)
+        result = f"{prefix}{quote_char}{content}{suffix}"
+
+        # Ensure result < max_length, with a buffer when possible
+        # For values with outer quotes, use a larger buffer (3) since we don't 
add closing quotes
+        target_length = max_length - 3
+        while len(result) > target_length and len(content) > 0:
+            content = content[:-1].rstrip()
+            result = f"{prefix}{quote_char}{content}{suffix}"
+
+        return result

Review Comment:
   I think there is unnecessary duplication in lines ~60-80. I think it would 
be better to handle the quotes (both outer and non-outer) in one continguous 
block of code, calculate a 'buffer' for the quotes and then use that to 
determine the string that is returned i.e. `result`. The current implementation 
seems functional but it is not very clean and explicit. 



##########
airflow-core/tests/unit/serialization/test_helpers.py:
##########
@@ -0,0 +1,136 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from airflow.serialization.helpers import _truncate_rendered_value
+
+
+def test_serialize_template_field_with_very_small_max_length(monkeypatch):
+    """Test that truncation message is prioritized even for very small 
max_length."""
+    monkeypatch.setenv("AIRFLOW__CORE__MAX_TEMPLATED_FIELD_LENGTH", "1")
+
+    from airflow.serialization.helpers import serialize_template_field
+
+    result = serialize_template_field("This is a long string", "test")
+
+    # The truncation message should be shown even if it exceeds max_length
+    # This ensures users always see why content is truncated
+    assert result
+    assert "Truncated. You can change this behaviour" in result

Review Comment:
   Is this asserting the full string? My understanding is that it is:
   
   `"Truncated. You can change this behaviour in 
[core]max_templated_field_length."`
   



##########
airflow-core/src/airflow/serialization/helpers.py:
##########
@@ -31,6 +31,94 @@
     from airflow.timetables.base import Timetable as CoreTimetable
 
 
+def _truncate_rendered_value(rendered: str, max_length: int) -> str:
+    if max_length <= 0:
+        return ""
+
+    prefix = "Truncated. You can change this behaviour in 
[core]max_templated_field_length. "
+    suffix = "..."
+    value = str(rendered)
+
+    # Always prioritize showing the truncation message first
+    trunc_only = f"{prefix}{suffix}"
+    trunc_only_len = len(trunc_only)
+
+    # If max_length is too small to even show the message, return it anyway
+    # (message takes priority over the constraint)
+    if max_length < trunc_only_len:
+        return trunc_only
+
+    # Check if value already has outer quotes - if so, preserve them and don't 
add extra quotes
+    has_outer_quotes = (value.startswith('"') and value.endswith('"')) or (
+        value.startswith("'") and value.endswith("'")
+    )
+
+    if has_outer_quotes:
+        # Value already has quotes - preserve the opening quote, truncate 
inner content
+        quote_char = value[0]
+        inner_value = value[1:-1]  # Strip outer quotes
+        # Calculate overhead: prefix + opening quote + suffix (no closing 
quote)
+        overhead = len(prefix) + 1 + len(suffix)
+        available = max_length - overhead
+
+        MIN_CONTENT_LENGTH = 7
+        if available < MIN_CONTENT_LENGTH:
+            return trunc_only
+
+        # Get content and trim trailing spaces
+        content = inner_value[:available].rstrip()
+
+        # Build result with opening quote, content, and suffix (no closing 
quote)
+        result = f"{prefix}{quote_char}{content}{suffix}"
+
+        # Ensure result < max_length, with a buffer when possible
+        # For values with outer quotes, use a larger buffer (3) since we don't 
add closing quotes
+        target_length = max_length - 3
+        while len(result) > target_length and len(content) > 0:
+            content = content[:-1].rstrip()
+            result = f"{prefix}{quote_char}{content}{suffix}"
+
+        return result
+    # Value doesn't have outer quotes - add quotes around content
+    # Choose quote character: use double quotes if value contains single 
quotes,
+    # otherwise use single quotes
+    if "'" in value and '"' not in value:
+        quote_char = '"'
+    else:
+        quote_char = "'"
+
+    # Calculate overhead: prefix + quotes around content + suffix
+    # Format: prefix + quote_char + content + quote_char + suffix
+    overhead = len(prefix) + 2 + len(suffix)  # 2 for the quotes around content
+    available = max_length - overhead
+
+    # Only show content if there's meaningful space for it
+    # Require at least enough space to show a few characters of content 
meaningfully
+    # This prevents showing just 1-2 characters which isn't very useful
+    MIN_CONTENT_LENGTH = 7

Review Comment:
   I think `MIN_CONTENT_LENGTH` should be instantiated once and moved to the 
top of the function. This looks like a constant that is not dependent on 
branching logic. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to