This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch v3-1-test
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/v3-1-test by this push:
     new 407ae0ce1c8 [v3-1-test] Add LOG_MAX_SIZE environment variables to log 
groomer (#61559) (#61950)
407ae0ce1c8 is described below

commit 407ae0ce1c83a90f1d999eb2bcdf80b9ea84f9ac
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Feb 16 00:30:09 2026 +0100

    [v3-1-test] Add LOG_MAX_SIZE environment variables to log groomer (#61559) 
(#61950)
    
    * Add LOG_MAX_SIZE_BYTES option to log cleaner
    
    * Updated clean logs
    
    * Charts updated and tests passing
    
    * Linting fixes
    (cherry picked from commit 1e5f789db9c24a396ffd7f4b1950a94e6e909047)
    
    Co-authored-by: Philip Corliss <[email protected]>
---
 Dockerfile                                         | 32 +++++++++-
 airflow-core/tests/unit/charts/log_groomer.py      | 70 ++++++++++++++++++++++
 .../dag-processor/dag-processor-deployment.yaml    |  8 +++
 .../templates/scheduler/scheduler-deployment.yaml  |  8 +++
 .../templates/triggerer/triggerer-deployment.yaml  |  8 +++
 chart/templates/workers/worker-deployment.yaml     |  8 +++
 chart/values.schema.json                           | 13 ++++
 chart/values.yaml                                  | 18 ++++++
 helm-tests/tests/chart_utils/log_groomer.py        | 70 ++++++++++++++++++++++
 scripts/docker/clean-logs.sh                       | 32 +++++++++-
 10 files changed, 263 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index e53e25e76c8..7b9d3568664 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1641,22 +1641,50 @@ set -euo pipefail
 readonly DIRECTORY="${AIRFLOW_HOME:-/usr/local/airflow}"
 readonly RETENTION="${AIRFLOW__LOG_RETENTION_DAYS:-15}"
 readonly FREQUENCY="${AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES:-15}"
+readonly MAX_PERCENT="${AIRFLOW__LOG_MAX_SIZE_PERCENT:-0}"
 
 trap "exit" INT TERM
 
+MAX_SIZE_BYTES="${AIRFLOW__LOG_MAX_SIZE_BYTES:-0}"
+if [[ "$MAX_SIZE_BYTES" -eq 0 && "$MAX_PERCENT" -gt 0 ]]; then
+  total_space=$(df -k "${DIRECTORY}"/logs 2>/dev/null | tail -1 | awk '{print $2}' || echo "0")
+  MAX_SIZE_BYTES=$(( total_space * 1024 * MAX_PERCENT / 100 ))
+  echo "Computed MAX_SIZE_BYTES from ${MAX_PERCENT}% of disk: ${MAX_SIZE_BYTES} bytes"
+fi
+
+readonly MAX_SIZE_BYTES
+
 readonly EVERY=$((FREQUENCY*60))
 
 echo "Cleaning logs every $EVERY seconds"
+if [[ "$MAX_SIZE_BYTES" -gt 0 ]]; then
+  echo "Max log size limit: $MAX_SIZE_BYTES bytes"
+fi
+
+retention_days="${RETENTION}"
 
 while true; do
-  echo "Trimming airflow logs to ${RETENTION} days."
+  echo "Trimming airflow logs to ${retention_days} days."
   find "${DIRECTORY}"/logs \
     -type d -name 'lost+found' -prune -o \
-    -type f -mtime +"${RETENTION}" -name '*.log' -print0 | \
+    -type f -mtime +"${retention_days}" -name '*.log' -print0 | \
     xargs -0 rm -f || true
 
+  if [[ "$MAX_SIZE_BYTES" -gt 0 && "$retention_days" -ge 0 ]]; then
+    current_size=$(df -k "${DIRECTORY}"/logs 2>/dev/null | tail -1 | awk '{print $3}' || echo "0")
+    current_size=$(( current_size * 1024 ))
+
+    if [[ "$current_size" -gt "$MAX_SIZE_BYTES" ]]; then
+      retention_days=$((retention_days - 1))
+      echo "Size ($current_size bytes) exceeds limit ($MAX_SIZE_BYTES bytes). Reducing retention to ${retention_days} days."
+      continue
+    fi
+  fi
+
   find "${DIRECTORY}"/logs -type d -empty -delete || true
 
+  retention_days="${RETENTION}"
+
   seconds=$(( $(date -u +%s) % EVERY))
   (( seconds < 1 )) || sleep $((EVERY - seconds - 1))
   sleep 1
diff --git a/airflow-core/tests/unit/charts/log_groomer.py 
b/airflow-core/tests/unit/charts/log_groomer.py
index 0f64a137631..359763c1af3 100644
--- a/airflow-core/tests/unit/charts/log_groomer.py
+++ b/airflow-core/tests/unit/charts/log_groomer.py
@@ -218,6 +218,76 @@ class LogGroomerTestBase:
         else:
             assert len(jmespath.search("spec.template.spec.containers[1].env", 
docs[0])) == 2
 
+    @pytest.mark.parametrize(
+        ("max_size_bytes", "max_size_result"), [(None, None), (1234567890, 
"1234567890")]
+    )
+    def test_log_groomer_max_size_bytes_overrides(self, max_size_bytes, 
max_size_result):
+        if self.obj_name == "dag-processor":
+            values = {
+                "dagProcessor": {
+                    "enabled": True,
+                    "logGroomerSidecar": {"maxSizeBytes": max_size_bytes},
+                }
+            }
+        else:
+            values = {f"{self.folder}": {"logGroomerSidecar": {"maxSizeBytes": 
max_size_bytes}}}
+
+        docs = render_chart(
+            values=values,
+            
show_only=[f"templates/{self.folder}/{self.obj_name}-deployment.yaml"],
+        )
+
+        if max_size_result:
+            assert (
+                jmespath.search(
+                    
"spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_MAX_SIZE_BYTES'].value
 | [0]",
+                    docs[0],
+                )
+                == max_size_result
+            )
+        else:
+            assert (
+                jmespath.search(
+                    
"spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_MAX_SIZE_BYTES'].value
 | [0]",
+                    docs[0],
+                )
+                is None
+            )
+
+    @pytest.mark.parametrize(("max_size_percent", "max_size_result"), [(None, 
None), (80, "80")])
+    def test_log_groomer_max_size_percent_overrides(self, max_size_percent, 
max_size_result):
+        if self.obj_name == "dag-processor":
+            values = {
+                "dagProcessor": {
+                    "enabled": True,
+                    "logGroomerSidecar": {"maxSizePercent": max_size_percent},
+                }
+            }
+        else:
+            values = {f"{self.folder}": {"logGroomerSidecar": 
{"maxSizePercent": max_size_percent}}}
+
+        docs = render_chart(
+            values=values,
+            
show_only=[f"templates/{self.folder}/{self.obj_name}-deployment.yaml"],
+        )
+
+        if max_size_result:
+            assert (
+                jmespath.search(
+                    
"spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_MAX_SIZE_PERCENT'].value
 | [0]",
+                    docs[0],
+                )
+                == max_size_result
+            )
+        else:
+            assert (
+                jmespath.search(
+                    
"spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_MAX_SIZE_PERCENT'].value
 | [0]",
+                    docs[0],
+                )
+                is None
+            )
+
     def test_log_groomer_resources(self):
         if self.obj_name == "dag-processor":
             values = {
diff --git a/chart/templates/dag-processor/dag-processor-deployment.yaml 
b/chart/templates/dag-processor/dag-processor-deployment.yaml
index b5490fcfe2c..3f029eebf9f 100644
--- a/chart/templates/dag-processor/dag-processor-deployment.yaml
+++ b/chart/templates/dag-processor/dag-processor-deployment.yaml
@@ -221,6 +221,14 @@ spec:
           {{- if .Values.dagProcessor.logGroomerSidecar.frequencyMinutes }}
             - name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
               value: "{{ 
.Values.dagProcessor.logGroomerSidecar.frequencyMinutes }}"
+          {{- end }}
+          {{- if .Values.dagProcessor.logGroomerSidecar.maxSizeBytes }}
+            - name: AIRFLOW__LOG_MAX_SIZE_BYTES
+              value: "{{ .Values.dagProcessor.logGroomerSidecar.maxSizeBytes | 
int64 }}"
+          {{- end }}
+          {{- if .Values.dagProcessor.logGroomerSidecar.maxSizePercent }}
+            - name: AIRFLOW__LOG_MAX_SIZE_PERCENT
+              value: "{{ .Values.dagProcessor.logGroomerSidecar.maxSizePercent 
}}"
           {{- end }}
             - name: AIRFLOW_HOME
               value: "{{ .Values.airflowHome }}"
diff --git a/chart/templates/scheduler/scheduler-deployment.yaml 
b/chart/templates/scheduler/scheduler-deployment.yaml
index 903d4767d0f..bb3670cecbe 100644
--- a/chart/templates/scheduler/scheduler-deployment.yaml
+++ b/chart/templates/scheduler/scheduler-deployment.yaml
@@ -280,6 +280,14 @@ spec:
           {{- if .Values.scheduler.logGroomerSidecar.frequencyMinutes }}
             - name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
               value: "{{ .Values.scheduler.logGroomerSidecar.frequencyMinutes 
}}"
+          {{- end }}
+          {{- if .Values.scheduler.logGroomerSidecar.maxSizeBytes }}
+            - name: AIRFLOW__LOG_MAX_SIZE_BYTES
+              value: "{{ .Values.scheduler.logGroomerSidecar.maxSizeBytes | 
int64 }}"
+          {{- end }}
+          {{- if .Values.scheduler.logGroomerSidecar.maxSizePercent }}
+            - name: AIRFLOW__LOG_MAX_SIZE_PERCENT
+              value: "{{ .Values.scheduler.logGroomerSidecar.maxSizePercent }}"
           {{- end }}
             - name: AIRFLOW_HOME
               value: "{{ .Values.airflowHome }}"
diff --git a/chart/templates/triggerer/triggerer-deployment.yaml 
b/chart/templates/triggerer/triggerer-deployment.yaml
index 56a353a942c..9010becf118 100644
--- a/chart/templates/triggerer/triggerer-deployment.yaml
+++ b/chart/templates/triggerer/triggerer-deployment.yaml
@@ -251,6 +251,14 @@ spec:
           {{- if .Values.triggerer.logGroomerSidecar.frequencyMinutes }}
             - name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
               value: "{{ .Values.triggerer.logGroomerSidecar.frequencyMinutes 
}}"
+          {{- end }}
+          {{- if .Values.triggerer.logGroomerSidecar.maxSizeBytes }}
+            - name: AIRFLOW__LOG_MAX_SIZE_BYTES
+              value: "{{ .Values.triggerer.logGroomerSidecar.maxSizeBytes | 
int64 }}"
+          {{- end }}
+          {{- if .Values.triggerer.logGroomerSidecar.maxSizePercent }}
+            - name: AIRFLOW__LOG_MAX_SIZE_PERCENT
+              value: "{{ .Values.triggerer.logGroomerSidecar.maxSizePercent }}"
           {{- end }}
             - name: AIRFLOW_HOME
               value: "{{ .Values.airflowHome }}"
diff --git a/chart/templates/workers/worker-deployment.yaml 
b/chart/templates/workers/worker-deployment.yaml
index 5e9e7a097bf..a0d79413792 100644
--- a/chart/templates/workers/worker-deployment.yaml
+++ b/chart/templates/workers/worker-deployment.yaml
@@ -347,6 +347,14 @@ spec:
           {{- if .Values.workers.logGroomerSidecar.frequencyMinutes }}
             - name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
               value: "{{ .Values.workers.logGroomerSidecar.frequencyMinutes }}"
+          {{- end }}
+          {{- if .Values.workers.logGroomerSidecar.maxSizeBytes }}
+            - name: AIRFLOW__LOG_MAX_SIZE_BYTES
+              value: "{{ .Values.workers.logGroomerSidecar.maxSizeBytes | 
int64 }}"
+          {{- end }}
+          {{- if .Values.workers.logGroomerSidecar.maxSizePercent }}
+            - name: AIRFLOW__LOG_MAX_SIZE_PERCENT
+              value: "{{ .Values.workers.logGroomerSidecar.maxSizePercent }}"
           {{- end }}
             - name: AIRFLOW_HOME
               value: "{{ .Values.airflowHome }}"
diff --git a/chart/values.schema.json b/chart/values.schema.json
index 64b35a780a1..d631db5fff7 100644
--- a/chart/values.schema.json
+++ b/chart/values.schema.json
@@ -12572,6 +12572,19 @@
                     "type": "integer",
                     "default": 15
                 },
+                "maxSizeBytes": {
+                    "description": "Max size of logs directory in bytes. When 
exceeded, the log groomer reduces retention until size is under limit. 0 = 
disabled.",
+                    "type": "integer",
+                    "default": 0,
+                    "minimum": 0
+                },
+                "maxSizePercent": {
+                    "description": "Max size of logs as a percentage of total 
disk space. When exceeded, the log groomer reduces retention until size is 
under limit. 0 = disabled. Ignored if maxSizeBytes is set.",
+                    "type": "integer",
+                    "default": 0,
+                    "minimum": 0,
+                    "maximum": 100
+                },
                 "env": {
                     "description": "Add additional env vars to log groomer 
sidecar container (templated).",
                     "items": {
diff --git a/chart/values.yaml b/chart/values.yaml
index bdca77834f2..6e8f5ac9bf7 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -935,6 +935,12 @@ workers:
     # Frequency to attempt to groom logs (in minutes)
     frequencyMinutes: 15
 
+    # Max size of logs in bytes. 0 = disabled
+    maxSizeBytes: 0
+
+    # Max size of logs as a percent of disk usage. 0 = disabled. Ignored if 
maxSizeBytes is set.
+    maxSizePercent: 0
+
     resources: {}
     #  limits:
     #   cpu: 100m
@@ -1177,6 +1183,10 @@ scheduler:
     retentionDays: 15
     # frequency to attempt to groom logs, in minutes
     frequencyMinutes: 15
+    # Max size of logs in bytes. 0 = disabled
+    maxSizeBytes: 0
+    # Max size of logs as a percent of disk usage. 0 = disabled. Ignored if 
maxSizeBytes is set.
+    maxSizePercent: 0
     resources: {}
     #  limits:
     #   cpu: 100m
@@ -1961,6 +1971,10 @@ triggerer:
     retentionDays: 15
     # frequency to attempt to groom logs, in minutes
     frequencyMinutes: 15
+    # Max size of logs in bytes. 0 = disabled
+    maxSizeBytes: 0
+    # Max size of logs as a percent of disk usage. 0 = disabled. Ignored if 
maxSizeBytes is set.
+    maxSizePercent: 0
     resources: {}
     #  limits:
     #   cpu: 100m
@@ -2149,6 +2163,10 @@ dagProcessor:
     retentionDays: 15
     # frequency to attempt to groom logs, in minutes
     frequencyMinutes: 15
+    # Max size of logs in bytes. 0 = disabled
+    maxSizeBytes: 0
+    # Max size of logs as a percent of disk usage. 0 = disabled. Ignored if 
maxSizeBytes is set.
+    maxSizePercent: 0
     resources: {}
     #  limits:
     #   cpu: 100m
diff --git a/helm-tests/tests/chart_utils/log_groomer.py 
b/helm-tests/tests/chart_utils/log_groomer.py
index 3fcd60983e9..339e65540a1 100644
--- a/helm-tests/tests/chart_utils/log_groomer.py
+++ b/helm-tests/tests/chart_utils/log_groomer.py
@@ -219,6 +219,76 @@ class LogGroomerTestBase:
         else:
             assert len(jmespath.search("spec.template.spec.containers[1].env", 
docs[0])) == 2
 
+    @pytest.mark.parametrize(
+        ("max_size_bytes", "max_size_result"), [(None, None), (1234567890, 
"1234567890")]
+    )
+    def test_log_groomer_max_size_bytes_overrides(self, max_size_bytes, 
max_size_result):
+        if self.obj_name == "dag-processor":
+            values = {
+                "dagProcessor": {
+                    "enabled": True,
+                    "logGroomerSidecar": {"maxSizeBytes": max_size_bytes},
+                }
+            }
+        else:
+            values = {f"{self.folder}": {"logGroomerSidecar": {"maxSizeBytes": 
max_size_bytes}}}
+
+        docs = render_chart(
+            values=values,
+            
show_only=[f"templates/{self.folder}/{self.obj_name}-deployment.yaml"],
+        )
+
+        if max_size_result:
+            assert (
+                jmespath.search(
+                    
"spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_MAX_SIZE_BYTES'].value
 | [0]",
+                    docs[0],
+                )
+                == max_size_result
+            )
+        else:
+            assert (
+                jmespath.search(
+                    
"spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_MAX_SIZE_BYTES'].value
 | [0]",
+                    docs[0],
+                )
+                is None
+            )
+
+    @pytest.mark.parametrize(("max_size_percent", "max_size_result"), [(None, 
None), (80, "80")])
+    def test_log_groomer_max_size_percent_overrides(self, max_size_percent, 
max_size_result):
+        if self.obj_name == "dag-processor":
+            values = {
+                "dagProcessor": {
+                    "enabled": True,
+                    "logGroomerSidecar": {"maxSizePercent": max_size_percent},
+                }
+            }
+        else:
+            values = {f"{self.folder}": {"logGroomerSidecar": 
{"maxSizePercent": max_size_percent}}}
+
+        docs = render_chart(
+            values=values,
+            
show_only=[f"templates/{self.folder}/{self.obj_name}-deployment.yaml"],
+        )
+
+        if max_size_result:
+            assert (
+                jmespath.search(
+                    
"spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_MAX_SIZE_PERCENT'].value
 | [0]",
+                    docs[0],
+                )
+                == max_size_result
+            )
+        else:
+            assert (
+                jmespath.search(
+                    
"spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_MAX_SIZE_PERCENT'].value
 | [0]",
+                    docs[0],
+                )
+                is None
+            )
+
     def test_log_groomer_resources(self):
         if self.obj_name == "dag-processor":
             values = {
diff --git a/scripts/docker/clean-logs.sh b/scripts/docker/clean-logs.sh
index 063b0a985b6..15370362695 100755
--- a/scripts/docker/clean-logs.sh
+++ b/scripts/docker/clean-logs.sh
@@ -22,22 +22,50 @@ set -euo pipefail
 readonly DIRECTORY="${AIRFLOW_HOME:-/usr/local/airflow}"
 readonly RETENTION="${AIRFLOW__LOG_RETENTION_DAYS:-15}"
 readonly FREQUENCY="${AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES:-15}"
+readonly MAX_PERCENT="${AIRFLOW__LOG_MAX_SIZE_PERCENT:-0}"
 
 trap "exit" INT TERM
 
+MAX_SIZE_BYTES="${AIRFLOW__LOG_MAX_SIZE_BYTES:-0}"
+if [[ "$MAX_SIZE_BYTES" -eq 0 && "$MAX_PERCENT" -gt 0 ]]; then
+  total_space=$(df -k "${DIRECTORY}"/logs 2>/dev/null | tail -1 | awk '{print $2}' || echo "0")
+  MAX_SIZE_BYTES=$(( total_space * 1024 * MAX_PERCENT / 100 ))
+  echo "Computed MAX_SIZE_BYTES from ${MAX_PERCENT}% of disk: ${MAX_SIZE_BYTES} bytes"
+fi
+
+readonly MAX_SIZE_BYTES
+
 readonly EVERY=$((FREQUENCY*60))
 
 echo "Cleaning logs every $EVERY seconds"
+if [[ "$MAX_SIZE_BYTES" -gt 0 ]]; then
+  echo "Max log size limit: $MAX_SIZE_BYTES bytes"
+fi
+
+retention_days="${RETENTION}"
 
 while true; do
-  echo "Trimming airflow logs to ${RETENTION} days."
+  echo "Trimming airflow logs to ${retention_days} days."
   find "${DIRECTORY}"/logs \
     -type d -name 'lost+found' -prune -o \
-    -type f -mtime +"${RETENTION}" -name '*.log' -print0 | \
+    -type f -mtime +"${retention_days}" -name '*.log' -print0 | \
     xargs -0 rm -f || true
 
+  if [[ "$MAX_SIZE_BYTES" -gt 0 && "$retention_days" -ge 0 ]]; then
+    current_size=$(df -k "${DIRECTORY}"/logs 2>/dev/null | tail -1 | awk '{print $3}' || echo "0")
+    current_size=$(( current_size * 1024 ))
+
+    if [[ "$current_size" -gt "$MAX_SIZE_BYTES" ]]; then
+      retention_days=$((retention_days - 1))
+      echo "Size ($current_size bytes) exceeds limit ($MAX_SIZE_BYTES bytes). Reducing retention to ${retention_days} days."
+      continue
+    fi
+  fi
+
   find "${DIRECTORY}"/logs -type d -empty -delete || true
 
+  retention_days="${RETENTION}"
+
   seconds=$(( $(date -u +%s) % EVERY))
   (( seconds < 1 )) || sleep $((EVERY - seconds - 1))
   sleep 1

Reply via email to