This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch v3-1-test
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/v3-1-test by this push:
new 407ae0ce1c8 [v3-1-test] Add LOG_MAX_SIZE environment variables to log groomer (#61559) (#61950)
407ae0ce1c8 is described below
commit 407ae0ce1c83a90f1d999eb2bcdf80b9ea84f9ac
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Feb 16 00:30:09 2026 +0100
[v3-1-test] Add LOG_MAX_SIZE environment variables to log groomer (#61559) (#61950)
* Add LOG_MAX_SIZE_BYTES option to log cleaner
* Updated clean logs
* Charts updated and tests passing
* Linting fixes
(cherry picked from commit 1e5f789db9c24a396ffd7f4b1950a94e6e909047)
Co-authored-by: Philip Corliss <[email protected]>
---
Dockerfile | 32 +++++++++-
airflow-core/tests/unit/charts/log_groomer.py | 70 ++++++++++++++++++++++
.../dag-processor/dag-processor-deployment.yaml | 8 +++
.../templates/scheduler/scheduler-deployment.yaml | 8 +++
.../templates/triggerer/triggerer-deployment.yaml | 8 +++
chart/templates/workers/worker-deployment.yaml | 8 +++
chart/values.schema.json | 13 ++++
chart/values.yaml | 18 ++++++
helm-tests/tests/chart_utils/log_groomer.py | 70 ++++++++++++++++++++++
scripts/docker/clean-logs.sh | 32 +++++++++-
10 files changed, 263 insertions(+), 4 deletions(-)
diff --git a/Dockerfile b/Dockerfile
index e53e25e76c8..7b9d3568664 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1641,22 +1641,50 @@ set -euo pipefail
readonly DIRECTORY="${AIRFLOW_HOME:-/usr/local/airflow}"
readonly RETENTION="${AIRFLOW__LOG_RETENTION_DAYS:-15}"
readonly FREQUENCY="${AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES:-15}"
+readonly MAX_PERCENT="${AIRFLOW__LOG_MAX_SIZE_PERCENT:-0}"
trap "exit" INT TERM
+MAX_SIZE_BYTES="${AIRFLOW__LOG_MAX_SIZE_BYTES:-0}"
+if [[ "$MAX_SIZE_BYTES" -eq 0 && "$MAX_PERCENT" -gt 0 ]]; then
+  total_space=$(df -k "${DIRECTORY}"/logs 2>/dev/null | tail -1 | awk '{print $2}' || echo "0")
+ MAX_SIZE_BYTES=$(( total_space * 1024 * MAX_PERCENT / 100 ))
+  echo "Computed MAX_SIZE_BYTES from ${MAX_PERCENT}% of disk: ${MAX_SIZE_BYTES} bytes"
+fi
+
+readonly MAX_SIZE_BYTES
+
readonly EVERY=$((FREQUENCY*60))
echo "Cleaning logs every $EVERY seconds"
+if [[ "$MAX_SIZE_BYTES" -gt 0 ]]; then
+ echo "Max log size limit: $MAX_SIZE_BYTES bytes"
+fi
+
+retention_days="${RETENTION}"
while true; do
- echo "Trimming airflow logs to ${RETENTION} days."
+ echo "Trimming airflow logs to ${retention_days} days."
find "${DIRECTORY}"/logs \
-type d -name 'lost+found' -prune -o \
- -type f -mtime +"${RETENTION}" -name '*.log' -print0 | \
+ -type f -mtime +"${retention_days}" -name '*.log' -print0 | \
xargs -0 rm -f || true
+ if [[ "$MAX_SIZE_BYTES" -gt 0 && "$retention_days" -ge 0 ]]; then
+    current_size=$(df -k "${DIRECTORY}"/logs 2>/dev/null | tail -1 | awk '{print $3}' || echo "0")
+ current_size=$(( current_size * 1024 ))
+
+ if [[ "$current_size" -gt "$MAX_SIZE_BYTES" ]]; then
+ retention_days=$((retention_days - 1))
+      echo "Size ($current_size bytes) exceeds limit ($MAX_SIZE_BYTES bytes). Reducing retention to ${retention_days} days."
+ continue
+ fi
+ fi
+
find "${DIRECTORY}"/logs -type d -empty -delete || true
+ retention_days="${RETENTION}"
+
seconds=$(( $(date -u +%s) % EVERY))
(( seconds < 1 )) || sleep $((EVERY - seconds - 1))
sleep 1
diff --git a/airflow-core/tests/unit/charts/log_groomer.py b/airflow-core/tests/unit/charts/log_groomer.py
index 0f64a137631..359763c1af3 100644
--- a/airflow-core/tests/unit/charts/log_groomer.py
+++ b/airflow-core/tests/unit/charts/log_groomer.py
@@ -218,6 +218,76 @@ class LogGroomerTestBase:
else:
             assert len(jmespath.search("spec.template.spec.containers[1].env", docs[0])) == 2
+ @pytest.mark.parametrize(
+        ("max_size_bytes", "max_size_result"), [(None, None), (1234567890, "1234567890")]
+ )
+    def test_log_groomer_max_size_bytes_overrides(self, max_size_bytes, max_size_result):
+ if self.obj_name == "dag-processor":
+ values = {
+ "dagProcessor": {
+ "enabled": True,
+ "logGroomerSidecar": {"maxSizeBytes": max_size_bytes},
+ }
+ }
+ else:
+            values = {f"{self.folder}": {"logGroomerSidecar": {"maxSizeBytes": max_size_bytes}}}
+
+ docs = render_chart(
+ values=values,
+            show_only=[f"templates/{self.folder}/{self.obj_name}-deployment.yaml"],
+ )
+
+ if max_size_result:
+ assert (
+ jmespath.search(
+                    "spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_MAX_SIZE_BYTES'].value | [0]",
+ docs[0],
+ )
+ == max_size_result
+ )
+ else:
+ assert (
+ jmespath.search(
+                    "spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_MAX_SIZE_BYTES'].value | [0]",
+ docs[0],
+ )
+ is None
+ )
+
+    @pytest.mark.parametrize(("max_size_percent", "max_size_result"), [(None, None), (80, "80")])
+    def test_log_groomer_max_size_percent_overrides(self, max_size_percent, max_size_result):
+ if self.obj_name == "dag-processor":
+ values = {
+ "dagProcessor": {
+ "enabled": True,
+ "logGroomerSidecar": {"maxSizePercent": max_size_percent},
+ }
+ }
+ else:
+            values = {f"{self.folder}": {"logGroomerSidecar": {"maxSizePercent": max_size_percent}}}
+
+ docs = render_chart(
+ values=values,
+            show_only=[f"templates/{self.folder}/{self.obj_name}-deployment.yaml"],
+ )
+
+ if max_size_result:
+ assert (
+ jmespath.search(
+                    "spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_MAX_SIZE_PERCENT'].value | [0]",
+ docs[0],
+ )
+ == max_size_result
+ )
+ else:
+ assert (
+ jmespath.search(
+                    "spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_MAX_SIZE_PERCENT'].value | [0]",
+ docs[0],
+ )
+ is None
+ )
+
def test_log_groomer_resources(self):
if self.obj_name == "dag-processor":
values = {
diff --git a/chart/templates/dag-processor/dag-processor-deployment.yaml b/chart/templates/dag-processor/dag-processor-deployment.yaml
index b5490fcfe2c..3f029eebf9f 100644
--- a/chart/templates/dag-processor/dag-processor-deployment.yaml
+++ b/chart/templates/dag-processor/dag-processor-deployment.yaml
@@ -221,6 +221,14 @@ spec:
{{- if .Values.dagProcessor.logGroomerSidecar.frequencyMinutes }}
- name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
             value: "{{ .Values.dagProcessor.logGroomerSidecar.frequencyMinutes }}"
+ {{- end }}
+ {{- if .Values.dagProcessor.logGroomerSidecar.maxSizeBytes }}
+ - name: AIRFLOW__LOG_MAX_SIZE_BYTES
+            value: "{{ .Values.dagProcessor.logGroomerSidecar.maxSizeBytes | int64 }}"
+ {{- end }}
+ {{- if .Values.dagProcessor.logGroomerSidecar.maxSizePercent }}
+ - name: AIRFLOW__LOG_MAX_SIZE_PERCENT
+            value: "{{ .Values.dagProcessor.logGroomerSidecar.maxSizePercent }}"
{{- end }}
- name: AIRFLOW_HOME
value: "{{ .Values.airflowHome }}"
diff --git a/chart/templates/scheduler/scheduler-deployment.yaml b/chart/templates/scheduler/scheduler-deployment.yaml
index 903d4767d0f..bb3670cecbe 100644
--- a/chart/templates/scheduler/scheduler-deployment.yaml
+++ b/chart/templates/scheduler/scheduler-deployment.yaml
@@ -280,6 +280,14 @@ spec:
{{- if .Values.scheduler.logGroomerSidecar.frequencyMinutes }}
- name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
             value: "{{ .Values.scheduler.logGroomerSidecar.frequencyMinutes }}"
+ {{- end }}
+ {{- if .Values.scheduler.logGroomerSidecar.maxSizeBytes }}
+ - name: AIRFLOW__LOG_MAX_SIZE_BYTES
+            value: "{{ .Values.scheduler.logGroomerSidecar.maxSizeBytes | int64 }}"
+ {{- end }}
+ {{- if .Values.scheduler.logGroomerSidecar.maxSizePercent }}
+ - name: AIRFLOW__LOG_MAX_SIZE_PERCENT
+ value: "{{ .Values.scheduler.logGroomerSidecar.maxSizePercent }}"
{{- end }}
- name: AIRFLOW_HOME
value: "{{ .Values.airflowHome }}"
diff --git a/chart/templates/triggerer/triggerer-deployment.yaml b/chart/templates/triggerer/triggerer-deployment.yaml
index 56a353a942c..9010becf118 100644
--- a/chart/templates/triggerer/triggerer-deployment.yaml
+++ b/chart/templates/triggerer/triggerer-deployment.yaml
@@ -251,6 +251,14 @@ spec:
{{- if .Values.triggerer.logGroomerSidecar.frequencyMinutes }}
- name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
             value: "{{ .Values.triggerer.logGroomerSidecar.frequencyMinutes }}"
+ {{- end }}
+ {{- if .Values.triggerer.logGroomerSidecar.maxSizeBytes }}
+ - name: AIRFLOW__LOG_MAX_SIZE_BYTES
+            value: "{{ .Values.triggerer.logGroomerSidecar.maxSizeBytes | int64 }}"
+ {{- end }}
+ {{- if .Values.triggerer.logGroomerSidecar.maxSizePercent }}
+ - name: AIRFLOW__LOG_MAX_SIZE_PERCENT
+ value: "{{ .Values.triggerer.logGroomerSidecar.maxSizePercent }}"
{{- end }}
- name: AIRFLOW_HOME
value: "{{ .Values.airflowHome }}"
diff --git a/chart/templates/workers/worker-deployment.yaml b/chart/templates/workers/worker-deployment.yaml
index 5e9e7a097bf..a0d79413792 100644
--- a/chart/templates/workers/worker-deployment.yaml
+++ b/chart/templates/workers/worker-deployment.yaml
@@ -347,6 +347,14 @@ spec:
{{- if .Values.workers.logGroomerSidecar.frequencyMinutes }}
- name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
value: "{{ .Values.workers.logGroomerSidecar.frequencyMinutes }}"
+ {{- end }}
+ {{- if .Values.workers.logGroomerSidecar.maxSizeBytes }}
+ - name: AIRFLOW__LOG_MAX_SIZE_BYTES
+            value: "{{ .Values.workers.logGroomerSidecar.maxSizeBytes | int64 }}"
+ {{- end }}
+ {{- if .Values.workers.logGroomerSidecar.maxSizePercent }}
+ - name: AIRFLOW__LOG_MAX_SIZE_PERCENT
+ value: "{{ .Values.workers.logGroomerSidecar.maxSizePercent }}"
{{- end }}
- name: AIRFLOW_HOME
value: "{{ .Values.airflowHome }}"
diff --git a/chart/values.schema.json b/chart/values.schema.json
index 64b35a780a1..d631db5fff7 100644
--- a/chart/values.schema.json
+++ b/chart/values.schema.json
@@ -12572,6 +12572,19 @@
"type": "integer",
"default": 15
},
+ "maxSizeBytes": {
+          "description": "Max size of logs directory in bytes. When exceeded, the log groomer reduces retention until size is under limit. 0 = disabled.",
+ "type": "integer",
+ "default": 0,
+ "minimum": 0
+ },
+ "maxSizePercent": {
+          "description": "Max size of logs as a percentage of total disk space. When exceeded, the log groomer reduces retention until size is under limit. 0 = disabled. Ignored if maxSizeBytes is set.",
+ "type": "integer",
+ "default": 0,
+ "minimum": 0,
+ "maximum": 100
+ },
"env": {
"description": "Add additional env vars to log groomer
sidecar container (templated).",
"items": {
diff --git a/chart/values.yaml b/chart/values.yaml
index bdca77834f2..6e8f5ac9bf7 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -935,6 +935,12 @@ workers:
# Frequency to attempt to groom logs (in minutes)
frequencyMinutes: 15
+ # Max size of logs in bytes. 0 = disabled
+ maxSizeBytes: 0
+
+    # Max size of logs as a percent of disk usage. 0 = disabled. Ignored if maxSizeBytes is set.
+ maxSizePercent: 0
+
resources: {}
# limits:
# cpu: 100m
@@ -1177,6 +1183,10 @@ scheduler:
retentionDays: 15
# frequency to attempt to groom logs, in minutes
frequencyMinutes: 15
+ # Max size of logs in bytes. 0 = disabled
+ maxSizeBytes: 0
+    # Max size of logs as a percent of disk usage. 0 = disabled. Ignored if maxSizeBytes is set.
+ maxSizePercent: 0
resources: {}
# limits:
# cpu: 100m
@@ -1961,6 +1971,10 @@ triggerer:
retentionDays: 15
# frequency to attempt to groom logs, in minutes
frequencyMinutes: 15
+ # Max size of logs in bytes. 0 = disabled
+ maxSizeBytes: 0
+    # Max size of logs as a percent of disk usage. 0 = disabled. Ignored if maxSizeBytes is set.
+ maxSizePercent: 0
resources: {}
# limits:
# cpu: 100m
@@ -2149,6 +2163,10 @@ dagProcessor:
retentionDays: 15
# frequency to attempt to groom logs, in minutes
frequencyMinutes: 15
+ # Max size of logs in bytes. 0 = disabled
+ maxSizeBytes: 0
+    # Max size of logs as a percent of disk usage. 0 = disabled. Ignored if maxSizeBytes is set.
+ maxSizePercent: 0
resources: {}
# limits:
# cpu: 100m
diff --git a/helm-tests/tests/chart_utils/log_groomer.py b/helm-tests/tests/chart_utils/log_groomer.py
index 3fcd60983e9..339e65540a1 100644
--- a/helm-tests/tests/chart_utils/log_groomer.py
+++ b/helm-tests/tests/chart_utils/log_groomer.py
@@ -219,6 +219,76 @@ class LogGroomerTestBase:
else:
             assert len(jmespath.search("spec.template.spec.containers[1].env", docs[0])) == 2
+ @pytest.mark.parametrize(
+        ("max_size_bytes", "max_size_result"), [(None, None), (1234567890, "1234567890")]
+ )
+    def test_log_groomer_max_size_bytes_overrides(self, max_size_bytes, max_size_result):
+ if self.obj_name == "dag-processor":
+ values = {
+ "dagProcessor": {
+ "enabled": True,
+ "logGroomerSidecar": {"maxSizeBytes": max_size_bytes},
+ }
+ }
+ else:
+            values = {f"{self.folder}": {"logGroomerSidecar": {"maxSizeBytes": max_size_bytes}}}
+
+ docs = render_chart(
+ values=values,
+            show_only=[f"templates/{self.folder}/{self.obj_name}-deployment.yaml"],
+ )
+
+ if max_size_result:
+ assert (
+ jmespath.search(
+                    "spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_MAX_SIZE_BYTES'].value | [0]",
+ docs[0],
+ )
+ == max_size_result
+ )
+ else:
+ assert (
+ jmespath.search(
+                    "spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_MAX_SIZE_BYTES'].value | [0]",
+ docs[0],
+ )
+ is None
+ )
+
+    @pytest.mark.parametrize(("max_size_percent", "max_size_result"), [(None, None), (80, "80")])
+    def test_log_groomer_max_size_percent_overrides(self, max_size_percent, max_size_result):
+ if self.obj_name == "dag-processor":
+ values = {
+ "dagProcessor": {
+ "enabled": True,
+ "logGroomerSidecar": {"maxSizePercent": max_size_percent},
+ }
+ }
+ else:
+            values = {f"{self.folder}": {"logGroomerSidecar": {"maxSizePercent": max_size_percent}}}
+
+ docs = render_chart(
+ values=values,
+            show_only=[f"templates/{self.folder}/{self.obj_name}-deployment.yaml"],
+ )
+
+ if max_size_result:
+ assert (
+ jmespath.search(
+                    "spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_MAX_SIZE_PERCENT'].value | [0]",
+ docs[0],
+ )
+ == max_size_result
+ )
+ else:
+ assert (
+ jmespath.search(
+                    "spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_MAX_SIZE_PERCENT'].value | [0]",
+ docs[0],
+ )
+ is None
+ )
+
def test_log_groomer_resources(self):
if self.obj_name == "dag-processor":
values = {
diff --git a/scripts/docker/clean-logs.sh b/scripts/docker/clean-logs.sh
index 063b0a985b6..15370362695 100755
--- a/scripts/docker/clean-logs.sh
+++ b/scripts/docker/clean-logs.sh
@@ -22,22 +22,50 @@ set -euo pipefail
readonly DIRECTORY="${AIRFLOW_HOME:-/usr/local/airflow}"
readonly RETENTION="${AIRFLOW__LOG_RETENTION_DAYS:-15}"
readonly FREQUENCY="${AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES:-15}"
+readonly MAX_PERCENT="${AIRFLOW__LOG_MAX_SIZE_PERCENT:-0}"
trap "exit" INT TERM
+MAX_SIZE_BYTES="${AIRFLOW__LOG_MAX_SIZE_BYTES:-0}"
+if [[ "$MAX_SIZE_BYTES" -eq 0 && "$MAX_PERCENT" -gt 0 ]]; then
+  total_space=$(df -k "${DIRECTORY}"/logs 2>/dev/null | tail -1 | awk '{print $2}' || echo "0")
+ MAX_SIZE_BYTES=$(( total_space * 1024 * MAX_PERCENT / 100 ))
+  echo "Computed MAX_SIZE_BYTES from ${MAX_PERCENT}% of disk: ${MAX_SIZE_BYTES} bytes"
+fi
+
+readonly MAX_SIZE_BYTES
+
readonly EVERY=$((FREQUENCY*60))
echo "Cleaning logs every $EVERY seconds"
+if [[ "$MAX_SIZE_BYTES" -gt 0 ]]; then
+ echo "Max log size limit: $MAX_SIZE_BYTES bytes"
+fi
+
+retention_days="${RETENTION}"
while true; do
- echo "Trimming airflow logs to ${RETENTION} days."
+ echo "Trimming airflow logs to ${retention_days} days."
find "${DIRECTORY}"/logs \
-type d -name 'lost+found' -prune -o \
- -type f -mtime +"${RETENTION}" -name '*.log' -print0 | \
+ -type f -mtime +"${retention_days}" -name '*.log' -print0 | \
xargs -0 rm -f || true
+ if [[ "$MAX_SIZE_BYTES" -gt 0 && "$retention_days" -ge 0 ]]; then
+    current_size=$(df -k "${DIRECTORY}"/logs 2>/dev/null | tail -1 | awk '{print $3}' || echo "0")
+ current_size=$(( current_size * 1024 ))
+
+ if [[ "$current_size" -gt "$MAX_SIZE_BYTES" ]]; then
+ retention_days=$((retention_days - 1))
+      echo "Size ($current_size bytes) exceeds limit ($MAX_SIZE_BYTES bytes). Reducing retention to ${retention_days} days."
+ continue
+ fi
+ fi
+
find "${DIRECTORY}"/logs -type d -empty -delete || true
+ retention_days="${RETENTION}"
+
seconds=$(( $(date -u +%s) % EVERY))
(( seconds < 1 )) || sleep $((EVERY - seconds - 1))
sleep 1