This is an automated email from the ASF dual-hosted git repository.
uranusjr pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 339fd06026 Better table name validation for db clean (#28246)
339fd06026 is described below
commit 339fd060266d87c7dd650f7ec2e1256fffa6bb67
Author: Jed Cunningham <[email protected]>
AuthorDate: Wed Dec 21 23:49:08 2022 -0600
Better table name validation for db clean (#28246)
Co-authored-by: Tzu-ping Chung <[email protected]>
Co-authored-by: Kaxil Naik <[email protected]>
---
airflow/utils/db_cleanup.py | 14 +++++++++++---
tests/utils/test_db_cleanup.py | 14 ++++++++++++++
2 files changed, 25 insertions(+), 3 deletions(-)
diff --git a/airflow/utils/db_cleanup.py b/airflow/utils/db_cleanup.py
index 51bfbb7963..ac96a6abdb 100644
--- a/airflow/utils/db_cleanup.py
+++ b/airflow/utils/db_cleanup.py
@@ -336,8 +336,16 @@ def run_cleanup(
:param session: Session representing connection to the metadata database.
"""
clean_before_timestamp = timezone.coerce_datetime(clean_before_timestamp)
- effective_table_names = table_names if table_names else
list(config_dict.keys())
- effective_config_dict = {k: v for k, v in config_dict.items() if k in
effective_table_names}
+ desired_table_names = set(table_names or config_dict)
+ effective_config_dict = {k: v for k, v in config_dict.items() if k in
desired_table_names}
+ effective_table_names = set(effective_config_dict)
+ if desired_table_names != effective_table_names:
+ outliers = desired_table_names - effective_table_names
+ logger.warning(
+ "The following table(s) are not valid choices and will be skipped:
%s", sorted(outliers)
+ )
+ if not effective_table_names:
+ raise SystemExit("No tables selected for db cleanup. Please choose
valid table names.")
if dry_run:
print("Performing dry run for db cleanup.")
print(
@@ -346,7 +354,7 @@ def run_cleanup(
)
_print_config(configs=effective_config_dict)
if not dry_run and confirm:
- _confirm_delete(date=clean_before_timestamp,
tables=list(effective_config_dict.keys()))
+ _confirm_delete(date=clean_before_timestamp,
tables=sorted(effective_table_names))
existing_tables = reflect_tables(tables=None, session=session).tables
for table_name, table_config in effective_config_dict.items():
if table_name not in existing_tables:
diff --git a/tests/utils/test_db_cleanup.py b/tests/utils/test_db_cleanup.py
index b00fde82a8..9775d9867c 100644
--- a/tests/utils/test_db_cleanup.py
+++ b/tests/utils/test_db_cleanup.py
@@ -121,6 +121,20 @@ class TestDBCleanup:
run_cleanup(**base_kwargs, table_names=table_names)
assert clean_table_mock.call_count == len(table_names) if table_names
else len(config_dict)
+ @patch("airflow.utils.db_cleanup._cleanup_table")
+ @patch("airflow.utils.db_cleanup._confirm_delete")
+ def test_validate_tables_all_invalid(self, confirm_delete_mock,
clean_table_mock):
+ """If only invalid tables are provided, don't try cleaning anything"""
+ base_kwargs = dict(
+ clean_before_timestamp=None,
+ dry_run=None,
+ verbose=None,
+ )
+ with pytest.raises(SystemExit) as execinfo:
+ run_cleanup(**base_kwargs, table_names=["all", "fake"])
+ assert "No tables selected for db cleanup" in str(execinfo.value)
+ confirm_delete_mock.assert_not_called()
+
@pytest.mark.parametrize(
"dry_run",
[None, True, False],