This is an automated email from the ASF dual-hosted git repository. ash pushed a commit to branch v2-1-test in repository https://gitbox.apache.org/repos/asf/airflow.git
commit 4c37aeab97fb1f40a20b912402d2747cd5fc1d5a Author: Jarek Potiuk <[email protected]> AuthorDate: Wed Jun 16 11:29:45 2021 +0200 Switch to built-in data structures in SecretsMasker (#16424) Using Iterable in SecretsMasker might cause undesireable side effect in case the object passed as log parameter is an iterable object and actually iterating it is not idempotent. For example in case of botocore, it passes StreamingBody object to log and this object is Iterable. However it can be iterated only once. Masking causes the object to be iterated during logging and results in empty body when actual results are retrieved later. This change only iterates list type of objects and recurrently redacts only dicts/strs/tuples/sets/lists which should never produce any side effects as all those objects do not have side effects when they are accessed. Fixes: #16148 (cherry picked from commit d1d02b62e3436dedfe9a2b80cd1e61954639ca4d) --- airflow/utils/log/secrets_masker.py | 8 +++----- tests/utils/log/test_secrets_masker.py | 16 ---------------- 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/airflow/utils/log/secrets_masker.py b/airflow/utils/log/secrets_masker.py index 3177c58..2fd0d0a 100644 --- a/airflow/utils/log/secrets_masker.py +++ b/airflow/utils/log/secrets_masker.py @@ -16,7 +16,6 @@ # under the License. """Mask sensitive information from logs""" import collections -import io import logging import re from typing import TYPE_CHECKING, Iterable, Optional, Set, TypeVar, Union @@ -178,7 +177,7 @@ class SecretsMasker(logging.Filter): elif isinstance(item, (tuple, set)): # Turn set in to tuple! return tuple(self._redact_all(subval) for subval in item) - elif isinstance(item, Iterable): + elif isinstance(item, list): return list(self._redact_all(subval) for subval in item) else: return item @@ -209,12 +208,11 @@ class SecretsMasker(logging.Filter): elif isinstance(item, (tuple, set)): # Turn set in to tuple! return tuple(self.redact(subval) for subval in item) - elif isinstance(item, io.IOBase): - return item - elif isinstance(item, Iterable): + elif isinstance(item, list): return list(self.redact(subval) for subval in item) else: return item + # I think this should never happen, but it does not hurt to leave it just in case except Exception as e: # pylint: disable=broad-except log.warning( "Unable to redact %r, please report this via <https://github.com/apache/airflow/issues>. " diff --git a/tests/utils/log/test_secrets_masker.py b/tests/utils/log/test_secrets_masker.py index 24e86c1..5d3b404 100644 --- a/tests/utils/log/test_secrets_masker.py +++ b/tests/utils/log/test_secrets_masker.py @@ -72,22 +72,6 @@ class TestSecretsMasker: assert caplog.text == "INFO Cannot connect to user:***\n" - def test_non_redactable(self, logger, caplog): - class NonReactable: - def __iter__(self): - raise RuntimeError("force fail") - - def __repr__(self): - return "<NonRedactable>" - - logger.info("Logging %s", NonReactable()) - - assert caplog.messages == [ - "Unable to redact <NonRedactable>, please report this via " - + "<https://github.com/apache/airflow/issues>. Error was: RuntimeError: force fail", - "Logging <NonRedactable>", - ] - def test_extra(self, logger, caplog): logger.handlers[0].formatter = ShortExcFormatter("%(levelname)s %(message)s %(conn)s") logger.info("Cannot connect", extra={'conn': "user:password"})
