This is an automated email from the ASF dual-hosted git repository. henrik pushed a commit to branch verify-script in repository https://gitbox.apache.org/repos/asf/otava.git
commit 641028cabd51c95de437121b500adb62516f99c7 Author: Alex Sorokoumov <[email protected]> AuthorDate: Wed Nov 12 23:18:37 2025 -0800 Fix multiple issues discovered via running examples (#95) * Fix docker run commands in examples * Bump year in the postgres example data By default, Otava looks 1 year back. Example data is too old. * Expand all env variables and fix PostgreSQL example This commit fixes behavior removed in #86 - expand all env variables, not just ones matching CLI arguments. There are users relying on this behavior so we should not break it without longer discussion and a major release. --- docs/GRAFANA.md | 2 +- docs/GRAPHITE.md | 2 +- docs/POSTGRESQL.md | 2 +- examples/postgresql/init-db/schema.sql | 12 +- otava/config.py | 26 +++- pyproject.toml | 1 + tests/config_test.py | 232 ++++++++++++++++++++++++++++++++- util/verify-asf-release | 106 +++++++++++++++ uv.lock | 13 +- 9 files changed, 383 insertions(+), 13 deletions(-) diff --git a/docs/GRAFANA.md b/docs/GRAFANA.md index 13e8210..922c0ad 100644 --- a/docs/GRAFANA.md +++ b/docs/GRAFANA.md @@ -61,7 +61,7 @@ docker-compose -f examples/graphite/docker-compose.yaml up --force-recreate --al Run otava in another tab: ```bash -docker-compose -f examples/graphite/docker-compose.yaml run otava otava analyze my-product.test --since=-10m --update-grafana +docker-compose -f examples/graphite/docker-compose.yaml run otava analyze my-product.test --since=-10m --update-grafana ``` Expected output: diff --git a/docs/GRAPHITE.md b/docs/GRAPHITE.md index d7987b9..54d0e83 100644 --- a/docs/GRAPHITE.md +++ b/docs/GRAPHITE.md @@ -105,7 +105,7 @@ docker-compose -f examples/graphite/docker-compose.yaml up --force-recreate --al Run otava in another tab: ```bash -docker-compose -f examples/graphite/docker-compose.yaml run otava otava analyze my-product.test --since=-10m +docker-compose -f examples/graphite/docker-compose.yaml run otava analyze my-product.test --since=-10m ``` Expected output: diff --git a/docs/POSTGRESQL.md b/docs/POSTGRESQL.md index 115483a..7ecda0b 100644 --- a/docs/POSTGRESQL.md +++ b/docs/POSTGRESQL.md @@ -99,7 +99,7 @@ docker-compose -f examples/postgresql/docker-compose.yaml up --force-recreate -- Run Otava in the other tab to show results for a single test `aggregate_mem` and update the database with newly found change points: ```bash -docker-compose -f examples/postgresql/docker-compose.yaml run --build otava bin/otava analyze aggregate_mem --update-postgres +docker-compose -f examples/postgresql/docker-compose.yaml run --build otava analyze aggregate_mem --update-postgres ``` Expected output: diff --git a/examples/postgresql/init-db/schema.sql b/examples/postgresql/init-db/schema.sql index 632cf3f..1092331 100644 --- a/examples/postgresql/init-db/schema.sql +++ b/examples/postgresql/init-db/schema.sql @@ -75,12 +75,12 @@ INSERT INTO configs (id, benchmark, store, instance_type, cache) VALUES INSERT INTO experiments (id, ts, branch, commit, commit_ts, username, details_url) VALUES - ('aggregate-36e5ccd2', '2024-03-14 12:03:02+00', 'trunk', '36e5ccd2', '2024-03-13 10:03:02+00', 'ci', 'https://example.com/experiments/aggregate-36e5ccd2'), - ('aggregate-d5460f38', '2024-03-27 12:03:02+00', 'trunk', 'd5460f38', '2024-03-25 10:03:02+00', 'ci', 'https://example.com/experiments/aggregate-d5460f38'), - ('aggregate-bc9425cb', '2024-04-01 12:03:02+00', 'trunk', 'bc9425cb', '2024-04-02 10:03:02+00', 'ci', 'https://example.com/experiments/aggregate-bc9425cb'), - ('aggregate-14df1b11', '2024-04-07 12:03:02+00', 'trunk', '14df1b11', '2024-04-06 10:03:02+00', 'ci', 'https://example.com/experiments/aggregate-14df1b11'), - ('aggregate-ac40c0d8', '2024-04-14 12:03:02+00', 'trunk', 'ac40c0d8', '2024-04-13 10:03:02+00', 'ci', 'https://example.com/experiments/aggregate-ac40c0d8'), - ('aggregate-0af4ccbc', '2024-04-28 12:03:02+00', 'trunk', '0af4ccbc', '2024-04-27 10:03:02+00', 'ci', 'https://example.com/experiments/aggregate-0af4ccbc'); + ('aggregate-36e5ccd2', '2025-03-14 12:03:02+00', 'trunk', '36e5ccd2', '2025-03-13 10:03:02+00', 'ci', 'https://example.com/experiments/aggregate-36e5ccd2'), + ('aggregate-d5460f38', '2025-03-27 12:03:02+00', 'trunk', 'd5460f38', '2025-03-25 10:03:02+00', 'ci', 'https://example.com/experiments/aggregate-d5460f38'), + ('aggregate-bc9425cb', '2025-04-01 12:03:02+00', 'trunk', 'bc9425cb', '2025-04-02 10:03:02+00', 'ci', 'https://example.com/experiments/aggregate-bc9425cb'), + ('aggregate-14df1b11', '2025-04-07 12:03:02+00', 'trunk', '14df1b11', '2025-04-06 10:03:02+00', 'ci', 'https://example.com/experiments/aggregate-14df1b11'), + ('aggregate-ac40c0d8', '2025-04-14 12:03:02+00', 'trunk', 'ac40c0d8', '2025-04-13 10:03:02+00', 'ci', 'https://example.com/experiments/aggregate-ac40c0d8'), + ('aggregate-0af4ccbc', '2025-04-28 12:03:02+00', 'trunk', '0af4ccbc', '2025-04-27 10:03:02+00', 'ci', 'https://example.com/experiments/aggregate-0af4ccbc'); INSERT INTO results (experiment_id, config_id, process_cumulative_rate_mean, process_cumulative_rate_stderr, process_cumulative_rate_diff) diff --git a/otava/config.py b/otava/config.py index 345dede..ca1d587 100644 --- a/otava/config.py +++ b/otava/config.py @@ -20,6 +20,7 @@ from pathlib import Path from typing import Dict, List, Optional import configargparse +from expandvars import expandvars from ruamel.yaml import YAML from otava.bigquery import BigQueryConfig @@ -60,7 +61,7 @@ def load_tests(config: Dict, templates: Dict) -> Dict[str, TestConfig]: raise ConfigError("Property `tests` is not a dictionary") result = {} - for (test_name, test_config) in tests.items(): + for test_name, test_config in tests.items(): template_names = test_config.get("inherit", []) if not isinstance(template_names, List): template_names = [templates] @@ -80,7 +81,7 @@ def load_test_groups(config: Dict, tests: Dict[str, TestConfig]) -> Dict[str, Li raise ConfigError("Property `test_groups` is not a dictionary") result = {} - for (group_name, test_names) in groups.items(): + for group_name, test_names in groups.items(): test_list = [] if not isinstance(test_names, List): raise ConfigError(f"Test group {group_name} must be a list") @@ -95,12 +96,33 @@ def load_test_groups(config: Dict, tests: Dict[str, TestConfig]) -> Dict[str, Li return result +def expand_env_vars_recursive(obj): + """Recursively expand environment variables in all string values within a nested structure. + + Raises ConfigError if any environment variables remain undefined after expansion. + """ + if isinstance(obj, dict): + return {key: expand_env_vars_recursive(value) for key, value in obj.items()} + elif isinstance(obj, list): + return [expand_env_vars_recursive(item) for item in obj] + elif isinstance(obj, str): + return expandvars(obj, nounset=True) + else: + return obj + + def load_config_from_parser_args(args: configargparse.Namespace) -> Config: config_file = getattr(args, "config_file", None) if config_file is not None: yaml = YAML(typ="safe") config = yaml.load(Path(config_file).read_text()) + # Expand environment variables in the entire config after CLI argument replacement + try: + config = expand_env_vars_recursive(config) + except Exception as e: + raise ConfigError(f"Error expanding environment variables: {e}") + templates = load_templates(config) tests = load_tests(config, templates) groups = load_test_groups(config, tests) diff --git a/pyproject.toml b/pyproject.toml index 24e3b36..6bb1409 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ dependencies = [ "google-cloud-bigquery>=3.25.0", "pg8000>=1.31.2", "configargparse>=1.7.1", + "expandvars>=0.12.0", ] [project.optional-dependencies] diff --git a/tests/config_test.py b/tests/config_test.py index 5ce500e..4f66b95 100644 --- a/tests/config_test.py +++ b/tests/config_test.py @@ -15,11 +15,19 @@ # specific language governing permissions and limitations # under the License. import os +import tempfile from io import StringIO import pytest +from expandvars import UnboundVariable -from otava.config import NestedYAMLConfigFileParser, load_config_from_file +from otava.config import ( + NestedYAMLConfigFileParser, + create_config_parser, + expand_env_vars_recursive, + load_config_from_file, + load_config_from_parser_args, +) from otava.test_config import CsvTestConfig, GraphiteTestConfig, HistoStatTestConfig @@ -209,3 +217,225 @@ templates: for key in result.keys(): section = key.split('-')[0] assert section not in ignored_sections, f"Found key '{key}' from ignored section '{section}'" + + +def test_expand_env_vars_recursive(): + """Test the expand_env_vars_recursive function with various data types.""" + + # Set up test environment variables + test_env_vars = { + "TEST_HOST": "localhost", + "TEST_PORT": "8080", + "TEST_DB": "testdb", + "TEST_USER": "testuser", + } + + for key, value in test_env_vars.items(): + os.environ[key] = value + + try: + # Test simple string expansion + simple_string = "${TEST_HOST}:${TEST_PORT}" + result = expand_env_vars_recursive(simple_string) + assert result == "localhost:8080" + + # Test dictionary expansion + test_dict = { + "host": "${TEST_HOST}", + "port": "${TEST_PORT}", + "database": "${TEST_DB}", + "connection_string": "postgresql://${TEST_USER}@${TEST_HOST}:${TEST_PORT}/${TEST_DB}", + "timeout": 30, # non-string should remain unchanged + "enabled": True, # non-string should remain unchanged + } + + result_dict = expand_env_vars_recursive(test_dict) + expected_dict = { + "host": "localhost", + "port": "8080", + "database": "testdb", + "connection_string": "postgresql://testuser@localhost:8080/testdb", + "timeout": 30, + "enabled": True, + } + assert result_dict == expected_dict + + # Test list expansion + test_list = [ + "${TEST_HOST}", + {"nested_host": "${TEST_HOST}", "nested_port": "${TEST_PORT}"}, + ["${TEST_USER}", "${TEST_DB}"], + 123, # non-string should remain unchanged + ] + + result_list = expand_env_vars_recursive(test_list) + expected_list = [ + "localhost", + {"nested_host": "localhost", "nested_port": "8080"}, + ["testuser", "testdb"], + 123, + ] + assert result_list == expected_list + + # Test undefined variables (should throw UnboundVariable) + with pytest.raises(UnboundVariable, match="'UNDEFINED_VAR: unbound variable"): + expand_env_vars_recursive("${UNDEFINED_VAR}") + + # Test mixed defined/undefined variables (should throw UnboundVariable) + with pytest.raises(UnboundVariable, match="'UNDEFINED_VAR: unbound variable"): + expand_env_vars_recursive("prefix-${TEST_HOST}-middle-${UNDEFINED_VAR}-suffix") + + finally: + # Clean up environment variables + for key in test_env_vars: + if key in os.environ: + del os.environ[key] + + +def test_env_var_expansion_in_templates_and_tests(): + """Test that environment variable expansion works in template and test sections.""" + + # Set up test environment variables + test_env_vars = { + "CSV_DELIMITER": "$", + "CSV_QUOTE_CHAR": "!", + "CSV_FILENAME": "/tmp/test.csv", + } + + for key, value in test_env_vars.items(): + os.environ[key] = value + + # Create a temporary config file with env var placeholders + config_content = """ +templates: + csv_template_1: + csv_options: + delimiter: "${CSV_DELIMITER}" + + csv_template_2: + csv_options: + quote_char: '${CSV_QUOTE_CHAR}' + +tests: + expansion_test: + type: csv + file: ${CSV_FILENAME} + time_column: timestamp + metrics: + response_time: + column: response_ms + unit: ms + inherit: [csv_template_1, csv_template_2] +""" + + try: + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + f.write(config_content) + config_file_path = f.name + + try: + # Load config and verify expansion worked + parser = create_config_parser() + args = parser.parse_args(["--config-file", config_file_path]) + config = load_config_from_parser_args(args) + + # Verify test was loaded + assert "expansion_test" in config.tests + test = config.tests["expansion_test"] + assert isinstance(test, CsvTestConfig) + + # Verify that expansion worked + assert test.file == test_env_vars["CSV_FILENAME"] + + # Verify that inheritance from templates worked with expanded values + assert test.csv_options.delimiter == test_env_vars["CSV_DELIMITER"] + assert test.csv_options.quote_char == test_env_vars["CSV_QUOTE_CHAR"] + + finally: + os.unlink(config_file_path) + + finally: + # Clean up environment variables + for key in test_env_vars: + if key in os.environ: + del os.environ[key] + + +def test_cli_precedence_over_env_vars(): + """Test that CLI arguments take precedence over environment variables.""" + + # Set up environment variables + env_vars = { + "POSTGRES_HOSTNAME": "env-host.com", + "POSTGRES_PORT": "5433", + "POSTGRES_DATABASE": "env_db", + "SLACK_BOT_TOKEN": "env-slack-token", + } + + for key, value in env_vars.items(): + os.environ[key] = value + + # Create a simple config file + config_content = """ +postgres: + hostname: config_host + port: 5432 + database: config_db + username: config_user + password: config_pass + +slack: + token: config_slack_token +""" + + try: + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + f.write(config_content) + config_file_path = f.name + + try: + # Test 1: Only environment variables (no CLI overrides) + config_env_only = load_config_from_file(config_file_path) + + # Environment variables should override config file values + assert config_env_only.postgres.hostname == "env-host.com" + assert config_env_only.postgres.port == 5433 + assert config_env_only.postgres.database == "env_db" + assert config_env_only.slack.bot_token == "env-slack-token" + + # Values without env vars should use config file values + assert config_env_only.postgres.username == "config_user" + assert config_env_only.postgres.password == "config_pass" + + # Test 2: CLI arguments should override environment variables + cli_overrides = [ + "--postgres-hostname", + "cli-host.com", + "--postgres-port", + "5434", + "--slack-token", + "cli-slack-token", + ] + + config_cli_override = load_config_from_file( + config_file_path, arg_overrides=cli_overrides + ) + + # CLI overrides should win + assert config_cli_override.postgres.hostname == "cli-host.com" + assert config_cli_override.postgres.port == 5434 + assert config_cli_override.slack.bot_token == "cli-slack-token" + + # Values without CLI override should still use env vars + assert config_cli_override.postgres.database == "env_db" + assert config_cli_override.postgres.username == "config_user" + assert config_cli_override.postgres.password == "config_pass" + + finally: + os.unlink(config_file_path) + + finally: + # Clean up environment variables + for key in env_vars: + if key in os.environ: + del os.environ[key] diff --git a/util/verify-asf-release b/util/verify-asf-release new file mode 100755 index 0000000..c2f7c38 --- /dev/null +++ b/util/verify-asf-release @@ -0,0 +1,106 @@ +#!/bin/bash + +# This is a script that contributors can use to verify release candidates before voting +# Note: ASF rules specifically require each contributor, ultimately committer, to personally, +# that is, manually, inspect a release candidate source tar file before voting on it. +# Scripting this "manual" process is allowed. Also sharing such scripts between contributors +# is allowed. If you use this script, consider adding your own checks or variations to diversify +# the positive impact on quality that this process is intended to have. +# Note that trying to fully automate these checks, for example by putting this script in CI, +# is not allowed. Releases must always be approved via voting, and voters must have personally +# validated the release. + + +set -e +set -x + + +PYTHON_VERSION=3.8 +PROJECT_NAME="apache-otava" +SHORT_NAME="otava" +PROJECT_VERSION="0.7.0-incubating-rc3" +DIST="dev/incubator" +#DIST="release/incubator" + +ASF_BASE_URL="https://dist.apache.org/repos/dist" +KEYS_URL="${ASF_BASE_URL}/release/incubator/KEYS" +BASE_URL="${ASF_BASE_URL}/${DIST}/${SHORT_NAME}/${PROJECT_VERSION}" + +TAR_BASE="${PROJECT_NAME}-${PROJECT_VERSION}" +TAR_FILE="${TAR_BASE}-src.tar.gz" +TAR_URL="${BASE_URL}/${TAR_FILE}" + + +# Just execute the tests, skip the download and unpack +workdir=$1 + + +if [[ ! -d ${workdir} ]] +then +###################################### Download and unpack #########################################3 + uname=asf-verifier-$RANDOM + uvenv=venv-$uname + workdir=/tmp/$uname + + set +x + + echo " " + + echo "Please execute:" + echo "" + echo " gpg --fetch-keys $KEYS_URL" + echo "" + echo "... manually." + echo "(For security reasons I won't.)" + echo "" + set -x + + + mkdir /tmp/$uname + cd /tmp/$uname + + curl -f "$TAR_URL" > "$TAR_FILE" + curl -f -o "$TAR_FILE".asc "$TAR_URL".asc + curl -f -o "$TAR_FILE".sha512 "$TAR_URL".sha512 + + gpg -v --verify "$TAR_FILE".asc "$TAR_FILE" + sha512sum --check "$TAR_FILE".sha512 + + tar xvf "$TAR_FILE" + #cd "$PROJECT_NAME" + ls -laiFhR +else + cd $workdir +fi +cd ${SHORT_NAME}-${PROJECT_VERSION} +cat README.md +cat LICENSE +cat DISCLAIMER +cat NOTICE + +echo +echo Try to actually execute something +echo + +set -x +uv venv --allow-existing +uv pip install . +uv run otava + +# Commented out lines from here on are because they don't work +# uv run otava -h +uv run otava analyze -h + +uv run otava list-groups # There are no groups yet +uv run otava --config-file examples/csv/config/otava.yaml analyze -h +uv run otava --config-file examples/csv/config/otava.yaml list-groups +uv run otava --config-file examples/csv/config/otava.yaml list-tests + +# This works, but it didn't analyze local.sample +# uv run otava --config-file examples/csv/config/otava.yaml analyze local.sample + +# This one fails and prints an "ERROR: ..." message, but doesn't return with nonzero exit +uv run otava --config-file examples/csv/config/otava.yaml analyze --tests-local.sample-file=examples/csv/data/local_sample.csv local.sample + + +docker compose -f examples/csv/docker-compose.yaml run --build otava analyze local.sample diff --git a/uv.lock b/uv.lock index a5beddf..f2b2833 100644 --- a/uv.lock +++ b/uv.lock @@ -10,11 +10,12 @@ resolution-markers = [ [[package]] name = "apache-otava" -version = "0.6.1" +version = "0.7.0" source = { editable = "." } dependencies = [ { name = "configargparse" }, { name = "dateparser" }, + { name = "expandvars" }, { name = "google-cloud-bigquery", version = "3.30.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, { name = "google-cloud-bigquery", version = "3.35.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, { name = "numpy" }, @@ -55,6 +56,7 @@ requires-dist = [ { name = "autoflake", marker = "extra == 'dev'", specifier = ">=1.4" }, { name = "configargparse", specifier = ">=1.7.1" }, { name = "dateparser", specifier = ">=1.0.0" }, + { name = "expandvars", specifier = ">=0.12.0" }, { name = "flake8", marker = "extra == 'dev'", specifier = ">=4.0.1" }, { name = "google-cloud-bigquery", specifier = ">=3.25.0" }, { name = "isort", marker = "extra == 'dev'", specifier = ">=5.10.1" }, @@ -261,6 +263,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/cc/b7e31358aac6ed1ef2bb790a9746ac2c69bcb3c8588b41616914eb106eaf/exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", size = 16453, upload-time = "2024-07-12T22:25:58.476Z" }, ] +[[package]] +name = "expandvars" +version = "1.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9c/64/a9d8ea289d663a44b346203a24bf798507463db1e76679eaa72ee6de1c7a/expandvars-1.1.2.tar.gz", hash = "sha256:6c5822b7b756a99a356b915dd1267f52ab8a4efaa135963bd7f4bd5d368f71d7", size = 70842, upload-time = "2025-09-12T10:55:20.929Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/e6/79c43f7a55264e479a9fbf21ddba6a73530b3ea8439a8bb7fa5a281721af/expandvars-1.1.2-py3-none-any.whl", hash = "sha256:d1652fe4e61914f5b88ada93aaedb396446f55ae4621de45c8cb9f66e5712526", size = 7526, upload-time = "2025-09-12T10:55:18.779Z" }, +] + [[package]] name = "filelock" version = "3.16.1"
