This is an automated email from the ASF dual-hosted git repository. asorokoumov pushed a commit to branch feat/118 in repository https://gitbox.apache.org/repos/asf/otava.git
commit d4647bc3bd9946d0225af8398bdf7782beb99431 Author: Alex Sorokoumov <[email protected]> AuthorDate: Mon Jan 26 19:08:50 2026 -0800 Remove `regressions` command --- otava/main.py | 108 +---------------------------- otava/series.py | 41 ----------- tests/cli_help_test.py | 169 +-------------------------------------------- tests/csv_e2e_test.py | 84 ---------------------- tests/postgres_e2e_test.py | 88 ----------------------- tests/series_test.py | 59 +--------------- 6 files changed, 6 insertions(+), 543 deletions(-) diff --git a/otava/main.py b/otava/main.py index 09e7608..046e43a 100644 --- a/otava/main.py +++ b/otava/main.py @@ -15,11 +15,9 @@ # specific language governing permissions and limitations # under the License. -import copy import logging -import sys from dataclasses import dataclass -from datetime import datetime, timedelta +from datetime import datetime from typing import Dict, List, Optional import configargparse as argparse @@ -36,7 +34,7 @@ from otava.graphite import GraphiteError from otava.importer import DataImportError, Importers from otava.postgres import Postgres, PostgresError from otava.report import Report, ReportType -from otava.series import AnalysisOptions, AnalyzedSeries, compare +from otava.series import AnalysisOptions, AnalyzedSeries from otava.slack import NotificationError, SlackNotifier from otava.test_config import ( BigQueryTestConfig, @@ -256,71 +254,6 @@ class Otava: attributes = series.attributes_at(cp.index) bigquery.insert_change_point(test, metric_name, attributes, cp) - def regressions( - self, test: TestConfig, selector: DataSelector, options: AnalysisOptions - ) -> bool: - importer = self.__importers.get(test) - - # Even if user is interested only in performance difference since some point X, - # we really need to fetch some earlier points than X. - # Otherwise, if performance went down very early after X, e.g. at X + 1, we'd have - # insufficient number of data points to compute the baseline performance. - # Instead of using `since-` selector, we're fetching everything from the - # beginning and then we find the baseline performance around the time pointed by - # the original selector. - since_version = selector.since_version - since_commit = selector.since_commit - since_time = selector.since_time - baseline_selector = copy.deepcopy(selector) - baseline_selector.last_n_points = sys.maxsize - baseline_selector.branch = None - baseline_selector.since_version = None - baseline_selector.since_commit = None - baseline_selector.since_time = since_time - timedelta(days=30) - baseline_series = importer.fetch_data(test, baseline_selector) - - if since_version: - baseline_index = baseline_series.find_by_attribute("version", since_version) - if not baseline_index: - raise OtavaError(f"No runs of test {test.name} with version {since_version}") - baseline_index = max(baseline_index) - elif since_commit: - baseline_index = baseline_series.find_by_attribute("commit", since_commit) - if not baseline_index: - raise OtavaError(f"No runs of test {test.name} with commit {since_commit}") - baseline_index = max(baseline_index) - else: - baseline_index = baseline_series.find_first_not_earlier_than(since_time) - - baseline_series = baseline_series.analyze(options=options) - - if selector.branch: - target_series = importer.fetch_data(test, selector).analyze(options=options) - else: - target_series = baseline_series - - cmp = compare(baseline_series, baseline_index, target_series, target_series.len()) - regressions = [] - for metric_name, stats in cmp.stats.items(): - direction = baseline_series.metric(metric_name).direction - m1 = stats.mean_1 - m2 = stats.mean_2 - change_percent = stats.forward_rel_change() * 100.0 - if m2 * direction < m1 * direction and stats.pvalue < options.max_pvalue: - regressions.append( - " {:16}: {:#8.3g} --> {:#8.3g} ({:+6.1f}%)".format( - metric_name, m1, m2, change_percent - ) - ) - - if regressions: - print(f"{test.name}:") - for r in regressions: - print(r) - else: - print(f"{test.name}: OK") - return len(regressions) > 0 - def __maybe_create_slack_notifier(self): if not self.__conf.slack: return None @@ -587,17 +520,6 @@ def create_otava_cli_parser() -> argparse.ArgumentParser: setup_data_selector_parser(analyze_parser) setup_analysis_options_parser(analyze_parser) - regressions_parser = subparsers.add_parser( - "regressions", - help="find performance regressions", - ) - regressions_parser.add_argument( - "tests", help="name of the test or group of the tests", nargs="+" - ) - config.add_service_option_groups(regressions_parser) - setup_data_selector_parser(regressions_parser) - setup_analysis_options_parser(regressions_parser) - remove_annotations_parser = subparsers.add_parser( "remove-annotations", ) @@ -687,32 +609,6 @@ def script_main(conf: Config = None, args: List[str] = None): since=slack_cph_since, ) - if args.command == "regressions": - data_selector = data_selector_from_args(args) - options = analysis_options_from_args(args) - tests = otava.get_tests(*args.tests) - regressing_test_count = 0 - errors = 0 - for test in tests: - try: - regressions = otava.regressions(test, selector=data_selector, options=options) - if regressions: - regressing_test_count += 1 - except OtavaError as err: - logging.error(err.message) - errors += 1 - except DataImportError as err: - logging.error(err.message) - errors += 1 - if regressing_test_count == 0: - print("No regressions found!") - elif regressing_test_count == 1: - print("Regressions in 1 test found") - else: - print(f"Regressions in {regressing_test_count} tests found") - if errors > 0: - print("Some tests were skipped due to import / analyze errors. Consult error log.") - if args.command == "remove-annotations": if args.tests: tests = otava.get_tests(*args.tests) diff --git a/otava/series.py b/otava/series.py index 5114eab..bb3bf47 100644 --- a/otava/series.py +++ b/otava/series.py @@ -21,10 +21,7 @@ from datetime import datetime, timezone from itertools import groupby from typing import Any, Dict, Iterable, List, Optional -import numpy as np - from otava.analysis import ( - TTestSignificanceTester, TTestStats, compute_change_points, compute_change_points_orig, @@ -528,41 +525,3 @@ class AnalyzedSeries: analyzed_series.change_points_by_time = AnalyzedSeries.__group_change_points_by_time(analyzed_series.__series, analyzed_series.change_points) return analyzed_series - - -@dataclass -class SeriesComparison: - series_1: AnalyzedSeries - series_2: AnalyzedSeries - index_1: int - index_2: int - stats: Dict[str, TTestStats] # keys: metric name - - -def compare( - series_1: AnalyzedSeries, - index_1: Optional[int], - series_2: AnalyzedSeries, - index_2: Optional[int], -) -> SeriesComparison: - - # if index not specified, we want to take the most recent performance - index_1 = index_1 if index_1 is not None else len(series_1.time()) - index_2 = index_2 if index_2 is not None else len(series_2.time()) - metrics = filter(lambda m: m in series_2.metric_names(), series_1.metric_names()) - - tester = TTestSignificanceTester(series_1.options.max_pvalue) - stats = {} - - for metric in metrics: - data_1 = series_1.data(metric) - (begin_1, end_1) = series_1.get_stable_range(metric, index_1) - data_1 = [x for x in data_1[begin_1:end_1] if x is not None] - - data_2 = series_2.data(metric) - (begin_2, end_2) = series_2.get_stable_range(metric, index_2) - data_2 = [x for x in data_2[begin_2:end_2] if x is not None] - - stats[metric] = tester.compare(np.array(data_1), np.array(data_2)) - - return SeriesComparison(series_1, series_2, index_1, index_2, stats) diff --git a/tests/cli_help_test.py b/tests/cli_help_test.py index a8a0255..1984062 100644 --- a/tests/cli_help_test.py +++ b/tests/cli_help_test.py @@ -56,7 +56,7 @@ usage: otava [-h] [--config-file CONFIG_FILE] [--graphite-url GRAPHITE_URL] [--postgres-username POSTGRES_USERNAME] [--postgres-password POSTGRES_PASSWORD] [--postgres-database POSTGRES_DATABASE] [--bigquery-project-id BIGQUERY_PROJECT_ID] [--bigquery-dataset BIGQUERY_DATASET] [--bigquery-credentials BIGQUERY_CREDENTIALS] - {list-tests,list-metrics,list-groups,analyze,regressions,remove-annotations,validate} ...""" + {list-tests,list-metrics,list-groups,analyze,remove-annotations,validate} ...""" else: usage_line = """\ usage: otava [-h] [--config-file CONFIG_FILE] [--graphite-url GRAPHITE_URL] @@ -66,7 +66,7 @@ usage: otava [-h] [--config-file CONFIG_FILE] [--graphite-url GRAPHITE_URL] [--postgres-username POSTGRES_USERNAME] [--postgres-password POSTGRES_PASSWORD] [--postgres-database POSTGRES_DATABASE] [--bigquery-project-id BIGQUERY_PROJECT_ID] [--bigquery-dataset BIGQUERY_DATASET] [--bigquery-credentials BIGQUERY_CREDENTIALS] - {list-tests,list-metrics,list-groups,analyze,regressions,remove-annotations,validate} + {list-tests,list-metrics,list-groups,analyze,remove-annotations,validate} ...""" assert ( @@ -76,12 +76,11 @@ usage: otava [-h] [--config-file CONFIG_FILE] [--graphite-url GRAPHITE_URL] Change Detection for Continuous Performance Engineering positional arguments: - {list-tests,list-metrics,list-groups,analyze,regressions,remove-annotations,validate} + {list-tests,list-metrics,list-groups,analyze,remove-annotations,validate} list-tests list available tests list-metrics list available metrics for a test list-groups list available groups of tests analyze analyze performance test results - regressions find performance regressions validate validates the tests and metrics defined in the configuration options: @@ -332,168 +331,6 @@ BigQuery Options: ) -def test_otava_regressions_help_output(): - result = run_help_command("regressions") - assert result.returncode == 0, ( - f"Expected exit code 0, got {result.returncode}. stderr:\n{result.stderr}" - ) - - # Python 3.13+ formats usage lines and option aliases differently - if IS_PYTHON_313_PLUS: - usage_and_options = """\ -usage: otava regressions [-h] [--graphite-url GRAPHITE_URL] [--grafana-url GRAFANA_URL] - [--grafana-user GRAFANA_USER] [--grafana-password GRAFANA_PASSWORD] - [--slack-token SLACK_TOKEN] [--postgres-hostname POSTGRES_HOSTNAME] - [--postgres-port POSTGRES_PORT] [--postgres-username POSTGRES_USERNAME] - [--postgres-password POSTGRES_PASSWORD] - [--postgres-database POSTGRES_DATABASE] - [--bigquery-project-id BIGQUERY_PROJECT_ID] - [--bigquery-dataset BIGQUERY_DATASET] - [--bigquery-credentials BIGQUERY_CREDENTIALS] [--branch [STRING]] - [--metrics LIST] [--attrs LIST] [--since-commit STRING | - --since-version STRING | --since DATE] [--until-commit STRING | - --until-version STRING | --until DATE] [--last COUNT] - [-P, --p-value PVALUE] [-M MAGNITUDE] [--window WINDOW] - [--orig-edivisive ORIG_EDIVISIVE] - tests [tests ...] - -positional arguments: - tests name of the test or group of the tests - -options: - -h, --help show this help message and exit - --branch [STRING] name of the branch - --metrics LIST a comma-separated list of metrics to analyze - --attrs LIST a comma-separated list of attribute names associated with the runs (e.g. - commit, branch, version); if not specified, it will be automatically - filled based on available information - --since-commit STRING - the commit at the start of the time span to analyze - --since-version STRING - the version at the start of the time span to analyze - --since DATE the start of the time span to analyze; accepts ISO, and human-readable - dates like '10 weeks ago' - --until-commit STRING - the commit at the end of the time span to analyze - --until-version STRING - the version at the end of the time span to analyze - --until DATE the end of the time span to analyze; same syntax as --since - --last COUNT the number of data points to take from the end of the series - -P, --p-value PVALUE maximum accepted P-value of a change-point; P denotes the probability that - the change-point has been found by a random coincidence, rather than a - real difference between the data distributions - -M, --magnitude MAGNITUDE""" - else: - usage_and_options = """\ -usage: otava regressions [-h] [--graphite-url GRAPHITE_URL] [--grafana-url GRAFANA_URL] - [--grafana-user GRAFANA_USER] [--grafana-password GRAFANA_PASSWORD] - [--slack-token SLACK_TOKEN] [--postgres-hostname POSTGRES_HOSTNAME] - [--postgres-port POSTGRES_PORT] [--postgres-username POSTGRES_USERNAME] - [--postgres-password POSTGRES_PASSWORD] - [--postgres-database POSTGRES_DATABASE] - [--bigquery-project-id BIGQUERY_PROJECT_ID] - [--bigquery-dataset BIGQUERY_DATASET] - [--bigquery-credentials BIGQUERY_CREDENTIALS] [--branch [STRING]] - [--metrics LIST] [--attrs LIST] - [--since-commit STRING | --since-version STRING | --since DATE] - [--until-commit STRING | --until-version STRING | --until DATE] - [--last COUNT] [-P, --p-value PVALUE] [-M MAGNITUDE] [--window WINDOW] - [--orig-edivisive ORIG_EDIVISIVE] - tests [tests ...] - -positional arguments: - tests name of the test or group of the tests - -options: - -h, --help show this help message and exit - --branch [STRING] name of the branch - --metrics LIST a comma-separated list of metrics to analyze - --attrs LIST a comma-separated list of attribute names associated with the runs (e.g. - commit, branch, version); if not specified, it will be automatically - filled based on available information - --since-commit STRING - the commit at the start of the time span to analyze - --since-version STRING - the version at the start of the time span to analyze - --since DATE the start of the time span to analyze; accepts ISO, and human-readable - dates like '10 weeks ago' - --until-commit STRING - the commit at the end of the time span to analyze - --until-version STRING - the version at the end of the time span to analyze - --until DATE the end of the time span to analyze; same syntax as --since - --last COUNT the number of data points to take from the end of the series - -P, --p-value PVALUE maximum accepted P-value of a change-point; P denotes the probability that - the change-point has been found by a random coincidence, rather than a - real difference between the data distributions - -M MAGNITUDE, --magnitude MAGNITUDE""" - - assert ( - result.stdout - == usage_and_options + """ - minimum accepted magnitude of a change-point computed as abs(new_mean / - old_mean - 1.0); use it to filter out stupidly small changes like < 0.01 - --window WINDOW the number of data points analyzed at once; the window size affects the - discriminative power of the change point detection algorithm; large - windows are less susceptible to noise; however, a very large window may - cause dismissing short regressions as noise so it is best to keep it short - enough to include not more than a few change points (optimally at most 1) - --orig-edivisive ORIG_EDIVISIVE - use the original edivisive algorithm with no windowing and weak change - points analysis improvements - -Graphite Options: - Options for Graphite configuration - - --graphite-url GRAPHITE_URL - Graphite server URL [env var: GRAPHITE_ADDRESS] - -Grafana Options: - Options for Grafana configuration - - --grafana-url GRAFANA_URL - Grafana server URL [env var: GRAFANA_ADDRESS] - --grafana-user GRAFANA_USER - Grafana server user [env var: GRAFANA_USER] - --grafana-password GRAFANA_PASSWORD - Grafana server password [env var: GRAFANA_PASSWORD] - -Slack Options: - Options for Slack configuration - - --slack-token SLACK_TOKEN - Slack bot token to use for sending notifications [env var: - SLACK_BOT_TOKEN] - -PostgreSQL Options: - Options for PostgreSQL configuration - - --postgres-hostname POSTGRES_HOSTNAME - PostgreSQL server hostname [env var: POSTGRES_HOSTNAME] - --postgres-port POSTGRES_PORT - PostgreSQL server port [env var: POSTGRES_PORT] - --postgres-username POSTGRES_USERNAME - PostgreSQL username [env var: POSTGRES_USERNAME] - --postgres-password POSTGRES_PASSWORD - PostgreSQL password [env var: POSTGRES_PASSWORD] - --postgres-database POSTGRES_DATABASE - PostgreSQL database name [env var: POSTGRES_DATABASE] - -BigQuery Options: - Options for BigQuery configuration - - --bigquery-project-id BIGQUERY_PROJECT_ID - BigQuery project ID [env var: BIGQUERY_PROJECT_ID] - --bigquery-dataset BIGQUERY_DATASET - BigQuery dataset [env var: BIGQUERY_DATASET] - --bigquery-credentials BIGQUERY_CREDENTIALS - BigQuery credentials file [env var: BIGQUERY_VAULT_SECRET] - - In general, command-line values override environment variables which override defaults. -""" - ) - - def test_otava_list_tests_help_output(): result = run_help_command("list-tests") assert result.returncode == 0, ( diff --git a/tests/csv_e2e_test.py b/tests/csv_e2e_test.py index 33d80f4..9baf494 100644 --- a/tests/csv_e2e_test.py +++ b/tests/csv_e2e_test.py @@ -123,87 +123,3 @@ def test_analyze_csv(): ) assert _remove_trailing_whitespaces(proc.stdout) == expected_output.rstrip("\n") - - -def test_regressions_csv(): - """ - End-to-end test for the CSV example from docs/CSV.md. - - Writes a temporary CSV and otava.yaml, runs: - uv run otava analyze local.sample - in the temporary directory, and compares stdout to the expected output. - """ - - now = datetime.now() - n = 10 - timestamps = [now - timedelta(days=i) for i in range(n)] - metrics1 = [154023, 138455, 143112, 149190, 132098, 151344, 155145, 148889, 149466, 148209] - metrics2 = [10.43, 10.23, 10.29, 10.91, 10.34, 10.69, 9.23, 9.11, 9.13, 9.03] - data_points = [] - for i in range(n): - data_points.append( - ( - timestamps[i].strftime("%Y.%m.%d %H:%M:%S %z"), # time - "aaa" + str(i), # commit - metrics1[i], - metrics2[i], - ) - ) - - config_content = textwrap.dedent( - """\ - tests: - local.sample: - type: csv - file: data/local_sample.csv - time_column: time - attributes: [commit] - metrics: [metric1, metric2] - csv_options: - delimiter: "," - quotechar: "'" - """ - ) - expected_output = textwrap.dedent( - """\ - local.sample: - metric2 : 10.5 --> 9.12 ( -12.9%) - Regressions in 1 test found - """ - ) - with tempfile.TemporaryDirectory() as td: - td_path = Path(td) - # create data directory and write CSV - data_dir = td_path / "data" - data_dir.mkdir(parents=True, exist_ok=True) - csv_path = data_dir / "local_sample.csv" - with open(csv_path, "w", newline="") as f: - writer = csv.writer(f) - writer.writerow(["time", "commit", "metric1", "metric2"]) - writer.writerows(data_points) - - # write otava.yaml in temp cwd - config_path = td_path / "otava.yaml" - config_path.write_text(config_content, encoding="utf-8") - - # run command - cmd = ["uv", "run", "otava", "regressions", "local.sample"] - proc = subprocess.run( - cmd, - cwd=str(td_path), - capture_output=True, - text=True, - timeout=120, - env=dict(os.environ, OTAVA_CONFIG=config_path), - ) - - if proc.returncode != 0: - pytest.fail( - "Command returned non-zero exit code.\n\n" - f"Command: {cmd!r}\n" - f"Exit code: {proc.returncode}\n\n" - f"Stdout:\n{proc.stdout}\n\n" - f"Stderr:\n{proc.stderr}\n" - ) - - assert _remove_trailing_whitespaces(proc.stdout) == expected_output.rstrip("\n") diff --git a/tests/postgres_e2e_test.py b/tests/postgres_e2e_test.py index e45fa39..be6a440 100644 --- a/tests/postgres_e2e_test.py +++ b/tests/postgres_e2e_test.py @@ -231,94 +231,6 @@ def test_analyze_and_update_postgres(): pytest.fail(f"DB p-value {p_value!r} not less than 0.01") -def test_regressions(): - """ - End-to-end test for the PostgreSQL regressions command. - - Starts the docker-compose stack from examples/postgresql/docker-compose.yaml, - waits for Postgres to be ready, runs the otava regressions command, - and compares stdout to the expected output. - """ - username = "exampleuser" - password = "examplepassword" - db = "benchmark_results" - with postgres_container(username, password, db) as (postgres_container_id, host_port): - # Run the Otava regressions command - proc = subprocess.run( - ["uv", "run", "otava", "regressions", "aggregate_mem"], - capture_output=True, - text=True, - timeout=600, - env=dict( - os.environ, - OTAVA_CONFIG=Path("examples/postgresql/config/otava.yaml"), - POSTGRES_HOSTNAME="localhost", - POSTGRES_PORT=host_port, - POSTGRES_USERNAME=username, - POSTGRES_PASSWORD=password, - POSTGRES_DATABASE=db, - BRANCH="trunk", - ), - ) - - if proc.returncode != 0: - pytest.fail( - "Command returned non-zero exit code.\n\n" - f"Command: {proc.args!r}\n" - f"Exit code: {proc.returncode}\n\n" - f"Stdout:\n{proc.stdout}\n\n" - f"Stderr:\n{proc.stderr}\n" - ) - - expected_output = textwrap.dedent( - """\ - aggregate_mem: - process_cumulative_rate_mean: 6.08e+04 --> 5.74e+04 ( -5.6%) - Regressions in 1 test found - """ - ) - assert proc.stdout == expected_output - - # Verify the DB was NOT updated since --update-postgres was not specified - query_proc = subprocess.run( - [ - "docker", - "exec", - postgres_container_id, - "psql", - "-U", - "exampleuser", - "-d", - "benchmark_results", - "-Atc", - """ - SELECT - process_cumulative_rate_mean_rel_forward_change, - process_cumulative_rate_mean_rel_backward_change, - process_cumulative_rate_mean_p_value - FROM results - WHERE experiment_id='aggregate-14df1b11' AND config_id=1; - """, - ], - capture_output=True, - text=True, - timeout=60, - ) - if query_proc.returncode != 0: - pytest.fail( - "Command returned non-zero exit code.\n\n" - f"Command: {query_proc.args!r}\n" - f"Exit code: {query_proc.returncode}\n\n" - f"Stdout:\n{query_proc.stdout}\n\n" - f"Stderr:\n{query_proc.stderr}\n" - ) - - # psql -Atc returns rows like: value|pvalue - forward_change, backward_change, p_value = query_proc.stdout.strip().split("|") - # --update-postgres was not specified, so no change point should be recorded - assert forward_change == backward_change == p_value == "" - - def _postgres_readiness_check_f( username: str, database: str ) -> Callable[[str, dict[int, int]], bool]: diff --git a/tests/series_test.py b/tests/series_test.py index e12ba01..94fbe54 100644 --- a/tests/series_test.py +++ b/tests/series_test.py @@ -20,7 +20,7 @@ from random import random import pytest -from otava.series import AnalysisOptions, Metric, Series, compare +from otava.series import AnalysisOptions, Metric, Series def test_change_point_detection(): @@ -137,63 +137,6 @@ def test_get_stable_range(): assert test.get_stable_range("series2", 3) == (0, 4) -def test_compare(): - series_1 = [1.02, 0.95, 0.99, 1.00, 1.04, 1.02, 0.50, 0.51, 0.48, 0.48, 0.53] - series_2 = [2.02, 2.03, 2.01, 2.04, 0.51, 0.49, 0.51, 0.49, 0.48, 0.52, 0.50] - time = list(range(len(series_1))) - test_1 = Series("test_1", None, time, {"data": Metric()}, {"data": series_1}, {}).analyze() - test_2 = Series("test_2", None, time, {"data": Metric()}, {"data": series_2}, {}).analyze() - - stats = compare(test_1, None, test_2, None).stats["data"] - assert stats.pvalue > 0.5 # tails are almost the same - assert 0.48 < stats.mean_1 < 0.52 - assert 0.48 < stats.mean_2 < 0.52 - - stats = compare(test_1, 0, test_2, 0).stats["data"] - assert stats.pvalue < 0.01 # beginnings are different - assert 0.98 < stats.mean_1 < 1.02 - assert 2.00 < stats.mean_2 < 2.03 - - stats = compare(test_1, 5, test_2, 10).stats["data"] - assert stats.pvalue < 0.01 - assert 0.98 < stats.mean_1 < 1.02 - assert 0.49 < stats.mean_2 < 0.51 - - -def test_compare_single_point(): - series_1 = [1.02, 0.95, 0.99, 1.00, 1.04, 1.02, 0.50, 0.51, 0.48, 0.48, 0.53] - series_2 = [0.51] - series_3 = [0.99] - - test_1 = Series( - "test_1", None, list(range(len(series_1))), {"data": Metric()}, {"data": series_1}, {} - ).analyze() - test_2 = Series("test_2", None, [1], {"data": Metric()}, {"data": series_2}, {}).analyze() - test_3 = Series("test_3", None, [1], {"data": Metric()}, {"data": series_3}, {}).analyze() - - stats = compare(test_1, None, test_2, None).stats["data"] - assert stats.pvalue > 0.5 - - stats = compare(test_1, 5, test_3, None).stats["data"] - assert stats.pvalue > 0.5 - - stats = compare(test_1, None, test_3, None).stats["data"] - assert stats.pvalue < 0.01 - - -def test_compare_metrics_order(): - test = Series( - "test", - branch=None, - time=list(range(3)), - metrics={"m1": Metric(), "m2": Metric(), "m3": Metric(), "m4": Metric(), "m5": Metric()}, - data={"m1": [0, 0, 0], "m2": [0, 0, 0], "m3": [0, 0, 0], "m4": [0, 0, 0], "m5": [0, 0, 0]}, - attributes={}, - ).analyze() - cmp = compare(test, None, test, None) - assert list(cmp.stats.keys()) == ["m1", "m2", "m3", "m4", "m5"] - - def test_incremental_otava(): series_1 = [1.02, 0.95, 0.99, 1.00, 1.12, 0.90, 0.50, 0.51, 0.48, 0.48, 0.55] series_2 = [2.02, 2.03, 2.01, 2.04, 1.82, 1.85, 1.79, 1.81, 1.80, 1.76, 1.78]
