This is an automated email from the ASF dual-hosted git repository.

tvalentyn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
     new 6f82f34eeb1  Onboard Performance tests using the Change point analysis tool. (#25754)
6f82f34eeb1 is described below

commit 6f82f34eeb14097810f3e8faee10221ac1c54aa3
Author: Anand Inguva <34158215+ananding...@users.noreply.github.com>
AuthorDate: Fri Mar 17 14:49:07 2023 -0400

    Onboard Performance tests using the Change point analysis tool. (#25754)
---
 .github/workflows/run_perf_alert_tool.yml          | 24 +++++----
 .../python/apache_beam/testing/analyzers/README.md |  9 ++--
 .../apache_beam/testing/analyzers/constants.py     |  2 +-
 .../testing/analyzers/github_issues_utils.py       |  3 +-
 .../apache_beam/testing/analyzers/perf_analysis.py | 17 ++++---
 .../testing/analyzers/perf_analysis_utils.py       | 20 ++++----
 .../testing/analyzers/tests_config.yaml            | 59 ++++++++++++++++++----
 .../benchmarks/cloudml/cloudml_benchmark_test.py   | 24 ---------
 8 files changed, 93 insertions(+), 65 deletions(-)

diff --git a/.github/workflows/run_perf_alert_tool.yml b/.github/workflows/run_perf_alert_tool.yml
index 30779e765f1..65e89ca5f5f 100644
--- a/.github/workflows/run_perf_alert_tool.yml
+++ b/.github/workflows/run_perf_alert_tool.yml
@@ -17,12 +17,12 @@
 # To learn more about GitHub Actions in Apache Beam check the CI.md

-name: Run performance alerting tool on Python load/performance/benchmark tests.
+name: Performance alerting tool on Python load/performance/benchmark tests.

 on:
+  workflow_dispatch:
   schedule:
     - cron: '5 22 * * *'
-
 jobs:
   python_run_change_point_analysis:
     name: Run Change Point Analysis.
@@ -34,6 +34,11 @@ jobs:
         uses: actions/setup-python@v4
         with:
           python-version: 3.8
+      - name: Authenticate on GCP
+        uses: google-github-actions/setup-gcloud@v0
+        with:
+          service_account_key: ${{ secrets.GCP_SA_KEY }}
+          export_default_credentials: true
       - name: Get Apache Beam Build dependencies
         working-directory: ./sdks/python
         run: pip install pip setuptools --upgrade && pip install -r build-requirements.txt
@@ -43,17 +48,16 @@ jobs:
       - name: Install signal-processing-algorithms
         run: pip install signal-processing-algorithms
       - name: Install pandas, yaml, requests
-        run: pip install pandas PyYAML requests
-#      - name: Run Change Point Analysis.
-#        working-directory: ./sdks/python/apache_beam/testing/analyzers
-#        shell: bash
-#        run: python analysis.py
-#        env:
-#          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: pip install pandas db-dtypes PyYAML requests
+      - name: Run Change Point Analysis.
+        working-directory: ./sdks/python/apache_beam/testing/analyzers
+        shell: bash
+        run: python perf_analysis.py
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
       - name: Run change point analysis tests.
         working-directory: ./sdks/python/apache_beam/testing/analyzers
         shell: bash
         run: pytest perf_analysis_test.py
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
diff --git a/sdks/python/apache_beam/testing/analyzers/README.md b/sdks/python/apache_beam/testing/analyzers/README.md
index 9f50d9797b5..71351fe3e57 100644
--- a/sdks/python/apache_beam/testing/analyzers/README.md
+++ b/sdks/python/apache_beam/testing/analyzers/README.md
@@ -41,7 +41,8 @@ please follow the below structure.
 ```
 # the test_1 must be a unique id.
 test_1:
-  test_name: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
+  test_name: Pytorch image classification on 50k images of size 224 x 224 with resnet 152
+  test_target: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
   source: big_query
   metrics_dataset: beam_run_inference
   metrics_table: torch_inference_imagenet_results_resnet152
@@ -53,9 +54,9 @@ test_1:
   num_runs_in_change_point_window: 30 # optional parameter
 ```

-**NOTE**: `test_name` should be in the format `apache_beam.foo.bar`. It should point to a single test target.
+**NOTE**: `test_target` is optional. It is used for identifying the test that was causing the regression.

-**Note**: If the source is **BigQuery**, the metrics_dataset, metrics_table, project and metric_name should match with the values defined for performance/load tests.
+**Note**: If the source is **BigQuery**, the `metrics_dataset`, `metrics_table`, `project` and `metric_name` should match with the values defined for performance/load tests.

 The above example uses this [test configuration](https://github.com/apache/beam/blob/0a91d139dea4276dc46176c4cdcdfce210fc50c4/.test-infra/jenkins/job_InferenceBenchmarkTests_Python.groovy#L30) to fill up the values required to fetch the data from source.

@@ -83,7 +84,7 @@ All the performance/load tests metrics defined at [beam/.test-infra/jenkins](htt
 find the alerted test dashboard to find a spike in the metric values.

 For example, for the below configuration,
-* test: `apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks`
+* test_target: `apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks`
 * metric_name: `mean_load_model_latency_milli_secs`

 Grafana dashboard can be found at http://104.154.241.245/d/ZpS8Uf44z/python-ml-runinference-benchmarks?orgId=1&viewPanel=7
diff --git a/sdks/python/apache_beam/testing/analyzers/constants.py b/sdks/python/apache_beam/testing/analyzers/constants.py
index c4bdded77a0..c0df05f61db 100644
--- a/sdks/python/apache_beam/testing/analyzers/constants.py
+++ b/sdks/python/apache_beam/testing/analyzers/constants.py
@@ -34,7 +34,7 @@ _NUM_RESULTS_TO_DISPLAY_ON_ISSUE_DESCRIPTION = 10
 _NUM_DATA_POINTS_TO_RUN_CHANGE_POINT_ANALYSIS = 100
 # Variables used for finding duplicate change points.
 _DEFAULT_MIN_RUNS_BETWEEN_CHANGE_POINTS = 3
-_DEFAULT_NUM_RUMS_IN_CHANGE_POINT_WINDOW = 30
+_DEFAULT_NUM_RUMS_IN_CHANGE_POINT_WINDOW = 14

 _PERF_TEST_KEYS = {
     'test_name', 'metrics_dataset', 'metrics_table', 'project', 'metric_name'
diff --git a/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py b/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py
index 398a98e00ce..d0944a91318 100644
--- a/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py
+++ b/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py
@@ -32,8 +32,7 @@ except KeyError as e:
       'A Github Personal Access token is required '
       'to create Github Issues.')
-# TODO: Change the REPO owner name to apache before merging.
-_BEAM_GITHUB_REPO_OWNER = 'AnandInguva'
+_BEAM_GITHUB_REPO_OWNER = 'apache'
 _BEAM_GITHUB_REPO_NAME = 'beam'

 # Adding GitHub Rest API version to the header to maintain version stability.
 # For more information, please look at
diff --git a/sdks/python/apache_beam/testing/analyzers/perf_analysis.py b/sdks/python/apache_beam/testing/analyzers/perf_analysis.py
index 870deed770c..ee00e8abf42 100644
--- a/sdks/python/apache_beam/testing/analyzers/perf_analysis.py
+++ b/sdks/python/apache_beam/testing/analyzers/perf_analysis.py
@@ -82,6 +82,7 @@ def run_change_point_analysis(params, test_id, big_query_metrics_fetcher):
   change_point_index = find_latest_change_point_index(
       metric_values=metric_values)
   if not change_point_index:
+    logging.info("Change point is not detected for the test %s" % test_name)
     return False
   # since timestamps are ordered in ascending order and
   # num_runs_in_change_point_window refers to the latest runs,
@@ -92,18 +93,21 @@ def run_change_point_analysis(params, test_id, big_query_metrics_fetcher):
       latest_change_point_run):
     logging.info(
         'Performance regression/improvement found for the test: %s. '
-        'Since the change point run %s '
+        'on metric %s. Since the change point run %s '
         'lies outside the num_runs_in_change_point_window distance: %s, '
         'alert is not raised.' % (
            params['test_name'],
-           latest_change_point_run,
+           metric_name,
+           latest_change_point_run + 1,
            num_runs_in_change_point_window))
     return False
   is_alert = True
   last_reported_issue_number = None
+  issue_metadata_table_name = f'{params.get("metrics_table")}_{metric_name}'
   existing_issue_data = get_existing_issues_data(
-      test_name=test_name, big_query_metrics_fetcher=big_query_metrics_fetcher)
+      table_name=issue_metadata_table_name,
+      big_query_metrics_fetcher=big_query_metrics_fetcher)

   if existing_issue_data is not None:
     existing_issue_timestamps = existing_issue_data[
@@ -116,7 +120,6 @@
       change_point_index=change_point_index,
       timestamps=timestamps,
       min_runs_between_change_points=min_runs_between_change_points)
-
   logging.debug(
       "Performance alert is %s for test %s" % (is_alert, params['test_name']))
   if is_alert:
@@ -124,7 +127,9 @@
        metric_name, params['test_name'], timestamps,
        metric_values, change_point_index, params.get('labels', None),
-        last_reported_issue_number)
+        last_reported_issue_number,
+        test_target=params['test_target'] if 'test_target' in params else None
+    )

     issue_metadata = GitHubIssueMetaData(
         issue_timestamp=pd.Timestamp(
@@ -138,7 +143,7 @@
         change_point_timestamp=timestamps[change_point_index])

     publish_issue_metadata_to_big_query(
-        issue_metadata=issue_metadata, test_name=test_name)
+        issue_metadata=issue_metadata, table_name=issue_metadata_table_name)

   return is_alert

diff --git a/sdks/python/apache_beam/testing/analyzers/perf_analysis_utils.py b/sdks/python/apache_beam/testing/analyzers/perf_analysis_utils.py
index 247fe07f4df..ec74f206ce8 100644
--- a/sdks/python/apache_beam/testing/analyzers/perf_analysis_utils.py
+++ b/sdks/python/apache_beam/testing/analyzers/perf_analysis_utils.py
@@ -59,7 +59,7 @@ def is_change_point_in_valid_window(

 def get_existing_issues_data(
-    test_name: str, big_query_metrics_fetcher: BigQueryMetricsFetcher
+    table_name: str, big_query_metrics_fetcher: BigQueryMetricsFetcher
 ) -> Optional[pd.DataFrame]:
   """
   Finds the most recent GitHub issue created for the test_name.
@@ -67,7 +67,7 @@ def get_existing_issues_data(
   else return latest created issue_number along with
   """
   query = f"""
-  SELECT * FROM {constants._BQ_PROJECT_NAME}.{constants._BQ_DATASET}.{test_name}
+  SELECT * FROM {constants._BQ_PROJECT_NAME}.{constants._BQ_DATASET}.{table_name}
   ORDER BY {constants._ISSUE_CREATION_TIMESTAMP_LABEL} DESC
   LIMIT 10
   """
@@ -164,19 +164,19 @@ def find_latest_change_point_index(metric_values: List[Union[float, int]]):
   return change_points_idx[-1]


-def publish_issue_metadata_to_big_query(issue_metadata, test_name):
+def publish_issue_metadata_to_big_query(issue_metadata, table_name):
   """
   Published issue_metadata to BigQuery with table name=test_name.
   """
   bq_metrics_publisher = BigQueryMetricsPublisher(
       project_name=constants._BQ_PROJECT_NAME,
       dataset=constants._BQ_DATASET,
-      table=test_name,
+      table=table_name,
       bq_schema=constants._SCHEMA)
   bq_metrics_publisher.publish([asdict(issue_metadata)])
   logging.info(
       'GitHub metadata is published to Big Query Dataset %s'
-      ', table %s' % (constants._BQ_DATASET, test_name))
+      ', table %s' % (constants._BQ_DATASET, table_name))


 def create_performance_alert(
@@ -186,13 +186,15 @@
     metric_values: List[Union[int, float]],
     change_point_index: int,
     labels: List[str],
-    existing_issue_number: Optional[int]) -> Tuple[int, str]:
+    existing_issue_number: Optional[int],
+    test_target: Optional[str] = None) -> Tuple[int, str]:
   """
   Creates performance alert on GitHub issues and returns GitHub issue
   number and issue URL.
   """
   description = github_issues_utils.get_issue_description(
-      test_name=test_name,
+      test_name=(
+          test_name if not test_target else test_name + ':' + test_target),
       metric_name=metric_name,
       timestamps=timestamps,
       metric_values=metric_values,
@@ -209,6 +211,6 @@
       existing_issue_number=existing_issue_number)

   logging.info(
-      'Performance regression is alerted on issue #%s. Link to '
-      'the issue: %s' % (issue_number, issue_url))
+      'Performance regression/improvement is alerted on issue #%s. Link '
+      ': %s' % (issue_number, issue_url))
   return issue_number, issue_url
diff --git a/sdks/python/apache_beam/testing/analyzers/tests_config.yaml b/sdks/python/apache_beam/testing/analyzers/tests_config.yaml
index 9a208ea9e81..02e649c7586 100644
--- a/sdks/python/apache_beam/testing/analyzers/tests_config.yaml
+++ b/sdks/python/apache_beam/testing/analyzers/tests_config.yaml
@@ -16,22 +16,63 @@
 #

 test_1:
-  test_name: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks_22
+  test_name: Pytorch image classification on 50k images of size 224 x 224 with resnet 152
+  test_target: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
   metrics_dataset: beam_run_inference
   metrics_table: torch_inference_imagenet_results_resnet152
   project: apache-beam-testing
   metric_name: mean_load_model_latency_milli_secs
-  labels:
-    - run-inference
-  # Optional parameters.
-  min_runs_between_change_points: 3
-  num_runs_in_change_point_window: 30

 test_2:
-  test_name: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
+  test_name: Pytorch image classification on 50k images of size 224 x 224 with resnet 152
+  test_target: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
+  metrics_dataset: beam_run_inference
+  metrics_table: torch_inference_imagenet_results_resnet152
+  project: apache-beam-testing
+  metric_name: mean_inference_batch_latency_micro_secs
+
+test_3:
+  test_name: Pytorch image classification on 50k images of size 224 x 224 with resnet 101
+  test_target: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
+  metrics_dataset: beam_run_inference
+  metrics_table: torch_inference_imagenet_results_resnet101
+  project: apache-beam-testing
+  metric_name: mean_load_model_latency_milli_secs
+
+test_4:
+  test_name: Pytorch image classification on 50k images of size 224 x 224 with resnet 101
+  test_target: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
   metrics_dataset: beam_run_inference
   metrics_table: torch_inference_imagenet_results_resnet101
   project: apache-beam-testing
+  metric_name: mean_inference_batch_latency_micro_secs
+
+test_5:
+  test_name: test_cloudml_benchmark_cirteo_no_shuffle_10GB
+  metrics_dataset: beam_cloudml
+  metrics_table: cloudml_benchmark_cirteo_no_shuffle_10GB
+  project: apache-beam-testing
+  metric_name: runtime_sec
+
+test_6:
+  test_name: test_cloudml_benchmark_criteo_10GB
+  metrics_dataset: beam_cloudml
+  metrics_table: cloudml_benchmark_criteo_10GB
+  project: apache-beam-testing
+  metric_name: runtime_sec
+
+test_7:
+  test_name: Pytorch image classification on 50k images of size 224 x 224 with resnet 152 with Tesla T4 GPU
+  test_target: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
+  metrics_dataset: beam_run_inference
+  metrics_table: torch_inference_imagenet_results_resnet152_tesla_t4
+  project: apache-beam-testing
+  metric_name: mean_inference_batch_latency_micro_secs
+
+test_8:
+  test_name: Pytorch image classification on 50k images of size 224 x 224 with resnet 152 with Tesla T4 GPU
+  test_target: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
+  metrics_dataset: beam_run_inference
+  metrics_table: torch_inference_imagenet_results_resnet152_tesla_t4
+  project: apache-beam-testing
   metric_name: mean_load_model_latency_milli_secs
-  labels:
-    - run-inference
diff --git a/sdks/python/apache_beam/testing/benchmarks/cloudml/cloudml_benchmark_test.py b/sdks/python/apache_beam/testing/benchmarks/cloudml/cloudml_benchmark_test.py
index e35f1fd8ec9..3e5a640c7aa 100644
--- a/sdks/python/apache_beam/testing/benchmarks/cloudml/cloudml_benchmark_test.py
+++ b/sdks/python/apache_beam/testing/benchmarks/cloudml/cloudml_benchmark_test.py
@@ -122,30 +122,6 @@ class CloudMLTFTBenchmarkTest(unittest.TestCase):
         metrics_table=metrics_table,
         metric_name='runtime_sec')

-  def test_cloud_ml_benchmark_criteo_fixed_workers_10GB(self):
-    test_pipeline = TestPipeline(is_integration_test=True)
-    extra_opts = {}
-    extra_opts['input'] = os.path.join(
-        _INPUT_GCS_BUCKET_ROOT, lib.INPUT_CRITEO_10GB)
-    extra_opts['benchmark_type'] = 'tft'
-    extra_opts['classifier'] = 'criteo'
-    extra_opts['frequency_threshold'] = 0
-    extra_opts['output'] = os.path.join(
-        _OUTPUT_GCS_BUCKET_ROOT, uuid.uuid4().hex)
-    extra_opts['num_workers'] = 50
-    extra_opts['machine_type'] = 'n1-standard-4'
-    start_time = time.time()
-    workflow.run(test_pipeline.get_full_options_as_args(**extra_opts))
-    end_time = time.time()
-
-    metrics_table = 'cloudml_benchmark_criteo_fixed_workers_10GB'
-
-    _publish_metrics(
-        pipeline=test_pipeline,
-        metric_value=end_time - start_time,
-        metrics_table=metrics_table,
-        metric_name='runtime_sec')
-

 if __name__ == '__main__':
   unittest.main()
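For anyone reviewing how the onboarded configuration is consumed, below is a minimal, hypothetical sketch (not part of this commit) of the flow the diff above implements: load an entry from tests_config.yaml and decide, based on num_runs_in_change_point_window (default 14 per constants.py above), whether a detected change point is recent enough to alert on. The helper names `load_test_configs` and `should_alert` are invented for illustration; the real logic lives in perf_analysis.py and perf_analysis_utils.py.

```python
# Illustrative sketch only -- not the code added by this commit.
# It mirrors the alerting decision documented above: a change point
# triggers an alert only if it falls within the last
# `num_runs_in_change_point_window` runs (runs are ordered oldest to newest).
from typing import Dict, List

import yaml


def load_test_configs(path: str) -> Dict[str, dict]:
  """Loads tests_config.yaml; each top-level key (test_1, test_2, ...) is one test."""
  with open(path) as f:
    return yaml.safe_load(f)


def should_alert(
    metric_values: List[float],
    change_point_index: int,
    num_runs_in_change_point_window: int = 14) -> bool:
  """Returns True if the change point lies within the latest N runs."""
  # Distance, in runs, between the change point and the most recent run.
  latest_change_point_run = len(metric_values) - 1 - change_point_index
  return latest_change_point_run < num_runs_in_change_point_window


if __name__ == '__main__':
  configs = load_test_configs('tests_config.yaml')
  for test_id, params in configs.items():
    print(test_id, params['metric_name'], params.get('test_target', 'n/a'))
```

The window check is the same idea that `is_change_point_in_valid_window` in perf_analysis_utils.py expresses; keeping it a small pure function is what lets perf_analysis_test.py exercise it directly in the workflow's pytest step.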