This is an automated email from the ASF dual-hosted git repository. damccorm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push: new fb061f27e26 Automatically refresh Performance Metrics Graphs using Looker (#34097) fb061f27e26 is described below commit fb061f27e264e86f7d4403997e5e4414ef148511 Author: Vitaly Terentyev <vitaly.terent...@akvelon.com> AuthorDate: Mon Mar 3 17:34:31 2025 +0400 Automatically refresh Performance Metrics Graphs using Looker (#34097) * Add refresh looker .yml workflow and .py script * Refactoring --- .github/workflows/refresh_looker_metrics.yml | 53 ++++++++++++ .test-infra/tools/refresh_looker_metrics.py | 119 +++++++++++++++++++++++++++ 2 files changed, 172 insertions(+) diff --git a/.github/workflows/refresh_looker_metrics.yml b/.github/workflows/refresh_looker_metrics.yml new file mode 100644 index 00000000000..3866301b039 --- /dev/null +++ b/.github/workflows/refresh_looker_metrics.yml @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: Refresh Looker Performance Metrics + +on: + schedule: + - cron: '10 10 * * 1' + workflow_dispatch: + inputs: + READ_ONLY: + description: 'Run in read-only mode' + required: false + default: 'true' + +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + LOOKERSDK_BASE_URL: ${{ secrets.LOOKERSDK_BASE_URL }} + LOOKERSDK_CLIENT_ID: ${{ secrets.LOOKERSDK_CLIENT_ID }} + LOOKERSDK_CLIENT_SECRET: ${{ secrets.LOOKERSDK_CLIENT_SECRET }} + GCS_BUCKET: 'public_looker_explores_us_a3853f40' + READ_ONLY: ${{ inputs.READ_ONLY }} + +jobs: + refresh_looker_metrics: + runs-on: [self-hosted, ubuntu-20.04, main] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.11 + - run: pip install requests google-cloud-storage looker-sdk + - name: Authenticate on GCP + uses: google-github-actions/setup-gcloud@v0 + with: + service_account_email: ${{ secrets.GCP_SA_EMAIL }} + service_account_key: ${{ secrets.GCP_SA_KEY }} + export_default_credentials: true + - run: python .test-infra/tools/refresh_looker_metrics.py diff --git a/.test-infra/tools/refresh_looker_metrics.py b/.test-infra/tools/refresh_looker_metrics.py new file mode 100644 index 00000000000..842fdd6ac10 --- /dev/null +++ b/.test-infra/tools/refresh_looker_metrics.py @@ -0,0 +1,119 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

"""Render a fixed set of Looker Looks as PNG images and publish them to GCS.

For every (folder, look ids) pair in ``LOOKS_TO_DOWNLOAD`` the script looks
the Look up through the Looker SDK, renders it to PNG, and uploads the image
to ``<GCS_BUCKET>/<folder>/<public_slug>.png``.

Environment variables:
  LOOKERSDK_BASE_URL, LOOKERSDK_CLIENT_ID, LOOKERSDK_CLIENT_SECRET:
    Looker API credentials.
  GCS_BUCKET: name of the destination bucket (required unless READ_ONLY).
  READ_ONLY: when set to "true" (case-insensitive) the Looks are still
    fetched and rendered, but nothing is written to the bucket.
"""

import functools
import os
import time

import looker_sdk
from google.cloud import storage
from looker_sdk import models40 as models

# Load environment variables
# NOTE: the LOOKER_* names are not referenced below; looker_sdk.init40() is
# expected to read the same LOOKERSDK_* variables itself (see the Looker SDK
# documentation). They are kept so the module's public names do not change.
LOOKER_API_URL = os.getenv("LOOKERSDK_BASE_URL")
LOOKER_CLIENT_ID = os.getenv("LOOKERSDK_CLIENT_ID")
LOOKER_CLIENT_SECRET = os.getenv("LOOKERSDK_CLIENT_SECRET")
TARGET_BUCKET = os.getenv("GCS_BUCKET")
# The workflow exports READ_ONLY; an unset or empty value means "write".
READ_ONLY = os.getenv("READ_ONLY", "").strip().lower() == "true"

# List of Pairs (Target folder name, Look IDs to download)
LOOKS_TO_DOWNLOAD = [
    ("30", ["18", "50", "92", "49", "91"]),  # BigQueryIO_Read
    ("31", ["19", "52", "88", "51", "87"]),  # BigQueryIO_Write
    ("32", ["20", "60", "104", "59", "103"]),  # BigTableIO_Read
    ("33", ["21", "70", "116", "69", "115"]),  # BigTableIO_Write
    ("34", ["22", "56", "96", "55", "95"]),  # TextIO_Read
    ("35", ["23", "64", "110", "63", "109"]),  # TextIO_Write
]

# Size of the rendered image, in pixels.
RENDER_WIDTH = 810
RENDER_HEIGHT = 526
# Seconds to wait between two polls of a render task.
RENDER_POLL_DELAY_SECONDS = 20
# Number of waits allowed before a render task is considered timed out.
RENDER_MAX_RETRIES = 20


def get_look(id: str) -> models.Look:
    """Return the first Look whose id matches ``id``.

    Args:
      id: Look id to search for (name kept for backward compatibility even
        though it shadows the builtin).

    Raises:
      LookupError: if the search returns no Look.
    """
    look = next(iter(sdk.search_looks(id=id)), None)
    if not look:
        raise LookupError(f"look '{id}' was not found")
    print(f"Found look with public_slug = {look.public_slug}")
    return look


def download_look(look: models.Look):
    """Render ``look`` as a PNG and return the render task's result.

    A render task is created and polled every ``RENDER_POLL_DELAY_SECONDS``
    seconds. The status is checked once more after the final wait, so a
    render that finishes during the last delay is not reported as a timeout.

    Raises:
      RuntimeError: if the render task cannot be created or reports failure.
      TimeoutError: if the task is still unfinished after
        ``RENDER_MAX_RETRIES`` waits.
    """
    task = sdk.create_look_render_task(
        look.id, "png", RENDER_WIDTH, RENDER_HEIGHT
    )

    if not (task and task.id):
        raise RuntimeError(
            f"Could not create a render task for '{look.title}'"
        )

    # poll the render task until it completes
    elapsed = 0.0
    retries = 0
    while True:
        poll = sdk.render_task(task.id)
        if poll.status == "failure":
            print(poll)
            raise RuntimeError(f"Render failed for '{look.title}'")
        if poll.status == "success":
            break
        # Only give up right after a poll, never right after a sleep.
        if retries >= RENDER_MAX_RETRIES:
            raise TimeoutError(
                f"Render task did not complete within {elapsed} seconds "
                f"(max retries: {RENDER_MAX_RETRIES})"
            )
        time.sleep(RENDER_POLL_DELAY_SECONDS)
        elapsed += RENDER_POLL_DELAY_SECONDS
        retries += 1
        print(
            f"Retry {retries}/{RENDER_MAX_RETRIES}: "
            "Render task still in progress..."
        )

    print(f"Render task completed in {elapsed} seconds")

    return sdk.render_task_results(task.id)


@functools.lru_cache(maxsize=1)
def _storage_client():
    """Return one shared GCS client instead of building one per upload."""
    return storage.Client()


def upload_to_gcs(bucket_name, destination_blob_name, content):
    """Upload ``content`` to ``bucket_name`` as a PNG object.

    Args:
      bucket_name: name of the destination GCS bucket.
      destination_blob_name: object name inside the bucket.
      content: image payload handed to ``Blob.upload_from_string``.
    """
    bucket = _storage_client().bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    # Upload content, overwriting if it exists
    blob.upload_from_string(content, content_type="image/png")
    print(f"Uploaded {destination_blob_name} to {bucket_name}.")


sdk = looker_sdk.init40()


def main():
    """Render and publish every configured Look.

    A failure on one Look is recorded and the remaining Looks are still
    processed.

    Raises:
      RuntimeError: if GCS_BUCKET is unset while uploads are enabled, or if
        at least one Look failed.
    """
    # Fail fast instead of rendering every Look and failing on each upload.
    if not READ_ONLY and not TARGET_BUCKET:
        raise RuntimeError("GCS_BUCKET environment variable is not set")

    failed_looks = []

    for folder, look_ids in LOOKS_TO_DOWNLOAD:
        for look_id in look_ids:
            if not look_id:
                continue
            try:
                look = get_look(look_id)
                # Without a slug the object would be named "None.png".
                if not look.public_slug:
                    raise ValueError(
                        f"look '{look_id}' has no public_slug"
                    )
                content = download_look(look)
                if not content:
                    print(f"No content for look {look_id}")
                    failed_looks.append(look_id)
                    continue
                destination = f"{folder}/{look.public_slug}.png"
                if READ_ONLY:
                    print(f"READ_ONLY is set, skipping upload of {destination}")
                else:
                    upload_to_gcs(TARGET_BUCKET, destination, content)
            except Exception as e:
                print(f"Error processing look {look_id}: {e}")
                failed_looks.append(look_id)

    if failed_looks:
        raise RuntimeError(f"Job failed due to errors in looks: {failed_looks}")


if __name__ == "__main__":
    main()