This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new fb061f27e26 Automatically refresh Performance Metrics Graphs using Looker (#34097)
fb061f27e26 is described below

commit fb061f27e264e86f7d4403997e5e4414ef148511
Author: Vitaly Terentyev <vitaly.terent...@akvelon.com>
AuthorDate: Mon Mar 3 17:34:31 2025 +0400

    Automatically refresh Performance Metrics Graphs using Looker (#34097)
    
    * Add refresh looker .yml workflow and .py script
    
    * Refactoring
---
 .github/workflows/refresh_looker_metrics.yml |  53 ++++++++++++
 .test-infra/tools/refresh_looker_metrics.py  | 119 +++++++++++++++++++++++++++
 2 files changed, 172 insertions(+)

diff --git a/.github/workflows/refresh_looker_metrics.yml b/.github/workflows/refresh_looker_metrics.yml
new file mode 100644
index 00000000000..3866301b039
--- /dev/null
+++ b/.github/workflows/refresh_looker_metrics.yml
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Refresh Looker Performance Metrics
+
+# Triggered by a cron schedule and, additionally, by manual dispatch.
+on:
+  schedule:
+    # Cron fields: minute 10, hour 10, any day of month, any month, weekday 1.
+    - cron: '10 10 * * 1'
+  workflow_dispatch:
+    inputs:
+      READ_ONLY:
+        description: 'Run in read-only mode'
+        required: false
+        default: 'true'
+
+# Workflow-level environment shared by all steps of the job below.
+env:
+  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+  LOOKERSDK_BASE_URL: ${{ secrets.LOOKERSDK_BASE_URL }}
+  LOOKERSDK_CLIENT_ID: ${{ secrets.LOOKERSDK_CLIENT_ID }}
+  LOOKERSDK_CLIENT_SECRET: ${{ secrets.LOOKERSDK_CLIENT_SECRET }}
+  # Upload destination; the script reads it with os.getenv("GCS_BUCKET").
+  GCS_BUCKET: 'public_looker_explores_us_a3853f40'
+  # NOTE(review): refresh_looker_metrics.py as added in this commit never
+  # reads READ_ONLY, so this value appears to have no effect - confirm.
+  READ_ONLY: ${{ inputs.READ_ONLY }}
+
+jobs:
+  refresh_looker_metrics:
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: 3.11
+      # The script imports google.cloud.storage and looker_sdk; it does not
+      # import requests directly.
+      - run: pip install requests google-cloud-storage looker-sdk
+      - name: Authenticate on GCP
+        uses: google-github-actions/setup-gcloud@v0
+        with:
+          service_account_email: ${{ secrets.GCP_SA_EMAIL }}
+          service_account_key: ${{ secrets.GCP_SA_KEY }}
+          # Presumably makes the service-account credentials available to
+          # storage.Client() in the following step - confirm against the
+          # action's documentation.
+          export_default_credentials: true
+      - run: python .test-infra/tools/refresh_looker_metrics.py
diff --git a/.test-infra/tools/refresh_looker_metrics.py b/.test-infra/tools/refresh_looker_metrics.py
new file mode 100644
index 00000000000..842fdd6ac10
--- /dev/null
+++ b/.test-infra/tools/refresh_looker_metrics.py
@@ -0,0 +1,119 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import time
+import looker_sdk
+
+from google.cloud import storage
+from looker_sdk import models40 as models
+
+# Load environment variables
+# NOTE(review): the three LOOKER_* names below are not referenced anywhere
+# else in this file; presumably looker_sdk.init40() reads the LOOKERSDK_*
+# variables from the environment on its own - confirm.
+LOOKER_API_URL = os.getenv("LOOKERSDK_BASE_URL")
+LOOKER_CLIENT_ID = os.getenv("LOOKERSDK_CLIENT_ID")
+LOOKER_CLIENT_SECRET = os.getenv("LOOKERSDK_CLIENT_SECRET")
+# Bucket name handed to upload_to_gcs() by main(); None when GCS_BUCKET is
+# not set.
+TARGET_BUCKET = os.getenv("GCS_BUCKET")
+
+# List of Pairs (Target folder name, Look IDs to download)
+# main() uploads each look as "<folder name>/<look public_slug>.png".
+# The trailing comment on each row is the original author's label for the
+# group.
+LOOKS_TO_DOWNLOAD = [
+    ("30", ["18", "50", "92", "49", "91"]),    # BigQueryIO_Read
+    ("31", ["19", "52", "88", "51", "87"]),    # BigQueryIO_Write
+    ("32", ["20", "60", "104", "59", "103"]),  # BigTableIO_Read
+    ("33", ["21", "70", "116", "69", "115"]),  # BigTableIO_Write
+    ("34", ["22", "56", "96", "55", "95"]),    # TextIO_Read
+    ("35", ["23", "64", "110", "63", "109"]),  # TextIO_Write
+]
+
+
+def get_look(id: str) -> models.Look:
+    """Return the first look that ``sdk.search_looks`` yields for ``id``.
+
+    Raises:
+        Exception: if the search yields nothing, or its first item is falsy.
+    """
+    first_match = None
+    for candidate in sdk.search_looks(id=id):
+        # Only the first search result is of interest.
+        first_match = candidate
+        break
+    if first_match:
+        print(f"Found look with public_slug = {first_match.public_slug}")
+        return first_match
+    raise Exception(f"look '{id}' was not found")
+
+
+def download_look(look: models.Look):
+    """Render the given look as a PNG image and return the render result.
+
+    A render task is created for ``look.id`` (format "png", 810 x 526) and
+    polled until it reports "success" or "failure", or until the retry
+    budget is used up.
+
+    Args:
+        look: the look to render; ``look.id`` and ``look.title`` are used.
+
+    Returns:
+        Whatever ``sdk.render_task_results`` returns for the finished task.
+
+    Raises:
+        Exception: if the render task could not be created, or if it
+            reported the status "failure".
+        TimeoutError: if the task has not reported "success" after
+            ``max_retries`` polls.
+    """
+    task = sdk.create_look_render_task(look.id, "png", 810, 526)
+
+    if not (task and task.id):
+        raise Exception(
+            f"Could not create a render task for '{look.title}'"
+        )
+
+    # poll the render task until it completes
+    elapsed = 0.0
+    delay = 20  # passed to time.sleep() between two polls
+    retries = 0
+    max_retries = 20
+    while retries < max_retries:
+        poll = sdk.render_task(task.id)
+        if poll.status == "failure":
+            print(poll)
+            raise Exception(f"Render failed for '{look.title}'")
+        elif poll.status == "success":
+            break
+        time.sleep(delay)
+        elapsed += delay
+        retries += 1
+        print(f"Retry {retries}/{max_retries}: Render task still in progress...")
+
+    # retries reaches max_retries only when the loop ended without the
+    # "success" break above.
+    if retries >= max_retries:
+        raise TimeoutError(
+            f"Render task did not complete within {elapsed} seconds "
+            f"(max retries: {max_retries})"
+        )
+
+    print(f"Render task completed in {elapsed} seconds")
+
+    return sdk.render_task_results(task.id)
+
+
+def upload_to_gcs(bucket_name, destination_blob_name, content):
+    """Store ``content`` in the GCS bucket under ``destination_blob_name``.
+
+    The object is uploaded with the content type "image/png".
+    """
+    target_blob = storage.Client().bucket(bucket_name).blob(destination_blob_name)
+
+    # Upload content, overwriting if it exists
+    target_blob.upload_from_string(content, content_type="image/png")
+    print(f"Uploaded {destination_blob_name} to {bucket_name}.")
+
+
+# Module-level Looker client used by get_look() and download_look().
+# It is created when the module is loaded, i.e. also on a plain import.
+# NOTE(review): presumably configured from the LOOKERSDK_* environment
+# variables - confirm against the looker_sdk documentation.
+sdk = looker_sdk.init40()
+
+
+def main():
+    """Render every configured look and upload the images to the bucket.
+
+    Each look id in LOOKS_TO_DOWNLOAD is looked up, rendered and uploaded
+    to TARGET_BUCKET as "<folder>/<public_slug>.png". An error on one look
+    is printed and recorded, and processing continues with the next look.
+
+    Raises:
+        RuntimeError: after all looks were attempted, if any of them raised
+            an error or produced no content.
+    """
+    failed_looks = []
+
+    for folder, look_ids in LOOKS_TO_DOWNLOAD:
+        for look_id in look_ids:
+            if not look_id:
+                continue
+            # Deliberately broad: one failing look must not stop the others;
+            # failures are collected and reported together at the end.
+            try:
+                look = get_look(look_id)
+                content = download_look(look)
+                if content:
+                    upload_to_gcs(
+                        TARGET_BUCKET, f"{folder}/{look.public_slug}.png", content
+                    )
+                else:
+                    print(f"No content for look {look_id}")
+                    failed_looks.append(look_id)
+            except Exception as e:
+                print(f"Error processing look {look_id}: {e}")
+                failed_looks.append(look_id)
+
+    if failed_looks:
+        raise RuntimeError(f"Job failed due to errors in looks: {failed_looks}")
+
+
+# Start the refresh only when this file is executed as a script.
+if __name__ == "__main__":
+    main()

Reply via email to