Copilot commented on code in PR #4976:
URL: https://github.com/apache/texera/pull/4976#discussion_r3205060056


##########
.github/workflows/notify_test_workflow.yml:
##########
@@ -0,0 +1,350 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: On pull request update
+on:
+  pull_request_target:
+    types: [opened, reopened, synchronize]
+
+jobs:
+  notify:
+    name: Notify test workflow
+    runs-on: ubuntu-latest
+    permissions:
+      actions: read
+      statuses: write
+      pull-requests: write
+    steps:
+      - name: "Notify test workflow"
+        uses: actions/github-script@v8
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const statusContext = 'Build'
+            const pr = context.payload.pull_request
+            const head_sha = pr.head.sha
+            const head_ref = pr.head.ref
+            const headRepo = pr.head.repo ? pr.head.repo.full_name : null
+            const baseRepo = context.repo.owner + '/' + context.repo.repo
+
+            // Used as target_url for failure scenarios where there's no fork
+            // run to link to — the notify run's logs are the next-best
+            // explanation. Commit statuses honor target_url (unlike check_runs
+            // created via GITHUB_TOKEN, which silently overrides details_url).
+            const notifyRunUrl = context.serverUrl + '/' + context.repo.owner 
+ '/' + context.repo.repo + '/actions/runs/' + context.runId
+
+            console.log('=== Fork CI detection ===')
+            console.log('PR #' + pr.number + ': ' + (headRepo || '<deleted>') 
+ ':' + head_ref + ' -> ' + baseRepo + ':' + pr.base.ref)
+            console.log('  head sha:  ' + head_sha)
+            console.log('  event:     ' + context.payload.action)
+            console.log('  notify run: ' + notifyRunUrl)
+
+            // Post the initial Build status as `pending` so the required
+            // status appears on the PR the moment notify starts. Every
+            // scenario below routes through setBuildStatus to update it
+            // in place.
+            async function setBuildStatus(state, description, target_url) {
+              const desc = (description || '').slice(0, 140)
+              for (let attempt = 1; attempt <= 3; attempt++) {
+                try {
+                  await github.rest.repos.createCommitStatus({
+                    owner: context.repo.owner,
+                    repo: context.repo.repo,
+                    sha: head_sha,
+                    state: state,
+                    context: statusContext,
+                    description: desc,
+                    target_url: target_url,
+                  })
+                  console.log('  -> Build status set: state=' + state + ' 
target_url=' + target_url)
+                  return
+                } catch (error) {
+                  if (attempt < 3) {
+                    const delay = 1000 * Math.pow(2, attempt - 1)
+                    console.error('  -> setBuildStatus attempt ' + attempt + 
'/3 failed: ' + error.message + '; retrying in ' + (delay / 1000) + 's')
+                    await new Promise(r => setTimeout(r, delay))
+                  } else {
+                    console.error('  -> FAILED to set Build status after 3 
attempts: ' + error.message)
+                    core.setFailed('Could not set Build status after retries: 
' + error.message)
+                  }
+                }
+              }
+            }
+
+            await setBuildStatus('pending', 'Detecting fork CI run for ' + 
head_sha.substring(0, 8), notifyRunUrl)
+
+            // Post a comment once per unique marker. Idempotent across re-runs
+            // of this workflow on the same PR (opened/reopened/synchronize).
+            async function postOnceComment(marker, body) {
+              try {
+                const comments = await 
github.paginate(github.rest.issues.listComments, {
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: pr.number,
+                  per_page: 100,
+                })
+                if (comments.some(c => c.body && c.body.includes(marker))) {
+                  console.log('  comment "' + marker + '" already on PR; not 
reposting')
+                  return
+                }
+                await github.rest.issues.createComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: pr.number,
+                  body: marker + '\n' + body,
+                })
+                console.log('  posted comment "' + marker + '"')
+              } catch (error) {
+                console.error('  failed to post comment "' + marker + '": ' + 
error.message)
+              }
+            }
+
+            // Failure scenarios: red ✗ Build status, with target_url pointing
+            // somewhere useful for the committer to dig in. The detailed
+            // explanation lives in the postOnceComment on the PR conversation.
+            async function failCheck(title, target_url) {
+              await setBuildStatus('failure', title, target_url || 
notifyRunUrl)
+              core.setFailed(title)
+            }
+
+            // Success scenario (non-fork PR): leaves the workflow green.
+            async function passCheck(title, target_url) {
+              await setBuildStatus('success', title, target_url || 
notifyRunUrl)
+            }

Review Comment:
   `failCheck()` calls `core.setFailed(...)`, which will mark the entire “On 
pull request update” workflow run as failed for expected failure scenarios 
(blocked branch, Actions disabled, fork deleted). Even if `Build` is the 
required status, a failing workflow run can be confusing/noisy in the PR checks 
UI. Consider keeping this workflow green (only setting the `Build` commit 
status to failure), and reserve `core.setFailed` for truly unexpected internal 
errors (e.g., failing to post the status/comment).



##########
.github/workflows/required-checks.yml:
##########
@@ -23,13 +23,6 @@ on:
       - 'ci-enable/**'
       - 'main'
       - 'release/**'

Review Comment:
   Removing the `pull_request` trigger means this workflow will never run the 
`backport` matrix (release/* labels) or `cleanup-stale-backport` job anymore. 
This likely breaks the direct backport automation, since 
`.github/workflows/direct-backport-push.yml` looks for successful 
`required-checks.yml` backport jobs on the PR head SHA to decide which release 
targets are “green”. Consider keeping a PR-triggered workflow for the 
backport/cleanup paths (e.g., trigger only on labeled/unlabeled/synchronize for 
release/* labels), while still skipping the full build matrix in the Apache 
repo for normal fork PRs.
   



##########
.github/workflows/automatic-email-notif-on-ddl-change.yml:
##########
@@ -16,33 +16,75 @@
 
 name: Automatic email notification on DDL change
 
+# Triggered post-merge on push to main when sql/updates/** changes. Was
+# previously `pull_request: closed`, which queued for first-time-contributor
+# approval on every fork PR even though the job condition (`merged == true`)
+# meant it never actually ran on PR open. Push trigger fires only on actual
+# main-branch updates and needs no approval.
 on:
-  pull_request:
-    types:
-      - closed
+  push:
+    branches: [main]
+    paths:
+      - 'sql/updates/**'
+
+permissions:
+  contents: read
+  pull-requests: read
 
 jobs:
   notify:
-    if: >-
-      github.event.pull_request.merged == true && 
-      contains(github.event.pull_request.labels.*.name, 'ddl-change')
     runs-on: ubuntu-latest
     steps:
+      - name: Resolve PR for this commit
+        id: pr
+        uses: actions/github-script@v8
+        with:
+          script: |
+            const pulls = await 
github.rest.repos.listPullRequestsAssociatedWithCommit({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              commit_sha: context.sha,
+            })
+            const pr = pulls.data[0]
+            if (!pr) {
+              console.log('No PR associated with ' + context.sha + '; skipping 
email')
+              core.setOutput('skip', 'true')
+              return
+            }
+            const hasLabel = pr.labels.some(l => l.name === 'ddl-change')
+            if (!hasLabel) {
+              console.log('PR #' + pr.number + ' has no ddl-change label; 
skipping email')
+              core.setOutput('skip', 'true')
+              return
+            }
+            core.setOutput('skip', 'false')
+            core.setOutput('number', String(pr.number))
+            core.setOutput('title', pr.title)
+            core.setOutput('html_url', pr.html_url)
+            core.setOutput('user', pr.user.login)
+
       - name: Checkout
+        if: steps.pr.outputs.skip == 'false'
         uses: actions/checkout@v5
         with:
-          fetch-depth: 0
+          fetch-depth: 2
           sparse-checkout: sql/updates/
 
-      - name: Get added file in sql/updates/
+      - name: Find added SQL update file
+        if: steps.pr.outputs.skip == 'false'
         id: get_sql_file
         run: |
-          FILE=$(git diff --name-only --diff-filter=A \
-            ${{ github.event.pull_request.base.sha }} \
-            ${{ github.event.pull_request.merge_commit_sha }} \
-            -- 'sql/updates/')
+          FILE=$(git diff --name-only --diff-filter=A HEAD~1 HEAD -- 
'sql/updates/')
           echo "sql_file=$FILE" >> $GITHUB_OUTPUT
+

Review Comment:
   `git diff --name-only --diff-filter=A HEAD~1 HEAD -- 'sql/updates/'` can 
produce an empty result if the merge modifies/renames an existing SQL file (or 
if the push contains multiple commits), but the workflow still triggers on any 
`sql/updates/**` path change. This can lead to sending an email with an empty 
`${SQL_FILE}`. Consider diffing `${{ github.event.before }}..${{ github.sha }}` 
for push events and explicitly skipping (or emitting a clearer message) when no 
added files are detected, and handling multiple added files (newline-separated) 
in the email body.



##########
.github/workflows/update_build_status.yml:
##########
@@ -0,0 +1,186 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Update build status workflow
+
+on:
+  workflow_run:
+    workflows: ["On pull request update"]
+    types: [completed]
+  schedule:
+    - cron: "*/5 * * * *"
+  workflow_dispatch:
+
+jobs:
+  update:
+    name: Update build status
+    runs-on: ubuntu-latest
+    # workflow_run mode polls up to 60 min waiting for fork CI completion.
+    # Other modes (cron, workflow_dispatch) finish in seconds.
+    timeout-minutes: 65
+    permissions:
+      actions: read
+      statuses: write
+    steps:
+      - name: "Update build status"
+        uses: actions/github-script@v8
+        env:
+          TRIGGER_EVENT: ${{ github.event_name }}
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const isLongPoll = process.env.TRIGGER_EVENT === 'workflow_run'
+            const maxIterations = isLongPoll ? 120 : 1   // 120 * 30s = 60 min
+            const sleepMs = 30000
+            const statusContext = 'Build'
+
+            console.log('=== update_build_status: ' + new Date().toISOString() 
+ ' ===')
+            console.log('trigger=' + process.env.TRIGGER_EVENT + ' mode=' + 
(isLongPoll ? 'long-poll up to ' + maxIterations + ' iters' : 'single pass'))
+

Review Comment:
   In `workflow_run` mode this script can loop for up to 60 minutes, and each 
iteration scans *all* open PRs and makes per-PR API calls (read commit statuses 
+ list fork workflow runs). On repos with many open PRs this can be very API- 
and runner-expensive and may hit GitHub rate limits. Consider scoping the 
`workflow_run` trigger path to only the PR/SHA associated with the triggering 
notify run (available on the `workflow_run` payload), and/or removing the 
long-poll loop and relying on the scheduled 5-minute sync to avoid sustained 
high-rate polling.



##########
.github/workflows/notify_test_workflow.yml:
##########
@@ -0,0 +1,350 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: On pull request update
+on:
+  pull_request_target:
+    types: [opened, reopened, synchronize]
+
+jobs:
+  notify:
+    name: Notify test workflow
+    runs-on: ubuntu-latest
+    permissions:
+      actions: read
+      statuses: write
+      pull-requests: write
+    steps:
+      - name: "Notify test workflow"
+        uses: actions/github-script@v8
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const statusContext = 'Build'
+            const pr = context.payload.pull_request
+            const head_sha = pr.head.sha
+            const head_ref = pr.head.ref
+            const headRepo = pr.head.repo ? pr.head.repo.full_name : null
+            const baseRepo = context.repo.owner + '/' + context.repo.repo
+
+            // Used as target_url for failure scenarios where there's no fork
+            // run to link to — the notify run's logs are the next-best
+            // explanation. Commit statuses honor target_url (unlike check_runs
+            // created via GITHUB_TOKEN, which silently overrides details_url).
+            const notifyRunUrl = context.serverUrl + '/' + context.repo.owner 
+ '/' + context.repo.repo + '/actions/runs/' + context.runId
+
+            console.log('=== Fork CI detection ===')
+            console.log('PR #' + pr.number + ': ' + (headRepo || '<deleted>') 
+ ':' + head_ref + ' -> ' + baseRepo + ':' + pr.base.ref)
+            console.log('  head sha:  ' + head_sha)
+            console.log('  event:     ' + context.payload.action)
+            console.log('  notify run: ' + notifyRunUrl)
+
+            // Post the initial Build status as `pending` so the required
+            // status appears on the PR the moment notify starts. Every
+            // scenario below routes through setBuildStatus to update it
+            // in place.
+            async function setBuildStatus(state, description, target_url) {
+              const desc = (description || '').slice(0, 140)
+              for (let attempt = 1; attempt <= 3; attempt++) {
+                try {
+                  await github.rest.repos.createCommitStatus({
+                    owner: context.repo.owner,
+                    repo: context.repo.repo,
+                    sha: head_sha,
+                    state: state,
+                    context: statusContext,
+                    description: desc,
+                    target_url: target_url,
+                  })
+                  console.log('  -> Build status set: state=' + state + ' 
target_url=' + target_url)
+                  return
+                } catch (error) {
+                  if (attempt < 3) {
+                    const delay = 1000 * Math.pow(2, attempt - 1)
+                    console.error('  -> setBuildStatus attempt ' + attempt + 
'/3 failed: ' + error.message + '; retrying in ' + (delay / 1000) + 's')
+                    await new Promise(r => setTimeout(r, delay))
+                  } else {
+                    console.error('  -> FAILED to set Build status after 3 
attempts: ' + error.message)
+                    core.setFailed('Could not set Build status after retries: 
' + error.message)
+                  }
+                }
+              }
+            }
+
+            await setBuildStatus('pending', 'Detecting fork CI run for ' + 
head_sha.substring(0, 8), notifyRunUrl)
+
+            // Post a comment once per unique marker. Idempotent across re-runs
+            // of this workflow on the same PR (opened/reopened/synchronize).
+            async function postOnceComment(marker, body) {
+              try {
+                const comments = await 
github.paginate(github.rest.issues.listComments, {
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: pr.number,
+                  per_page: 100,
+                })
+                if (comments.some(c => c.body && c.body.includes(marker))) {
+                  console.log('  comment "' + marker + '" already on PR; not 
reposting')
+                  return
+                }
+                await github.rest.issues.createComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: pr.number,
+                  body: marker + '\n' + body,
+                })
+                console.log('  posted comment "' + marker + '"')
+              } catch (error) {
+                console.error('  failed to post comment "' + marker + '": ' + 
error.message)
+              }
+            }
+
+            // Failure scenarios: red ✗ Build status, with target_url pointing
+            // somewhere useful for the committer to dig in. The detailed
+            // explanation lives in the postOnceComment on the PR conversation.
+            async function failCheck(title, target_url) {
+              await setBuildStatus('failure', title, target_url || 
notifyRunUrl)
+              core.setFailed(title)
+            }
+
+            // Success scenario (non-fork PR): leaves the workflow green.
+            async function passCheck(title, target_url) {
+              await setBuildStatus('success', title, target_url || 
notifyRunUrl)
+            }
+
+            // Map the fork run's actual status onto the Build commit status:
+            //   completed/success    -> success
+            //   completed/cancelled  -> error    (yellow !)
+            //   completed/<other>    -> failure  (red x)
+            //   queued / in_progress -> pending  (yellow circle)
+            // target_url always goes to the fork CI run's actions page so the
+            // PR's Details link is one-click to the fork run.
+            async function linkCheck(run, actions_url) {
+              let state, description
+              if (run.status === 'completed') {
+                state = run.conclusion === 'success' ? 'success'
+                      : run.conclusion === 'cancelled' ? 'error'
+                      : 'failure'
+                description = 'Fork CI run #' + run.id + ' ' + run.conclusion
+              } else {
+                state = 'pending'
+                description = 'Fork CI run #' + run.id + ' ' + run.status
+              }
+              await setBuildStatus(state, description, actions_url)
+            }
+
+            // ----- scenario 1: fork repo gone 
----------------------------------
+            if (!pr.head.repo) {
+              console.log('SCENARIO: head repo is null (fork deleted, made 
private, or otherwise inaccessible)')
+              await postOnceComment(
+                '<!-- fork-ci-no-head-repo -->',
+                ':no_entry: **Fork CI cannot run — your fork repository is not 
accessible.**\n' +
+                '\n' +
+                'GitHub returned `null` for this PR\'s head repository. That 
usually means one of:\n' +
+                '\n' +
+                '- The fork has been **deleted**\n' +
+                '- The fork has been **made private** (Fork CI only works on 
public forks)\n' +
+                '- The branch has been **force-removed**\n' +
+                '\n' +
+                'Without access to your fork, this workflow cannot detect any 
`fork-ci.yml` runs and the required `Build` status will stay failed.\n' +
+                '\n' +
+                '**To fix:** restore (or recreate) the public fork, push your 
branch back, then close and reopen this PR (or push a new commit) to retrigger 
detection.\n'
+              )
+              await failCheck('Fork repository not accessible', pr.html_url)
+              return
+            }
+
+            // ----- scenario 2: non-fork PR (branch in base repo) 
---------------
+            if (headRepo === baseRepo) {
+              console.log('SCENARIO: non-fork PR (head repo == base repo)')
+              await postOnceComment(
+                '<!-- fork-ci-not-applicable -->',
+                ':information_source: **Fork CI is not applicable to this 
PR.**\n' +
+                '\n' +
+                'This PR is opened from a branch inside `' + baseRepo + '` 
itself, not from a fork. ' +
+                'Fork CI is the system that runs the full build matrix in 
*contributor forks* and surfaces the result on PRs to `apache/texera`. ' +
+                'Since there\'s no fork involved here, the `Build` status has 
been auto-passed.\n' +
+                '\n' +
+                'In-tree branches like this one are gated by the **Required 
Checks** status check (which runs builds directly in `' + baseRepo + '`).\n'
+              )
+              await passCheck(
+                'Fork CI not applicable (in-tree PR)',
+                context.serverUrl + '/' + baseRepo + 
'/actions/workflows/required-checks.yml'
+              )

Review Comment:
   The “non-fork PR” path auto-sets the required `Build` status to `success` 
without running any build. Since `required-checks.yml` no longer has a 
`pull_request` trigger and `.asf.yaml` now requires `Build`, an in-repo PR 
(head repo == base repo) could merge with zero CI coverage. If in-repo PRs are 
still expected, consider triggering the normal build workflow for them (or 
keeping a PR-triggered required-checks path for base-repo branches) and have 
this workflow link/mirror that result instead of auto-passing.
   



##########
.github/workflows/update_build_status.yml:
##########
@@ -0,0 +1,186 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Update build status workflow
+
+on:
+  workflow_run:
+    workflows: ["On pull request update"]
+    types: [completed]
+  schedule:
+    - cron: "*/5 * * * *"
+  workflow_dispatch:
+
+jobs:
+  update:
+    name: Update build status
+    runs-on: ubuntu-latest
+    # workflow_run mode polls up to 60 min waiting for fork CI completion.
+    # Other modes (cron, workflow_dispatch) finish in seconds.
+    timeout-minutes: 65
+    permissions:
+      actions: read

Review Comment:
   This workflow lists open PRs via `GET /repos/{owner}/{repo}/pulls`, but the 
job permissions do not include `pull-requests: read`. With the explicit 
`permissions:` block, the default token permissions are removed, so the PR 
listing call may fail with "Resource not accessible by integration". Add 
`pull-requests: read` (or broader `repo`-level read, if preferred) to the job 
permissions.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to