Re: [PR] ci: compare benchmark PRs with main [texera]

via GitHub Thu, 11 Jun 2026 23:36:17 -0700


Copilot commented on code in PR #5639:
URL: https://github.com/apache/texera/pull/5639#discussion_r3401218885



##########
.github/workflows/benchmarks-pr-comment.yml:
##########
@@ -197,17 +200,369 @@ jobs:
               }
             };
 
+            const readBenchJson = (filePath, suite) => {
+              if (!fs.existsSync(filePath)) return [];
+              if (fs.statSync(filePath).size > MAX_JSON_BYTES) {
+                core.warning(`${filePath} is too large; skipping JSON 
comparison input.`);
+                return [];
+              }
+              try {
+                const parsed = JSON.parse(fs.readFileSync(filePath, "utf8"));
+                if (!Array.isArray(parsed)) return [];
+                return parsed
+                  .map((bench) => ({
+                    suite,
+                    name: String(bench.name || ""),
+                    unit: String(bench.unit || ""),
+                    value: Number(bench.value),
+                  }))
+                  .filter((bench) => bench.name && 
Number.isFinite(bench.value));
+              } catch (e) {
+                core.warning(`failed to parse ${filePath}: ${e.message}`);
+                return [];
+              }
+            };
+
+            const parseCsvRows = (text) => {
+              const rows = text
+                .trim()
+                .split(/\r?\n/)
+                .map((line) => line.split(","));
+              if (rows.length < 2) return [];
+              const header = rows[0].map((h) => h.trim());
+              const idx = (col) => header.indexOf(col);
+              return rows.slice(1).map((row) => ({ row, idx }));
+            };
+
+            const prRowsFromCsv = (text) =>
+              parseCsvRows(text)
+                .map(({ row, idx }) => {
+                  const config = `bs=${row[idx("batch_size")]} 
sw=${row[idx("schema_width")]} sl=${row[idx("string_len")]}`;
+                  const metric = (suite, prefix, unit, value) => ({
+                    suite,
+                    name: `${prefix} / ${config}`,
+                    unit,
+                    value: Number(value),
+                  });
+                  return {
+                    config,
+                    throughput: metric(
+                      "Arrow Flight E2E Throughput",
+                      "throughput",
+                      "tuples/sec",
+                      row[idx("tuples_per_sec")]
+                    ),
+                    mbps: metric("Arrow Flight E2E MB/s", "MB/s", "MB/s", 
row[idx("mb_per_sec")]),
+                    p50: metric("Arrow Flight E2E Latency", "latency p50", 
"us", row[idx("lat_p50_us")]),
+                    p95: metric("Arrow Flight E2E Latency", "latency p95", 
"us", row[idx("lat_p95_us")]),
+                    p99: metric("Arrow Flight E2E Latency", "latency p99", 
"us", row[idx("lat_p99_us")]),
+                  };
+                })
+                .filter((item) =>
+                  [item.throughput, item.mbps, item.p50, item.p95, 
item.p99].every((metric) =>
+                    Number.isFinite(metric.value)
+                  )
+                );
+
+            const prRows = csv ? prRowsFromCsv(csv) : [];
+
+            const bytesPerTuple = (benchName) => {
+              const match = benchName.match(/bs=(\d+)\s+sw=(\d+)\s+sl=(\d+)/);
+              if (!match) return null;
+              return Number(match[2]) * Number(match[3]);
+            };
+
+            const derivedMbpsBench = (throughputBench) => {
+              const bytes = bytesPerTuple(throughputBench.name);
+              if (!bytes) return null;
+              return {
+                name: throughputBench.name.replace(/^throughput/, "MB/s"),
+                unit: "MB/s",
+                value: (Number(throughputBench.value) * bytes) / (1024 * 1024),
+              };
+            };
+
+            const loadMainBaseline = async () => {
+              try {
+                const { data } = await github.rest.repos.getContent({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  path: "dev/bench/data.js",
+                  ref: "gh-pages",
+                });
+                if (Array.isArray(data) || !data.content) {
+                  core.warning("gh-pages/dev/bench/data.js is not a file.");
+                  return null;
+                }
+                const raw = Buffer.from(data.content, data.encoding || 
"base64").toString("utf8");
+                const match = 
raw.match(/window\.BENCHMARK_DATA\s*=\s*(\{[\s\S]*\})\s*;?\s*$/);
+                if (!match) {
+                  core.warning("could not find BENCHMARK_DATA assignment in 
gh-pages data.js.");
+                  return null;
+                }
+                const parsed = JSON.parse(match[1]);
+                const latestBenches = new Map();
+                let baselineCommit = "";
+                let baselineDate = 0;
+                const suites = Object.entries(parsed.entries || {});
+                for (const [_suite, entries] of suites) {
+                  if (!Array.isArray(entries) || entries.length === 0) 
continue;
+                  for (const entry of entries) {
+                    if (Number(entry.date || 0) > baselineDate) {
+                      baselineDate = Number(entry.date || 0);
+                      baselineCommit = String(entry.commit?.id || "").slice(0, 
7);
+                    }
+                  }
+                }
+                const cutoffDate = baselineDate - 7 * 24 * 60 * 60 * 1000;
+                const averageSums = new Map();
+                const addBench = (target, suite, bench, extra = {}) => {
+                  const value = Number(bench.value);
+                  if (!bench.name || !Number.isFinite(value)) return;
+                  target.set(`${suite}\0${bench.name}`, {
+                    value,
+                    unit: String(bench.unit || ""),
+                    ...extra,
+                  });
+                };
+                const addAverage = (suite, bench) => {
+                  const value = Number(bench.value);
+                  if (!bench.name || !Number.isFinite(value)) return;
+                  const key = `${suite}\0${bench.name}`;
+                  const current = averageSums.get(key) || {
+                    total: 0,
+                    count: 0,
+                    unit: String(bench.unit || ""),
+                  };
+                  current.total += value;
+                  current.count += 1;
+                  averageSums.set(key, current);
+                };
+                for (const [suite, entries] of suites) {
+                  if (!Array.isArray(entries) || entries.length === 0) 
continue;
+                  const latest = entries.reduce((best, entry) => {
+                    if (!best) return entry;
+                    return Number(entry.date || 0) > Number(best.date || 0) ? 
entry : best;
+                  }, null);
+                  for (const bench of latest?.benches || []) {
+                    const value = Number(bench.value);
+                    if (!bench.name || !Number.isFinite(value)) continue;
+                    addBench(latestBenches, suite, bench, {
+                        commit: String(latest.commit?.id || "").slice(0, 7),
+                        date: Number(latest.date || 0),
+                      });
+                    if (suite === "Arrow Flight E2E Throughput") {
+                      const mbps = derivedMbpsBench(bench);
+                      if (mbps) {
+                        addBench(latestBenches, "Arrow Flight E2E MB/s", mbps, 
{
+                          commit: String(latest.commit?.id || "").slice(0, 7),
+                          date: Number(latest.date || 0),
+                        });
+                      }
+                    }
+                  }
+                  for (const entry of entries) {
+                    if (Number(entry.date || 0) < cutoffDate) continue;
+                    for (const bench of entry.benches || []) {
+                      addAverage(suite, bench);
+                      if (suite === "Arrow Flight E2E Throughput") {
+                        const mbps = derivedMbpsBench(bench);
+                        if (mbps) {
+                          addAverage("Arrow Flight E2E MB/s", mbps);
+                        }
+                      }
+                    }
+                  }
+                }
+                const average7d = new Map();
+                for (const [key, item] of averageSums.entries()) {
+                  if (item.count > 0) {
+                    average7d.set(key, {
+                      value: item.total / item.count,
+                      unit: item.unit,
+                      count: item.count,
+                    });
+                  }
+                }
+                return { latest: latestBenches, average7d, commit: 
baselineCommit, date: baselineDate };
+              } catch (e) {
+                core.warning(`failed to load gh-pages benchmark baseline: 
${e.message}`);
+                return null;
+              }
+            };
+
+            const fmtNumber = (value) => {
+              if (!Number.isFinite(value)) return "n/a";
+              if (Math.abs(value) >= 1000) return 
value.toLocaleString("en-US", { maximumFractionDigits: 0 });
+              if (Math.abs(value) >= 10) return value.toLocaleString("en-US", 
{ maximumFractionDigits: 2 });
+              return value.toLocaleString("en-US", { maximumFractionDigits: 3 
});
+            };
+
+            const fmtValue = (value, unit) => `${fmtNumber(value)}${unit ? ` 
${unit}` : ""}`;
+
+            const deltaInfo = (bench, baseline) => {
+              if (!baseline || !Number.isFinite(baseline.value) || 
baseline.value === 0) {
+                return { status: "missing", label: "no baseline", emoji: "⚪", 
text: "n/a" };
+              }
+              const pct = ((bench.value - baseline.value) / baseline.value) * 
100;
+              if (!Number.isFinite(pct)) {
+                return { status: "missing", label: "no baseline", emoji: "⚪", 
text: "n/a" };
+              }
+              const status =
+                Math.abs(pct) <= NOISE_THRESHOLD_PCT
+                  ? "noise"
+                  : bench.suite.includes("Throughput") || 
bench.suite.includes("MB/s")
+                    ? pct > 0
+                      ? "better"
+                      : "worse"
+                    : pct < 0
+                      ? "better"
+                      : "worse";
+              const labels = {
+                better: "better",
+                worse: "worse",
+                noise: "noise",
+              };
+              const emojis = {
+                better: "🟢",
+                worse: "🔴",
+                noise: "⚪",
+              };
+              const sign = pct > 0 ? "+" : "";
+              return {
+                status,
+                label: labels[status],
+                emoji: emojis[status],
+                text: `${sign}${pct.toFixed(1)}%`,
+              };
+            };
+
+            const comparisonToTable = (rows, baseline) => {
+              if (rows.length === 0 || !baseline) return null;
+              const counts = { better: 0, worse: 0, noise: 0, missing: 0 };
+              const metricKeys = ["throughput", "mbps", "p50", "p95", "p99"];
+              const lines = [
+                "| | config | throughput | MB/s | latency | max Δ latest / 7d 
|",
+                "|---|---|---:|---:|---:|---:|",
+              ];
+              const baselineLines = [
+                "| config | metric | PR | latest main | 7d avg | Δ latest |",
+                "|---|---|---:|---:|---:|---:|",
+              ];
+              const metricInfo = (config, metricLabel, bench) => {
+                const key = `${bench.suite}\0${bench.name}`;
+                const main = baseline.latest.get(key);
+                const average = baseline.average7d.get(key);
+                const delta = deltaInfo(bench, main);
+                counts[delta.status] += 1;
+                baselineLines.push(
+                  `| ${escapeCell(config)} | ${escapeCell(metricLabel)} | ` +
+                    `${escapeCell(fmtValue(bench.value, bench.unit))} | ` +
+                    `${escapeCell(main ? fmtValue(main.value, main.unit || 
bench.unit) : "n/a")} | ` +
+                    `${escapeCell(average ? fmtValue(average.value, 
average.unit || bench.unit) : "n/a")} | ` +
+                    `${escapeCell(delta.text)} |`
+                );
+                return { delta, averageDelta: deltaInfo(bench, average), 
value: fmtNumber(bench.value) };
+              };
+              for (const row of rows) {
+                const statuses = metricKeys.map((key) => deltaInfo(row[key], 
baseline.latest.get(`${row[key].suite}\0${row[key].name}`)));
+                const status = statuses.some((item) => item.status === "worse")
+                  ? "🔴"
+                  : statuses.some((item) => item.status === "better")
+                    ? "🟢"
+                    : statuses.every((item) => item.status === "missing")
+                      ? "⚪"
+                      : "⚪";
+                const throughput = metricInfo(row.config, "throughput", 
row.throughput);
+                const mbps = metricInfo(row.config, "MB/s", row.mbps);
+                const p50 = metricInfo(row.config, "p50", row.p50);
+                const p95 = metricInfo(row.config, "p95", row.p95);
+                const p99 = metricInfo(row.config, "p99", row.p99);
+                const material = [throughput, mbps, p50, p95, p99]
+                  .map((item) => item.delta)
+                  .filter((delta) => delta.status === "better" || delta.status 
=== "worse");
+                const averageMaterial = [throughput, mbps, p50, p95, p99]
+                  .map((item) => item.averageDelta)
+                  .filter((delta) => delta.status === "better" || delta.status 
=== "worse");
+                const maxByAbs = (items) =>
+                  items.sort((a, b) => Math.abs(Number(b.text.replace("%", 
""))) - Math.abs(Number(a.text.replace("%", ""))))[0];
+                const maxDelta = material.length === 0
+                  ? `⚪ within ±${NOISE_THRESHOLD_PCT}%`
+                  : maxByAbs(material);
+                const maxAverageDelta = averageMaterial.length === 0
+                  ? `⚪ within ±${NOISE_THRESHOLD_PCT}%`
+                  : maxByAbs(averageMaterial);
+                const formatMaxDelta = (delta) =>
+                  typeof delta === "string" ? delta : `${delta.emoji} 
${delta.text}`;
+                lines.push(
+                  `| ${status} | ${escapeCell(row.config)} | ` +
+                    `${escapeCell(throughput.value)} | 
${escapeCell(mbps.value)} | ` +
+                    
`${escapeCell(p50.value)}/${escapeCell(p95.value)}/${escapeCell(p99.value)} us 
| ` +
+                    `${formatMaxDelta(maxDelta)} / 
${formatMaxDelta(maxAverageDelta)} |`
+                );
+              }
+              const verdict = counts.worse > 0 ? "⚠️ Benchmark changes need a 
look" : "✅ No material benchmark regressions detected";
+              const summary =
+                `🟢 ${counts.better} better · 🔴 ${counts.worse} worse · ` +
+                `⚪ ${counts.noise} noise (<±${NOISE_THRESHOLD_PCT}%)`;

Review Comment:
   The verdict/summary ignores `counts.missing`, so the comment can report “No 
material benchmark regressions detected” even when all comparisons are missing 
a baseline. Incorporating the missing count into the verdict (or at least the 
summary) avoids giving a false sense of confidence when baseline coverage is 
incomplete.



##########
.github/workflows/benchmarks-pr-comment.yml:
##########
@@ -197,17 +200,369 @@ jobs:
               }
             };
 
+            const readBenchJson = (filePath, suite) => {
+              if (!fs.existsSync(filePath)) return [];
+              if (fs.statSync(filePath).size > MAX_JSON_BYTES) {
+                core.warning(`${filePath} is too large; skipping JSON 
comparison input.`);

Review Comment:
   `readBenchJson()` (and `MAX_JSON_BYTES`) are currently unused in this 
script. If JSON comparison input is no longer part of the approach, removing 
these keeps the workflow script smaller and easier to maintain; otherwise, wire 
it into the comparison logic so it doesn’t drift.



##########
.github/workflows/benchmarks-pr-comment.yml:
##########
@@ -197,17 +200,369 @@ jobs:
               }
             };
 
+            const readBenchJson = (filePath, suite) => {
+              if (!fs.existsSync(filePath)) return [];
+              if (fs.statSync(filePath).size > MAX_JSON_BYTES) {
+                core.warning(`${filePath} is too large; skipping JSON 
comparison input.`);
+                return [];
+              }
+              try {
+                const parsed = JSON.parse(fs.readFileSync(filePath, "utf8"));
+                if (!Array.isArray(parsed)) return [];
+                return parsed
+                  .map((bench) => ({
+                    suite,
+                    name: String(bench.name || ""),
+                    unit: String(bench.unit || ""),
+                    value: Number(bench.value),
+                  }))
+                  .filter((bench) => bench.name && 
Number.isFinite(bench.value));
+              } catch (e) {
+                core.warning(`failed to parse ${filePath}: ${e.message}`);
+                return [];
+              }
+            };
+
+            const parseCsvRows = (text) => {
+              const rows = text
+                .trim()
+                .split(/\r?\n/)
+                .map((line) => line.split(","));
+              if (rows.length < 2) return [];
+              const header = rows[0].map((h) => h.trim());
+              const idx = (col) => header.indexOf(col);
+              return rows.slice(1).map((row) => ({ row, idx }));
+            };
+
+            const prRowsFromCsv = (text) =>
+              parseCsvRows(text)
+                .map(({ row, idx }) => {
+                  const config = `bs=${row[idx("batch_size")]} 
sw=${row[idx("schema_width")]} sl=${row[idx("string_len")]}`;
+                  const metric = (suite, prefix, unit, value) => ({
+                    suite,
+                    name: `${prefix} / ${config}`,
+                    unit,
+                    value: Number(value),
+                  });
+                  return {
+                    config,
+                    throughput: metric(
+                      "Arrow Flight E2E Throughput",
+                      "throughput",
+                      "tuples/sec",
+                      row[idx("tuples_per_sec")]
+                    ),
+                    mbps: metric("Arrow Flight E2E MB/s", "MB/s", "MB/s", 
row[idx("mb_per_sec")]),
+                    p50: metric("Arrow Flight E2E Latency", "latency p50", 
"us", row[idx("lat_p50_us")]),
+                    p95: metric("Arrow Flight E2E Latency", "latency p95", 
"us", row[idx("lat_p95_us")]),
+                    p99: metric("Arrow Flight E2E Latency", "latency p99", 
"us", row[idx("lat_p99_us")]),
+                  };
+                })
+                .filter((item) =>
+                  [item.throughput, item.mbps, item.p50, item.p95, 
item.p99].every((metric) =>
+                    Number.isFinite(metric.value)
+                  )
+                );
+
+            const prRows = csv ? prRowsFromCsv(csv) : [];
+
+            const bytesPerTuple = (benchName) => {
+              const match = benchName.match(/bs=(\d+)\s+sw=(\d+)\s+sl=(\d+)/);
+              if (!match) return null;
+              return Number(match[2]) * Number(match[3]);
+            };
+
+            const derivedMbpsBench = (throughputBench) => {
+              const bytes = bytesPerTuple(throughputBench.name);
+              if (!bytes) return null;
+              return {
+                name: throughputBench.name.replace(/^throughput/, "MB/s"),
+                unit: "MB/s",
+                value: (Number(throughputBench.value) * bytes) / (1024 * 1024),
+              };
+            };
+
+            const loadMainBaseline = async () => {
+              try {
+                const { data } = await github.rest.repos.getContent({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  path: "dev/bench/data.js",
+                  ref: "gh-pages",
+                });
+                if (Array.isArray(data) || !data.content) {
+                  core.warning("gh-pages/dev/bench/data.js is not a file.");
+                  return null;
+                }
+                const raw = Buffer.from(data.content, data.encoding || 
"base64").toString("utf8");

Review Comment:
   `loadMainBaseline()` uses `repos.getContent`, but the Contents API omits 
`content` for large files (>= ~1MB) and will make baseline loading silently 
fall back even though the file exists. The current warning (`is not a file`) is 
also misleading when `content` is missing due to size. Consider using 
`download_url` (when present) to fetch the raw `data.js` contents, and only 
treat `Array.isArray(data)` / `data.type !== 'file'` as “not a file”.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] ci: compare benchmark PRs with main [texera]

Reply via email to