This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 68b2c4d8f chore: Add Java Flight Recorder profiling to TPC benchmarks (#3597)
68b2c4d8f is described below
commit 68b2c4d8f85882015f9646bd01221b1be163263d
Author: Andy Grove <[email protected]>
AuthorDate: Wed Feb 25 17:40:24 2026 -0700
chore: Add Java Flight Recorder profiling to TPC benchmarks (#3597)
---
benchmarks/tpc/README.md | 47 +++++++++++++++++-----
.../tpc/infra/docker/docker-compose-laptop.yml | 1 +
benchmarks/tpc/infra/docker/docker-compose.yml | 1 +
benchmarks/tpc/run.py | 32 +++++++++++++++
4 files changed, 72 insertions(+), 9 deletions(-)
diff --git a/benchmarks/tpc/README.md b/benchmarks/tpc/README.md
index eb1fb0479..319288df0 100644
--- a/benchmarks/tpc/README.md
+++ b/benchmarks/tpc/README.md
@@ -38,15 +38,17 @@ All benchmarks are run via `run.py`:
python3 run.py --engine <engine> --benchmark <tpch|tpcds> [options]
```
-| Option | Description |
-| -------------- | ------------------------------------------------ |
-| `--engine` | Engine name (matches a TOML file in `engines/`) |
-| `--benchmark` | `tpch` or `tpcds` |
-| `--iterations` | Number of iterations (default: 1) |
-| `--output` | Output directory (default: `.`) |
-| `--query` | Run a single query number |
-| `--no-restart` | Skip Spark master/worker restart |
-| `--dry-run` | Print the spark-submit command without executing |
+| Option | Description |
+| -------------- | -------------------------------------------------------- |
+| `--engine` | Engine name (matches a TOML file in `engines/`) |
+| `--benchmark` | `tpch` or `tpcds` |
+| `--iterations` | Number of iterations (default: 1) |
+| `--output` | Output directory (default: `.`) |
+| `--query` | Run a single query number |
+| `--no-restart` | Skip Spark master/worker restart |
+| `--dry-run` | Print the spark-submit command without executing |
+| `--jfr` | Enable Java Flight Recorder profiling |
+| `--jfr-dir` | Directory for JFR output files (default: `/results/jfr`) |
Available engines: `spark`, `comet`, `comet-iceberg`, `gluten`
@@ -363,3 +365,30 @@ python3 generate-comparison.py --benchmark tpch \
--title "TPC-H @ 100 GB: Parquet vs Iceberg" \
comet-tpch-*.json comet-iceberg-tpch-*.json
```
+
+## Java Flight Recorder Profiling
+
+Use the `--jfr` flag to capture JFR profiles from the Spark driver and executors.
+JFR is built into JDK 11+ so no additional dependencies are needed.
+
+```shell
+python3 run.py --engine comet --benchmark tpch --jfr
+```
+
+JFR recordings are written to `/results/jfr/` by default (configurable with
+`--jfr-dir`). The driver writes `driver.jfr` and each executor writes
+`executor.jfr` (JFR appends the PID when multiple executors share a path).
+
+With Docker Compose, the `/results` volume is shared across all containers,
+so JFR files from both driver and executors are collected in
+`$RESULTS_DIR/jfr/` on the host:
+
+```shell
+docker compose -f benchmarks/tpc/infra/docker/docker-compose.yml \
+ run --rm bench \
+ python3 /opt/benchmarks/run.py \
+ --engine comet --benchmark tpch --output /results --no-restart --jfr
+```
+
+Open the `.jfr` files with [JDK Mission Control](https://jdk.java.net/jmc/),
+IntelliJ IDEA's profiler, or `jfr` CLI tool (`jfr summary driver.jfr`).
diff --git a/benchmarks/tpc/infra/docker/docker-compose-laptop.yml b/benchmarks/tpc/infra/docker/docker-compose-laptop.yml
index bc882ae7b..727268406 100644
--- a/benchmarks/tpc/infra/docker/docker-compose-laptop.yml
+++ b/benchmarks/tpc/infra/docker/docker-compose-laptop.yml
@@ -72,6 +72,7 @@ services:
- SPARK_NO_DAEMONIZE=true
mem_limit: 8g
memswap_limit: 8g
+ stop_grace_period: 30s
bench:
image: ${BENCH_IMAGE:-comet-bench}
diff --git a/benchmarks/tpc/infra/docker/docker-compose.yml b/benchmarks/tpc/infra/docker/docker-compose.yml
index 5a76a5d6e..f5c9f0ebe 100644
--- a/benchmarks/tpc/infra/docker/docker-compose.yml
+++ b/benchmarks/tpc/infra/docker/docker-compose.yml
@@ -56,6 +56,7 @@ x-worker: &worker
- SPARK_NO_DAEMONIZE=true
mem_limit: ${WORKER_MEM_LIMIT:-32g}
memswap_limit: ${WORKER_MEM_LIMIT:-32g}
+ stop_grace_period: 30s
services:
spark-master:
diff --git a/benchmarks/tpc/run.py b/benchmarks/tpc/run.py
index 38b0ed500..58afc0bbe 100755
--- a/benchmarks/tpc/run.py
+++ b/benchmarks/tpc/run.py
@@ -261,6 +261,24 @@ def build_spark_submit_cmd(config, benchmark, args):
val = "true" if val else "false"
conf[resolve_env(key)] = resolve_env(str(val))
+ # JFR profiling: append to extraJavaOptions (preserving any existing values)
+ if args.jfr:
+ jfr_dir = args.jfr_dir
+ driver_jfr = (
+ f"-XX:StartFlightRecording=disk=true,dumponexit=true,"
+ f"filename={jfr_dir}/driver.jfr,settings=profile"
+ )
+ executor_jfr = (
+ f"-XX:StartFlightRecording=disk=true,dumponexit=true,"
+ f"filename={jfr_dir}/executor.jfr,settings=profile"
+ )
+ for spark_key, jfr_opts in [
+ ("spark.driver.extraJavaOptions", driver_jfr),
+ ("spark.executor.extraJavaOptions", executor_jfr),
+ ]:
+ existing = conf.get(spark_key, "")
+ conf[spark_key] = f"{existing} {jfr_opts}".strip()
+
for key, val in sorted(conf.items()):
cmd += ["--conf", f"{key}={val}"]
@@ -357,6 +375,16 @@ def main():
action="store_true",
help="Print the spark-submit command without executing",
)
+ parser.add_argument(
+ "--jfr",
+ action="store_true",
+ help="Enable Java Flight Recorder profiling for driver and executors",
+ )
+ parser.add_argument(
+ "--jfr-dir",
+ default="/results/jfr",
+ help="Directory for JFR output files (default: /results/jfr)",
+ )
args = parser.parse_args()
config = load_engine_config(args.engine)
@@ -373,6 +401,10 @@ def main():
if not args.no_restart and not args.dry_run:
restart_spark()
+ # Create JFR output directory if profiling is enabled
+ if args.jfr:
+ os.makedirs(args.jfr_dir, exist_ok=True)
+
cmd = build_spark_submit_cmd(config, args.benchmark, args)
if args.dry_run:
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]