Copilot commented on code in PR #50056:
URL: https://github.com/apache/arrow/pull/50056#discussion_r3460072137
##########
dev/archery/archery/benchmark/runner.py:
##########
@@ -211,48 +247,59 @@ def from_rev_or_path(src, root, rev_or_path, cmake_conf,
**kwargs):
"""
build = None
if StaticBenchmarkRunner.is_json_result(rev_or_path):
- kwargs.pop('benchmark_extras', None)
+ kwargs.pop("benchmark_extras", None)
return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs)
elif CMakeBuild.is_build_dir(rev_or_path):
build = CMakeBuild.from_path(rev_or_path)
return CppBenchmarkRunner(build, **kwargs)
else:
- # Revisions can references remote via the `/` character, ensure
- # that the revision is path friendly
- path_rev = rev_or_path.replace("/", "_")
+ path_rev = _rev_or_path_dirname(src, rev_or_path)
root_rev = os.path.join(root, path_rev)
- os.mkdir(root_rev)
+ os.makedirs(root_rev, exist_ok=True)
+ # Clone dir is reused when there is known revision (git sha)
+ # in <preserve-dir>/sha/arrow
clone_dir = os.path.join(root_rev, "arrow")
- # Possibly checkout the sources at given revision, no need to
- # perform cleanup on cloned repository as root_rev is reclaimed.
- src_rev, _ = src.at_revision(rev_or_path, clone_dir)
+ if os.path.isdir(clone_dir):
+ src_rev = ArrowSources(clone_dir)
+ else:
+ src_rev, _ = src.at_revision(rev_or_path, clone_dir)
cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf)
- build_dir = os.path.join(root_rev, "build")
- return CppBenchmarkRunner(cmake_def.build(build_dir), **kwargs)
+ run_root = os.path.join(root_rev, "build")
+ os.makedirs(run_root, exist_ok=True)
+ # run_id is a path-safe ISO-8601 UTC timestamp down to the second
+ # so users get a brief idea of what run they are looking at.
+ now = datetime.datetime.now(datetime.timezone.utc)
+ run_id = now.strftime("%Y-%m-%dT%H-%M-%SZ")
+ build_dir = os.path.join(run_root, run_id)
Review Comment:
`run_id` only has second-level precision. Two benchmark invocations started
within the same second (or a rerun after a partial failure) will collide on the
same `build_dir` / `results_dir`, potentially reusing or clobbering artifacts
unexpectedly. Use microseconds (or add a random suffix) to make the directory
name reliably unique.
##########
dev/archery/archery/benchmark/runner.py:
##########
@@ -211,48 +247,59 @@ def from_rev_or_path(src, root, rev_or_path, cmake_conf,
**kwargs):
"""
build = None
if StaticBenchmarkRunner.is_json_result(rev_or_path):
- kwargs.pop('benchmark_extras', None)
+ kwargs.pop("benchmark_extras", None)
return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs)
elif CMakeBuild.is_build_dir(rev_or_path):
build = CMakeBuild.from_path(rev_or_path)
return CppBenchmarkRunner(build, **kwargs)
else:
- # Revisions can references remote via the `/` character, ensure
- # that the revision is path friendly
- path_rev = rev_or_path.replace("/", "_")
+ path_rev = _rev_or_path_dirname(src, rev_or_path)
root_rev = os.path.join(root, path_rev)
- os.mkdir(root_rev)
+ os.makedirs(root_rev, exist_ok=True)
+ # Clone dir is reused when there is known revision (git sha)
+ # in <preserve-dir>/sha/arrow
clone_dir = os.path.join(root_rev, "arrow")
- # Possibly checkout the sources at given revision, no need to
- # perform cleanup on cloned repository as root_rev is reclaimed.
- src_rev, _ = src.at_revision(rev_or_path, clone_dir)
+ if os.path.isdir(clone_dir):
+ src_rev = ArrowSources(clone_dir)
+ else:
+ src_rev, _ = src.at_revision(rev_or_path, clone_dir)
cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf)
- build_dir = os.path.join(root_rev, "build")
- return CppBenchmarkRunner(cmake_def.build(build_dir), **kwargs)
+ run_root = os.path.join(root_rev, "build")
+ os.makedirs(run_root, exist_ok=True)
Review Comment:
The PR description shows build artifacts under `build/run/<id>`, but the
code currently writes to `build/<id>`. If the intended layout includes the
extra `run/` level, set `run_root` accordingly so the on-disk structure matches
the documented layout.
##########
dev/archery/archery/cli.py:
##########
@@ -465,6 +494,15 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output,
cmake_extras,
# when asked to JSON-serialize the results, so produce a JSON
# output even when none is requested.
json_out = json.dumps(runner_base, cls=JsonEncoder)
+ if runner_base.results_dir is not None:
+ results_path = os.path.join(runner_base.results_dir,
+ "benchmark.json")
+ with open(results_path, "w") as f:
Review Comment:
New behavior writes `benchmark.json` and `metadata.json` into
`runner_base.results_dir`, but there doesn't appear to be a CLI test exercising
`archery benchmark run --preserve --preserve-dir ...` and asserting these files
are created. Adding a `CliRunner` test with a mocked
`CppBenchmarkRunner.from_rev_or_path()` (setting `results_dir` to a temp
directory) would help prevent regressions.
##########
dev/archery/archery/cli.py:
##########
@@ -41,6 +43,27 @@
BOOL = ArrowBool()
+def _run_metadata(ctx):
+ uname = platform.uname()
+ return {
+ "time": datetime.datetime.now(datetime.timezone.utc).isoformat(),
+ "cmd": {
+ "full": " ".join(sys.argv),
+ "params": ctx.params,
+ },
Review Comment:
`"full": " ".join(sys.argv)` loses quoting/escaping information (arguments
containing spaces, quotes, etc.), making the recorded command hard to reliably
re-run. Consider also storing the raw argv list so the invocation can be
reconstructed unambiguously.
##########
dev/archery/archery/benchmark/runner.py:
##########
@@ -211,48 +247,59 @@ def from_rev_or_path(src, root, rev_or_path, cmake_conf,
**kwargs):
"""
build = None
if StaticBenchmarkRunner.is_json_result(rev_or_path):
- kwargs.pop('benchmark_extras', None)
+ kwargs.pop("benchmark_extras", None)
return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs)
elif CMakeBuild.is_build_dir(rev_or_path):
build = CMakeBuild.from_path(rev_or_path)
return CppBenchmarkRunner(build, **kwargs)
else:
- # Revisions can references remote via the `/` character, ensure
- # that the revision is path friendly
- path_rev = rev_or_path.replace("/", "_")
+ path_rev = _rev_or_path_dirname(src, rev_or_path)
root_rev = os.path.join(root, path_rev)
- os.mkdir(root_rev)
+ os.makedirs(root_rev, exist_ok=True)
+ # Clone dir is reused when there is known revision (git sha)
+ # in <preserve-dir>/sha/arrow
clone_dir = os.path.join(root_rev, "arrow")
- # Possibly checkout the sources at given revision, no need to
- # perform cleanup on cloned repository as root_rev is reclaimed.
- src_rev, _ = src.at_revision(rev_or_path, clone_dir)
+ if os.path.isdir(clone_dir):
+ src_rev = ArrowSources(clone_dir)
+ else:
+ src_rev, _ = src.at_revision(rev_or_path, clone_dir)
cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf)
- build_dir = os.path.join(root_rev, "build")
- return CppBenchmarkRunner(cmake_def.build(build_dir), **kwargs)
+ run_root = os.path.join(root_rev, "build")
+ os.makedirs(run_root, exist_ok=True)
+ # run_id is a path-safe ISO-8601 UTC timestamp down to the second
+ # so users get a brief idea of what run they are looking at.
+ now = datetime.datetime.now(datetime.timezone.utc)
+ run_id = now.strftime("%Y-%m-%dT%H-%M-%SZ")
+ build_dir = os.path.join(run_root, run_id)
+ build = cmake_def.build(build_dir)
+ results_dir = os.path.join(root_rev, "bench", run_id)
+ os.makedirs(results_dir, exist_ok=True)
Review Comment:
The PR description shows benchmark outputs under `bench/run/<id>`, but the
code currently writes to `bench/<id>`. If `run/` is part of the intended
preserved layout, include it in `results_dir` for consistency with the
documented structure.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]