This is an automated email from the ASF dual-hosted git repository.
bernardobotella pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/trunk by this push:
new 7c3c3a1d86 Initial async-profiler Nodetool implementation
7c3c3a1d86 is described below
commit 7c3c3a1d86782a515583f89c6f17fb30e7f5e41e
Author: Yaman Ziadeh <[email protected]>
AuthorDate: Tue Jul 15 16:13:18 2025 -0400
Initial async-profiler Nodetool implementation
patch by Yaman Ziadeh, Bernardo Botella, Stefan Miklosovic; reviewed by
Dmitry Konstantinov, Jyothsna Konisa for CASSANDRA-20854
---
.build/cassandra-deps-maven-pom.xml | 4 +
.build/parent-maven-pom.xml | 6 +
CHANGES.txt | 1 +
NEWS.txt | 1 +
conf/jvm-server.options | 7 +
doc/modules/cassandra/nav.adoc | 1 +
.../pages/managing/operating/async-profiler.adoc | 141 ++++++
.../config/CassandraRelevantProperties.java | 2 +
.../cassandra/profiler/AsyncProfilerMBean.java | 107 ++++
.../cassandra/service/AsyncProfilerService.java | 538 +++++++++++++++++++++
.../apache/cassandra/service/CassandraDaemon.java | 2 +
.../apache/cassandra/service/StartupChecks.java | 63 +++
.../apache/cassandra/service/StorageService.java | 11 +
src/java/org/apache/cassandra/tools/NodeProbe.java | 11 +
.../tools/nodetool/AsyncProfileCommandGroup.java | 264 ++++++++++
.../cassandra/tools/nodetool/NodetoolCommand.java | 1 +
.../mock/nodetool/InternalNodeProbe.java | 2 +
.../distributed/test/AsyncProfilerTest.java | 198 ++++++++
test/resources/nodetool/help/nodetool | 1 +
test/resources/nodetool/help/profile | 102 ++++
test/resources/nodetool/help/profile$execute | 38 ++
test/resources/nodetool/help/profile$fetch | 40 ++
test/resources/nodetool/help/profile$list | 27 ++
test/resources/nodetool/help/profile$purge | 27 ++
test/resources/nodetool/help/profile$start | 48 ++
test/resources/nodetool/help/profile$status | 27 ++
test/resources/nodetool/help/profile$stop | 32 ++
.../service/AsyncProfilerServiceTest.java | 315 ++++++++++++
28 files changed, 2017 insertions(+)
diff --git a/.build/cassandra-deps-maven-pom.xml
b/.build/cassandra-deps-maven-pom.xml
index 4402a8efd9..474c1037d0 100644
--- a/.build/cassandra-deps-maven-pom.xml
+++ b/.build/cassandra-deps-maven-pom.xml
@@ -161,6 +161,10 @@
<groupId>com.github.jbellis</groupId>
<artifactId>jamm</artifactId>
</dependency>
+ <dependency>
+ <groupId>tools.profiler</groupId>
+ <artifactId>async-profiler</artifactId>
+ </dependency>
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
diff --git a/.build/parent-maven-pom.xml b/.build/parent-maven-pom.xml
index 5d6ca01a4c..c75e254533 100644
--- a/.build/parent-maven-pom.xml
+++ b/.build/parent-maven-pom.xml
@@ -40,6 +40,7 @@
<byteman.version>4.0.20</byteman.version>
<netty.version>4.1.130.Final</netty.version>
<ohc.version>0.5.1</ohc.version>
+ <async-profiler.version>4.2</async-profiler.version>
<!-- These are referenced in build.xml, so need to be propagated from
there -->
<asm.version>@asm.version@</asm.version>
@@ -465,6 +466,11 @@
<artifactId>jamm</artifactId>
<version>${jamm.version}</version>
</dependency>
+ <dependency>
+ <groupId>tools.profiler</groupId>
+ <artifactId>async-profiler</artifactId>
+ <version>${async-profiler.version}</version>
+ </dependency>
<dependency>
<!-- Test scoped jackson-dataformat-yaml also depends on snakeyaml.
For now, these versions are aligned
but if you happen to bump it here then exclude it in
jackson-dataformat-yaml.
diff --git a/CHANGES.txt b/CHANGES.txt
index 8b23fa63e6..a78c0876ad 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
5.1
+ * Support low-overhead async profiling (CASSANDRA-20854)
* Minor perf optimizations around memtable put logic (CASSANDRA-21088)
* When level compaction validates its table properties, it used the wrong
default value for sstable_size_in_mb which allowed properties that would later
be rejected at runtime (CASSANDRA-20570)
* Fix off-by-one bug in exponential backoff for repair retry config
(CASSANDRA-21102)
diff --git a/NEWS.txt b/NEWS.txt
index f2c18f6762..724d860ea9 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -135,6 +135,7 @@ New features
CASSANDRA-20102 adds a subset of the SQL99 (binary) string functions:
"octet_length" defined on all types
and "length" defined on UTF8 strings. See CASSANDRA-20102 for more
information.
- New functions `format_bytes` and `format_time` were added. See
CASSANDRA-19546.
+ - It is possible to use Async-profiler for various profiling scenarios.
See CASSANDRA-20854.
Upgrading
---------
diff --git a/conf/jvm-server.options b/conf/jvm-server.options
index d850592db0..3abb550583 100644
--- a/conf/jvm-server.options
+++ b/conf/jvm-server.options
@@ -156,6 +156,13 @@
# Disable chronicle analytics. See CASSANDRA-19656
-Dchronicle.analytics.disable=true
+# AsyncProfiler Flags
+#-Dcassandra.async_profiler.enabled=true|false
+#-Dcassandra.async_profiler.unsafe_mode=true|false
+# When not set, logs from profiler will be placed under
+# directory behind "cassandra.logdir" property, into "async-profiler"
sub-directory
+#-Dcassandra.logdir.async_profiler=/tmp/cassandra-profiling
+
### Debug options
# uncomment to enable flight recorder
diff --git a/doc/modules/cassandra/nav.adoc b/doc/modules/cassandra/nav.adoc
index 0ced80090d..09fcdd8843 100644
--- a/doc/modules/cassandra/nav.adoc
+++ b/doc/modules/cassandra/nav.adoc
@@ -112,6 +112,7 @@
**** xref:cassandra:managing/operating/password_validation.adoc[Password
validation]
**** xref:cassandra:managing/operating/role_name_generation.adoc[Role name
generation]
**** xref:cassandra:managing/operating/onboarding-to-accord.adoc[]
+**** xref:cassandra:managing/operating/async-profiler.adoc[]
*** xref:cassandra:managing/tools/index.adoc[Tools]
**** xref:cassandra:managing/tools/cqlsh.adoc[cqlsh: the CQL shell]
**** xref:cassandra:managing/tools/nodetool/nodetool.adoc[nodetool]
diff --git a/doc/modules/cassandra/pages/managing/operating/async-profiler.adoc
b/doc/modules/cassandra/pages/managing/operating/async-profiler.adoc
new file mode 100644
index 0000000000..8e0421bc74
--- /dev/null
+++ b/doc/modules/cassandra/pages/managing/operating/async-profiler.adoc
@@ -0,0 +1,141 @@
+= Async-profiler
+
+Since https://issues.apache.org/jira/browse/CASSANDRA-20854[CASSANDRA-20854],
it is possible to use
+https://github.com/async-profiler/async-profiler[async-profiler] to profile
your nodes. Async-profiler is
+shipped with Cassandra, so you do not need to do anything else but start to
use it by enabling a property.
+Async-profiler functionality is disabled by default. It can be turned on by
setting Cassandra's `cassandra.async_profiler.enabled` property to `true`.
+
+There is a command in `nodetool` called `profile` with these sub-commands:
+
+== start
+
+Basic usage:
+----
+$ nodetool profile start
+----
+
+This will start profiling, by default for 60 seconds. If you want, for
example, to profile memory allocations for
+5 minutes and save the results into a file named memory-allocation-5m.html, you would run:
+
+----
+$ nodetool profile start -e alloc -d 5m -o memory-allocation-5m.html
+----
+
+The following events can be profiled:
+
+'cpu', 'alloc', 'lock', 'wall', 'nativemem', 'cache_misses'. Multiple events are delimited by
a comma; the default is 'cpu'.
+
+The following output formats can be specified via the `--format` flag:
+
+'flat', 'traces', 'collapsed', 'flamegraph', 'tree', 'jfr', 'otlp'; the default
is 'flamegraph'.
+
+== status
+
+You can then inspect the state of profiling by `status` subcommand:
+
+----
+$ nodetool profile status
+Profiling is running for 7 seconds
+----
+
+If you attempt to start another profiling session while one is already
running, the request will be rejected:
+
+----
+$ nodetool profile start -e alloc -d 5m -o memory-allocation-5m.html
+Profiler has already started or there was a failure to start it.
+----
+
+== stop
+
+You can stop the profiling prematurely with the `stop` sub-command:
+
+----
+$ nodetool profile stop -o memory-allocation-5m.html
+----
+
+After the profiling is finished, either because it stopped on its own
or because we stopped it explicitly, there is a result file in the results directory on the node. We
can inspect which results exist with the `list` sub-command:
+
+== list
+
+----
+$ nodetool profile list
+memory-allocation-5m.html
+cpu.html
+----
+
+== fetch
+
+If you have access to a node, you can go to the `logs` directory of Cassandra
+(the default location), into its `async-profiler` sub-directory, and obtain the respective
file. However, when
+you are profiling remotely (nodetool is executed on a physically
different machine from the Cassandra node), or you do not have direct access to
the remote disk, you need to use the `fetch` sub-command, which will send the content of
your result file to your machine, where you can save it to whatever destination you want:
+
+----
+$ nodetool profile fetch cpu.html /tmp/cpu.html
+----
+
+== purge
+
+Of course, the more you profile, the more disk space the results will occupy. If you
have direct access, you can just
+remove the files yourself; if you do not, you need to use the `purge`
sub-command, which will remove all profiling files:
+
+----
+$ nodetool profile purge
+$ nodetool profile list
+<no output>
+----
+
+== execute
+
+You can also execute arbitrary commands, by `execute` subcommand, like this:
+
+----
+nodetool profile execute meminfo
+Call trace storage: 10244 KB
+ Flight recording: 0 KB
+ Dictionaries: 68 KB
+ Code cache: 11934 KB
+------------------------------
+ Total: 22246 KB
+----
+
+However, to execute arbitrary commands for Async-profiler, you need to enable
_unsafe_ async profiling by setting Cassandra's system property
`cassandra.async_profiler.unsafe_mode` to `true`. You will not be able to
do this otherwise.
+
+You can also control where profiling files go via
`cassandra.logdir.async_profiler` system property. When not set, by default
they will be stored to `cassandra.logdir` + `async-profiler` directory.
+
+== Using a Different Async-Profiler Version
+
+If you need to use a different version of async-profiler (for example, to test
a newer version or a custom build), you can replace the JAR file in the
classpath.
+
+After building Cassandra, replace the JAR in the build output directory:
+
+----
+$ cp /path/to/your/async-profiler-X.Y.jar build/lib/jars/async-profiler-4.2.jar
+----
+
+Or replace it in the lib directory before building:
+
+----
+$ cp /path/to/your/async-profiler-X.Y.jar lib/async-profiler-4.2.jar
+$ ant clean jar
+----
+
+Then restart Cassandra to use the new version.
+
+=== Compatibility Requirements
+
+The replacement JAR must maintain API compatibility with the original version:
+
+* Must include the `one.profiler.AsyncProfiler` class with compatible methods
(`getInstance()`, `execute(String)`)
+* Must contain native libraries for your platform in the correct structure:
+** `linux-x64/libasyncProfiler.so`
+** `linux-arm64/libasyncProfiler.so`
+** `macos/libasyncProfiler.so`
+* Must support the same command syntax and profiling events (cpu, alloc, lock,
wall, nativemem, cache-misses)
+
+To verify the replacement worked, check the version after restart:
+
+----
+$ nodetool profile status
+----
+
+If you encounter errors, check the Cassandra logs for async-profiler
initialization issues.
\ No newline at end of file
diff --git
a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java
b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java
index ca79ff3727..cf514b7dfc 100644
--- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java
+++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java
@@ -62,6 +62,8 @@ public enum CassandraRelevantProperties
ALLOW_UNSAFE_REPLACE("cassandra.allow_unsafe_replace"),
ALLOW_UNSAFE_TRANSIENT_CHANGES("cassandra.allow_unsafe_transient_changes"),
APPROXIMATE_TIME_PRECISION_MS("cassandra.approximate_time_precision_ms",
"2"),
+ ASYNC_PROFILER_ENABLED("cassandra.async_profiler.enabled", "false"),
+ ASYNC_PROFILER_UNSAFE_MODE("cassandra.async_profiler.unsafe_mode",
"false"),
/** 2 ** GENSALT_LOG2_ROUNDS rounds of hashing will be performed. */
AUTH_BCRYPT_GENSALT_LOG2_ROUNDS("cassandra.auth_bcrypt_gensalt_log2_rounds",
"4"),
/** We expect default values on cache retries and interval to be
sufficient for everyone but have this escape hatch just in case. */
diff --git a/src/java/org/apache/cassandra/profiler/AsyncProfilerMBean.java
b/src/java/org/apache/cassandra/profiler/AsyncProfilerMBean.java
new file mode 100644
index 0000000000..e9209c51cd
--- /dev/null
+++ b/src/java/org/apache/cassandra/profiler/AsyncProfilerMBean.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.profiler;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.config.DurationSpec;
+import org.apache.cassandra.service.AsyncProfilerService.AsyncProfilerEvent;
+import org.apache.cassandra.service.AsyncProfilerService.AsyncProfilerFormat;
+
+/**
+ * JMX management interface for controlling Async-Profiler on a node: starting and stopping
+ * profiling, executing profiler commands and managing profiler result files.
+ */
+public interface AsyncProfilerMBean
+{
+ String MBEAN_NAME = "org.apache.cassandra.profiler:type=AsyncProfiler";
+
+ /**
+ * Starts profiling.
+ *
+ * As of now, valid keys to the map are:
+ * <p>
+ * <ul>
+ * <li>events - comma separated list of {@link AsyncProfilerEvent}</li>
+ * <li>outputFormat - one of {@link AsyncProfilerFormat}</li>
+ * <li>duration - duration of profiling, a string, in human-friendly form, see {@link DurationSpec.IntSecondsBound}</li>
+ * <li>outputFileName - simple name of a file to save results to, under {@link CassandraRelevantProperties#LOG_DIR}/profiler directory</li>
+ * </ul>
+ * </p>
+ *
+ * @param parameters Parameters for start command
+ * @return true if profiling has started, false when not (e.g. when it was started already)
+ */
+ boolean start(Map<String, String> parameters);
+
+ /**
+ * Stops profiling.
+ *
+ * As of now, valid keys to the map are:
+ * <p>
+ * <ul>
+ * <li>outputFileName - simple file name where to store profiling result, optional</li>
+ * </ul>
+ * </p>
+ *
+ * @param parameters Parameters for stop command
+ * @return true if profiling was stopped
+ */
+ boolean stop(Map<String, String> parameters);
+
+ /**
+ * Executes a command.
+ *
+ * @param command command to execute.
+ * @return execution result
+ */
+ String execute(String command);
+
+ /**
+ * Checks if a profiler is available.
+ *
+ * @return true if async profiling is enabled and profiler is initialized, false otherwise.
+ */
+ boolean isEnabled();
+
+ /**
+ * Removes all profile files from disk.
+ */
+ void purge();
+
+ /**
+ * Returns list of files where profiler was saving results.
+ *
+ * @return list of profiler result files
+ */
+ List<String> list();
+
+ /**
+ * Returns the content of a result file. Use {@link #list()} to get their names.
+ *
+ * @param resultFile file with profiler results
+ * @return content of profiler results file
+ * @throws IOException when the file is not found or another I/O error occurred
+ */
+ byte[] fetch(String resultFile) throws IOException;
+
+ /**
+ * @return status the profiler is in, as string description, for diagnostic purposes
+ */
+ String status();
+}
diff --git a/src/java/org/apache/cassandra/service/AsyncProfilerService.java
b/src/java/org/apache/cassandra/service/AsyncProfilerService.java
new file mode 100644
index 0000000000..f287857e1b
--- /dev/null
+++ b/src/java/org/apache/cassandra/service/AsyncProfilerService.java
@@ -0,0 +1,538 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.service;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.NoSuchFileException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.regex.Pattern;
+
+import javax.management.StandardMBean;
+
+import com.google.common.annotations.VisibleForTesting;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.config.DurationSpec;
+import org.apache.cassandra.exceptions.ConfigurationException;
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.profiler.AsyncProfilerMBean;
+import org.apache.cassandra.utils.MBeanWrapper;
+
+import one.profiler.AsyncProfiler;
+
+import static java.lang.String.format;
+import static java.util.stream.Collectors.toList;
+import static
org.apache.cassandra.config.CassandraRelevantProperties.ASYNC_PROFILER_ENABLED;
+import static
org.apache.cassandra.config.CassandraRelevantProperties.ASYNC_PROFILER_UNSAFE_MODE;
+import static org.apache.cassandra.config.CassandraRelevantProperties.LOG_DIR;
+
+public class AsyncProfilerService implements AsyncProfilerMBean
+{
+ private static final Logger logger =
LoggerFactory.getLogger(AsyncProfilerService.class);
+
+ private static final EnumSet<AsyncProfilerEvent> VALID_EVENTS =
EnumSet.allOf(AsyncProfilerEvent.class);
+ private static final EnumSet<AsyncProfilerFormat> VALID_FORMATS =
EnumSet.allOf(AsyncProfilerFormat.class);
+ private static final Pattern VALID_FILENAME_REGEX_PATTERN =
Pattern.compile("^[a-zA-Z0-9-]*\\.?[a-zA-Z0-9-]*$");
+ // Upper bound, in seconds, that parseDuration accepts for a profiling run.
+ private static final int MAX_SAFE_PROFILING_DURATION = 43200; // 12 hours
+ // Default results directory: the "profiler" sub-directory of LOG_DIR, as an absolute path.
+ // NOTE(review): the operator documentation for this feature talks about an "async-profiler"
+ // sub-directory - confirm which name is intended.
+ private static final String ASYNC_PROFILER_LOG_DIR =
Path.of(LOG_DIR.getString(), "profiler").toAbsolutePath().toString();
+
+ public static final String ASYNC_PROFILER_START_EVENTS_PARAM = "events";
+ public static final String ASYNC_PROFILER_START_OUTPUT_FORMAT_PARAM =
"outputFormat";
+ public static final String ASYNC_PROFILER_START_DURATION_PARAM =
"duration";
+ public static final String ASYNC_PROFILER_START_OUTPUT_FILE_NAME_PARAM =
"outputFileName";
+ public static final Set<String> ASYNC_PROFILER_START_PARAMS =
Set.of(ASYNC_PROFILER_START_EVENTS_PARAM,
+
ASYNC_PROFILER_START_OUTPUT_FORMAT_PARAM,
+
ASYNC_PROFILER_START_DURATION_PARAM,
+
ASYNC_PROFILER_START_OUTPUT_FILE_NAME_PARAM);
+
+ public static final String ASYNC_PROFILER_STOP_OUTPUT_FILE_NAME_PARAM =
"outputFileName";
+ public static final Set<String> ASYNC_PROFILER_STOP_PARAMS =
Set.of(ASYNC_PROFILER_STOP_OUTPUT_FILE_NAME_PARAM);
+
+ private static AsyncProfilerService instance;
+ private static AsyncProfiler asyncProfiler;
+ private final boolean unsafeMode;
+ private static String logDir;
+ private final AtomicReference<File> currentResultFile = new
AtomicReference<>();
+ private final StartupChecks.AsyncProfilerKernelParamsCheck
kernelParamsCheck;
+
+ /**
+ * Returns the singleton service, creating it with a default kernel parameters check when needed.
+ *
+ * @param logDir directory where profiler results are stored
+ * @param registerMBean whether the service should be registered as an MBean on creation
+ * @return the singleton service
+ */
+ @VisibleForTesting
+ public static synchronized AsyncProfilerService instance(String logDir, boolean registerMBean)
+ {
+     StartupChecks.AsyncProfilerKernelParamsCheck defaultCheck = new StartupChecks.AsyncProfilerKernelParamsCheck();
+     return instance(logDir, registerMBean, defaultCheck);
+ }
+
+ /**
+ * Returns the singleton service, creating and initialising it on first use.
+ * <p>
+ * The results directory is (re)assigned on every call. On first use the results directory is
+ * created, the profiler is obtained when {@code ASYNC_PROFILER_ENABLED} is set, and the service
+ * is optionally registered as an MBean. The singleton is published only after all of these steps
+ * succeeded, so a failed initialisation does not leave a half-initialised instance behind.
+ *
+ * @param logDir directory where profiler results are stored
+ * @param registerMBean whether the service should be registered as an MBean on creation
+ * @param kernelParamsCheck kernel parameters check handed to the created service
+ * @return the singleton service
+ * @throws RuntimeException wrapping any failure that occurred during initialisation
+ */
+ @VisibleForTesting
+ public static synchronized AsyncProfilerService instance(String logDir, boolean registerMBean, StartupChecks.AsyncProfilerKernelParamsCheck kernelParamsCheck)
+ {
+     AsyncProfilerService.logDir = logDir;
+     if (instance == null)
+     {
+         try
+         {
+             // build into a local so that a failure below never publishes a broken singleton
+             AsyncProfilerService candidate = new AsyncProfilerService(ASYNC_PROFILER_UNSAFE_MODE.getBoolean(), kernelParamsCheck);
+
+             try
+             {
+                 maybeCreateProfilesLogDir();
+             }
+             catch (Throwable t)
+             {
+                 // keep the original failure as the cause so the stack trace is not lost
+                 throw new ConfigurationException(t.getMessage(), t);
+             }
+
+             if (ASYNC_PROFILER_ENABLED.getBoolean())
+                 asyncProfiler = candidate.getProfiler();
+
+             if (registerMBean)
+             {
+                 MBeanWrapper.instance.registerMBean(new StandardMBean(candidate, AsyncProfilerMBean.class),
+                                                     AsyncProfilerService.MBEAN_NAME,
+                                                     MBeanWrapper.OnException.LOG);
+             }
+
+             instance = candidate;
+         }
+         catch (Throwable t)
+         {
+             throw new RuntimeException(t);
+         }
+     }
+     return AsyncProfilerService.instance;
+ }
+
+ public static synchronized AsyncProfilerService instance()
+ {
+ if (instance == null)
+ return instance(ASYNC_PROFILER_LOG_DIR, true);
+ else
+ return instance;
+ }
+
+ /**
+ * Creates a service without a kernel parameters check (the check is passed as null).
+ *
+ * @param unsafeMode whether arbitrary command execution via {@code execute} is permitted
+ */
+ public AsyncProfilerService(boolean unsafeMode)
+ {
+ this(unsafeMode, null);
+ }
+
+ /**
+ * Creates a service.
+ *
+ * @param unsafeMode whether arbitrary command execution via {@code execute} is permitted
+ * @param kernelParamsCheck kernel parameters check executed when profiling is started
+ */
+ public AsyncProfilerService(boolean unsafeMode,
StartupChecks.AsyncProfilerKernelParamsCheck kernelParamsCheck)
+ {
+ this.unsafeMode = unsafeMode;
+ this.kernelParamsCheck = kernelParamsCheck;
+ }
+
+ /**
+ * Events which can be profiled. The constant name is what a user passes in, the wrapped
+ * string is the event name handed over to Async-Profiler.
+ */
+ public enum AsyncProfilerEvent
+ {
+     cpu("cpu"),
+     alloc("alloc"),
+     lock("lock"),
+     wall("wall"),
+     nativemem("nativemem"),
+     cache_misses("cache-misses");
+
+     private final String name;
+
+     AsyncProfilerEvent(String name)
+     {
+         this.name = name;
+     }
+
+     /**
+      * @return event name as passed to Async-Profiler
+      */
+     public String getEvent()
+     {
+         return name;
+     }
+
+     /**
+      * Translates a comma separated list of enum constant names to a comma separated list
+      * of profiler event names. Whitespace around individual events is ignored.
+      *
+      * @param rawString comma separated names of constants of this enum
+      * @return comma separated profiler event names, in the order they were passed in
+      * @throws IllegalArgumentException when the input is null, blank, or contains an unknown event
+      */
+     public static String parseEvents(String rawString)
+     {
+         if (rawString == null || rawString.isBlank())
+             throw new IllegalArgumentException("Event can not be null nor blank string.");
+
+         try
+         {
+             List<String> processedEvents = new ArrayList<>();
+             for (String rawEvent : rawString.split(","))
+                 processedEvents.add(AsyncProfilerEvent.valueOf(rawEvent.trim()).getEvent()); // tolerate "cpu, alloc"
+
+             return String.join(",", processedEvents);
+         }
+         catch (IllegalArgumentException ex)
+         {
+             throw new IllegalArgumentException(format("Event must be one or a combination of %s", VALID_EVENTS), ex);
+         }
+     }
+ }
+
+ /**
+ * Output formats of profiling results.
+ */
+ public enum AsyncProfilerFormat
+ {
+     flat, traces, collapsed, flamegraph, tree, jfr;
+
+     /**
+      * Validates an output format.
+      *
+      * @param rawFormat name of a constant of this enum
+      * @return name of the matching constant
+      * @throws IllegalArgumentException when the input is null, blank, or not a known format
+      */
+     public static String parseFormat(String rawFormat)
+     {
+         if (rawFormat == null || rawFormat.isBlank())
+             throw new IllegalArgumentException("Format can not be null nor blank string.");
+
+         try
+         {
+             return AsyncProfilerFormat.valueOf(rawFormat).name();
+         }
+         catch (IllegalArgumentException ex)
+         {
+             throw new IllegalArgumentException(format("Format must be one of %s", VALID_FORMATS), ex);
+         }
+     }
+ }
+
+ @Override
+ public synchronized boolean start(Map<String, String> parameters)
+ {
+ if (isRunning())
+ return false;
+
+ validateStartParameters(parameters);
+
+ try
+ {
+ run(new ThrowingFunction<>()
+ {
+ @Override
+ public Object apply(AsyncProfiler profiler) throws Throwable
+ {
+ maybeCreateProfilesLogDir();
+ kernelParamsCheck.execute(null, true);
+
+ String parsedFormat =
AsyncProfilerFormat.parseFormat(parameters.get(ASYNC_PROFILER_START_OUTPUT_FORMAT_PARAM));
+ String parsedEvents =
AsyncProfilerEvent.parseEvents(parameters.get(ASYNC_PROFILER_START_EVENTS_PARAM));
+ File file = new File(logDir,
validateOutputFileName(parameters.get(ASYNC_PROFILER_START_OUTPUT_FILE_NAME_PARAM)));
+
+ String cmd = format("start,%s,event=%s,timeout=%s,file=%s",
+ parsedFormat,
+ parsedEvents,
+
parseDuration(parameters.get(ASYNC_PROFILER_START_DURATION_PARAM)),
+ file);
+
+ currentResultFile.set(file);
+
+ String result = profiler.execute(cmd);
+ logger.info("Started Async-Profiler: result={}, cmd={}",
result, cmd);
+
+ return null;
+ }
+ });
+ return true;
+ }
+ catch (IllegalStateException | IllegalArgumentException ex)
+ {
+ throw ex;
+ }
+ catch (Throwable t)
+ {
+ logger.error("Failed to start Async-Profiler", t);
+ return false;
+ }
+ }
+
+ /**
+ * Stops profiling when it is running.
+ * <p>
+ * Results are written to the file given in parameters or, when not specified, to the file
+ * remembered by the last successful {@code start}. When neither is known (e.g. profiling was
+ * not started via {@code start} of this service), a plain "stop" command is sent instead of
+ * failing, so the profiler is always stopped.
+ *
+ * @param parameters stop parameters, only keys from {@code ASYNC_PROFILER_STOP_PARAMS} are allowed
+ * @return true when profiling was stopped, false when it was not running or stopping failed
+ * @throws IllegalArgumentException when parameters are invalid
+ * @throws IllegalStateException when the profiler is not enabled or not available
+ */
+ @Override
+ public synchronized boolean stop(Map<String, String> parameters)
+ {
+     if (!isRunning())
+         return false;
+
+     validateStopParameters(parameters);
+
+     try
+     {
+         run(new ThrowingFunction<>()
+         {
+             @Override
+             public Object apply(AsyncProfiler profiler) throws Throwable
+             {
+                 maybeCreateProfilesLogDir();
+                 String outputFileName = parameters.get(ASYNC_PROFILER_STOP_OUTPUT_FILE_NAME_PARAM);
+                 File resolvedOutputFile = outputFileName != null
+                                           ? new File(logDir, validateOutputFileName(outputFileName))
+                                           : currentResultFile.get();
+
+                 String cmd = "stop";
+                 // no file is known when profiling was not started by start() of this service
+                 if (resolvedOutputFile != null)
+                     cmd += ",file=" + resolvedOutputFile.absolutePath();
+
+                 String result = profiler.execute(cmd);
+                 logger.debug("Stopped Async-Profiler: result={}, cmd={}", result, cmd);
+
+                 currentResultFile.set(null);
+
+                 return null;
+             }
+         });
+         return true;
+     }
+     catch (IllegalStateException | IllegalArgumentException e)
+     {
+         throw e;
+     }
+     catch (Throwable e)
+     {
+         logger.error("Failed to stop Async-Profiler", e);
+         return false;
+     }
+ }
+
+ /**
+ * Passes an arbitrary command to the profiler. Permitted only in unsafe mode.
+ *
+ * @param command command to execute, must not be null nor blank
+ * @return whatever the profiler returned for the command
+ * @throws SecurityException when unsafe mode is not enabled
+ */
+ @Override
+ public String execute(String command)
+ {
+     if (unsafeMode)
+     {
+         ThrowingFunction<AsyncProfiler, String> commandCall = new ThrowingFunction<>()
+         {
+             @Override
+             public String apply(AsyncProfiler profiler) throws Throwable
+             {
+                 return profiler.execute(validateCommand(command));
+             }
+         };
+         return run(commandCall);
+     }
+
+     String template = "The arbitrary command execution is not permitted " +
+                       "with %s MBean. If unsafe command execution is required, " +
+                       "start Cassandra with %s property set to true. " +
+                       "Rejected command: %s";
+     throw new SecurityException(String.format(template,
+                                               AsyncProfilerService.MBEAN_NAME,
+                                               CassandraRelevantProperties.ASYNC_PROFILER_UNSAFE_MODE.getKey(),
+                                               command));
+ }
+
+ /**
+ * Lists names of files in the results directory, sorted.
+ * <p>
+ * Listing is best-effort: on any failure an empty list is returned, but the failure is
+ * logged so that an unreadable or misconfigured directory does not go unnoticed.
+ *
+ * @return sorted names of result files, empty list when there are none or listing failed
+ */
+ @Override
+ public List<String> list()
+ {
+     try
+     {
+         maybeCreateProfilesLogDir();
+         return Arrays.stream(new File(logDir).list()).map(File::name).sorted().collect(toList());
+     }
+     catch (Throwable t)
+     {
+         logger.warn("Unable to list Async-Profiler result files in {}", logDir, t);
+         return List.of();
+     }
+ }
+
+ /**
+ * Returns the content of a result file located directly in the results directory.
+ * <p>
+ * Both the results directory and the requested path are made absolute and normalised before
+ * they are compared, so names like ".." are rejected and a relative results directory works.
+ *
+ * @param resultFile simple name of a result file
+ * @return content of the file
+ * @throws IllegalArgumentException when the name is null, blank or does not point directly
+ *                                  into the results directory
+ * @throws IOException when the file does not exist or can not be read
+ */
+ @Override
+ public byte[] fetch(String resultFile) throws IOException
+ {
+     if (resultFile == null || resultFile.isBlank())
+         throw new IllegalArgumentException("Illegal file to fetch: " + resultFile);
+
+     Path resultsDir = Path.of(logDir).toAbsolutePath().normalize();
+     Path requested = Path.of(logDir, resultFile).toAbsolutePath().normalize();
+     if (!resultsDir.equals(requested.getParent()))
+         throw new IllegalArgumentException("Illegal file to fetch: " + resultFile);
+
+     try
+     {
+         maybeCreateProfilesLogDir();
+         return Files.readAllBytes(new File(logDir, resultFile).toPath());
+     }
+     catch (NoSuchFileException t)
+     {
+         logger.error("Result file " + resultFile + " not found or error occurred while returning it.", t);
+         throw t;
+     }
+ }
+
+ /**
+ * Removes the results directory together with all files in it.
+ */
+ @Override
+ public void purge()
+ {
+     maybeCreateProfilesLogDir();
+     File resultsDir = new File(logDir);
+     resultsDir.deleteRecursive();
+ }
+
+ /**
+ * Asks the profiler for its status.
+ *
+ * @return response of the profiler to the "status" command
+ */
+ @Override
+ public String status()
+ {
+     ThrowingFunction<AsyncProfiler, String> statusQuery = new ThrowingFunction<>()
+     {
+         @Override
+         public String apply(AsyncProfiler profiler) throws Throwable
+         {
+             return profiler.execute("status");
+         }
+     };
+     return run(statusQuery);
+ }
+
+ /**
+ * @return true when both the service singleton and the profiler are set, false otherwise
+ */
+ @Override
+ public synchronized boolean isEnabled()
+ {
+     if (instance == null)
+         return false;
+
+     return asyncProfiler != null;
+ }
+
+ /**
+ * Validates a simple name of a file with profiling results.
+ * <p>
+ * A lone "." satisfies the file name pattern, but it denotes the results directory itself,
+ * hence it is rejected explicitly.
+ *
+ * @param outputFile file name to validate
+ * @return the validated file name, unchanged
+ * @throws IllegalArgumentException when the name is null, empty, "." or does not match the pattern
+ */
+ public static String validateOutputFileName(String outputFile)
+ {
+     if (outputFile == null || outputFile.trim().isEmpty())
+         throw new IllegalArgumentException("Output file name must not be null or empty.");
+
+     if (".".equals(outputFile) || !VALID_FILENAME_REGEX_PATTERN.matcher(outputFile).matches())
+         throw new IllegalArgumentException(format("Output file name must match pattern %s.", VALID_FILENAME_REGEX_PATTERN));
+
+     return outputFile;
+ }
+
+ public static String validateCommand(String command)
+ {
+ if (command == null || command.isBlank())
+ throw new IllegalArgumentException("Command can not be null or
blank string.");
+
+ return command;
+ }
+
+ /**
+ * Converts a human-friendly duration to seconds and checks it is within safe bounds.
+ * <p>
+ * The result is used as the profiler's "timeout" argument. A zero timeout is rejected because
+ * the profiler treats it as "no timeout", which would bypass the maximum safe duration.
+ *
+ * @param duration duration of profiling, in the format of {@link DurationSpec.IntSecondsBound}
+ * @return duration in seconds, between 1 and the maximum safe profiling duration
+ * @throws IllegalArgumentException when the duration is shorter than one second or longer
+ *                                  than the maximum safe profiling duration
+ */
+ public static int parseDuration(String duration)
+ {
+     int durationSeconds = new DurationSpec.IntSecondsBound(duration).toSeconds();
+     if (durationSeconds < 1)
+         throw new IllegalArgumentException("Profiling duration must be at least 1 second.");
+     if (durationSeconds > MAX_SAFE_PROFILING_DURATION)
+         throw new IllegalArgumentException(format("Max profiling duration is %s seconds. If you need longer profiling, use execute command instead.",
+                                                   MAX_SAFE_PROFILING_DURATION));
+     return durationSeconds;
+ }
+
+ /**
+ * Creates the results directory if it does not exist yet.
+ * <p>
+ * The directory is refused when its absolute path starts with the commit log, Accord journal,
+ * hints, CDC log or saved caches location, or with any of the data file locations.
+ *
+ * @throws RuntimeException when the directory is one of the refused locations or when it can
+ *                          not be created (the original failure is attached as the cause)
+ */
+ private static void maybeCreateProfilesLogDir()
+ {
+     File resultsDir = new File(logDir);
+     String dir = resultsDir.toAbsolute().toString();
+
+     if ((DatabaseDescriptor.getCommitLogLocation() != null && dir.startsWith(DatabaseDescriptor.getCommitLogLocation())) ||
+         (DatabaseDescriptor.getAccordJournalDirectory() != null && dir.startsWith(DatabaseDescriptor.getAccordJournalDirectory())) ||
+         dir.startsWith(DatabaseDescriptor.getHintsDirectory().absolutePath()) ||
+         (DatabaseDescriptor.getCDCLogLocation() != null && dir.startsWith(DatabaseDescriptor.getCDCLogLocation())) ||
+         (DatabaseDescriptor.getSavedCachesLocation() != null && dir.startsWith(DatabaseDescriptor.getSavedCachesLocation())))
+     {
+         throw new RuntimeException("You can not store Async-Profiler results into system Cassandra directory.");
+     }
+
+     for (String location : StorageService.instance.getAllDataFileLocations())
+     {
+         if (dir.startsWith(location))
+             throw new RuntimeException("You can not store Async-Profiler results into a data directory of Cassandra.");
+     }
+
+     try
+     {
+         resultsDir.createDirectoriesIfNotExists();
+     }
+     catch (Throwable t)
+     {
+         // keep the cause, otherwise the reason of the failure (permissions, disk, ...) is lost
+         throw new RuntimeException("Unable to create directory " + logDir, t);
+     }
+ }
+
+ /**
+ * Checks that the keys of start parameters are exactly {@code ASYNC_PROFILER_START_PARAMS}.
+ *
+ * @param parameters parameters passed to start
+ * @throws IllegalArgumentException when the map is null or its keys differ from the expected ones
+ */
+ private void validateStartParameters(Map<String, String> parameters)
+ {
+     // a null map is reported as an illegal argument instead of failing with NullPointerException
+     if (parameters == null || !ASYNC_PROFILER_START_PARAMS.equals(parameters.keySet()))
+     {
+         throw new IllegalArgumentException("Wrong parameters passed to start async profiler method. Passed parameters" +
+                                            " should be: " + ASYNC_PROFILER_START_PARAMS);
+     }
+ }
+
+ /**
+ * Checks that stop parameters contain only keys from {@code ASYNC_PROFILER_STOP_PARAMS}.
+ *
+ * @param parameters parameters passed to stop
+ * @throws IllegalArgumentException when the map is null or contains an unexpected key
+ */
+ private void validateStopParameters(Map<String, String> parameters)
+ {
+     // a null map is reported as an illegal argument instead of failing with NullPointerException
+     if (parameters == null || !ASYNC_PROFILER_STOP_PARAMS.containsAll(parameters.keySet()))
+     {
+         throw new IllegalArgumentException("Wrong parameters passed to stop async profiler method. Passed parameters" +
+                                            " should be: " + ASYNC_PROFILER_STOP_PARAMS);
+     }
+ }
+
+ private boolean isRunning()
+ {
+ if (!isEnabled())
+ return false;
+
+ try
+ {
+ String status = status();
+ return status != null && status.contains("Profiling is running");
+ }
+ catch (Throwable t)
+ {
+ throw new RuntimeException(t);
+ }
+ }
+
+ /**
+ * Applies a function to the profiler.
+ *
+ * @param f function to apply
+ * @return result of the function
+ * @throws IllegalStateException when profiling is not enabled or the profiler is not available,
+ *                               or when thrown by the function
+ * @throws IllegalArgumentException when thrown by the function
+ * @throws RuntimeException wrapping any other failure of the function
+ */
+ private <T> T run(ThrowingFunction<AsyncProfiler, T> f)
+ {
+     if (!ASYNC_PROFILER_ENABLED.getBoolean())
+         throw new IllegalStateException("Async Profiler is not enabled. Enable it by setting " + ASYNC_PROFILER_ENABLED.getKey() +
+                                         " property to true.");
+
+     if (asyncProfiler == null)
+         throw new IllegalStateException("Async profiler not available.");
+
+     try
+     {
+         return f.apply(asyncProfiler);
+     }
+     catch (IllegalStateException | IllegalArgumentException passThrough)
+     {
+         throw passThrough;
+     }
+     catch (Throwable other)
+     {
+         throw new RuntimeException(other);
+     }
+ }
+
+ /**
+ * Callback applied to the profiler by {@code run}; its {@code apply} method may throw any Throwable.
+ * NOTE(review): type parameter A is not used by apply, which always takes AsyncProfiler -
+ * confirm whether apply(A) was intended.
+ *
+ * @param <A> declared argument type (currently unused)
+ * @param <B> type of the result
+ */
+ public abstract static class ThrowingFunction<A, B>
+ {
+ public abstract B apply(AsyncProfiler a) throws Throwable;
+ }
+
+ /**
+ * Returns the profiler, obtaining it on first use.
+ *
+ * @return the profiler, or null when profiling is not enabled
+ * @throws IllegalStateException when the profiler can not be obtained
+ */
+ private AsyncProfiler getProfiler()
+ {
+     if (!ASYNC_PROFILER_ENABLED.getBoolean())
+         return null;
+
+     if (asyncProfiler == null)
+     {
+         try
+         {
+             asyncProfiler = AsyncProfiler.getInstance();
+         }
+         catch (Throwable t)
+         {
+             throw new IllegalStateException("Unable to get an instance of Async-Profiler", t);
+         }
+     }
+
+     return asyncProfiler;
+ }
+
+ /**
+ * Forgets the singleton and, when there was one, the profiler as well.
+ */
+ @VisibleForTesting
+ public static void reset()
+ {
+     if (instance == null)
+         return;
+
+     asyncProfiler = null;
+     instance = null;
+ }
+}
diff --git a/src/java/org/apache/cassandra/service/CassandraDaemon.java
b/src/java/org/apache/cassandra/service/CassandraDaemon.java
index 58f69a1793..67c0605111 100644
--- a/src/java/org/apache/cassandra/service/CassandraDaemon.java
+++ b/src/java/org/apache/cassandra/service/CassandraDaemon.java
@@ -263,6 +263,8 @@ public class CassandraDaemon
NativeLibrary.tryMlockall();
+ AsyncProfilerService.instance();
+
Keyspace.setInitialized();
CommitLog.instance.start();
diff --git a/src/java/org/apache/cassandra/service/StartupChecks.java
b/src/java/org/apache/cassandra/service/StartupChecks.java
index d5696fda29..daf0dc8ebe 100644
--- a/src/java/org/apache/cassandra/service/StartupChecks.java
+++ b/src/java/org/apache/cassandra/service/StartupChecks.java
@@ -147,6 +147,7 @@ public class StartupChecks
checkSSTablesFormat,
checkSystemKeyspaceState,
checkLegacyAuthTables,
+
checkKernelParamsForAsyncProfiler,
new
DataResurrectionCheck());
public StartupChecks withDefaultTests()
@@ -757,6 +758,68 @@ public class StartupChecks
}
};
+ public static final StartupCheck checkKernelParamsForAsyncProfiler = new
AsyncProfilerKernelParamsCheck();
+
+    /**
+     * Startup check inspecting {@code kernel.perf_event_paranoid} and {@code kernel.kptr_restrict}.
+     * It does nothing unless {@code ASYNC_PROFILER_ENABLED} is set. A {@code perf_event_paranoid} above 1 or
+     * a non-zero {@code kptr_restrict} is reported as a warning, or as an exception when requested.
+     */
+    public static class AsyncProfilerKernelParamsCheck implements StartupCheck
+    {
+        private static final String MESSAGE = "Async-profiler experience likely affected. Kernel symbols are unavailable due to restrictions. " +
+                                              "Try 'sysctl kernel.perf_event_paranoid=1' and 'sysctl kernel.kptr_restrict=0' or its " +
+                                              "variation on your system to resolve the issue.";
+
+        /**
+         * @return value of {@code /proc/sys/kernel/perf_event_paranoid}, or {@code Integer.MIN_VALUE} when the file has no lines
+         */
+        protected int readPerfEventParanoid()
+        {
+            return readKernelParam("/proc/sys/kernel/perf_event_paranoid");
+        }
+
+        /**
+         * @return value of {@code /proc/sys/kernel/kptr_restrict}, or {@code Integer.MIN_VALUE} when the file has no lines
+         */
+        protected int readKptrRestrict()
+        {
+            return readKernelParam("/proc/sys/kernel/kptr_restrict");
+        }
+
+        /**
+         * Parses the first line of the given file as an integer.
+         *
+         * @param path file holding the kernel parameter
+         * @return the parsed value, or {@code Integer.MIN_VALUE} when the file has no lines
+         * @throws NumberFormatException when the first line is not an integer
+         */
+        private static int readKernelParam(String path)
+        {
+            List<String> lines = FileUtils.readLines(new File(path));
+            if (lines.isEmpty())
+                return Integer.MIN_VALUE;
+
+            // surrounding whitespace must not turn a valid value into a parsing failure
+            return Integer.parseInt(lines.get(0).trim());
+        }
+
+        /**
+         * Runs the check.
+         *
+         * @param startupChecksOptions options of startup checks, not consulted by this check
+         * @param shouldThrow when {@code true}, unsuitable settings and any failure to evaluate them are thrown;
+         *                    when {@code false}, unsuitable settings are logged as a warning and evaluation
+         *                    failures are logged at debug level
+         */
+        public void execute(StartupChecksOptions startupChecksOptions, boolean shouldThrow)
+        {
+            try
+            {
+                if (!CassandraRelevantProperties.ASYNC_PROFILER_ENABLED.getBoolean())
+                    return;
+
+                int perfEventParanoid = readPerfEventParanoid();
+                int kptrRestrict = readKptrRestrict();
+
+                if (perfEventParanoid == Integer.MIN_VALUE || kptrRestrict == Integer.MIN_VALUE)
+                {
+                    logger.debug("Unable to determine values for kernel parameter of " +
+                                 "'kernel.perf_event_paranoid' and 'kernel.kptr_restrict' for Async-profiler. " +
+                                 "Its usability might be limited.");
+                }
+                else if (perfEventParanoid > 1 || kptrRestrict != 0)
+                {
+                    if (shouldThrow)
+                        throw new IllegalStateException(MESSAGE);
+                    else
+                        logger.warn(MESSAGE);
+                }
+            }
+            catch (Throwable t)
+            {
+                if (shouldThrow)
+                    throw t;
+
+                // best effort only, but leave a trace instead of dropping the failure silently
+                logger.debug("Unable to check kernel parameters for Async-profiler, skipping the check.", t);
+            }
+        }
+
+        /**
+         * Runs the check in its non-throwing mode.
+         *
+         * @param startupChecksOptions options of startup checks, passed through
+         */
+        @Override
+        public void execute(StartupChecksOptions startupChecksOptions)
+        {
+            execute(startupChecksOptions, false);
+        }
+    }
+
@VisibleForTesting
public static Path getReadAheadKBPath(String blockDirectoryPath)
{
diff --git a/src/java/org/apache/cassandra/service/StorageService.java
b/src/java/org/apache/cassandra/service/StorageService.java
index aa2df20b0e..dc9d9e3f68 100644
--- a/src/java/org/apache/cassandra/service/StorageService.java
+++ b/src/java/org/apache/cassandra/service/StorageService.java
@@ -3978,6 +3978,17 @@ public class StorageService extends
NotificationBroadcasterSupport implements IE
if (isFinalShutdown)
DiskErrorsHandlerService.get().close();
+ try
+ {
+ // stop async profiler on shutdown in case a user
+ // did not stop it beforehand on their own
+ AsyncProfilerService.instance().stop(Map.of());
+ }
+ catch (Throwable t)
+ {
+ logger.error("Failed to stop async profiler.", t);
+ }
+
try
{
// we are not shutting down
ScheduledExecutors#scheduledFastTasks to be still able to progress time
diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java
b/src/java/org/apache/cassandra/tools/NodeProbe.java
index bafb922068..52ed11509b 100644
--- a/src/java/org/apache/cassandra/tools/NodeProbe.java
+++ b/src/java/org/apache/cassandra/tools/NodeProbe.java
@@ -118,7 +118,9 @@ import org.apache.cassandra.metrics.TableMetrics;
import org.apache.cassandra.metrics.ThreadPoolMetrics;
import org.apache.cassandra.net.MessagingService;
import org.apache.cassandra.net.MessagingServiceMBean;
+import org.apache.cassandra.profiler.AsyncProfilerMBean;
import org.apache.cassandra.service.ActiveRepairServiceMBean;
+import org.apache.cassandra.service.AsyncProfilerService;
import org.apache.cassandra.service.AutoRepairService;
import org.apache.cassandra.service.AutoRepairServiceMBean;
import org.apache.cassandra.service.CacheService;
@@ -188,6 +190,7 @@ public class NodeProbe implements AutoCloseable
protected PermissionsCacheMBean pcProxy;
protected RolesCacheMBean rcProxy;
protected AutoRepairServiceMBean autoRepairProxy;
+ protected AsyncProfilerMBean asyncProfilerProxy;
protected GuardrailsMBean grProxy;
protected volatile Output output;
@@ -336,6 +339,9 @@ public class NodeProbe implements AutoCloseable
name = new ObjectName(AutoRepairService.MBEAN_NAME);
autoRepairProxy = JMX.newMBeanProxy(mbeanServerConn, name,
AutoRepairServiceMBean.class);
+ name = new ObjectName(AsyncProfilerService.MBEAN_NAME);
+ asyncProfilerProxy = JMX.newMBeanProxy(mbeanServerConn, name,
AsyncProfilerMBean.class);
+
name = new ObjectName(Guardrails.MBEAN_NAME);
grProxy = JMX.newMBeanProxy(mbeanServerConn, name,
GuardrailsMBean.class);
}
@@ -1330,6 +1336,11 @@ public class NodeProbe implements AutoCloseable
return accordProxy;
}
+ /**
+ * Returns the async-profiler MBean handle held by this probe.
+ *
+ * @return the current value of {@code asyncProfilerProxy}
+ */
+ public AsyncProfilerMBean getAsyncProfilerProxy()
+ {
+ return asyncProfilerProxy;
+ }
+
public GossiperMBean getGossProxy()
{
return gossProxy;
diff --git
a/src/java/org/apache/cassandra/tools/nodetool/AsyncProfileCommandGroup.java
b/src/java/org/apache/cassandra/tools/nodetool/AsyncProfileCommandGroup.java
new file mode 100644
index 0000000000..091069c44c
--- /dev/null
+++ b/src/java/org/apache/cassandra/tools/nodetool/AsyncProfileCommandGroup.java
@@ -0,0 +1,264 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.tools.nodetool;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.NoSuchFileException;
+import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Consumer;
+
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.profiler.AsyncProfilerMBean;
+import org.apache.cassandra.service.AsyncProfilerService.AsyncProfilerEvent;
+import org.apache.cassandra.service.AsyncProfilerService.AsyncProfilerFormat;
+import org.apache.cassandra.tools.NodeProbe;
+import org.apache.cassandra.utils.FBUtilities;
+
+import picocli.CommandLine.Command;
+import picocli.CommandLine.Option;
+import picocli.CommandLine.Parameters;
+
+import static java.nio.file.StandardOpenOption.CREATE;
+import static java.nio.file.StandardOpenOption.TRUNCATE_EXISTING;
+import static java.nio.file.StandardOpenOption.WRITE;
+import static java.util.stream.Collectors.joining;
+import static
org.apache.cassandra.service.AsyncProfilerService.ASYNC_PROFILER_START_DURATION_PARAM;
+import static
org.apache.cassandra.service.AsyncProfilerService.ASYNC_PROFILER_START_EVENTS_PARAM;
+import static
org.apache.cassandra.service.AsyncProfilerService.ASYNC_PROFILER_START_OUTPUT_FILE_NAME_PARAM;
+import static
org.apache.cassandra.service.AsyncProfilerService.ASYNC_PROFILER_START_OUTPUT_FORMAT_PARAM;
+import static
org.apache.cassandra.service.AsyncProfilerService.ASYNC_PROFILER_STOP_OUTPUT_FILE_NAME_PARAM;
+import static org.apache.cassandra.service.AsyncProfilerService.parseDuration;
+import static
org.apache.cassandra.service.AsyncProfilerService.validateCommand;
+import static
org.apache.cassandra.service.AsyncProfilerService.validateOutputFileName;
+
+@Command(name = "profile", description = "Manage Async-Profiler on a Cassandra
process",
+subcommands = {
+AsyncProfileCommandGroup.AsyncProfileStartCommand.class,
+AsyncProfileCommandGroup.AsyncProfileStopCommand.class,
+AsyncProfileCommandGroup.AsyncProfileExecuteCommand.class,
+AsyncProfileCommandGroup.AsyncProfilePurgeCommand.class,
+AsyncProfileCommandGroup.AsyncProfileListCommand.class,
+AsyncProfileCommandGroup.AsyncProfileFetchCommand.class,
+AsyncProfileCommandGroup.AsyncProfileStatusCommand.class
+})
+public class AsyncProfileCommandGroup extends AbstractCommand
+{
+ /**
+ * Executes this command group by delegating to a freshly created
+ * {@link AsyncProfileStartCommand}, whose option fields keep their declared defaults.
+ *
+ * @param probe connection to the node, handed over to the start command
+ */
+ @Override
+ public void execute(NodeProbe probe)
+ {
+ AbstractCommand cmd = new AsyncProfileStartCommand();
+ // hand this command's probe and output to the delegate before it is run
+ cmd.probe(probe);
+ cmd.logger(output);
+ cmd.run();
+ }
+
+    /**
+     * Hands the async-profiler MBean of the probe to the given action.
+     *
+     * @param probe connection to the node
+     * @param consumer action performed with the MBean
+     * @param requiresEnabledProfiler when {@code true} and the MBean reports it is not enabled, an error is
+     *                                printed and {@code System.exit(1)} is called before the action
+     */
+    private static void doWithProfiler(NodeProbe probe, Consumer<AsyncProfilerMBean> consumer, boolean requiresEnabledProfiler)
+    {
+        AsyncProfilerMBean mbean = probe.getAsyncProfilerProxy();
+
+        // isEnabled() is consulted only by commands which insist on an enabled profiler
+        boolean usable = !requiresEnabledProfiler || mbean.isEnabled();
+        if (!usable)
+        {
+            probe.output().err.println("Async-profiler native library is not enabled or not possible to load.");
+            System.exit(1);
+        }
+
+        consumer.accept(mbean);
+    }
+
+    /**
+     * Builds a default output file name from the current time in the system default zone.
+     *
+     * @param outputFormat requested output format
+     * @return {@code yyyy-MM-dd-HH-mm-ss} timestamp followed by {@code .jfr} for the jfr format
+     *         and by {@code .html} for every other format
+     */
+    private static String getOutputFileName(AsyncProfilerFormat outputFormat)
+    {
+        DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd-HH-mm-ss").withZone(ZoneId.systemDefault());
+        String timestamp = formatter.format(FBUtilities.now());
+        String extension = outputFormat == AsyncProfilerFormat.jfr ? ".jfr" : ".html";
+
+        return timestamp + extension;
+    }
+
+ /**
+ * Variant of {@code doWithProfiler} which always requires an enabled profiler.
+ *
+ * @param probe connection to the node
+ * @param consumer action performed with the async-profiler MBean
+ */
+ public static void doWithProfiler(NodeProbe probe,
Consumer<AsyncProfilerMBean> consumer)
+ {
+ doWithProfiler(probe, consumer, true);
+ }
+
+ @Command(name = "start", description = "Run Async-Profiler on a Cassandra
process")
+ public static class AsyncProfileStartCommand extends AbstractCommand
+ {
+ @Option(names = { "-e", "--event" },
+ description = "Event(s) to profile, one of or combination of 'cpu',
'alloc', " +
+ "'lock', 'wall', 'nativemem', 'cache_misses', delimited
by comma, defaults to 'cpu'")
+ public List<AsyncProfilerEvent> event =
List.of(AsyncProfilerEvent.cpu);
+
+ @Option(names = { "-o", "--output" }, description = "File name to save
profiling results into, defaults to a " +
+ "file of name
'yyyy-MM-dd-HH-mm-ss.html'")
+ public String filename;
+
+ @Option(names = { "-d", "--duration" }, description = "Duration of
profiling, defaults to '60s'. Accepts string values " +
+ "in the form of
'5m', '30s' and similar.")
+ public String duration = "60s";
+
+ @Option(names = { "-f", "--format" },
+ description = "Output format, one of 'flat', 'traces', 'collapsed',
'flamegraph', 'tree', 'jfr', defaults to 'flamegraph'")
+ public AsyncProfilerFormat outputFormat =
AsyncProfilerFormat.flamegraph;
+
+ @Override
+ public void execute(NodeProbe probe)
+ {
+ // make sure it is valid
+ parseDuration(duration);
+
+ if (filename == null)
+ filename =
AsyncProfileCommandGroup.getOutputFileName(outputFormat);
+
+ doWithProfiler(probe, profiler -> {
+ Map<String, String> startParameters =
Map.of(ASYNC_PROFILER_START_EVENTS_PARAM,
event.stream().map(Enum::name).collect(joining(",")),
+
ASYNC_PROFILER_START_OUTPUT_FORMAT_PARAM, outputFormat.name(),
+
ASYNC_PROFILER_START_DURATION_PARAM, duration,
+
ASYNC_PROFILER_START_OUTPUT_FILE_NAME_PARAM, validateOutputFileName(filename));
+ if (!profiler.start(startParameters))
+ {
+ output.err.println("Profiler has already started or there
was a failure to start it.");
+ System.exit(1);
+ }
+ });
+ }
+ }
+
+ @Command(name = "stop", description = "Stop Async-Profiler on a Cassandra
process")
+ public static class AsyncProfileStopCommand extends AbstractCommand
+ {
+ @Option(names = { "-o", "--output" }, description = "File name to save
profiling results into, defaults to a " +
+ "file of name
'yyyy-MM-dd-HH-mm-ss.html'")
+ public String filename;
+
+ @Override
+ public void execute(NodeProbe probe)
+ {
+ doWithProfiler(probe, profiler -> {
+ String file = filename != null ?
validateOutputFileName(filename) : null;
+ Map<String, String> stopParameters = file == null ? Map.of() :
Map.of(ASYNC_PROFILER_STOP_OUTPUT_FILE_NAME_PARAM, file);
+ if (!profiler.stop(stopParameters))
+ {
+ output.err.println("Profiler has already stopped or there
was a failure to stop it.");
+ System.exit(1);
+ }
+ });
+ }
+ }
+
+ /**
+ * {@code profile execute}: passes a raw command to the profiler MBean and prints what it returns.
+ */
+ @Command(name = "execute", description = "Execute an arbitrary command on
Async-Profiler on a Cassandra process.")
+ public static class AsyncProfileExecuteCommand extends AbstractCommand
+ {
+ @Parameters(index = "0", description = "Raw command to execute. There
has to be 'unsafe' profiler configured " +
+ "in Cassandra, driven by
cassandra.async_profiler.unsafe_mode property set" +
+ " to true, to be able to do
this.", arity = "1")
+ public String command;
+
+ /**
+ * Validates the command, executes it and prints the result to standard output without
+ * appending a line break. Any failure has its message printed to standard error and is
+ * followed by exit status 1.
+ *
+ * @param probe connection to the node
+ */
+ @Override
+ public void execute(NodeProbe probe)
+ {
+ doWithProfiler(probe, profiler -> {
+ try
+ {
+
output.out.print(profiler.execute(validateCommand(command)));
+ }
+ catch (Throwable ex)
+ {
+ // only the message is reported; it is printed as "null" when the throwable carries none
+ output.err.print(ex.getMessage());
+ System.exit(1);
+ }
+ });
+ }
+ }
+
+    /**
+     * {@code profile purge}: asks the node to remove its profiling results. Does not require
+     * the profiler to be enabled.
+     */
+    @Command(name = "purge", description = "Remove all profiling results from node's disk")
+    public static class AsyncProfilePurgeCommand extends AbstractCommand
+    {
+        @Override
+        protected void execute(NodeProbe probe)
+        {
+            doWithProfiler(probe, profiler -> profiler.purge(), false);
+        }
+    }
+
+ /**
+ * {@code profile list}: prints every entry returned by the MBean's {@code list()} on its own line.
+ * Does not require the profiler to be enabled.
+ */
+ @Command(name = "list", description = "List profiling result files of a
node")
+ public static class AsyncProfileListCommand extends AbstractCommand
+ {
+ @Override
+ protected void execute(NodeProbe probe)
+ {
+ doWithProfiler(probe, profiler -> {
+ for (String resultFile : profiler.list())
+ output.out.println(resultFile);
+ }, false);
+ }
+ }
+
+    /**
+     * {@code profile fetch}: copies a profiler result file from the node into a local file.
+     * Does not require the profiler to be enabled.
+     */
+    @Command(name = "fetch", description = "Copy profiler result file from a node to a local file")
+    public static class AsyncProfileFetchCommand extends AbstractCommand
+    {
+
+        @Parameters(index = "0", description = "Remote profiler file name", arity = "1")
+        private String remoteFile;
+
+        @Parameters(index = "1", description = "Local file name", arity = "1")
+        private String localFile;
+
+        /**
+         * Fetches the remote file and writes it to the local file, creating or truncating it.
+         * A missing file or an illegal argument is reported on standard error followed by exit status 1;
+         * any other I/O failure is rethrown wrapped in a {@link RuntimeException}.
+         *
+         * @param probe connection to the node
+         */
+        @Override
+        protected void execute(NodeProbe probe)
+        {
+            doWithProfiler(probe, profiler -> {
+                try
+                {
+                    Files.write(new File(localFile).toPath(), profiler.fetch(remoteFile), CREATE, TRUNCATE_EXISTING, WRITE);
+                }
+                catch (NoSuchFileException e)
+                {
+                    // Files.write raises the very same exception when the directory of the local target
+                    // is missing, so look at the path carried by the exception before blaming the remote file
+                    String localPath = new File(localFile).toPath().toString();
+                    if (localPath.equals(e.getFile()))
+                        probe.output().err.println("Local file " + localFile + " could not be created, check that its parent directory exists.");
+                    else
+                        probe.output().err.println("Remote file " + remoteFile + " was not found.");
+                    System.exit(1);
+                }
+                catch (IllegalArgumentException e)
+                {
+                    probe.output().err.println(e.getMessage());
+                    System.exit(1);
+                }
+                catch (IOException e)
+                {
+                    throw new RuntimeException(e);
+                }
+            }, false);
+        }
+    }
+
+    /**
+     * {@code profile status}: prints the status reported by the profiler MBean, without appending a line break.
+     */
+    @Command(name = "status", description = "Get status of profiling")
+    public static class AsyncProfileStatusCommand extends AbstractCommand
+    {
+        @Override
+        protected void execute(NodeProbe probe)
+        {
+            doWithProfiler(probe, profiler -> {
+                output.out.print(profiler.status());
+            });
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/java/org/apache/cassandra/tools/nodetool/NodetoolCommand.java
b/src/java/org/apache/cassandra/tools/nodetool/NodetoolCommand.java
index 98793e2748..33292b2483 100644
--- a/src/java/org/apache/cassandra/tools/nodetool/NodetoolCommand.java
+++ b/src/java/org/apache/cassandra/tools/nodetool/NodetoolCommand.java
@@ -149,6 +149,7 @@ import static
org.apache.cassandra.tools.nodetool.Help.printTopCommandUsage;
NetStats.class,
PauseHandoff.class,
ProfileLoad.class,
+ AsyncProfileCommandGroup.class,
ProxyHistograms.class,
RangeKeySample.class,
Rebuild.class,
diff --git
a/test/distributed/org/apache/cassandra/distributed/mock/nodetool/InternalNodeProbe.java
b/test/distributed/org/apache/cassandra/distributed/mock/nodetool/InternalNodeProbe.java
index 1ab4f843b7..62e0dad3ea 100644
---
a/test/distributed/org/apache/cassandra/distributed/mock/nodetool/InternalNodeProbe.java
+++
b/test/distributed/org/apache/cassandra/distributed/mock/nodetool/InternalNodeProbe.java
@@ -41,6 +41,7 @@ import org.apache.cassandra.locator.SnitchAdapter;
import org.apache.cassandra.metrics.CassandraMetricsRegistry;
import org.apache.cassandra.net.MessagingService;
import org.apache.cassandra.service.ActiveRepairService;
+import org.apache.cassandra.service.AsyncProfilerService;
import org.apache.cassandra.service.CacheService;
import org.apache.cassandra.service.CacheServiceMBean;
import org.apache.cassandra.service.GCInspector;
@@ -90,6 +91,7 @@ public class InternalNodeProbe extends NodeProbe
arsProxy = ActiveRepairService.instance();
memProxy = ManagementFactory.getMemoryMXBean();
runtimeProxy = ManagementFactory.getRuntimeMXBean();
+ asyncProfilerProxy = AsyncProfilerService.instance();
}
@Override
diff --git
a/test/distributed/org/apache/cassandra/distributed/test/AsyncProfilerTest.java
b/test/distributed/org/apache/cassandra/distributed/test/AsyncProfilerTest.java
new file mode 100644
index 0000000000..9032effb5f
--- /dev/null
+++
b/test/distributed/org/apache/cassandra/distributed/test/AsyncProfilerTest.java
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.distributed.test;
+
+import java.nio.file.StandardOpenOption;
+import java.util.List;
+import java.util.UUID;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import org.apache.cassandra.distributed.Cluster;
+import org.apache.cassandra.distributed.api.Feature;
+import org.apache.cassandra.distributed.api.NodeToolResult;
+import org.apache.cassandra.distributed.shared.Uninterruptibles;
+import org.apache.cassandra.distributed.shared.WithProperties;
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.io.util.FileUtils;
+import org.apache.cassandra.service.AsyncProfilerService;
+import org.apache.cassandra.service.StartupChecks;
+
+import static java.util.concurrent.TimeUnit.SECONDS;
+import static
org.apache.cassandra.config.CassandraRelevantProperties.ASYNC_PROFILER_ENABLED;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+public class AsyncProfilerTest extends TestBaseImpl
+{
+ @Rule
+ public TemporaryFolder tmpDir = new TemporaryFolder();
+
+ private Cluster cluster;
+
+ /**
+ * Kernel parameters check for tests. Both readers are overridden to return fixed values,
+ * so the files under /proc/sys/kernel are never read.
+ */
+ public static class TestAsyncProfilerKernelParamsCheck extends
StartupChecks.AsyncProfilerKernelParamsCheck
+ {
+ @Override
+ protected int readPerfEventParanoid()
+ {
+ return 1; // accepted by the check, which complains only about values above 1
+ }
+
+ @Override
+ protected int readKptrRestrict()
+ {
+ return 0; // accepted by the check, which complains about any non-zero value
+ }
+ }
+
+ /**
+ * Walks through the nodetool profile subcommands against a single node with the profiler enabled:
+ * status, start and stop (with and without an output file), fetch, list, purge and a repeated start.
+ */
+ @Test
+ public void testNodetoolCommands() throws Throwable
+ {
+ File newTmpDir = new File(tmpDir.newFolder());
+
+ try (WithProperties withProperties = new WithProperties()
+ .set(ASYNC_PROFILER_ENABLED,
true);
+ Cluster cluster = init(builder().withNodes(1).withConfig(c ->
c.with(Feature.JMX)).start()))
+ {
+ // helper methods below talk to the node through this field
+ this.cluster = cluster;
+
+ // status / start / stop
+ assertTrue(status().contains("Profiler is not active"));
+ startAndAssert();
+ Uninterruptibles.sleepUninterruptibly(10, SECONDS);
+ stop();
+
+ // start / stop with file name
+ startAndAssert();
+ Uninterruptibles.sleepUninterruptibly(10, SECONDS);
+ String fileName = UUID.randomUUID().toString();
+ stop(fileName);
+ assertTrue(list().contains(fileName));
+
+ // fetch into a local file and make sure something was written
+ File destination = new File(newTmpDir,
UUID.randomUUID().toString());
+ fetch(fileName, destination.absolutePath());
+ assertTrue(destination.length() != 0);
+
+ // list
+ assertFalse(list().isEmpty());
+ assertTrue(list().contains(fileName));
+
+ // purge
+ purge();
+ assertTrue(list().isEmpty());
+
+ // double start, the second attempt has to fail
+ startAndAssert();
+ NodeToolResult secondStart = start();
+ secondStart.asserts().failure();
+ assertTrue(secondStart.getStderr().contains("Profiler has already
started"));
+ }
+ }
+
+ /**
+ * Checks that fetch, list and purge succeed while ASYNC_PROFILER_ENABLED is set to false.
+ * A file is placed into a temporary directory which is then given to the profiler service of the node.
+ */
+ @Test
+ public void testListPurgeFetchWorksWithDisabledProfiler() throws Throwable
+ {
+ File newTmpDir = new File(tmpDir.newFolder());
+ try (WithProperties withProperties = new
WithProperties().set(ASYNC_PROFILER_ENABLED, false);
+ Cluster cluster = init(builder().withNodes(1).withConfig(c ->
c.with(Feature.JMX)).start()))
+ {
+ // helper methods below talk to the node through this field
+ this.cluster = cluster;
+
+ String fileNameToWriteTo = UUID.randomUUID().toString();
+
+ FileUtils.write(new File(newTmpDir, fileNameToWriteTo),
List.of("hello world"), StandardOpenOption.CREATE_NEW);
+
+ // Initialize AsyncProfilerService instance in the cluster node
context with the test directory
+ // NOTE(review): the meaning of the boolean argument is not visible here - confirm against AsyncProfilerService
+ String tmpDirPath = newTmpDir.absolutePath();
+ cluster.get(1).runOnInstance(() -> {
+ AsyncProfilerService.instance(tmpDirPath, true, new
TestAsyncProfilerKernelParamsCheck());
+ });
+
+ // fetch into a local file and make sure something was written
+ String destinationFileName = UUID.randomUUID().toString();
+ File destination = new File(newTmpDir, destinationFileName);
+ fetch(fileNameToWriteTo, destination.absolutePath());
+ assertTrue(destination.length() != 0);
+
+ // list
+ assertFalse(list().isEmpty());
+ assertTrue(list().contains(fileNameToWriteTo));
+
+ // purge
+ purge();
+ assertTrue(list().isEmpty());
+ }
+ }
+
+    /**
+     * Runs {@code nodetool profile list} on the first node, expecting success.
+     *
+     * @return standard output of the command
+     */
+    private String list()
+    {
+        NodeToolResult listing = cluster.get(1).nodetoolResult("profile", "list");
+        listing.asserts().success();
+        return listing.getStdout();
+    }
+
+    /**
+     * Runs {@code nodetool profile stop} on the first node, expecting success.
+     */
+    private void stop()
+    {
+        cluster.get(1).nodetoolResult("profile", "stop").asserts().success();
+    }
+
+    /**
+     * Runs {@code nodetool profile stop -o <file>} on the first node, expecting success.
+     *
+     * @param file output file name passed to the command
+     */
+    private void stop(String file)
+    {
+        cluster.get(1).nodetoolResult("profile", "stop", "-o", file).asserts().success();
+    }
+
+    /**
+     * Runs {@code nodetool profile start -e cpu} on the first node, expecting success.
+     *
+     * @return result of the command
+     */
+    private NodeToolResult startAndAssert()
+    {
+        NodeToolResult started = start();
+        started.asserts().success();
+        return started;
+    }
+
+    /**
+     * Runs {@code nodetool profile start -e cpu} on the first node without asserting on the outcome.
+     *
+     * @return result of the command
+     */
+    private NodeToolResult start()
+    {
+        NodeToolResult outcome = cluster.get(1).nodetoolResult("profile", "start", "-e", "cpu");
+        return outcome;
+    }
+
+    /**
+     * Runs {@code nodetool profile status} on the first node, expecting success.
+     *
+     * @return standard output of the command
+     */
+    private String status()
+    {
+        NodeToolResult statusResult = cluster.get(1).nodetoolResult("profile", "status");
+        statusResult.asserts().success();
+        return statusResult.getStdout();
+    }
+
+    /**
+     * Runs {@code nodetool profile purge} on the first node, expecting success.
+     */
+    private void purge()
+    {
+        cluster.get(1).nodetoolResult("profile", "purge").asserts().success();
+    }
+
+    /**
+     * Runs {@code nodetool profile fetch <what> <where>} on the first node, expecting success.
+     *
+     * @param what name of the remote profiler file
+     * @param where local file name to copy it into
+     */
+    private void fetch(String what, String where)
+    {
+        cluster.get(1).nodetoolResult("profile", "fetch", what, where).asserts().success();
+    }
+}
diff --git a/test/resources/nodetool/help/nodetool
b/test/resources/nodetool/help/nodetool
index b0301217f6..7597462fa0 100644
--- a/test/resources/nodetool/help/nodetool
+++ b/test/resources/nodetool/help/nodetool
@@ -97,6 +97,7 @@ The most commonly used nodetool commands are:
move Move node on the token ring to a new
token
netstats Print network information on provided
host (connecting node by default)
pausehandoff Pause hints delivery process
+ profile Manage Async-Profiler on a Cassandra
process
profileload Low footprint profiling of activity
for a period of time
proxyhistograms Print statistic histograms for network
operations
rangekeysample Shows the sampled keys held across all
keyspaces
diff --git a/test/resources/nodetool/help/profile
b/test/resources/nodetool/help/profile
new file mode 100644
index 0000000000..3724d5f3e7
--- /dev/null
+++ b/test/resources/nodetool/help/profile
@@ -0,0 +1,102 @@
+NAME
+ nodetool profile - Manage Async-Profiler on a Cassandra process
+
+SYNOPSIS
+ nodetool [(-h <host> | --host <host>)] [(-p <port> | --port <port>)]
+ [(-pp | --print-port)] [(-pw <password> | --password
<password>)]
+ [(-pwf <passwordFilePath> | --password-file
<passwordFilePath>)]
+ [(-u <username> | --username <username>)] profile <command>
[<args>]
+
+ nodetool [(-h <host> | --host <host>)] [(-p <port> | --port <port>)]
+ [(-pp | --print-port)] [(-pw <password> | --password
<password>)]
+ [(-pwf <passwordFilePath> | --password-file
<passwordFilePath>)]
+ [(-u <username> | --username <username>)] profile start
+ [(-d <duration> | --duration <duration>)]
+ [(-e <event> | --event <event>)...]
+ [(-f <outputFormat> | --format <outputFormat>)]
+ [(-o <filename> | --output <filename>)]
+
+ nodetool [(-h <host> | --host <host>)] [(-p <port> | --port <port>)]
+ [(-pp | --print-port)] [(-pw <password> | --password
<password>)]
+ [(-pwf <passwordFilePath> | --password-file
<passwordFilePath>)]
+ [(-u <username> | --username <username>)] profile stop
+ [(-o <filename> | --output <filename>)]
+
+ nodetool [(-h <host> | --host <host>)] [(-p <port> | --port <port>)]
+ [(-pp | --print-port)] [(-pw <password> | --password
<password>)]
+ [(-pwf <passwordFilePath> | --password-file
<passwordFilePath>)]
+ [(-u <username> | --username <username>)] profile execute [--]
<command>
+
+ nodetool [(-h <host> | --host <host>)] [(-p <port> | --port <port>)]
+ [(-pp | --print-port)] [(-pw <password> | --password
<password>)]
+ [(-pwf <passwordFilePath> | --password-file
<passwordFilePath>)]
+ [(-u <username> | --username <username>)] profile purge
+
+ nodetool [(-h <host> | --host <host>)] [(-p <port> | --port <port>)]
+ [(-pp | --print-port)] [(-pw <password> | --password
<password>)]
+ [(-pwf <passwordFilePath> | --password-file
<passwordFilePath>)]
+ [(-u <username> | --username <username>)] profile list
+
+ nodetool [(-h <host> | --host <host>)] [(-p <port> | --port <port>)]
+ [(-pp | --print-port)] [(-pw <password> | --password
<password>)]
+ [(-pwf <passwordFilePath> | --password-file
<passwordFilePath>)]
+ [(-u <username> | --username <username>)] profile fetch [--]
+ <remoteFile> <localFile>
+
+ nodetool [(-h <host> | --host <host>)] [(-p <port> | --port <port>)]
+ [(-pp | --print-port)] [(-pw <password> | --password
<password>)]
+ [(-pwf <passwordFilePath> | --password-file
<passwordFilePath>)]
+ [(-u <username> | --username <username>)] profile status
+
+OPTIONS
+ -h <host>, --host <host>
+ Node hostname or ip address
+
+ -p <port>, --port <port>
+ Remote jmx agent port number
+
+ -pp, --print-port
+ Operate in 4.0 mode with hosts disambiguated by port number
+
+ -pw <password>, --password <password>
+ Remote jmx agent password
+
+ -pwf <passwordFilePath>, --password-file <passwordFilePath>
+ Path to the JMX password file
+
+ -u <username>, --username <username>
+ Remote jmx agent username
+
+COMMANDS
+ With no arguments, Display help information
+
+ start
+ Run Async-Profiler on a Cassandra process
+
+ With --event option, Event(s) to profile, one of or combination of
'cpu',
+ 'alloc', 'lock', 'wall', 'nativemem', 'cache_misses', delimited by
comma,
+ defaults to 'cpu'
+
+ With --output option, File name to save profiling results into,
defaults to
+ a file of name 'yyyy-MM-dd-HH-mm-ss.html'
+
+ With --duration option, Duration of profiling, defaults to '60s'.
Accepts
+ string values in the form of '5m', '30s' and similar.
+
+ With --format option, Output format, one of 'flat', 'traces',
'collapsed',
+ 'flamegraph', 'tree', 'jfr', defaults to 'flamegraph'
+ stop
+ Stop Async-Profiler on a Cassandra process
+
+ With --output option, File name to save profiling results into,
defaults to
+ a file of name 'yyyy-MM-dd-HH-mm-ss.html'
+ execute
+ Execute an arbitrary command on Async-Profiler on a Cassandra
process.
+ purge
+ Remove all profiling results from node's disk
+ list
+ List profiling result files of a node
+ fetch
+ Copy profiler result file from a node to a local file
+ status
+ Get status of profiling
diff --git a/test/resources/nodetool/help/profile$execute
b/test/resources/nodetool/help/profile$execute
new file mode 100644
index 0000000000..b95e75fcb3
--- /dev/null
+++ b/test/resources/nodetool/help/profile$execute
@@ -0,0 +1,38 @@
+NAME
+ nodetool profile execute - Execute an arbitrary command on
+ Async-Profiler on a Cassandra process.
+
+SYNOPSIS
+ nodetool [(-h <host> | --host <host>)] [(-p <port> | --port <port>)]
+ [(-pp | --print-port)] [(-pw <password> | --password
<password>)]
+ [(-pwf <passwordFilePath> | --password-file
<passwordFilePath>)]
+ [(-u <username> | --username <username>)] profile execute [--]
<command>
+
+OPTIONS
+ -h <host>, --host <host>
+ Node hostname or ip address
+
+ -p <port>, --port <port>
+ Remote jmx agent port number
+
+ -pp, --print-port
+ Operate in 4.0 mode with hosts disambiguated by port number
+
+ -pw <password>, --password <password>
+ Remote jmx agent password
+
+ -pwf <passwordFilePath>, --password-file <passwordFilePath>
+ Path to the JMX password file
+
+ -u <username>, --username <username>
+ Remote jmx agent username
+
+ --
+ This option can be used to separate command-line options from the
+ list of argument, (useful when arguments might be mistaken for
+ command-line options
+
+ <command>
+ Raw command to execute. There has to be 'unsafe' profiler
+ configured in Cassandra, driven by cassandra.async_profiler.
+ unsafe_mode property set to true, to be able to do this.
diff --git a/test/resources/nodetool/help/profile$fetch
b/test/resources/nodetool/help/profile$fetch
new file mode 100644
index 0000000000..351bac9332
--- /dev/null
+++ b/test/resources/nodetool/help/profile$fetch
@@ -0,0 +1,40 @@
+NAME
+ nodetool profile fetch - Copy profiler result file from a node to a
+ local file
+
+SYNOPSIS
+ nodetool [(-h <host> | --host <host>)] [(-p <port> | --port <port>)]
+ [(-pp | --print-port)] [(-pw <password> | --password
<password>)]
+ [(-pwf <passwordFilePath> | --password-file
<passwordFilePath>)]
+ [(-u <username> | --username <username>)] profile fetch [--]
+ <remoteFile> <localFile>
+
+OPTIONS
+ -h <host>, --host <host>
+ Node hostname or ip address
+
+ -p <port>, --port <port>
+ Remote jmx agent port number
+
+ -pp, --print-port
+ Operate in 4.0 mode with hosts disambiguated by port number
+
+ -pw <password>, --password <password>
+ Remote jmx agent password
+
+ -pwf <passwordFilePath>, --password-file <passwordFilePath>
+ Path to the JMX password file
+
+ -u <username>, --username <username>
+ Remote jmx agent username
+
+ --
+ This option can be used to separate command-line options from the
+ list of argument, (useful when arguments might be mistaken for
+ command-line options
+
+ <remoteFile>
+ Remote profiler file name
+
+ <localFile>
+ Local file name
diff --git a/test/resources/nodetool/help/profile$list
b/test/resources/nodetool/help/profile$list
new file mode 100644
index 0000000000..46c08f6967
--- /dev/null
+++ b/test/resources/nodetool/help/profile$list
@@ -0,0 +1,27 @@
+NAME
+ nodetool profile list - List profiling result files of a node
+
+SYNOPSIS
+ nodetool [(-h <host> | --host <host>)] [(-p <port> | --port <port>)]
+ [(-pp | --print-port)] [(-pw <password> | --password
<password>)]
+ [(-pwf <passwordFilePath> | --password-file
<passwordFilePath>)]
+ [(-u <username> | --username <username>)] profile list
+
+OPTIONS
+ -h <host>, --host <host>
+ Node hostname or ip address
+
+ -p <port>, --port <port>
+ Remote jmx agent port number
+
+ -pp, --print-port
+ Operate in 4.0 mode with hosts disambiguated by port number
+
+ -pw <password>, --password <password>
+ Remote jmx agent password
+
+ -pwf <passwordFilePath>, --password-file <passwordFilePath>
+ Path to the JMX password file
+
+ -u <username>, --username <username>
+ Remote jmx agent username
diff --git a/test/resources/nodetool/help/profile$purge
b/test/resources/nodetool/help/profile$purge
new file mode 100644
index 0000000000..5afb33fade
--- /dev/null
+++ b/test/resources/nodetool/help/profile$purge
@@ -0,0 +1,27 @@
+NAME
+ nodetool profile purge - Remove all profiling results from node's disk
+
+SYNOPSIS
+ nodetool [(-h <host> | --host <host>)] [(-p <port> | --port <port>)]
+ [(-pp | --print-port)] [(-pw <password> | --password
<password>)]
+ [(-pwf <passwordFilePath> | --password-file
<passwordFilePath>)]
+ [(-u <username> | --username <username>)] profile purge
+
+OPTIONS
+ -h <host>, --host <host>
+ Node hostname or ip address
+
+ -p <port>, --port <port>
+ Remote jmx agent port number
+
+ -pp, --print-port
+ Operate in 4.0 mode with hosts disambiguated by port number
+
+ -pw <password>, --password <password>
+ Remote jmx agent password
+
+ -pwf <passwordFilePath>, --password-file <passwordFilePath>
+ Path to the JMX password file
+
+ -u <username>, --username <username>
+ Remote jmx agent username
diff --git a/test/resources/nodetool/help/profile$start
b/test/resources/nodetool/help/profile$start
new file mode 100644
index 0000000000..85a4238e56
--- /dev/null
+++ b/test/resources/nodetool/help/profile$start
@@ -0,0 +1,48 @@
+NAME
+ nodetool profile start - Run Async-Profiler on a Cassandra process
+
+SYNOPSIS
+ nodetool [(-h <host> | --host <host>)] [(-p <port> | --port <port>)]
+ [(-pp | --print-port)] [(-pw <password> | --password
<password>)]
+ [(-pwf <passwordFilePath> | --password-file
<passwordFilePath>)]
+ [(-u <username> | --username <username>)] profile start
+ [(-d <duration> | --duration <duration>)]
+ [(-e <event> | --event <event>)...]
+ [(-f <outputFormat> | --format <outputFormat>)]
+ [(-o <filename> | --output <filename>)]
+
+OPTIONS
+ -d <duration>, --duration <duration>
+ Duration of profiling, defaults to '60s'. Accepts string values in
+ the form of '5m', '30s' and similar.
+
+ -e <event>, --event <event>
+ Event(s) to profile, one of or combination of 'cpu', 'alloc',
+ 'lock', 'wall', 'nativemem', 'cache_misses', delimited by comma,
+ defaults to 'cpu'
+
+ -f <outputFormat>, --format <outputFormat>
+ Output format, one of 'flat', 'traces', 'collapsed', 'flamegraph',
+ 'tree', 'jfr', defaults to 'flamegraph'
+
+ -h <host>, --host <host>
+ Node hostname or ip address
+
+ -o <filename>, --output <filename>
+ File name to save profiling results into, defaults to a file of
+ name 'yyyy-MM-dd-HH-mm-ss.html'
+
+ -p <port>, --port <port>
+ Remote jmx agent port number
+
+ -pp, --print-port
+ Operate in 4.0 mode with hosts disambiguated by port number
+
+ -pw <password>, --password <password>
+ Remote jmx agent password
+
+ -pwf <passwordFilePath>, --password-file <passwordFilePath>
+ Path to the JMX password file
+
+ -u <username>, --username <username>
+ Remote jmx agent username
diff --git a/test/resources/nodetool/help/profile$status
b/test/resources/nodetool/help/profile$status
new file mode 100644
index 0000000000..029e47a118
--- /dev/null
+++ b/test/resources/nodetool/help/profile$status
@@ -0,0 +1,27 @@
+NAME
+ nodetool profile status - Get status of profiling
+
+SYNOPSIS
+ nodetool [(-h <host> | --host <host>)] [(-p <port> | --port <port>)]
+ [(-pp | --print-port)] [(-pw <password> | --password
<password>)]
+ [(-pwf <passwordFilePath> | --password-file
<passwordFilePath>)]
+ [(-u <username> | --username <username>)] profile status
+
+OPTIONS
+ -h <host>, --host <host>
+ Node hostname or ip address
+
+ -p <port>, --port <port>
+ Remote jmx agent port number
+
+ -pp, --print-port
+ Operate in 4.0 mode with hosts disambiguated by port number
+
+ -pw <password>, --password <password>
+ Remote jmx agent password
+
+ -pwf <passwordFilePath>, --password-file <passwordFilePath>
+ Path to the JMX password file
+
+ -u <username>, --username <username>
+ Remote jmx agent username
diff --git a/test/resources/nodetool/help/profile$stop
b/test/resources/nodetool/help/profile$stop
new file mode 100644
index 0000000000..547490c1df
--- /dev/null
+++ b/test/resources/nodetool/help/profile$stop
@@ -0,0 +1,32 @@
+NAME
+ nodetool profile stop - Stop Async-Profiler on a Cassandra process
+
+SYNOPSIS
+ nodetool [(-h <host> | --host <host>)] [(-p <port> | --port <port>)]
+ [(-pp | --print-port)] [(-pw <password> | --password
<password>)]
+ [(-pwf <passwordFilePath> | --password-file
<passwordFilePath>)]
+ [(-u <username> | --username <username>)] profile stop
+ [(-o <filename> | --output <filename>)]
+
+OPTIONS
+ -h <host>, --host <host>
+ Node hostname or ip address
+
+ -o <filename>, --output <filename>
+ File name to save profiling results into, defaults to a file of
+ name 'yyyy-MM-dd-HH-mm-ss.html'
+
+ -p <port>, --port <port>
+ Remote jmx agent port number
+
+ -pp, --print-port
+ Operate in 4.0 mode with hosts disambiguated by port number
+
+ -pw <password>, --password <password>
+ Remote jmx agent password
+
+ -pwf <passwordFilePath>, --password-file <passwordFilePath>
+ Path to the JMX password file
+
+ -u <username>, --username <username>
+ Remote jmx agent username
diff --git
a/test/unit/org/apache/cassandra/service/AsyncProfilerServiceTest.java
b/test/unit/org/apache/cassandra/service/AsyncProfilerServiceTest.java
new file mode 100644
index 0000000000..fc94a3de26
--- /dev/null
+++ b/test/unit/org/apache/cassandra/service/AsyncProfilerServiceTest.java
@@ -0,0 +1,315 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.service;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.UUID;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.distributed.shared.WithProperties;
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.io.util.FileUtils;
+
+import static java.lang.String.format;
+import static
org.apache.cassandra.config.CassandraRelevantProperties.ASYNC_PROFILER_ENABLED;
+import static
org.apache.cassandra.config.CassandraRelevantProperties.ASYNC_PROFILER_UNSAFE_MODE;
+import static
org.apache.cassandra.service.AsyncProfilerService.ASYNC_PROFILER_START_DURATION_PARAM;
+import static
org.apache.cassandra.service.AsyncProfilerService.ASYNC_PROFILER_START_EVENTS_PARAM;
+import static
org.apache.cassandra.service.AsyncProfilerService.ASYNC_PROFILER_START_OUTPUT_FILE_NAME_PARAM;
+import static
org.apache.cassandra.service.AsyncProfilerService.ASYNC_PROFILER_START_OUTPUT_FORMAT_PARAM;
+import static
org.apache.cassandra.service.AsyncProfilerService.ASYNC_PROFILER_STOP_OUTPUT_FILE_NAME_PARAM;
+import static
org.apache.cassandra.service.AsyncProfilerService.AsyncProfilerEvent.cpu;
+import static
org.apache.cassandra.service.AsyncProfilerService.AsyncProfilerFormat.flamegraph;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+public class AsyncProfilerServiceTest
+{
+ // Directory passed to AsyncProfilerService.instance(...) in getProfiler(); also the parent of testOutputFile.
+ private static final String testOutputPath = FileUtils.getTempDir().path();
+
+ // Service instance that tearDown() tries to stop after each test.
+ private AsyncProfilerService profiler;
+ // Per-test output file handle, set in setUp() to a random-UUID name under testOutputPath.
+ private File testOutputFile;
+
+ /**
+ * Test-friendly kernel params check that returns valid values without
+ * reading from /proc, so the tests do not depend on the host's kernel settings.
+ */
+ private static class TestAsyncProfilerKernelParamsCheck extends
StartupChecks.AsyncProfilerKernelParamsCheck
+ {
+ // Fixed stand-in for the perf_event_paranoid value.
+ @Override
+ protected int readPerfEventParanoid()
+ {
+ return 1; // Valid value (must be <= 1)
+ }
+
+ // Fixed stand-in for the kptr_restrict value.
+ @Override
+ protected int readKptrRestrict()
+ {
+ return 0; // Valid value (must be == 0)
+ }
+ }
+
+ /**
+ * Calls DatabaseDescriptor.daemonInitialization() once before any test in this class runs.
+ */
+ @BeforeClass
+ public static void setUpClass()
+ {
+ DatabaseDescriptor.daemonInitialization();
+ }
+
+ /**
+ * Enables the profiler through ASYNC_PROFILER_ENABLED and picks a fresh,
+ * randomly named output file under testOutputPath for the test about to run.
+ *
+ * NOTE(review): the property is set directly here rather than through
+ * WithProperties, so nothing in this method restores its previous value.
+ */
+ @Before
+ public void setUp()
+ {
+ ASYNC_PROFILER_ENABLED.setBoolean(true);
+ testOutputFile = new File(testOutputPath,
UUID.randomUUID().toString());
+ }
+
+    /**
+     * Best-effort cleanup after each test: stops the profiler handed out by
+     * {@link #getProfiler()} (if the test obtained one) and always removes the
+     * per-test output file so no profiling results are left in the temp directory.
+     */
+    @After
+    public void tearDown()
+    {
+        try
+        {
+            if (profiler != null)
+            {
+                // Pass the bare file name, exactly as the tests themselves do when stopping.
+                Map<String, String> stopParameters = Map.of(ASYNC_PROFILER_STOP_OUTPUT_FILE_NAME_PARAM, testOutputFile.name());
+                profiler.stop(stopParameters);
+            }
+        }
+        catch (Exception ignored)
+        {
+            // Expected when the test already stopped the profiler or never started it;
+            // this call only exists to stop a profiler left running by a failed test.
+        }
+        finally
+        {
+            // Must run regardless of the outcome of stop, otherwise output files pile up.
+            testOutputFile.deleteIfExists();
+            profiler = null;
+        }
+    }
+
+    /**
+     * Calls {@code AsyncProfilerService.reset()} and then obtains a service instance
+     * bound to the test output directory and the test kernel-params check.
+     * The instance is also remembered in the {@code profiler} field so that
+     * {@link #tearDown()} can stop it if the test fails midway.
+     *
+     * @return the service instance to exercise in the test
+     */
+    private AsyncProfilerService getProfiler()
+    {
+        AsyncProfilerService.reset();
+        profiler = AsyncProfilerService.instance(testOutputPath, false, new TestAsyncProfilerKernelParamsCheck());
+        return profiler;
+    }
+
+ /**
+ * Happy path with ASYNC_PROFILER_UNSAFE_MODE set to false: starts a cpu/flamegraph
+ * session with a "10s" duration, stops it after sleeping 5 seconds, then asserts
+ * that the output file exists and is non-empty, that list() contains its name,
+ * that fetch() returns non-null content and that status() reports an inactive profiler.
+ */
+ @Test
+ public void testStartAndStopProfiling() throws Throwable
+ {
+ try (WithProperties properties = new
WithProperties().set(ASYNC_PROFILER_UNSAFE_MODE, false))
+ {
+ Map<String, String> startParameters =
Map.of(ASYNC_PROFILER_START_EVENTS_PARAM, cpu.name(),
+
ASYNC_PROFILER_START_OUTPUT_FORMAT_PARAM, flamegraph.name(),
+
ASYNC_PROFILER_START_DURATION_PARAM, "10s",
+
ASYNC_PROFILER_START_OUTPUT_FILE_NAME_PARAM, testOutputFile.name());
+
+ Map<String, String> stopParameters =
Map.of(ASYNC_PROFILER_STOP_OUTPUT_FILE_NAME_PARAM, testOutputFile.name());
+ AsyncProfilerService profiler = getProfiler();
+ profiler.start(startParameters);
+ Thread.sleep(5000);
+ profiler.stop(stopParameters);
+
+ assertTrue("Output profile file should exist",
testOutputFile.exists());
+ assertTrue("Output profile file should not be empty",
testOutputFile.length() > 0);
+
+ List<String> list = profiler.list();
+ assertFalse(list.isEmpty());
+
+ Optional<String> resultFile = list.stream().filter(f ->
f.equals(testOutputFile.name())).findFirst();
+ assertTrue(resultFile.isPresent());
+ byte[] content = profiler.fetch(resultFile.get());
+ assertNotNull(content);
+
+ assertEquals("Profiler is not active\n", profiler.status());
+ }
+ }
+
+ /**
+ * start() called with an empty parameter map must throw IllegalArgumentException
+ * whose message explains which parameters are expected.
+ */
+ @Test
+ public void testInvalidParametersThrowsException()
+ {
+ try (WithProperties properties = new
WithProperties().set(ASYNC_PROFILER_UNSAFE_MODE, false))
+ {
+ Map<String, String> startParameters = Map.of();
+ assertThatThrownBy(() -> getProfiler().start(startParameters))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("Wrong parameters passed to start async
profiler method. Passed parameters " +
+ "should be:");
+ }
+ }
+
+ /**
+ * start() with an unknown event name ("not_a_real_event") and otherwise valid
+ * parameters must throw IllegalArgumentException listing the supported events.
+ */
+ @Test
+ public void testInvalidEventThrowsException()
+ {
+ try (WithProperties properties = new
WithProperties().set(ASYNC_PROFILER_UNSAFE_MODE, false))
+ {
+ Map<String, String> startParameters =
Map.of(ASYNC_PROFILER_START_EVENTS_PARAM, "not_a_real_event",
+
ASYNC_PROFILER_START_OUTPUT_FORMAT_PARAM, "flamegraph",
+
ASYNC_PROFILER_START_DURATION_PARAM, "60s",
+
ASYNC_PROFILER_START_OUTPUT_FILE_NAME_PARAM, testOutputFile.name());
+
+ assertThatThrownBy(() -> getProfiler().start(startParameters))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("Event must be one or a combination of [cpu,
alloc, lock, wall, nativemem, cache_misses]");
+ }
+ }
+
+ /**
+ * start() with a duration of "13h", which is above the 43200 second maximum named
+ * in the expected message, must throw IllegalArgumentException.
+ */
+ @Test
+ public void testInvalidDurationThrowsException()
+ {
+ try (WithProperties properties = new
WithProperties().set(ASYNC_PROFILER_UNSAFE_MODE, false))
+ {
+ Map<String, String> startParameters =
Map.of(ASYNC_PROFILER_START_EVENTS_PARAM, cpu.name(),
+
ASYNC_PROFILER_START_OUTPUT_FORMAT_PARAM, flamegraph.name(),
+
ASYNC_PROFILER_START_DURATION_PARAM, "13h",
+
ASYNC_PROFILER_START_OUTPUT_FILE_NAME_PARAM, testOutputFile.name());
+ assertThatThrownBy(() -> getProfiler().start(startParameters))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("Max profiling duration is 43200 seconds. If
you need longer profiling, use execute command instead");
+ }
+ }
+
+ /**
+ * start() with an unknown output format ("not_a_real_format") and otherwise valid
+ * parameters must throw IllegalArgumentException listing the supported formats.
+ */
+ @Test
+ public void testInvalidFormatThrowsException()
+ {
+ try (WithProperties properties = new
WithProperties().set(ASYNC_PROFILER_UNSAFE_MODE, false))
+ {
+ Map<String, String> startParameters =
Map.of(ASYNC_PROFILER_START_EVENTS_PARAM, cpu.name(),
+
ASYNC_PROFILER_START_OUTPUT_FORMAT_PARAM, "not_a_real_format",
+
ASYNC_PROFILER_START_DURATION_PARAM, "60s",
+
ASYNC_PROFILER_START_OUTPUT_FILE_NAME_PARAM, testOutputFile.name());
+ assertThatThrownBy(() -> getProfiler().start(startParameters))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("Format must be one of [flat, traces,
collapsed, flamegraph, tree, jfr]");
+ }
+ }
+
+ /**
+ * start() with an output file name containing shell-like characters ("| grep test")
+ * must throw IllegalArgumentException that quotes the accepted file name pattern.
+ */
+ @Test
+ public void testInvalidOutputFileNameThrowsException()
+ {
+ try (WithProperties properties = new
WithProperties().set(ASYNC_PROFILER_UNSAFE_MODE, false))
+ {
+ Map<String, String> startParameters =
Map.of(ASYNC_PROFILER_START_EVENTS_PARAM, cpu.name(),
+
ASYNC_PROFILER_START_OUTPUT_FORMAT_PARAM, flamegraph.name(),
+
ASYNC_PROFILER_START_DURATION_PARAM, "60s",
+
ASYNC_PROFILER_START_OUTPUT_FILE_NAME_PARAM, "| grep test");
+ assertThatThrownBy(() -> getProfiler().start(startParameters))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("Output file name must match pattern
^[a-zA-Z0-9-]*\\.?[a-zA-Z0-9-]*$");
+ }
+ }
+
+ /**
+ * start() with a duration that cannot be parsed ("10abc") must throw
+ * IllegalArgumentException describing the accepted units.
+ * Despite the method name, the parameter under test is the duration.
+ */
+ @Test
+ public void testInvalidTimeoutThrowsException()
+ {
+ try (WithProperties properties = new
WithProperties().set(ASYNC_PROFILER_UNSAFE_MODE, false))
+ {
+ Map<String, String> startParameters =
Map.of(ASYNC_PROFILER_START_EVENTS_PARAM, cpu.name(),
+
ASYNC_PROFILER_START_OUTPUT_FORMAT_PARAM, flamegraph.name(),
+
ASYNC_PROFILER_START_DURATION_PARAM, "10abc",
+
ASYNC_PROFILER_START_OUTPUT_FILE_NAME_PARAM, "abc");
+ assertThatThrownBy(() -> {
+ getProfiler().start(startParameters);
+ })
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("Invalid duration: 10abc Accepted
units:[SECONDS, MINUTES, HOURS, DAYS] where case matters and only non-negative
values.");
+ }
+ }
+
+ /**
+ * Calling start() twice with the same parameters: the first call must return true
+ * and the second must return false. The profiler is stopped at the end of the test.
+ */
+ @Test
+ public void testSecondStartNotExecuted()
+ {
+ try (WithProperties properties = new
WithProperties().set(ASYNC_PROFILER_UNSAFE_MODE, false))
+ {
+ Map<String, String> startParameters =
Map.of(ASYNC_PROFILER_START_EVENTS_PARAM, cpu.name(),
+
ASYNC_PROFILER_START_OUTPUT_FORMAT_PARAM, flamegraph.name(),
+
ASYNC_PROFILER_START_DURATION_PARAM, "60s",
+
ASYNC_PROFILER_START_OUTPUT_FILE_NAME_PARAM, testOutputFile.name());
+ Map<String, String> stopParameters =
Map.of(ASYNC_PROFILER_STOP_OUTPUT_FILE_NAME_PARAM, testOutputFile.name());
+
+ AsyncProfilerService profiler = getProfiler();
+ assertTrue(profiler.start(startParameters));
+ assertFalse(profiler.start(startParameters));
+ profiler.stop(stopParameters);
+ }
+ }
+
+ /**
+ * With ASYNC_PROFILER_ENABLED set to false, obtaining the service and asking for
+ * its status must fail with a message telling the operator how to enable the profiler.
+ * Only the message is asserted; the exception type is not checked.
+ */
+ @Test
+ public void testProfilerDisabledThrowsException()
+ {
+ try (WithProperties properties = new
WithProperties().set(ASYNC_PROFILER_UNSAFE_MODE,
true).set(ASYNC_PROFILER_ENABLED, false))
+ {
+ assertThatThrownBy(() -> {
+ AsyncProfilerService profiler = getProfiler();
+ profiler.status();
+ }).hasMessageContaining("Async Profiler is not enabled. Enable it
by setting cassandra.async_profiler.enabled property to true.");
+ }
+ }
+
+ /**
+ * With ASYNC_PROFILER_UNSAFE_MODE set to true, drives the profiler through raw
+ * execute() commands: starts a cpu session writing to the absolute output path,
+ * sleeps 5 seconds, stops it, and asserts the output file exists and is non-empty.
+ *
+ * NOTE(review): the stop command formats testOutputFile itself with %s, so it
+ * relies on the File's string form being the same path given to start - confirm.
+ */
+ @Test
+ public void testAdvancedModeEnabledSuccess() throws Throwable
+ {
+ try (WithProperties properties = new
WithProperties().set(ASYNC_PROFILER_UNSAFE_MODE, true))
+ {
+ AsyncProfilerService profiler = getProfiler();
+
+ profiler.execute("start,event=" + cpu.name() + ",file=" +
testOutputFile.absolutePath());
+ Thread.sleep(5000);
+ profiler.execute(format("stop,file=%s", testOutputFile));
+
+ assertTrue("Output profile file for unsafe mode should exist",
testOutputFile.exists());
+ assertTrue("Output profile file for unsafe mode should not be
empty", testOutputFile.length() > 0);
+ }
+ }
+
+ /**
+ * With ASYNC_PROFILER_UNSAFE_MODE set to true, execute("foo") is invoked directly.
+ * There are no assertions: the test passes as long as the call does not throw.
+ */
+ @Test
+ public void testUnsafeExecute()
+ {
+ try (WithProperties properties = new
WithProperties().set(ASYNC_PROFILER_UNSAFE_MODE, true))
+ {
+ getProfiler().execute("foo");
+ }
+ }
+
+ /**
+ * fetch() must reject a relative path that climbs out of the directory ("../abc")
+ * and an absolute path ("/etc/abc") with IllegalArgumentException carrying the
+ * exact message "Illegal file to fetch: " followed by the requested name.
+ */
+ @Test
+ public void testFetchIllegalFile()
+ {
+ assertThatThrownBy(() -> getProfiler().fetch("../abc"))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("Illegal file to fetch: ../abc");
+
+ assertThatThrownBy(() -> getProfiler().fetch("/etc/abc"))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("Illegal file to fetch: /etc/abc");
+ }
+
+ /**
+ * With ASYNC_PROFILER_UNSAFE_MODE set to false, execute("foo") must be refused with
+ * SecurityException whose message names the MBean, the property that unlocks
+ * arbitrary commands and the rejected command.
+ */
+ @Test
+ public void testSafeExecute()
+ {
+ try (WithProperties properties = new
WithProperties().set(ASYNC_PROFILER_UNSAFE_MODE, false))
+ {
+ assertThatThrownBy(() -> getProfiler().execute("foo"))
+ .isInstanceOf(SecurityException.class)
+ .hasMessageContaining("The arbitrary command execution is not
permitted with org.apache.cassandra.profiler:type=AsyncProfiler " +
+ "MBean. If unsafe command execution is
required, start Cassandra with " + ASYNC_PROFILER_UNSAFE_MODE.getKey() + ' ' +
+ "property set to true. Rejected command:
foo");
+ }
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]