(kudu) branch master updated: [tests] check for gflags::SetCommandLineOption() result

2024-08-01 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 17dc71436 [tests] check for gflags::SetCommandLineOption() result
17dc71436 is described below

commit 17dc7143605acf547a2964c1463bd0149060008f
Author: Alexey Serbin 
AuthorDate: Tue May 14 11:20:55 2024 -0700

[tests] check for gflags::SetCommandLineOption() result

With LTO and other link-time optimizations, the linker might remove
symbols it doesn't find in use.  [1] shows a particular example of this.

This patch adds verification for the value returned by
gflags::SetCommandLineOption() to spot such cases earlier if they
happen due to future library restructuring and unexpected regressions.
It makes sense to catch such issues before other related assertions
are triggered in a test.

[1] https://gerrit.cloudera.org/#/c/21399/
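
For illustration, a minimal sketch of the pattern this patch enforces
(the flag below is hypothetical and not part of the patch):
gflags::SetCommandLineOption() returns a non-empty message on success
and an empty string when the flag is unknown, e.g. when its
registration was dropped at link time.

  #include <string>
  #include <gflags/gflags.h>
  #include <glog/logging.h>

  // Hypothetical flag used only for this illustration.
  DEFINE_bool(example_flag, false, "illustrative flag");

  int main() {
    // A non-empty result means the flag was found and updated; an empty
    // result means the flag is unknown to the gflags registry.
    const std::string result =
        gflags::SetCommandLineOption("example_flag", "true");
    CHECK_NE("", result);
    return 0;
  }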

Change-Id: I225142d580fac2c911e81d95aa0e89ef037922b0
Reviewed-on: http://gerrit.cloudera.org:8080/21630
Reviewed-by: Abhishek Chennaka 
Tested-by: Alexey Serbin 
---
 src/kudu/client/client-test.cc  |  2 +-
 src/kudu/util/flag_tags-test.cc | 12 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index d3372920a..c50988716 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -1775,7 +1775,7 @@ TEST_F(ClientTest, TestScanCloseProxy) {
 
 // Check that the client scanner does not redact rows.
 TEST_F(ClientTest, TestRowPtrNoRedaction) {
-  google::SetCommandLineOption("redact", "log");
+  ASSERT_NE("", google::SetCommandLineOption("redact", "log"));
 
   NO_FATALS(InsertTestRows(client_table_.get(), FLAGS_test_scan_num_rows));
   KuduScanner scanner(client_table_.get());
diff --git a/src/kudu/util/flag_tags-test.cc b/src/kudu/util/flag_tags-test.cc
index a6d2da655..cb33405c1 100644
--- a/src/kudu/util/flag_tags-test.cc
+++ b/src/kudu/util/flag_tags-test.cc
@@ -82,7 +82,7 @@ TEST_F(FlagTagsTest, TestUnlockFlags) {
   // Setting an unsafe flag without unlocking should crash.
   {
 gflags::FlagSaver s;
-gflags::SetCommandLineOption("test_unsafe_flag", "true");
+ASSERT_NE("", gflags::SetCommandLineOption("test_unsafe_flag", "true"));
 ASSERT_DEATH({ ValidateFlags(); },
  "Flag --test_unsafe_flag is unsafe and unsupported.*"
  "Use --unlock_unsafe_flags to proceed");
@@ -93,8 +93,8 @@ TEST_F(FlagTagsTest, TestUnlockFlags) {
 StringVectorSink sink;
 ScopedRegisterSink reg();
 gflags::FlagSaver s;
-gflags::SetCommandLineOption("test_unsafe_flag", "true");
-gflags::SetCommandLineOption("unlock_unsafe_flags", "true");
+ASSERT_NE("", gflags::SetCommandLineOption("test_unsafe_flag", "true"));
+ASSERT_NE("", gflags::SetCommandLineOption("unlock_unsafe_flags", "true"));
 ValidateFlags();
 ASSERT_EQ(1, sink.logged_msgs().size());
 ASSERT_STR_CONTAINS(sink.logged_msgs()[0], "Enabled unsafe flag: 
--test_unsafe_flag");
@@ -103,7 +103,7 @@ TEST_F(FlagTagsTest, TestUnlockFlags) {
   // Setting an experimental flag without unlocking should crash.
   {
 gflags::FlagSaver s;
-gflags::SetCommandLineOption("test_experimental_flag", "true");
+ASSERT_NE("", gflags::SetCommandLineOption("test_experimental_flag", 
"true"));
 ASSERT_DEATH({ ValidateFlags(); },
  "Flag --test_experimental_flag is experimental and 
unsupported.*"
  "Use --unlock_experimental_flags to proceed");
@@ -114,8 +114,8 @@ TEST_F(FlagTagsTest, TestUnlockFlags) {
 StringVectorSink sink;
 ScopedRegisterSink reg();
 gflags::FlagSaver s;
-gflags::SetCommandLineOption("test_experimental_flag", "true");
-gflags::SetCommandLineOption("unlock_experimental_flags", "true");
+ASSERT_NE("", gflags::SetCommandLineOption("test_experimental_flag", 
"true"));
+ASSERT_NE("", gflags::SetCommandLineOption("unlock_experimental_flags", 
"true"));
 ValidateFlags();
 ASSERT_EQ(1, sink.logged_msgs().size());
 ASSERT_STR_CONTAINS(sink.logged_msgs()[0],



(kudu) branch master updated: KUDU-613: Add SLRU Cache metrics

2024-08-01 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 7102025af KUDU-613: Add SLRU Cache metrics
7102025af is described below

commit 7102025af776a65715ecdabcd3be469cb1e972ea
Author: Mahesh Reddy 
AuthorDate: Tue Feb 6 15:18:27 2024 -0500

KUDU-613: Add SLRU Cache metrics

This patch adds segment-specific metrics for both
the probationary and the protected segment along
with high-level metrics for the entire SLRU cache.
It also adds these same metrics to the block cache
so it can support a cache with SLRU eviction policy.

This patch adds a template parameter to the
SLRUCacheShard class to indicate which segment
a shard belongs to.
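
For illustration, a minimal sketch of the template-parameter idea
described above; the names below are made up and do not mirror Kudu's
actual SLRUCacheShard or CacheMetrics classes.

  enum class Segment { kProbationary, kProtected };

  // Illustrative per-segment counters (not Kudu's metric types).
  struct SegmentCountersSketch {
    long probationary_inserts = 0;
    long protected_inserts = 0;
  };

  // The segment a shard belongs to is fixed at compile time; the branch
  // below is on a compile-time constant, so the compiler can fold it away.
  template <Segment kSegment>
  class SLRUCacheShardSketch {
   public:
    void RecordInsert(SegmentCountersSketch* counters) {
      if (kSegment == Segment::kProbationary) {
        ++counters->probationary_inserts;
      } else {
        ++counters->protected_inserts;
      }
    }
  };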

Change-Id: I1c8181ec6bea301605aaef7db0003c3eaef3072d
Reviewed-on: http://gerrit.cloudera.org:8080/21389
Tested-by: Alexey Serbin 
Reviewed-by: Alexey Serbin 
---
 src/kudu/util/block_cache_metrics.cc | 126 +
 src/kudu/util/cache_metrics.h|  29 +
 src/kudu/util/slru_cache-test.cc |  13 +++
 src/kudu/util/slru_cache.cc  | 209 ++-
 src/kudu/util/slru_cache.h   |  34 +-
 5 files changed, 379 insertions(+), 32 deletions(-)

diff --git a/src/kudu/util/block_cache_metrics.cc 
b/src/kudu/util/block_cache_metrics.cc
index a07de117c..c6a626dd4 100644
--- a/src/kudu/util/block_cache_metrics.cc
+++ b/src/kudu/util/block_cache_metrics.cc
@@ -17,6 +17,8 @@
 
 #include "kudu/util/block_cache_metrics.h"
 
+#include 
+
 #include "kudu/util/metrics.h"
 
 METRIC_DEFINE_counter(server, block_cache_inserts,
@@ -57,6 +59,98 @@ METRIC_DEFINE_gauge_uint64(server, block_cache_usage, "Block 
Cache Memory Usage"
"Memory consumed by the block cache",
kudu::MetricLevel::kInfo);
 
+METRIC_DEFINE_counter(server, block_cache_upgrades,
+  "Block Cache Upgrades", kudu::MetricUnit::kBlocks,
+  "Number of blocks upgraded from the probationary segment 
to "
+  "the protected segment of the block cache",
+  kudu::MetricLevel::kDebug);
+METRIC_DEFINE_counter(server, block_cache_downgrades,
+  "Block Cache downgrades", kudu::MetricUnit::kBlocks,
+  "Number of blocks downgraded from the protected segment 
to "
+  "the probationary segment of the block cache",
+  kudu::MetricLevel::kDebug);
+
+METRIC_DEFINE_counter(server, block_cache_probationary_segment_inserts,
+  "Block Cache Probationary Segment Inserts", 
kudu::MetricUnit::kBlocks,
+  "Number of blocks inserted in the probationary segment 
of the cache",
+  kudu::MetricLevel::kDebug);
+METRIC_DEFINE_counter(server, block_cache_probationary_segment_lookups,
+  "Block Cache Probationary Segment Lookups", 
kudu::MetricUnit::kBlocks,
+  "Number of blocks looked up from the probationary 
segment of the cache",
+  kudu::MetricLevel::kDebug);
+METRIC_DEFINE_counter(server, block_cache_probationary_segment_evictions,
+  "Block Cache Probationary Segment Evictions", 
kudu::MetricUnit::kBlocks,
+  "Number of blocks evicted from the probationary segment 
of the cache",
+  kudu::MetricLevel::kDebug);
+METRIC_DEFINE_counter(server, block_cache_probationary_segment_misses,
+  "Block Cache Probationary Segment Misses", 
kudu::MetricUnit::kBlocks,
+  "Number of lookups in the probationary segment that 
didn't yield a block",
+  kudu::MetricLevel::kDebug);
+METRIC_DEFINE_counter(server, block_cache_probationary_segment_misses_caching,
+  "Block Cache Probationary Segment Misses (Caching)",
+  kudu::MetricUnit::kBlocks,
+  "Number of lookups in the probationary segment that were 
expecting a block "
+  "that didn't yield one. Use this number instead of "
+  "block_cache_probationary_segment_misses when trying to 
determine how "
+  "efficient the probationary segment is",
+  kudu::MetricLevel::kDebug);
+METRIC_DEFINE_counter(server, block_cache_probationary_segment_hits,
+  "Block Cache Probationary Segment Hits", 
kudu::MetricUnit::kBlocks,
+  "Number of lookups in t

(kudu) branch master updated: [fs] check block size match only for block managers

2024-07-31 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new acde027b0 [fs] check block size match only for block managers
acde027b0 is described below

commit acde027b0c73bf41a4c50f1e7feebe2a3ab2b53d
Author: Alexey Serbin 
AuthorDate: Wed Jul 31 17:34:04 2024 -0700

[fs] check block size match only for block managers

Since the block size-related constraints affect only the implementation
of log block managers, this patch enables the extra sanity check in
DirInstanceMetadataFile::LoadFromDisk() only when Kudu's data is
controlled by 'log' or 'logr' block manager.

This allows for running Kudu test clusters with --block_manager=file
on Linux machines when keeping the data on a ZFS mount.

NOTE: the file block manager shouldn't be used in production environments

Change-Id: I358214f4bc93fbf19bb245e3accc909943ae2f00
Reviewed-on: http://gerrit.cloudera.org:8080/21627
Reviewed-by: Yingchun Lai 
Tested-by: Alexey Serbin 
---
 src/kudu/fs/dir_util.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/kudu/fs/dir_util.cc b/src/kudu/fs/dir_util.cc
index 206e5b59f..d6aebb9ac 100644
--- a/src/kudu/fs/dir_util.cc
+++ b/src/kudu/fs/dir_util.cc
@@ -249,7 +249,7 @@ Status DirInstanceMetadataFile::LoadFromDisk() {
   uint64_t block_size;
   RETURN_NOT_OK_FAIL_INSTANCE_PREPEND(env_->GetBlockSize(filename_, &block_size),
       Substitute("Failed to load metadata file. Could not get block size of $0", filename_));
-  if (pb->filesystem_block_size_bytes() != block_size) {
+  if (FsManager::IsLogType(dir_type_) && pb->filesystem_block_size_bytes() != block_size) {
     return Status::IOError("Wrong filesystem block size", Substitute(
         "Expected $0 but was $1", pb->filesystem_block_size_bytes(), block_size));
   }



(kudu) 02/02: [cmake] work around of Gradle-related issues

2024-07-31 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit a1cc227df5d5dc7586d3971135643320682b2709
Author: Alexey Serbin 
AuthorDate: Wed Jul 31 11:58:55 2024 -0700

[cmake] work around of Gradle-related issues

Once Gradle started being invoked to build the kudu-subprocess and
kudu-hms JARs as dependencies for the C++ server side of the project,
it became hard to get a 100% success rate when building from source.
With the recent Gradle upgrade, the build success rate seems to have
dropped even further.

I decided to address that by adding a few extra flags to the Gradle
invocations issued by cmake when building the corresponding targets.
There is probably a better way of addressing this (say, finding the
root cause of the issues, reporting them, and fixing them), but this
simple solution works well for me.  Also, this solution doesn't affect
the project's build times (even when using ccache), since the targets
built with Gradle make up only a small fraction of all the targets,
which are built concurrently in a very robust manner.

Prior to this patch, Gradle would error out on follow-up invocations
after the build process had been interrupted, unable to access its
build cache, with an error like the one below:

--
  FAILURE: Build failed with an exception.

  * What went wrong:
  Execution failed for task ':kudu-hive:compileJava'.
  > Could not create service of type DefaultGeneralCompileCaches using 
GradleScopeCompileServices.createGeneralCompileCaches().
 > Cannot lock Java compile cache (...) as it has already been locked 
by this process.
--

Prior to this patch, building the project would fail sometimes when
invoking 'cmake -jN' (where N > 1) with errors like below while Gradle
was generating kudu-subprocess.jar and hms-plugin.jar concurrently:

--
  ...
  [  4%] Generating ../../../bin/kudu-subprocess.jar
  ...
  [  6%] Generating ../../../bin/hms-plugin.jar
  ...
  java.lang.ArrayIndexOutOfBoundsException: 6
  at 
shadow.org.objectweb.asm.ClassReader.readShort(ClassReader.java:3573)
  ...
  at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at 
org.gradle.internal.concurrent.ThreadFactoryImpl$ManagedThreadRunnable.run(ThreadFactoryImpl.java:56)
  at java.lang.Thread.run(Thread.java:750)

  FAILURE: Build failed with an exception.

  * What went wrong:
  Execution failed for task ':kudu-client:shadowJar'.
  > 6
  ...
  BUILD FAILED in 1m 0s
  make[2]: *** [bin/hms-plugin.jar] Error 1
  make[1]: *** [src/kudu/hms/CMakeFiles/hms_plugin_jar.dir/all] Error 2
  make[1]: *** Waiting for unfinished jobs
  java.lang.ArrayIndexOutOfBoundsException: 6
  at 
shadow.org.objectweb.asm.ClassReader.readShort(ClassReader.java:3573)
  ...
at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.
java:1149)
at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor
.java:624)
at 
org.gradle.internal.concurrent.ThreadFactoryImpl$ManagedThreadRunnabl
e.run(ThreadFactoryImpl.java:56)
at java.lang.Thread.run(Thread.java:750)

  FAILURE: Build failed with an exception.

  * What went wrong:
  Execution failed for task ':kudu-subprocess:shadowJar'.
  > 6
  ...
  BUILD FAILED in 1m 3s
  make[2]: *** [bin/kudu-subprocess.jar] Error 1
  make[1]: *** [src/kudu/subprocess/CMakeFiles/subprocess_jar.dir/all] 
Error 2
  make: *** [all] Error 2
--

Change-Id: Ib7d1637854233592ede5388bfc2287df96ec9865
Reviewed-on: http://gerrit.cloudera.org:8080/21626
Reviewed-by: Abhishek Chennaka 
Tested-by: Alexey Serbin 
---
 CMakeLists.txt | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0f6201447..3ebdd1b1a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -109,8 +109,24 @@ set(JAVA_DIR ${CMAKE_CURRENT_SOURCE_DIR}/java)
 # This solves the problem of escaped spaces in the EXTRA_GRADLE_FLAGS 
environment variable.
 list(APPEND GRADLE_FLAGS $ENV{EXTRA_GRADLE_FLAGS})
 separate_arguments(GRADLE_FLAGS)
-# We always want Gradle to use the plain console and quiet flag when called 
from cmake.
-list(APPEND GRADLE_FLAGS --quiet --console=plain)
+# When invoking Gradle:
+#   * Log errors only, reducing the verbosity of the Gradle's output
+#   * Make the output better fit for plain con

(kudu) branch master updated (4116941c0 -> a1cc227df)

2024-07-31 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


from 4116941c0 KUDU-3591 Fix the flaky test 
FsManagerTestBase.TestAddRemoveDataDirsFuzz
 new 834db150f [tools] fix compilation warning
 new a1cc227df [cmake] work around of Gradle-related issues

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 CMakeLists.txt  | 20 ++--
 src/kudu/tools/tool_action_table.cc |  4 ++--
 2 files changed, 20 insertions(+), 4 deletions(-)



(kudu) 01/02: [tools] fix compilation warning

2024-07-31 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 834db150fe9907156cfc5ca6686879e471beb838
Author: Alexey Serbin 
AuthorDate: Wed Jul 31 11:12:41 2024 -0700

[tools] fix compilation warning

This patch addresses compilation warnings like below (GCC 4.8.5):

  src/kudu/tools/tool_action_table.cc:334:24: warning: extra tokens at end 
of #undef directive
   #undef GET_PROPERTY()
  ^
  src/kudu/tools/tool_action_table.cc:335:28: warning: extra tokens at end 
of #undef directive
   #undef GET_NUM_PROPERTY()

This is a follow-up to d91d5c95dab38770890cac6a30be63f80eb82fec.
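
For illustration, a minimal standalone example of the rule behind the
fix (the macro below is made up): #undef takes only the macro
identifier, so a trailing parameter list is reported as extra tokens.

  // Hypothetical function-like macro.
  #define SQUARE(x) ((x) * (x))

  int UseIt(int v) {
    return SQUARE(v);
  }

  // Correct: only the macro name follows #undef.
  // '#undef SQUARE()' would emit "extra tokens at end of #undef directive".
  #undef SQUARE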

Change-Id: I2484aebb5c3334549e9b04957121f556d1120933
Reviewed-on: http://gerrit.cloudera.org:8080/21625
Tested-by: Alexey Serbin 
Reviewed-by: Abhishek Chennaka 
---
 src/kudu/tools/tool_action_table.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/kudu/tools/tool_action_table.cc 
b/src/kudu/tools/tool_action_table.cc
index 21b8c04e8..5c985998b 100644
--- a/src/kudu/tools/tool_action_table.cc
+++ b/src/kudu/tools/tool_action_table.cc
@@ -331,8 +331,8 @@ class TableLister {
 {"live_row_count", GET_NUM_PROPERTY(live_row_count)},
 };
 
-#undef GET_PROPERTY()
-#undef GET_NUM_PROPERTY()
+#undef GET_PROPERTY
+#undef GET_NUM_PROPERTY
 
 for (const auto& tinfo : tables_info) {
   vector values;



(kudu) branch master updated: KUDU-3591 Fix the flaky test FsManagerTestBase.TestAddRemoveDataDirsFuzz

2024-07-30 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 4116941c0 KUDU-3591 Fix the flaky test 
FsManagerTestBase.TestAddRemoveDataDirsFuzz
4116941c0 is described below

commit 4116941c082be5d6e3addfe3f6b6d2ea468416e4
Author: Yingchun Lai 
AuthorDate: Sat Jul 20 23:09:24 2024 +0800

KUDU-3591 Fix the flaky test FsManagerTestBase.TestAddRemoveDataDirsFuzz

Reduce the failure rate of the test by lowering its loop
count. For example, when FLAGS_block_manager == "logr",
opening a data directory also opens a RocksDB instance,
which takes more time than when FLAGS_block_manager == "log".

Change-Id: I56a03d64c8ababd63fe31fa1e7fce1efac3875e8
Reviewed-on: http://gerrit.cloudera.org:8080/21604
Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin 
---
 src/kudu/fs/fs_manager-test.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/kudu/fs/fs_manager-test.cc b/src/kudu/fs/fs_manager-test.cc
index b030d1e03..75e26d917 100644
--- a/src/kudu/fs/fs_manager-test.cc
+++ b/src/kudu/fs/fs_manager-test.cc
@@ -1255,12 +1255,12 @@ TEST_P(FsManagerTestBase, TestAddRemoveDataDirsFuzz) {
 
 #if defined(THREAD_SANITIZER) || defined(ADDRESS_SANITIZER)
   // When using a sanitizer, reduce the loop times to get a more stable result.
-  const int kNumAttempts = 50;
+  const int kNumAttempts = 10;
 #else
   // In some situations, the tests would last too long time, so we reduce the 
loop times if not
   // AllowSlowTests(). For example, when FLAGS_block_manager == "logr", opens 
a data directory will
   // open a RocksDB instance, it consumes more time than that if 
FLAGS_block_manager == "log".
-  const int kNumAttempts = AllowSlowTests() ? 1000 : 50;
+  const int kNumAttempts = AllowSlowTests() ? 1000 : 10;
 #endif
 
   Random rng_(SeedRandom());



(kudu) branch master updated: [Tool] Fix unit test ToolTest.TableCopyLimitSpeed

2024-07-26 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 69f57e921 [Tool] Fix unit test ToolTest.TableCopyLimitSpeed
69f57e921 is described below

commit 69f57e921c3e08f0a832cda8edb13d458f7c3cbf
Author: xinghuayu007 <1450306...@qq.com>
AuthorDate: Wed Jul 24 18:15:40 2024 +0800

[Tool] Fix unit test ToolTest.TableCopyLimitSpeed

This patch refactors some code and fixes a test, following up
on the patch: https://gerrit.cloudera.org/c/21527/

Change-Id: I8906a8c069f6133fab30b3f2da7723e98c82d869
Reviewed-on: http://gerrit.cloudera.org:8080/21609
Reviewed-by: Alexey Serbin 
Tested-by: Alexey Serbin 
---
 src/kudu/tools/kudu-tool-test.cc | 8 
 src/kudu/tools/table_scanner.cc  | 9 ++---
 src/kudu/tools/table_scanner.h   | 6 --
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 8d97f4047..4ef500923 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -6026,16 +6026,16 @@ TEST_F(ToolTest, TableCopyLimitSpeed) {
 .add_master_server_addr(master_addr)
 .Build());
   shared_ptr<KuduTable> table;
-  client->OpenTable(kNewTableName, &table);
+  ASSERT_OK(client->OpenTable(kNewTableName, &table));
   KuduScanner scanner(table.get());
-  scanner.Open();
+  ASSERT_OK(scanner.Open());
   KuduScanBatch batch;
   int64_t data_size = 0;
   while (scanner.HasMoreRows()) {
 ASSERT_OK(scanner.NextBatch());
-data_size = batch.direct_data().size() + batch.indirect_data().size();
+data_size += batch.direct_data().size() + batch.indirect_data().size();
   }
-  // Table copy speed must less than table_copy_throttler_bytes_per_sec.
+  // Table copy speed must be less than table_copy_throttler_bytes_per_sec.
   ASSERT_LE(data_size / (end_time - start_time).ToSeconds(), 
table_copy_throttler_bytes_per_sec);
 }
 
diff --git a/src/kudu/tools/table_scanner.cc b/src/kudu/tools/table_scanner.cc
index fac0ce949..415c64ed3 100644
--- a/src/kudu/tools/table_scanner.cc
+++ b/src/kudu/tools/table_scanner.cc
@@ -61,6 +61,7 @@
 #include "kudu/util/slice.h"
 #include "kudu/util/stopwatch.h"
 #include "kudu/util/string_case.h"
+#include "kudu/util/threadpool.h"
 #include "kudu/util/throttler.h"
 
 using kudu::client::KuduClient;
@@ -582,12 +583,14 @@ TableScanner::TableScanner(
   out_(nullptr) {
   CHECK_OK(SetReplicaSelection(FLAGS_replica_selection));
   if (FLAGS_table_copy_throttler_bytes_per_sec > 0) {
-throttler_ = std::make_shared(Throttler::kNoLimit,
+throttler_ = std::make_unique(Throttler::kNoLimit,
  
FLAGS_table_copy_throttler_bytes_per_sec,
  
FLAGS_table_copy_throttler_burst_factor);
   }
 }
 
+TableScanner::~TableScanner() {}
+
 Status TableScanner::ScanData(const vector& tokens,
   const function& cb) {
   for (const auto* token : tokens) {
@@ -608,9 +611,9 @@ Status TableScanner::ScanData(const vector& 
tokens,
   count += batch.NumRows();
   total_count_ += batch.NumRows();
   ++next_batch_calls;
-  // Limit table copy speed.
+  // Limit table copying speed.
   if (throttler_) {
-SCOPED_LOG_SLOW_EXECUTION(WARNING, 1000, "Table copy throttler");
+SCOPED_LOG_SLOW_EXECUTION(INFO, 1000, "Table copy throttler");
 while (!throttler_->Take(0,
  batch.direct_data().size() + 
batch.indirect_data().size())) {
   SleepFor(MonoDelta::FromMicroseconds(Throttler::kRefillPeriodMicros 
/ 2));
diff --git a/src/kudu/tools/table_scanner.h b/src/kudu/tools/table_scanner.h
index 1eba9f295..1c55d603b 100644
--- a/src/kudu/tools/table_scanner.h
+++ b/src/kudu/tools/table_scanner.h
@@ -32,9 +32,9 @@
 #include "kudu/client/write_op.h"
 #include "kudu/util/mutex.h"
 #include "kudu/util/status.h"
-#include "kudu/util/threadpool.h"
 
 namespace kudu {
+class ThreadPool;
 class Throttler;
 
 namespace tools {
@@ -48,6 +48,8 @@ class TableScanner {
std::nullopt,
std::optional dst_table_name = std::nullopt);
 
+  ~TableScanner();
+
   // Set output stream of this tool, or disable output if not set.
   // 'out' must remain valid for the lifetime of this class.
   void SetOutput(std::ostream* out);
@@ -104,7 +106,7 @@ class TableScanner {
   std::optional dst_table_name_;
   int32_t scan_batch_size_;
   std::unique_ptr thread_pool_;
-  std::shared_ptr throttler_;
+  std::unique_ptr throttler_;
 
   // Protects output to 'out_' so that rows don't get interleaved.
   Mutex output_lock_;



(kudu) branch master updated: [log] fix the Ranger client log count limit not working

2024-07-26 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 065acfbff [log] fix the Ranger client log count limit not working
065acfbff is described below

commit 065acfbff08ab7d77308ebe9f874d9e1d08d608d
Author: kedeng 
AuthorDate: Thu Jul 11 17:37:06 2024 +0800

[log] fix the Ranger client log count limit not working

In scenarios where authentication is enabled, the master
starts a Ranger subprocess to communicate with the KMS
process. I noticed that in these scenarios, the number
of log files generated by the Java subprocess was not
limited, causing a waste of disk space.

To address this issue, I updated the configuration file
for the Java subprocess. To verify the effectiveness of
this change, I also added new unit tests.

Change-Id: Idc528d68947c222fa7be338057ea7af134eb0dd4
Reviewed-on: http://gerrit.cloudera.org:8080/21572
Reviewed-by: Zoltan Chovan 
Reviewed-by: Yingchun Lai 
Tested-by: Yingchun Lai 
Reviewed-by: Alexey Serbin 
---
 .../kudu/subprocess/log/LoggingTestMain.java   | 33 
 src/kudu/subprocess/subprocess_proxy-test.cc   | 92 ++
 src/kudu/subprocess/subprocess_proxy.cc| 10 ++-
 src/kudu/subprocess/subprocess_proxy.h |  1 -
 4 files changed, 133 insertions(+), 3 deletions(-)

diff --git 
a/java/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/log/LoggingTestMain.java
 
b/java/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/log/LoggingTestMain.java
new file mode 100644
index 0..b2146da72
--- /dev/null
+++ 
b/java/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/log/LoggingTestMain.java
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.kudu.subprocess.log;
+
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@InterfaceAudience.Private
+class LoggingTestMain {
+  private static final Logger logger = 
LoggerFactory.getLogger(LoggingTestMain.class);
+
+  public static void main(String[] args) throws Exception {
+for (int i = 0; i < 10; i++) {
+  logger.debug("This is a test log message number: " + i);
+}
+  }
+}
diff --git a/src/kudu/subprocess/subprocess_proxy-test.cc 
b/src/kudu/subprocess/subprocess_proxy-test.cc
index 00c615b1e..c6b66f0b9 100644
--- a/src/kudu/subprocess/subprocess_proxy-test.cc
+++ b/src/kudu/subprocess/subprocess_proxy-test.cc
@@ -17,15 +17,18 @@
 
 #include "kudu/subprocess/subprocess_proxy.h"
 
+#include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 
 #include 
+#include 
 #include 
 
 #include "kudu/gutil/casts.h"
@@ -36,8 +39,11 @@
 #include "kudu/subprocess/subprocess.pb.h"
 #include "kudu/util/env.h"
 #include "kudu/util/metrics.h"
+#include "kudu/util/monotime.h"
 #include "kudu/util/path_util.h"
+#include "kudu/util/slice.h"
 #include "kudu/util/status.h"
+#include "kudu/util/subprocess.h"
 #include "kudu/util/test_macros.h"
 #include "kudu/util/test_util.h"
 
@@ -64,6 +70,92 @@ using strings::Substitute;
 namespace kudu {
 namespace subprocess {
 
+// Helper function to count files in a directory
+int CountLogFiles(const string& log_dir) {
+  vector<string> logfiles;
+  string pattern = Substitute("$0/*.log.gz", log_dir);
+  CHECK_OK(Env::Default()->Glob(pattern, &logfiles));
+  LOG(INFO) << "Found " << logfiles.size() << " log files";
+  return logfiles.size();
+}
+
+class SubprocessProxyTest : public KuduTest {
+ public:
+  SubprocessProxyTest()
+  : test_dir_(GetTestDataDirectory()) {}
+
+  void TearDown() override {
+if (process_) {
+  process_->KillAndWait(SIGTERM);
+}
+KuduTest::TearDown();
+  }
+
+  string GetLogDir() const {
+return JoinPathSegments(test_dir_, &

(kudu) branch master updated: [tests] more robust TabletServerDiskErrorITest::SetUp()

2024-07-25 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 315942c8e [tests] more robust TabletServerDiskErrorITest::SetUp()
315942c8e is described below

commit 315942c8eb61223269cae25f4e5e40c9e57dca0a
Author: Alexey Serbin 
AuthorDate: Wed Jul 24 21:57:34 2024 -0700

[tests] more robust TabletServerDiskErrorITest::SetUp()

This patch adds the verification for the status of AddTabletServer()
call in TabletServerDiskErrorITest::SetUp().

The motivation for this patch is KUDU-3597.

This is a follow-up to cf6927cb153f384afb649b664de1d4276bd6d83f.

Change-Id: I192a05bee4bfadddf2a3237f25dc17882cae1dc2
Reviewed-on: http://gerrit.cloudera.org:8080/21614
Tested-by: Kudu Jenkins
Reviewed-by: Abhishek Chennaka 
---
 src/kudu/integration-tests/disk_failure-itest.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/kudu/integration-tests/disk_failure-itest.cc 
b/src/kudu/integration-tests/disk_failure-itest.cc
index 416d1996b..22de86b8c 100644
--- a/src/kudu/integration-tests/disk_failure-itest.cc
+++ b/src/kudu/integration-tests/disk_failure-itest.cc
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -260,7 +261,7 @@ class TabletServerDiskErrorITest : public 
DiskErrorITestBase {
 NO_FATALS(writes.StopAndJoin());
 
 // Now add the last server.
-cluster_->AddTabletServer();
+ASSERT_OK(cluster_->AddTabletServer());
 for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
   // Prevent attempts to copy over replicas, e.g. ones that don't get to a
   // running state due to an error.



(kudu) branch master updated: [tests] re-enable ReplaceTabletsWhileWriting scenario

2024-07-25 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 0967b895a [tests] re-enable ReplaceTabletsWhileWriting scenario
0967b895a is described below

commit 0967b895a06f2b5b509db0ea2786d1ba721030d1
Author: Alexey Serbin 
AuthorDate: Wed Jul 24 18:03:49 2024 -0700

[tests] re-enable ReplaceTabletsWhileWriting scenario

Since KUDU-2376 seems to be a duplicate of KUDU-3461, and the latter
has already been addressed, it makes sense to re-enable the
ReplaceTabletITest.ReplaceTabletsWhileWriting scenario which is
now passing with this patch.

TestWorkload is updated to allow for the Status::InvalidArgument status
that percolates from the client's meta-cache when a tablet is replaced.
This patch also contains other minor (mostly cosmetic) updates.

Change-Id: I36b0e69022f07dd701a91e72e16c93f134d00619
Reviewed-on: http://gerrit.cloudera.org:8080/21612
Tested-by: Alexey Serbin 
Reviewed-by: Ashwani Raina 
Reviewed-by: Abhishek Chennaka 
---
 src/kudu/client/client-test.cc |  6 --
 src/kudu/client/meta_cache.cc  | 16 
 src/kudu/integration-tests/CMakeLists.txt  |  2 +-
 src/kudu/integration-tests/replace_tablet-itest.cc |  7 ++-
 src/kudu/integration-tests/test_workload.cc| 20 
 src/kudu/integration-tests/test_workload.h | 11 ++-
 6 files changed, 37 insertions(+), 25 deletions(-)

diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index 0f08ebbff..d3372920a 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -551,8 +551,10 @@ class ClientTest : public KuduTest {
 ASSERT_EQ(errors.size(), num_rows);
 
 // Check for only the first error.
-ASSERT_TRUE(errors[0]->status().IsInvalidArgument());
-ASSERT_STR_CONTAINS(errors[0]->status().ToString(), "Tablet id is not 
valid anymore");
+ASSERT_GE(errors.size(), 1);
+const auto& s = errors[0]->status();
+ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+ASSERT_STR_MATCHES(s.ToString(), "tablet ID .* is not valid");
   }
 
   // Inserts 'num_rows' using the default client.
diff --git a/src/kudu/client/meta_cache.cc b/src/kudu/client/meta_cache.cc
index 2ddfdf6c4..17f77d63a 100644
--- a/src/kudu/client/meta_cache.cc
+++ b/src/kudu/client/meta_cache.cc
@@ -551,15 +551,15 @@ void MetaCacheServerPicker::PickLeader(const 
ServerPickedCallback& callback,
   VLOG(2) << Substitute("Explicit fastpath lookup succeeded(maybe), "
 "proceed with callback, table: $0",
 table_->name());
-  if (remote_tablet &&
-  remote_tablet->tablet_id() != tablet_->tablet_id()) {
+  const auto& known_tablet_id = tablet_->tablet_id();
+  if (remote_tablet && remote_tablet->tablet_id() != known_tablet_id) {
 // Skip further processing if tablet in question has turned invalid
-LOG(INFO) << Substitute("Tablet under process found to be invalid, 
"
-"table: $0 - old tabletid: $1, new 
tabletid: $2",
-table_->name(), tablet_->tablet_id(),
-remote_tablet->tablet_id());
-callback(Status::InvalidArgument("Tablet id is not valid anymore"),
- nullptr);
+LOG(INFO) << Substitute(
+"tablet seems to be replaced: former ID $0, new ID $1 (table 
$2)",
+known_tablet_id, remote_tablet->tablet_id(), table_->name());
+callback(Status::InvalidArgument(
+ Substitute("tablet ID $0 is not valid", 
known_tablet_id)),
+ nullptr);
 return;
   }
 }
diff --git a/src/kudu/integration-tests/CMakeLists.txt 
b/src/kudu/integration-tests/CMakeLists.txt
index 9dd934dc8..36a352a5f 100644
--- a/src/kudu/integration-tests/CMakeLists.txt
+++ b/src/kudu/integration-tests/CMakeLists.txt
@@ -115,7 +115,7 @@ ADD_KUDU_TEST(raft_consensus_failure_detector-imc-itest)
 ADD_KUDU_TEST(raft_consensus_nonvoter-itest PROCESSORS 3)
 ADD_KUDU_TEST(raft_consensus_stress-itest RUN_SERIAL true)
 ADD_KUDU_TEST(raft_consensus-itest RUN_SERIAL true NUM_SHARDS 6)
-ADD_KUDU_TEST(replace_tablet-itest)
+ADD_KUDU_TEST(replace_tablet-itest PROCESSORS 4)
 ADD_KUDU_TEST(registration-test RESOURCE_LOCK "master-web-port")
 ADD_KUDU_TEST(same_tablet_concurrent_writes-itest)
 ADD_KUDU_TEST(security-faults-itest)
diff --g

(kudu) branch master updated: KUDU-3594 Fix scan_token-test on ASAN

2024-07-24 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 2d9292eb9 KUDU-3594 Fix scan_token-test on ASAN
2d9292eb9 is described below

commit 2d9292eb9bb849bf6a359f0738e92d9e6248b8a6
Author: Attila Bukor 
AuthorDate: Wed Jul 24 16:53:25 2024 +0200

KUDU-3594 Fix scan_token-test on ASAN

scan_token-test was failing on ASAN builds since
ScanTokenStaleRaftMembershipTest.TabletLeaderChange was introduced in
e44e0d48. Unfortunately, this was not caught during code review, as
builds were failing due to other reasons.

This commit fixes this issue by deallocating a tablet-server map at the
end of the test.
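
For illustration, a minimal standalone sketch of why the deallocation
matters and what the cleanup amounts to; the struct and names below are
illustrative only, not Kudu's actual types.

  #include <map>
  #include <string>

  struct TServerInfoSketch {           // stand-in for the real per-server info
    std::string uuid;
  };

  int main() {
    // A map that owns raw pointers: if the test returns without deleting
    // the values, LeakSanitizer (part of ASAN builds) reports them as leaked.
    std::map<std::string, TServerInfoSketch*> ts_map;
    ts_map["ts-0"] = new TServerInfoSketch{"uuid-0"};

    // ... the test body would use ts_map here ...

    // Equivalent of STLDeleteValues(&ts_map) at the end of the test.
    for (auto& entry : ts_map) {
      delete entry.second;
    }
    ts_map.clear();
    return 0;
  }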

Change-Id: I6f807c7b6cdb34b85fd861bb557ae46a78417371
Reviewed-on: http://gerrit.cloudera.org:8080/21610
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin 
---
 src/kudu/client/scan_token-test.cc | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/kudu/client/scan_token-test.cc 
b/src/kudu/client/scan_token-test.cc
index 169baeefd..2275e6595 100644
--- a/src/kudu/client/scan_token-test.cc
+++ b/src/kudu/client/scan_token-test.cc
@@ -70,6 +70,7 @@
 #include "kudu/util/metrics.h"
 #include "kudu/util/monotime.h"
 #include "kudu/util/net/sockaddr.h"
+#include "kudu/util/scoped_cleanup.h"
 #include "kudu/util/status.h"
 #include "kudu/util/test_macros.h"
 #include "kudu/util/test_util.h"
@@ -1687,6 +1688,10 @@ TEST_F(ScanTokenStaleRaftMembershipTest, 
TabletLeaderChange) {
 
   const auto kRaftTimeout = MonoDelta::FromSeconds(30);
   TabletServerMap ts_map;
+  auto cleanup = MakeScopedCleanup([&] {
+  // We need to make sure this is deallocated.
+  STLDeleteValues(&ts_map);
+  });
   ASSERT_OK(CreateTabletServerMap(cluster_->master_proxy(), cluster_->messenger(), &ts_map));
   const TServerDetails* leader_tsd = FindPtrOrNull(ts_map, leader_uuid);
   ASSERT_NE(nullptr, leader_tsd);



(kudu) branch master updated: [client] add ScanTokenStaleRaftMembershipTest

2024-07-23 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new e44e0d489 [client] add ScanTokenStaleRaftMembershipTest
e44e0d489 is described below

commit e44e0d4892b0e2469a18aefb78062f5aa2e1799c
Author: Alexey Serbin 
AuthorDate: Thu Jul 11 19:40:36 2024 -0700

[client] add ScanTokenStaleRaftMembershipTest

This patch adds a new test scenario TabletLeaderChange into the newly
added ScanTokenStaleRaftMembershipTest fixture.  The motivation for this
patch was a request to clarify the Kudu C++ client's behavior in
particular scenarios, which itself came up in the context of a follow-up
to KUDU-3349.

Change-Id: I6ce3d549d4ab2502c58deae1250b49ba16bbc914
Reviewed-on: http://gerrit.cloudera.org:8080/21580
Reviewed-by: Ashwani Raina 
Reviewed-by: Abhishek Chennaka 
Tested-by: Alexey Serbin 
---
 src/kudu/client/scan_token-test.cc | 166 +
 1 file changed, 166 insertions(+)

diff --git a/src/kudu/client/scan_token-test.cc 
b/src/kudu/client/scan_token-test.cc
index 0caba95ec..169baeefd 100644
--- a/src/kudu/client/scan_token-test.cc
+++ b/src/kudu/client/scan_token-test.cc
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -53,6 +54,7 @@
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/stl_util.h"
 #include "kudu/gutil/strings/substitute.h"
+#include "kudu/integration-tests/cluster_itest_util.h"
 #include "kudu/integration-tests/test_workload.h"
 #include "kudu/master/catalog_manager.h"
 #include "kudu/master/master.h"
@@ -84,6 +86,8 @@ using kudu::client::KuduTableCreator;
 using kudu::client::sp::shared_ptr;
 using kudu::cluster::InternalMiniCluster;
 using kudu::cluster::InternalMiniClusterOptions;
+using kudu::itest::TServerDetails;
+using kudu::itest::TabletServerMap;
 using kudu::master::CatalogManager;
 using kudu::master::TabletInfo;
 using kudu::tablet::TabletReplica;
@@ -1578,6 +1582,168 @@ TEST_F(ScanTokenTest, TestMasterRequestsNoMetadata) {
   ASSERT_EQ(init_location_requests + 1, NumGetTableLocationsRequests());
 }
 
+class ScanTokenStaleRaftMembershipTest : public ScanTokenTest {
+ protected:
+  void SetUp() override {
+NO_FATALS(KuduTest::SetUp());
+
+InternalMiniClusterOptions opt;
+opt.num_tablet_servers = 3;
+// Set up the mini cluster
+cluster_.reset(new InternalMiniCluster(env_, std::move(opt)));
+ASSERT_OK(cluster_->Start());
+ASSERT_OK(cluster_->CreateClient(nullptr, &client_));
+  }
+};
+
+// A test scenario to verify how the client's metacache behaves when a leader
+// replica changes, given that the metacache was originally populated
+// from a scan token. The information on the replicas' Raft configuration
+// becomes outdated by the time a write request is sent out, so the client
+// should find a newly elected leader replica and retry the write request
+// with the new leader replica.
+TEST_F(ScanTokenStaleRaftMembershipTest, TabletLeaderChange) {
+  constexpr const char* const kTableName = "scan-token-stale-tablet-config";
+  constexpr const char* const kKey = "key";
+
+  KuduSchema schema;
+  {
+KuduSchemaBuilder builder;
+
builder.AddColumn(kKey)->NotNull()->Type(KuduColumnSchema::INT64)->PrimaryKey();
+builder.AddColumn("col")->Nullable()->Type(KuduColumnSchema::INT64);
+ASSERT_OK(builder.Build());
+  }
+
+  shared_ptr table;
+  {
+// Create a table of RF=3 and a single range partition [-100, 100).
+unique_ptr tc(client_->NewTableCreator());
+tc->table_name(kTableName);
+tc->schema();
+tc->num_replicas(3);
+tc->set_range_partition_columns({ kKey });
+
+{
+  unique_ptr lb(schema.NewRow());
+  ASSERT_OK(lb->SetInt64(kKey, -100));
+  unique_ptr ub(schema.NewRow());
+  ASSERT_OK(ub->SetInt64(kKey, 100));
+  tc->add_range_partition(lb.release(), ub.release());
+}
+ASSERT_OK(tc->Create());
+ASSERT_OK(client_->OpenTable(kTableName, &table));
+  }
+
+  unique_ptr token;
+  {
+// Build scan token(s), embedding information on the tablet locations and
+// replicas' Raft roles.
+vector<KuduScanToken*> tokens;
+KuduScanTokenBuilder builder(table.get());
+ASSERT_OK(builder.IncludeTableMetadata(true));
+ASSERT_OK(builder.IncludeTabletMetadata(true));
+ASSERT_OK(builder.Build(&tokens));
+ASSERT_EQ(1, tokens.size());
+token.reset(tokens.front());
+  }
+
+  shared_ptr<KuduClient> new_client;
+  ASSERT_OK(cluster_->CreateClient(nullptr, &new_client));
+
+  {
+// List the tables to prevent counting initialization RPCs.
+vector<string> tables;
+ASSERT_OK(new_client->ListTables(&tables));
+  }
+
+  const auto init_schema_requests 

(kudu) branch master updated: [util] a small clean up on the Throttler class

2024-07-21 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 274eadfd7 [util] a small clean up on the Throttler class
274eadfd7 is described below

commit 274eadfd79cb2ffcaa0c016376469d70036e11ca
Author: Alexey Serbin 
AuthorDate: Fri Jul 19 15:38:57 2024 -0700

[util] a small clean up on the Throttler class

The motivation behind this patch was to make the API of the Throttler
class more robust after reviewing a changelist that used the Throttler's
functionality.  I have doubts that the current implementation of the
Throttler behaves the way one would expect (the test coverage for that
is spotty), but that's another story.
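
For illustration, a simplified token-bucket model of the Take() retry
pattern used by callers in the table_scanner.cc hunk below; this is an
illustration only, not Kudu's Throttler implementation, and the class
name is made up.

  #include <algorithm>
  #include <chrono>
  #include <cstdint>

  // Illustration only: a byte-only token bucket (Kudu's Throttler also
  // throttles on a per-RPC basis and applies a burst factor).
  class SimpleThrottlerSketch {
   public:
    SimpleThrottlerSketch(int64_t bytes_per_sec, int64_t burst_bytes)
        : rate_(static_cast<double>(bytes_per_sec)),
          capacity_(static_cast<double>(burst_bytes)),
          available_(capacity_),
          last_(std::chrono::steady_clock::now()) {}

    // Returns true and consumes 'bytes' worth of tokens if available;
    // returns false otherwise, and the caller is expected to sleep for
    // a while and retry, as in the retry loop shown in the diff below.
    bool Take(int64_t bytes) {
      Refill();
      if (available_ < static_cast<double>(bytes)) {
        return false;
      }
      available_ -= static_cast<double>(bytes);
      return true;
    }

   private:
    void Refill() {
      const auto now = std::chrono::steady_clock::now();
      const double secs = std::chrono::duration<double>(now - last_).count();
      last_ = now;
      available_ = std::min(capacity_, available_ + secs * rate_);
    }

    const double rate_;
    const double capacity_;
    double available_;
    std::chrono::steady_clock::time_point last_;
  };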

Change-Id: I43d60323c3d84da896c1a5429dfb7d461a24f9b2
Reviewed-on: http://gerrit.cloudera.org:8080/21603
Reviewed-by: Yingchun Lai 
Tested-by: Alexey Serbin 
---
 src/kudu/tablet/tablet.cc   |  5 ++-
 src/kudu/tools/table_scanner.cc |  6 ++--
 src/kudu/tools/tool_action_local_replica.cc |  3 +-
 src/kudu/tserver/tablet_copy_client-test.cc |  1 -
 src/kudu/tserver/tablet_copy_client.cc  |  4 +--
 src/kudu/util/throttler-test.cc | 22 
 src/kudu/util/throttler.cc  | 48 -
 src/kudu/util/throttler.h   | 55 ++---
 8 files changed, 89 insertions(+), 55 deletions(-)

diff --git a/src/kudu/tablet/tablet.cc b/src/kudu/tablet/tablet.cc
index d6d811d6a..909d5df0e 100644
--- a/src/kudu/tablet/tablet.cc
+++ b/src/kudu/tablet/tablet.cc
@@ -386,8 +386,7 @@ Tablet::Tablet(scoped_refptr metadata,
   FLAGS_tablet_compaction_budget_mb, metrics_.get()));
 
   if (FLAGS_tablet_throttler_rpc_per_sec > 0 || 
FLAGS_tablet_throttler_bytes_per_sec > 0) {
-throttler_.reset(new Throttler(MonoTime::Now(),
-   FLAGS_tablet_throttler_rpc_per_sec,
+throttler_.reset(new Throttler(FLAGS_tablet_throttler_rpc_per_sec,
FLAGS_tablet_throttler_bytes_per_sec,
FLAGS_tablet_throttler_burst_factor));
   }
@@ -1795,7 +1794,7 @@ bool Tablet::ShouldThrottleAllow(int64_t bytes) {
   if (!throttler_) {
 return true;
   }
-  return throttler_->Take(MonoTime::Now(), 1, bytes);
+  return throttler_->Take(1, bytes);
 }
 
 Status Tablet::PickRowSetsToCompact(RowSetsInCompactionOrFlush *picked,
diff --git a/src/kudu/tools/table_scanner.cc b/src/kudu/tools/table_scanner.cc
index eebb94a41..fac0ce949 100644
--- a/src/kudu/tools/table_scanner.cc
+++ b/src/kudu/tools/table_scanner.cc
@@ -582,7 +582,7 @@ TableScanner::TableScanner(
   out_(nullptr) {
   CHECK_OK(SetReplicaSelection(FLAGS_replica_selection));
   if (FLAGS_table_copy_throttler_bytes_per_sec > 0) {
-throttler_ = std::make_shared(MonoTime::Now(), 0,
+throttler_ = std::make_shared(Throttler::kNoLimit,
  
FLAGS_table_copy_throttler_bytes_per_sec,
  
FLAGS_table_copy_throttler_burst_factor);
   }
@@ -611,9 +611,9 @@ Status TableScanner::ScanData(const vector& 
tokens,
   // Limit table copy speed.
   if (throttler_) {
 SCOPED_LOG_SLOW_EXECUTION(WARNING, 1000, "Table copy throttler");
-while (!throttler_->Take(MonoTime::Now(), 0,
+while (!throttler_->Take(0,
  batch.direct_data().size() + 
batch.indirect_data().size())) {
-  SleepFor(MonoDelta::FromMilliseconds(10));
+  SleepFor(MonoDelta::FromMicroseconds(Throttler::kRefillPeriodMicros 
/ 2));
 }
   }
   RETURN_NOT_OK(cb(batch));
diff --git a/src/kudu/tools/tool_action_local_replica.cc 
b/src/kudu/tools/tool_action_local_replica.cc
index 2449db455..5c15d6bbd 100644
--- a/src/kudu/tools/tool_action_local_replica.cc
+++ b/src/kudu/tools/tool_action_local_replica.cc
@@ -334,8 +334,7 @@ class TabletCopier {
 
 shared_ptr throttler;
 if (FLAGS_tablet_copy_throttler_bytes_per_sec > 0) {
-  throttler = std::make_shared(MonoTime::Now(),
-  0,
+  throttler = std::make_shared(0,
   
FLAGS_tablet_copy_throttler_bytes_per_sec,
   
FLAGS_tablet_copy_throttler_burst_factor);
 }
diff --git a/src/kudu/tserver/tablet_copy_client-test.cc 
b/src/kudu/tserver/tablet_copy_client-test.cc
index a53f2aa31..fadaca536 100644
--- a/src/kudu/tserver/tablet_copy_client-test.cc
+++ b/src/kudu/tserver/tablet_copy_client-test.cc
@@ -319,7 +319,6 @@ class TabletCopyThrottlerTest : public TabletCopyClientTest 
{
   TabletCopyThrottlerTest() {
 mode_ = TabletCopyMode::REMO

(kudu) 03/03: KUDU-3371 Add NO_ROCKSDB build option

2024-07-16 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 50171fc752911c8b83972bd57d7873f5cbcc3b7c
Author: Yingchun Lai 
AuthorDate: Sun Jun 30 23:24:04 2024 +0800

KUDU-3371 Add NO_ROCKSDB build option

This patch adds a build option 'NO_ROCKSDB'. Now it's
possible to disable building the 'logr' block manager and
linking librocksdb by setting -DNO_ROCKSDB=1 explicitly.

By default, NO_ROCKSDB is not set.
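
For illustration, a minimal sketch of how code can branch on the
NO_ROCKSDB definition that CMake adds via add_definitions(-DNO_ROCKSDB);
the function below is hypothetical and only shows the compile-time guard.

  #include <string>

  // Hypothetical example: RocksDB-backed code is compiled out when the
  // NO_ROCKSDB preprocessor definition is set by the build.
  std::string SupportedBlockManagers() {
  #ifdef NO_ROCKSDB
    // The 'logr' block manager (backed by RocksDB) is not built.
    return "file,log";
  #else
    return "file,log,logr";
  #endif
  }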

Change-Id: Ie9cfb5e928d6fb995ac667533a3651cad91010c7
Reviewed-on: http://gerrit.cloudera.org:8080/21560
Tested-by: Alexey Serbin 
Reviewed-by: Alexey Serbin 
---
 CMakeLists.txt  | 27 +++--
 src/kudu/benchmarks/CMakeLists.txt  | 20 
 src/kudu/client/CMakeLists.txt  |  7 --
 src/kudu/consensus/CMakeLists.txt   |  5 +++-
 src/kudu/fs/CMakeLists.txt  | 14 ---
 src/kudu/fs/block_manager-stress-test.cc|  4 
 src/kudu/fs/block_manager-test.cc   | 14 ++-
 src/kudu/fs/block_manager.h |  7 +-
 src/kudu/fs/data_dirs.cc|  2 ++
 src/kudu/fs/dir_manager.cc  | 31 ++---
 src/kudu/fs/dir_manager.h   | 20 
 src/kudu/fs/fs_manager-test.cc  | 26 +
 src/kudu/fs/fs_manager.cc   | 28 ++
 src/kudu/fs/fs_report.cc|  6 +
 src/kudu/fs/fs_report.h |  4 
 src/kudu/fs/log_block_manager-test-util.cc  | 10 
 src/kudu/fs/log_block_manager-test.cc   | 21 -
 src/kudu/fs/log_block_manager.cc| 16 +
 src/kudu/fs/log_block_manager.h |  8 +++
 src/kudu/integration-tests/CMakeLists.txt   |  6 -
 src/kudu/integration-tests/ts_recovery-itest.cc |  3 ++-
 src/kudu/server/CMakeLists.txt  | 10 +---
 src/kudu/tablet/compaction-test.cc  |  2 +-
 src/kudu/tools/CMakeLists.txt   |  5 +++-
 src/kudu/tools/kudu-tool-test.cc|  4 
 src/kudu/util/CMakeLists.txt| 16 +
 26 files changed, 245 insertions(+), 71 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a8a68bb95..0f6201447 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1279,12 +1279,19 @@ ADD_THIRDPARTY_LIB(boost_date_time
 SHARED_LIB "${BOOST_DATE_TIME_SHARED_LIB}")
 
 ## rocksdb
-find_package(Rocksdb REQUIRED)
-include_directories(SYSTEM ${ROCKSDB_INCLUDE_DIR})
-ADD_THIRDPARTY_LIB(rocksdb
-STATIC_LIB "${ROCKSDB_STATIC_LIB}"
-SHARED_LIB "${ROCKSDB_SHARED_LIB}"
-DEPS snappy)
+# The 'logr' block manager will be built if not disabled explicitly.
+if("${NO_ROCKSDB}" STREQUAL "" OR NOT NO_ROCKSDB)
+  set(NO_ROCKSDB 0)
+  find_package(Rocksdb REQUIRED)
+  include_directories(SYSTEM ${ROCKSDB_INCLUDE_DIR})
+  ADD_THIRDPARTY_LIB(rocksdb
+  STATIC_LIB "${ROCKSDB_STATIC_LIB}"
+  SHARED_LIB "${ROCKSDB_SHARED_LIB}"
+  DEPS snappy)
+else()
+  add_definitions(-DNO_ROCKSDB)
+  set(NO_ROCKSDB 1)
+endif()
 
 
 # Enable sized deallocation where supported.
@@ -1338,7 +1345,7 @@ if ("${KUDU_USE_ASAN}" OR "${KUDU_USE_TSAN}" OR 
"${KUDU_USE_UBSAN}")
 endif()
 set(KUDU_TEST_LINK_LIBS ${KUDU_MIN_TEST_LIBS})
 
-# This macro initializes KUDU_MIN_TEST_LIBS to KUDU_MIN_TEST_LIBS and
+# This macro initializes KUDU_TEST_LINK_LIBS to KUDU_MIN_TEST_LIBS and
 # appends the passed list of libraries to the end. This ensures that
 # KUDU_MIN_TEST_LIBS is linked first.
 macro(SET_KUDU_TEST_LINK_LIBS)
@@ -1346,6 +1353,12 @@ macro(SET_KUDU_TEST_LINK_LIBS)
   list(APPEND KUDU_TEST_LINK_LIBS ${ARGN})
 endmacro()
 
+# This macro appends the passed list of libraries to the end of
+# KUDU_TEST_LINK_LIBS.
+macro(ADD_KUDU_TEST_LINK_LIBS)
+  list(APPEND KUDU_TEST_LINK_LIBS ${ARGN})
+endmacro()
+
 # Use "thin archives" for our static libraries. We only use static libraries
 # internal to our own build, so thin ones are just as good and much smaller.
 if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
diff --git a/src/kudu/benchmarks/CMakeLists.txt 
b/src/kudu/benchmarks/CMakeLists.txt
index 73b54462c..f33a21dd3 100644
--- a/src/kudu/benchmarks/CMakeLists.txt
+++ b/src/kudu/benchmarks/CMakeLists.txt
@@ -33,15 +33,21 @@ target_link_libraries(tpch
 add_executable(tpch1 tpch/tpch1.cc)
 target_link_libraries(tpch1
   ${KUDU_MIN_TEST_LIBS}
-  tpch
-  rocksdb)
+  tpch)
+if(NOT NO_ROCKSDB)
+  target_link_libraries(tpch1
+rocksdb)
+endi

(kudu) branch master updated (eedce87ca -> 50171fc75)

2024-07-16 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


from eedce87ca [cfile] allocate CFileWriter field on the stack when possible
 new 079bf1d71 Fix cache cleaning in dense_node-itest
 new d91d5c95d [tool] Add '--columns' param to 'table list'
 new 50171fc75 KUDU-3371 Add NO_ROCKSDB build option

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 CMakeLists.txt  | 27 ++---
 src/kudu/benchmarks/CMakeLists.txt  | 20 +--
 src/kudu/client/CMakeLists.txt  |  7 ++-
 src/kudu/client/client-internal.cc  |  1 +
 src/kudu/client/client-internal.h   |  1 +
 src/kudu/consensus/CMakeLists.txt   |  5 +-
 src/kudu/fs/CMakeLists.txt  | 14 -
 src/kudu/fs/block_manager-stress-test.cc|  4 ++
 src/kudu/fs/block_manager-test.cc   | 14 -
 src/kudu/fs/block_manager.h |  7 ++-
 src/kudu/fs/data_dirs.cc|  2 +
 src/kudu/fs/dir_manager.cc  | 31 ++-
 src/kudu/fs/dir_manager.h   | 20 +--
 src/kudu/fs/fs_manager-test.cc  | 26 +
 src/kudu/fs/fs_manager.cc   | 28 ++
 src/kudu/fs/fs_report.cc|  6 ++
 src/kudu/fs/fs_report.h |  4 ++
 src/kudu/fs/log_block_manager-test-util.cc  | 10 
 src/kudu/fs/log_block_manager-test.cc   | 21 ++-
 src/kudu/fs/log_block_manager.cc| 16 ++
 src/kudu/fs/log_block_manager.h |  8 +++
 src/kudu/integration-tests/CMakeLists.txt   |  6 +-
 src/kudu/integration-tests/dense_node-itest.cc  |  5 +-
 src/kudu/integration-tests/ts_recovery-itest.cc |  3 +-
 src/kudu/server/CMakeLists.txt  | 10 +++-
 src/kudu/tablet/compaction-test.cc  |  2 +-
 src/kudu/tools/CMakeLists.txt   |  5 +-
 src/kudu/tools/kudu-tool-test.cc| 73 +
 src/kudu/tools/tool_action_table.cc | 64 ++
 src/kudu/util/CMakeLists.txt| 16 --
 src/kudu/util/env.h |  3 +
 src/kudu/util/env_posix.cc  | 21 +++
 src/kudu/util/os-util.cc| 15 ++---
 src/kudu/util/os-util.h |  2 +
 34 files changed, 413 insertions(+), 84 deletions(-)



(kudu) 02/03: [tool] Add '--columns' param to 'table list'

2024-07-16 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit d91d5c95dab38770890cac6a30be63f80eb82fec
Author: zchovan 
AuthorDate: Mon Jun 10 17:59:51 2024 +0200

[tool] Add '--columns' param to 'table list'

Currently there is no easy way to get table UUIDs from the kudu CLI,
so this patch adds an optional '--columns' parameter to the
'kudu table list' command, which works similarly to 'kudu master/tserver list'.
The available columns are: 'id', 'name', 'num_tablets',
'num_replicas', 'live_row_count'.

Change-Id: I324b920e6feb6139e7d884e3cf08069b0cb922a4
Reviewed-on: http://gerrit.cloudera.org:8080/21496
Reviewed-by: Wang Xixu <1450306...@qq.com>
Tested-by: Marton Greber 
Reviewed-by: Marton Greber 
---
 src/kudu/client/client-internal.cc  |  1 +
 src/kudu/client/client-internal.h   |  1 +
 src/kudu/tools/kudu-tool-test.cc| 69 +
 src/kudu/tools/tool_action_table.cc | 64 ++
 4 files changed, 135 insertions(+)

diff --git a/src/kudu/client/client-internal.cc 
b/src/kudu/client/client-internal.cc
index f5c54a064..c5c0e27d8 100644
--- a/src/kudu/client/client-internal.cc
+++ b/src/kudu/client/client-internal.cc
@@ -512,6 +512,7 @@ Status KuduClient::Data::ListTablesWithInfo(KuduClient* 
client,
   }
   for (const auto& table : resp.tables()) {
 TableInfo info;
+if (table.has_id()) info.id = table.id();
 info.table_name = table.name();
 info.live_row_count = table.has_live_row_count() ? table.live_row_count() 
: 0;
 info.num_tablets = table.has_num_tablets() ? table.num_tablets() : 0;
diff --git a/src/kudu/client/client-internal.h 
b/src/kudu/client/client-internal.h
index 56c0965c0..6efcd78b3 100644
--- a/src/kudu/client/client-internal.h
+++ b/src/kudu/client/client-internal.h
@@ -154,6 +154,7 @@ class KuduClient::Data {
   };
 
   struct TableInfo {
+std::string id;
 std::string table_name;
 uint64_t live_row_count;
 int num_tablets;
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index df689edf3..8ed943fde 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -5370,6 +5370,75 @@ TEST_F(ToolTest, TestMasterList) {
   ASSERT_STR_CONTAINS(out, "VOTER");
 }
 
+TEST_F(ToolTest, TestTableList) {
+  ExternalMiniClusterOptions opts;
+  NO_FATALS(StartExternalMiniCluster(std::move(opts)));
+  shared_ptr<KuduClient> client;
+  ASSERT_OK(cluster_->CreateClient(nullptr, &client));
+
+  string master_addr = cluster_->master()->bound_rpc_addr().ToString();
+
+  constexpr const char* const kTableName = "kudu.table";
+
+  // Create a table.
+  TestWorkload workload(cluster_.get());
+  workload.set_table_name(kTableName);
+  workload.set_num_replicas(1);
+  workload.Setup();
+
+  shared_ptr<KuduTable> table;
+  ASSERT_OK(client->OpenTable(kTableName, &table));
+
+  // Confirm that the simple table listing works
+  {
+string out;
+NO_FATALS(RunActionStdoutString(
+Substitute("table list $0", master_addr),
+&out));
+
+ASSERT_STR_CONTAINS(out, table->name());
+ASSERT_STR_NOT_CONTAINS(out, table->id());
+ASSERT_STR_NOT_CONTAINS(out, "num_replicas");
+  }
+
+  // Confirm that the --show_table_info flag works
+  {
+string out;
+NO_FATALS(RunActionStdoutString(
+Substitute("table list $0 --show_table_info", master_addr),
+&out));
+
+ASSERT_STR_CONTAINS(out, table->name());
+ASSERT_STR_NOT_CONTAINS(out, table->id());
+ASSERT_STR_CONTAINS(out, "num_tablets");
+ASSERT_STR_CONTAINS(out, "num_replicas");
+ASSERT_STR_CONTAINS(out, "live_row_count");
+  }
+
+  // Confirm that the --columns flag works
+  {
+string out;
+NO_FATALS(RunActionStdoutString(
+Substitute("table list $0 
--columns=id,name,live_row_count,num_tablets,num_replicas",
+   master_addr),
+&out));
+
+ASSERT_STR_CONTAINS(out, table->id());
+ASSERT_STR_CONTAINS(out, table->name());
+ASSERT_STR_CONTAINS(out, "num_replicas");
+  }
+
+  // Confirm that wrong column name doesn't crash the tool
+  {
+string stderr;
+Status s = RunActionStderrString(
+Substitute("table list $0 
--columns=VeryBadAbsolutelyIncorrectColumName", master_addr),
+&stderr);
+ASSERT_FALSE(s.ok());
+ASSERT_STR_CONTAINS(stderr, "Invalid column name");
+  }
+}
+
 // Operate on Kudu tables:
 // (1)delete a table
 // (2)rename a table
diff --git a/src/kudu/tools/tool_action_table.cc 
b/src/kudu/tools/tool_action_table.cc
index 9d8ae7885..b6901d37a 100644
--- a/src/kudu/tools/tool_action_table.cc
+++ b/src/kudu/tools/tool_action_table.cc
@@ -110,6 +110,8 @@ using std::vector;
 using strings::Split;
 using strings

(kudu) 01/03: Fix cache cleaning in dense_node-itest

2024-07-16 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 079bf1d71a39134b9abb4e1c250ce7988230797e
Author: Zoltan Martonka 
AuthorDate: Wed Jun 26 09:20:39 2024 +

Fix cache cleaning in dense_node-itest

Running dense_node-itest with --measure_startup_drop_caches (as
src/kudu/scripts/benchmarks.sh does) fails on Ubuntu 22.04. We try to
open the /proc/sys/vm/drop_caches file with O_RDWR mode to write "3"
into it. On Ubuntu 18.04, the file seems to have read permissions too,
but on 22.04 it is properly set to --w------- (write-only), so we fail to do so.
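
For illustration, a minimal standalone sketch (not the Kudu code) of what
the cache-dropping step boils down to; writing "3" asks the kernel to free
both the pagecache and the slab objects (dentries and inodes), and the file
must be opened write-only:

  // sketch.cc -- requires root privileges to actually succeed
  #include <fcntl.h>
  #include <unistd.h>
  #include <cstdio>

  int main() {
    // O_WRONLY matches the write-only mode of the proc file; O_RDWR is refused.
    int fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
    if (fd < 0) { perror("open"); return 1; }
    // "3" = free pagecache + dentries and inodes.
    if (write(fd, "3\n", 2) != 2) perror("write");
    close(fd);
    return 0;
  }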

Change-Id: I1f36f5a97d9a032aeb495989b4dc05191bf66425
Reviewed-on: http://gerrit.cloudera.org:8080/21509
Reviewed-by: Zoltan Chovan 
Reviewed-by: Wang Xixu <1450306...@qq.com>
Tested-by: Marton Greber 
Reviewed-by: Marton Greber 
---
 src/kudu/integration-tests/dense_node-itest.cc |  5 ++---
 src/kudu/util/env.h|  3 +++
 src/kudu/util/env_posix.cc | 21 +
 src/kudu/util/os-util.cc   | 15 +--
 src/kudu/util/os-util.h|  2 ++
 5 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/src/kudu/integration-tests/dense_node-itest.cc 
b/src/kudu/integration-tests/dense_node-itest.cc
index afd849e6a..212181eac 100644
--- a/src/kudu/integration-tests/dense_node-itest.cc
+++ b/src/kudu/integration-tests/dense_node-itest.cc
@@ -42,6 +42,7 @@
 #include "kudu/util/env.h"
 #include "kudu/util/metrics.h"
 #include "kudu/util/monotime.h"
+#include "kudu/util/os-util.h"
 #include "kudu/util/status.h"
 #include "kudu/util/stopwatch.h"
 #include "kudu/util/test_macros.h"
@@ -247,9 +248,7 @@ TEST_P(DenseNodeTest, RunTest) {
   unique_ptr f;
   WritableFileOptions opts;
   opts.mode = Env::MUST_EXIST;
-  ASSERT_OK(env_->NewWritableFile(opts, "/proc/sys/vm/drop_caches", ));
-  ASSERT_OK(f->Append("3\n"));
-  ASSERT_OK(f->Close());
+  ASSERT_OK(FreeSlabObjectsAndPagecache());
 }
   }
 
diff --git a/src/kudu/util/env.h b/src/kudu/util/env.h
index 9f941b3f7..a69281970 100644
--- a/src/kudu/util/env.h
+++ b/src/kudu/util/env.h
@@ -396,6 +396,9 @@ class Env {
   // Set the raw server encryption key. The key size is in bits.
   virtual void SetEncryptionKey(const uint8_t* key, size_t key_size) = 0;
 
+  // Used for manipulating the proc filesystem
+  virtual Status EchoToFile(const char* file_path, const char* data_ptr, int data_size) = 0;
+
  private:
   DISALLOW_COPY_AND_ASSIGN(Env);
 };
diff --git a/src/kudu/util/env_posix.cc b/src/kudu/util/env_posix.cc
index 360df4f25..1b0ddd635 100644
--- a/src/kudu/util/env_posix.cc
+++ b/src/kudu/util/env_posix.cc
@@ -2464,6 +2464,27 @@ class PosixEnv : public Env {
 return Status::OK();
   }
 
+  Status EchoToFile(const char* file_path, const char* data_ptr, int data_size) override {
+int f;
+RETRY_ON_EINTR(f, open(file_path, O_WRONLY));
+if (f == -1)
+  return IOError(file_path, errno);
+ssize_t write_ret;
+RETRY_ON_EINTR(write_ret, write(f, data_ptr, data_size));
+if (write_ret == -1) {
+  // Try to close it anyway, but return the error during write().
+  int saved_errno = errno;
+  int dont_care;
+  RETRY_ON_EINTR(dont_care, close(f));
+  return IOError(file_path, saved_errno);
+}
+int close_ret;
+RETRY_ON_EINTR(close_ret, close(f));
+if (close_ret == -1)
+  return IOError(file_path, errno);
+return Status::OK();
+  }
+
   std::optional encryption_key_;
 };
 
diff --git a/src/kudu/util/os-util.cc b/src/kudu/util/os-util.cc
index 85ceb9f4b..bedfcd0fb 100644
--- a/src/kudu/util/os-util.cc
+++ b/src/kudu/util/os-util.cc
@@ -24,7 +24,6 @@
 
 #include "kudu/util/os-util.h"
 
-#include 
 #include 
 #include 
 
@@ -36,7 +35,6 @@
 
 #include 
 
-#include "kudu/gutil/macros.h"
 #include "kudu/gutil/strings/numbers.h"
 #include "kudu/gutil/strings/split.h"
 #include "kudu/gutil/strings/stringpiece.h"
@@ -133,14 +131,11 @@ void DisableCoreDumps() {
   // is set to a pipe rather than a file, it's not sufficient. Setting
   // this pattern results in piping a very minimal dump into the core
   // processor (eg abrtd), thus speeding up the crash.
-  int f;
-  RETRY_ON_EINTR(f, open("/proc/self/coredump_filter", O_WRONLY));
-  if (f >= 0) {
-ssize_t ret;
-RETRY_ON_EINTR(ret, write(f, "", 8));
-int close_ret;
-RETRY_ON_EINTR(close_ret, close(f));
-  }
+  (void)Env::Default()->EchoToFile("/proc/self/coredump_filter", "", 
8);
+}
+
+Status FreeSlabObjectsAndPagecache() {
+  return Env::Default()->EchoToFile("/pro

(kudu) branch master updated: KUDU-3371 check for RocksDB dir presence upon opening FSManager

2024-06-19 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 794a681bf KUDU-3371 check for RocksDB dir presence upon opening 
FSManager
794a681bf is described below

commit 794a681bfdb83fd902f953da6558fe462ee17aba
Author: Yingchun Lai 
AuthorDate: Mon Apr 8 00:29:35 2024 +0800

KUDU-3371 check for RocksDB dir presence upon opening FSManager

When using RocksDB to store LBM metadata, specify the
create_if_missing and error_if_exists options of
rocksdb::Options to make sure we can open the RocksDB
directory correctly. When creating a Kudu data directory,
open it with these options enabled; otherwise, open it with
the options disabled.
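
To illustrate the distinction (a minimal sketch against the stock RocksDB
C++ API, not the actual Kudu code):

  #include <rocksdb/db.h>
  #include <rocksdb/options.h>
  #include <string>

  // Open the per-directory RocksDB instance that backs the LBM metadata.
  rocksdb::Status OpenMetadataDb(const std::string& path,
                                 bool newly_created,
                                 rocksdb::DB** db) {
    rocksdb::Options opts;
    if (newly_created) {
      // A brand new Kudu data directory: the RocksDB dir must not exist yet.
      opts.create_if_missing = true;
      opts.error_if_exists = true;
    } else {
      // An existing data directory: the RocksDB dir must already be there.
      opts.create_if_missing = false;
    }
    return rocksdb::DB::Open(opts, path, db);
  }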

Change-Id: Iab4bffc6b902ab96edf0ca6c44f51c8db2670d52
Reviewed-on: http://gerrit.cloudera.org:8080/21295
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin 
---
 src/kudu/fs/data_dirs.cc |   7 +-
 src/kudu/fs/dir_manager.cc   |  42 +---
 src/kudu/fs/dir_manager.h|  22 ---
 src/kudu/fs/fs_manager-test.cc   | 138 ++-
 src/kudu/fs/log_block_manager.cc |  11 +---
 5 files changed, 190 insertions(+), 30 deletions(-)

diff --git a/src/kudu/fs/data_dirs.cc b/src/kudu/fs/data_dirs.cc
index 730d284a9..f92d6a7c8 100644
--- a/src/kudu/fs/data_dirs.cc
+++ b/src/kudu/fs/data_dirs.cc
@@ -205,7 +205,8 @@ std::unique_ptr DataDirManager::CreateNewDir(
 std::string dir, std::unique_ptr metadata_file,
 std::unique_ptr pool) {
   if (FLAGS_block_manager == "logr") {
-return std::make_unique<RdbDir>(env, metrics, fs_type, std::move(dir),
+bool newly_created = ContainsKey(created_fs_dir_paths_, dir);
+return std::make_unique<RdbDir>(env, metrics, fs_type, newly_created, std::move(dir),
 std::move(metadata_file), std::move(pool));
   }
   return std::make_unique<Dir>(env, metrics, fs_type, std::move(dir),
@@ -231,6 +232,10 @@ Status DataDirManager::OpenExistingForTests(Env* env,
   for (const auto& r : data_fs_roots) {
 roots.push_back({ r, Status::OK() });
   }
+
+  // Reset the existing DataDirManager before opening the new one to release 
resources
+  // (e.g. RocksDB 'LOCK' file when --block_manager=logr) held by the existing 
one.
+  dd_manager->reset();
   return DataDirManager::OpenExisting(env, std::move(roots), opts, dd_manager);
 }
 
diff --git a/src/kudu/fs/dir_manager.cc b/src/kudu/fs/dir_manager.cc
index cc5282c11..81d120e9c 100644
--- a/src/kudu/fs/dir_manager.cc
+++ b/src/kudu/fs/dir_manager.cc
@@ -208,12 +208,31 @@ int Dir::reserved_bytes() {
 shared_ptr RdbDir::s_block_cache_;
 RdbDir::RdbDir(Env* env, DirMetrics* metrics,
FsType fs_type,
+   bool newly_created,
string dir,
unique_ptr metadata_file,
unique_ptr pool)
-: Dir(env, metrics, fs_type, std::move(dir), std::move(metadata_file), 
std::move(pool)) {}
+: Dir(env, metrics, fs_type, std::move(dir), std::move(metadata_file), 
std::move(pool)) {
+  if (!metadata_file_->healthy()) {
+LOG(WARNING) << Substitute("Skip initializing rocksdb instance for the 
non-healthy "
+   "directory $0",
+   dir_);
+return;
+  }
+
+  // Initialize the directory only if it's healthy.
+  // Note: the unhealthy directories will be kept, but will be skipped when 
opening block manager.
+  auto s = InitRocksDBInstance(newly_created);
+  if (!s.ok()) {
+s = s.CloneAndPrepend(Substitute("could not initialize $0", dir_));
+LOG(WARNING) << s.ToString();
+// Mark the directory as failed if it could not be initialized.
+DCHECK(metadata_file_->healthy());
+metadata_file_->SetInstanceFailed(s);
+  }
+}
 
-Status RdbDir::Prepare() {
+Status RdbDir::InitRocksDBInstance(bool newly_created) {
   DCHECK_STREQ(FLAGS_block_manager.c_str(), "logr");
   if (db_) {
 // Some unit tests (e.g. BlockManagerTest.PersistenceTest) reopen the 
block manager,
@@ -228,14 +247,15 @@ Status RdbDir::Prepare() {
   // https://github.com/facebook/rocksdb/blob/main/include/rocksdb/options.h
   rocksdb::Options opts;
   // A RocksDB instance is created if it does not exist when opening the Dir.
-  // TODO(yingchun): We should distinguish creating new data directory and 
opening existing data
-  // directory, and set proper options to avoid mishaps.
-  // When creating new data directory, set 
opts.error_if_exists = true.
-  // When opening existing data directory, set 
opts.create_if_missing = false.
-  opts.create_if_missing = true;
+  if (newly_created) {
+  opts.create_if_missing = true;
+  opts.error_if_exists = true;
+  } else {
+  opts.c

(kudu) branch branch-1.17.x updated (08ef2f997 -> b2e4692a0)

2024-06-19 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a change to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git


from 08ef2f997 [ARM] Concurrent binary tree memory barriers fixed.
 new ba76a58ff KUDU-3566 fix summary metrics in Prometheus format
 new b2e4692a0 [client-test] make ClearCacheAndConcurrentWorkload more 
stable in TSAN

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/kudu/client/client-test.cc |   7 ++-
 src/kudu/util/metrics-test.cc  |  32 +++-
 src/kudu/util/metrics.cc   | 112 -
 3 files changed, 79 insertions(+), 72 deletions(-)



(kudu) 01/02: KUDU-3566 fix summary metrics in Prometheus format

2024-06-19 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit ba76a58ff10f767f630fb30a043532c9edc6dbe6
Author: Alexey Serbin 
AuthorDate: Fri Apr 19 10:58:25 2024 -0700

KUDU-3566 fix summary metrics in Prometheus format

This patch corrects the output of various Kudu metrics backed by HDR
histograms.  From the Prometheus perspective, those metrics are output
as summaries [1], not histograms [2].  It's necessary to mark them
accordingly to avoid misinterpretation of the collected statistics.

I updated corresponding unit tests and verified that the updated output
was properly parsed and interpreted by a Prometheus 2.50.0 instance
running on my macOS laptop.

[1] https://prometheus.io/docs/concepts/metric_types/#summary
[2] https://prometheus.io/docs/concepts/metric_types/#histogram

Change-Id: I1375ddf1b0ecd730327cd44b4955813b80107f7b
Reviewed-on: http://gerrit.cloudera.org:8080/21338
Tested-by: Alexey Serbin 
Reviewed-by: Abhishek Chennaka 
(cherry picked from commit b236d534abeb60520e4568bb4a1452d6674bb597)
  Conflicts:
src/kudu/util/metrics.cc
Reviewed-on: http://gerrit.cloudera.org:8080/21526
Reviewed-by: Yingchun Lai 
---
 src/kudu/util/metrics-test.cc |  32 +++-
 src/kudu/util/metrics.cc  | 112 --
 2 files changed, 73 insertions(+), 71 deletions(-)

diff --git a/src/kudu/util/metrics-test.cc b/src/kudu/util/metrics-test.cc
index 8d598ebc5..d146b7e00 100644
--- a/src/kudu/util/metrics-test.cc
+++ b/src/kudu/util/metrics-test.cc
@@ -649,24 +649,30 @@ TEST_F(MetricsTest, SimpleHistogramMergeTest) {
 }
 
 TEST_F(MetricsTest, HistogramPrometheusTest) {
+  constexpr const char* const kExpectedOutput =
+  "# HELP test_hist foo\n"
+  "# TYPE test_hist summary\n"
+  "test_hist{unit_type=\"milliseconds\", quantile=\"0\"} 1\n"
+  "test_hist{unit_type=\"milliseconds\", quantile=\"0.75\"} 2\n"
+  "test_hist{unit_type=\"milliseconds\", quantile=\"0.95\"} 3\n"
+  "test_hist{unit_type=\"milliseconds\", quantile=\"0.99\"} 4\n"
+  "test_hist{unit_type=\"milliseconds\", quantile=\"0.999\"} 5\n"
+  "test_hist{unit_type=\"milliseconds\", quantile=\"0.\"} 5\n"
+  "test_hist{unit_type=\"milliseconds\", quantile=\"1\"} 5\n"
+  "test_hist_sum 1460\n"
+  "test_hist_count 1000\n";
+
   scoped_refptr<Histogram> hist = METRIC_test_hist.Instantiate(entity_);
+  hist->IncrementBy(1, 700);
+  hist->IncrementBy(2, 200);
+  hist->IncrementBy(3, 50);
+  hist->IncrementBy(4, 40);
+  hist->IncrementBy(5, 10);
 
   ostringstream output;
+  PrometheusWriter writer(&output);
+  ASSERT_OK(hist->WriteAsPrometheus(&writer, {}));
-
-  const string expected_output = "# HELP test_hist foo\n"
- "# TYPE test_hist histogram\n"
- "test_hist_bucket{unit_type=\"milliseconds\", 
le=\"0.75\"} 0\n"
- "test_hist_bucket{unit_type=\"milliseconds\", 
le=\"0.95\"} 0\n"
- "test_hist_bucket{unit_type=\"milliseconds\", 
le=\"0.99\"} 0\n"
- "test_hist_bucket{unit_type=\"milliseconds\", 
le=\"0.999\"} 0\n"
- "test_hist_bucket{unit_type=\"milliseconds\", 
le=\"0.\"} 0\n"
- "test_hist_bucket{unit_type=\"milliseconds\", 
le=\"+Inf\"} 0\n"
- "test_hist_sum{unit_type=\"milliseconds\"} 
0\n"
- "test_hist_count{unit_type=\"milliseconds\"} 
0\n";
-
-  ASSERT_EQ(expected_output, output.str());
+  ASSERT_EQ(kExpectedOutput, output.str());
 }
 
 TEST_F(MetricsTest, JsonPrintTest) {
diff --git a/src/kudu/util/metrics.cc b/src/kudu/util/metrics.cc
index 4b0f2c50f..e76d635fd 100644
--- a/src/kudu/util/metrics.cc
+++ b/src/kudu/util/metrics.cc
@@ -749,9 +749,18 @@ void MetricPrototype::WriteFields(JsonWriter* writer,
 
 void MetricPrototype::WriteHelpAndType(PrometheusWriter* writer,
const string& prefix) const {
+  static constexpr const char* const kSummary = "summary";
+
+  // The way how HdrHistogram-backed stats are presented in Kudu metrics
+  // corresponds to a 'summary' metric in Prometheus, not a 'histogram' one 
[1].
+  //
+  // [1] https://

(kudu) 02/02: [client-test] make ClearCacheAndConcurrentWorkload more stable in TSAN

2024-06-19 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit b2e4692a0f56b9e4f1a4581821f0ca8da7e6be71
Author: Alexey Serbin 
AuthorDate: Mon Jun 17 09:49:59 2024 -0700

[client-test] make ClearCacheAndConcurrentWorkload more stable in TSAN

Change-Id: I1f5ce6d46260d038bf11d1972c7f7299473ab324
Reviewed-on: http://gerrit.cloudera.org:8080/21523
Reviewed-by: Abhishek Chennaka 
Tested-by: Alexey Serbin 
(cherry picked from commit 8ed4db154596136e3ef4fbe27457992c119ed2b6)
Reviewed-on: http://gerrit.cloudera.org:8080/21524
Reviewed-by: Yingchun Lai 
---
 src/kudu/client/client-test.cc | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index 1b0ccd7e1..59095bd71 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -2378,9 +2378,14 @@ TEST_F(ClientTest, TestMetaCacheExpiry) {
 // when altering a table by adding a new range partition (see
 // KuduTableAlterer::Alter() for details).
 TEST_F(ClientTest, ClearCacheAndConcurrentWorkload) {
+#if defined(THREAD_SANITIZER) || defined(ADDRESS_SANITIZER)
+  constexpr const int64_t kResetIntervalMs = 100;
+#else
+  constexpr const int64_t kResetIntervalMs = 3;
+#endif
   CountDownLatch latch(1);
   thread cache_cleaner([&]() {
-const auto sleep_interval = MonoDelta::FromMilliseconds(3);
+const auto sleep_interval = MonoDelta::FromMilliseconds(kResetIntervalMs);
 while (!latch.WaitFor(sleep_interval)) {
   client_->data_->meta_cache_->ClearCache();
 }



(kudu) branch master updated: KUDU-3580 Fix the crash caused when binaries run on older CPU machines

2024-06-18 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 1474380f5 KUDU-3580 Fix the crash caused when binaries run on older 
CPU machines
1474380f5 is described below

commit 1474380f5ccfd2f7e78756488d12eb52d2664132
Author: Yingchun Lai 
AuthorDate: Thu Apr 11 12:06:52 2024 +0800

KUDU-3580 Fix the crash caused when binaries run on older CPU machines

After linking RocksDB into Kudu, the Kudu binaries may
crash with an "Illegal instruction" error when running
on machines which don't support newer CPU
instructions (e.g. AVX512) but were built on a
machine which does.

This patch enables the PORTABLE [1] option when building
librocksdb to fix the issue.

It should be noted that portable libraries may cause
a slight performance degradation, so it's recommended to
disable the portable option (by setting the PORTABLE environment
variable to OFF when building the Kudu thirdparties) if there
are no portability requirements.

The PORTABLE option currently only takes effect on librocksdb.
The following shows a comparison of RocksDB's 'db_bench'
benchmark results with the '-DPORTABLE' option enabled
and disabled:
- The test is similar to Kudu use case, random write and
  sequential read, key and value size is about 40 bytes.
- The tests ran 3 times.
- The binaries are built and run on the same machine
  which supports newer CPU instruction (e.g. AVX512).

PORTABLE:
$ ./db_bench -benchmarks=fillrandom,readseq -num=1000 -key_size=40 
-value_size=40
1.
fillrandom   :   5.237 micros/op 190954 ops/sec 52.369 seconds 1000 
operations;   14.6 MB/s
readseq  :   0.448 micros/op 2231382 ops/sec 2.833 seconds 6322271 
operations;  170.2 MB/s

2.
fillrandom   :   5.236 micros/op 190981 ops/sec 52.361 seconds 1000 
operations;   14.6 MB/s
readseq  :   0.444 micros/op 2252646 ops/sec 2.806 seconds 6321658 
operations;  171.9 MB/s

3.
fillrandom   :   5.182 micros/op 192960 ops/sec 51.824 seconds 1000 
operations;   14.7 MB/s
readseq  :   0.444 micros/op 2252317 ops/sec 2.807 seconds 6323209 
operations;  171.8 MB/s

NON-PORTABLE:
$ ./db_bench -benchmarks=fillrandom,readseq -num=1000 -key_size=40 
-value_size=40
1.
fillrandom   :   5.190 micros/op 192676 ops/sec 51.900 seconds 1000 
operations;   14.7 MB/s
readseq  :   0.391 micros/op 2560051 ops/sec 2.470 seconds 6322786 
operations;  195.3 MB/s

2.
fillrandom   :   5.156 micros/op 193945 ops/sec 51.561 seconds 1000 
operations;   14.8 MB/s
readseq  :   0.404 micros/op 2477956 ops/sec 2.551 seconds 6320644 
operations;  189.1 MB/s

3.
fillrandom   :   5.527 micros/op 180940 ops/sec 55.267 seconds 1000 
operations;   13.8 MB/s
readseq  :   0.407 micros/op 2458297 ops/sec 2.571 seconds 6320885 
operations;  187.6 MB/s

1. https://github.com/facebook/rocksdb/blob/v7.7.3/CMakeLists.txt#L248

Change-Id: Id30ae995c41a592fccbdb822bc1f457c5e6878ac
Reviewed-on: http://gerrit.cloudera.org:8080/21287
Tested-by: Alexey Serbin 
Reviewed-by: Alexey Serbin 
---
 thirdparty/build-definitions.sh | 1 +
 thirdparty/build-thirdparty.sh  | 6 ++
 2 files changed, 7 insertions(+)

diff --git a/thirdparty/build-definitions.sh b/thirdparty/build-definitions.sh
index 4e31d9b3c..1199ecafc 100644
--- a/thirdparty/build-definitions.sh
+++ b/thirdparty/build-definitions.sh
@@ -1198,6 +1198,7 @@ build_rocksdb() {
 CXXFLAGS="$EXTRA_CXXFLAGS -fPIC" \
 cmake \
 -DROCKSDB_BUILD_SHARED=ON \
+-DPORTABLE=$PORTABLE \
 -DWITH_SNAPPY=ON \
 -Dsnappy_ROOT_DIR=$PREFIX \
 -DUSE_RTTI=ON \
diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh
index 937beeda7..ae32a4f3b 100755
--- a/thirdparty/build-thirdparty.sh
+++ b/thirdparty/build-thirdparty.sh
@@ -35,6 +35,9 @@
 #   * EXTRA_LIBS - additional libraries to link.
 #   * EXTRA_MAKEFLAGS - additional flags passed to make.
 #   * PARALLEL - parallelism to use when compiling (defaults to number of 
cores).
+#   * PORTABLE - whether to build portable libraries, otherwise build native 
libraries. Portable
+#libraries may cause a slight performance degradation, it's 
recommend to disable
+#portable option if there is no port requirements. (defaults 
to ON).
 
 set -ex
 
@@ -204,6 +207,9 @@ else
   exit 1
 fi
 
+### Build portable libraries by default.
+PORTABLE=${PORTABLE:-"ON"}
+
 ### Detect and enable 'ninja' instead of 'make' for faster builds.
 if which ninja-build > /dev/null ; then
   NINJA=ninja-build



(kudu) branch master updated: [client-test] make ClearCacheAndConcurrentWorkload more stable in TSAN

2024-06-17 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 8ed4db154 [client-test] make ClearCacheAndConcurrentWorkload more 
stable in TSAN
8ed4db154 is described below

commit 8ed4db154596136e3ef4fbe27457992c119ed2b6
Author: Alexey Serbin 
AuthorDate: Mon Jun 17 09:49:59 2024 -0700

[client-test] make ClearCacheAndConcurrentWorkload more stable in TSAN

Change-Id: I1f5ce6d46260d038bf11d1972c7f7299473ab324
Reviewed-on: http://gerrit.cloudera.org:8080/21523
Reviewed-by: Abhishek Chennaka 
Tested-by: Alexey Serbin 
---
 src/kudu/client/client-test.cc | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index c89fc3855..0f08ebbff 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -2445,9 +2445,14 @@ TEST_F(ClientTest, TestMetaCacheExpiry) {
 // when altering a table by adding a new range partition (see
 // KuduTableAlterer::Alter() for details).
 TEST_F(ClientTest, ClearCacheAndConcurrentWorkload) {
+#if defined(THREAD_SANITIZER) || defined(ADDRESS_SANITIZER)
+  constexpr const int64_t kResetIntervalMs = 100;
+#else
+  constexpr const int64_t kResetIntervalMs = 3;
+#endif
   CountDownLatch latch(1);
   thread cache_cleaner([&]() {
-const auto sleep_interval = MonoDelta::FromMilliseconds(3);
+const auto sleep_interval = MonoDelta::FromMilliseconds(kResetIntervalMs);
 while (!latch.WaitFor(sleep_interval)) {
   client_->data_->meta_cache_->ClearCache();
 }



(kudu) branch master updated: [fs] remove chromium Atomics from FS

2024-06-17 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 5977402e3 [fs] remove chromium Atomics from FS
5977402e3 is described below

commit 5977402e3701b52888f91b7bb1e351f957e3c562
Author: Alexey Serbin 
AuthorDate: Fri Jun 14 21:08:28 2024 -0700

[fs] remove chromium Atomics from FS

Change-Id: Ie7ef778fd816ffa929166c9621f31ba4a2ea2b50
Reviewed-on: http://gerrit.cloudera.org:8080/21521
Tested-by: Marton Greber 
Reviewed-by: Marton Greber 
Reviewed-by: Zoltan Chovan 
---
 src/kudu/fs/block_manager-stress-test.cc | 32 +--
 src/kudu/fs/file_block_manager.cc| 27 +-
 src/kudu/fs/file_block_manager.h |  8 +--
 src/kudu/fs/log_block_manager-test.cc|  8 +--
 src/kudu/fs/log_block_manager.cc | 93 +---
 src/kudu/fs/log_block_manager.h  |  3 +-
 src/kudu/util/atomic-utils.h | 61 +
 7 files changed, 147 insertions(+), 85 deletions(-)

diff --git a/src/kudu/fs/block_manager-stress-test.cc 
b/src/kudu/fs/block_manager-stress-test.cc
index 14482e8fb..6a8b66e14 100644
--- a/src/kudu/fs/block_manager-stress-test.cc
+++ b/src/kudu/fs/block_manager-stress-test.cc
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <atomic>
 #include 
 #include 
 #include 
@@ -47,7 +48,6 @@
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/strings/split.h"
 #include "kudu/gutil/strings/substitute.h"
-#include "kudu/util/atomic.h"
 #include "kudu/util/countdown_latch.h"
 #include "kudu/util/env.h"
 #include "kudu/util/faststring.h"
@@ -277,13 +277,13 @@ class BlockManagerStressTest : public KuduTest {
 
   // Some performance counters.
 
-  AtomicInt<int64_t> total_blocks_written_;
-  AtomicInt<int64_t> total_bytes_written_;
+  std::atomic<int64_t> total_blocks_written_;
+  std::atomic<int64_t> total_bytes_written_;
 
-  AtomicInt<int64_t> total_blocks_read_;
-  AtomicInt<int64_t> total_bytes_read_;
+  std::atomic<int64_t> total_blocks_read_;
+  std::atomic<int64_t> total_bytes_read_;
 
-  AtomicInt<int64_t> total_blocks_deleted_;
+  std::atomic<int64_t> total_blocks_deleted_;
 };
 
 template <typename T>
@@ -359,8 +359,8 @@ void BlockManagerStressTest::WriterThread() {
 }
   }
 
-  total_blocks_written_.IncrementBy(num_blocks_written);
-  total_bytes_written_.IncrementBy(num_bytes_written);
+  total_blocks_written_.fetch_add(num_blocks_written, 
std::memory_order_relaxed);
+  total_bytes_written_.fetch_add(num_bytes_written, std::memory_order_relaxed);
 }
 
 template <typename T>
@@ -426,8 +426,8 @@ void BlockManagerStressTest::ReaderThread() {
 num_bytes_read += block_size;
   }
 
-  total_blocks_read_.IncrementBy(num_blocks_read);
-  total_bytes_read_.IncrementBy(num_bytes_read);
+  total_blocks_read_.fetch_add(num_blocks_read, std::memory_order_relaxed);
+  total_bytes_read_.fetch_add(num_bytes_read, std::memory_order_relaxed);
 }
 
 template <typename T>
@@ -470,7 +470,7 @@ void BlockManagerStressTest::DeleterThread() {
 num_blocks_deleted += deleted.size();
   }
 
-  total_blocks_deleted_.IncrementBy(num_blocks_deleted);
+  total_blocks_deleted_.fetch_add(num_blocks_deleted, 
std::memory_order_relaxed);
 }
 
 template <>
@@ -558,15 +558,15 @@ TYPED_TEST(BlockManagerStressTest, StressTest) {
   LOG(INFO) << "Printing test totals";
   LOG(INFO) << "";
   LOG(INFO) << Substitute("Wrote $0 blocks ($1 bytes) via $2 threads",
-  this->total_blocks_written_.Load(),
-  this->total_bytes_written_.Load(),
+  this->total_blocks_written_.load(),
+  this->total_bytes_written_.load(),
   FLAGS_num_writer_threads);
   LOG(INFO) << Substitute("Read $0 blocks ($1 bytes) via $2 threads",
-  this->total_blocks_read_.Load(),
-  this->total_bytes_read_.Load(),
+  this->total_blocks_read_.load(),
+  this->total_bytes_read_.load(),
   FLAGS_num_reader_threads);
   LOG(INFO) << Substitute("Deleted $0 blocks via $1 threads",
-  this->total_blocks_deleted_.Load(),
+  this->total_blocks_deleted_.load(),
   FLAGS_num_deleter_threads);
 }
 
diff --git a/src/kudu/fs/file_block_manager.cc 
b/src/kudu/fs/file_block_manager.cc
index e8524fd94..c9a24eb9e 100644
--- a/src/kudu/fs/file_block_manager.cc
+++ b/src/kudu/fs/file_block_manager.cc
@@ -47,7 +47,6 @@
 #include "kudu/gutil/strings/numbers.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "k

(kudu) branch master updated: [rpc] remove last vestiges of chromium Atomics from RPC

2024-06-14 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 5405d06ee [rpc] remove last vestiges of chromium Atomics from RPC
5405d06ee is described below

commit 5405d06eeccaf1b0eb559e4a531f48160141ed16
Author: Alexey Serbin 
AuthorDate: Thu Jun 13 12:49:58 2024 -0700

[rpc] remove last vestiges of chromium Atomics from RPC

Change-Id: I6039b4a08615339c8f06a4d215a0b1058a9bacea
Reviewed-on: http://gerrit.cloudera.org:8080/21513
Tested-by: Kudu Jenkins
Reviewed-by: Marton Greber 
---
 src/kudu/rpc/proxy.cc |  6 +++---
 src/kudu/rpc/proxy.h  |  4 ++--
 src/kudu/rpc/rpc-bench.cc | 15 +++
 src/kudu/rpc/rpc_stub-test.cc | 17 +
 4 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/src/kudu/rpc/proxy.cc b/src/kudu/rpc/proxy.cc
index 2ec1244f0..8033343b8 100644
--- a/src/kudu/rpc/proxy.cc
+++ b/src/kudu/rpc/proxy.cc
@@ -203,7 +203,7 @@ void Proxy::AsyncRequest(const string& method,
  RpcController* controller,
  const ResponseCallback& callback) {
   DCHECK(!controller->call_) << "Controller should be reset";
-  base::subtle::NoBarrier_Store(&is_started_, true);
+  is_started_.store(true, std::memory_order_relaxed);
   // TODO(awong): it would be great if we didn't have to heap allocate the
   // payload.
   auto req_payload = RequestPayload::CreateRequestPayload(
@@ -269,13 +269,13 @@ Status Proxy::SyncRequest(const string& method,
 }
 
 void Proxy::set_user_credentials(UserCredentials user_credentials) {
-  DCHECK(base::subtle::NoBarrier_Load(&is_started_) == false)
+  DCHECK(is_started_.load(std::memory_order_relaxed) == false)
   << "illegal to call set_user_credentials() after request processing has 
started";
   conn_id_.set_user_credentials(std::move(user_credentials));
 }
 
 void Proxy::set_network_plane(string network_plane) {
-  DCHECK(base::subtle::NoBarrier_Load(&is_started_) == false)
+  DCHECK(is_started_.load(std::memory_order_relaxed) == false)
   << "illegal to call set_network_plane() after request processing has 
started";
   conn_id_.set_network_plane(std::move(network_plane));
 }
diff --git a/src/kudu/rpc/proxy.h b/src/kudu/rpc/proxy.h
index cb822057d..5e7d93c38 100644
--- a/src/kudu/rpc/proxy.h
+++ b/src/kudu/rpc/proxy.h
@@ -16,12 +16,12 @@
 // under the License.
 #pragma once
 
+#include <atomic>
 #include 
 #include 
 #include 
 #include 
 
-#include "kudu/gutil/atomicops.h"
 #include "kudu/gutil/macros.h"
 #include "kudu/rpc/connection_id.h"
 #include "kudu/rpc/outbound_call.h"
@@ -189,7 +189,7 @@ class Proxy {
   mutable simple_spinlock lock_;
   ConnectionId conn_id_;
 
-  mutable Atomic32 is_started_;
+  std::atomic<bool> is_started_;
 
   DISALLOW_COPY_AND_ASSIGN(Proxy);
 };
diff --git a/src/kudu/rpc/rpc-bench.cc b/src/kudu/rpc/rpc-bench.cc
index ca42fd8ef..145a24b2f 100644
--- a/src/kudu/rpc/rpc-bench.cc
+++ b/src/kudu/rpc/rpc-bench.cc
@@ -31,7 +31,6 @@
 #include 
 #include 
 
-#include "kudu/gutil/atomicops.h"
 #include "kudu/gutil/port.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/strings/substitute.h"
@@ -96,8 +95,8 @@ class RpcBench : public RpcTestBase {
  public:
   RpcBench()
   : should_run_(true),
-stop_(0)
-  {}
+stop_(0) {
+  }
 
   void SetUp() override {
 RpcTestBase::SetUp();
@@ -149,7 +148,7 @@ class RpcBench : public RpcTestBase {
   friend class ClientAsyncWorkload;
 
   Sockaddr server_addr_;
-  Atomic32 should_run_;
+  atomic<bool> should_run_;
   CountDownLatch stop_;
 };
 
@@ -176,7 +175,7 @@ class ClientThread {
 
 AddRequestPB req;
 AddResponsePB resp;
-while (Acquire_Load(&bench_->should_run_)) {
+while (bench_->should_run_) {
   req.set_x(request_count_);
   req.set_y(request_count_);
   RpcController controller;
@@ -205,7 +204,7 @@ TEST_F(RpcBench, BenchmarkCalls) {
   }
 
   SleepFor(MonoDelta::FromSeconds(FLAGS_run_seconds));
-  Release_Store(&should_run_, false);
+  should_run_ = false;
 
   int total_reqs = 0;
 
@@ -233,7 +232,7 @@ class ClientAsyncWorkload {
   CHECK_OK(controller_.status());
   CHECK_EQ(req_.x() + req_.y(), resp_.result());
 }
-if (!Acquire_Load(&bench_->should_run_)) {
+if (!bench_->should_run_) {
   bench_->stop_.CountDown();
   return;
 }
@@ -287,7 +286,7 @@ TEST_F(RpcBench, BenchmarkCallsAsync) {
   }
 
   SleepFor(MonoDelta::FromSeconds(FLAGS_run_seconds));
-  Release_Store(&should_run_, false);
+  should_run_ = false;
 
   sw.stop();
 
diff --git a/src/kudu/rpc/rpc_stub-test.cc b/src/kudu/rpc/rpc_stub-test.cc
index 2454d0d2f..9644c 100644
--- a/src/kudu/rpc/rpc_stub-test.cc
+++ b/src/kudu/rpc/rpc_stub-te

(kudu) branch master updated: KUDU-3567 Fix resource leak in AsyncKuduScanner

2024-06-13 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 3e43ae9f0 KUDU-3567 Fix resource leak in AsyncKuduScanner
3e43ae9f0 is described below

commit 3e43ae9f02da5602d4d3dc50b83204e0bafd1942
Author: zhangyifan27 
AuthorDate: Thu Jun 13 15:18:32 2024 +0800

KUDU-3567 Fix resource leak in AsyncKuduScanner

To avoid resource leak in AsyncKuduScanner, we should reuse the
HashedWheelTimer instance from the corresponding AsyncKuduClient
object in AsyncKuduScanner.

Change-Id: Id675868fd86110633e147f71bceb092ac92ac038
Reviewed-on: http://gerrit.cloudera.org:8080/21512
Reviewed-by: Wang Xixu <1450306...@qq.com>
Tested-by: Alexey Serbin 
Reviewed-by: Alexey Serbin 
---
 .../src/main/java/org/apache/kudu/client/AsyncKuduScanner.java | 10 +++---
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git 
a/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduScanner.java 
b/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduScanner.java
index 1013a148b..31a2cdf75 100644
--- 
a/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduScanner.java
+++ 
b/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduScanner.java
@@ -292,10 +292,6 @@ public final class AsyncKuduScanner {
 
   private String queryId;
 
-  private final HashedWheelTimer timer = new HashedWheelTimer(
-new ThreadFactoryBuilder().setDaemon(true).build(), 20,
-TimeUnit.MILLISECONDS);
-
   private Timeout keepAliveTimeout;
 
   /**
@@ -990,12 +986,12 @@ public final class AsyncKuduScanner {
   @Override
   public void run(final Timeout timeout) {
 keepAlive();
-keepAliveTimeout = AsyncKuduClient.newTimeout(timer, this, 
keepAliveIntervalMS);
+keepAliveTimeout = AsyncKuduClient.newTimeout(client.getTimer(), this, 
keepAliveIntervalMS);
   }
 }
 
-keepAliveTimeout = AsyncKuduClient.newTimeout(timer, new KeepAliveTimer(),
-  keepAliveIntervalMS);
+keepAliveTimeout =
+AsyncKuduClient.newTimeout(client.getTimer(), new KeepAliveTimer(), 
keepAliveIntervalMS);
 return true;
   }
 



(kudu) branch branch-1.17.x updated: [ARM] Concurrent binary tree memory barriers fixed.

2024-06-12 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/branch-1.17.x by this push:
 new 08ef2f997 [ARM] Concurrent binary tree memory barriers fixed.
08ef2f997 is described below

commit 08ef2f997754f1aece1f9db118721fd77360e7a7
Author: Zoltan Martonka 
AuthorDate: Wed Jun 5 03:16:39 2024 -0400

[ARM] Concurrent binary tree memory barriers fixed.

TestCBTree.TestRacyConcurrentInsert sometimes fails on ARM.
The concurrent inserts produce a tree with some incorrectly ordered
nodes. Apart from incorrect order, there are no other errors in the
tree. All inserted elements are present, but CBTree::GetCopy cannot
find some due to the incorrect ordering.

This is not a unit test error, but a real error in the CBTree
implementation. Modifying the test to only do the inserts first, and
only start checking when the tree is finalized does not prevent the bug.
So calling only the tree->Insert function from multiple threads is
sufficient to reproduce the issue (only on ARM).

Root cause:

Some memory order restrictions are not strict enough. It does not cause
a problem on x86 (There were no real changes for 11 years), but it
causes problems on ARM. x86 guarantees a globally consistent order for
stores (TSO).
ARM, in contrast, allows stores to different memory locations to be
observed differently from the program order.
More info:
https://www.sciencedirect.com/science/article/pii/S1383762124000390

Solution:

The following barriers need to be more strict:

1. When we set the Splitting/Inserting flag on a node, then it is not
  enough to flush all previous changes. It is also very important not
  to reorder any write before it. So instead of Release_Store (which
  is practically equivalent to std::memory_order_release), we need a
  2 way barrier.

2. When we call AcquireVersion or StableVersion to verify that the
  version was not changed during our (already completed) read, a 2-way
  std::memory_order_acquire is required.

Putting the appropriate std::atomic_thread_fence(...) calls to these
places would resolve the issue. However, replacing the current functions
from atomicops-internals-arm64.h with the C++ standard ones
makes the code more consistent.
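
A minimal illustrative sketch (plain std::atomic, not the actual CBTree
code) of the difference between the one-way and two-way orderings
discussed in point 1:

  #include <atomic>
  #include <cstdint>

  std::atomic<uint64_t> version{0};
  constexpr uint64_t kInsertingFlag = 1ULL << 63;

  // One-way: earlier writes cannot move past this store, but later writes
  // may still become visible before the flag does -- not enough on ARM.
  void SetFlagReleaseOnly() {
    version.fetch_or(kInsertingFlag, std::memory_order_release);
  }

  // Two-way: acquire+release on the read-modify-write pins memory accesses
  // on both sides of the flag update, the stricter ordering the fix needs.
  void SetFlagTwoWay() {
    version.fetch_or(kInsertingFlag, std::memory_order_acq_rel);
  }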

Reason for changing to std::atomics:

atomicops-internals-arm64.h was picked from chromium source and the
missing functions were reimplemented. The header is gone since, and even
chromium uses a solution based on std::atomic (see
base/atomicops_internals_portable.h in chromium source). I see no reason
to update the header from chromium and implement the missing functions,
just to have one more abstraction layer, that is basically just
"function aliases" at this point.

Nodes are PACKED, which conflicts with std::atomic. However, when we
allocate a node, it is allocated with sufficient alignment for atomic
operations. Other unaligned structures (the values of the data) are
stored between nodes. Removing PACKED would increase the memory
footprint and actually slow things down slightly.

Notes:
+ std::atomic operations usually block reordering in one direction.
  std::atomic_thread_fence(...) blocks reordering in both directions.

- std::assume_aligned is from C++20. The used __builtin_assume_aligned
  should be supported by both Clang and GCC.

- I renamed `IsLocked` to `IsLockedUnsafe` because it should only be
  used by the thread that actually holds the lock (as it is currently
  only used in `DCHECK` macros when we hold the lock).

Performance Tests on an aws c6i.2xlarge :

=== master =

 Performance counter stats for 'bin/cbtree-test 
--gtest_filter=*TestRacyConcurrentInsert* --gtest_repeat=10':

1033864.95 msec task-clock#7.703 CPUs utilized
788348  context-switches  #0.763 K/sec
142572  cpu-migrations#0.138 K/sec
  3853  page-faults   #0.004 K/sec
 cycles
 instructions
 branches
 branch-misses

 134.221318612 seconds time elapsed

1036.566723000 seconds user
   0.369948000 seconds sys

 Performance counter stats for 'bin/cbtree-test 
--gtest_filter=*ConcurrentReadWriteBenchmark* --gtest_repeat=10 
--concurrent_rw_benchmark_num_writer_threads=4 
--concurrent_rw_benchmark_num_reader_threads=4':

 175292.55 msec task-clock#5.723 CPUs utilized
 35287  context-switches  #  

(kudu) 01/02: [util] remove last vestiges of chromium Atomics from metrics

2024-06-12 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 6bdd0c3d8b747169f09150874cd8751debaf2ed1
Author: Alexey Serbin 
AuthorDate: Tue Jun 11 12:06:42 2024 -0700

[util] remove last vestiges of chromium Atomics from metrics

Change-Id: I5feaeffa7912ef93479e2675ea6b27d694c1c5ee
Reviewed-on: http://gerrit.cloudera.org:8080/21505
Tested-by: Alexey Serbin 
Reviewed-by: Yingchun Lai 
---
 src/kudu/util/metrics.h | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/kudu/util/metrics.h b/src/kudu/util/metrics.h
index 7efc7e471..b53fd8a13 100644
--- a/src/kudu/util/metrics.h
+++ b/src/kudu/util/metrics.h
@@ -250,7 +250,6 @@
 #include "kudu/gutil/port.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/strings/substitute.h"
-#include "kudu/util/atomic.h"
 #include "kudu/util/hdr_histogram.h"
 #include "kudu/util/jsonwriter.h" // IWYU pragma: keep
 #include "kudu/util/locks.h"
@@ -1141,19 +1140,19 @@ class AtomicGauge : public Gauge {
 return scoped_refptr(p);
   }
   T value() const {
-return static_cast<T>(value_.Load(kMemOrderRelease));
+return static_cast<T>(value_.load(std::memory_order_acquire));
   }
   void set_value(const T& value) {
 UpdateModificationEpoch();
-value_.Store(static_cast<int64_t>(value), kMemOrderNoBarrier);
+value_.store(static_cast<int64_t>(value), std::memory_order_relaxed);
   }
   void Increment() {
 UpdateModificationEpoch();
-value_.IncrementBy(1, kMemOrderNoBarrier);
+value_.fetch_add(1, std::memory_order_relaxed);
   }
   void IncrementBy(int64_t amount) {
 UpdateModificationEpoch();
-value_.IncrementBy(amount, kMemOrderNoBarrier);
+value_.fetch_add(amount, std::memory_order_relaxed);
   }
   void Decrement() {
 IncrementBy(-1);
@@ -1201,7 +1200,7 @@ class AtomicGauge : public Gauge {
 value());
   }
  private:
-  AtomicInt<int64_t> value_;
+  std::atomic<int64_t> value_;
   MergeType type_;
 
   DISALLOW_COPY_AND_ASSIGN(AtomicGauge);



(kudu) branch master updated (f49a548fe -> 5063e80e1)

2024-06-12 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


from f49a548fe KUDU-613: Introduce SLRU cache
 new 6bdd0c3d8 [util] remove last vestiges of chromium Atomics from metrics
 new 5063e80e1 KUDU-3584 fix flakiness in TableKeyRangeTest

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/kudu/client/client-test.cc | 105 -
 src/kudu/util/metrics.h|  11 ++---
 2 files changed, 76 insertions(+), 40 deletions(-)



(kudu) 02/02: KUDU-3584 fix flakiness in TableKeyRangeTest

2024-06-12 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 5063e80e1ca26c5f1b763a6ce3e4708cd8196a26
Author: Alexey Serbin 
AuthorDate: Tue Jun 11 18:44:39 2024 -0700

KUDU-3584 fix flakiness in TableKeyRangeTest

When running client-test in TSAN/ASAN builds, the
TableKeyRangeTest.TestGetTableKeyRange scenario would sometimes fail
on busy nodes.  This patch addresses the issue by increasing the
timeout for scanners and for the write session, and making the related
code more robust overall.

I also took the liberty of cleaning up the related code.

Change-Id: I1efc2fe6ee7f2dfe94b52b14f1316ffbafd39d52
Reviewed-on: http://gerrit.cloudera.org:8080/21506
Reviewed-by: KeDeng 
Reviewed-by: Marton Greber 
Tested-by: Marton Greber 
---
 src/kudu/client/client-test.cc | 105 -
 1 file changed, 71 insertions(+), 34 deletions(-)

diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index c3b46baa5..c89fc3855 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -412,6 +412,7 @@ class ClientTest : public KuduTest {
 
   void CheckTokensInfo(const vector<KuduScanToken*>& tokens,
int replica_num = 1) {
+ASSERT_GE(replica_num, 1);
 for (const auto* t : tokens) {
   const KuduTablet& tablet = t->tablet();
   ASSERT_EQ(replica_num, tablet.replicas().size());
@@ -430,7 +431,7 @@ class ClientTest : public KuduTest {
 tablet_copy.reset(ptr);
   }
   ASSERT_EQ(tablet.id(), tablet_copy->id());
-  ASSERT_EQ(1, tablet_copy->replicas().size());
+  ASSERT_EQ(replica_num, tablet_copy->replicas().size());
   const KuduReplica* replica_copy = tablet_copy->replicas()[0];
 
   ASSERT_EQ(replica->is_leader(), replica_copy->is_leader());
@@ -440,36 +441,60 @@ class ClientTest : public KuduTest {
 }
   }
 
-  int CountRows(const vector<KuduScanToken*>& tokens) {
-atomic rows(0);
+  Status CountRows(const vector<KuduScanToken*>& tokens, size_t* count) {
+#define THR_RET_NOT_OK(s, res_status) do { \
+if (const Status& _s = (s); !_s.ok()) { \
+  *(res_status) = _s; \
+  return; \
+} \
+  } while (false)
+
+vector<Status> thread_status(tokens.size(), Status::OK());
 vector<thread> threads;
-for (KuduScanToken* token : tokens) {
-  string buf;
-  CHECK_OK(token->Serialize(&buf));
+atomic rows(0);
+for (size_t i = 0; i < tokens.size(); ++i) {
+  const size_t thread_idx = i;
+  const auto* token = tokens[thread_idx];
+
+  threads.emplace_back([this, &rows] (const KuduScanToken* token, Status* thread_status) {
+string serialized_token;
+THR_RET_NOT_OK(token->Serialize(&serialized_token), thread_status);
 
-  threads.emplace_back([this, &rows] (const string& serialized_token) {
 shared_ptr<KuduClient> client;
-ASSERT_OK(cluster_->CreateClient(nullptr, &client));
+THR_RET_NOT_OK(cluster_->CreateClient(nullptr, &client), thread_status);
+
 KuduScanner* scanner_ptr;
-ASSERT_OK(KuduScanToken::DeserializeIntoScanner(
-client.get(), serialized_token, &scanner_ptr));
+THR_RET_NOT_OK(KuduScanToken::DeserializeIntoScanner(
+client.get(), serialized_token, &scanner_ptr), thread_status);
+
 unique_ptr<KuduScanner> scanner(scanner_ptr);
-ASSERT_OK(scanner->Open());
+// Try to avoid flakiness when running at busy nodes.
+scanner->SetTimeoutMillis(60 * 1000);
+// Make sure to read the most recent data if the test table has
+// multiple replicas.
+THR_RET_NOT_OK(scanner->SetSelection(KuduClient::LEADER_ONLY), 
thread_status);
+THR_RET_NOT_OK(scanner->Open(), thread_status);
 
 while (scanner->HasMoreRows()) {
   KuduScanBatch batch;
-  ASSERT_OK(scanner->NextBatch(&batch));
+  THR_RET_NOT_OK(scanner->NextBatch(&batch), thread_status);
   rows += batch.NumRows();
 }
-scanner->Close();
-  }, std::move(buf));
+  }, token, &thread_status[thread_idx]);
 }
 
 for (thread& thread : threads) {
   thread.join();
 }
+for (const auto& s : thread_status) {
+  if (!s.ok()) {
+return s;
+  }
+}
 
-return rows;
+*count = rows;
+return Status::OK();
+#undef THR_RET_NOT_OK
   }
 
   // Return the number of lookup-related RPCs which have been serviced by the 
master.
@@ -8970,14 +8995,13 @@ class TableKeyRangeTest : public ClientTest {
   }
 
   static void InsertTestRowsWithStrings(KuduTable* table, KuduSession* 
session, int num_rows) {
-string str_val = "*";
+static const string kStringVal = "*";
 for (int i = 0; i < num_rows; i++) {
   unique_ptr<KuduInsert> insert(table->NewInsert());
   ASSERT_OK(insert->mutable

(kudu) branch master updated: KUDU-613: Introduce SLRU cache

2024-06-11 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new f49a548fe KUDU-613: Introduce SLRU cache
f49a548fe is described below

commit f49a548fe4bac91679744b4dd0efa895ff9c09e2
Author: Mahesh Reddy 
AuthorDate: Fri Oct 20 16:24:45 2023 -0400

KUDU-613: Introduce SLRU cache

This patch introduces the SLRU cache that has two internal segments,
the probationary and protected, to protect the cache from long/random
reads. The SLRU cache has a parameter named 'lookups_threshold_' that
determines the minimum number of times an entry must be accessed
before it's upgraded to the protected segment. Any random scans would
then only evict entries from the probationary segment.

Both the protected and probationary segment have their own
configurable capacities. When the protected segment is at capacity,
any entries evicted will be added to the MRU end of the
probationary segment.
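
As an illustration of the two-segment flow (a toy, single-threaded sketch
with keys only -- not the actual Kudu implementation):

  #include <cstddef>
  #include <cstdint>
  #include <list>
  #include <string>
  #include <unordered_map>

  class ToySlru {
   public:
    ToySlru(size_t probationary_cap, size_t protected_cap, uint32_t lookups_threshold)
        : probationary_cap_(probationary_cap),
          protected_cap_(protected_cap),
          lookups_threshold_(lookups_threshold) {}

    // Record a lookup of 'key', inserting it into the probationary segment on a miss.
    void Lookup(const std::string& key) {
      Entry& e = entries_[key];
      ++e.lookups;
      if (e.in_protected) {
        Touch(&protected_lru_, key);
      } else if (e.lookups >= lookups_threshold_) {
        // Accessed often enough: upgrade from probationary to protected.
        probationary_lru_.remove(key);
        protected_lru_.push_front(key);
        e.in_protected = true;
        if (protected_lru_.size() > protected_cap_) {
          // Protected overflow is demoted to the probationary MRU end, not dropped.
          const std::string demoted = protected_lru_.back();
          protected_lru_.pop_back();
          entries_[demoted].in_protected = false;
          probationary_lru_.push_front(demoted);
        }
      } else if (e.lookups == 1) {
        probationary_lru_.push_front(key);
      } else {
        Touch(&probationary_lru_, key);
      }
      // Only the probationary tail is evicted outright, so a long random scan
      // cannot push frequently accessed (protected) entries out of the cache.
      while (probationary_lru_.size() > probationary_cap_) {
        entries_.erase(probationary_lru_.back());
        probationary_lru_.pop_back();
      }
    }

   private:
    struct Entry { uint32_t lookups = 0; bool in_protected = false; };

    static void Touch(std::list<std::string>* lru, const std::string& key) {
      lru->remove(key);
      lru->push_front(key);
    }

    const size_t probationary_cap_;
    const size_t protected_cap_;
    const uint32_t lookups_threshold_;
    std::list<std::string> probationary_lru_;
    std::list<std::string> protected_lru_;
    std::unordered_map<std::string, Entry> entries_;
  };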

Metrics will be added in a follow-up patch.

Ran cache benchmarks for RELEASE build.
Used default flag values in cache-bench.
Build ran locally on macOS, 6 cores and 2.6GHz.

Here are some benchmark numbers for SLRU cache:
Test case   | Algorithm | Lookups/sec  | Hit rate
ZIPFIAN ratio=1.00x | LRU   | 11.01M   | 99.8%
ZIPFIAN ratio=1.00x | SLRU  | 10.62M   | 99.9%
ZIPFIAN ratio=3.00x | LRU   | 11.06M   | 95.9%
ZIPFIAN ratio=3.00x | SLRU  | 9.69M| 95.9%
UNIFORM ratio=1.00x | LRU   | 8.54M| 99.7%
UNIFORM ratio=1.00x | SLRU  | 6.18M| 99.7%
UNIFORM ratio=3.00x | LRU   | 6.53M| 33.3%
UNIFORM ratio=3.00x | SLRU  | 4.99M| 33.3%

Change-Id: I45531534a2049dd38c002f4dc7982df9fd46e0bb
Reviewed-on: http://gerrit.cloudera.org:8080/20607
Tested-by: Alexey Serbin 
Reviewed-by: Alexey Serbin 
---
 src/kudu/util/CMakeLists.txt |   2 +
 src/kudu/util/cache-bench.cc |  37 ++-
 src/kudu/util/cache.cc   |   2 +
 src/kudu/util/cache.h|   3 +
 src/kudu/util/slru_cache-test.cc | 679 +++
 src/kudu/util/slru_cache.cc  | 488 
 src/kudu/util/slru_cache.h   | 280 
 7 files changed, 1486 insertions(+), 5 deletions(-)

diff --git a/src/kudu/util/CMakeLists.txt b/src/kudu/util/CMakeLists.txt
index d32450c97..7bd37c5ea 100644
--- a/src/kudu/util/CMakeLists.txt
+++ b/src/kudu/util/CMakeLists.txt
@@ -244,6 +244,7 @@ set(UTIL_SRCS
   ${SEMAPHORE_CC}
   signal.cc
   slice.cc
+  slru_cache.cc
   spinlock_profiling.cc
   status.cc
   status_callback.cc
@@ -574,6 +575,7 @@ ADD_KUDU_TEST(rwc_lock-test RUN_SERIAL true)
 ADD_KUDU_TEST(safe_math-test)
 ADD_KUDU_TEST(scoped_cleanup-test)
 ADD_KUDU_TEST(slice-test)
+ADD_KUDU_TEST(slru_cache-test)
 ADD_KUDU_TEST(sorted_disjoint_interval_list-test)
 ADD_KUDU_TEST(spinlock_profiling-test)
 ADD_KUDU_TEST(stack_watchdog-test PROCESSORS 2)
diff --git a/src/kudu/util/cache-bench.cc b/src/kudu/util/cache-bench.cc
index 64016b247..5c11689d9 100644
--- a/src/kudu/util/cache-bench.cc
+++ b/src/kudu/util/cache-bench.cc
@@ -38,6 +38,7 @@
 #include "kudu/util/random.h"
 #include "kudu/util/random_util.h"
 #include "kudu/util/slice.h"
+#include "kudu/util/slru_cache.h"
 #include "kudu/util/test_util.h"
 
 DEFINE_int32(num_threads, 16, "The number of threads to access the cache 
concurrently.");
@@ -54,6 +55,10 @@ namespace kudu {
 
 // Benchmark a 1GB cache.
 static constexpr int kCacheCapacity = 1024 * 1024 * 1024;
+static constexpr int kProbationarySegmentCapacity = 820 * 1024 * 1024;
+static constexpr int kProtectedSegmentCapacity = 204 * 1024 * 1024;
+static constexpr uint32_t kLookups = 3;
+
 // Use 4kb entries.
 static constexpr int kEntrySize = 4 * 1024;
 
@@ -74,18 +79,30 @@ struct BenchSetup {
   // in the cache.
   double dataset_cache_ratio;
 
+  Cache::EvictionPolicy eviction_policy;
+
   string ToString() const {
 string ret;
 switch (pattern) {
   case Pattern::ZIPFIAN: ret += "ZIPFIAN"; break;
   case Pattern::UNIFORM: ret += "UNIFORM"; break;
 }
+if (eviction_policy == Cache::EvictionPolicy::SLRU) {
+  ret += " SLRU";
+} else {
+  ret += " LRU";
+}
 ret += StringPrintf(" ratio=%.2fx n_unique=%d", dataset_cache_ratio, 
max_key());
 return ret;
   }
 
   // Return the maximum cache key to be generated for a lookup.
   uint32_t max_key() const {
+if (eviction_policy == Cache::EvictionPolicy::SLRU) {
+  return static_cast(
+  (kProbationarySegmentCapacity + kProtectedSegmentCapacity) * 
dataset_cache_ratio)
+  / kEntrySize;
+}
 

(kudu) branch master updated: [ARM] Concurrent binary tree memory barriers fixed.

2024-06-07 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 2161950da [ARM] Concurrent binary tree memory barriers fixed.
2161950da is described below

commit 2161950da43f9daa71f2297683e60e3ff7fcd1db
Author: Zoltan Martonka 
AuthorDate: Wed Jun 5 03:16:39 2024 -0400

[ARM] Concurrent binary tree memory barriers fixed.

TestCBTree.TestRacyConcurrentInsert sometimes fails on ARM.
The concurrent inserts produce a tree with some incorrectly ordered
nodes. Apart from incorrect order, there are no other errors in the
tree. All inserted elements are present, but CBTree::GetCopy cannot
find some due to the incorrect ordering.

This is not a unit test error, but a real error in the CBTree
implementation. Modifying the test to only do the inserts first, and
only start checking when the tree is finalized does not prevent the bug.
So calling only the tree->Insert function from multiple threads is
sufficient to reproduce the issue (only on ARM).

Root cause:

Some memory order restrictions are not strict enough. It does not cause
a problem on x86 (There were no real changes for 11 years), but it
causes problems on ARM. x86 guarantees a globally consistent order for
stores (TSO).
ARM, in contrast, allows stores to different memory locations to be
observed differently from the program order.
More info:
https://www.sciencedirect.com/science/article/pii/S1383762124000390

Solution:

The following barriers need to be more strict:

1. When we set the Splitting/Inserting flag on a node, then it is not
  enough to flush all previous changes. It is also very important not
  to reorder any write before it. So instead of Release_Store (which
  is practically equivalent to std::memory_order_release), we need a
  2 way barrier.

2. When we call AcquireVersion or StableVersion to verify that the
  version was not changed during our (already completed) read, a 2-way
  std::memory_order_acquire is required.

Putting the appropriate std::atomic_thread_fence(...) calls to these
places would resolve the issue. However, replacing the current functions
from atomicops-internals-arm64.h with the C++ standard ones
makes the code more consistent.

Reason for changing to std::atomics:

atomicops-internals-arm64.h was picked from chromium source and the
missing functions were reimplemented. The header is gone since, and even
chromium uses a solution based on std::atomic (see
base/atomicops_internals_portable.h in chromium source). I see no reason
to update the header from chromium and implement the missing functions,
just to have one more abstraction layer, that is basically just
"function aliases" at this point.

Nodes are PACKED, which conflicts with std::atomic. However, when we
allocate a node, it is allocated with sufficient alignment for atomic
operations. Other unaligned structures (the values of the data) are
stored between nodes. Removing PACKED would increase the memory
footprint and actually slow things down slightly.

Notes:
+ std::atomic operations usually block reordering in one direction.
  std::atomic_thread_fence(...) blocks reordering in both directions.

- std::assume_aligned is from C++20. The used __builtin_assume_aligned
  should be supported by both Clang and GCC.

- I renamed `IsLocked` to `IsLockedUnsafe` because it should only be
  used by the thread that actually holds the lock (as it is currently
  only used in `DCHECK` macros when we hold the lock).

Performance Tests on an aws c6i.2xlarge :

=== master =

 Performance counter stats for 'bin/cbtree-test 
--gtest_filter=*TestRacyConcurrentInsert* --gtest_repeat=10':

1033864.95 msec task-clock#7.703 CPUs utilized
788348  context-switches  #0.763 K/sec
142572  cpu-migrations#0.138 K/sec
  3853  page-faults   #0.004 K/sec
 cycles
 instructions
 branches
 branch-misses

 134.221318612 seconds time elapsed

1036.566723000 seconds user
   0.369948000 seconds sys

 Performance counter stats for 'bin/cbtree-test 
--gtest_filter=*ConcurrentReadWriteBenchmark* --gtest_repeat=10 
--concurrent_rw_benchmark_num_writer_threads=4 
--concurrent_rw_benchmark_num_reader_threads=4':

 175292.55 msec task-clock#5.723 CPUs utilized
 35287  context-switches  #  

(kudu) branch branch-1.17.x updated: KUDU-3577 fix altering tables with custom hash schemas

2024-06-07 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/branch-1.17.x by this push:
 new 658a9794a KUDU-3577 fix altering tables with custom hash schemas
658a9794a is described below

commit 658a9794aa2ca2caf4064fa01c911a1f47bcb3bc
Author: Alexey Serbin 
AuthorDate: Thu Jun 6 17:33:55 2024 -0700

KUDU-3577 fix altering tables with custom hash schemas

Since partition boundaries for ranges with custom hash schemas are
represented via RowOperationsPB (see RangeWithHashSchemaPB::range_bounds
field in src/kudu/common/common.proto), addressing this design defect
requires re-encoding the information as a part of PartitionSchemaPB
stored in the system catalog upon particular modifications of the
table's schema.  This patch does exactly that, and also adds a corresponding
test scenario which would fail without the fix.

A proper solution would be using primary-key-only projection of a
table's schema to encode the information on range boundaries, but it's
necessary to provide backwards compatibility with already released Kudu
clients.  See KUDU-3577 for details.

Change-Id: I21a775538063768b986edd2b6bc25d03360b5216
Reviewed-on: http://gerrit.cloudera.org:8080/21486
Tested-by: Alexey Serbin 
Reviewed-by: Mahesh Reddy 
Reviewed-by: Abhishek Chennaka 
(cherry picked from commit d254964e6037f6ae0c9459d99cffa13303596f07)
Reviewed-on: http://gerrit.cloudera.org:8080/21495
---
 src/kudu/integration-tests/alter_table-test.cc | 137 +
 src/kudu/master/catalog_manager.cc |  35 ++-
 src/kudu/master/catalog_manager.h  |   3 +-
 3 files changed, 172 insertions(+), 3 deletions(-)

diff --git a/src/kudu/integration-tests/alter_table-test.cc 
b/src/kudu/integration-tests/alter_table-test.cc
index ded8bff3b..14746cab9 100644
--- a/src/kudu/integration-tests/alter_table-test.cc
+++ b/src/kudu/integration-tests/alter_table-test.cc
@@ -100,6 +100,7 @@ using kudu::client::KuduColumnSchema;
 using kudu::client::KuduDelete;
 using kudu::client::KuduError;
 using kudu::client::KuduInsert;
+using kudu::client::KuduRangePartition;
 using kudu::client::KuduRowResult;
 using kudu::client::KuduScanBatch;
 using kudu::client::KuduScanner;
@@ -2174,6 +2175,142 @@ TEST_F(AlterTableTest, 
TestAddRangePartitionConflictExhaustive) {
   expect_range_partitions_conflict(0, 1, 0, 1);
 }
 
+// Test scenario for KUDU-3577.
+TEST_F(AlterTableTest, RangeWithCustomHashSchema) {
+  KuduSchemaBuilder b;
+  b.AddColumn("c0")->Type(KuduColumnSchema::INT32)->NotNull()->
+  Default(KuduValue::FromInt(0));
+  b.AddColumn("c1")->Type(KuduColumnSchema::INT32)->NotNull()->
+  Default(KuduValue::FromInt(0));
+  b.AddColumn("c2")->Type(KuduColumnSchema::INT32)->Nullable();
+  b.SetPrimaryKey({ "c0", "c1" });
+  KuduSchema schema;
+  ASSERT_OK(b.Build(&schema));
+
+  // Create table with table-wide hash schema: 2 hash buckets on "c1" column.
+  const string table_name = "kudu-3577";
+  unique_ptr<KuduTableCreator> creator(client_->NewTableCreator());
+  creator->table_name(table_name)
+      .schema(&schema)
+      .set_range_partition_columns({ "c0" })
+      .add_hash_partitions({ "c1" }, 2)
+      .num_replicas(1);
+
+  // Add a range partition with the table-wide hash schema.
+  {
+    unique_ptr<KuduPartialRow> lower(schema.NewRow());
+    ASSERT_OK(lower->SetInt32("c0", -100));
+    unique_ptr<KuduPartialRow> upper(schema.NewRow());
+    ASSERT_OK(upper->SetInt32("c0", 0));
+    creator->add_range_partition(lower.release(), upper.release());
+  }
+
+  // Add a range partition with custom hash schema.
+  {
+    unique_ptr<KuduPartialRow> lower(schema.NewRow());
+    ASSERT_OK(lower->SetInt32("c0", 0));
+    unique_ptr<KuduPartialRow> upper(schema.NewRow());
+    ASSERT_OK(upper->SetInt32("c0", 100));
+    unique_ptr<KuduRangePartition> p(
+        new KuduRangePartition(lower.release(), upper.release()));
+    ASSERT_OK(p->add_hash_partitions({ "c1" }, 3, 0));
+    creator->add_custom_range_partition(p.release());
+  }
+  ASSERT_OK(creator->Create());
+
+  {
+    // Make sure client successfully opens the newly created table.
+    shared_ptr<KuduTable> table;
+    ASSERT_OK(client_->OpenTable(table_name, &table));
+    // The newly created table is empty, of course.
+    ASSERT_EQ(0, CountTableRows(table.get()));
+    // Insert 100 rows.
+    ASSERT_OK(InsertRowsSequential(table_name, -50, 100));
+    ASSERT_EQ(100, CountTableRows(table.get()));
+  }
+
+  {
+    // Drop "c2", the only nullable column in the table as of now.
+    unique_ptr<KuduTableAlterer> alterer(client_->NewTableAlterer(table_name));
+    alterer->DropColumn("c2");
+    ASSERT_OK(alterer->Alter());
+
+/

(kudu) branch master updated: KUDU-3577 fix altering tables with custom hash schemas

2024-06-07 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new d254964e6 KUDU-3577 fix altering tables with custom hash schemas
d254964e6 is described below

commit d254964e6037f6ae0c9459d99cffa13303596f07
Author: Alexey Serbin 
AuthorDate: Thu Jun 6 17:33:55 2024 -0700

KUDU-3577 fix altering tables with custom hash schemas

Since partition boundaries for ranges with custom hash schemas are
represented via RowOperationsPB (see RangeWithHashSchemaPB::range_bounds
field in src/kudu/common/common.proto), addressing this design defect
requires re-encoding the information as a part of PartitionSchemaPB
stored in the system catalog upon particular modifications of the
table's schema.  This patch does exactly that, and also adds a corresponding
test scenario which would fail without the fix.

A proper solution would be using primary-key-only projection of a
table's schema to encode the information on range boundaries, but it's
necessary to provide backwards compatibility with already released Kudu
clients.  See KUDU-3577 for details.

Change-Id: I21a775538063768b986edd2b6bc25d03360b5216
Reviewed-on: http://gerrit.cloudera.org:8080/21486
Tested-by: Alexey Serbin 
Reviewed-by: Mahesh Reddy 
Reviewed-by: Abhishek Chennaka 
---
 src/kudu/integration-tests/alter_table-test.cc | 137 +
 src/kudu/master/catalog_manager.cc |  35 ++-
 src/kudu/master/catalog_manager.h  |   3 +-
 3 files changed, 172 insertions(+), 3 deletions(-)

diff --git a/src/kudu/integration-tests/alter_table-test.cc 
b/src/kudu/integration-tests/alter_table-test.cc
index d4884579e..2296dfe60 100644
--- a/src/kudu/integration-tests/alter_table-test.cc
+++ b/src/kudu/integration-tests/alter_table-test.cc
@@ -101,6 +101,7 @@ using kudu::client::KuduColumnSchema;
 using kudu::client::KuduDelete;
 using kudu::client::KuduError;
 using kudu::client::KuduInsert;
+using kudu::client::KuduRangePartition;
 using kudu::client::KuduRowResult;
 using kudu::client::KuduScanBatch;
 using kudu::client::KuduScanner;
@@ -2174,6 +2175,142 @@ TEST_F(AlterTableTest, 
TestAddRangePartitionConflictExhaustive) {
   expect_range_partitions_conflict(0, 1, 0, 1);
 }
 
+// Test scenario for KUDU-3577.
+TEST_F(AlterTableTest, RangeWithCustomHashSchema) {
+  KuduSchemaBuilder b;
+  b.AddColumn("c0")->Type(KuduColumnSchema::INT32)->NotNull()->
+  Default(KuduValue::FromInt(0));
+  b.AddColumn("c1")->Type(KuduColumnSchema::INT32)->NotNull()->
+  Default(KuduValue::FromInt(0));
+  b.AddColumn("c2")->Type(KuduColumnSchema::INT32)->Nullable();
+  b.SetPrimaryKey({ "c0", "c1" });
+  KuduSchema schema;
+  ASSERT_OK(b.Build(&schema));
+
+  // Create table with table-wide hash schema: 2 hash buckets on "c1" column.
+  const string table_name = "kudu-3577";
+  unique_ptr<KuduTableCreator> creator(client_->NewTableCreator());
+  creator->table_name(table_name)
+      .schema(&schema)
+      .set_range_partition_columns({ "c0" })
+      .add_hash_partitions({ "c1" }, 2)
+      .num_replicas(1);
+
+  // Add a range partition with the table-wide hash schema.
+  {
+    unique_ptr<KuduPartialRow> lower(schema.NewRow());
+    ASSERT_OK(lower->SetInt32("c0", -100));
+    unique_ptr<KuduPartialRow> upper(schema.NewRow());
+    ASSERT_OK(upper->SetInt32("c0", 0));
+    creator->add_range_partition(lower.release(), upper.release());
+  }
+
+  // Add a range partition with custom hash schema.
+  {
+    unique_ptr<KuduPartialRow> lower(schema.NewRow());
+    ASSERT_OK(lower->SetInt32("c0", 0));
+    unique_ptr<KuduPartialRow> upper(schema.NewRow());
+    ASSERT_OK(upper->SetInt32("c0", 100));
+    unique_ptr<KuduRangePartition> p(
+        new KuduRangePartition(lower.release(), upper.release()));
+    ASSERT_OK(p->add_hash_partitions({ "c1" }, 3, 0));
+    creator->add_custom_range_partition(p.release());
+  }
+  ASSERT_OK(creator->Create());
+
+  {
+    // Make sure client successfully opens the newly created table.
+    shared_ptr<KuduTable> table;
+    ASSERT_OK(client_->OpenTable(table_name, &table));
+    // The newly created table is empty, of course.
+    ASSERT_EQ(0, CountTableRows(table.get()));
+    // Insert 100 rows.
+    ASSERT_OK(InsertRowsSequential(table_name, -50, 100));
+    ASSERT_EQ(100, CountTableRows(table.get()));
+  }
+
+  {
+    // Drop "c2", the only nullable column in the table as of now.
+    unique_ptr<KuduTableAlterer> alterer(client_->NewTableAlterer(table_name));
+    alterer->DropColumn("c2");
+    ASSERT_OK(alterer->Alter());
+
+    // Make sure client successfully opens the altered table.
+    shared_ptr<KuduTable> table;
+    ASSERT_OK(client_->OpenTable(table_name, &table));
+ASS

(kudu) branch master updated: [metrics] Add metrics for tablet copy op time

2024-06-06 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new d370e0e45 [metrics] Add metrics for tablet copy op time
d370e0e45 is described below

commit d370e0e4511508790c065340a52242ee09ecfea3
Author: kedeng 
AuthorDate: Thu Apr 25 11:41:19 2024 +0800

[metrics] Add metrics for tablet copy op time

Add server-level statistics to track the time consumption of
copy tablet operations. This is effective both for the source
tablet and destination tablet during the copy operation.

The addition of monitoring items will aid in historical issue
tracking and analysis, as well as facilitate the configuration
of monitoring alarms.
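
As a rough illustration of the pattern (a sketch using only the standard library,
not the actual Kudu metrics or stopwatch API), timing an operation and feeding the
elapsed value into a duration metric looks like this:

#include <chrono>
#include <cstdint>
#include <cstdio>
#include <functional>

// Stand-in for a histogram metric's Increment() call.
static void RecordTabletCopyDurationMs(int64_t ms) {
  std::printf("tablet_copy_duration: %lld ms\n", static_cast<long long>(ms));
}

// Runs a copy-like operation and records its wall-clock duration, mirroring
// what the patch does on both the source and the destination tablet server.
static void RunAndRecord(const std::function<void()>& do_copy) {
  const auto start = std::chrono::steady_clock::now();
  do_copy();
  const auto elapsed = std::chrono::steady_clock::now() - start;
  RecordTabletCopyDurationMs(
      std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count());
}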

Change-Id: I088f6a9a8a07ad39ca95ae8b4995ce00d1a0d00c
Reviewed-on: http://gerrit.cloudera.org:8080/21356
Reviewed-by: Alexey Serbin 
Tested-by: Alexey Serbin 
---
 src/kudu/integration-tests/tablet_copy-itest.cc | 41 +
 src/kudu/tserver/tablet_copy_client.cc  | 18 ++-
 src/kudu/tserver/tablet_copy_client.h   |  1 +
 src/kudu/tserver/tablet_copy_service.cc |  1 +
 src/kudu/tserver/tablet_copy_source_session.cc  | 19 +++-
 src/kudu/tserver/tablet_copy_source_session.h   |  7 +
 6 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/src/kudu/integration-tests/tablet_copy-itest.cc 
b/src/kudu/integration-tests/tablet_copy-itest.cc
index ba3e4be45..c1a0856ad 100644
--- a/src/kudu/integration-tests/tablet_copy-itest.cc
+++ b/src/kudu/integration-tests/tablet_copy-itest.cc
@@ -149,6 +149,8 @@ METRIC_DECLARE_counter(glog_error_messages);
 METRIC_DECLARE_counter(rows_inserted);
 METRIC_DECLARE_counter(tablet_copy_bytes_fetched);
 METRIC_DECLARE_counter(tablet_copy_bytes_sent);
+METRIC_DECLARE_histogram(tablet_copy_duration);
+METRIC_DECLARE_histogram(tablet_copy_source_duration);
 METRIC_DECLARE_gauge_int32(tablet_copy_open_client_sessions);
 METRIC_DECLARE_gauge_int32(tablet_copy_open_source_sessions);
 METRIC_DECLARE_gauge_uint64(log_block_manager_blocks_under_management);
@@ -1649,6 +1651,30 @@ int64_t TabletCopyBytesFetched(ExternalTabletServer* 
ets) {
   return ret;
 }
 
+int64_t TabletCopySourceDurationTotalCount(ExternalTabletServer* ets) {
+  int64_t ret;
+  CHECK_OK(GetInt64Metric(
+      ets->bound_http_hostport(),
+      &METRIC_ENTITY_server,
+      "kudu.tabletserver",
+      &METRIC_tablet_copy_source_duration,
+      "total_count",
+      &ret));
+  return ret;
+}
+
+int64_t TabletCopyDurationTotalCount(ExternalTabletServer* ets) {
+  int64_t ret;
+  CHECK_OK(GetInt64Metric(
+      ets->bound_http_hostport(),
+      &METRIC_ENTITY_server,
+      "kudu.tabletserver",
+      &METRIC_tablet_copy_duration,
+      "total_count",
+      &ret));
+  return ret;
+}
+
 int64_t TabletCopyOpenSourceSessions(ExternalTabletServer* ets) {
   int64_t ret;
   CHECK_OK(GetInt64Metric(
@@ -1713,6 +1739,14 @@ TEST_F(TabletCopyITest, TestTabletCopyMetrics) {
   follower_index = (leader_index + 1) % cluster_->num_tablet_servers();
   follower_ts = ts_map_[cluster_->tablet_server(follower_index)->uuid()];
 
+  // Before we start the tablet copy, the metrics count should be zero.
+  int64_t copy_source_duration_cnt_before =
+      TabletCopySourceDurationTotalCount(cluster_->tablet_server(leader_index));
+  int64_t copy_duration_cnt_before =
+      TabletCopyDurationTotalCount(cluster_->tablet_server(follower_index));
+  ASSERT_EQ(0, copy_source_duration_cnt_before);
+  ASSERT_EQ(0, copy_duration_cnt_before);
+
   LOG(INFO) << "Tombstoning follower tablet " << tablet_id
 << " on TS " << follower_ts->uuid();
   ASSERT_OK(DeleteTablet(follower_ts, tablet_id, TABLET_DATA_TOMBSTONED, 
kTimeout));
@@ -1744,6 +1778,13 @@ TEST_F(TabletCopyITest, TestTabletCopyMetrics) {
   ASSERT_OK(WaitForServersToAgree(kTimeout, ts_map_, tablet_id,
   workload.batches_completed()));
 
+  // After copying, the metrics count should be greater than zero.
+  int64_t copy_source_duration_cnt =
+      TabletCopySourceDurationTotalCount(cluster_->tablet_server(leader_index));
+  int64_t copy_duration_cnt =
+      TabletCopyDurationTotalCount(cluster_->tablet_server(follower_index));
+  ASSERT_GT(copy_source_duration_cnt, 0);
+  ASSERT_GT(copy_duration_cnt, 0);
+
   ASSERT_EQ(0, 
TabletCopyOpenSourceSessions(cluster_->tablet_server(leader_index)));
   ASSERT_EQ(0, 
TabletCopyOpenClientSessions(cluster_->tablet_server(follower_index)));
 
diff --git a/src/kudu/tserver/tablet_copy_client.cc 
b/src/kudu/tserver/tablet_copy_client.cc
index fc49f42a1..d392c72c2 100644
--- a/src/kudu/tserver/tablet_copy_client.cc
+++ b/src/kudu/tserver/tablet_copy_client.cc
@@ -172,6 +172,13 @@ METRIC_DEFINE_gauge_

(kudu) branch master updated: [build] bootstrap-dev-env.sh fix for ubuntu 22.04+

2024-06-04 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 0ace3633f [build] bootstrap-dev-env.sh fix for ubuntu 22.04+
0ace3633f is described below

commit 0ace3633fa22e04b8edc567669833c1615cd4ad9
Author: Zoltan Martonka 
AuthorDate: Tue Jun 4 15:29:09 2024 +

[build] bootstrap-dev-env.sh fix for ubuntu 22.04+

On Ubuntu 22.04 the python package is renamed to python2.
On 23.10 and 24.04 python2 is no longer available.
We should just use python3 on newer platforms.

Tested on: 18.04, 20.04, 22.04, 23.10, 24.04

Change-Id: I275fa06214fde6ede1c6dd8e85f786fdc950cf8c
Reviewed-on: http://gerrit.cloudera.org:8080/20559
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin 
---
 docker/bootstrap-dev-env.sh | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/docker/bootstrap-dev-env.sh b/docker/bootstrap-dev-env.sh
index 1c4773578..9e32100e2 100755
--- a/docker/bootstrap-dev-env.sh
+++ b/docker/bootstrap-dev-env.sh
@@ -115,6 +115,14 @@ elif [[ -f "/usr/bin/apt-get" ]]; then
   # Update the repo.
   apt-get update -y
 
+  source /etc/os-release
+
+  if dpkg --compare-versions "$VERSION_ID" ge "22.04"
+  then
+apt-get install -y --no-install-recommends python3 python-is-python3
+  else
+apt-get install -y --no-install-recommends python
+  fi
   # Install core build libraries.
   # --no-install-recommends keeps the install smaller
   apt-get install -y --no-install-recommends \
@@ -143,7 +151,6 @@ elif [[ -f "/usr/bin/apt-get" ]]; then
 openssl \
 patch \
 pkg-config \
-python \
 rsync \
 sudo \
 unzip \



(kudu) branch master updated: Add a benchmark for CBTree concurrent writes.

2024-06-04 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new f4a47fe04 Add a benchmark for CBTree concurrent writes.
f4a47fe04 is described below

commit f4a47fe041b7f547fd4816706347523e06f94f6d
Author: Zoltan Martonka 
AuthorDate: Wed May 29 13:23:17 2024 +

Add a benchmark for CBTree concurrent writes.

Before updating CBTree for ARM (where it is misbehaving currently),
we should have a proper test for two scenarios:

+ Writing on multiple threads.
+ Reading on multiple threads while there are also active writes.

If read threads wait for values to be inserted, it defeats the purpose
of benchmarking. Therefore, we should first populate a tree with
values for the read threads. The read threads will then read values
that are already in the tree, while the write threads continue to insert
new values.

Setting up the tree for the second scenario essentially involves
performing the first scenario. This is why both scenarios are combined
into a single test.

The new test provides the following new features (compared to just
running DoTestConcurrentInsert with higher parameters):

+ Values are read by different threads than the ones that inserted them.
+ Reader threads can't be assigned to a certain writer thread.
+ Keys are better distributed than the previous shuffle method.
+ Allows measuring read-heavy performance (with a flag).

Reading threads start concurrently with writing threads, not at the
end of each write thread (unlike DoTestConcurrentInsert).

Note that running only concurrent reads should not differ from
TestScanPerformance, since no locking takes place and they do not
sabotage each other. So no new test is required for that scenario.
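
The shape of the benchmark can be sketched with the standard library alone
(hypothetical names; the real test uses the CBTree and the gflags defined in
cbtree-test.cc): populate first, then run writers and readers concurrently,
with readers touching only pre-populated keys:

#include <atomic>
#include <cstdint>
#include <thread>
#include <vector>

// Hypothetical stand-in for the tree under test.
struct FakeTree {
  std::atomic<int64_t> high_water{0};
  void Insert(int64_t /*key*/) { high_water.fetch_add(1, std::memory_order_relaxed); }
  bool Contains(int64_t key) const {
    return key < high_water.load(std::memory_order_relaxed);
  }
};

void ConcurrentReadWriteBenchmark(int num_writers, int num_readers, int64_t num_inserts) {
  FakeTree tree;
  // Phase 1: pre-populate so readers never have to wait for writers.
  for (int64_t k = 0; k < num_inserts; ++k) {
    tree.Insert(k);
  }

  std::vector<std::thread> threads;
  // Phase 2: writers keep inserting fresh, interleaved key ranges...
  for (int w = 0; w < num_writers; ++w) {
    threads.emplace_back([&tree, w, num_writers, num_inserts] {
      for (int64_t k = num_inserts + w; k < 2 * num_inserts; k += num_writers) {
        tree.Insert(k);
      }
    });
  }
  // ...while readers look up only the keys inserted in phase 1.
  for (int r = 0; r < num_readers; ++r) {
    threads.emplace_back([&tree, num_inserts] {
      for (int64_t k = 0; k < num_inserts; ++k) {
        tree.Contains(k);
      }
    });
  }
  for (auto& t : threads) {
    t.join();
  }
}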

Change-Id: I1b0b16e269c70716962fc5ebb4ddca1e2cbe68a4
Reviewed-on: http://gerrit.cloudera.org:8080/21447
Reviewed-by: Zoltan Chovan 
Reviewed-by: Ashwani Raina 
Reviewed-by: Alexey Serbin 
Tested-by: Alexey Serbin 
---
 src/kudu/tablet/cbtree-test.cc | 197 +
 1 file changed, 197 insertions(+)

diff --git a/src/kudu/tablet/cbtree-test.cc b/src/kudu/tablet/cbtree-test.cc
index 32cd7b75c..1f342d8cf 100644
--- a/src/kudu/tablet/cbtree-test.cc
+++ b/src/kudu/tablet/cbtree-test.cc
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -25,6 +27,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 
@@ -36,7 +39,9 @@
 #include "kudu/util/debug/sanitizer_scopes.h"
 #include "kudu/util/faststring.h"
 #include "kudu/util/hexdump.h"
+#include "kudu/util/mem_tracker.h"
 #include "kudu/util/memory/arena.h"
+#include "kudu/util/memory/memory.h"
 #include "kudu/util/memory/overwrite.h"
 #include "kudu/util/slice.h"
 #include "kudu/util/stopwatch.h"
@@ -49,6 +54,17 @@ using std::unordered_set;
 using std::vector;
 using strings::Substitute;
 
+DEFINE_int32(concurrent_rw_benchmark_num_writer_threads, 4,
+ "Number of writer threads in TestConcurrentReadWritePerformance");
+DEFINE_int32(concurrent_rw_benchmark_num_reader_threads, 4,
+ "Number of reader threads in TestConcurrentReadWritePerformance");
+DEFINE_int32(concurrent_rw_benchmark_num_inserts, 100,
+ "Number of inserts in TestConcurrentReadWritePerformance");
+// This might be needed, because reads are significantly faster than writes.
+DEFINE_int32(concurrent_rw_benchmark_reader_boost, 1,
+"Multiply the amount of values each reader thread reads in "
+"TestConcurrentReadWritePerformance");
+
 namespace kudu {
 namespace tablet {
 namespace btree {
@@ -889,6 +905,187 @@ TEST_F(TestCBTree, TestIteratorSeekAtOrBefore) {
   }
 }
 
+// All applications of CBTree use a threadsafe arena with default node sizes.
+struct ProdTreeTraits : public btree::BTreeTraits {
+  typedef ThreadSafeMemoryTrackingArena ArenaType;
+};
+
+// We benchmark two scenarios:
+// 1. Writing on multiple threads.
+// 2. Reading on multiple threads while there are also active writes.
+//
+// If read threads wait for values to be inserted, it defeats the purpose of benchmarking.
+// Therefore, we should first populate a tree with values for the read threads. The read
+// threads will then read values that are already in the tree, while the write threads
+// continue to insert new values.
+//
+// Setting up the tree for the second scenario essentially involves performing the first
+// scenario. This is why both scenarios are combined into a single test.
+TEST_F(TestCBTree, ConcurrentR

(kudu) 02/02: KUDU-3581: upgrade Netty to 4.1.110.Final

2024-05-29 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 500d3d19f15c9c9b0e273adda7ec637dbc45c5c6
Author: Alexey Serbin 
AuthorDate: Tue May 28 16:52:14 2024 -0700

KUDU-3581: upgrade Netty to 4.1.110.Final

Even if Kudu doesn't use anything from Netty at its server side and
is not affected by the HTTP/2 rapid reset issue, it makes sense to
upgrade the Netty package used by the Kudu Java client library
to include the fix for well-known CVE [1].  It would be enough to
upgrade up to 4.1.100.Final, but I took the liberty of upgrading
up to the latest available 4.1.110.Final version.

[1] https://www.cve.org/CVERecord?id=CVE-2023-44487

Change-Id: I6e2ad686374b06d7b8cb28a7a456c21977b95ea8
Reviewed-on: http://gerrit.cloudera.org:8080/21464
Tested-by: Alexey Serbin 
Reviewed-by: Yingchun Lai 
(cherry picked from commit 8d5f82483665fd6229d08fdfe94c87b07f80f986)
Reviewed-on: http://gerrit.cloudera.org:8080/21465
Reviewed-by: Attila Bukor 
---
 java/gradle/dependencies.gradle | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/gradle/dependencies.gradle b/java/gradle/dependencies.gradle
index 2a9724a13..e205649b3 100755
--- a/java/gradle/dependencies.gradle
+++ b/java/gradle/dependencies.gradle
@@ -49,7 +49,7 @@ versions += [
 micrometer : "1.8.2",
 mockito: "4.2.0",
 murmur : "1.0.0",
-netty  : "4.1.94.Final",
+netty  : "4.1.110.Final",
 osdetector : "1.6.2",
 protobuf   : "3.21.12",
 ranger : "2.1.0",



(kudu) 01/02: Update build pattern for fetching flaky tests list

2024-05-29 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 1c428a3d40d0712a329cc6f9f4c590eb9e75b09a
Author: Marton Greber 
AuthorDate: Thu Apr 18 11:13:30 2024 +0200

Update build pattern for fetching flaky tests list

During the infra changes which happened in the last months, the build_id
prefix has been changed [note 1]. As a result flaky tests are not
retried during testing in the pre-commit runs. This patch updates the
build pattern in the build-and-test.sh script, such that the
list of flaky tests is fetched correctly.

[note 1]:
The build_id of an isolated test is constructed with Jenkins job's name
and build number, prefixed with "jenkins-". To illustrate this, let me
give some examples of build_ids prior to and after the infra upgrade.
Before:
jenkins-kudu-gerrit-BUILD_TYPE=ASAN-29232
jenkins-kudu-gerrit-BUILD_TYPE=DEBUG-29227
After:
jenkins-build_and_test-64
jenkins-build_and_test-63
From the above we can see that the new Jenkins job name caused the
issue. The new job is parametrised, does not contain the build type in
the job's name. Therefore, a change in the naming is justified.
In case something like this happens in the future, the build_ids can be
observed in the test_results MySQL database on the isolateserver test
infra host.

Change-Id: I317a3a32c06c06306b97566f954e0ffd508ce01f
Reviewed-on: http://gerrit.cloudera.org:8080/21327
Reviewed-by: Ashwani Raina 
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin 
(cherry picked from commit 7b5fc9f2f0ad538242656431bf76e253cc537772)
Reviewed-on: http://gerrit.cloudera.org:8080/21466
Reviewed-by: Marton Greber 
Reviewed-by: Zoltan Chovan 
Tested-by: Attila Bukor 
---
 build-support/jenkins/build-and-test.sh | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/build-support/jenkins/build-and-test.sh 
b/build-support/jenkins/build-and-test.sh
index c6f0ce774..93ed40a98 100755
--- a/build-support/jenkins/build-and-test.sh
+++ b/build-support/jenkins/build-and-test.sh
@@ -39,8 +39,8 @@
 #
 #   KUDU_FLAKY_TEST_ATTEMPTS  Default: 1
 # If more than 1, will fetch the list of known flaky tests
-# from the kudu-test jenkins job, and allow those tests to
-# be flaky in this build.
+# from the jenkins jobs matching the "%jenkins-%" pattern, and allow those
+# tests to be flaky in this build.
 #
 #   TEST_RESULT_SERVER  Default: none
 # The host:port pair of a server running test_result_server.py.
@@ -183,8 +183,11 @@ mkdir -p $BUILD_ROOT
 # Same for the Java tests, which aren't inside BUILD_ROOT
 rm -rf $SOURCE_ROOT/java/*/build
 
+# The build_pattern is %jenkins-% because we are interested in two types of 
runs:
+# 1. As of now build_and_test pipeline job which is triggered by the 
pre-commit pipeline job.
+# 2. Any other job which is used to run the flaky tests only.
 list_flaky_tests() {
-  local url="http://$TEST_RESULT_SERVER/list_failed_tests?num_days=3&build_pattern=%25kudu-test%25"
+  local url="http://$TEST_RESULT_SERVER/list_failed_tests?num_days=3&build_pattern=%25jenkins-%25"
   >&2 echo Fetching flaky test list from "$url" ...
   curl -s --show-error "$url"
   return $?



(kudu) branch branch-1.17.x updated (9bb5c3c8a -> 500d3d19f)

2024-05-29 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a change to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git


from 9bb5c3c8a Fix row_project codegen params noalias overflow
 new 1c428a3d4 Update build pattern for fetching flaky tests list
 new 500d3d19f KUDU-3581: upgrade Netty to 4.1.110.Final

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 build-support/jenkins/build-and-test.sh | 9 ++---
 java/gradle/dependencies.gradle | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)



(kudu) branch master updated: KUDU-3581: upgrade Netty to 4.1.110.Final

2024-05-28 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 8d5f82483 KUDU-3581: upgrade Netty to 4.1.110.Final
8d5f82483 is described below

commit 8d5f82483665fd6229d08fdfe94c87b07f80f986
Author: Alexey Serbin 
AuthorDate: Tue May 28 16:52:14 2024 -0700

KUDU-3581: upgrade Netty to 4.1.110.Final

Even if Kudu doesn't use anything from Netty at its server side and
is not affected by the HTTP/2 rapid reset issue, it makes sense to
upgrade the Netty package used by the Kudu Java client library
to include the fix for well-known CVE [1].  It would be enough to
upgrade up to 4.1.100.Final, but I took the liberty of upgrading
up to the latest available 4.1.110.Final version.

[1] https://www.cve.org/CVERecord?id=CVE-2023-44487

Change-Id: I6e2ad686374b06d7b8cb28a7a456c21977b95ea8
Reviewed-on: http://gerrit.cloudera.org:8080/21464
Tested-by: Alexey Serbin 
Reviewed-by: Yingchun Lai 
---
 java/gradle/dependencies.gradle | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/gradle/dependencies.gradle b/java/gradle/dependencies.gradle
index 2a9724a13..e205649b3 100755
--- a/java/gradle/dependencies.gradle
+++ b/java/gradle/dependencies.gradle
@@ -49,7 +49,7 @@ versions += [
 micrometer : "1.8.2",
 mockito: "4.2.0",
 murmur : "1.0.0",
-netty  : "4.1.94.Final",
+netty  : "4.1.110.Final",
 osdetector : "1.6.2",
 protobuf   : "3.21.12",
 ranger : "2.1.0",



(kudu) branch master updated: KUDU-2671: Update upstream docs

2024-05-28 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new cf550d6d7 KUDU-2671: Update upstream docs
cf550d6d7 is described below

commit cf550d6d7cdd61f6c65f9ef75a1706cb91839876
Author: Mahesh Reddy 
AuthorDate: Tue Mar 5 15:20:33 2024 -0800

KUDU-2671: Update upstream docs

This patch updates the upstream docs to include range specific
hash schemas within the partitioning section. An example
with the proper sql syntax is also included in the kudu impala
integration doc.

Change-Id: I8da554851a124d1d357be65d8bcc2c6c37875dcc
Reviewed-on: http://gerrit.cloudera.org:8080/21108
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin 
---
 docs/kudu_impala_integration.adoc | 42 +++
 docs/schema_design.adoc   | 16 +++
 2 files changed, 58 insertions(+)

diff --git a/docs/kudu_impala_integration.adoc 
b/docs/kudu_impala_integration.adoc
index 0def0477c..de01c3d59 100755
--- a/docs/kudu_impala_integration.adoc
+++ b/docs/kudu_impala_integration.adoc
@@ -485,6 +485,48 @@ The example creates 16 partitions. You could also use 
`HASH (id, sku) PARTITIONS
 However, a scan for `sku` values would almost always impact all 16 partitions, 
rather
 than possibly being limited to 4.
 
+.Range-Specific Hash Schemas
+As of 1.17, Kudu supports range-specific hash schemas for tables. It's possible to
+add ranges with a hash schema independent of the table-wide hash schema. This can be
+done while creating or altering the table. The number of hash partition levels must
+be the same across all ranges in a table.
+
+[source, sql]
+----
+CREATE TABLE cust_behavior (
+  id BIGINT,
+  sku STRING,
+  salary STRING,
+  edu_level INT,
+  usergender STRING,
+  `group` STRING,
+  city STRING,
+  postcode STRING,
+  last_purchase_price FLOAT,
+  last_purchase_date BIGINT,
+  category STRING,
+  rating INT,
+  fulfilled_date BIGINT,
+  PRIMARY KEY (id, sku)
+)
+PARTITION BY HASH (id) PARTITIONS 4
+RANGE (sku)
+(
+  PARTITION VALUES < 'g'
+  PARTITION 'g' <= VALUES < 'o'
+  HASH (id) PARTITIONS 6
+  PARTITION 'o' <= VALUES < 'u'
+  HASH (id) PARTITIONS 8
+  PARTITION 'u' <= VALUES
+)
+STORED AS KUDU;
+----
+
+This example uses the range-specific hash schema feature for the middle two
+ranges. The table-wide hash schema has 4 buckets while the hash schemas
+for the middle two ranges have 6 and 8 buckets respectively. This can be done
+in cases where we expect a higher workload in such ranges.
+
 .Non-Covering Range Partitions
 Kudu 1.0 and higher supports the use of non-covering range partitions,
 which address scenarios like the following:
diff --git a/docs/schema_design.adoc b/docs/schema_design.adoc
index 95d4d251c..906682b86 100644
--- a/docs/schema_design.adoc
+++ b/docs/schema_design.adoc
@@ -435,6 +435,22 @@ NOTE: see the <> and the
 <> for further discussion of multilevel
 partitioning.
 
+[[flexible-partitioning]]
+=== Flexible Partitioning
+
+As of 1.17, Kudu supports range-specific hash schema for tables. It's now
+possible to add ranges with their own unique hash schema independent of the
+table-wide hash schema. This can be done while creating or altering the table.
+This feature helps mitigate potential hotspotting as more buckets can be
+added for a hash schema of a range that expects more workload.
+
+[[same-number-of-hash-levels]]
+[IMPORTANT]
+.Same Number of Hash Levels
+The number of hash partition levels must be the same for all the ranges
+in a table. See <> for more details on hash partition
+levels.
+
 [[partition-pruning]]
 === Partition Pruning
 



(kudu) branch master updated: [metrics] Add tablet level metrics for scans op time

2024-05-23 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 6d6364d19 [metrics] Add tablet level metrics for scans op time
6d6364d19 is described below

commit 6d6364d19d287d8effb604b6ab11dfdff5db794e
Author: kedeng 
AuthorDate: Fri Apr 12 16:08:19 2024 +0800

[metrics] Add tablet level metrics for scans op time

We currently have monitoring in place for scan data volume and
slow scans, but we are still lacking monitoring data for scan
request timings.

In this patch, I have added monitoring for scan request timings at
the tablet level to assist us in pinpointing specific scenarios of
high CPU usage during scanning operations.
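
For context on the three views being recorded, here is a hedged, standalone sketch
(POSIX getrusage plus a steady clock; not the Stopwatch/CpuTimes utilities the patch
actually uses) of measuring wall, user-CPU and system-CPU time for one request:

#include <sys/resource.h>
#include <sys/time.h>
#include <chrono>
#include <cstdint>
#include <utility>

struct RequestTimingsMs {
  int64_t wall;
  int64_t user;
  int64_t sys;
};

// Times a single request three ways: wall clock, user CPU and system CPU.
template <typename Fn>
RequestTimingsMs TimeRequest(Fn&& fn) {
  rusage before{};
  rusage after{};
  getrusage(RUSAGE_SELF, &before);
  const auto start = std::chrono::steady_clock::now();
  std::forward<Fn>(fn)();
  const auto wall = std::chrono::steady_clock::now() - start;
  getrusage(RUSAGE_SELF, &after);

  auto delta_ms = [](const timeval& a, const timeval& b) -> int64_t {
    return (b.tv_sec - a.tv_sec) * 1000 + (b.tv_usec - a.tv_usec) / 1000;
  };
  return {
      std::chrono::duration_cast<std::chrono::milliseconds>(wall).count(),
      delta_ms(before.ru_utime, after.ru_utime),
      delta_ms(before.ru_stime, after.ru_stime),
  };
}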

Change-Id: I8f490cfb6f37aef60b34697100fb502374fcc503
Reviewed-on: http://gerrit.cloudera.org:8080/21291
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin 
---
 src/kudu/tablet/tablet_metrics.cc  |  24 
 src/kudu/tablet/tablet_metrics.h   |   3 +
 src/kudu/tserver/scanners.cc   |  13 ++
 src/kudu/tserver/scanners.h|   3 +
 src/kudu/tserver/tablet_server-test.cc | 231 ++---
 src/kudu/util/stopwatch.h  |   8 ++
 6 files changed, 237 insertions(+), 45 deletions(-)

diff --git a/src/kudu/tablet/tablet_metrics.cc 
b/src/kudu/tablet/tablet_metrics.cc
index 5a20b2948..804aa48e7 100644
--- a/src/kudu/tablet/tablet_metrics.cc
+++ b/src/kudu/tablet/tablet_metrics.cc
@@ -153,6 +153,27 @@ METRIC_DEFINE_gauge_size(tablet, tablet_active_scanners, 
"Active Scanners",
  "Number of scanners that are currently active on this 
tablet",
  kudu::MetricLevel::kInfo);
 
+METRIC_DEFINE_histogram(tablet, scan_duration_wall_time,
+"Scan Requests Wall Time",
+kudu::MetricUnit::kMilliseconds,
+"Duration of scan requests, wall time.",
+kudu::MetricLevel::kDebug,
+6LU, 1);
+
+METRIC_DEFINE_histogram(tablet, scan_duration_system_time,
+"Scan Requests System Time",
+kudu::MetricUnit::kMilliseconds,
+"Duration of scan requests, system time.",
+kudu::MetricLevel::kDebug,
+6LU, 1);
+
+METRIC_DEFINE_histogram(tablet, scan_duration_user_time,
+"Scan Requests User Time",
+kudu::MetricUnit::kMilliseconds,
+"Duration of scan requests, user time.",
+kudu::MetricLevel::kDebug,
+6LU, 1);
+
 METRIC_DEFINE_counter(tablet, bloom_lookups, "Bloom Filter Lookups",
   kudu::MetricUnit::kProbes,
   "Number of times a bloom filter was consulted",
@@ -433,6 +454,9 @@ TabletMetrics::TabletMetrics(const 
scoped_refptr& entity)
 MINIT(scanner_predicates_disabled),
 MINIT(scans_started),
 GINIT(tablet_active_scanners),
+MINIT(scan_duration_wall_time),
+MINIT(scan_duration_system_time),
+MINIT(scan_duration_user_time),
 MINIT(bloom_lookups),
 MINIT(key_file_lookups),
 MINIT(delta_file_lookups),
diff --git a/src/kudu/tablet/tablet_metrics.h b/src/kudu/tablet/tablet_metrics.h
index 9c6b815ab..5bd9620ce 100644
--- a/src/kudu/tablet/tablet_metrics.h
+++ b/src/kudu/tablet/tablet_metrics.h
@@ -65,6 +65,9 @@ struct TabletMetrics {
   scoped_refptr scanner_predicates_disabled;
   scoped_refptr scans_started;
   scoped_refptr> tablet_active_scanners;
+  scoped_refptr scan_duration_wall_time;
+  scoped_refptr scan_duration_system_time;
+  scoped_refptr scan_duration_user_time;
 
   // Probe stats.
   scoped_refptr bloom_lookups;
diff --git a/src/kudu/tserver/scanners.cc b/src/kudu/tserver/scanners.cc
index 06703666d..9d3383e99 100644
--- a/src/kudu/tserver/scanners.cc
+++ b/src/kudu/tserver/scanners.cc
@@ -522,9 +522,22 @@ Scanner::~Scanner() {
   }
 }
 
+void Scanner::UpdateTabletMetrics(const CpuTimes& elapsed) {
+  if (tablet_replica_) {
+auto tablet = tablet_replica_->shared_tablet();
+if (tablet && tablet->metrics()) {
+  // Store scan request's timings.
+  
tablet->metrics()->scan_duration_wall_time->Increment(elapsed.wall_millis());
+  
tablet->metrics()->scan_duration_system_time->Increment(elapsed.system_cpu_millis());
+  
tablet->metrics()->scan_duration_user_time->Increment(elapsed.user_cpu_millis());
+}
+  }
+}
+
 void Scanner::AddTimings(const CpuTimes& elapsed) {
   std::lock_guard l(cpu_times_lock_);
   cpu_times_.Add(elapsed);
+  UpdateTabletMetrics(

(kudu) branch branch-1.17.x updated: Fix row_project codegen params noalias overflow

2024-05-22 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/branch-1.17.x by this push:
 new 9bb5c3c8a Fix row_project codegen params noalias overflow
9bb5c3c8a is described below

commit 9bb5c3c8ae4e2892f9a67c5dc739dc6ffbc37d3e
Author: qhsong 
AuthorDate: Tue May 30 20:22:27 2023 +0800

Fix row_project codegen params noalias overflow

function->addParamAttr() uses 0-based indexes; the current row_project generator IR code is:
`define i1 @ProjRead(i8* %src, %"class.kudu::RowBlockRow"* noalias %rbrow, %"class.kudu::Arena"* noalias %arena)`
not the same as before.

Change-Id: I1dab7d46cff96ed1ebbd020584a066f04e6ca12a
Reviewed-on: http://gerrit.cloudera.org:8080/19952
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin 
(cherry picked from commit c485c8c3cf4b76d8a55e2ec98e27803341285f75)
Reviewed-on: http://gerrit.cloudera.org:8080/21448
Tested-by: Alexey Serbin 
---
 src/kudu/codegen/row_projector.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/kudu/codegen/row_projector.cc 
b/src/kudu/codegen/row_projector.cc
index 90670df68..0949a1b70 100644
--- a/src/kudu/codegen/row_projector.cc
+++ b/src/kudu/codegen/row_projector.cc
@@ -118,10 +118,9 @@ llvm::Function* MakeProjection(const string& name,
 
   // Mark our arguments as not aliasing. This eliminates a redundant
   // load of rbrow->row_block_ and rbrow->row_index_ for each column.
-  // Note that these arguments are 1-based indexes.
+  f->addParamAttr(0, llvm::Attribute::NoAlias);
   f->addParamAttr(1, llvm::Attribute::NoAlias);
   f->addParamAttr(2, llvm::Attribute::NoAlias);
-  f->addParamAttr(3, llvm::Attribute::NoAlias);
 
   // Project row function in IR (note: values in angle brackets are
   // constants whose values are determined right now, at JIT time).



(kudu) branch branch-1.17.x updated: Fix deadlock on fail for CBTree-test

2024-05-22 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/branch-1.17.x by this push:
 new 7e8876a10 Fix deadlock on fail for CBTree-test
7e8876a10 is described below

commit 7e8876a10d379d58f59f8a0edfe95026530d6190
Author: Zoltan Martonka 
AuthorDate: Wed May 22 13:48:06 2024 +

Fix deadlock on fail for CBTree-test

When TestConcurrentIterateAndInsert, TestConcurrentInsert,
TestRacyConcurrentInsert fail while --gtest_repeat is used, they
will keep running forever. Instead of just returning on fail,
they should properly stop the other threads running, and then exit.

To reproduce the problem, run this on ARM (where the test actually
fails):
./bin/cbtree-test --gtest_repeat=100 --gtest_filter=*Racy*

Change-Id: Ia10d05dfdc4a12cb034450f432693f054d138498
Reviewed-on: http://gerrit.cloudera.org:8080/21446
Tested-by: Alexey Serbin 
Reviewed-by: Alexey Serbin 
(cherry picked from commit d1bc5b53ef006ac14bca4417c4745c5875d89734)
Reviewed-on: http://gerrit.cloudera.org:8080/21451
Reviewed-by: Abhishek Chennaka 
---
 src/kudu/tablet/cbtree-test.cc | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/kudu/tablet/cbtree-test.cc b/src/kudu/tablet/cbtree-test.cc
index 7a8c3a420..32cd7b75c 100644
--- a/src/kudu/tablet/cbtree-test.cc
+++ b/src/kudu/tablet/cbtree-test.cc
@@ -28,6 +28,7 @@
 #include 
 #include 
 
+#include "kudu/gutil/atomicops.h"
 #include "kudu/gutil/stringprintf.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/tablet/concurrent_btree.h"
@@ -474,14 +475,14 @@ void TestCBTree::DoTestConcurrentInsert() {
 
 if (::testing::Test::HasFatalFailure()) {
   tree->DebugPrint();
-  return;
+  break;
 }
   }
 
   tree.reset(nullptr);
   go_barrier.Wait();
 
-  for (thread &thr : threads) {
+  for (thread& thr : threads) {
 thr.join();
   }
 }
@@ -739,7 +740,7 @@ TEST_F(TestCBTree, TestConcurrentIterateAndInsert) {
 
 if (::testing::Test::HasFatalFailure()) {
   tree->DebugPrint();
-  return;
+  break;
 }
   }
 



(kudu) branch master updated: Fix deadlock on fail for CBTree-test

2024-05-22 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new d1bc5b53e Fix deadlock on fail for CBTree-test
d1bc5b53e is described below

commit d1bc5b53ef006ac14bca4417c4745c5875d89734
Author: Zoltan Martonka 
AuthorDate: Wed May 22 13:48:06 2024 +

Fix deadlock on fail for CBTree-test

When TestConcurrentIterateAndInsert, TestConcurrentInsert,
TestRacyConcurrentInsert fail while --gtest_repeat is used, they
will keep running forever. Instead of just returning on fail,
they should properly stop the other threads running, and then exit.

To reproduce the problem, run this on ARM (where the test actually
fails):
./bin/cbtree-test --gtest_repeat=100 --gtest_filter=*Racy*
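
The essence of the fix is that a worker-spawning test must reach its join loop even
after a fatal failure. A minimal sketch of the pattern (standard threads, hypothetical
names, not the actual test code):

#include <thread>
#include <vector>

void RunIterationsThenJoin(bool failure_detected) {
  std::vector<std::thread> workers;
  for (int i = 0; i < 4; ++i) {
    workers.emplace_back([] { /* ... concurrent work ... */ });
  }

  for (int iter = 0; iter < 100; ++iter) {
    // ... verify this iteration's results ...
    if (failure_detected) {
      break;  // not `return`: fall through to the join loop below
    }
  }

  // Always reached, even after a failure, so the process can exit cleanly.
  for (auto& t : workers) {
    t.join();
  }
}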

Change-Id: Ia10d05dfdc4a12cb034450f432693f054d138498
Reviewed-on: http://gerrit.cloudera.org:8080/21446
Tested-by: Alexey Serbin 
Reviewed-by: Alexey Serbin 
---
 src/kudu/tablet/cbtree-test.cc | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/kudu/tablet/cbtree-test.cc b/src/kudu/tablet/cbtree-test.cc
index 7a8c3a420..32cd7b75c 100644
--- a/src/kudu/tablet/cbtree-test.cc
+++ b/src/kudu/tablet/cbtree-test.cc
@@ -28,6 +28,7 @@
 #include 
 #include 
 
+#include "kudu/gutil/atomicops.h"
 #include "kudu/gutil/stringprintf.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/tablet/concurrent_btree.h"
@@ -474,14 +475,14 @@ void TestCBTree::DoTestConcurrentInsert() {
 
 if (::testing::Test::HasFatalFailure()) {
   tree->DebugPrint();
-  return;
+  break;
 }
   }
 
   tree.reset(nullptr);
   go_barrier.Wait();
 
-  for (thread &thr : threads) {
+  for (thread& thr : threads) {
 thr.join();
   }
 }
@@ -739,7 +740,7 @@ TEST_F(TestCBTree, TestConcurrentIterateAndInsert) {
 
 if (::testing::Test::HasFatalFailure()) {
   tree->DebugPrint();
-  return;
+  break;
 }
   }
 



(kudu) branch master updated: Fix row_project codegen params noalias overflow

2024-05-21 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new c485c8c3c Fix row_project codegen params noalias overflow
c485c8c3c is described below

commit c485c8c3cf4b76d8a55e2ec98e27803341285f75
Author: qhsong 
AuthorDate: Tue May 30 20:22:27 2023 +0800

Fix row_project codegen params noalias overflow

function->addParamAttr() uses 0-based indexes; the current row_project generator IR code is:
`define i1 @ProjRead(i8* %src, %"class.kudu::RowBlockRow"* noalias %rbrow, %"class.kudu::Arena"* noalias %arena)`
not the same as before.

Change-Id: I1dab7d46cff96ed1ebbd020584a066f04e6ca12a
Reviewed-on: http://gerrit.cloudera.org:8080/19952
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin 
---
 src/kudu/codegen/row_projector.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/kudu/codegen/row_projector.cc 
b/src/kudu/codegen/row_projector.cc
index 90670df68..0949a1b70 100644
--- a/src/kudu/codegen/row_projector.cc
+++ b/src/kudu/codegen/row_projector.cc
@@ -118,10 +118,9 @@ llvm::Function* MakeProjection(const string& name,
 
   // Mark our arguments as not aliasing. This eliminates a redundant
   // load of rbrow->row_block_ and rbrow->row_index_ for each column.
-  // Note that these arguments are 1-based indexes.
+  f->addParamAttr(0, llvm::Attribute::NoAlias);
   f->addParamAttr(1, llvm::Attribute::NoAlias);
   f->addParamAttr(2, llvm::Attribute::NoAlias);
-  f->addParamAttr(3, llvm::Attribute::NoAlias);
 
   // Project row function in IR (note: values in angle brackets are
   // constants whose values are determined right now, at JIT time).



(kudu) branch master updated: [security-flags-itest] Fix missing command line flags

2024-05-15 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 46c4cb1c2 [security-flags-itest] Fix missing command line flags
46c4cb1c2 is described below

commit 46c4cb1c2906a77f700509d02813a15dbf919927
Author: Ádám Bakai 
AuthorDate: Mon May 6 11:41:53 2024 +0200

[security-flags-itest] Fix missing command line flags

It is a known phenomenon that static libraries won't be included in
an executable if none of their functions or variables are used [1].
This means the initialization routines in the library won't be executed,
even if these initialization routines have side effects, such as
registering the variable in the gflags ecosystem. As a result, the
CheckRpcAuthnFlagsGroupValidator test failed because the
"rpc_authentication" flag was not registered properly. To solve this
issue, a command-line variable check is added, so now the library will
be linked into the executable and the initialization routines will be
executed.

[1] 
https://stackoverflow.com/questions/1229430/how-do-i-prevent-my-unused-global-variables-being-compiled-out-of-my-static-li

Change-Id: Iec751e8761562612d97b886740c9b20cd134a0bc
Reviewed-on: http://gerrit.cloudera.org:8080/21399
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin 
---
 src/kudu/integration-tests/security-flags-itest.cc | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/kudu/integration-tests/security-flags-itest.cc 
b/src/kudu/integration-tests/security-flags-itest.cc
index 81476215e..568b41334 100644
--- a/src/kudu/integration-tests/security-flags-itest.cc
+++ b/src/kudu/integration-tests/security-flags-itest.cc
@@ -28,6 +28,8 @@
 #include "kudu/util/test_macros.h"
 #include "kudu/util/test_util.h"
 
+DECLARE_string(rpc_authentication);
+
 using gflags::SetCommandLineOption;
 using kudu::cluster::ExternalMiniCluster;
 using kudu::cluster::ExternalMiniClusterOptions;
@@ -43,7 +45,17 @@ TEST_F(SecurityFlagsTest, CheckRpcAuthnFlagsGroupValidator) {
   // set them to the required values instead to verify the functionality
   // of the corresponding group flag validator.
   ASSERT_NE("", SetCommandLineOption("unlock_experimental_flags", "true"));
+
   ASSERT_NE("", SetCommandLineOption("rpc_authentication", "required"));
+  // This check has two purposes. The first purpose is that it verifies that the
+  // flag is set up correctly. The second purpose is that the linker can omit whole
+  // library files when no function or variable is used in them. This can happen
+  // even if the variable's constructor has some side effects. This happened
+  // with the command line arguments in release build in some cases. As a
+  // solution, FLAGS_rpc_authentication is used and, as a consequence, all the
+  // global variable constructors are called.
+  ASSERT_EQ("required", FLAGS_rpc_authentication);
+
   ASSERT_NE("", SetCommandLineOption("keytab_file", ""));
   ASSERT_NE("", SetCommandLineOption("rpc_certificate_file", ""));
   ASSERT_DEATH({ ValidateFlags(); },



(kudu) 02/02: [common] get rid of MutexLock

2024-05-14 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit eb0a41fdb0642baa61a58f925bb82157330abe7a
Author: Alexey Serbin 
AuthorDate: Wed May 8 17:43:57 2024 -0700

[common] get rid of MutexLock

Since contemporary STL library provides both std::lock_guard and
std::unique_lock, there is no need to keep MutexLock.

Change-Id: I49e0ef2c688ef8be74d018bb9bffe70b6655e654
Reviewed-on: http://gerrit.cloudera.org:8080/21415
Tested-by: Alexey Serbin 
Reviewed-by: Abhishek Chennaka 
---
 src/kudu/clock/builtin_ntp.cc  |  8 +--
 src/kudu/master/auto_leader_rebalancer.cc  |  2 +-
 src/kudu/master/catalog_manager.cc |  4 +-
 src/kudu/rpc/service_pool.cc   |  8 ++-
 src/kudu/rpc/service_pool.h|  2 +-
 src/kudu/rpc/service_queue.h   |  4 +-
 src/kudu/server/diagnostics_log.cc | 14 ++---
 src/kudu/tablet/tablet_metadata.cc |  8 +--
 src/kudu/tablet/tablet_metadata.h  |  2 +-
 src/kudu/tools/table_scanner.cc|  4 +-
 src/kudu/tserver/heartbeater.cc|  6 +-
 src/kudu/tserver/scanners.cc   |  4 +-
 src/kudu/tserver/scanners.h|  2 +-
 src/kudu/tserver/tablet_copy_service.cc| 16 +++---
 src/kudu/tserver/tablet_copy_service.h |  2 +-
 src/kudu/util/async_logger.cc  | 13 +++--
 src/kudu/util/async_logger.h   |  6 +-
 src/kudu/util/barrier.h|  4 +-
 src/kudu/util/blocking_queue-test.cc   |  6 +-
 src/kudu/util/blocking_queue.h | 25 
 src/kudu/util/cloud/instance_detector.cc   |  6 +-
 src/kudu/util/countdown_latch.h| 17 +++---
 src/kudu/util/debug/trace_event_impl.cc|  6 +-
 src/kudu/util/debug/trace_event_synthetic_delay.cc | 16 +++---
 src/kudu/util/kernel_stack_watchdog.cc |  2 +-
 src/kudu/util/mem_tracker.cc   | 12 ++--
 src/kudu/util/mem_tracker.h|  1 +
 src/kudu/util/mutex.h  | 67 +-
 src/kudu/util/pstack_watcher.cc|  8 +--
 src/kudu/util/rwc_lock.cc  | 14 +++--
 src/kudu/util/test_graph.cc|  5 +-
 src/kudu/util/threadpool.cc| 41 ++---
 src/kudu/util/threadpool.h |  8 +--
 33 files changed, 146 insertions(+), 197 deletions(-)

diff --git a/src/kudu/clock/builtin_ntp.cc b/src/kudu/clock/builtin_ntp.cc
index 29b64f541..5a28980c9 100644
--- a/src/kudu/clock/builtin_ntp.cc
+++ b/src/kudu/clock/builtin_ntp.cc
@@ -548,7 +548,7 @@ BuiltInNtp::~BuiltInNtp() {
 }
 
 Status BuiltInNtp::Init() {
-  MutexLock l(state_lock_);
+  std::lock_guard l(state_lock_);
   CHECK_EQ(kUninitialized, state_);
 
   RETURN_NOT_OK(InitImpl());
@@ -669,13 +669,13 @@ Status BuiltInNtp::PopulateServers(std::vector 
servers) {
 }
 
 bool BuiltInNtp::is_shutdown() const {
-  MutexLock l(state_lock_);
+  std::lock_guard l(state_lock_);
   return state_ == kShutdown;
 }
 
 void BuiltInNtp::Shutdown() {
   {
-MutexLock l(state_lock_);
+std::lock_guard l(state_lock_);
 if (state_ == kShutdown) {
   return;
 }
@@ -1116,7 +1116,7 @@ Status BuiltInNtp::CombineClocks() {
 
   // We got a valid clock result, so wake up Init() that we are ready to be 
used.
   {
-MutexLock l(state_lock_);
+std::lock_guard l(state_lock_);
 if (state_ == kStarting) {
   state_ = kStarted;
 }
diff --git a/src/kudu/master/auto_leader_rebalancer.cc 
b/src/kudu/master/auto_leader_rebalancer.cc
index fdf058623..b6bd1268d 100644
--- a/src/kudu/master/auto_leader_rebalancer.cc
+++ b/src/kudu/master/auto_leader_rebalancer.cc
@@ -387,7 +387,7 @@ Status AutoLeaderRebalancerTask::RunLeaderRebalanceForTable(
 }
 
 Status AutoLeaderRebalancerTask::RunLeaderRebalancer() {
-  MutexLock auto_lock(running_mutex_);
+  std::lock_guard guard(running_mutex_);
 
   // If catalog manager isn't initialized or isn't the leader, don't do leader
   // rebalancing. Putting the auto-rebalancer to sleep shouldn't affect the
diff --git a/src/kudu/master/catalog_manager.cc 
b/src/kudu/master/catalog_manager.cc
index 42aeee833..98af0f0b3 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -742,13 +742,13 @@ class CatalogManagerBgTasks {
   void Shutdown();
 
   void Wake() {
-MutexLock lock(lock_);
+std::lock_guard lock(lock_);
 pending_updates_ = true;
 cond_.Broadcast();
   }
 
   void Wait(int msec) {
-MutexLock lock(lock_);
+std::lock_guard lock(lock_);
 if (closing_) return;
 if (!pending_updates_) {
   cond_.WaitFor

(kudu) branch master updated (feaf1daf5 -> eb0a41fdb)

2024-05-14 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


from feaf1daf5 [tablet] fix race accessing OpState's start time
 new a3ed48d93 [common] switch from unique_lock to lock_guard
 new eb0a41fdb [common] get rid of MutexLock

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/kudu/clock/builtin_ntp.cc  |  8 +--
 src/kudu/clock/hybrid_clock.cc |  2 +-
 src/kudu/consensus/consensus_peers.cc  | 12 ++--
 src/kudu/consensus/consensus_queue.cc  |  8 +--
 src/kudu/consensus/log_cache.cc|  2 +-
 .../tombstoned_voting-stress-test.cc   |  4 +-
 src/kudu/master/auto_leader_rebalancer.cc  |  2 +-
 src/kudu/master/catalog_manager.cc |  4 +-
 src/kudu/rpc/rpcz_store.cc |  2 +-
 src/kudu/rpc/service_pool.cc   |  8 ++-
 src/kudu/rpc/service_pool.h|  2 +-
 src/kudu/rpc/service_queue.h   |  4 +-
 src/kudu/security/tls_context.cc   |  9 ++-
 src/kudu/server/diagnostics_log.cc | 14 ++---
 src/kudu/server/webserver.cc   |  2 +-
 src/kudu/tablet/lock_manager-test.cc   |  5 +-
 src/kudu/tablet/lock_manager.cc| 11 ++--
 src/kudu/tablet/tablet.cc  |  4 +-
 src/kudu/tablet/tablet_metadata.cc |  8 +--
 src/kudu/tablet/tablet_metadata.h  |  2 +-
 src/kudu/tools/table_scanner.cc|  4 +-
 src/kudu/tserver/heartbeater.cc|  6 +-
 src/kudu/tserver/scanners.cc   |  6 +-
 src/kudu/tserver/scanners.h|  2 +-
 src/kudu/tserver/tablet_copy_service.cc| 16 +++---
 src/kudu/tserver/tablet_copy_service.h |  2 +-
 src/kudu/util/async_logger.cc  | 13 +++--
 src/kudu/util/async_logger.h   |  6 +-
 src/kudu/util/barrier.h|  4 +-
 src/kudu/util/blocking_queue-test.cc   |  6 +-
 src/kudu/util/blocking_queue.h | 25 
 src/kudu/util/cloud/instance_detector.cc   |  6 +-
 src/kudu/util/countdown_latch.h| 17 +++---
 src/kudu/util/debug/trace_event_impl.cc|  6 +-
 src/kudu/util/debug/trace_event_synthetic_delay.cc | 16 +++---
 src/kudu/util/kernel_stack_watchdog.cc |  2 +-
 src/kudu/util/maintenance_manager.cc   |  2 +-
 src/kudu/util/mem_tracker.cc   | 12 ++--
 src/kudu/util/mem_tracker.h|  1 +
 src/kudu/util/mutex.h  | 67 +-
 src/kudu/util/pstack_watcher.cc|  8 +--
 src/kudu/util/rwc_lock.cc  | 14 +++--
 src/kudu/util/test_graph.cc|  5 +-
 src/kudu/util/threadpool.cc| 41 ++---
 src/kudu/util/threadpool.h |  8 +--
 src/kudu/util/trace_metrics.h  |  2 +-
 46 files changed, 182 insertions(+), 228 deletions(-)



(kudu) 01/02: [common] switch from unique_lock to lock_guard

2024-05-14 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit a3ed48d932d203e9310cca396e5530d6ed6eaa87
Author: Alexey Serbin 
AuthorDate: Wed May 8 15:51:27 2024 -0700

[common] switch from unique_lock to lock_guard

To simplify working with STL synchronization primitives, this patch
changes from std::unique_lock to std::lock_guard where appropriate.
In addition, IWYU and ClangTidy's feedback has been addressed.
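
A small standalone illustration of the distinction (hedged; not code from this patch):
std::lock_guard covers the common hold-for-the-whole-scope case, while std::unique_lock
is kept where early release or condition-variable waits are needed, e.g. releasing the
lock before slow logging as in Peer::ProcessTabletCopyResponse:

#include <iostream>
#include <mutex>
#include <string>

std::mutex g_mutex;
std::string g_state;

// Hold the lock for the whole scope: lock_guard is enough.
void UpdateState(const std::string& value) {
  std::lock_guard<std::mutex> l(g_mutex);
  g_state = value;
}

// Early unlock is needed: keep unique_lock, copy out under the lock,
// then release it before doing slow I/O.
void LogStateSnapshot() {
  std::unique_lock<std::mutex> l(g_mutex);
  const std::string snapshot = g_state;
  l.unlock();
  std::cout << snapshot << '\n';
}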

Change-Id: I72d86ca730113ec0652154d5ce509fc2e479befb
Reviewed-on: http://gerrit.cloudera.org:8080/21414
Tested-by: Alexey Serbin 
Reviewed-by: Zoltan Chovan 
Reviewed-by: Abhishek Chennaka 
---
 src/kudu/clock/hybrid_clock.cc  |  2 +-
 src/kudu/consensus/consensus_peers.cc   | 12 +++-
 src/kudu/consensus/consensus_queue.cc   |  8 
 src/kudu/consensus/log_cache.cc |  2 +-
 src/kudu/integration-tests/tombstoned_voting-stress-test.cc |  4 ++--
 src/kudu/rpc/rpcz_store.cc  |  2 +-
 src/kudu/security/tls_context.cc|  9 -
 src/kudu/server/webserver.cc|  2 +-
 src/kudu/tablet/lock_manager-test.cc|  5 +++--
 src/kudu/tablet/lock_manager.cc | 11 ++-
 src/kudu/tablet/tablet.cc   |  4 +++-
 src/kudu/tserver/scanners.cc|  2 +-
 src/kudu/util/maintenance_manager.cc|  2 +-
 src/kudu/util/trace_metrics.h   |  2 +-
 14 files changed, 36 insertions(+), 31 deletions(-)

diff --git a/src/kudu/clock/hybrid_clock.cc b/src/kudu/clock/hybrid_clock.cc
index f9c0a7816..1f2eae483 100644
--- a/src/kudu/clock/hybrid_clock.cc
+++ b/src/kudu/clock/hybrid_clock.cc
@@ -709,7 +709,7 @@ Status HybridClock::WalltimeWithError(uint64_t* now_usec, 
uint64_t* error_usec)
 MonoTime read_time_max_likelihood = read_time_before +
 MonoDelta::FromMicroseconds(read_time_error_us);
 
-std::unique_lock l(last_clock_read_lock_);
+std::lock_guard l(last_clock_read_lock_);
 if (is_extrapolating_) {
   is_extrapolating_ = false;
   extrapolating_->set_value(is_extrapolating_);
diff --git a/src/kudu/consensus/consensus_peers.cc 
b/src/kudu/consensus/consensus_peers.cc
index 27dd2a1c6..492d417b9 100644
--- a/src/kudu/consensus/consensus_peers.cc
+++ b/src/kudu/consensus/consensus_peers.cc
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -334,7 +335,7 @@ void Peer::StartElection() {
 
 void Peer::ProcessResponse() {
   // Note: This method runs on the reactor thread.
-  std::unique_lock lock(peer_lock_);
+  std::lock_guard lock(peer_lock_);
   if (PREDICT_FALSE(closed_)) {
 return;
   }
@@ -416,7 +417,7 @@ void Peer::DoProcessResponse() {
   queue_->ResponseFromPeer(peer_pb_.permanent_uuid(), response_);
 
   {
-std::unique_lock lock(peer_lock_);
+std::lock_guard lock(peer_lock_);
 CHECK(request_pending_);
 failed_attempts_ = 0;
 request_pending_ = false;
@@ -457,12 +458,13 @@ void Peer::ProcessTabletCopyResponse() {
 queue_->UpdatePeerStatus(peer_pb_.permanent_uuid(), PeerStatus::OK, 
Status::OK());
   } else if (!tc_response_.has_error() ||
   tc_response_.error().code() != 
TabletServerErrorPB::TabletServerErrorPB::THROTTLED) {
+const auto& response_str = controller_status.ok()
+? SecureShortDebugString(tc_response_) : controller_status.ToString();
+lock.unlock();
 // THROTTLED is a common response after a tserver with many replicas fails;
 // logging it would generate a great deal of log spam.
 LOG_WITH_PREFIX_UNLOCKED(WARNING) << "Unable to start Tablet Copy on peer: 
"
-  << (controller_status.ok() ?
-  SecureShortDebugString(tc_response_) 
:
-  controller_status.ToString());
+  << response_str;
   }
 }
 
diff --git a/src/kudu/consensus/consensus_queue.cc 
b/src/kudu/consensus/consensus_queue.cc
index 86a7a3eda..dd0f5fe53 100644
--- a/src/kudu/consensus/consensus_queue.cc
+++ b/src/kudu/consensus/consensus_queue.cc
@@ -459,7 +459,7 @@ void PeerMessageQueue::TruncateOpsAfter(int64_t index) {
   LogPrefixUnlocked(),
   index));
   {
-std::unique_lock lock(queue_lock_);
+std::lock_guard lock(queue_lock_);
 DCHECK(op.IsInitialized());
 queue_state_.last_appended = op;
   }
@@ -467,13 +467,13 @@ void PeerMessageQueue::TruncateOpsAfter(int64_t index) {
 }
 
 OpId PeerMessageQueue::GetLastOpIdInLog() const {
-  s

(kudu) branch master updated: KUDU-3568 Fix compaction budgeting test by setting memory hard limit

2024-05-13 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new b607633fd KUDU-3568 Fix compaction budgeting test by setting memory 
hard limit
b607633fd is described below

commit b607633fd3c2b676fbd2cbe57c44bddf818dc457
Author: Ashwani Raina 
AuthorDate: Thu May 9 22:23:14 2024 +0530

KUDU-3568 Fix compaction budgeting test by setting memory hard limit

TestRowSetCompactionSkipWithBudgetingConstraints can fail if the
memory on the node running the test is high. It happens because the test
generates deltas of size worth a few MBs that is multiplied with a
preset factor to ensure the result (i.e. memory required for rowset
compaction completion) is of high value of the order of 200 GB per
rowset.

Even though the nodes running the test generally don't have that much
physical memory, it is still possible to end up with high-memory nodes,
and on such nodes the test might fail.

The patch fixes that problem by deterministically ensuring that the
compaction memory requirement is always higher than the memory hard
limit. It does so as follows:
1. Move the budgeting compaction tests out into a separate binary.
2. This gives the flexibility to set the memory hard limit as the tests
   need. It is important to note that once the memory hard limit is
   set, it stays the same for all tests executed during the binary's
   lifetime.
3. Set the memory hard limit to 1 GB, which is enough to cover the
   compaction requirements of
   TestRowSetCompactionProceedWithNoBudgetingConstraints. For
   TestRowSetCompactionSkipWithBudgetingConstraints it is not enough,
   because the delta memory factor is set high enough to exceed 1 GB.
   Both tests are now expected to succeed deterministically.
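
For illustration, a minimal sketch of how such a binary could pin the
limit up front, assuming the hard limit is exposed through a gflag
named memory_limit_hard_bytes and a custom test main() (both are
assumptions here, not the actual contents of the new test file):

  // Sketch only: the flag name and the main() layout are assumptions.
  #include <gflags/gflags.h>
  #include <gtest/gtest.h>

  DECLARE_int64(memory_limit_hard_bytes);  // assumed to be defined by the linked-in memory subsystem

  int main(int argc, char** argv) {
    ::testing::InitGoogleTest(&argc, argv);
    gflags::ParseCommandLineFlags(&argc, &argv, true);
    // Pin the hard limit to 1 GiB for the whole lifetime of this binary:
    // every test linked into it sees the same limit, so a compaction whose
    // estimated footprint exceeds 1 GiB is skipped deterministically.
    FLAGS_memory_limit_hard_bytes = 1LL << 30;
    return RUN_ALL_TESTS();
  }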

Change-Id: I85d104e1d066507ce8e72a00cc5165cc4b85e48d
Reviewed-on: http://gerrit.cloudera.org:8080/21416
Tested-by: Alexey Serbin 
Reviewed-by: Alexey Serbin 
---
 src/kudu/tablet/CMakeLists.txt |   1 +
 src/kudu/tablet/compaction-highmem-test.cc | 220 +
 src/kudu/tablet/compaction-test.cc | 143 ---
 3 files changed, 221 insertions(+), 143 deletions(-)

diff --git a/src/kudu/tablet/CMakeLists.txt b/src/kudu/tablet/CMakeLists.txt
index c48089ed8..71af1dab0 100644
--- a/src/kudu/tablet/CMakeLists.txt
+++ b/src/kudu/tablet/CMakeLists.txt
@@ -105,6 +105,7 @@ SET_KUDU_TEST_LINK_LIBS(tablet tablet_test_util)
 ADD_KUDU_TEST(all_types-scan-correctness-test NUM_SHARDS 8 PROCESSORS 2)
 ADD_KUDU_TEST(cfile_set-test)
 ADD_KUDU_TEST(compaction-test)
+ADD_KUDU_TEST(compaction-highmem-test)
 ADD_KUDU_TEST(compaction_policy-test DATA_FILES ycsb-test-rowsets.tsv)
 ADD_KUDU_TEST(composite-pushdown-test)
 ADD_KUDU_TEST(delta_compaction-test)
diff --git a/src/kudu/tablet/compaction-highmem-test.cc 
b/src/kudu/tablet/compaction-highmem-test.cc
new file mode 100644
index 0..43dbcb63d
--- /dev/null
+++ b/src/kudu/tablet/compaction-highmem-test.cc
@@ -0,0 +1,220 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#include "kudu/common/common.pb.h"
+#include "kudu/common/partial_row.h"
+#include "kudu/common/row_operations.pb.h"
+#include "kudu/common/schema.h"
+#include "kudu/gutil/ref_counted.h"
+#include "kudu/gutil/strings/join.h"
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/tablet/local_tablet_writer.h"
+#include "kudu/tablet/tablet-test-util.h"
+#include "kudu/tablet/tablet.h"
+#include "kudu/util/logging_test_util.h"
+#include "kudu/util/status.h"
+#include "kudu/util/stopwatch.h"
+#include "kudu/util/test_macros.h"
+#include "kudu/util/test_util.h"
+#include "kudu/util/trace.h"
+
+DECLARE_bool(rowset_compaction_enforce_preset_factor

(kudu) branch master updated: [master] fix race in auto leader rebalancing

2024-05-10 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 5fccfbc2b [master] fix race in auto leader rebalancing
5fccfbc2b is described below

commit 5fccfbc2bda0d017d283cbefcb3cb8e4b026c8e9
Author: Alexey Serbin 
AuthorDate: Thu May 9 12:32:22 2024 -0700

[master] fix race in auto leader rebalancing

It turned out that the auto leader rebalancing task wasn't explicitly
shut down upon shutting down the catalog manager.  That led to race
conditions as reported by TSAN, at least in test scenarios (see below).
This patch addresses the issue.

  WARNING: ThreadSanitizer: data race (pid=23827)
Write of size 1 at 0x7b408208 by main thread:
  #0 AnnotateRWLockDestroy 
thirdparty/src/llvm-11.0.0.src/projects/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp:264
 (auto_rebalancer-test+0x33575e)
  #1 kudu::rw_spinlock::~rw_spinlock() src/kudu/util/locks.h:89:5 
(libmaster.so+0x359376)
  #2 kudu::master::TSManager::~TSManager() 
src/kudu/master/ts_manager.cc:108:1 (libmaster.so+0x4ad201)
  #3 kudu::master::TSManager::~TSManager() 
src/kudu/master/ts_manager.cc:107:25 (libmaster.so+0x4ad229)
  #4 
std::__1::default_delete::operator()(kudu::master::TSManager*)
 const thirdparty/installed/tsan/include/c++/v1/memory:2262:5 
(libmaster.so+0x407ce7)
  #5 std::__1::unique_ptr 
>::reset(kudu::master::TSManager*) 
thirdparty/installed/tsan/include/c++/v1/memory:2517:7 (libmaster.so+0x40157d)
  #6 std::__1::unique_ptr >::~unique_ptr() 
thirdparty/installed/tsan/include/c++/v1/memory:2471:19 (libmaster.so+0x4015eb)
  #7 kudu::master::Master::~Master() src/kudu/master/master.cc:263:1 
(libmaster.so+0x3f7a4a)
  #8 kudu::master::Master::~Master() src/kudu/master/master.cc:261:19 
(libmaster.so+0x3f7dc9)
  #9 
std::__1::default_delete::operator()(kudu::master::Master*)
 const thirdparty/installed/tsan/include/c++/v1/memory:2262:5 
(libmaster.so+0x435627)
  #10 std::__1::unique_ptr >::reset(kudu::master::Master*) 
thirdparty/installed/tsan/include/c++/v1/memory:2517:7 (libmaster.so+0x42e6ed)
  #11 kudu::master::MiniMaster::Shutdown() 
src/kudu/master/mini_master.cc:120:13 (libmaster.so+0x4c2612)
...
Previous atomic write of size 4 at 0x7b408208 by thread T439 
(mutexes: write M1141235379631443968):
  #0 __tsan_atomic32_compare_exchange_strong 
thirdparty/src/llvm-11.0.0.src/projects/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp:780
 (auto_rebalancer-test+0x33eb60)
  #1 base::subtle::Release_CompareAndSwap(int volatile*, int, int) 
/src/kudu/gutil/atomicops-internals-tsan.h:88:3 (libmaster.so+0x2e2b34)
  #2 kudu::rw_semaphore::unlock_shared() 
src/kudu/util/rw_semaphore.h:91:19 (libmaster.so+0x2e29c8)
  #3 kudu::rw_spinlock::unlock_shared() src/kudu/util/locks.h:99:10 
(libmaster.so+0x2e28ef)
  #4 std::__1::shared_lock::~shared_lock() 
/thirdparty/installed/tsan/include/c++/v1/shared_mutex:369:19 
(libmaster.so+0x2e23e0)
  #5 
kudu::master::TSManager::GetAllDescriptors(std::__1::vector,
 std::__1::allocator > >*) 
const src/kudu/master/ts_manager.cc:206:1 (libmaster.so+0x4adeb6)
  #6 kudu::master::AutoLeaderRebalancerTask::RunLeaderRebalancer() 
src/kudu/master/auto_leader_rebalancer.cc:405:16 (libmaster.so+0x2fb51b)
  #7 kudu::master::AutoLeaderRebalancerTask::RunLoop() 
src/kudu/master/auto_leader_rebalancer.cc:445:7 (libmaster.so+0x2fbaa9)

This is a follow-up to 10efaf2c77dfe5e4474505e0267c583c011703be.
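
The shape of the fix is the usual pattern of shutting a background
task down explicitly before the objects it touches are destroyed; a
self-contained sketch with illustrative names (not the actual Kudu
classes):

  #include <atomic>
  #include <chrono>
  #include <thread>

  class BackgroundTask {
   public:
    ~BackgroundTask() {
      // Only meaningful if Init() ever started the thread; the owner is
      // still expected to call Shutdown() explicitly before destroying
      // anything the loop touches.
      if (thread_.joinable()) {
        Shutdown();
      }
    }
    void Init() { thread_ = std::thread([this] { RunLoop(); }); }
    void Shutdown() {
      stop_ = true;
      if (thread_.joinable()) {
        thread_.join();
      }
    }

   private:
    void RunLoop() {
      while (!stop_) {
        // ... periodic rebalancing work would go here ...
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
      }
    }

    std::atomic<bool> stop_{false};
    std::thread thread_;
  };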

Change-Id: Iccd66d00280d22b37386230874937e5260f07f3b
Reviewed-on: http://gerrit.cloudera.org:8080/21417
Reviewed-by: Wang Xixu <1450306...@qq.com>
Tested-by: Alexey Serbin 
Reviewed-by: Yifan Zhang 
---
 src/kudu/master/auto_leader_rebalancer.cc | 6 +-
 src/kudu/master/catalog_manager.cc| 4 
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/kudu/master/auto_leader_rebalancer.cc 
b/src/kudu/master/auto_leader_rebalancer.cc
index 629afb77d..fdf058623 100644
--- a/src/kudu/master/auto_leader_rebalancer.cc
+++ b/src/kudu/master/auto_leader_rebalancer.cc
@@ -101,7 +101,11 @@ 
AutoLeaderRebalancerTask::AutoLeaderRebalancerTask(CatalogManager* catalog_manag
   number_of_loop_iterations_for_test_(0),
   moves_scheduled_this_round_for_test_(0) {}
 
-AutoLeaderRebalancerTask::~AutoLeaderRebalancerTask() { Shutdown(); }
+AutoLeaderRebalancerTask::~AutoLeaderRebalancerTask() {
+  if (thread_) {
+Shutdown();
+  }
+}
 
 Status AutoLeaderRebalancerTask::Init() {
   DCHECK(!thread_) << "AutoleaderRebalancerTask is already initialized";
diff --git a/src/kudu/master/catalog_manager.cc 
b/src/kudu/master/catalog_manager.cc
index 

(kudu) branch master updated: KUDU-613: Cleanup of cache code

2024-05-08 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 358f0f172 KUDU-613: Cleanup of cache code
358f0f172 is described below

commit 358f0f172609e1a0ea359eb4e8118d0584926b3d
Author: Mahesh Reddy 
AuthorDate: Tue Feb 6 15:04:38 2024 -0500

KUDU-613: Cleanup of cache code

This patch moves some classes out of the
anonymous namespace and into the headers
of the cache and nvm_cache files. These
classes will be used by the new SLRU cache.
This patch also templatizes the HandleTable class
so it can be used by both the cache and nvm_cache files.
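
As a rough, self-contained illustration of the templatization
direction (a toy table, not the actual HandleTable from cache.h):

  #include <cstdint>
  #include <string>

  // Example handle type: anything exposing key(), hash and next_hash works.
  struct ToyHandle {
    std::string key_text;
    uint32_t hash = 0;
    ToyHandle* next_hash = nullptr;
    const std::string& key() const { return key_text; }
  };

  // Chained hash table templated on the handle type, so different cache
  // implementations can share the same container.
  template <typename Handle>
  class TinyHandleTable {
   public:
    Handle* Lookup(const std::string& key, uint32_t hash) {
      return *FindPointer(key, hash);
    }
    Handle* Insert(Handle* h) {
      Handle** ptr = FindPointer(h->key(), h->hash);
      Handle* old = *ptr;
      h->next_hash = (old == nullptr ? nullptr : old->next_hash);
      *ptr = h;
      return old;  // the displaced handle (if any) is returned to the caller
    }

   private:
    static constexpr uint32_t kBuckets = 16;  // fixed size: no Resize() here
    Handle* list_[kBuckets] = {};

    // Returns the slot pointing at the matching entry, or the trailing slot
    // of the corresponding bucket chain when there is no match.
    Handle** FindPointer(const std::string& key, uint32_t hash) {
      Handle** ptr = &list_[hash & (kBuckets - 1)];
      while (*ptr != nullptr &&
             ((*ptr)->hash != hash || key != (*ptr)->key())) {
        ptr = &(*ptr)->next_hash;
      }
      return ptr;
    }
  };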

Change-Id: I506d4577c0ae873b01d7fa4f53846d6fd0f664cf
Reviewed-on: http://gerrit.cloudera.org:8080/21018
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin 
---
 src/kudu/util/cache.cc  | 147 +++-
 src/kudu/util/cache.h   | 151 +
 src/kudu/util/file_cache.cc |   1 +
 src/kudu/util/nvm_cache.cc  | 162 +++-
 src/kudu/util/nvm_cache.h   |  32 +
 5 files changed, 206 insertions(+), 287 deletions(-)

diff --git a/src/kudu/util/cache.cc b/src/kudu/util/cache.cc
index d141caca7..c0156fe12 100644
--- a/src/kudu/util/cache.cc
+++ b/src/kudu/util/cache.cc
@@ -45,6 +45,7 @@ DEFINE_double(cache_memtracker_approximation_ratio, 0.01,
   "this ratio to improve performance. For tests.");
 TAG_FLAG(cache_memtracker_approximation_ratio, hidden);
 
+using RLHandle = kudu::Cache::RLHandle;
 using std::atomic;
 using std::shared_ptr;
 using std::string;
@@ -68,130 +69,6 @@ const Cache::IterationFunc 
Cache::kIterateOverAllEntriesFunc = [](
 
 namespace {
 
-// Recency list cache implementations (FIFO, LRU, etc.)
-
-// Recency list handle. An entry is a variable length heap-allocated structure.
-// Entries are kept in a circular doubly linked list ordered by some recency
-// criterion (e.g., access time for LRU policy, insertion time for FIFO 
policy).
-struct RLHandle {
-  Cache::EvictionCallback* eviction_callback;
-  RLHandle* next_hash;
-  RLHandle* next;
-  RLHandle* prev;
-  size_t charge;  // TODO(opt): Only allow uint32_t?
-  uint32_t key_length;
-  uint32_t val_length;
-  std::atomic refs;
-  uint32_t hash;  // Hash of key(); used for fast sharding and comparisons
-
-  // The storage for the key/value pair itself. The data is stored as:
-  //   [key bytes ...] [padding up to 8-byte boundary] [value bytes ...]
-  uint8_t kv_data[1];   // Beginning of key/value pair
-
-  Slice key() const {
-return Slice(kv_data, key_length);
-  }
-
-  uint8_t* mutable_val_ptr() {
-int val_offset = KUDU_ALIGN_UP(key_length, sizeof(void*));
-return &kv_data[val_offset];
-  }
-
-  const uint8_t* val_ptr() const {
-return const_cast<RLHandle*>(this)->mutable_val_ptr();
-  }
-
-  Slice value() const {
-return Slice(val_ptr(), val_length);
-  }
-};
-
-// We provide our own simple hash table since it removes a whole bunch
-// of porting hacks and is also faster than some of the built-in hash
-// table implementations in some of the compiler/runtime combinations
-// we have tested.  E.g., readrandom speeds up by ~5% over the g++
-// 4.4.3's builtin hashtable.
-class HandleTable {
- public:
-  HandleTable() : length_(0), elems_(0), list_(nullptr) { Resize(); }
-  ~HandleTable() { delete[] list_; }
-
-  RLHandle* Lookup(const Slice& key, uint32_t hash) {
-return *FindPointer(key, hash);
-  }
-
-  RLHandle* Insert(RLHandle* h) {
-RLHandle** ptr = FindPointer(h->key(), h->hash);
-RLHandle* old = *ptr;
-h->next_hash = (old == nullptr ? nullptr : old->next_hash);
-*ptr = h;
-if (old == nullptr) {
-  ++elems_;
-  if (elems_ > length_) {
-// Since each cache entry is fairly large, we aim for a small
-// average linked list length (<= 1).
-Resize();
-  }
-}
-return old;
-  }
-
-  RLHandle* Remove(const Slice& key, uint32_t hash) {
-RLHandle** ptr = FindPointer(key, hash);
-RLHandle* result = *ptr;
-if (result != nullptr) {
-  *ptr = result->next_hash;
-  --elems_;
-}
-return result;
-  }
-
- private:
-  // The table consists of an array of buckets where each bucket is
-  // a linked list of cache entries that hash into the bucket.
-  uint32_t length_;
-  uint32_t elems_;
-  RLHandle** list_;
-
-  // Return a pointer to slot that points to a cache entry that
-  // matches key/hash.  If there is no such cache entry, return a
-  // pointer to the trailing slot in the corresponding linked list.
-  RLHandle** FindPointer(const Slice& key, uint32_t hash) {
-RLHandle** ptr = &list_[hash & (length_ - 1)];
-while (*ptr != nullptr &&
-   ((*ptr)->hash != has

(kudu) branch gh-pages updated: [blog] Fix typo in blogpost

2024-05-07 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch gh-pages
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/gh-pages by this push:
 new a71b5b54c [blog] Fix typo in blogpost
a71b5b54c is described below

commit a71b5b54c81d935ae01ce033aa07e33677aeeabb
Author: Abhishek Chennaka 
AuthorDate: Tue Apr 30 12:31:25 2024 -0700

[blog] Fix typo in blogpost

This patch adds a missing space to the Backup and Restore heading and
makes some line spacings uniform.

Change-Id: Iec743b5151ac9c139d3b3513f99d0369cfe0fc93
Reviewed-on: http://gerrit.cloudera.org:8080/21378
Tested-by: Abhishek Chennaka 
Reviewed-by: Mahesh Reddy 
Reviewed-by: Alexey Serbin 
---
 _posts/2024-03-07-introducing-auto-incrementing-column.md | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/_posts/2024-03-07-introducing-auto-incrementing-column.md 
b/_posts/2024-03-07-introducing-auto-incrementing-column.md
index d43bf442c..0c169f357 100644
--- a/_posts/2024-03-07-introducing-auto-incrementing-column.md
+++ b/_posts/2024-03-07-introducing-auto-incrementing-column.md
@@ -53,7 +53,6 @@ SELECT \*, auto_incrementing_id FROM tablename
  Examples
 
 Create a table with two columns and two hash partitions:
-
 ```
 default> CREATE TABLE demo_table(id INT NON UNIQUE PRIMARY KEY, name STRING) 
PARTITION BY HASH (id) PARTITIONS 2 STORED AS KUDU;
 Query: CREATE TABLE demo_table(id INT NON UNIQUE PRIMARY KEY, name STRING) 
PARTITION BY HASH (id) PARTITIONS 2 STORED AS KUDU
@@ -66,7 +65,6 @@ Fetched 1 row(s) in 3.94s
 ```
 
 Describe the table:
-
 ```
 default> DESCRIBE demo_table;
 Query: DESCRIBE demo_table
@@ -79,16 +77,15 @@ Query: DESCRIBE demo_table
 
+--++-+-++--+---+---+-++
 ```
 
-Insert rows with duplicate partial primary key column values:
-
+Insert rows with duplicate partial primary key column values:
 ```
 default> INSERT INTO demo_table VALUES (1, 'John'), (2, 'Bob'), (3, 'Mary'), 
(1, 'Joe');
 Query: INSERT INTO demo_table VALUES (1, 'John'), (2, 'Bob'), (3, 'Mary'), (1, 
'Joe')
 ..
 Modified 4 row(s), 0 row error(s) in 0.41s
 ```
-Scan the table (notice the duplicate values in the 'id' column):
 
+Scan the table (notice the duplicate values in the 'id' column):
 ```
 default> SELECT * FROM demo_table;
 Query: SELECT * FROM demo_table
@@ -132,6 +129,7 @@ default> DELETE FROM demo_table where id=2;
 Query: DELETE FROM demo_table where id=2;
 Modified 1 row(s), 0 row error(s) in 1.40s
 ```
+
 Scan all the columns of the table:
 ```
 default> SELECT *, auto_incrementing_id FROM demo_table;
@@ -146,12 +144,13 @@ Query: SELECT *, auto_incrementing_id FROM demo_table
 ++--+--+
 Fetched 3 row(s) in 0.20s
 ```
+
  Limitations
 
 Impala doesn’t support UPSERT operations on tables with the auto-incrementing 
column as of writing
 this article.
 
-### Kudu clients(Java, C++, Python)
+### Kudu clients (Java, C++, Python)
 
 Unlike in Impala, scanning the table fetches all the table data including the 
auto incrementing column.
 There is no need to explicitly request the auto-incrementing column.
@@ -166,7 +165,7 @@ regular UPDATE operation. If the row is not present, it is 
considered an INSERT
 
 
<https://github.com/apache/kudu/blob/master/examples/python/basic-python-example/non_unique_primary_key.py>
 
-##Backup and Restore
+## Backup and Restore
 
 The Kudu backup tool from Kudu 1.17 and later supports backing up tables with 
the
 auto-incrementing column. The prior backup tools will fail with an error 
message -
@@ -175,4 +174,4 @@ auto-incrementing column. The prior backup tools will fail 
with an error message
 The backed up data (from Kudu 1.17 and later) includes the auto-incrementing 
column in the table
 schema and the column values as well. Restoring this backed up table with the 
Kudu restore tool
 will create a table with the auto-incrementing column and the column values 
identical to the
-original source table.
\ No newline at end of file
+original source table.



(kudu) branch master updated: [metrics] Add metrics for create and delete op time

2024-05-07 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new b72fc6255 [metrics] Add metrics for create and delete op time
b72fc6255 is described below

commit b72fc6255a77418ef87fc5f07476528b027c6f99
Author: kedeng 
AuthorDate: Wed Apr 17 14:10:27 2024 +0800

[metrics] Add metrics for create and delete op time

Add server-level statistics to track the time consumption of
create tablet and delete tablet operations.
These new monitoring items will aid in tracking and analyzing
historical issues, and will also make it easier to configure
monitoring alarms.

Change-Id: I02bd52013caa94a33143cb16ff3831a49b74bac4
Reviewed-on: http://gerrit.cloudera.org:8080/21316
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin 
---
 src/kudu/tserver/tablet_server-test.cc |  7 +
 src/kudu/tserver/ts_tablet_manager-test.cc | 41 ++
 src/kudu/tserver/ts_tablet_manager.cc  | 30 ++
 src/kudu/tserver/ts_tablet_manager.h   |  4 +++
 4 files changed, 82 insertions(+)

diff --git a/src/kudu/tserver/tablet_server-test.cc 
b/src/kudu/tserver/tablet_server-test.cc
index 6b1229776..69678c03e 100644
--- a/src/kudu/tserver/tablet_server-test.cc
+++ b/src/kudu/tserver/tablet_server-test.cc
@@ -238,6 +238,7 @@ METRIC_DECLARE_gauge_size(slow_scans);
 METRIC_DECLARE_histogram(flush_dms_duration);
 METRIC_DECLARE_histogram(op_apply_queue_length);
 METRIC_DECLARE_histogram(op_apply_queue_time);
+METRIC_DECLARE_histogram(delete_tablet_run_time);
 
 
 namespace kudu {
@@ -4051,6 +4052,9 @@ TEST_F(TabletServerTest, TestDeleteTablet) {
 }
 
 TEST_F(TabletServerTest, TestDeleteTablet_TabletNotCreated) {
+  scoped_refptr<Histogram> delete_tablet_run_time =
+  
METRIC_delete_tablet_run_time.Instantiate(mini_server_->server()->metric_entity());
+  ASSERT_EQ(0, delete_tablet_run_time->TotalCount());
   DeleteTabletRequestPB req;
   DeleteTabletResponsePB resp;
   RpcController rpc;
@@ -4067,6 +4071,9 @@ TEST_F(TabletServerTest, 
TestDeleteTablet_TabletNotCreated) {
 ASSERT_TRUE(resp.has_error());
 ASSERT_EQ(TabletServerErrorPB::TABLET_NOT_FOUND, resp.error().code());
   }
+
+  // Check that the histogram is not populated.
+  ASSERT_EQ(0, delete_tablet_run_time->TotalCount());
 }
 
 TEST_F(TabletServerTest, TestDeleteTabletBenchmark) {
diff --git a/src/kudu/tserver/ts_tablet_manager-test.cc 
b/src/kudu/tserver/ts_tablet_manager-test.cc
index f9b2df870..a3bf78889 100644
--- a/src/kudu/tserver/ts_tablet_manager-test.cc
+++ b/src/kudu/tserver/ts_tablet_manager-test.cc
@@ -51,6 +51,7 @@
 #include "kudu/tserver/mini_tablet_server.h"
 #include "kudu/tserver/tablet_server.h"
 #include "kudu/util/logging.h"
+#include "kudu/util/metrics.h"
 #include "kudu/util/monotime.h"
 #include "kudu/util/net/net_util.h"
 #include "kudu/util/oid_generator.h"
@@ -68,6 +69,9 @@ 
DECLARE_bool(tablet_bootstrap_skip_opening_tablet_for_testing);
 DECLARE_int32(tablet_metadata_load_inject_latency_ms);
 DECLARE_int32(update_tablet_metrics_interval_ms);
 
+METRIC_DECLARE_histogram(create_tablet_run_time);
+METRIC_DECLARE_histogram(delete_tablet_run_time);
+
 #define ASSERT_REPORT_HAS_UPDATED_TABLET(report, tablet_id) \
   NO_FATALS(AssertReportHasUpdatedTablet(report, tablet_id))
 
@@ -81,6 +85,7 @@ using kudu::master::TabletReportPB;
 using kudu::pb_util::SecureShortDebugString;
 using kudu::tablet::LocalTabletWriter;
 using kudu::tablet::Tablet;
+using kudu::tablet::TabletDataState;
 using kudu::tablet::TabletReplica;
 using std::nullopt;
 using std::optional;
@@ -183,6 +188,42 @@ class TsTabletManagerTest : public KuduTest {
   RaftConfigPB config_;
 };
 
+class TestCreateAndDeleteMetrics :
+public TsTabletManagerTest,
+public ::testing::WithParamInterface<TabletDataState> {
+};
+
+INSTANTIATE_TEST_SUITE_P(Params, TestCreateAndDeleteMetrics,
+ 
::testing::Values(TabletDataState::TABLET_DATA_DELETED,
+   
TabletDataState::TABLET_DATA_TOMBSTONED));
+
+TEST_P(TestCreateAndDeleteMetrics, TestCreateAndDifferentModeDeleteMetrics) {
+  TabletDataState data_state = GetParam();
+  scoped_refptr<Histogram> create_tablet_run_time =
+
METRIC_create_tablet_run_time.Instantiate(mini_server_->server()->metric_entity());
+  ASSERT_EQ(0, create_tablet_run_time->TotalCount());
+
+  scoped_refptr<Histogram> delete_tablet_run_time =
+
METRIC_delete_tablet_run_time.Instantiate(mini_server_->server()->metric_entity());
+  ASSERT_EQ(0, delete_tablet_run_time->TotalCount());
+
+  string test_tablet = "";
+  scoped_refptr test_replica;
+
+  // Create a new tablet.
+  ASSERT_OK(CreateNewTablet(test_tablet, schema_, true, nullo

(kudu) branch master updated: [CMakeLists] Make kudu_test_main static

2024-05-07 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 259fcc2b5 [CMakeLists] Make kudu_test_main static
259fcc2b5 is described below

commit 259fcc2b501077402e3d0e0d474a7ca475b70b5f
Author: Ádám Bakai 
AuthorDate: Wed Apr 10 13:57:47 2024 +0200

[CMakeLists] Make kudu_test_main static

This change makes sure that kudu_util.so is loaded at the beginning of
the library search order. This is needed because unwind_safeness.cc
contains a dlsym call in a constructor function that creates wrappers
for dlopen and dlclose to prevent a potential deadlock during unwind
stack resolution. The wrapper looks for the next definition of the
functions named "dlopen" and "dlclose" in the shared object files. If
kudu_util is loaded too late, it won't find these functions and an
error is thrown. This happens in Ubuntu 22.04 test runs. To solve this
issue, kudu_test_main was changed to a static library and kudu_util
was moved to the front of the library list.

This is a best effort fix, and it should only have impact on test
execution.
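
For context, a rough sketch of the mechanism being relied upon
(illustrative code, not the actual unwind_safeness.cc; link with -ldl):

  #ifndef _GNU_SOURCE
  #define _GNU_SOURCE 1  // for RTLD_NEXT on glibc
  #endif
  #include <dlfcn.h>
  #include <cstdio>

  using DlOpenFn = void* (*)(const char*, int);
  static DlOpenFn g_next_dlopen = nullptr;

  // Runs when the shared object containing it is loaded.
  __attribute__((constructor))
  static void InitDlWrappers() {
    // RTLD_NEXT only searches the objects that come after the calling
    // library in the search order, so the lookup fails when no later
    // object provides the real symbol.
    g_next_dlopen = reinterpret_cast<DlOpenFn>(dlsym(RTLD_NEXT, "dlopen"));
    if (g_next_dlopen == nullptr) {
      std::fprintf(stderr, "failed to resolve next dlopen: %s\n", dlerror());
    }
  }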

Change-Id: I0dfeb1fa04ed91e95fd1f8d789f020dd44289fea
Reviewed-on: http://gerrit.cloudera.org:8080/21282
    Tested-by: Alexey Serbin 
    Reviewed-by: Alexey Serbin 
---
 src/kudu/util/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/kudu/util/CMakeLists.txt b/src/kudu/util/CMakeLists.txt
index 06a426169..d32450c97 100644
--- a/src/kudu/util/CMakeLists.txt
+++ b/src/kudu/util/CMakeLists.txt
@@ -466,15 +466,15 @@ endif()
 # kudu_test_main
 ###
 if(NOT NO_TESTS)
-  add_library(kudu_test_main
+  add_library(kudu_test_main STATIC
 test_main.cc)
   target_link_libraries(kudu_test_main
+kudu_util
 ${KRB5_REALM_OVERRIDE}
 gflags
 glog
 gmock
 gtest
-kudu_util
 kudu_test_util)
 
   if(NOT APPLE)



(kudu) branch master updated: [g++11] Fix DecrementIntCell for g++10 and g++11

2024-05-03 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 1f0c0b017 [g++11] Fix DecrementIntCell for g++10 and g++11
1f0c0b017 is described below

commit 1f0c0b0172fb211e74848c87fce983e45280e66d
Author: Zoltan Martonka 
AuthorDate: Fri May 3 12:02:44 2024 +

[g++11] Fix DecrementIntCell for g++10 and g++11

There seems to be a compiler bug that optimizes out the safety check
for INT_MIN in the DecrementIntCell function. It appears on RHEL 9.2
with g++ 11.4.1, and only in release builds. For more info, see:


https://stackoverflow.com/questions/78424303/g-optimizes-away-check-for-int-min-in-release-build

The issue seems to be fixed in g++12 and not yet present in g++9.

Solution:
Slightly change the function to ensure it is compiled correctly.
This modification should not alter the correct optimized code.

Basically, any change where the compiler cannot perform the two
optimization steps (in this order) should address the issue:

+ if (x == INT_MIN) x = INT_MAX; else x -= 1;  =>  x -= 1
  (this is equivalent on the x86 platform).
+ if (x - 1 < x)  =>  if (true)
  (this equivalence holds only at the mathematical level).
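
A simplified, self-contained sketch of the reshaped function (the real
DecrementIntCell in key_util.cc operates on the column's concrete
integer type; plain int is used here only for illustration):

  #include <limits>

  bool DecrementIntCellSketch(int* cell_ptr) {
    const int orig = *cell_ptr;
    if (orig == std::numeric_limits<int>::min()) {
      // Nothing smaller exists; bail out instead of wrapping to INT_MAX.
      return false;
    }
    const int dec = orig - 1;  // no signed overflow possible on this path
    *cell_ptr = dec;
    return true;
  }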

Change-Id: Ia3cea2849a88c4d7e2587ceb805cd3258652e3c5
Reviewed-on: http://gerrit.cloudera.org:8080/21396
Tested-by: Alexey Serbin 
Reviewed-by: Alexey Serbin 
---
 src/kudu/common/key_util.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/kudu/common/key_util.cc b/src/kudu/common/key_util.cc
index 290c4b28e..280804f48 100644
--- a/src/kudu/common/key_util.cc
+++ b/src/kudu/common/key_util.cc
@@ -118,7 +118,7 @@ bool DecrementIntCell(void* cell_ptr) {
 // Signed overflow is undefined in C. So, we'll use a branch here
 // instead of counting on undefined behavior.
 if (orig == MathLimits::kMin) {
-  dec = MathLimits::kMax;
+  return false;
 } else {
   dec = orig - 1;
 }



(kudu) branch master updated: [tserver] limit number of trace metrics for TabletCopyClient

2024-05-03 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 732f0c1e5 [tserver] limit number of trace metrics for TabletCopyClient
732f0c1e5 is described below

commit 732f0c1e51bca04091431859918a3b84866373a0
Author: Alexey Serbin 
AuthorDate: Thu May 2 18:48:50 2024 -0700

[tserver] limit number of trace metrics for TabletCopyClient

The trace metrics registry assumes that the number of entries is quite
small, with the current threshold being 100.  In turn, a thread pool
unconditionally registers and updates its trace metrics while executing
submitted tasks.  Since a tablet server might host thousands of tablet
replicas, it's not a good idea to include the tablet's UUID in
the name of the download thread pool spawned by every TabletCopyClient
instance.

This is a follow-up to 0d95304fa46ee5d96bcaa934c7660369f2860e06.
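
A rough illustration of why a bounded registry and per-tablet pool
names don't mix (illustrative code, not Kudu's TraceMetrics API):

  #include <cstddef>
  #include <set>
  #include <string>

  static std::set<std::string> g_trace_metric_names;    // stands in for the registry
  static constexpr std::size_t kMaxTraceMetrics = 100;  // the threshold mentioned above

  bool RegisterPoolTraceMetric(const std::string& pool_name) {
    if (g_trace_metric_names.size() >= kMaxTraceMetrics) {
      return false;  // registry is full: further entries are effectively dropped
    }
    // "tablet-download-pool-<uuid>" adds a new entry per copy session, while
    // a fixed "tablet-download-pool" keeps this set at a handful of entries.
    return g_trace_metric_names.insert(pool_name + ".queue_time_us").second;
  }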

Change-Id: I334aa81aaed2378e7cae558bd8bb9e0f0c970fec
Reviewed-on: http://gerrit.cloudera.org:8080/21393
Tested-by: Marton Greber 
Reviewed-by: Marton Greber 
---
 src/kudu/tserver/tablet_copy_client.cc | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/kudu/tserver/tablet_copy_client.cc 
b/src/kudu/tserver/tablet_copy_client.cc
index d5c114675..fc49f42a1 100644
--- a/src/kudu/tserver/tablet_copy_client.cc
+++ b/src/kudu/tserver/tablet_copy_client.cc
@@ -232,9 +232,17 @@ TabletCopyClient::TabletCopyClient(
   if (dst_tablet_copy_metrics_) {
 dst_tablet_copy_metrics_->open_client_sessions->Increment();
   }
-  CHECK_OK(ThreadPoolBuilder("tablet-download-pool-" + tablet_id_)
+
+  // These thread pools are ephemeral, and there might be multiple pools with
+  // the same name "tablet-download-pool" running at the same time. They are 
not
+  // differentiated by 'tablet_id' or other dynamic information -- that's to
+  // avoid registering too many entries in the trace metrics dictionary
+  // (see trace_metrics.{h,cc} for details). So, all the trace metrics for
+  // these thread pools will be accumulated under a few thread pool metrics
+  // for all the tablet copying clients ever spawned.
+  CHECK_OK(ThreadPoolBuilder("tablet-download-pool")
+   .set_min_threads(1)

.set_max_threads(FLAGS_tablet_copy_download_threads_nums_per_session)
-   
.set_min_threads(FLAGS_tablet_copy_download_threads_nums_per_session)
.Build(_download_pool_));
 }
 



(kudu) 01/02: KUDU-3570 fix use-after-free in MajorDeltaCompactionOp

2024-05-02 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 75fc23a988fb582e5d8ffcdd0289bdd6b637a1e6
Author: Alexey Serbin 
AuthorDate: Fri Apr 26 16:55:37 2024 -0700

KUDU-3570 fix use-after-free in MajorDeltaCompactionOp

This patch addresses heap-use-after-free and data race issues reported
in KUDU-3570.  With this and one prior patch, neither TSAN nor ASAN
reports any warnings when running alter_table-randomized-test, at least
that's the stats collected from more than 100 iterations.
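
A minimal sketch of the pinning idea with illustrative types (not the
actual Kudu classes):

  #include <memory>

  struct Schema { /* column metadata, etc. */ };
  using SchemaPtr = std::shared_ptr<const Schema>;

  // Passing the SchemaPtr by value bumps the reference count, so the Schema
  // object stays alive for the whole compaction even if a concurrent
  // ALTER TABLE swaps a new schema into the tablet metadata meanwhile.
  void CompactWithPinnedSchema(SchemaPtr schema) {
    const Schema* projection = schema.get();
    // ... build the delta iterator and run the compaction against
    // 'projection'; the object referenced by 'schema' cannot be freed
    // until this function returns.
    (void)projection;
  }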

Change-Id: I491c6d98bed8780bcfb62f152db471d7a260d305
Reviewed-on: http://gerrit.cloudera.org:8080/21362
Tested-by: Alexey Serbin 
Reviewed-by: Abhishek Chennaka 
(cherry picked from commit 3912a97cd8998ef04c4e6f9c38bd365c582e8171)
Reviewed-on: http://gerrit.cloudera.org:8080/21365
Reviewed-by: Wang Xixu <1450306...@qq.com>
---
 src/kudu/tablet/diskrowset.cc | 22 +-
 src/kudu/tablet/diskrowset.h  |  3 ++-
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/src/kudu/tablet/diskrowset.cc b/src/kudu/tablet/diskrowset.cc
index d0f985f87..268ff4d8f 100644
--- a/src/kudu/tablet/diskrowset.cc
+++ b/src/kudu/tablet/diskrowset.cc
@@ -578,10 +578,19 @@ Status 
DiskRowSet::MajorCompactDeltaStoresWithColumnIds(const vector&
   std::lock_guard l(*mutable_delta_tracker()->compact_flush_lock());
   RETURN_NOT_OK(mutable_delta_tracker()->CheckWritableUnlocked());
 
+  // Keep a reference to the tablet's schema. This is to prevent race condition
+  // when concurrently running Tablet::AlterSchema() destroys the underlying
+  // Schema object by calling TabletMetadata::SetSchema().
+  //
   // TODO(todd): do we need to lock schema or anything here?
+  SchemaPtr schema_ptr = rowset_metadata_->tablet_schema();
+
+  RowIteratorOptions opts;
+  opts.projection = schema_ptr.get();
+  opts.io_context = io_context;
   unique_ptr compaction;
-  RETURN_NOT_OK(NewMajorDeltaCompaction(col_ids, std::move(history_gc_opts),
-io_context, ));
+  RETURN_NOT_OK(NewMajorDeltaCompaction(
+  col_ids, opts, std::move(history_gc_opts), ));
 
   RETURN_NOT_OK(compaction->Compact(io_context));
 
@@ -627,24 +636,19 @@ Status 
DiskRowSet::MajorCompactDeltaStoresWithColumnIds(const vector&
 }
 
 Status DiskRowSet::NewMajorDeltaCompaction(const vector& col_ids,
+   const RowIteratorOptions& opts,
HistoryGcOpts history_gc_opts,
-   const IOContext* io_context,
unique_ptr* 
out) const {
   DCHECK(open_);
   shared_lock l(component_lock_);
 
-  const SchemaPtr schema_ptr = rowset_metadata_->tablet_schema();
-
-  RowIteratorOptions opts;
-  opts.projection = schema_ptr.get();
-  opts.io_context = io_context;
   vector> included_stores;
   unique_ptr delta_iter;
   RETURN_NOT_OK(delta_tracker_->NewDeltaFileIterator(
   opts, REDO, _stores, _iter));
 
   out->reset(new MajorDeltaCompaction(rowset_metadata_->fs_manager(),
-  *schema_ptr,
+  *opts.projection,
   base_data_.get(),
   std::move(delta_iter),
   std::move(included_stores),
diff --git a/src/kudu/tablet/diskrowset.h b/src/kudu/tablet/diskrowset.h
index 3cb68806f..ff4fd1958 100644
--- a/src/kudu/tablet/diskrowset.h
+++ b/src/kudu/tablet/diskrowset.h
@@ -494,9 +494,10 @@ class DiskRowSet :
   Status Open(const fs::IOContext* io_context);
 
   // Create a new major delta compaction object to compact the specified 
columns.
+  // TODO(aserbin): use the move semantics for RowIteratorOptions
   Status NewMajorDeltaCompaction(const std::vector& col_ids,
+ const RowIteratorOptions& opts,
  HistoryGcOpts history_gc_opts,
- const fs::IOContext* io_context,
  std::unique_ptr* out) 
const;
 
   // Major compacts all the delta files for the specified columns.



(kudu) branch branch-1.17.x updated (0de168f7e -> 1ca4559e4)

2024-05-02 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a change to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git


from 0de168f7e KUDU-3518: Fix the unexpected scan predicate
 new 75fc23a98 KUDU-3570 fix use-after-free in MajorDeltaCompactionOp
 new 1ca4559e4 KUDU-3569 fix race in 
CFileSet::Iterator::OptimizePKPredicates()

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/kudu/tablet/cfile_set.cc  | 21 -
 src/kudu/tablet/cfile_set.h   |  1 -
 src/kudu/tablet/diskrowset.cc | 22 +-
 src/kudu/tablet/diskrowset.h  |  3 ++-
 4 files changed, 31 insertions(+), 16 deletions(-)



(kudu) 02/02: KUDU-3569 fix race in CFileSet::Iterator::OptimizePKPredicates()

2024-05-02 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 1ca4559e4b06141b62dbacd75a509ca2cc2e4b9e
Author: Alexey Serbin 
AuthorDate: Thu Apr 25 22:46:32 2024 -0700

KUDU-3569 fix race in CFileSet::Iterator::OptimizePKPredicates()

This patch addresses the data race reported in KUDU-3569 by using
the tablet schema defined by the iterator's projection instead of
the schema stored in the tablet metadata file.  The latter might
be swapped by a concurrently running AlterTable, which is the root
cause of the data race.

This is a follow-up to 936d7edc4e4b69d2e1f1dffc96760cb3fd57a934.

Change-Id: I92daa74cb86a77a4350f42db9ca5dec3a0d4ff75
Reviewed-on: http://gerrit.cloudera.org:8080/21359
Tested-by: Alexey Serbin 
Reviewed-by: Abhishek Chennaka 
(cherry picked from commit 977f1911fbcc4d5c323d6ae7ce7c1ab100ed11ea)
  Conflicts:
src/kudu/tablet/cfile_set.h
Reviewed-on: http://gerrit.cloudera.org:8080/21366
---
 src/kudu/tablet/cfile_set.cc | 21 -
 src/kudu/tablet/cfile_set.h  |  1 -
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/kudu/tablet/cfile_set.cc b/src/kudu/tablet/cfile_set.cc
index 06d5f46e9..85803e6ac 100644
--- a/src/kudu/tablet/cfile_set.cc
+++ b/src/kudu/tablet/cfile_set.cc
@@ -431,18 +431,29 @@ Status CFileSet::Iterator::OptimizePKPredicates(ScanSpec* 
spec) {
   EncodedKey* implicit_ub_key = nullptr;
   bool modify_lower_bound_key = false;
   bool modify_upper_bound_key = false;
-  const Schema& tablet_schema = *base_data_->tablet_schema();
+
+  // Keep a reference to the current tablet schema to use in this scope. That's
+  // preventing data races when a concurrently running Tablet::AlterSchema()
+  // is destroying the underlying Schema object in TabletMetadata::SetSchema().
+  // Since the only information required from the schema in this context
+  // is primary key-related, any snapshot of the tablet's schema is good enough
+  // since primary key information is immutable for an existing Kudu table.
+  //
+  // NOTE: it's not possible to use the projection returned by
+  //   CFileSet::Iterator::schema() because it might not contain 
information
+  //   on primary key columns in case of non-FT scans.
+  SchemaPtr tablet_schema(base_data_->tablet_schema());
 
   if (!lb_key || lb_key->encoded_key() < base_data_->min_encoded_key_) {
 RETURN_NOT_OK(EncodedKey::DecodeEncodedString(
-tablet_schema, _, base_data_->min_encoded_key_, 
_lb_key));
+*tablet_schema, _, base_data_->min_encoded_key_, 
_lb_key));
 spec->SetLowerBoundKey(implicit_lb_key);
 modify_lower_bound_key = true;
   }
 
   RETURN_NOT_OK(EncodedKey::DecodeEncodedString(
-  tablet_schema, _, base_data_->max_encoded_key_, _ub_key));
-  Status s = EncodedKey::IncrementEncodedKey(tablet_schema, _ub_key, 
_);
+  *tablet_schema, _, base_data_->max_encoded_key_, 
_ub_key));
+  Status s = EncodedKey::IncrementEncodedKey(*tablet_schema, _ub_key, 
_);
   // Reset the exclusive_upper_bound_key only when we can get a valid and 
smaller upper bound key.
   // In the case IncrementEncodedKey return ERROR status due to allocation 
fails or no
   // lexicographically greater key exists, we fall back to scan the rowset 
without optimizing the
@@ -453,7 +464,7 @@ Status CFileSet::Iterator::OptimizePKPredicates(ScanSpec* 
spec) {
   }
 
   if (modify_lower_bound_key || modify_upper_bound_key) {
-spec->OptimizeScan(tablet_schema, _, true);
+spec->OptimizeScan(*tablet_schema, _, true);
   }
   return Status::OK();
 }
diff --git a/src/kudu/tablet/cfile_set.h b/src/kudu/tablet/cfile_set.h
index 7a119c4c6..5aea63033 100644
--- a/src/kudu/tablet/cfile_set.h
+++ b/src/kudu/tablet/cfile_set.h
@@ -35,7 +35,6 @@
 #include "kudu/common/schema.h"
 #include "kudu/gutil/macros.h"
 #include "kudu/gutil/map-util.h"
-#include "kudu/gutil/port.h"
 #include "kudu/tablet/rowset_metadata.h"
 #include "kudu/util/make_shared.h"
 #include "kudu/util/memory/arena.h"



(kudu) branch master updated: [build] Fix RocksDB Snappy dependency.

2024-05-02 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new fdb42489d [build] Fix RocksDB Snappy dependency.
fdb42489d is described below

commit fdb42489d76098eeca371ac57ae02ea176438c64
Author: Zoltan Martonka 
AuthorDate: Tue Apr 30 07:25:45 2024 +

[build] Fix RocksDB Snappy dependency.

RocksDB uses Snappy. When statically linking Kudu, librocksdb.a contains
multiple undefined symbols that are defined in libsnappy.a. In the case
of static linking, the order of libraries passed to the linker matters,
and libsnappy.a must come after librocksdb.a. This sometimes causes the
release build to fail on RHEL 9.3.

Change-Id: I3ce75f69d94436f732dbe9a0011546b1ae494824
Reviewed-on: http://gerrit.cloudera.org:8080/21370
Reviewed-by: Attila Bukor 
Tested-by: Attila Bukor 
---
 CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d4c9f4fc7..c13d7743a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1283,7 +1283,8 @@ find_package(Rocksdb REQUIRED)
 include_directories(SYSTEM ${ROCKSDB_INCLUDE_DIR})
 ADD_THIRDPARTY_LIB(rocksdb
 STATIC_LIB "${ROCKSDB_STATIC_LIB}"
-SHARED_LIB "${ROCKSDB_SHARED_LIB}")
+SHARED_LIB "${ROCKSDB_SHARED_LIB}"
+DEPS snappy)
 
 
 # Enable sized deallocation where supported.



(kudu) branch master updated: [tserver] fix typo in generating name of tablet copy pool

2024-04-30 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 0d95304fa [tserver] fix typo in generating name of tablet copy pool
0d95304fa is described below

commit 0d95304fa46ee5d96bcaa934c7660369f2860e06
Author: Alexey Serbin 
AuthorDate: Mon Apr 29 23:10:11 2024 -0700

[tserver] fix typo in generating name of tablet copy pool

Don't use a std::string instance after calling std::move() on it:
even if it's in a valid state, its contents are unspecified, so
the resulting name for the thread pool isn't what it's expected to be.
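
A tiny illustration of the pitfall (illustrative class, not the actual
TabletCopyClient code): after std::move(), the source object is left in
a valid but unspecified state, so reading it again may silently yield
an empty or truncated pool name.

  #include <string>
  #include <utility>

  class Copier {
   public:
    explicit Copier(std::string tablet_id)
        : tablet_id_(std::move(tablet_id)),
          // Correct: read the member that now owns the data.
          pool_name_("tablet-download-pool-" + tablet_id_) {
      // Wrong (the shape of the original typo): appending 'tablet_id' here
      // would read the moved-from parameter instead of the member.
    }

   private:
    std::string tablet_id_;  // declared (and thus initialized) before pool_name_
    std::string pool_name_;
  };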

Change-Id: If7287fd1255e174482e81fdfc863f5f4140199d7
Reviewed-on: http://gerrit.cloudera.org:8080/21374
Tested-by: Alexey Serbin 
Reviewed-by: Abhishek Chennaka 
---
 src/kudu/tserver/tablet_copy_client.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/kudu/tserver/tablet_copy_client.cc 
b/src/kudu/tserver/tablet_copy_client.cc
index c9b949027..d5c114675 100644
--- a/src/kudu/tserver/tablet_copy_client.cc
+++ b/src/kudu/tserver/tablet_copy_client.cc
@@ -232,7 +232,7 @@ TabletCopyClient::TabletCopyClient(
   if (dst_tablet_copy_metrics_) {
 dst_tablet_copy_metrics_->open_client_sessions->Increment();
   }
-  CHECK_OK(ThreadPoolBuilder("tablet-download-pool-" + tablet_id)
+  CHECK_OK(ThreadPoolBuilder("tablet-download-pool-" + tablet_id_)

.set_max_threads(FLAGS_tablet_copy_download_threads_nums_per_session)

.set_min_threads(FLAGS_tablet_copy_download_threads_nums_per_session)
.Build(_download_pool_));



(kudu) branch master updated: Minor refactoring on Op class

2024-04-29 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new ad3936521 Minor refactoring on Op class
ad3936521 is described below

commit ad3936521af034ffcac637f97cd8c932f6289b4f
Author: kedeng 
AuthorDate: Mon Apr 22 12:30:46 2024 +0800

Minor refactoring on Op class

We have derived various types of operations based on the Op class.
Now, if we need to add time statistics for these operations, it
would be repetitive to add an initial timestamp separately for
each derived class.

In this patch, I moved the 'start_time_' from the WriteOp class to
the OpState class, making it easier for subsequent derived classes
to use.

Since there are no logical changes, no additional unit tests have
been added.

Change-Id: Ie391d4a55b8da08a62025a05cc466fc2b947099c
Reviewed-on: http://gerrit.cloudera.org:8080/21342
Reviewed-by: Yingchun Lai 
Tested-by: Yingchun Lai 
Reviewed-by: Alexey Serbin 
---
 src/kudu/tablet/ops/op.h| 10 ++
 src/kudu/tablet/ops/write_op.cc |  8 
 src/kudu/tablet/ops/write_op.h  |  4 
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/kudu/tablet/ops/op.h b/src/kudu/tablet/ops/op.h
index 63b8ed017..5c184e823 100644
--- a/src/kudu/tablet/ops/op.h
+++ b/src/kudu/tablet/ops/op.h
@@ -40,6 +40,7 @@
 #include "kudu/tserver/tserver.pb.h"
 #include "kudu/util/countdown_latch.h"
 #include "kudu/util/memory/arena.h"
+#include "kudu/util/monotime.h"
 #include "kudu/util/status.h"
 
 namespace google {
@@ -267,6 +268,11 @@ class OpState {
 return request_id_;
   }
 
+  // Get the startup time of this op.
+  MonoTime start_time() const { return start_time_; }
+  // Set the startup time of this op.
+  void set_start_time(MonoTime start_time) { start_time_ = start_time; }
+
  protected:
   explicit OpState(TabletReplica* tablet_replica);
   virtual ~OpState();
@@ -306,6 +312,10 @@ class OpState {
 
   // The defined consistency mode for this op.
   ExternalConsistencyMode external_consistency_mode_;
+
+  // Use to record the op's start time.
+  // 'set_start_time()' needs to be called at the beginning of the op to 
initialize it.
+  MonoTime start_time_;
 };
 
 // A parent class for the callback that gets called when ops
diff --git a/src/kudu/tablet/ops/write_op.cc b/src/kudu/tablet/ops/write_op.cc
index d53ce15b0..0e2a90cc9 100644
--- a/src/kudu/tablet/ops/write_op.cc
+++ b/src/kudu/tablet/ops/write_op.cc
@@ -63,6 +63,7 @@
 #include "kudu/util/flag_tags.h"
 #include "kudu/util/memory/arena.h"
 #include "kudu/util/metrics.h"
+#include "kudu/util/monotime.h"
 #include "kudu/util/pb_util.h"
 #include "kudu/util/slice.h"
 #include "kudu/util/trace.h"
@@ -157,7 +158,6 @@ Status WriteAuthorizationContext::CheckPrivileges() const {
 WriteOp::WriteOp(unique_ptr state, DriverType type)
   : Op(type, Op::WRITE_OP),
   state_(std::move(state)) {
-  start_time_ = MonoTime::Now();
 }
 
 void WriteOp::NewReplicateMsg(unique_ptr* replicate_msg) {
@@ -270,6 +270,7 @@ Status WriteOp::Start() {
   TRACE("Start()");
   DCHECK(!state_->has_timestamp());
   DCHECK(state_->consensus_round()->replicate_msg()->has_timestamp());
+  state_->set_start_time(MonoTime::Now());
   
state_->set_timestamp(Timestamp(state_->consensus_round()->replicate_msg()->timestamp()));
   state_->tablet_replica()->tablet()->StartOp(state_.get());
   TRACE("Timestamp: $0", 
state_->tablet_replica()->clock()->Stringify(state_->timestamp()));
@@ -358,7 +359,7 @@ void WriteOp::Finish(OpResult result) {
 
metrics->commit_wait_duration->Increment(op_m.commit_wait_duration_usec);
   }
   uint64_t op_duration_usec =
-  (MonoTime::Now() - start_time_).ToMicroseconds();
+  (MonoTime::Now() - state_->start_time()).ToMicroseconds();
   switch (state()->external_consistency_mode()) {
 case CLIENT_PROPAGATED:
   
metrics->write_op_duration_client_propagated_consistency->Increment(op_duration_usec);
@@ -374,8 +375,7 @@ void WriteOp::Finish(OpResult result) {
 }
 
 string WriteOp::ToString() const {
-  MonoTime now(MonoTime::Now());
-  MonoDelta d = now - start_time_;
+  MonoDelta d = MonoTime::Now() - state_->start_time();
   WallTime abs_time = WallTime_Now() - d.ToSeconds();
   string abs_time_formatted;
   StringAppendStrftime(_time_formatted, "%Y-%m-%d %H:%M:%S", 
(time_t)abs_time, true);
diff --git a/src/kudu/tablet/ops/write_op.h b/src/kudu/tablet/ops/write_op.h
index c794a8a49..f1b25b55d 100644
--- a/src/kudu/tablet/ops/write_op.h
+++ b/src/kudu/tablet/ops/write_op.h
@@ -41,7 +41,6 @@
 #incl

(kudu) branch master updated: KUDU-3570 fix use-after-free in MajorDeltaCompactionOp

2024-04-27 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 3912a97cd KUDU-3570 fix use-after-free in MajorDeltaCompactionOp
3912a97cd is described below

commit 3912a97cd8998ef04c4e6f9c38bd365c582e8171
Author: Alexey Serbin 
AuthorDate: Fri Apr 26 16:55:37 2024 -0700

KUDU-3570 fix use-after-free in MajorDeltaCompactionOp

This patch addresses heap-use-after-free and data race issues reported
in KUDU-3570.  With this and one prior patch, neither TSAN nor ASAN
reports any warnings when running alter_table-randomized-test, at least
that's the stats collected from more than 100 iterations.

Change-Id: I491c6d98bed8780bcfb62f152db471d7a260d305
Reviewed-on: http://gerrit.cloudera.org:8080/21362
Tested-by: Alexey Serbin 
Reviewed-by: Abhishek Chennaka 
---
 src/kudu/tablet/diskrowset.cc | 22 +-
 src/kudu/tablet/diskrowset.h  |  3 ++-
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/src/kudu/tablet/diskrowset.cc b/src/kudu/tablet/diskrowset.cc
index f6c92c9ed..47b616fa4 100644
--- a/src/kudu/tablet/diskrowset.cc
+++ b/src/kudu/tablet/diskrowset.cc
@@ -580,10 +580,19 @@ Status 
DiskRowSet::MajorCompactDeltaStoresWithColumnIds(const vector&
   std::lock_guard l(*mutable_delta_tracker()->compact_flush_lock());
   RETURN_NOT_OK(mutable_delta_tracker()->CheckWritableUnlocked());
 
+  // Keep a reference to the tablet's schema. This is to prevent race condition
+  // when concurrently running Tablet::AlterSchema() destroys the underlying
+  // Schema object by calling TabletMetadata::SetSchema().
+  //
   // TODO(todd): do we need to lock schema or anything here?
+  SchemaPtr schema_ptr = rowset_metadata_->tablet_schema();
+
+  RowIteratorOptions opts;
+  opts.projection = schema_ptr.get();
+  opts.io_context = io_context;
   unique_ptr compaction;
-  RETURN_NOT_OK(NewMajorDeltaCompaction(col_ids, std::move(history_gc_opts),
-io_context, ));
+  RETURN_NOT_OK(NewMajorDeltaCompaction(
+  col_ids, opts, std::move(history_gc_opts), ));
 
   RETURN_NOT_OK(compaction->Compact(io_context));
 
@@ -629,24 +638,19 @@ Status 
DiskRowSet::MajorCompactDeltaStoresWithColumnIds(const vector&
 }
 
 Status DiskRowSet::NewMajorDeltaCompaction(const vector& col_ids,
+   const RowIteratorOptions& opts,
HistoryGcOpts history_gc_opts,
-   const IOContext* io_context,
unique_ptr* 
out) const {
   DCHECK(open_);
   shared_lock l(component_lock_);
 
-  const SchemaPtr schema_ptr = rowset_metadata_->tablet_schema();
-
-  RowIteratorOptions opts;
-  opts.projection = schema_ptr.get();
-  opts.io_context = io_context;
   vector> included_stores;
   unique_ptr delta_iter;
   RETURN_NOT_OK(delta_tracker_->NewDeltaFileIterator(
   opts, REDO, _stores, _iter));
 
   out->reset(new MajorDeltaCompaction(rowset_metadata_->fs_manager(),
-  *schema_ptr,
+  *opts.projection,
   base_data_.get(),
   std::move(delta_iter),
   std::move(included_stores),
diff --git a/src/kudu/tablet/diskrowset.h b/src/kudu/tablet/diskrowset.h
index ea1250e3e..a6b75ee6f 100644
--- a/src/kudu/tablet/diskrowset.h
+++ b/src/kudu/tablet/diskrowset.h
@@ -500,9 +500,10 @@ class DiskRowSet :
   Status Open(const fs::IOContext* io_context);
 
   // Create a new major delta compaction object to compact the specified 
columns.
+  // TODO(aserbin): use the move semantics for RowIteratorOptions
   Status NewMajorDeltaCompaction(const std::vector& col_ids,
+ const RowIteratorOptions& opts,
  HistoryGcOpts history_gc_opts,
- const fs::IOContext* io_context,
  std::unique_ptr* out) 
const;
 
   // Major compacts all the delta files for the specified columns.



(kudu) branch master updated: KUDU-3569 fix race in CFileSet::Iterator::OptimizePKPredicates()

2024-04-26 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 977f1911f KUDU-3569 fix race in 
CFileSet::Iterator::OptimizePKPredicates()
977f1911f is described below

commit 977f1911fbcc4d5c323d6ae7ce7c1ab100ed11ea
Author: Alexey Serbin 
AuthorDate: Thu Apr 25 22:46:32 2024 -0700

KUDU-3569 fix race in CFileSet::Iterator::OptimizePKPredicates()

This patch addresses the data race reported in KUDU-3569 by using
the tablet schema defined by the iterator's projection instead of
the schema stored in the tablet metadata file.  The latter might
be swapped by a concurrently running AlterTable, which is the root
cause of the data race.

This is a follow-up to 936d7edc4e4b69d2e1f1dffc96760cb3fd57a934.

Change-Id: I92daa74cb86a77a4350f42db9ca5dec3a0d4ff75
Reviewed-on: http://gerrit.cloudera.org:8080/21359
Tested-by: Alexey Serbin 
Reviewed-by: Abhishek Chennaka 
---
 src/kudu/tablet/cfile_set.cc | 21 -
 src/kudu/tablet/cfile_set.h  |  1 -
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/kudu/tablet/cfile_set.cc b/src/kudu/tablet/cfile_set.cc
index c998bd939..48ed9e26f 100644
--- a/src/kudu/tablet/cfile_set.cc
+++ b/src/kudu/tablet/cfile_set.cc
@@ -438,18 +438,29 @@ Status CFileSet::Iterator::OptimizePKPredicates(ScanSpec* 
spec) {
   EncodedKey* implicit_ub_key = nullptr;
   bool modify_lower_bound_key = false;
   bool modify_upper_bound_key = false;
-  const Schema& tablet_schema = *base_data_->tablet_schema();
+
+  // Keep a reference to the current tablet schema to use in this scope. That's
+  // preventing data races when a concurrently running Tablet::AlterSchema()
+  // is destroying the underlying Schema object in TabletMetadata::SetSchema().
+  // Since the only information required from the schema in this context
+  // is primary key-related, any snapshot of the tablet's schema is good enough
+  // since primary key information is immutable for an existing Kudu table.
+  //
+  // NOTE: it's not possible to use the projection returned by
+  //   CFileSet::Iterator::schema() because it might not contain 
information
+  //   on primary key columns in case of non-FT scans.
+  SchemaPtr tablet_schema(base_data_->tablet_schema());
 
   if (!lb_key || lb_key->encoded_key() < base_data_->min_encoded_key_) {
 RETURN_NOT_OK(EncodedKey::DecodeEncodedString(
-tablet_schema, _, base_data_->min_encoded_key_, 
_lb_key));
+*tablet_schema, _, base_data_->min_encoded_key_, 
_lb_key));
 spec->SetLowerBoundKey(implicit_lb_key);
 modify_lower_bound_key = true;
   }
 
   RETURN_NOT_OK(EncodedKey::DecodeEncodedString(
-  tablet_schema, _, base_data_->max_encoded_key_, _ub_key));
-  Status s = EncodedKey::IncrementEncodedKey(tablet_schema, _ub_key, 
_);
+  *tablet_schema, _, base_data_->max_encoded_key_, 
_ub_key));
+  Status s = EncodedKey::IncrementEncodedKey(*tablet_schema, _ub_key, 
_);
   // Reset the exclusive_upper_bound_key only when we can get a valid and 
smaller upper bound key.
   // In the case IncrementEncodedKey return ERROR status due to allocation 
fails or no
   // lexicographically greater key exists, we fall back to scan the rowset 
without optimizing the
@@ -460,7 +471,7 @@ Status CFileSet::Iterator::OptimizePKPredicates(ScanSpec* 
spec) {
   }
 
   if (modify_lower_bound_key || modify_upper_bound_key) {
-spec->OptimizeScan(tablet_schema, _, true);
+spec->OptimizeScan(*tablet_schema, _, true);
   }
   return Status::OK();
 }
diff --git a/src/kudu/tablet/cfile_set.h b/src/kudu/tablet/cfile_set.h
index 714bbbf8e..07d421ec5 100644
--- a/src/kudu/tablet/cfile_set.h
+++ b/src/kudu/tablet/cfile_set.h
@@ -36,7 +36,6 @@
 #include "kudu/common/schema.h"
 #include "kudu/gutil/macros.h"
 #include "kudu/gutil/map-util.h"
-#include "kudu/gutil/port.h"
 #include "kudu/tablet/rowset_metadata.h" // IWYU pragma: keep
 #include "kudu/util/make_shared.h"
 #include "kudu/util/memory/arena.h"



(kudu) 01/02: [docs] encryption-at-rest is now natively supported

2024-04-25 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit c57263c62ee620b74673d19cd182cb07adc1815d
Author: Alexey Serbin 
AuthorDate: Fri Apr 19 12:27:41 2024 -0700

[docs] encryption-at-rest is now natively supported

This is a follow-up to c5956652522311e2bf5263aa05129d4b79c22d52.

Change-Id: Ia0ebd410dae1e17641826795b2ac0b0ba7648d00
Reviewed-on: http://gerrit.cloudera.org:8080/21336
Reviewed-by: Mahesh Reddy 
Tested-by: Alexey Serbin 
Reviewed-by: Attila Bukor 
---
 docs/known_issues.adoc | 4 
 docs/security.adoc | 4 
 2 files changed, 8 deletions(-)

diff --git a/docs/known_issues.adoc b/docs/known_issues.adoc
index 1079ab3cb..16c0788e6 100644
--- a/docs/known_issues.adoc
+++ b/docs/known_issues.adoc
@@ -172,10 +172,6 @@ anecdotal values that have been seen in real world 
production clusters:
 
 * Row-level authorization is not available.
 
-* Data encryption at rest is not directly built into Kudu. Encryption of
-  Kudu data at rest can be achieved through the use of local block device
-  encryption software such as `dmcrypt`.
-
 * Server certificates generated by Kudu IPKI are incompatible with
   link:https://www.bouncycastle.org/[bouncycastle] version 1.52 and earlier. 
See
   link:https://issues.apache.org/jira/browse/KUDU-2145[KUDU-2145] for details.
diff --git a/docs/security.adoc b/docs/security.adoc
index 90f9c2ce6..6f5b2012c 100644
--- a/docs/security.adoc
+++ b/docs/security.adoc
@@ -629,7 +629,3 @@ Kudu has a few known security limitations:
 
 External PKI:: Kudu does not support externally-issued certificates for 
internal
 wire encryption (server to server and client to server).
-
-On-disk Encryption:: Kudu does not have built-in on-disk encryption. However,
-Kudu can be used with whole-disk encryption tools such as dm-crypt.
-



(kudu) 02/02: [util] fix TidyBot warnings in ScopedTracer

2024-04-25 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit b2a6fe26266e170b5e885d1ddac6a5f03ded1759
Author: Alexey Serbin 
AuthorDate: Wed Apr 24 19:03:27 2024 -0700

[util] fix TidyBot warnings in ScopedTracer

This patch removes superfluous constructs from ScopedTracer, addressing
warnings output by CLANG tidy, e.g.:

  2024-04-24 18:49:17,524 INFO: src/kudu/util/debug/trace_event.h:1379:3: 
warning: 5 uninitialized fields at the end of the constructor call [
  clang-analyzer-optin.cplusplus.UninitializedObject]
ScopedTracer() : p_data_(nullptr) {}
^
  ...

This patch doesn't contain any functional modifications.

Change-Id: I0e2f8578180955d0c8c788a0ee160deefba6a3ca
Reviewed-on: http://gerrit.cloudera.org:8080/21352
Reviewed-by: Mahesh Reddy 
Reviewed-by: Yingchun Lai 
Tested-by: Alexey Serbin 
---
 src/kudu/util/debug/trace_event.h | 34 ++
 1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/src/kudu/util/debug/trace_event.h 
b/src/kudu/util/debug/trace_event.h
index f43b48de3..4994ed047 100644
--- a/src/kudu/util/debug/trace_event.h
+++ b/src/kudu/util/debug/trace_event.h
@@ -1375,37 +1375,31 @@ static inline kudu::debug::TraceEventHandle 
AddTraceEvent(
 // Used by TRACE_EVENTx macros. Do not use directly.
 class ScopedTracer {
  public:
-  // Note: members of data_ intentionally left uninitialized. See Initialize.
-  ScopedTracer() : p_data_(nullptr) {}
+  ScopedTracer()
+  : category_group_enabled_(nullptr),
+name_(nullptr),
+event_handle_({ 0, 0, 0 }) {
+  }
 
   ~ScopedTracer() {
-if (p_data_ && *data_.category_group_enabled)
+if (category_group_enabled_) {
   TRACE_EVENT_API_UPDATE_TRACE_EVENT_DURATION(
-  data_.category_group_enabled, data_.name, data_.event_handle);
+  category_group_enabled_, name_, event_handle_);
+}
   }
 
   void Initialize(const unsigned char* category_group_enabled,
   const char* name,
   kudu::debug::TraceEventHandle event_handle) {
-data_.category_group_enabled = category_group_enabled;
-data_.name = name;
-data_.event_handle = event_handle;
-p_data_ = _;
+category_group_enabled_ = category_group_enabled;
+name_ = name;
+event_handle_ = event_handle;
   }
 
  private:
-  // This Data struct workaround is to avoid initializing all the members
-  // in Data during construction of this object, since this object is always
-  // constructed, even when tracing is disabled. If the members of Data were
-  // members of this class instead, compiler warnings occur about potential
-  // uninitialized accesses.
-  struct Data {
-const unsigned char* category_group_enabled;
-const char* name;
-kudu::debug::TraceEventHandle event_handle;
-  };
-  Data* p_data_;
-  Data data_;
+  const unsigned char* category_group_enabled_;
+  const char* name_;
+  kudu::debug::TraceEventHandle event_handle_;
 };
 
 // Used by TRACE_EVENT_BINARY_EFFICIENTx macro. Do not use directly.
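
For context, a minimal standalone sketch (illustrative names only, not Kudu code) of the general fix pattern for clang-analyzer-optin.cplusplus.UninitializedObject that the patch above applies: hold the members directly and give each one a well-defined initial value, instead of parking them in an intentionally uninitialized nested struct behind a pointer.

```
// Sketch of the pattern: every member has an in-class initializer, so the
// analyzer no longer sees uninitialized fields, and the "is this tracer
// active?" check collapses to a null test on a single member.
#include <cstdio>

struct Handle { int a = 0, b = 0, c = 0; };

class ScopedTracerSketch {
 public:
  ScopedTracerSketch() = default;  // all members default-initialized below

  ~ScopedTracerSketch() {
    if (enabled_) {
      std::printf("finish trace: %s\n", name_);
    }
  }

  void Initialize(const unsigned char* enabled, const char* name, Handle h) {
    enabled_ = enabled;
    name_ = name;
    handle_ = h;
  }

 private:
  const unsigned char* enabled_ = nullptr;
  const char* name_ = nullptr;
  Handle handle_;
};

int main() {
  const unsigned char on = 1;
  ScopedTracerSketch t;
  t.Initialize(&on, "demo", Handle{});
  return 0;
}
```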



(kudu) branch master updated (a0a523018 -> b2a6fe262)

2024-04-25 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


from a0a523018 [util] switch to using std::shared_lock
 new c57263c62 [docs] encryption-at-rest is now natively supported
 new b2a6fe262 [util] fix TidyBot warnings in ScopedTracer

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docs/known_issues.adoc|  4 
 docs/security.adoc|  4 
 src/kudu/util/debug/trace_event.h | 34 ++
 3 files changed, 14 insertions(+), 28 deletions(-)



(kudu) branch master updated: [util] switch to using std::shared_lock

2024-04-24 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new a0a523018 [util] switch to using std::shared_lock
a0a523018 is described below

commit a0a523018983d5c28efed44ef85c7680e7ce8c03
Author: Alexey Serbin 
AuthorDate: Tue Apr 23 12:19:19 2024 -0700

[util] switch to using std::shared_lock

Since Kudu switched to C++17 a long time ago, the home-grown
kudu::shared_lock is no longer needed.

Change-Id: Idcda724a3bdab86f593d0241e062dd40bcd26af1
Reviewed-on: http://gerrit.cloudera.org:8080/21349
Reviewed-by: Wang Xixu <1450306...@qq.com>
Tested-by: Alexey Serbin 
Reviewed-by: Abhishek Chennaka 
---
 src/kudu/client/meta_cache.cc  |  2 ++
 src/kudu/clock/builtin_ntp.cc  |  3 ++
 src/kudu/common/generic_iterators.cc   |  2 ++
 src/kudu/consensus/log.cc  |  2 ++
 src/kudu/consensus/log.h   |  4 ++-
 src/kudu/fs/data_dirs.cc   | 13 
 src/kudu/fs/dir_manager.cc |  5 +--
 src/kudu/fs/dir_manager.h  |  6 ++--
 src/kudu/fs/fs_manager.cc  |  6 ++--
 src/kudu/fs/log_block_manager.cc   |  2 ++
 src/kudu/master/catalog_manager.cc |  2 ++
 src/kudu/master/catalog_manager.h  |  8 +++--
 src/kudu/master/location_cache.cc  |  3 ++
 src/kudu/master/ts_descriptor.cc   |  2 ++
 src/kudu/master/ts_descriptor.h| 15 -
 src/kudu/master/ts_manager.cc  |  2 ++
 src/kudu/rpc/messenger.cc  |  2 ++
 src/kudu/rpc/messenger.h   |  4 ++-
 src/kudu/rpc/rpcz_store.cc |  2 ++
 src/kudu/security/tls_context.cc   |  2 ++
 src/kudu/security/tls_context.h| 11 ---
 src/kudu/security/token_signer.cc  |  3 +-
 src/kudu/security/token_verifier.cc|  4 ++-
 src/kudu/server/webserver.cc   |  3 ++
 src/kudu/tablet/delta_tracker.cc   | 26 
 src/kudu/tablet/diskrowset.cc  |  3 +-
 src/kudu/tablet/ops/write_op.cc|  4 ++-
 src/kudu/tablet/ops/write_op.h |  5 +--
 src/kudu/tablet/tablet.cc  | 11 +++
 src/kudu/tablet/tablet.h   |  6 ++--
 src/kudu/tablet/txn_participant.cc |  2 ++
 src/kudu/tablet/txn_participant.h  |  3 +-
 src/kudu/tools/rebalancer_tool.cc  |  2 ++
 src/kudu/transactions/txn_status_manager.h |  3 +-
 src/kudu/tserver/scanners.cc   |  7 +++--
 src/kudu/tserver/ts_tablet_manager.cc  |  2 ++
 src/kudu/tserver/ts_tablet_manager.h   |  4 ++-
 src/kudu/util/locks.h  | 49 +-
 src/kudu/util/rw_mutex-test.cc |  4 ++-
 src/kudu/util/rw_mutex.h   |  2 +-
 src/kudu/util/rw_semaphore-test.cc |  4 ++-
 src/kudu/util/thread.cc|  3 ++
 42 files changed, 134 insertions(+), 114 deletions(-)

diff --git a/src/kudu/client/meta_cache.cc b/src/kudu/client/meta_cache.cc
index 232e1900b..2ddfdf6c4 100644
--- a/src/kudu/client/meta_cache.cc
+++ b/src/kudu/client/meta_cache.cc
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -79,6 +80,7 @@ using kudu::security::SignedTokenPB;
 using kudu::tserver::TabletServerAdminServiceProxy;
 using kudu::tserver::TabletServerServiceProxy;
 using std::set;
+using std::shared_lock;
 using std::shared_ptr;
 using std::string;
 using std::unique_ptr;
diff --git a/src/kudu/clock/builtin_ntp.cc b/src/kudu/clock/builtin_ntp.cc
index 348a85e8b..29b64f541 100644
--- a/src/kudu/clock/builtin_ntp.cc
+++ b/src/kudu/clock/builtin_ntp.cc
@@ -31,7 +31,9 @@
 #include 
 #include 
 #include 
+#include 
 #include 
+#include 
 #include 
 #include 
 
@@ -150,6 +152,7 @@ using kudu::clock::internal::kIntervalNone;
 using kudu::clock::internal::RecordedResponse;
 using std::deque;
 using std::lock_guard;
+using std::shared_lock;
 using std::string;
 using std::unique_ptr;
 using std::vector;
diff --git a/src/kudu/common/generic_iterators.cc 
b/src/kudu/common/generic_iterators.cc
index af4b4d9bc..3ca481ac9 100644
--- a/src/kudu/common/generic_iterators.cc
+++ b/src/kudu/common/generic_iterators.cc
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -74,6 +75,7 @@ template  struct compare;
 
 using std::deque;
 using std::get;
+using std::shared_lock;
 using std::sort;
 using std::string;
 using std::unique_ptr;
diff --git a/src/kudu/consensus/log.cc b/src/kudu/consensus/log.cc
index 062b496e2..ed861d252 100644
--- a/src/kudu/consensus/log.cc
+++ b/src/kudu/consensus/log.cc
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -179,6 
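
For readers unfamiliar with the idiom, a minimal self-contained sketch of the C++17 pattern the patch switches to: std::shared_lock for readers and an exclusive lock for writers, using plain std::shared_mutex. The class and names below are illustrative only, not Kudu's rw_spinlock/RWMutex types.

```
#include <map>
#include <mutex>
#include <shared_mutex>
#include <string>

class Cache {
 public:
  // Many readers may hold the shared lock concurrently.
  bool Get(const std::string& key, int* out) const {
    std::shared_lock<std::shared_mutex> l(lock_);
    auto it = map_.find(key);
    if (it == map_.end()) return false;
    *out = it->second;
    return true;
  }

  // A writer takes exclusive ownership of the mutex.
  void Put(const std::string& key, int value) {
    std::lock_guard<std::shared_mutex> l(lock_);
    map_[key] = value;
  }

 private:
  mutable std::shared_mutex lock_;
  std::map<std::string, int> map_;
};

int main() {
  Cache c;
  c.Put("answer", 42);
  int v = 0;
  return c.Get("answer", &v) && v == 42 ? 0 : 1;
}
```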

(kudu) branch master updated: KUDU-3566 fix summary metrics in Prometheus format

2024-04-24 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new b236d534a KUDU-3566 fix summary metrics in Prometheus format
b236d534a is described below

commit b236d534abeb60520e4568bb4a1452d6674bb597
Author: Alexey Serbin 
AuthorDate: Fri Apr 19 10:58:25 2024 -0700

KUDU-3566 fix summary metrics in Prometheus format

This patch corrects the output of various Kudu metrics backed by HDR
histograms.  From the Prometheus perspective, those metrics are output
as summaries [1], not histograms [2].  It's necessary to mark them
accordingly to avoid misinterpretation of the collected statistics.

I updated the corresponding unit tests and verified that the updated output
was properly parsed and interpreted by a Prometheus 2.50.0 instance
running on my macOS laptop.

[1] https://prometheus.io/docs/concepts/metric_types/#summary
[2] https://prometheus.io/docs/concepts/metric_types/#histogram

Change-Id: I1375ddf1b0ecd730327cd44b4955813b80107f7b
Reviewed-on: http://gerrit.cloudera.org:8080/21338
Tested-by: Alexey Serbin 
Reviewed-by: Abhishek Chennaka 
---
 src/kudu/util/metrics-test.cc |  32 +++-
 src/kudu/util/metrics.cc  | 110 --
 2 files changed, 72 insertions(+), 70 deletions(-)

diff --git a/src/kudu/util/metrics-test.cc b/src/kudu/util/metrics-test.cc
index 95a6dcf76..550bfcdac 100644
--- a/src/kudu/util/metrics-test.cc
+++ b/src/kudu/util/metrics-test.cc
@@ -651,24 +651,30 @@ TEST_F(MetricsTest, SimpleHistogramMergeTest) {
 }
 
 TEST_F(MetricsTest, HistogramPrometheusTest) {
+  constexpr const char* const kExpectedOutput =
+  "# HELP test_hist foo\n"
+  "# TYPE test_hist summary\n"
+  "test_hist{unit_type=\"milliseconds\", quantile=\"0\"} 1\n"
+  "test_hist{unit_type=\"milliseconds\", quantile=\"0.75\"} 2\n"
+  "test_hist{unit_type=\"milliseconds\", quantile=\"0.95\"} 3\n"
+  "test_hist{unit_type=\"milliseconds\", quantile=\"0.99\"} 4\n"
+  "test_hist{unit_type=\"milliseconds\", quantile=\"0.999\"} 5\n"
+  "test_hist{unit_type=\"milliseconds\", quantile=\"0.\"} 5\n"
+  "test_hist{unit_type=\"milliseconds\", quantile=\"1\"} 5\n"
+  "test_hist_sum 1460\n"
+  "test_hist_count 1000\n";
+
   scoped_refptr hist = METRIC_test_hist.Instantiate(entity_);
+  hist->IncrementBy(1, 700);
+  hist->IncrementBy(2, 200);
+  hist->IncrementBy(3, 50);
+  hist->IncrementBy(4, 40);
+  hist->IncrementBy(5, 10);
 
   ostringstream output;
   PrometheusWriter writer();
   ASSERT_OK(hist->WriteAsPrometheus(, {}));
-
-  const string expected_output = "# HELP test_hist foo\n"
- "# TYPE test_hist histogram\n"
- "test_hist_bucket{unit_type=\"milliseconds\", 
le=\"0.75\"} 0\n"
- "test_hist_bucket{unit_type=\"milliseconds\", 
le=\"0.95\"} 0\n"
- "test_hist_bucket{unit_type=\"milliseconds\", 
le=\"0.99\"} 0\n"
- "test_hist_bucket{unit_type=\"milliseconds\", 
le=\"0.999\"} 0\n"
- "test_hist_bucket{unit_type=\"milliseconds\", 
le=\"0.\"} 0\n"
- "test_hist_bucket{unit_type=\"milliseconds\", 
le=\"+Inf\"} 0\n"
- "test_hist_sum{unit_type=\"milliseconds\"} 
0\n"
- "test_hist_count{unit_type=\"milliseconds\"} 
0\n";
-
-  ASSERT_EQ(expected_output, output.str());
+  ASSERT_EQ(kExpectedOutput, output.str());
 }
 
 TEST_F(MetricsTest, JsonPrintTest) {
diff --git a/src/kudu/util/metrics.cc b/src/kudu/util/metrics.cc
index b24aeb135..a67902aa5 100644
--- a/src/kudu/util/metrics.cc
+++ b/src/kudu/util/metrics.cc
@@ -749,9 +749,18 @@ void MetricPrototype::WriteFields(JsonWriter* writer,
 
 void MetricPrototype::WriteHelpAndType(PrometheusWriter* writer,
const string& prefix) const {
+  static constexpr const char* const kSummary = "summary";
+
+  // The way how HdrHistogram-backed stats are presented in Kudu metrics
+  // corresponds to a 'summary' metric in Prometheus, not a 'histogram' one 
[1].
+  //
+  // [1] https://prometheus.io/docs/concepts/metric_typ
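
A minimal sketch (illustrative only, not Kudu's PrometheusWriter) of why precomputed HDR percentiles map to the Prometheus 'summary' type: the exporter emits ready-made quantiles plus _sum and _count, whereas a 'histogram' would instead require cumulative bucket counts keyed by 'le' labels. Extra labels such as unit_type are omitted here for brevity.

```
#include <cstdio>
#include <utility>
#include <vector>

// Writes one metric in the Prometheus text exposition format for a summary.
void WriteSummary(const char* name,
                  const std::vector<std::pair<double, long>>& quantiles,
                  long sum, long count) {
  std::printf("# TYPE %s summary\n", name);
  for (const auto& q : quantiles) {
    // e.g. test_hist{quantile="0.99"} 4
    std::printf("%s{quantile=\"%g\"} %ld\n", name, q.first, q.second);
  }
  std::printf("%s_sum %ld\n%s_count %ld\n", name, sum, name, count);
}

int main() {
  WriteSummary("test_hist", {{0.75, 2}, {0.95, 3}, {0.99, 4}}, 1460, 1000);
  return 0;
}
```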

(kudu) branch master updated: [util] expose recently seen value in a histogram

2024-04-24 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 78b785b78 [util] expose recently seen value in a histogram
78b785b78 is described below

commit 78b785b78ed7e85bcaca11233acdecb6a134c173
Author: Alexey Serbin 
AuthorDate: Wed Apr 17 17:40:16 2024 -0700

[util] expose recently seen value in a histogram

When troubleshooting a performance issue with a Kudu cluster, I found it
would be useful if each of the existing histogram metrics exposed the most
recently seen value (in particular, I was interested in knowing
the most recently captured length of a tablet's prepare queue).

This does not align with the semantics of a histogram since collected
statistics on the distribution of observed values aren't supposed to
include any notion of recency.  However, I think this is a useful
improvement from the observability standpoint because it allows for
collection of valuable information on monitored parameters
without introducing additional metrics.

NOTE: even though chromium-based Atomics are obsolete and STL atomics
  should be used in new code instead, I opted to use the former
  because the code would look much uglier otherwise.  I think
  it's a better option to switch to STL atomics in
  hdr_histogram.{h,cc} altogether in a separate changelist.

Change-Id: Ia4547faba050e09e31c83372105a9fe97b77ccbc
Reviewed-on: http://gerrit.cloudera.org:8080/21321
Reviewed-by: Abhishek Chennaka 
Reviewed-by: Wang Xixu <1450306...@qq.com>
Tested-by: Alexey Serbin 
---
 src/kudu/util/hdr_histogram-test.cc | 11 +++
 src/kudu/util/hdr_histogram.cc  | 22 +++---
 src/kudu/util/hdr_histogram.h   |  5 -
 src/kudu/util/histogram.proto   |  1 +
 src/kudu/util/metrics-test.cc   |  2 ++
 src/kudu/util/metrics.cc|  2 ++
 6 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/src/kudu/util/hdr_histogram-test.cc 
b/src/kudu/util/hdr_histogram-test.cc
index 22a9ab606..065c6fca3 100644
--- a/src/kudu/util/hdr_histogram-test.cc
+++ b/src/kudu/util/hdr_histogram-test.cc
@@ -35,21 +35,27 @@ TEST_F(HdrHistogramTest, SimpleTest) {
 
   HdrHistogram hist(highest_val, kSigDigits);
   ASSERT_EQ(0, hist.CountInBucketForValue(1));
+  ASSERT_EQ(0, hist.LastValue());
   hist.Increment(1);
   ASSERT_EQ(1, hist.CountInBucketForValue(1));
+  ASSERT_EQ(1, hist.LastValue());
   hist.IncrementBy(1, 3);
   ASSERT_EQ(4, hist.CountInBucketForValue(1));
+  ASSERT_EQ(1, hist.LastValue());
   hist.Increment(10);
   ASSERT_EQ(1, hist.CountInBucketForValue(10));
+  ASSERT_EQ(10, hist.LastValue());
   hist.Increment(20);
   ASSERT_EQ(1, hist.CountInBucketForValue(20));
   ASSERT_EQ(0, hist.CountInBucketForValue(1000));
+  ASSERT_EQ(20, hist.LastValue());
   hist.Increment(1000);
   hist.Increment(1001);
   ASSERT_EQ(2, hist.CountInBucketForValue(1000));
 
   ASSERT_EQ(1 + 1 * 3 + 10 + 20 + 1000 + 1001,
 hist.TotalSum());
+  ASSERT_EQ(1001, hist.LastValue());
 }
 
 TEST_F(HdrHistogramTest, TestCoordinatedOmission) {
@@ -111,6 +117,7 @@ TEST_F(HdrHistogramTest, PercentileAndCopyTest) {
   NO_FATALS(validate_percentiles(, specified_max));
 
   ASSERT_EQ(hist.TotalSum(), copy.TotalSum());
+  ASSERT_EQ(hist.LastValue(), copy.LastValue());
 }
 
 void PopulateHistogram(HdrHistogram* histogram, uint64_t low, uint64_t high) {
@@ -126,7 +133,9 @@ TEST_F(HdrHistogramTest, MergeTest) {
   HdrHistogram other(highest_val, kSigDigits);
 
   PopulateHistogram(, 1, 100);
+  ASSERT_EQ(100, hist.LastValue());
   PopulateHistogram(, 101, 250);
+  ASSERT_EQ(250, other.LastValue());
   HdrHistogram old(hist);
   hist.MergeFrom(other);
 
@@ -134,6 +143,8 @@ TEST_F(HdrHistogramTest, MergeTest) {
   ASSERT_EQ(hist.TotalSum(), old.TotalSum() + other.TotalSum());
   ASSERT_EQ(hist.MinValue(), 1);
   ASSERT_EQ(hist.MaxValue(), 250);
+  ASSERT_EQ(100, old.LastValue());
+  ASSERT_EQ(other.LastValue(), hist.LastValue());
   ASSERT_NEAR(hist.MeanValue(), (1 + 250) / 2.0, 1e3);
   ASSERT_EQ(hist.ValueAtPercentile(100.0), 250);
   ASSERT_NEAR(hist.ValueAtPercentile(99.0), 250 * 99.0 / 100, 1e3);
diff --git a/src/kudu/util/hdr_histogram.cc b/src/kudu/util/hdr_histogram.cc
index f8d2bd222..82571ef89 100644
--- a/src/kudu/util/hdr_histogram.cc
+++ b/src/kudu/util/hdr_histogram.cc
@@ -32,6 +32,7 @@
 
 #include "kudu/gutil/atomicops.h"
 #include "kudu/gutil/bits.h"
+#include "kudu/gutil/port.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/status.h"
 
@@ -57,7 +58,8 @@ HdrHistogram::HdrHistogram(uint64_t highest_trackable_value, 
int num_significant
 total_count_(0),
 total_sum_(0),
 min_value_(std::numeric_limits::max()),
-max_value
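
A self-contained sketch of the idea: remember the most recently observed value alongside the aggregate counters. The names here are illustrative and std::atomic is used only to keep the example standalone; the actual patch reuses the chromium-style Atomics already present in hdr_histogram.{h,cc}.

```
#include <atomic>
#include <cassert>
#include <cstdint>

class RecordingCounter {
 public:
  void Increment(uint64_t value) {
    total_count_.fetch_add(1, std::memory_order_relaxed);
    total_sum_.fetch_add(value, std::memory_order_relaxed);
    // Plain store: "last" simply means whichever writer happened most recently.
    last_value_.store(value, std::memory_order_relaxed);
  }

  uint64_t LastValue() const { return last_value_.load(std::memory_order_relaxed); }
  uint64_t TotalSum() const { return total_sum_.load(std::memory_order_relaxed); }
  uint64_t TotalCount() const { return total_count_.load(std::memory_order_relaxed); }

 private:
  std::atomic<uint64_t> total_count_{0};
  std::atomic<uint64_t> total_sum_{0};
  std::atomic<uint64_t> last_value_{0};
};

int main() {
  RecordingCounter c;
  c.Increment(10);
  c.Increment(20);
  assert(c.LastValue() == 20);
  assert(c.TotalSum() == 30);
  return 0;
}
```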

(kudu) 02/02: [rpc-test] make RpcPendingConnectionsMetric more stable

2024-04-23 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit a5eb21270f9b5b13ffdb6184fc3e82e04450ddc5
Author: Alexey Serbin 
AuthorDate: Fri Apr 5 19:23:12 2024 -0700

[rpc-test] make RpcPendingConnectionsMetric more stable

I noticed that on very fast machines the RpcPendingConnectionsMetric
scenario would occasionally fail (~1 time per 500 runs) because the
sock_diag() netlink facility seemingly reported stale data, i.e. it
reported one pending connection while the only attempted connection
had already been accepted by the server and even closed by the client.

With this change, no flakiness has been observed in more than 50K runs
of the scenario.

Change-Id: If8e66c471452b1e04c84cf2d5c979578c287b4fa
Reviewed-on: http://gerrit.cloudera.org:8080/21251
Tested-by: Alexey Serbin 
Reviewed-by: Mahesh Reddy 
Reviewed-by: Abhishek Chennaka 
---
 src/kudu/rpc/rpc-test.cc | 32 +++-
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/src/kudu/rpc/rpc-test.cc b/src/kudu/rpc/rpc-test.cc
index 3bef361d0..74284735c 100644
--- a/src/kudu/rpc/rpc-test.cc
+++ b/src/kudu/rpc/rpc-test.cc
@@ -1461,16 +1461,13 @@ TEST_P(TestRpc, AcceptorDispatchingTimesMetric) {
 }
 
 // Basic verification of the 'rpc_pending_connections' metric.
+// The number of pending connections is properly reported on Linux; on other
+// platforms that don't support sock_diag() netlink facility (e.g., macOS)
+// the metric should report -1.
 TEST_P(TestRpc, RpcPendingConnectionsMetric) {
   Sockaddr server_addr;
   ASSERT_OK(StartTestServer(_addr));
 
-  {
-Socket socket;
-ASSERT_OK(socket.Init(server_addr.family(), /*flags=*/0));
-ASSERT_OK(socket.Connect(server_addr));
-  }
-
   // Get the reference to already registered metric with the proper callback
   // to fetch the necessary information. The { 'return -3'; } fake callback
   // is to make sure the actual gauge returns a proper value,
@@ -1479,9 +1476,26 @@ TEST_P(TestRpc, RpcPendingConnectionsMetric) {
   METRIC_rpc_pending_connections.InstantiateFunctionGauge(
   server_messenger_->metric_entity(), []() { return -3; });
 
-  // There should be no connection pending -- the only received connection
-  // request has been handled already above. The number of pending connections
-  // is properly reported at Linux only as of now; on macOS it should report 
-1.
+  // No connection attempts have been made yet.
+#if defined(__linux__)
+  ASSERT_EQ(0, pending_connections_gauge->value());
+#else
+  ASSERT_EQ(-1, pending_connections_gauge->value());
+#endif
+
+  {
+Socket socket;
+ASSERT_OK(socket.Init(server_addr.family(), /*flags=*/0));
+ASSERT_OK(socket.Connect(server_addr));
+  }
+
+  // A small pause below is to avoid reading 1 from the metric: it's not quite
+  // clear why the sock_diag() netlink facility reports stale data on very fast
+  // machines in rare cases.
+  SleepFor(MonoDelta::FromMilliseconds(10));
+
+  // At this point, there should be no connection pending: the only received
+  // connection request has already been handled above.
 #if defined(__linux__)
   ASSERT_EQ(0, pending_connections_gauge->value());
 #else
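
A standalone POSIX sketch (Linux/macOS, illustrative only, error handling omitted) of what a "pending connection" is: the TCP handshake has completed but the server has not yet called accept(), so the connection sits in the listen socket's accept queue — the quantity the rpc_pending_connections gauge reports via sock_diag() on Linux.

```
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>
#include <cstdio>

int main() {
  int srv = socket(AF_INET, SOCK_STREAM, 0);
  sockaddr_in addr{};
  addr.sin_family = AF_INET;
  addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
  addr.sin_port = 0;  // let the kernel pick a free port
  bind(srv, reinterpret_cast<sockaddr*>(&addr), sizeof(addr));
  listen(srv, /*backlog=*/8);

  socklen_t len = sizeof(addr);
  getsockname(srv, reinterpret_cast<sockaddr*>(&addr), &len);

  // The client connects; the handshake completes, but nobody accept()s yet,
  // so the connection is counted as pending on the server's listen socket.
  int cli = socket(AF_INET, SOCK_STREAM, 0);
  connect(cli, reinterpret_cast<sockaddr*>(&addr), sizeof(addr));
  std::puts("one connection is now pending in the server's accept queue");

  // Draining the queue with accept() brings the pending count back to zero.
  int conn = accept(srv, nullptr, nullptr);
  close(conn);
  close(cli);
  close(srv);
  return 0;
}
```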



(kudu) 01/02: Update build pattern for fetching flaky tests list

2024-04-23 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 7b5fc9f2f0ad538242656431bf76e253cc537772
Author: Marton Greber 
AuthorDate: Thu Apr 18 11:13:30 2024 +0200

Update build pattern for fetching flaky tests list

During the infra changes that happened over the last few months, the build_id
prefix has been changed [note 1]. As a result, flaky tests are not
retried during testing in the pre-commit runs. This patch updates the
build pattern in the build-and-test.sh script so that the
list of flaky tests is fetched correctly.

[note 1]:
The build_id of an isolated test is constructed from the Jenkins job's name
and build number, prefixed with "jenkins-". To illustrate this, here are
some examples of build_ids prior to and after the infra upgrade.
Before:
jenkins-kudu-gerrit-BUILD_TYPE=ASAN-29232
jenkins-kudu-gerrit-BUILD_TYPE=DEBUG-29227
After:
jenkins-build_and_test-64
jenkins-build_and_test-63
From the above we can see that the new Jenkins job name caused the
issue. The new job is parameterised and does not contain the build type in
the job's name. Therefore, a change in the naming is justified.
In case something like this happens in the future, the build_ids can be
observed in the test_results MySQL database on the isolateserver test
infra host.

Change-Id: I317a3a32c06c06306b97566f954e0ffd508ce01f
Reviewed-on: http://gerrit.cloudera.org:8080/21327
Reviewed-by: Ashwani Raina 
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin 
---
 build-support/jenkins/build-and-test.sh | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/build-support/jenkins/build-and-test.sh 
b/build-support/jenkins/build-and-test.sh
index 8f6d8c74e..6801f9e42 100755
--- a/build-support/jenkins/build-and-test.sh
+++ b/build-support/jenkins/build-and-test.sh
@@ -39,8 +39,8 @@
 #
 #   KUDU_FLAKY_TEST_ATTEMPTS  Default: 1
 # If more than 1, will fetch the list of known flaky tests
-# from the kudu-test jenkins job, and allow those tests to
-# be flaky in this build.
+# from the jenkins jobs matching the "%jenkins-%" pattern, and allow those
+# tests to be flaky in this build.
 #
 #   TEST_RESULT_SERVER  Default: none
 # The host:port pair of a server running test_result_server.py.
@@ -183,8 +183,11 @@ mkdir -p $BUILD_ROOT
 # Same for the Java tests, which aren't inside BUILD_ROOT
 rm -rf $SOURCE_ROOT/java/*/build
 
+# The build_pattern is %jenkins-% because we are interested in two types of 
runs:
+# 1. As of now build_and_test pipeline job which is triggered by the 
pre-commit pipeline job.
+# 2. Any other job which is used to run the flaky tests only.
 list_flaky_tests() {
-  local 
url="http://$TEST_RESULT_SERVER/list_failed_tests?num_days=3_pattern=%25kudu-test%25;
+  local 
url="http://$TEST_RESULT_SERVER/list_failed_tests?num_days=3_pattern=%25jenkins-%25;
   >&2 echo Fetching flaky test list from "$url" ...
   curl -s --show-error "$url"
   return $?



(kudu) branch master updated (946acb711 -> a5eb21270)

2024-04-23 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


from 946acb711 KUDU-3518: Fix the unexpected scan predicate
 new 7b5fc9f2f Update build pattern for fetching flaky tests list
 new a5eb21270 [rpc-test] make RpcPendingConnectionsMetric more stable

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 build-support/jenkins/build-and-test.sh |  9 ++---
 src/kudu/rpc/rpc-test.cc| 32 +++-
 2 files changed, 29 insertions(+), 12 deletions(-)



(kudu) branch gh-pages updated: [blog] blogpost about auto-incrementing column in Kudu

2024-04-23 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch gh-pages
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/gh-pages by this push:
 new b7bf6b03b [blog] blogpost about auto-incrementing column in Kudu
b7bf6b03b is described below

commit b7bf6b03b01a7c8f561d9bd7ff132fc255b2209f
Author: Abhishek Chennaka 
AuthorDate: Thu Mar 7 17:43:30 2024 -0800

[blog] blogpost about auto-incrementing column in Kudu

Change-Id: I39f34eea6877a8e050ba2e187ff71555256bf797
Reviewed-on: http://gerrit.cloudera.org:8080/21119
Reviewed-by: Alexey Serbin 
Tested-by: Abhishek Chennaka 
---
 ...4-03-07-introducing-auto-incrementing-column.md | 178 +
 1 file changed, 178 insertions(+)

diff --git a/_posts/2024-03-07-introducing-auto-incrementing-column.md 
b/_posts/2024-03-07-introducing-auto-incrementing-column.md
new file mode 100644
index 0..d43bf442c
--- /dev/null
+++ b/_posts/2024-03-07-introducing-auto-incrementing-column.md
@@ -0,0 +1,178 @@
+---
+layout: post
+title: "Introducing Auto-incrementing Column in Kudu"
+author: Abhishek Chennaka
+---
+
+
+# Introduction
+
+Kudu has a strict requirement for a primary key presence in a table. This is 
primarily to help in
+point lookups and support DELETE and UPDATE operations on the table data. 
There are situations where
+users are unable to define a unique primary key in their data set and have to 
either introduce
+additional columns to be a part of the primary key or define a new column and 
maintain it to enforce
+uniqueness. Kudu 1.17 has introduced support for the auto-incrementing column 
to have partially
+defined primary keys (keys which are not unique across the table) during table 
creation. This way a
+user does not have to worry about the uniqueness constraint when defining a 
primary key.
+
+# Implementation Details
+
+When a primary key is partially defined, Kudu internally creates a new column 
named
+“auto_incrementing_id” as a part of the primary key. The column is populated 
with a monotonically
+increasing counter. The system updates the counter value upon every INSERT 
operation and populates
+the "auto_incrementing_id" column on the server side. The counter is 
partition-local i.e. every
+tablet has its own counter.
+
+## Server Side
+
+When a user writes data into a table with the auto-incrementing column, the 
server makes sure that
+no INSERT operations have the “auto_incrementing_id” column field value set 
and populates this
+column value. The highest value of the counter written into the 
"auto_incrementing_id" column
+until any particular point is stored in memory and this is used to set the 
column value for the
+next INSERT operation.
+
+## Client Side
+
+When creating a table without an explicitly defined primary key, users will 
have to declare the key
+as non-unique. Internally, the client builds a schema with an extra column 
named
+“auto_incrementing_id” and forwards the request to the server where the table 
is created. For
+INSERT operations, the user shouldn’t specify the “auto_incrementing_id” 
column value as it will be
+populated on the server side.
+
+### Impala Integration
+
+In Impala, the new column is not exposed to the user by default. This is due 
to the reason that it
+is not a part of the user table schema. The below query will not return the 
“auto_incrementing_id”
+column
+SELECT \* FROM tablename
+
+If the auto-incrementing column's data is needed, the column name has to be 
specifically requested.
+The below query will return the column values:
+SELECT \*, auto_incrementing_id FROM tablename
+
+ Examples
+
+Create a table with two columns and two hash partitions:
+
+```
+default> CREATE TABLE demo_table(id INT NON UNIQUE PRIMARY KEY, name STRING) 
PARTITION BY HASH (id) PARTITIONS 2 STORED AS KUDU;
+Query: CREATE TABLE demo_table(id INT NON UNIQUE PRIMARY KEY, name STRING) 
PARTITION BY HASH (id) PARTITIONS 2 STORED AS KUDU
++-+
+| summary |
++-+
+| Table has been created. |
++-+
+Fetched 1 row(s) in 3.94s
+```
+
+Describe the table:
+
+```
+default> DESCRIBE demo_table;
+Query: DESCRIBE demo_table
++--++-+-++--+---+---+-++
+| name | type   | comment | primary_key | key_unique | 
nullable | default_value | encoding  | compression | block_size |
++--++-+-++--+---+---+-++
+| id   | int| | true| false  | false   
 |   | AUTO_ENCODING | DEFAULT_COMPRESSION | 0  |
+| auto_incrementing_id | bigint |   

(kudu) branch branch-1.17.x updated: KUDU-3518: Fix the unexpected scan predicate

2024-04-16 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/branch-1.17.x by this push:
 new 0de168f7e KUDU-3518: Fix the unexpected scan predicate
0de168f7e is described below

commit 0de168f7e0abcf0c29facefcc9c0c9e12b284140
Author: Abhishek Chennaka 
AuthorDate: Mon Apr 15 15:52:48 2024 -0700

KUDU-3518: Fix the unexpected scan predicate

With the changes introduced in a previous patch[1], there have been
reports of errors during scan operations. This patch fixes these
scan errors.

This patch removes NON-NULL predicates on primary key columns implicitly
added by ScanSpec::LiftPrimaryKeyBounds() by calling
ScanSpec::OptimizeScan() while optimizing InList scan predicates at the
DRS level.  That's similar to what's done when optimizing the InList
predicates based on tablet PK bounds at a higher level.

The issue has been investigated and fixed in collaboration with Alexey
Serbin.

There will be follow-up patches to include tests of this failure.

[1] https://gerrit.cloudera.org/#/c/18434/

Change-Id: I220df7ec1b4f95726c913a19125f4647267d12b1
Reviewed-on: http://gerrit.cloudera.org:8080/21244
Reviewed-by: Alexey Serbin 
Tested-by: Alexey Serbin 
Reviewed-by: Yifan Zhang 
(cherry picked from commit 946acb711d722b1e6fe27af2c7de92960d724980)
Reviewed-on: http://gerrit.cloudera.org:8080/21314
---
 src/kudu/tablet/cfile_set.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/kudu/tablet/cfile_set.cc b/src/kudu/tablet/cfile_set.cc
index 7620eea51..06d5f46e9 100644
--- a/src/kudu/tablet/cfile_set.cc
+++ b/src/kudu/tablet/cfile_set.cc
@@ -453,7 +453,7 @@ Status CFileSet::Iterator::OptimizePKPredicates(ScanSpec* 
spec) {
   }
 
   if (modify_lower_bound_key || modify_upper_bound_key) {
-spec->UnifyPrimaryKeyBoundsAndColumnPredicates(tablet_schema, _, 
true);
+spec->OptimizeScan(tablet_schema, _, true);
   }
   return Status::OK();
 }



(kudu) branch master updated: KUDU-3518: Fix the unexpected scan predicate

2024-04-16 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 946acb711 KUDU-3518: Fix the unexpected scan predicate
946acb711 is described below

commit 946acb711d722b1e6fe27af2c7de92960d724980
Author: Abhishek Chennaka 
AuthorDate: Mon Apr 15 15:52:48 2024 -0700

KUDU-3518: Fix the unexpected scan predicate

With the changes introduced in a previous patch[1], there have been
reports of errors during scan operations. This patch fixes these
scan errors.

This patch removes NON-NULL predicates on primary key columns implicitly
added by ScanSpec::LiftPrimaryKeyBounds() by calling
ScanSpec::OptimizeScan() while optimizing InList scan predicates at the
DRS level.  That's similar to what's done when optimizing the InList
predicates based on tablet PK bounds at a higher level.

The issue has been investigated and fixed in collaboration with Alexey
Serbin.

There will be follow-up patches to include tests of this failure.

[1] https://gerrit.cloudera.org/#/c/18434/

Change-Id: I220df7ec1b4f95726c913a19125f4647267d12b1
Reviewed-on: http://gerrit.cloudera.org:8080/21244
Reviewed-by: Alexey Serbin 
Tested-by: Alexey Serbin 
Reviewed-by: Yifan Zhang 
---
 src/kudu/tablet/cfile_set.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/kudu/tablet/cfile_set.cc b/src/kudu/tablet/cfile_set.cc
index ab46bf3aa..c998bd939 100644
--- a/src/kudu/tablet/cfile_set.cc
+++ b/src/kudu/tablet/cfile_set.cc
@@ -460,7 +460,7 @@ Status CFileSet::Iterator::OptimizePKPredicates(ScanSpec* 
spec) {
   }
 
   if (modify_lower_bound_key || modify_upper_bound_key) {
-spec->UnifyPrimaryKeyBoundsAndColumnPredicates(tablet_schema, _, 
true);
+spec->OptimizeScan(tablet_schema, _, true);
   }
   return Status::OK();
 }



(kudu) branch branch-1.17.x updated: KUDU-3495 Initialize the bitmap while decoding insert/upsert

2024-04-16 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/branch-1.17.x by this push:
 new 3f10f3e25 KUDU-3495 Initialize the bitmap while decoding insert/upsert
3f10f3e25 is described below

commit 3f10f3e251ff80e35978ea98b61f31cb52308705
Author: 宋家成 
AuthorDate: Tue Aug 8 18:33:25 2023 +0800

KUDU-3495 Initialize the bitmap while decoding insert/upsert

An upsert request with an old client schema might eventually lead to
an unexpected result.

If client A opens a table first and then client B adds a column
to the table, the upsert request of client A might set the
newly added column to its default value.

The reason is that we did not initialize the tablet_isset_bitmap
while decoding insert or upsert requests.

A user does not expect a column they did not set to change, even if the
client schema is stale, so initialize the bitmap.

Change-Id: I38a33cb58a085bc83854c5145e904c8ed51092a8
Reviewed-on: http://gerrit.cloudera.org:8080/20327
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin 
(cherry picked from commit f1f3c907fc6f012b6d7a72f0103e1db4f56f0e7d)
Reviewed-on: http://gerrit.cloudera.org:8080/21296
Reviewed-by: Yingchun Lai 
Tested-by: Alexey Serbin 
---
 src/kudu/common/row_operations-test.cc | 27 +++
 src/kudu/common/row_operations.cc  |  4 
 2 files changed, 31 insertions(+)

diff --git a/src/kudu/common/row_operations-test.cc 
b/src/kudu/common/row_operations-test.cc
index 17c336dda..137d6f4f3 100644
--- a/src/kudu/common/row_operations-test.cc
+++ b/src/kudu/common/row_operations-test.cc
@@ -1005,4 +1005,31 @@ TEST_F(RowOperationsTest, ExceedCellLimit) {
   }
 }
 
+TEST_F(RowOperationsTest, SchemasDoNotMatch) {
+  Schema client_schema({ ColumnSchema("key", INT32),
+ ColumnSchema("int_val", INT32) },
+   1);
+  SchemaBuilder server_schema_builder;
+  ASSERT_OK(server_schema_builder.AddKeyColumn("key", INT32));
+  ASSERT_OK(server_schema_builder.AddColumn("int_val", INT32));
+  ASSERT_OK(server_schema_builder.AddNullableColumn("string_val", STRING));
+  Schema server_schema = server_schema_builder.Build();
+
+  KuduPartialRow row(_schema);
+  ASSERT_OK(row.SetInt32("key", 1));
+  ASSERT_OK(row.SetInt32("int_val", 2));
+  RowOperationsPB pb;
+  RowOperationsPBEncoder().Add(RowOperationsPB::UPSERT, row);
+
+  arena_.Reset();
+  RowOperationsPBDecoder decoder(, _schema, _schema, _);
+  vector ops;
+  ASSERT_OK(decoder.DecodeOperations());
+  // The correct bitmap should be {1, 1, 0}.
+  ASSERT_EQ(1, ops.size());
+  ASSERT_TRUE(BitmapTest(ops[0].isset_bitmap, 0));
+  ASSERT_TRUE(BitmapTest(ops[0].isset_bitmap, 1));
+  ASSERT_FALSE(BitmapTest(ops[0].isset_bitmap, 2));
+}
+
 } // namespace kudu
diff --git a/src/kudu/common/row_operations.cc 
b/src/kudu/common/row_operations.cc
index 5e8d560ec..b1cf61362 100644
--- a/src/kudu/common/row_operations.cc
+++ b/src/kudu/common/row_operations.cc
@@ -439,6 +439,10 @@ Status RowOperationsPBDecoder::DecodeInsertOrUpsert(const 
uint8_t* prototype_row
   if (PREDICT_FALSE(!tablet_row_storage || !tablet_isset_bitmap)) {
 return Status::RuntimeError("Out of memory");
   }
+  // Initialize the bitmap since some columns might be lost in the client 
schema,
+  // in which case the original value of the lost columns might be set to 
default
+  // value by upsert request.
+  memset(tablet_isset_bitmap, 0, BitmapSize(tablet_schema_->num_columns()));
 
   // Initialize the new row from the 'prototype' row which has been set
   // with all of the server-side default values. This copy may be entirely
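
A standalone sketch (hypothetical helpers, not the Kudu decoder) of the bug class being fixed: if the isset bitmap starts out with garbage bits, columns the client never sent appear to be "set", and an UPSERT would overwrite them with default values; zeroing the bitmap before setting per-column bits avoids that.

```
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

inline size_t BitmapSizeBytes(size_t num_bits) { return (num_bits + 7) / 8; }
inline void BitmapSet(uint8_t* bm, size_t idx) { bm[idx / 8] |= (1u << (idx % 8)); }
inline bool BitmapTest(const uint8_t* bm, size_t idx) { return bm[idx / 8] & (1u << (idx % 8)); }

int main() {
  const size_t kServerCols = 3;   // key, int_val, string_val
  const size_t kClientCols = 2;   // the client only knows key, int_val

  // Simulate a bitmap carved out of "dirty" (uninitialized) memory.
  std::vector<uint8_t> bitmap(BitmapSizeBytes(kServerCols), 0xFF);
  // The fix: start from all-unset before marking client-provided columns.
  std::memset(bitmap.data(), 0, bitmap.size());

  for (size_t i = 0; i < kClientCols; ++i) {
    BitmapSet(bitmap.data(), i);  // only columns the client actually sent
  }

  assert(BitmapTest(bitmap.data(), 0));
  assert(BitmapTest(bitmap.data(), 1));
  assert(!BitmapTest(bitmap.data(), 2));  // string_val stays "not set"
  return 0;
}
```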



(kudu) branch master updated: [tablet] one less memory allocation in CBTree::count()

2024-04-10 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 634d967a0 [tablet] one less memory allocation in CBTree::count()
634d967a0 is described below

commit 634d967a0c620db2b3932c09b1fe13be1dc70f44
Author: Alexey Serbin 
AuthorDate: Tue Apr 9 10:36:05 2024 -0700

[tablet] one less memory allocation in CBTree::count()

While reviewing [1], I noticed there is room for improvement in CBTree.
One trivial improvement is avoiding calls to 'new' when an object can be
allocated on the stack.  This makes sense since in some intensive Kudu
workloads we might see significant lock contention in tcmalloc, and
removing needless calls to 'new' helps to relieve that, even if it's
not in the hot path.

[1] https://gerrit.cloudera.org/#/c/21127/

Change-Id: I01a68e0427b399db92b33c910185654d195150a5
Reviewed-on: http://gerrit.cloudera.org:8080/21276
Reviewed-by: Yingchun Lai 
Reviewed-by: Yifan Zhang 
Tested-by: Alexey Serbin 
---
 src/kudu/tablet/concurrent_btree.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/kudu/tablet/concurrent_btree.h 
b/src/kudu/tablet/concurrent_btree.h
index 2965c332f..85f38a690 100644
--- a/src/kudu/tablet/concurrent_btree.h
+++ b/src/kudu/tablet/concurrent_btree.h
@@ -1102,13 +1102,13 @@ class CBTree {
   // Note that this requires iterating through the entire tree,
   // so it is not very efficient.
   size_t count() const {
-std::unique_ptr> iter(NewIterator());
+CBTreeIterator iter(this, frozen_);
 bool exact;
-iter->SeekAtOrAfter(Slice(""), );
+iter.SeekAtOrAfter(Slice(""), );
 size_t count = 0;
-while (iter->IsValid()) {
-  count++;
-  iter->Next();
+while (iter.IsValid()) {
+  ++count;
+  iter.Next();
 }
 return count;
   }
@@ -1855,11 +1855,11 @@ class CBTreeIterator {
 }
   }
 
-  const CBTree *tree_;
+  const CBTree* const tree_;
 
   // If true, the tree we are scanning is completely frozen and we don't
   // need to perform optimistic concurrency control or copies for safety.
-  bool tree_frozen_;
+  const bool tree_frozen_;
 
   bool seeked_;
   size_t idx_in_leaf_;
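
A minimal sketch of the allocation-avoidance pattern (illustrative types, not Kudu's CBTree): when a method only walks the structure locally, constructing the iterator on the stack avoids the heap allocation a NewIterator()-style factory would incur.

```
#include <cstddef>
#include <memory>
#include <vector>

struct IntTree {
  std::vector<int> values;

  struct Iterator {
    explicit Iterator(const IntTree* t) : tree_(t) {}
    bool IsValid() const { return idx_ < tree_->values.size(); }
    void Next() { ++idx_; }
    const IntTree* tree_;
    size_t idx_ = 0;
  };

  // Factory for external callers that need to own the iterator.
  std::unique_ptr<Iterator> NewIterator() const {
    return std::make_unique<Iterator>(this);
  }

  // Internal counting walks the tree with a stack-allocated iterator,
  // making no call into the allocator.
  size_t count() const {
    Iterator iter(this);
    size_t n = 0;
    while (iter.IsValid()) { ++n; iter.Next(); }
    return n;
  }
};

int main() {
  IntTree t{{1, 2, 3}};
  return t.count() == 3 ? 0 : 1;
}
```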



(kudu) branch branch-1.17.x updated (597d2bf15 -> 9de4325a1)

2024-04-10 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a change to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git


from 597d2bf15 KUDU-3564: Fix IN list predicate pruning
 new 44f4b930a KUDU-3326 correct error messages in tool usage instructions
 new 9de4325a1 KUDU-3433 Fix flakiness in 
ClientTest.TestDeleteWithDeletedTableReserveSecondsWorks

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/kudu/client/client-test.cc   | 3 ++-
 src/kudu/tools/tool_action_common.cc | 1 +
 src/kudu/tools/tool_action_common.h  | 1 +
 src/kudu/tools/tool_action_table.cc  | 4 ++--
 4 files changed, 6 insertions(+), 3 deletions(-)



(kudu) 01/02: KUDU-3326 correct error messages in tool usage instructions

2024-04-10 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 44f4b930a8c2a4babc41870678683845694f3975
Author: kedeng 
AuthorDate: Tue Apr 9 14:44:26 2024 +0800

KUDU-3326 correct error messages in tool usage instructions

This patch mainly fixes the error messages in the recall tool as follows:
`
Usage: kudu table recall <master_addresses> <tablet_id> [-new_table_name=<new_table_name>]
`
We expect to recall tables in soft delete state using the table ID,
not the tablet ID. The correct output is as follows:
`
Usage: kudu table recall <master_addresses> <table_id> [-new_table_name=<new_table_name>]
`

This is just about correcting the errors in the prompts and does not
touch any recall logic. Therefore, no additional
unit tests are added.

Change-Id: Ib1c6df0806eee78280b00f6a528d42d434a63e2f
Reviewed-on: http://gerrit.cloudera.org:8080/21266
Reviewed-by: Alexey Serbin 
Tested-by: Alexey Serbin 
(cherry picked from commit d06f35db2e5a0e93a3dcab0d3b26e4443914df89)
Reviewed-on: http://gerrit.cloudera.org:8080/21273
Reviewed-by: Yingchun Lai 
---
 src/kudu/tools/tool_action_common.cc | 1 +
 src/kudu/tools/tool_action_common.h  | 1 +
 src/kudu/tools/tool_action_table.cc  | 4 ++--
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/kudu/tools/tool_action_common.cc 
b/src/kudu/tools/tool_action_common.cc
index d878d4173..6ee9ce6ec 100644
--- a/src/kudu/tools/tool_action_common.cc
+++ b/src/kudu/tools/tool_action_common.cc
@@ -272,6 +272,7 @@ const char* const kDestMasterAddressesArgDesc = "Either 
comma-separated list of
 "master addresses where each address is of form 'hostname:port', or a 
cluster name if it has "
 "been configured in ${KUDU_CONFIG}/kudurc";
 const char* const kTableNameArg = "table_name";
+const char* const kTableIdArg = "table_id";
 const char* const kTabletIdArg = "tablet_id";
 const char* const kTabletIdArgDesc = "Tablet Identifier";
 const char* const kTabletIdsCsvArg = "tablet_ids";
diff --git a/src/kudu/tools/tool_action_common.h 
b/src/kudu/tools/tool_action_common.h
index 157c4aa86..d314788b2 100644
--- a/src/kudu/tools/tool_action_common.h
+++ b/src/kudu/tools/tool_action_common.h
@@ -69,6 +69,7 @@ extern const char* const kMasterAddressesArgDesc;
 extern const char* const kDestMasterAddressesArg;
 extern const char* const kDestMasterAddressesArgDesc;
 extern const char* const kTableNameArg;
+extern const char* const kTableIdArg;
 extern const char* const kTabletIdArg;
 extern const char* const kTabletIdArgDesc;
 extern const char* const kTabletIdsCsvArg;
diff --git a/src/kudu/tools/tool_action_table.cc 
b/src/kudu/tools/tool_action_table.cc
index 9cc084bc5..f613445c3 100644
--- a/src/kudu/tools/tool_action_table.cc
+++ b/src/kudu/tools/tool_action_table.cc
@@ -826,7 +826,7 @@ Status SetRowCountLimit(const RunnerContext& context) {
 }
 
 Status RecallTable(const RunnerContext& context) {
-  const string& table_id = FindOrDie(context.required_args, kTabletIdArg);
+  const string& table_id = FindOrDie(context.required_args, kTableIdArg);
   client::sp::shared_ptr client;
   RETURN_NOT_OK(CreateKuduClient(context, ));
   return client->RecallTable(table_id, FLAGS_new_table_name);
@@ -1858,7 +1858,7 @@ unique_ptr BuildTableMode() {
   ActionBuilder("recall", )
   .Description("Recall a deleted but still reserved table")
   .AddRequiredParameter({ kMasterAddressesArg, kMasterAddressesArgDesc })
-  .AddRequiredParameter({ kTabletIdArg, "ID of the table to recall" })
+  .AddRequiredParameter({ kTableIdArg, "ID of the table to recall" })
   .AddOptionalParameter("new_table_name")
   .Build();
 



(kudu) 02/02: KUDU-3433 Fix flakiness in ClientTest.TestDeleteWithDeletedTableReserveSecondsWorks

2024-04-10 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 9de4325a1a62f05ff29e6000daf60df4ef622e24
Author: kedeng 
AuthorDate: Tue Apr 9 16:57:57 2024 +0800

KUDU-3433 Fix flakiness in 
ClientTest.TestDeleteWithDeletedTableReserveSecondsWorks

The unit test was flaky because a corner case was not considered: between
detecting a table in the soft-deleted state and the actual execution of the
deletion, other operations may occur, so more time needs to be reserved.

However, in order to verify that FLAGS_table_reserve_seconds is indeed
effective, the additional time cannot be too long. Taking these two factors
into consideration, I added one second of waiting time and ran the test
multiple times locally with the gtest_repeat parameter to make sure this
modification is effective.

Change-Id: Iab3a6a5f54701470e368c043133f4646aa71126d
Reviewed-on: http://gerrit.cloudera.org:8080/21268
Tested-by: Alexey Serbin 
Reviewed-by: Alexey Serbin 
(cherry picked from commit e5f3d2b19e7a210f64e76584fbe53454886668cb)
Reviewed-on: http://gerrit.cloudera.org:8080/21272
Reviewed-by: Yingchun Lai 
---
 src/kudu/client/client-test.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index dfc250348..1b0ccd7e1 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -5428,7 +5428,8 @@ TEST_F(ClientTest, 
TestDeleteWithDeletedTableReserveSecondsWorks) {
   ASSERT_TRUE(tables.empty());
 
   // Test FLAGS_table_reserve_seconds.
-  SleepFor(MonoDelta::FromMilliseconds(5 * 1000));
+  // We allow a bit more time than reserved to avoid corner cases.
+  SleepFor(MonoDelta::FromMilliseconds(6 * 1000));
 
   // No tables left.
   ASSERT_OK(client_->ListTables());



(kudu) branch master updated: KUDU-3433 Fix flakiness in ClientTest.TestDeleteWithDeletedTableReserveSecondsWorks

2024-04-09 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new e5f3d2b19 KUDU-3433 Fix flakiness in 
ClientTest.TestDeleteWithDeletedTableReserveSecondsWorks
e5f3d2b19 is described below

commit e5f3d2b19e7a210f64e76584fbe53454886668cb
Author: kedeng 
AuthorDate: Tue Apr 9 16:57:57 2024 +0800

KUDU-3433 Fix flakiness in 
ClientTest.TestDeleteWithDeletedTableReserveSecondsWorks

The unit test was flaky because a corner case was not considered: between
detecting a table in the soft-deleted state and the actual execution of the
deletion, other operations may occur, so more time needs to be reserved.

However, in order to verify that FLAGS_table_reserve_seconds is indeed
effective, the additional time cannot be too long. Taking these two factors
into consideration, I added one second of waiting time and ran the test
multiple times locally with the gtest_repeat parameter to make sure this
modification is effective.

Change-Id: Iab3a6a5f54701470e368c043133f4646aa71126d
Reviewed-on: http://gerrit.cloudera.org:8080/21268
Tested-by: Alexey Serbin 
Reviewed-by: Alexey Serbin 
---
 src/kudu/client/client-test.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index 1078c6884..c3b46baa5 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -5710,7 +5710,8 @@ TEST_F(ClientTest, 
TestDeleteWithDeletedTableReserveSecondsWorks) {
   ASSERT_TRUE(tables.empty());
 
   // Test FLAGS_table_reserve_seconds.
-  SleepFor(MonoDelta::FromMilliseconds(5 * 1000));
+  // We allow a bit more time than reserved to avoid corner cases.
+  SleepFor(MonoDelta::FromMilliseconds(6 * 1000));
 
   // No tables left.
   ASSERT_OK(client_->ListTables());



(kudu) branch master updated: KUDU-3326 correct error messages in tool usage instructions

2024-04-09 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new d06f35db2 KUDU-3326 correct error messages in tool usage instructions
d06f35db2 is described below

commit d06f35db2e5a0e93a3dcab0d3b26e4443914df89
Author: kedeng 
AuthorDate: Tue Apr 9 14:44:26 2024 +0800

KUDU-3326 correct error messages in tool usage instructions

This patch mainly fixes the error messages in the recall tool as follows:
`
Usage: kudu table recall <master_addresses> <tablet_id> [-new_table_name=<new_table_name>]
`
We expect to recall tables in soft delete state using the table ID,
not the tablet ID. The correct output is as follows:
`
Usage: kudu table recall <master_addresses> <table_id> [-new_table_name=<new_table_name>]
`

This is just about correcting the errors in the prompts and does not
touch any recall logic. Therefore, no additional
unit tests are added.

Change-Id: Ib1c6df0806eee78280b00f6a528d42d434a63e2f
Reviewed-on: http://gerrit.cloudera.org:8080/21266
Reviewed-by: Alexey Serbin 
Tested-by: Alexey Serbin 
---
 src/kudu/tools/tool_action_common.cc | 1 +
 src/kudu/tools/tool_action_common.h  | 1 +
 src/kudu/tools/tool_action_table.cc  | 4 ++--
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/kudu/tools/tool_action_common.cc 
b/src/kudu/tools/tool_action_common.cc
index 31d39f3c4..f4cd2e354 100644
--- a/src/kudu/tools/tool_action_common.cc
+++ b/src/kudu/tools/tool_action_common.cc
@@ -294,6 +294,7 @@ const char* const kDestMasterAddressesArgDesc = "Either 
comma-separated list of
 "master addresses where each address is of form 'hostname:port', or a 
cluster name if it has "
 "been configured in ${KUDU_CONFIG}/kudurc";
 const char* const kTableNameArg = "table_name";
+const char* const kTableIdArg = "table_id";
 const char* const kTabletIdArg = "tablet_id";
 const char* const kTabletIdArgDesc = "Tablet Identifier";
 const char* const kTabletIdsCsvArg = "tablet_ids";
diff --git a/src/kudu/tools/tool_action_common.h 
b/src/kudu/tools/tool_action_common.h
index 061b4c677..ad3be7828 100644
--- a/src/kudu/tools/tool_action_common.h
+++ b/src/kudu/tools/tool_action_common.h
@@ -69,6 +69,7 @@ extern const char* const kMasterAddressesArgDesc;
 extern const char* const kDestMasterAddressesArg;
 extern const char* const kDestMasterAddressesArgDesc;
 extern const char* const kTableNameArg;
+extern const char* const kTableIdArg;
 extern const char* const kTabletIdArg;
 extern const char* const kTabletIdArgDesc;
 extern const char* const kTabletIdsCsvArg;
diff --git a/src/kudu/tools/tool_action_table.cc 
b/src/kudu/tools/tool_action_table.cc
index 218209f6a..9d8ae7885 100644
--- a/src/kudu/tools/tool_action_table.cc
+++ b/src/kudu/tools/tool_action_table.cc
@@ -832,7 +832,7 @@ Status SetRowCountLimit(const RunnerContext& context) {
 }
 
 Status RecallTable(const RunnerContext& context) {
-  const string& table_id = FindOrDie(context.required_args, kTabletIdArg);
+  const string& table_id = FindOrDie(context.required_args, kTableIdArg);
   client::sp::shared_ptr client;
   RETURN_NOT_OK(CreateKuduClient(context, ));
   return client->RecallTable(table_id, FLAGS_new_table_name);
@@ -1914,7 +1914,7 @@ unique_ptr BuildTableMode() {
   ActionBuilder("recall", )
   .Description("Recall a deleted but still reserved table")
   .AddRequiredParameter({ kMasterAddressesArg, kMasterAddressesArgDesc })
-  .AddRequiredParameter({ kTabletIdArg, "ID of the table to recall" })
+  .AddRequiredParameter({ kTableIdArg, "ID of the table to recall" })
   .AddOptionalParameter("new_table_name")
   .Build();
 



(kudu) branch branch-1.17.x updated: KUDU-3564: Fix IN list predicate pruning

2024-04-07 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/branch-1.17.x by this push:
 new 597d2bf15 KUDU-3564: Fix IN list predicate pruning
597d2bf15 is described below

commit 597d2bf156df097e7b04c7040323a55b291d0f3f
Author: zhangyifan27 
AuthorDate: Fri Apr 5 09:35:46 2024 +0800

KUDU-3564: Fix IN list predicate pruning

This patch fixes IN list predicate pruning with a range specific
hash schema by modifying the content of 'PartitionMayContainRow'
method. We now get the right hash schema based on specific
partition's lower bound key.

This is a follow-up to 607d9d0.

Change-Id: I964b1ccfb85602741843ab555cdee53343217033
Reviewed-on: http://gerrit.cloudera.org:8080/21243
Tested-by: Alexey Serbin 
Reviewed-by: Alexey Serbin 
(cherry picked from commit 5a2c776dfb894310fc286f3ebe60d53c8a5e9341)
Reviewed-on: http://gerrit.cloudera.org:8080/21253
Reviewed-by: Yifan Zhang 
---
 src/kudu/common/column_predicate.cc |   2 +-
 src/kudu/common/partition.cc|   4 +-
 src/kudu/common/scan_spec-test.cc   | 101 
 3 files changed, 93 insertions(+), 14 deletions(-)

diff --git a/src/kudu/common/column_predicate.cc 
b/src/kudu/common/column_predicate.cc
index 4b163da9e..08d582715 100644
--- a/src/kudu/common/column_predicate.cc
+++ b/src/kudu/common/column_predicate.cc
@@ -464,7 +464,7 @@ void ColumnPredicate::MergeIntoIsNull(const ColumnPredicate 
) {
 
 void ColumnPredicate::MergeIntoInList(const ColumnPredicate ) {
   CHECK(predicate_type_ == PredicateType::InList);
-  DCHECK(values_.size() > 1);
+  DCHECK(values_.size() >= 1);
 
   switch (other.predicate_type()) {
 case PredicateType::None: {
diff --git a/src/kudu/common/partition.cc b/src/kudu/common/partition.cc
index cd8599ca3..0d3c150a9 100644
--- a/src/kudu/common/partition.cc
+++ b/src/kudu/common/partition.cc
@@ -808,9 +808,7 @@ bool PartitionSchema::PartitionMayContainRow(const 
Partition& partition,
 return false;
   }
 
-  string range_key;
-  EncodeColumns(row, range_schema_.column_ids, _key);
-  const auto& hash_schema = GetHashSchemaForRange(range_key);
+  const auto& hash_schema = 
GetHashSchemaForRange(partition.begin_.range_key());
   for (size_t i = 0; i < hash_schema.size(); ++i) {
 const auto& hash_dimension = hash_schema[i];
 if (hash_dimension.column_ids.size() == 1 &&
diff --git a/src/kudu/common/scan_spec-test.cc 
b/src/kudu/common/scan_spec-test.cc
index 6c8e68ac9..90549a32a 100644
--- a/src/kudu/common/scan_spec-test.cc
+++ b/src/kudu/common/scan_spec-test.cc
@@ -35,6 +35,8 @@
 #include "kudu/common/partial_row.h"
 #include "kudu/common/partition.h"
 #include "kudu/common/row.h"
+#include "kudu/common/row_operations.h"
+#include "kudu/common/row_operations.pb.h"
 #include "kudu/common/schema.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/strings/stringpiece.h"
@@ -51,16 +53,13 @@ using std::vector;
 namespace kudu {
 
 namespace {
-// Generate partition schema of a table with given hash_partitions and range 
partition keys.
-// E.g. GeneratePartitionSchema(schema, {make_pair({a, b}, 3), make_pair({c}, 
5) })
-// Returns 'partition by hash(a, b) partitions 3, hash(c) partitions 5'.
-void GeneratePartitionSchema(const Schema& schema,
- const vector, int>>& 
hash_partitions,
- const vector& range_partition_columns,
- PartitionSchema* partition_schema) {
-  PartitionSchemaPB partition_schema_pb;
+
+void GeneratePartitionSchemaPB(const Schema& schema,
+   const vector, int>>& 
hash_partitions,
+   const vector& range_partition_columns,
+   PartitionSchemaPB* partition_schema_pb) {
   for (const auto& col_names_and_num_buckets : hash_partitions) {
-auto* hash_dimension_pb = partition_schema_pb.add_hash_schema();
+auto* hash_dimension_pb = partition_schema_pb->add_hash_schema();
 hash_dimension_pb->set_num_buckets(col_names_and_num_buckets.second);
 hash_dimension_pb->set_seed(0);
 for (const auto& col_name : col_names_and_num_buckets.first) {
@@ -71,14 +70,53 @@ void GeneratePartitionSchema(const Schema& schema,
 }
   }
   if (!range_partition_columns.empty()) {
-auto* range_schema = partition_schema_pb.mutable_range_schema();
+auto* range_schema = partition_schema_pb->mutable_range_schema();
 for (const auto& range_column : range_partition_columns) {
   range_schema->add_columns()->set_name(range_column);
 }
   }
+}
+
+// Generate partition schema of 

(kudu) branch master updated: KUDU-3564: Fix IN list predicate pruning

2024-04-07 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 5a2c776df KUDU-3564: Fix IN list predicate pruning
5a2c776df is described below

commit 5a2c776dfb894310fc286f3ebe60d53c8a5e9341
Author: zhangyifan27 
AuthorDate: Fri Apr 5 09:35:46 2024 +0800

KUDU-3564: Fix IN list predicate pruning

This patch fixes IN list predicate pruning with a range specific
hash schema by modifying the content of 'PartitionMayContainRow'
method. We now get the right hash schema based on specific
partition's lower bound key.

This is a follow-up to 607d9d0.

Change-Id: I964b1ccfb85602741843ab555cdee53343217033
Reviewed-on: http://gerrit.cloudera.org:8080/21243
Tested-by: Alexey Serbin 
Reviewed-by: Alexey Serbin 
---
 src/kudu/common/column_predicate.cc |   2 +-
 src/kudu/common/partition.cc|   4 +-
 src/kudu/common/scan_spec-test.cc   | 101 
 3 files changed, 93 insertions(+), 14 deletions(-)

diff --git a/src/kudu/common/column_predicate.cc b/src/kudu/common/column_predicate.cc
index 80f41bb77..b0626708f 100644
--- a/src/kudu/common/column_predicate.cc
+++ b/src/kudu/common/column_predicate.cc
@@ -462,7 +462,7 @@ void ColumnPredicate::MergeIntoIsNull(const ColumnPredicate &other) {
 
 void ColumnPredicate::MergeIntoInList(const ColumnPredicate &other) {
   CHECK(predicate_type_ == PredicateType::InList);
-  DCHECK(values_.size() > 1);
+  DCHECK(values_.size() >= 1);
 
   switch (other.predicate_type()) {
 case PredicateType::None: {
diff --git a/src/kudu/common/partition.cc b/src/kudu/common/partition.cc
index b8ccd386c..3b5af8a5c 100644
--- a/src/kudu/common/partition.cc
+++ b/src/kudu/common/partition.cc
@@ -807,9 +807,7 @@ bool PartitionSchema::PartitionMayContainRow(const Partition& partition,
 return false;
   }
 
-  string range_key;
-  EncodeColumns(row, range_schema_.column_ids, &range_key);
-  const auto& hash_schema = GetHashSchemaForRange(range_key);
+  const auto& hash_schema = GetHashSchemaForRange(partition.begin_.range_key());
   for (size_t i = 0; i < hash_schema.size(); ++i) {
 const auto& hash_dimension = hash_schema[i];
 if (hash_dimension.column_ids.size() == 1 &&
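
For illustration, a minimal self-contained C++ sketch of the idea behind this fix; all names below are hypothetical and simplified, not the actual Kudu types or the real GetHashSchemaForRange() logic. The point is that the hash schema is looked up by the partition's own lower bound rather than by re-encoding the row's range key:

  // Hypothetical sketch: each range, keyed by its encoded lower bound, may use
  // a different number of hash buckets (a "range-specific hash schema").
  #include <functional>
  #include <iterator>
  #include <map>
  #include <string>

  struct HashSchemaSketch {
    int num_buckets;  // per-range hash bucket count (illustrative only)
  };

  // Encoded range lower bound -> hash schema of that range.
  using RangeHashSchemas = std::map<std::string, HashSchemaSketch>;

  // Picks the schema of the range that starts at or before 'range_key'.
  // Assumes 'schemas' is non-empty and contains an entry with key "".
  const HashSchemaSketch& SchemaForRange(const RangeHashSchemas& schemas,
                                         const std::string& range_key) {
    return std::prev(schemas.upper_bound(range_key))->second;
  }

  // The gist of the fix: choose the hash schema from the partition's lower
  // bound, so the row is hashed with the schema of the range being checked.
  bool PartitionMayContainRowSketch(const RangeHashSchemas& schemas,
                                    const std::string& partition_lower_bound,
                                    int partition_hash_bucket,
                                    const std::string& row_value) {
    const HashSchemaSketch& hs = SchemaForRange(schemas, partition_lower_bound);
    const int row_bucket = static_cast<int>(
        std::hash<std::string>{}(row_value) % hs.num_buckets);
    return row_bucket == partition_hash_bucket;
  }
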
diff --git a/src/kudu/common/scan_spec-test.cc b/src/kudu/common/scan_spec-test.cc
index 6c8e68ac9..90549a32a 100644
--- a/src/kudu/common/scan_spec-test.cc
+++ b/src/kudu/common/scan_spec-test.cc
@@ -35,6 +35,8 @@
 #include "kudu/common/partial_row.h"
 #include "kudu/common/partition.h"
 #include "kudu/common/row.h"
+#include "kudu/common/row_operations.h"
+#include "kudu/common/row_operations.pb.h"
 #include "kudu/common/schema.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/strings/stringpiece.h"
@@ -51,16 +53,13 @@ using std::vector;
 namespace kudu {
 
 namespace {
-// Generate partition schema of a table with given hash_partitions and range partition keys.
-// E.g. GeneratePartitionSchema(schema, {make_pair({a, b}, 3), make_pair({c}, 5) })
-// Returns 'partition by hash(a, b) partitions 3, hash(c) partitions 5'.
-void GeneratePartitionSchema(const Schema& schema,
-                             const vector<pair<vector<string>, int>>& hash_partitions,
-                             const vector<string>& range_partition_columns,
-                             PartitionSchema* partition_schema) {
-  PartitionSchemaPB partition_schema_pb;
+
+void GeneratePartitionSchemaPB(const Schema& schema,
+                               const vector<pair<vector<string>, int>>& hash_partitions,
+                               const vector<string>& range_partition_columns,
+                               PartitionSchemaPB* partition_schema_pb) {
   for (const auto& col_names_and_num_buckets : hash_partitions) {
-auto* hash_dimension_pb = partition_schema_pb.add_hash_schema();
+auto* hash_dimension_pb = partition_schema_pb->add_hash_schema();
 hash_dimension_pb->set_num_buckets(col_names_and_num_buckets.second);
 hash_dimension_pb->set_seed(0);
 for (const auto& col_name : col_names_and_num_buckets.first) {
@@ -71,14 +70,53 @@ void GeneratePartitionSchema(const Schema& schema,
 }
   }
   if (!range_partition_columns.empty()) {
-auto* range_schema = partition_schema_pb.mutable_range_schema();
+auto* range_schema = partition_schema_pb->mutable_range_schema();
 for (const auto& range_column : range_partition_columns) {
   range_schema->add_columns()->set_name(range_column);
 }
   }
+}
+
+// Generate partition schema of a table with given hash_partitions and range partition keys.
+// E.g. GeneratePartitionSchema(schema, {make_pair({a, b}, 3), make_pair({c}, 5) })
+// Return

(kudu) 02/02: [c++17] fix unsorted compilation warnings with CLANG15

2024-04-05 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit d8c315782bcd64e6c512885cc3ba578acbd6f45c
Author: Alexey Serbin 
AuthorDate: Thu Apr 4 13:04:13 2024 -0700

[c++17] fix unsorted compilation warnings with CLANG15

As a follow-up to a few prior patches, this patch addresses the rest
of the warnings emitted by CLANG15 when compiling the Kudu project
on macOS Sonoma.

There are still some warnings from the linker, e.g. warnings about
the difference between target OS versions of the LLVM libraries
from 3rd-party and Kudu binaries being linked.  I think it's better
to address them in a separate patch, if at all.

Change-Id: Iab094ec0766994d4e4d399a4ae1eb112ec16e5ee
Reviewed-on: http://gerrit.cloudera.org:8080/21242
Tested-by: Alexey Serbin 
Reviewed-by: Abhishek Chennaka 
---
 src/kudu/gutil/strings/escaping.cc | 6 ++
 src/kudu/gutil/sysinfo.cc  | 5 +++--
 src/kudu/server/webserver.cc   | 6 ++
 src/kudu/tablet/ops/op_tracker-test.cc | 3 ++-
 src/kudu/tablet/tablet.cc  | 2 --
 5 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/kudu/gutil/strings/escaping.cc b/src/kudu/gutil/strings/escaping.cc
index 6ad75035a..b9b0401a6 100644
--- a/src/kudu/gutil/strings/escaping.cc
+++ b/src/kudu/gutil/strings/escaping.cc
@@ -492,6 +492,11 @@ bool CUnescapeForNullTerminatedString(const StringPiece& source,
   return CUnescapeInternal(source, kLeaveNullsEscaped, dest, error);
 }
 
+// Avoid warnings about sprintf() deprecation from contemporary compilers.
+// Silencing warnings seems to be a good option because this code has been
+// imported to Kudu from an external project repo and doesn't change much.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 // --
 // CEscapeString()
 // CHexEscapeString()
@@ -551,6 +556,7 @@ int CEscapeInternal(const char* src, int src_len, char* dest,
   dest[used] = '\0';   // doesn't count towards return value though
   return used;
 }
+#pragma GCC diagnostic pop
 
 int CEscapeString(const char* src, int src_len, char* dest, int dest_len) {
   return CEscapeInternal(src, src_len, dest, dest_len, false, false);
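
As a self-contained reference, the suppression pattern used above boils down to the following; nothing here is Kudu-specific, and the deprecated function is made up for illustration:

  // 'push' saves the current diagnostic state, 'ignored' disables one warning
  // for the enclosed region, and 'pop' restores the saved state so the rest of
  // the translation unit still gets the warning.
  #include <cstdio>

  #pragma GCC diagnostic push
  #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
  [[deprecated("use new_api() instead")]] static int old_api() { return 42; }
  static int call_old_api() { return old_api(); }  // no warning emitted here
  #pragma GCC diagnostic pop

  int main() { std::printf("%d\n", call_old_api()); return 0; }
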
diff --git a/src/kudu/gutil/sysinfo.cc b/src/kudu/gutil/sysinfo.cc
index 539d37a52..610c7db93 100644
--- a/src/kudu/gutil/sysinfo.cc
+++ b/src/kudu/gutil/sysinfo.cc
@@ -101,6 +101,7 @@ void SleepForMilliseconds(int64_t milliseconds) {
   SleepForNanoseconds(milliseconds * 1000 * 1000);
 }
 
+#if !(defined(__MACH__) && defined(__APPLE__))
 // Helper function estimates cycles/sec by observing cycles elapsed during
 // sleep(). Using small sleep time decreases accuracy significantly.
 static int64 EstimateCyclesPerSecond(const int estimate_time_ms) {
@@ -114,6 +115,7 @@ static int64 EstimateCyclesPerSecond(const int estimate_time_ms) {
   const int64 guess = int64(multiplier * (CycleClock::Now() - start_ticks));
   return guess;
 }
+#endif
 
 // ReadIntFromFile is only called on linux and cygwin platforms.
 #if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
@@ -236,8 +238,8 @@ static void InitializeSystemInfo() {
   if (already_called)  return;
   already_called = true;
 
+#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
   bool saw_mhz = false;
-
   if (RunningOnValgrind()) {
 // Valgrind may slow the progress of time artificially (--scale-time=N
 // option). We thus can't rely on CPU Mhz info stored in /sys or /proc
@@ -246,7 +248,6 @@ static void InitializeSystemInfo() {
 saw_mhz = true;
   }
 
-#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
   char line[1024];
   char* err;
   int freq;
diff --git a/src/kudu/server/webserver.cc b/src/kudu/server/webserver.cc
index b920f2b95..c5c18715f 100644
--- a/src/kudu/server/webserver.cc
+++ b/src/kudu/server/webserver.cc
@@ -365,7 +365,13 @@ Status Webserver::Start() {
   return Status::InvalidArgument("Unable to configure web server for 
SPNEGO authentication: "
  "must configure a keytab file for the 
server");
 }
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+// NOTE: this call is wrapped into 'ignored' pragma to suppress compilation
+//   warnings on macOS with Xcode where many gssapi_krb5 functions are
+//   deprecated in favor of GSS.framework.
 krb5_gss_register_acceptor_identity(kt_file);
+#pragma GCC diagnostic pop
   }
 
   options.emplace_back("listening_ports");
diff --git a/src/kudu/tablet/ops/op_tracker-test.cc b/src/kudu/tablet/ops/op_tracker-test.cc
index c948041a9..c3c15d938 100644
--- a/src/kudu/tablet/ops/op_tracker-test.

(kudu) branch master updated (6fd9410d2 -> d8c315782)

2024-04-05 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


from 6fd9410d2 [test] add binary path for SLES15 SP14
 new 1b523322c [c++17] address std::move() warnings
 new d8c315782 [c++17] fix unsorted compilation warnings with CLANG15

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/kudu/client/scan_predicate.cc|  6 ++--
 src/kudu/codegen/module_builder.cc   |  3 +-
 src/kudu/common/column_predicate.cc  | 38 +++-
 src/kudu/common/partition_pruner.cc  |  1 -
 src/kudu/common/scan_spec.cc |  5 ++--
 src/kudu/gutil/strings/escaping.cc   |  6 
 src/kudu/gutil/sysinfo.cc|  5 ++--
 src/kudu/hms/hms_catalog.cc  |  1 -
 src/kudu/integration-tests/master_authz-itest.cc | 31 ++-
 src/kudu/ranger/ranger_client-test.cc| 11 ---
 src/kudu/ranger/ranger_client.cc | 13 
 src/kudu/rpc/outbound_call.cc|  2 +-
 src/kudu/rpc/result_tracker.cc   |  1 -
 src/kudu/security/ca/cert_management.cc  |  3 +-
 src/kudu/security/ca/cert_management.h   |  3 +-
 src/kudu/server/webserver.cc |  6 
 src/kudu/tablet/ops/op_tracker-test.cc   |  3 +-
 src/kudu/tablet/tablet.cc|  2 --
 src/kudu/tools/tool_action_fs.cc |  1 -
 src/kudu/tools/tool_action_local_replica.cc  | 26 
 src/kudu/util/block_bloom_filter-test.cc |  2 +-
 21 files changed, 85 insertions(+), 84 deletions(-)



(kudu) 01/02: [c++17] address std::move() warnings

2024-04-05 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 1b523322cb786d650ee5122ff26e45986f896761
Author: Alexey Serbin 
AuthorDate: Wed Apr 3 23:01:34 2024 -0700

[c++17] address std::move() warnings

This patch addresses warnings about unqualified calls to std::move()
produced by CLANG15.  For more context, discussion, and references,
see [1].  As a part of this patch, follow-up warnings issued by IWYU
and TidyBot are addressed as well.

Prior to this patch, there were many warnings output when compiling
the project with CLANG from Xcode 15.3 on macOS Sonoma, for example:

  src/kudu/codegen/module_builder.cc:334:26: warning: unqualified call to 'std::move' [-Wunqualified-std-cast-call]
    EngineBuilder ebuilder(move(module_));
                           ^
                           std::

This patch doesn't contain any functional modifications.

[1] https://reviews.llvm.org/D119670?id=408276
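
A minimal standalone example of the warning being fixed; nothing below is from the Kudu tree:

  // With a 'using std::move;' (or via ADL on std types), an unqualified
  // move(x) still resolves to std::move, but CLANG 15 flags it with
  // -Wunqualified-std-cast-call; writing std::move(x) is the fix.
  #include <string>
  #include <utility>

  struct Holder {
    std::string value;
    explicit Holder(std::string v) : value(std::move(v)) {}  // qualified: no warning
    // : value(move(v)) would resolve via ADL and trigger the warning instead.
  };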

Change-Id: Id05c4d002fabc5fbd6d7b9cc8b886ddc7a8da0a1
Reviewed-on: http://gerrit.cloudera.org:8080/21241
Reviewed-by: Mahesh Reddy 
Tested-by: Alexey Serbin 
Reviewed-by: Abhishek Chennaka 
---
 src/kudu/client/scan_predicate.cc|  6 ++--
 src/kudu/codegen/module_builder.cc   |  3 +-
 src/kudu/common/column_predicate.cc  | 38 +++-
 src/kudu/common/partition_pruner.cc  |  1 -
 src/kudu/common/scan_spec.cc |  5 ++--
 src/kudu/hms/hms_catalog.cc  |  1 -
 src/kudu/integration-tests/master_authz-itest.cc | 31 ++-
 src/kudu/ranger/ranger_client-test.cc| 11 ---
 src/kudu/ranger/ranger_client.cc | 13 
 src/kudu/rpc/outbound_call.cc|  2 +-
 src/kudu/rpc/result_tracker.cc   |  1 -
 src/kudu/security/ca/cert_management.cc  |  3 +-
 src/kudu/security/ca/cert_management.h   |  3 +-
 src/kudu/tools/tool_action_fs.cc |  1 -
 src/kudu/tools/tool_action_local_replica.cc  | 26 
 src/kudu/util/block_bloom_filter-test.cc |  2 +-
 16 files changed, 68 insertions(+), 79 deletions(-)

diff --git a/src/kudu/client/scan_predicate.cc b/src/kudu/client/scan_predicate.cc
index 98a72e616..8e446c101 100644
--- a/src/kudu/client/scan_predicate.cc
+++ b/src/kudu/client/scan_predicate.cc
@@ -19,6 +19,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -38,7 +39,6 @@
 #include "kudu/util/status.h"
 
 using std::optional;
-using std::move;
 using std::shared_ptr;
 using std::unique_ptr;
 using std::vector;
@@ -71,7 +71,7 @@ KuduPredicate* KuduPredicate::Clone() const {
 ComparisonPredicateData::ComparisonPredicateData(ColumnSchema col,
                                                  KuduPredicate::ComparisonOp op,
                                                  KuduValue* val)
-: col_(move(col)),
+: col_(std::move(col)),
   op_(op),
   val_(val) {
 }
@@ -122,7 +122,7 @@ Status ComparisonPredicateData::AddToScanSpec(ScanSpec* spec, Arena* arena) {
 
 InListPredicateData::InListPredicateData(ColumnSchema col,
                                          vector<KuduValue*>* values)
-: col_(move(col)) {
+: col_(std::move(col)) {
   vals_.swap(*values);
 }
 
diff --git a/src/kudu/codegen/module_builder.cc b/src/kudu/codegen/module_builder.cc
index 59f3ec4af..cd5d4c022 100644
--- a/src/kudu/codegen/module_builder.cc
+++ b/src/kudu/codegen/module_builder.cc
@@ -94,7 +94,6 @@ using llvm::SMDiagnostic;
 using llvm::TargetMachine;
 using llvm::Type;
 using llvm::Value;
-using std::move;
 using std::ostream;
 using std::ostringstream;
 using std::string;
@@ -331,7 +330,7 @@ Status ModuleBuilder::Compile(unique_ptr* 
out) {
   Level opt_level = llvm::CodeGenOpt::None;
 #endif
   Module* module = module_.get();
-  EngineBuilder ebuilder(move(module_));
+  EngineBuilder ebuilder(std::move(module_));
   ebuilder.setErrorStr();
   ebuilder.setOptLevel(opt_level);
   ebuilder.setMCPU(llvm::sys::getHostCPUName());
diff --git a/src/kudu/common/column_predicate.cc b/src/kudu/common/column_predicate.cc
index 4b163da9e..80f41bb77 100644
--- a/src/kudu/common/column_predicate.cc
+++ b/src/kudu/common/column_predicate.cc
@@ -35,7 +35,6 @@
 #include "kudu/util/logging.h"
 #include "kudu/util/memory/arena.h"
 
-using std::move;
 using std::string;
 using std::vector;
 
@@ -46,7 +45,7 @@ ColumnPredicate::ColumnPredicate(PredicateType predicate_type,
  const void* lower,
  const void* upper)
 : predicate_type_(predicate_type),
-  column_(move(column)),
+  column_(std::move(column)),
   lower_(lower),
   upper_(upper) {
 }
@@ -55,7 +54,7 @@ ColumnPredicate:

(kudu) branch master updated: [c++17] address std::iterator deprecation warnings

2024-04-05 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 25e7a7549 [c++17] address std::iterator deprecation warnings
25e7a7549 is described below

commit 25e7a7549cf01ac68ea2de53d68e6b710e03cd9d
Author: Alexey Serbin 
AuthorDate: Wed Apr 3 22:16:53 2024 -0700

[c++17] address std::iterator deprecation warnings

This patch addresses warnings about deprecation of std::iterator [1]
produced by CLANG 15 on macOS Sonoma: there were many of those generated
prior to this patch.  See a blog post [2] referred from the isocpp.org
site [3] for more details.

This patch doesn't contain any functional modifications.

[1] https://en.cppreference.com/w/cpp/iterator/iterator
[2] https://www.fluentcpp.com/2018/05/08/std-iterator-deprecated/
[3] https://isocpp.org/blog/2018/05/stditerator-is-deprecated-why-what-it-was-and-what-to-use-instead-jonathan
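
A generic sketch of the replacement pattern applied in the diffs below; the iterator itself is made up, only the five trait aliases matter:

  // Instead of inheriting from the deprecated std::iterator<...>, the member
  // type aliases required by std::iterator_traits are declared explicitly.
  #include <cstddef>
  #include <iterator>

  class CountingIterator {
   public:
    // Types required by the iterator traits.
    using iterator_category = std::forward_iterator_tag;
    using value_type = int;
    using difference_type = std::ptrdiff_t;
    using pointer = int*;
    using reference = int&;

    explicit CountingIterator(int pos) : pos_(pos) {}
    int operator*() const { return pos_; }
    CountingIterator& operator++() { ++pos_; return *this; }
    bool operator!=(const CountingIterator& other) const { return pos_ != other.pos_; }

   private:
    int pos_;
  };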

Change-Id: Id82af8f860156f3452a1c6522603ffacaa1ad0cd
Reviewed-on: http://gerrit.cloudera.org:8080/21240
Reviewed-by: Mahesh Reddy 
Reviewed-by: Abhishek Chennaka 
Tested-by: Alexey Serbin 
---
 src/kudu/client/scan_batch.h| 10 +-
 src/kudu/gutil/strings/join.h   |  2 +-
 src/kudu/gutil/strings/split_internal.h | 10 --
 src/kudu/util/bitset.h  | 10 --
 4 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/src/kudu/client/scan_batch.h b/src/kudu/client/scan_batch.h
index 500253684..cef7f7609 100644
--- a/src/kudu/client/scan_batch.h
+++ b/src/kudu/client/scan_batch.h
@@ -367,6 +367,14 @@ class KUDU_EXPORT KuduScanBatch::RowPtr {
   const uint8_t* row_data_;
 };
 
+// std::iterator has been deprecated in C++17, but this code should still be
+// compilable by legacy C++98 compilers as well. It's also necessary to keep
+// backward compatibility with the ABI provided by earlier Kudu releases,
+// so modifying the inheritance chain isn't an option. Instead of removing
+// the inheritance from std::iterator<...> and explicitly defining the types
+// required by the STL iterator traits, the deprecation warnings are silenced.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 class KUDU_EXPORT KuduScanBatch::const_iterator
    : public std::iterator<std::forward_iterator_tag, KuduScanBatch::RowPtr> {
  public:
@@ -435,7 +443,7 @@ class KUDU_EXPORT KuduScanBatch::const_iterator
   const KuduScanBatch* const batch_;
   int idx_;
 };
-
+#pragma GCC diagnostic pop
 
 inline KuduScanBatch::const_iterator KuduScanBatch::begin() const {
   return const_iterator(this, 0);
diff --git a/src/kudu/gutil/strings/join.h b/src/kudu/gutil/strings/join.h
index c7c5c85d9..06f726686 100644
--- a/src/kudu/gutil/strings/join.h
+++ b/src/kudu/gutil/strings/join.h
@@ -204,7 +204,7 @@ void JoinStringsIterator(const ITERATOR& start,
 
   // Precompute resulting length so we can reserve() memory in one shot.
   if (start != end) {
-int length = delim.size()*(distance(start, end)-1);
+auto length = delim.size() * (std::distance(start, end) - 1);
 for (ITERATOR iter = start; iter != end; ++iter) {
   length += iter->size();
 }
diff --git a/src/kudu/gutil/strings/split_internal.h b/src/kudu/gutil/strings/split_internal.h
index c29be8807..15f2a0993 100644
--- a/src/kudu/gutil/strings/split_internal.h
+++ b/src/kudu/gutil/strings/split_internal.h
@@ -63,9 +63,15 @@ struct NoFilter {
 // The two-argument constructor is used to split the given text using the given
 // delimiter.
 template 
-class SplitIterator
-    : public std::iterator<std::input_iterator_tag, StringPiece> {
+class SplitIterator {
  public:
+  // Types required by the iterator traits.
+  using iterator_category = std::input_iterator_tag;
+  using value_type = StringPiece;
+  using difference_type = std::ptrdiff_t;
+  using pointer = StringPiece*;
+  using reference = StringPiece&;
+
   // Two constructors for "end" iterators.
   explicit SplitIterator(Delimiter d)
   : delimiter_(std::move(d)), predicate_(), is_end_(true) {}
diff --git a/src/kudu/util/bitset.h b/src/kudu/util/bitset.h
index 61a6274aa..a80e9c607 100644
--- a/src/kudu/util/bitset.h
+++ b/src/kudu/util/bitset.h
@@ -142,9 +142,15 @@ class FixedBitSet {
 
 // Forward iterator class for a FixedBitSet.
 template 
-class FixedBitSet::iterator :
-    public std::iterator<std::forward_iterator_tag, IntType> {
+class FixedBitSet::iterator {
  public:
+  // Types required by the iterator traits.
+  using iterator_category = std::forward_iterator_tag;
+  using value_type = IntType;
+  using difference_type = std::ptrdiff_t;
+  using pointer = IntType*;
+  using reference = IntType&;
+
   // Returns the value currently pointed at by this iterator.
   IntType operator*() {
    return static_cast<IntType>(idx_);



(kudu) branch master updated: [compaction] Code cleanup and readability improvement

2024-04-05 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 48ee1e8b5 [compaction] Code cleanup and readability improvement
48ee1e8b5 is described below

commit 48ee1e8b5d7792384178c75840a998e413aaa512
Author: Ashwani Raina 
AuthorDate: Tue Jan 23 17:50:11 2024 +0530

[compaction] Code cleanup and readability improvement

This is a base patch that does not change any functionality.
Goal is to break the compaction memory usage improvement
change into small ones to make it easy to review.

Change-Id: I54709b5e27751581c889854911323fbddab1c4ab
Reviewed-on: http://gerrit.cloudera.org:8080/21098
Tested-by: Alexey Serbin 
Reviewed-by: Alexey Serbin 
---
 src/kudu/tablet/compaction.cc   | 269 ++--
 src/kudu/tablet/compaction.h|  17 ++-
 src/kudu/tablet/delta_compaction.cc |  16 +--
 3 files changed, 178 insertions(+), 124 deletions(-)

diff --git a/src/kudu/tablet/compaction.cc b/src/kudu/tablet/compaction.cc
index d56b79b78..9ad3a1576 100644
--- a/src/kudu/tablet/compaction.cc
+++ b/src/kudu/tablet/compaction.cc
@@ -990,11 +990,11 @@ Mutation* MergeUndoHistories(Mutation* left, Mutation* right) {
 // and adds them to 'new_undo_head'.
 Status MergeDuplicatedRowHistory(const string& tablet_id,
  const scoped_refptr& 
error_manager,
- CompactionInputRow* old_row,
- Mutation** new_undo_head,
+ const CompactionInputRow& old_row,
  Arena* arena,
- const HistoryGcOpts& history_gc_opts) {
-  if (PREDICT_TRUE(old_row->previous_ghost == nullptr)) return Status::OK();
+ const HistoryGcOpts& history_gc_opts,
+ Mutation** new_undo_head) {
+  if (PREDICT_TRUE(old_row.previous_ghost == nullptr)) return Status::OK();
 
   // Use an all inclusive snapshot as all of the previous version's undos and redos
   // are guaranteed to be committed, otherwise the compaction wouldn't be able to
@@ -1003,7 +1003,7 @@ Status MergeDuplicatedRowHistory(const string& tablet_id,
 
   faststring dst;
 
-  CompactionInputRow* previous_ghost = old_row->previous_ghost;
+  CompactionInputRow* previous_ghost = old_row.previous_ghost;
   while (previous_ghost != nullptr) {
 
 // First step is to transform the old rows REDO's into UNDOs, if there are any.
@@ -1016,11 +1016,11 @@ Status MergeDuplicatedRowHistory(const string& tablet_id,
 
 RETURN_NOT_OK(ApplyMutationsAndGenerateUndos(all_snap,
  *previous_ghost,
- _new_undos_head,
- _delete_redo,
  arena,
+ history_gc_opts,
  _ghost->row,
- history_gc_opts));
+ _new_undos_head,
+ _delete_redo));
 
 // We should be left with only one redo, the delete.
 #ifndef NDEBUG
@@ -1179,13 +1179,11 @@ void RowSetsInCompactionOrFlush::DumpToLog() const {
   }
 }
 
-void RemoveAncientUndos(const HistoryGcOpts& history_gc_opts,
-Mutation** undo_head,
+bool RemoveAncientUndos(const HistoryGcOpts& history_gc_opts,
 const Mutation* redo_head,
-bool* is_garbage_collected) {
-  *is_garbage_collected = false;
+Mutation** undo_head) {
   if (!history_gc_opts.gc_enabled()) {
-return;
+return false;
   }
 
   // Make sure there is at most one REDO in the redo_head and that, if present, it's a DELETE.
@@ -1195,8 +1193,7 @@ void RemoveAncientUndos(const HistoryGcOpts& history_gc_opts,
 
 // Garbage collect rows that are deleted before the AHM.
 if (history_gc_opts.IsAncientHistory(redo_head->timestamp())) {
-  *is_garbage_collected = true;
-  return;
+  return true;
 }
   }
 
@@ -1218,6 +1215,7 @@ void RemoveAncientUndos(const HistoryGcOpts& history_gc_opts,
 prev_undo = undo_mut;
 undo_mut = undo_mut->next();
   }
+  return false;
 }
 
 // Applies the REDOs of 'src_row' in accordance with the input snapshot,
@@ -1228,11 +1226,11 @@ void RemoveAncientUndos(const HistoryGcOpts& history_gc_opts,
 // NOTE: input REDOs are expected to be in increasing timestamp order.
 Status ApplyMutationsAndGenerateUndos(const MvccSnapshot& snap,
 

(kudu) branch master updated: [codegen] fix compilation with CLANG 15 on macOS

2024-04-03 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
 new 3a856f450 [codegen] fix compilation with CLANG 15 on macOS
3a856f450 is described below

commit 3a856f4502a29f26bb8c9b10a17b06eb16892d02
Author: Alexey Serbin 
AuthorDate: Mon Apr 1 19:29:26 2024 -0700

[codegen] fix compilation with CLANG 15 on macOS

Prior to this fix, compiling Kudu with CLANG 15 on macOS Sonoma
would fail with an error like below:

  [..%] Generating precompiled.ll
  In file included from src/kudu/codegen/precompiled.cc:39:
  In file included from src/kudu/common/rowblock.h:21:
  ...
  /Applications/Xcode-15.3.0.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.4.sdk/usr/include/c++/v1/cstdlib:144:9: error: no member named 'at_quick_exit' in the global namespace
  using ::at_quick_exit _LIBCPP_USING_IF_EXISTS;
        ~~^
  /Applications/Xcode-15.3.0.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.4.sdk/usr/include/c++/v1/cstdlib:145:9: error: no member named 'quick_exit' in the global namespace
  using ::quick_exit _LIBCPP_USING_IF_EXISTS;

Change-Id: Ibe92b7f00fdd446010cea1bda5fd25868a9acabc
Reviewed-on: http://gerrit.cloudera.org:8080/21232
Reviewed-by: Abhishek Chennaka 
Tested-by: Alexey Serbin 
---
 src/kudu/codegen/CMakeLists.txt | 98 +
 1 file changed, 40 insertions(+), 58 deletions(-)

diff --git a/src/kudu/codegen/CMakeLists.txt b/src/kudu/codegen/CMakeLists.txt
index eb32a41d5..6ffa499e8 100644
--- a/src/kudu/codegen/CMakeLists.txt
+++ b/src/kudu/codegen/CMakeLists.txt
@@ -56,57 +56,50 @@ set(IR_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/precompiled.cc)
 set(IR_OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/precompiled.ll)
 set(IR_OUTPUT_CC ${IR_OUTPUT}.cc)
 
-# Retrieve all includes directories needed for precompilation.
-get_directory_property(IR_INCLUDES
-  DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-  INCLUDE_DIRECTORIES)
-foreach(noprefix ${IR_INCLUDES})
-  set(PREFIXED_IR_INCLUDES ${PREFIXED_IR_INCLUDES} -I${noprefix})
-endforeach()
-
+# Set proper include directories to run CLANG from the Kudu's thirdparty
+# when generating the pre-compiled.ll target (a.k.a. IR_OUTPUT). To avoid
+# mixing third-party CLANG's headers with the headers of the C++ compiler
+# used to build the top-level project, one shouldn't rely on the cmake's
+# INCLUDE_DIRECTORIES property, but instead explicitly specify
+# only the required paths with the '-cxx-isystem', '-isystem', and '-I' flags.
+# Otherwise, the contaminated header file space might lead either to
+# compilation errors or inconsistencies in the built binaries
+# if the compilation succeeds. The latter, in its turn, might result
+# in undefined behavior of the LLVM-generated code.
 if (APPLE)
-  # The macOS keeps the libc++ headers in a non-standard location so
-  # that the thirdparty CLANG does not know about by default.
-  #
-  # Xcode starting with version 12.5 has the libc++ headers under
-  # Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk,
-  # which corresponds to CMAKE_OSX_SYSROOT, so it's enough to add -isysroot
-  # pointing to CMAKE_OSX_SYSROOT.
-  #
-  # Xcode prior to version 12.5 (12.4 and earlier, inclusive) doesn't have the
-  # libc++ headers under CMAKE_OSX_SYSROOT, but instead keeps those under
-  # Contents/Developer/Toolchains/XcodeDefault.xctoolchain: with that,
-  # it's easy to deduce the path to the libc++ headers from the output
-  # produced by `clang++ --version`.
-  #
-  # For non-clang compilers, assume the libc++ include directory provided
-  # with the Xcode command line tools.
-  if (NOT "${COMPILER_FAMILY}" STREQUAL "clang")
-set(PREFIXED_IR_INCLUDES
-  ${PREFIXED_IR_INCLUDES}
-  -cxx-isystem "/Library/Developer/CommandLineTools/usr/include/c++/v1")
-  elseif (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 12.0.5)
-execute_process(
-  COMMAND ${CMAKE_CXX_COMPILER} --version
-  COMMAND grep -E "^InstalledDir: "
-  COMMAND sed "s/^InstalledDir: \\(.*\\)$/\\1/"
-  RESULT_VARIABLE CXX_INSTALLED_DIR_CMD_EXIT_CODE
-  OUTPUT_VARIABLE CXX_INSTALLED_DIR_CMD_OUT
-  OUTPUT_STRIP_TRAILING_WHITESPACE)
-if (${CXX_INSTALLED_DIR_CMD_EXIT_CODE} EQUAL 0 AND
-NOT ${CXX_INSTALLED_DIR_CMD_OUT} STREQUAL "")
-  set(PREFIXED_IR_INCLUDES
-${PREFIXED_IR_INCLUDES}
--cxx-isystem "${CXX_INSTALLED_DIR_CMD_OUT}/../include/c++/v1")
-else()
-  message(FATAL_ERROR "failed to deduce path to libc++ headers")
-endif()
+  # On macOS, the built-in C++ header search directories in the 3rd-party CLANG
+  # disappear upon adding "-isystem ${CMAKE_OSX_SYSROOT}/usr/in

(kudu) branch master updated (d980c688d -> dd4e37b44)

2024-04-02 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


from d980c688d [rpc] relax settings for MeasureAcceptorDispatchTimes
 new ab368443d [tests] fix a misprint
 new dd4e37b44 KUDU-3561 skip tablet entities in Prometheus format

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../integration-tests/disk_reservation-itest.cc|  2 +-
 src/kudu/util/metrics.cc   | 51 +-
 2 files changed, 32 insertions(+), 21 deletions(-)



(kudu) 01/02: [tests] fix a misprint

2024-04-02 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit ab368443d276006e93370a5b6bd5862eea2475fe
Author: Alexey Serbin 
AuthorDate: Mon Apr 1 19:40:38 2024 -0700

[tests] fix a misprint

Tests should use ASSERT_NE(), not DCHECK_NE().

This is a follow-up to e54c80cf9ba129b943eed6af3266899753b1b4ac.
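
For context, a hedged illustration of the difference; this is plain gtest/glog semantics, nothing Kudu-specific:

  // DCHECK_NE() is a debug-only process assertion: it is compiled out in
  // release (NDEBUG) builds and aborts the whole process when it fires in
  // debug builds. ASSERT_NE() is a gtest assertion that records a test
  // failure and returns from the current function in every build type.
  #include <gtest/gtest.h>

  TEST(IllustrationOnly, PointerIsSet) {
    int value = 42;
    const int* p = &value;   // stands in for a pointer obtained from the code under test
    ASSERT_NE(nullptr, p);   // reported as a test failure even in release builds
  }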

Change-Id: I78428cf60449820508f995c4a7c2a84582ce32c1
Reviewed-on: http://gerrit.cloudera.org:8080/21229
Tested-by: Alexey Serbin 
Reviewed-by: Mahesh Reddy 
Reviewed-by: Abhishek Chennaka 
---
 src/kudu/integration-tests/disk_reservation-itest.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/kudu/integration-tests/disk_reservation-itest.cc b/src/kudu/integration-tests/disk_reservation-itest.cc
index 519a2dfa7..d7e8dad01 100644
--- a/src/kudu/integration-tests/disk_reservation-itest.cc
+++ b/src/kudu/integration-tests/disk_reservation-itest.cc
@@ -177,7 +177,7 @@ TEST_F(DiskReservationITest, AvailableSpaceMetrics) {
   NO_FATALS(StartCluster(ts_flags, {}, 1));
 
   auto* ts = cluster_->tablet_server(0);
-  DCHECK_NE(nullptr, ts);
+  ASSERT_NE(nullptr, ts);
   const auto& addr = ts->bound_http_hostport();
 
   auto space_getter_data_dirs = [&](int64_t* available_bytes) {



(kudu) 02/02: KUDU-3561 skip tablet entities in Prometheus format

2024-04-02 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit dd4e37b445da7f367746fb83d92e21b8dc3d8eb8
Author: Alexey Serbin 
AuthorDate: Sun Mar 31 21:05:36 2024 -0700

KUDU-3561 skip tablet entities in Prometheus format

Currently, the Prometheus metrics writer outputs only server-level
metrics.  Prior to this patch, the Prometheus metrics writer would
output a warning message for every tablet it encountered while iterating
through all the existing metric entities.

This patch addresses the issue: the tablet entries are now silently
skipped, as they should be.

I also added a TODO for KUDU-3563 to output tablet-level metrics
in Prometheus format as well.
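
Conceptually, the change amounts to an early return keyed on the entity type. Below is a hedged sketch with hypothetical names, not the actual MetricEntity code:

  #include <string>

  struct EntitySketch {
    std::string prototype_name;  // e.g. "server" or "tablet"
    std::string id;              // e.g. "kudu.master" or "kudu.tabletserver"
  };

  // Returns the Prometheus metric prefix, or "" when the entity is skipped.
  std::string PrometheusPrefixOrEmpty(const EntitySketch& entity) {
    if (entity.prototype_name != "server") {
      return "";  // non-server entities (e.g. tablets) are silently skipped
    }
    if (entity.id == "kudu.master") return "kudu_master_";
    if (entity.id == "kudu.tabletserver") return "kudu_tserver_";
    return "";  // unexpected server-level entity; the real code reports an error
  }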

Change-Id: I618bbc2caab7a8d9812eeaeb67ac42b0293b0654
Reviewed-on: http://gerrit.cloudera.org:8080/21226
Reviewed-by: Mahesh Reddy 
Reviewed-by: Abhishek Chennaka 
Tested-by: Alexey Serbin 
---
 src/kudu/util/metrics.cc | 51 +---
 1 file changed, 31 insertions(+), 20 deletions(-)

diff --git a/src/kudu/util/metrics.cc b/src/kudu/util/metrics.cc
index dfcea6db1..0f1590de5 100644
--- a/src/kudu/util/metrics.cc
+++ b/src/kudu/util/metrics.cc
@@ -405,15 +405,25 @@ Status MetricEntity::WriteAsJson(JsonWriter* writer, 
const MetricJsonOptions& op
 }
 
 Status MetricEntity::WriteAsPrometheus(PrometheusWriter* writer) const {
-  MetricMap metrics;
-  AttributeMap attrs;
+  static const string kIdMaster = "kudu.master";
+  static const string kIdTabletServer = "kudu.tabletserver";
+
+  if (strcmp(prototype_->name(), "server") != 0) {
+// Only server-level metrics are emitted in Prometheus format as of now,
+// non-server metric entities are currently silently skipped.
+//
+// TODO(KUDU-3563): output tablet-level metrics in Prometheus format as well
+return Status::OK();
+  }
+
+  // Empty filters result in getting all the metrics for this MetricEntity.
+  //
+  // TODO(aserbin): instead of hard-coding, pass MetricFilters as a parameter
   MetricFilters filters;
   filters.entity_level = "debug";
-  const string master_prefix = "kudu_master_";
-  const string tserver_prefix = "kudu_tserver_";
-  const string master_server = "kudu.master";
-  const string tablet_server = "kudu.tabletserver";
-  // Empty filters results in getting all the metrics for this MetricEntity.
+
+  MetricMap metrics;
+  AttributeMap attrs;
   const auto s = GetMetricsAndAttrs(filters, &metrics, &attrs);
   if (s.IsNotFound()) {
 // Status::NotFound is returned when this entity has been filtered, treat it
@@ -421,21 +431,22 @@ Status MetricEntity::WriteAsPrometheus(PrometheusWriter* writer) const {
 return Status::OK();
   }
   RETURN_NOT_OK(s);
-  // Only emit server level metrics
-  if (strcmp(prototype_->name(), "server") == 0) {
-if (id_ == master_server) {
-  // attach kudu_master_ as prefix to metrics
-  WriteMetricsPrometheus(writer, metrics, master_prefix);
-  return Status::OK();
-}
-if (id_ == tablet_server) {
-  // attach kudu_tserver_ as prefix to metrics
-  WriteMetricsPrometheus(writer, metrics, tserver_prefix);
-  return Status::OK();
-}
+
+  if (id_ == kIdMaster) {
+// Prefix all master metrics with 'kudu_master_'.
+static const string kMasterPrefix = "kudu_master_";
+WriteMetricsPrometheus(writer, metrics, kMasterPrefix);
+return Status::OK();
+  }
+  if (id_ == kIdTabletServer) {
+// Prefix all tablet server metrics with 'kudu_tserver_'.
+static const string kTabletServerPrefix = "kudu_tserver_";
+WriteMetricsPrometheus(writer, metrics, kTabletServerPrefix);
+return Status::OK();
   }
 
-  return Status::NotFound("Entity is not relevant to Prometheus");
+  return Status::NotSupported(
+  Substitute("$0: unexpected server-level metric entity", id_));
 }
 
 Status MetricEntity::CollectTo(MergedEntityMetrics* collections,



(kudu) branch branch-1.17.x updated: KUDU-3562 fix integer overflow in available space metrics

2024-04-02 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/branch-1.17.x by this push:
 new 6e729fd76 KUDU-3562 fix integer overflow in available space metrics
6e729fd76 is described below

commit 6e729fd762bed42fe78e49b20a757ffd0f9b73aa
Author: Alexey Serbin 
AuthorDate: Sun Mar 31 12:48:27 2024 -0700

KUDU-3562 fix integer overflow in available space metrics

This patch addresses KUDU-3562.  The problem was in implicit casting
of 64-bit integers down to 32-bit ones when introducing a helper
variable in CalculateAvailableSpace().  A new test scenario is added
to cover the fixed issue and catch regressions in future, if any.

In addition, this patch corrects the type of the related metrics in
disk_failure-itest.cc.  It also clarifies on the description and tags
of the --fs_{data_dirs,wal_dir}_available_space_cache_seconds flags.
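
For context, a minimal standalone example of the overflow class being fixed here; the variable names are illustrative, not Kudu's:

  // Storing a 64-bit byte count in a 32-bit helper variable silently truncates
  // once the value exceeds ~2 GiB, which is how an available-space metric can
  // end up wildly wrong or negative.
  #include <cstdint>
  #include <cstdio>

  int main() {
    const int64_t free_bytes = 500LL * 1024 * 1024 * 1024;        // 500 GiB
    const int32_t truncated = static_cast<int32_t>(free_bytes);   // the bug, made explicit
    const int64_t preserved = free_bytes;                         // the fix: keep 64 bits
    std::printf("truncated=%d preserved=%lld\n",
                truncated, static_cast<long long>(preserved));
    return 0;
  }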

Change-Id: I974aea822626e4648886388c0de3741ac459f2ec
Reviewed-on: http://gerrit.cloudera.org:8080/21227
Reviewed-by: Abhishek Chennaka 
Tested-by: Abhishek Chennaka 
Reviewed-by: Mahesh Reddy 
(cherry picked from commit e54c80cf9ba129b943eed6af3266899753b1b4ac)
Reviewed-on: http://gerrit.cloudera.org:8080/21228
---
 src/kudu/fs/data_dirs.cc   |  8 +-
 src/kudu/integration-tests/disk_failure-itest.cc   |  8 +-
 .../integration-tests/disk_reservation-itest.cc| 92 +-
 src/kudu/server/server_base.cc | 14 ++--
 4 files changed, 108 insertions(+), 14 deletions(-)

diff --git a/src/kudu/fs/data_dirs.cc b/src/kudu/fs/data_dirs.cc
index b5dc54c00..9c705fea4 100644
--- a/src/kudu/fs/data_dirs.cc
+++ b/src/kudu/fs/data_dirs.cc
@@ -77,18 +77,22 @@ TAG_FLAG(fs_data_dirs_reserved_bytes, runtime);
 TAG_FLAG(fs_data_dirs_reserved_bytes, evolving);
 
 DEFINE_int32(fs_data_dirs_available_space_cache_seconds, 10,
- "Number of seconds we cache the available disk space in the block 
manager.");
+ "TTL for the cached metric of the available disk space "
+ "in the data directories, in seconds");
 DEFINE_validator(fs_data_dirs_available_space_cache_seconds,
  [](const char* /*n*/, int32_t v) { return v >= 0; });
 TAG_FLAG(fs_data_dirs_available_space_cache_seconds, advanced);
 TAG_FLAG(fs_data_dirs_available_space_cache_seconds, evolving);
+TAG_FLAG(fs_data_dirs_available_space_cache_seconds, runtime);
 
 DEFINE_int32(fs_wal_dir_available_space_cache_seconds, 10,
- "Number of seconds we cache the available disk space the WAL 
directory.");
+ "TTL for the cached metric of the available disk space "
+ "in the WAL directories, in seconds");
 DEFINE_validator(fs_wal_dir_available_space_cache_seconds,
  [](const char* /*n*/, int32_t v) { return v >= 0; });
 TAG_FLAG(fs_wal_dir_available_space_cache_seconds, advanced);
 TAG_FLAG(fs_wal_dir_available_space_cache_seconds, evolving);
+TAG_FLAG(fs_wal_dir_available_space_cache_seconds, runtime);
 
 DEFINE_bool(fs_lock_data_dirs, true,
 "Lock the data directories to prevent concurrent usage. "
diff --git a/src/kudu/integration-tests/disk_failure-itest.cc b/src/kudu/integration-tests/disk_failure-itest.cc
index 3cbcddc62..2f20f7592 100644
--- a/src/kudu/integration-tests/disk_failure-itest.cc
+++ b/src/kudu/integration-tests/disk_failure-itest.cc
@@ -57,8 +57,8 @@ METRIC_DECLARE_gauge_int32(num_raft_leaders);
 METRIC_DECLARE_gauge_size(num_rowsets_on_disk);
 METRIC_DECLARE_gauge_uint64(data_dirs_failed);
 METRIC_DECLARE_gauge_uint32(tablets_num_failed);
-METRIC_DECLARE_gauge_uint64(wal_dir_space_available_bytes);
-METRIC_DECLARE_gauge_uint64(data_dirs_space_available_bytes);
+METRIC_DECLARE_gauge_int64(wal_dir_space_available_bytes);
+METRIC_DECLARE_gauge_int64(data_dirs_space_available_bytes);
 
 using kudu::client::sp::shared_ptr;
 using kudu::client::KuduClient;
@@ -321,7 +321,7 @@ TEST_P(TabletServerDiskErrorITest, TestFailOnBootstrap) {
   // Wait for the cluster to return to a healthy state.
   ClusterVerifier v(cluster_.get());
   NO_FATALS(v.CheckCluster());
-};
+}
 
 TEST_P(TabletServerDiskErrorITest, TestSpaceAvailableMetrics) {
  // Get the wal_dir_space_available_bytes, data_dirs_space_available_bytes and make sure
@@ -367,7 +367,7 @@ TEST_P(TabletServerDiskErrorITest, TestSpaceAvailableMetrics) {
   ASSERT_OK(get_metrics(&wal_dir_space, &data_dir_space));
   ASSERT_NE(wal_dir_space, -1);
   ASSERT_EQ(data_dir_space, -1);
-};
+}
 
 TEST_P(TabletServerDiskErrorITest, TestFailDuringScanWorkload) {
   // Make one server to be more likely to host leader replicas: its Raft
diff --git a/src/kudu/integration-tests/disk_reservation-itest.cc 
b/src/kudu/integration-test

(kudu) 02/02: [rpc] relax settings for MeasureAcceptorDispatchTimes

2024-04-01 Thread alexey
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit d980c688db3b86bd248723d23dc093c668307211
Author: Alexey Serbin 
AuthorDate: Wed Mar 27 16:45:23 2024 -0700

[rpc] relax settings for MeasureAcceptorDispatchTimes

On some nodes, RpcAcceptorBench.MeasureAcceptorDispatchTimes might
run out of file descriptors if run with 16 concurrent client threads,
failing the test.  This patch modifies the default settings for the
scenario to run with just a single client thread by default.  That's
enough to provide good coverage for the related metrics while easing
the load on a test node.
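
For reference, a small hedged sketch of the gflags call used in the diff below; the flag definition here is illustrative only:

  #include <string>
  #include <gflags/gflags.h>

  DEFINE_int32(client_threads, 16, "illustrative flag, not the real test flag");

  // gflags::SetCommandLineOptionWithMode() returns a non-empty description on
  // success and an empty string when the flag is unknown (for instance, if its
  // registration was dropped at link time), hence the ASSERT_NE("", ...) checks.
  bool LowerDefaultClientThreads() {
    const std::string result = gflags::SetCommandLineOptionWithMode(
        "client_threads", "1", gflags::SET_FLAGS_DEFAULT);
    return !result.empty();
  }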

Change-Id: Ic9af59046d6bcf28134cc88e7ff9e54643efc37f
Reviewed-on: http://gerrit.cloudera.org:8080/21212
Reviewed-by: Mahesh Reddy 
Reviewed-by: Abhishek Chennaka 
Tested-by: Alexey Serbin 
---
 src/kudu/rpc/rpc-bench.cc | 9 +
 1 file changed, 9 insertions(+)

diff --git a/src/kudu/rpc/rpc-bench.cc b/src/kudu/rpc/rpc-bench.cc
index 6b0d4bfd5..ca42fd8ef 100644
--- a/src/kudu/rpc/rpc-bench.cc
+++ b/src/kudu/rpc/rpc-bench.cc
@@ -337,6 +337,15 @@ class RpcAcceptorBench : public RpcTestBase {
 };
 
 TEST_F(RpcAcceptorBench, MeasureAcceptorDispatchTimes) {
+  // It's enough to have just one client thread to verify that the acceptor
+  // dispatch times metric works as expected, so let's set this minimum viable
+  // configuration as the default one. The option of changing the default
+  // setting for --client_threads for all the test scenarios in this file
+  // doesn't look attractive since other scenarios rely on client-side
+  // concurrency to provide some meaningful test coverage.
+  ASSERT_NE("", SetCommandLineOptionWithMode("client_threads",
+ "1",
+ gflags::SET_FLAGS_DEFAULT));
   const size_t threads_num = FLAGS_client_threads;
 
   thread threads[threads_num];


