This is an automated email from the ASF dual-hosted git repository.

elserj pushed a commit to branch branch-2 in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push: new 31917b0 HBASE-25278 Add CACHE_BLOCKS option to count shell command 31917b0 is described below commit 31917b0a8ab1d7f90ab6997dcc80a38dcea98013 Author: Josh Elser <els...@apache.org> AuthorDate: Thu Nov 12 16:04:26 2020 -0500 HBASE-25278 Add CACHE_BLOCKS option to count shell command Expose an argument on the `count` command which is passed to the `setCacheBlocks` method on the Scan which the count command uses. This is a quick and dirty approach to read all of the blocks for a table into the block cache. * Raise an error when the value isn't a boolean or the expected string Closes #2650 Signed-off-by: Zach York <zy...@apache.org> Signed-off-by: Peter Somogyi <psomo...@apache.org> --- hbase-shell/src/main/ruby/hbase/table.rb | 6 ++--- hbase-shell/src/main/ruby/shell/commands/count.rb | 27 +++++++++++++++++++++-- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/hbase-shell/src/main/ruby/hbase/table.rb b/hbase-shell/src/main/ruby/hbase/table.rb index 4e8a186..d779261 100644 --- a/hbase-shell/src/main/ruby/hbase/table.rb +++ b/hbase-shell/src/main/ruby/hbase/table.rb @@ -303,18 +303,18 @@ EOF #---------------------------------------------------------------------------------------------- # Count rows in a table - def _count_internal(interval = 1000, scan = nil) + def _count_internal(interval = 1000, scan = nil, cacheBlocks=false) raise(ArgumentError, 'Scan argument should be org.apache.hadoop.hbase.client.Scan') \ unless scan.nil? || scan.is_a?(org.apache.hadoop.hbase.client.Scan) # We can safely set scanner caching with the first key only filter if scan.nil? 
scan = org.apache.hadoop.hbase.client.Scan.new - scan.setCacheBlocks(false) + scan.setCacheBlocks(cacheBlocks) scan.setCaching(10) scan.setFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new) else - scan.setCacheBlocks(false) + scan.setCacheBlocks(cacheBlocks) filter = scan.getFilter firstKeyOnlyFilter = org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new if filter.nil? diff --git a/hbase-shell/src/main/ruby/shell/commands/count.rb b/hbase-shell/src/main/ruby/shell/commands/count.rb index 03840d0..7052358 100644 --- a/hbase-shell/src/main/ruby/shell/commands/count.rb +++ b/hbase-shell/src/main/ruby/shell/commands/count.rb @@ -49,6 +49,17 @@ t to table 't1', the corresponding commands would be: hbase> t.count FILTER => " (QualifierFilter (>=, 'binary:xyz')) AND (TimestampsFilter ( 123, 456))" hbase> t.count COLUMNS => ['c1', 'c2'], STARTROW => 'abc', STOPROW => 'xyz' + +By default, this operation does not cause any new blocks to be read into +the RegionServer block cache. This is typically the desired action; however, +if you want to force all blocks for a table to be loaded into the block cache +on-demand, you can pass the 'CACHE_BLOCKS' option with a value of 'true'. A value +of 'false' is the default and will result in no blocks being cached. This +command can be used in conjunction with all other options. 
+ +hbase> count 'ns1:t1', CACHE_BLOCKS => true +hbase> count 'ns1:t1', CACHE_BLOCKS => 'true' +hbase> count 'ns1:t1', INTERVAL => 100000, CACHE_BLOCKS => false EOF end @@ -60,17 +71,29 @@ EOF # If the second parameter is an integer, then it is the old command syntax params = { 'INTERVAL' => params } if params.is_a?(Integer) + # Try to be nice and convert a string to a bool + if params.include?('CACHE_BLOCKS') and params['CACHE_BLOCKS'].is_a?(String) + if params['CACHE_BLOCKS'].downcase == 'true' + params['CACHE_BLOCKS'] = true + elsif params['CACHE_BLOCKS'].downcase == 'false' + params['CACHE_BLOCKS'] = false + else + raise(ArgumentError, "Expected CACHE_BLOCKS value to be a boolean or the string 'true' or 'false'") + end + end + # Merge params with defaults params = { 'INTERVAL' => 1000, - 'CACHE' => 10 + 'CACHE' => 10, + 'CACHE_BLOCKS' => false }.merge(params) scan = table._hash_to_scan(params) # Call the counter method @start_time = Time.now formatter.header - count = table._count_internal(params['INTERVAL'].to_i, scan) do |cnt, row| + count = table._count_internal(params['INTERVAL'].to_i, scan, params['CACHE_BLOCKS']) do |cnt, row| formatter.row(["Current count: #{cnt}, row: #{row}"]) end formatter.footer(count)