HBASE-20293 get_splits returns duplicate split points when region replication is on
Signed-off-by: Ted Yu <yuzhih...@gmail.com> Signed-off-by: Huaxiang Sun <h...@apache.org> Signed-off-by: Sean Busbey <bus...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/59d9e0f4 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/59d9e0f4 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/59d9e0f4 Branch: refs/heads/branch-1 Commit: 59d9e0f407cbcb387bc850a56c919a8c12509160 Parents: af172e0 Author: Toshihiro Suzuki <brfrn...@gmail.com> Authored: Wed Apr 18 14:47:04 2018 +0900 Committer: Sean Busbey <bus...@apache.org> Committed: Fri Apr 20 12:40:50 2018 -0500 ---------------------------------------------------------------------- hbase-shell/src/main/ruby/hbase/table.rb | 52 ++++++++++++++-------- hbase-shell/src/test/ruby/hbase/table_test.rb | 18 +++++++- hbase-shell/src/test/ruby/test_helper.rb | 11 +++++ 3 files changed, 61 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/59d9e0f4/hbase-shell/src/main/ruby/hbase/table.rb ---------------------------------------------------------------------- diff --git a/hbase-shell/src/main/ruby/hbase/table.rb b/hbase-shell/src/main/ruby/hbase/table.rb index 4109006..8e3f6e4 100644 --- a/hbase-shell/src/main/ruby/hbase/table.rb +++ b/hbase-shell/src/main/ruby/hbase/table.rb @@ -20,6 +20,8 @@ include Java java_import org.apache.hadoop.hbase.util.Bytes +java_import org.apache.hadoop.hbase.client.RegionReplicaUtil +java_import org.apache.hadoop.hbase.client.Scan # Wrapper for org.apache.hadoop.hbase.client.Table @@ -48,8 +50,9 @@ module Hbase method = name.to_sym self.class_eval do define_method method do |*args| - @shell.internal_command(shell_command, internal_method_name, self, *args) - end + @shell.internal_command(shell_command, internal_method_name, self, + *args) + end end end @@ -143,7 +146,7 @@ EOF end #Case where attributes are specified without timestamp if timestamp.kind_of?(Hash) - timestamp.each do |k, v| + timestamp.each do |k, v| if k == 'ATTRIBUTES' set_attributes(p, v) elsif k == 'VISIBILITY' @@ -185,12 +188,12 @@ EOF timestamp = org.apache.hadoop.hbase.HConstants::LATEST_TIMESTAMP end d = org.apache.hadoop.hbase.client.Delete.new(row.to_s.to_java_bytes, timestamp) - if temptimestamp.kind_of?(Hash) - temptimestamp.each do |k, v| - if v.kind_of?(String) - set_cell_visibility(d, v) if v - end - end + if temptimestamp.is_a?(Hash) + temptimestamp.each do |_, v| + if v.is_a?(String) + set_cell_visibility(d, v) if v + end + end end if args.any? visibility = args[VISIBILITY] @@ -262,9 +265,11 @@ EOF #---------------------------------------------------------------------------------------------- # Count rows in a table + + # rubocop:disable Metrics/AbcSize def _count_internal(interval = 1000, caching_rows = 10) # We can safely set scanner caching with the first key only filter - scan = org.apache.hadoop.hbase.client.Scan.new + scan = Scan.new scan.setCacheBlocks(false) scan.setCaching(caching_rows) scan.setFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new) @@ -288,6 +293,7 @@ EOF # Return the counter return count end + # rubocop:enable Metrics/AbcSize #---------------------------------------------------------------------------------------------- # Get from table @@ -425,6 +431,8 @@ EOF org.apache.hadoop.hbase.util.Bytes::toLong(cell.getValue) end + # rubocop:disable Metrics/AbcSize + # rubocop:disable Metrics/MethodLength def _hash_to_scan(args) if args.any? enablemetrics = args["ALL_METRICS"].nil? ? false : args["ALL_METRICS"] @@ -453,10 +461,10 @@ EOF end scan = if stoprow - org.apache.hadoop.hbase.client.Scan.new(startrow.to_java_bytes, stoprow.to_java_bytes) - else - org.apache.hadoop.hbase.client.Scan.new(startrow.to_java_bytes) - end + Scan.new(startrow.to_java_bytes, stoprow.to_java_bytes) + else + Scan.new(startrow.to_java_bytes) + end # This will overwrite any startrow/stoprow settings scan.setRowPrefixFilter(rowprefixfilter.to_java_bytes) if rowprefixfilter @@ -493,11 +501,13 @@ EOF set_authorizations(scan, authorizations) if authorizations scan.setConsistency(org.apache.hadoop.hbase.client.Consistency.valueOf(consistency)) if consistency else - scan = org.apache.hadoop.hbase.client.Scan.new + scan = Scan.new end scan end + # rubocop:enable Metrics/MethodLength + # rubocop:enable Metrics/AbcSize def _get_scanner(args) @table.getScanner(_hash_to_scan(args)) @@ -505,10 +515,11 @@ EOF #---------------------------------------------------------------------------------------------- # Scans whole table or a range of keys and returns rows matching specific criteria + # rubocop:disable Metrics/AbcSize def _scan_internal(args = {}, scan = nil) raise(ArgumentError, "Args should be a Hash") unless args.kind_of?(Hash) raise(ArgumentError, "Scan argument should be org.apache.hadoop.hbase.client.Scan") \ - unless scan == nil || scan.kind_of?(org.apache.hadoop.hbase.client.Scan) + unless scan.nil? || scan.is_a?(Scan) limit = args["LIMIT"] || -1 maxlength = args.delete("MAXLENGTH") || -1 @@ -552,8 +563,9 @@ EOF scanner.close() return ((block_given?) ? [count, is_stale] : res) end + # rubocop:enable Metrics/AbcSize - # Apply OperationAttributes to puts/scans/gets + # Apply OperationAttributes to puts/scans/gets def set_attributes(oprattr, attributes) raise(ArgumentError, "Attributes must be a Hash type") unless attributes.kind_of?(Hash) for k,v in attributes @@ -723,11 +735,13 @@ EOF # rubocop:disable Style/MultilineBlockChain def _get_splits_internal() locator = @table.getRegionLocator - locator.getAllRegionLocations.map do |i| + locator.getAllRegionLocations.select do |s| + RegionReplicaUtil.isDefaultReplica(s.getRegionInfo) + end.map do |i| Bytes.toStringBinary(i.getRegionInfo.getStartKey) end.delete_if { |k| k == '' } ensure - locator.close() + locator.close end end # rubocop:enable Style/MultilineBlockChain http://git-wip-us.apache.org/repos/asf/hbase/blob/59d9e0f4/hbase-shell/src/test/ruby/hbase/table_test.rb ---------------------------------------------------------------------- diff --git a/hbase-shell/src/test/ruby/hbase/table_test.rb b/hbase-shell/src/test/ruby/hbase/table_test.rb index 6ffdf89..a631fc5 100644 --- a/hbase-shell/src/test/ruby/hbase/table_test.rb +++ b/hbase-shell/src/test/ruby/hbase/table_test.rb @@ -188,6 +188,7 @@ module Hbase end # Complex data management methods tests + # rubocop:disable Metrics/ClassLength class TableComplexMethodsTest < Test::Unit::TestCase include TestHelpers @@ -302,7 +303,8 @@ module Hbase assert_not_nil(res['x:b']) end - define_test "get should work with hash columns spec and TIMESTAMP and AUTHORIZATIONS" do + define_test 'get should work with hash columns spec and TIMESTAMP and' \ + ' AUTHORIZATIONS' do res = @test_table._get_internal('1', TIMESTAMP => 1234, AUTHORIZATIONS=>['PRIVATE']) assert_nil(res) end @@ -635,5 +637,19 @@ module Hbase assert_equal(0, splits.size) assert_equal([], splits) end + + define_test 'Split count for a table with region replicas' do + @test_table_name = 'tableWithRegionReplicas' + create_test_table_with_region_replicas(@test_table_name, 3, + SPLITS => ['10']) + @table = table(@test_table_name) + splits = @table._get_splits_internal + # In this case, total splits should be 1 even if the number of region + # replicas is 3. + assert_equal(1, splits.size) + assert_equal(['10'], splits) + drop_test_table(@test_table_name) + end end + # rubocop:enable Metrics/ClassLength end http://git-wip-us.apache.org/repos/asf/hbase/blob/59d9e0f4/hbase-shell/src/test/ruby/test_helper.rb ---------------------------------------------------------------------- diff --git a/hbase-shell/src/test/ruby/test_helper.rb b/hbase-shell/src/test/ruby/test_helper.rb index b4bec90..c947439 100644 --- a/hbase-shell/src/test/ruby/test_helper.rb +++ b/hbase-shell/src/test/ruby/test_helper.rb @@ -107,6 +107,17 @@ module Hbase end end + def create_test_table_with_region_replicas(name, num_of_replicas, splits) + # Create the table if needed + unless admin.exists?(name) + admin.create name, 'f1', { REGION_REPLICATION => num_of_replicas }, + splits + end + + # Enable the table if needed + admin.enable(name) unless admin.enabled?(name) + end + def drop_test_table(name) return unless admin.exists?(name) begin