This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push: new 3b91db407 [java] KUDU-2671 fix bug in PartitionSchema::getHashSchemaForRange() 3b91db407 is described below commit 3b91db407d9973f154567e0b280f7fe39899c557 Author: Alexey Serbin <ale...@apache.org> AuthorDate: Mon Jul 25 16:26:09 2022 -0700 [java] KUDU-2671 fix bug in PartitionSchema::getHashSchemaForRange() This patch fixes a bug in PartitionSchema::getHashSchemaForRange() that manifested itself when a right-side unbounded range with custom hash schema was present in a table. The patch also contains test scenarios that allowed to see the manifestation of the bug before the fix; from now they allow to spot regressions in the corresponding code, if any. These scenarios cover both {Create,Alter}Table code paths when adding right-side unbounded ranges with custom hash schemas. Change-Id: Ib0ef5bbe4528ce5c1d4c9591a8152cf7ec4af0bb Reviewed-on: http://gerrit.cloudera.org:8080/18782 Tested-by: Kudu Jenkins Reviewed-by: Attila Bukor <abu...@apache.org> --- .../org/apache/kudu/client/PartitionSchema.java | 11 ++- .../org/apache/kudu/client/TestAlterTable.java | 110 +++++++++++++++++++++ .../java/org/apache/kudu/client/TestKuduTable.java | 89 +++++++++++++++++ 3 files changed, 206 insertions(+), 4 deletions(-) diff --git a/java/kudu-client/src/main/java/org/apache/kudu/client/PartitionSchema.java b/java/kudu-client/src/main/java/org/apache/kudu/client/PartitionSchema.java index e62ac8dbf..50851a4ae 100644 --- a/java/kudu-client/src/main/java/org/apache/kudu/client/PartitionSchema.java +++ b/java/kudu-client/src/main/java/org/apache/kudu/client/PartitionSchema.java @@ -205,7 +205,8 @@ public class PartitionSchema { /** * Find hash schema for the given encoded range key. Depending on the * partition schema and the key, it might be either table-wide or a custom - * hash schema for a particular range. + * hash schema for a particular range. Just as a convention, this method + * returns the table-wide hash schema for keys in non-covered ranges. * * @return hash bucket schema for the encoded range key */ @@ -221,10 +222,12 @@ public class PartitionSchema { if (entry == null) { return hashBucketSchemas; } - // Check if 'rangeKey' is in the range (null upper boundary means unbounded - // range partition). + // Check if 'rangeKey' is in the range. + // NOTE: the right boundary is exclusive; an empty array for upper boundary + // means that the range partition is unbounded. final byte[] upper = entry.upper; - if (upper == null || Bytes.memcmp(rangeKey, upper) < 0) { + Preconditions.checkNotNull(upper); + if (upper.length == 0 || Bytes.memcmp(rangeKey, upper) < 0) { return entry.hashSchemas; } return hashBucketSchemas; diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestAlterTable.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestAlterTable.java index 7e722e0de..f0270b446 100644 --- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestAlterTable.java +++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestAlterTable.java @@ -599,6 +599,116 @@ public class TestAlterTable { client.deleteTable(tableName); } + /** + * Test altering a table, adding unbounded range partitions + * with custom hash schema. + */ + @Test(timeout = 100000) + public void testAlterAddUnboundedRangeWithCustomHashSchema() throws Exception { + ArrayList<ColumnSchema> columns = new ArrayList<>(2); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c0", Type.INT32) + .nullable(false) + .key(true) + .build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT32) + .nullable(false) + .build()); + final Schema schema = new Schema(columns); + + CreateTableOptions createOptions = + new CreateTableOptions() + .setRangePartitionColumns(ImmutableList.of("c0")) + .addHashPartitions(ImmutableList.of("c0"), 2, 0) + .setNumReplicas(1); + // Add range partition [-100, 100) with the table-wide hash schema + // (to be added upon creating the new table below). + { + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", -100); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", 100); + createOptions.addRangePartition(lower, upper); + } + // Add unbounded range partition [100, +inf) with custom hash schema. + { + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", 100); + PartialRow upper = schema.newPartialRow(); + RangePartitionWithCustomHashSchema range = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + range.addHashPartitions(ImmutableList.of("c0"), 5, 0); + createOptions.addRangePartition(range); + } + + client.createTable(tableName, schema, createOptions); + + // Alter the table: add unbounded range partition [-inf, -100) with custom hash schema. + { + PartialRow lower = schema.newPartialRow(); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", -100); + RangePartitionWithCustomHashSchema range = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + range.addHashPartitions(ImmutableList.of("c0"), 3, 0); + client.alterTable(tableName, new AlterTableOptions().addRangePartition(range)); + } + + KuduTable table = client.openTable(tableName); + + // Insert some rows and then drop partitions, ensuring the row count comes + // out as expected. + insertRows(table, -250, -200); + assertEquals(50, countRowsInTable(table)); + insertRows(table, -200, -50); + assertEquals(200, countRowsInTable(table)); + insertRows(table, -50, 50); + assertEquals(300, countRowsInTable(table)); + insertRows(table, 50, 200); + assertEquals(450, countRowsInTable(table)); + insertRows(table, 200, 250); + assertEquals(500, countRowsInTable(table)); + + { + AlterTableOptions alter = new AlterTableOptions(); + alter.setWait(true); + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", -100); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", 100); + alter.dropRangePartition(lower, upper); + client.alterTable(tableName, alter); + assertEquals(300, countRowsInTable(table)); + } + { + AlterTableOptions alter = new AlterTableOptions(); + PartialRow lower = schema.newPartialRow(); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", -100); + alter.dropRangePartition(lower, upper); + client.alterTable(tableName, alter); + assertEquals(150, countRowsInTable(table)); + } + { + AlterTableOptions alter = new AlterTableOptions(); + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", 100); + PartialRow upper = schema.newPartialRow(); + alter.dropRangePartition(lower, upper); + client.alterTable(tableName, alter); + assertEquals(0, countRowsInTable(table)); + } + + client.deleteTable(tableName); + } + /** * Test altering a table, adding range partitions with custom hash schema * per range and dropping partition in the middle, resulting in non-covered diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduTable.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduTable.java index 28c4ac66c..68c5b18f7 100644 --- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduTable.java +++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduTable.java @@ -888,6 +888,95 @@ public class TestKuduTable { assertEquals(0, rowStrings.size()); } + @Test(timeout = 100000) + public void testCreateTableCustomHashSchemasTwoUnboundedRanges() throws Exception { + CreateTableOptions builder = getBasicCreateTableOptions(); + + { + PartialRow lower = basicSchema.newPartialRow(); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, 0); + + RangePartitionWithCustomHashSchema rangePartition = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + rangePartition.addHashPartitions(ImmutableList.of("key"), 2, 0); + builder.addRangePartition(rangePartition); + } + { + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, 0); + PartialRow upper = basicSchema.newPartialRow(); + + RangePartitionWithCustomHashSchema rangePartition = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + rangePartition.addHashPartitions(ImmutableList.of("key"), 3, 0); + builder.addRangePartition(rangePartition); + } + + // Add table-wide schema as well -- that's to satisfy the constraint on + // the number of hash dimensions in table's hash schemas. However, this + // scenario isn't going to create a range with table-wide hash schema. + builder.addHashPartitions(ImmutableList.of("key"), 5, 0); + + KuduTable table = client.createTable(tableName, basicSchema, builder); + + // There should be 5 tablets: 2 for [-inf, 0) range and 3 for [0, +inf). + List<LocatedTablet> tablets = table.getTabletsLocations(10000); + assertEquals(5, tablets.size()); + + assertEquals( + ImmutableList.of("VALUES < 0", "VALUES >= 0"), + client.openTable(tableName).getFormattedRangePartitions(10000)); + assertEquals( + ImmutableList.of( + "VALUES < 0 HASH(key) PARTITIONS 2", + "VALUES >= 0 HASH(key) PARTITIONS 3"), + client.openTable(tableName).getFormattedRangePartitionsWithHashSchema(10000)); + + // Insert data into the newly created table and read it back. + KuduSession session = client.newSession(); + for (int key = -250; key < 250; ++key) { + Insert insert = createBasicSchemaInsert(table, key); + session.apply(insert); + } + session.flush(); + + // Do full table scan. + List<String> rowStrings = scanTableToStrings(table); + assertEquals(500, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 0)); + assertEquals(250, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 0)); + assertEquals(250, rowStrings.size()); + + // Predicate to have one part of the rows in the range with table-wide hash + // schema, and the other part from the range with custom hash schema. + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, -150), + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 150)); + assertEquals(300, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, -250)); + assertEquals(1, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 250)); + assertEquals(0, rowStrings.size()); + } + @Test(timeout = 100000) public void testCreateTableCustomHashSchemasTwoMixedRanges() throws Exception { CreateTableOptions builder = getBasicCreateTableOptions();