[ 
https://issues.apache.org/jira/browse/HIVE-26716?focusedWorklogId=827080&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-827080
 ]

ASF GitHub Bot logged work on HIVE-26716:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 18/Nov/22 08:40
            Start Date: 18/Nov/22 08:40
    Worklog Time Spent: 10m 
      Work Description: veghlaci05 commented on code in PR #3746:
URL: https://github.com/apache/hive/pull/3746#discussion_r1026156511


##########
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java:
##########
@@ -89,6 +89,270 @@
 @SuppressWarnings("deprecation")
 public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
+  @Test
+  public void testRebalanceCompactionNotPartitionedWithoutBuckets() throws 
Exception {
+    conf.setBoolVar(HiveConf.ConfVars.COMPACTOR_CRUD_QUERY_BASED, true);
+    conf.setBoolVar(HiveConf.ConfVars.HIVE_COMPACTOR_GATHER_STATS, false);
+    conf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false);
+
+    //set grouping size to have 3 buckets, and re-create driver with the new 
config
+    conf.set("tez.grouping.min-size", "1000");
+    conf.set("tez.grouping.max-size", "80000");
+    driver = new Driver(conf);
+
+    final String stageTableName = "stage_rebalance_test";
+    final String tableName = "rebalance_test";
+    AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf);
+
+    TestDataProvider testDataProvider = new TestDataProvider();
+    testDataProvider.createFullAcidTable(stageTableName, true, false);
+    testDataProvider.insertTestDataPartitioned(stageTableName);
+
+    executeStatementOnDriver("drop table if exists " + tableName, driver);
+    executeStatementOnDriver("CREATE TABLE " + tableName + "(a string, b int) 
" +
+        "STORED AS ORC TBLPROPERTIES('transactional'='true')", driver);
+    executeStatementOnDriver("INSERT OVERWRITE TABLE " + tableName + " select 
a, b from " + stageTableName, driver);
+
+    //do some single inserts to have more data in the first bucket.
+    executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values 
('12',12)", driver);
+    executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values 
('13',13)", driver);
+    executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values 
('14',14)", driver);
+    executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values 
('15',15)", driver);
+    executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values 
('16',16)", driver);
+    executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values 
('17',17)", driver);
+
+    // Verify buckets and their content before rebalance
+    Table table = msClient.getTable("default", tableName);
+    FileSystem fs = FileSystem.get(conf);
+    Assert.assertEquals("Test setup does not match the expected: different 
buckets",
+        Arrays.asList("bucket_00000_0", "bucket_00001_0", "bucket_00002_0"),
+        CompactorTestUtil.getBucketFileNames(fs, table, null, "base_0000001"));
+    String[][] expectedBuckets = new String[][] {
+        {
+            "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t5\t4",
+            "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t6\t2",
+            "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":2}\t6\t3",
+            "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":3}\t6\t4",
+            "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":4}\t5\t2",
+            "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":5}\t5\t3",
+            "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t12\t12",
+            "{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t13\t13",
+            "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t14\t14",
+            "{\"writeid\":5,\"bucketid\":536870912,\"rowid\":0}\t15\t15",
+            "{\"writeid\":6,\"bucketid\":536870912,\"rowid\":0}\t16\t16",
+            "{\"writeid\":7,\"bucketid\":536870912,\"rowid\":0}\t17\t17",
+        },
+        {
+            "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t2\t4",
+            "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t3\t3",
+            "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":2}\t4\t4",
+            "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":3}\t4\t3",
+        },
+        {
+            "{\"writeid\":1,\"bucketid\":537001984,\"rowid\":0}\t2\t3",
+            "{\"writeid\":1,\"bucketid\":537001984,\"rowid\":1}\t3\t4",
+        },
+    };
+    for(int i = 0; i < 3; i++) {
+      Assert.assertEquals("rebalanced bucket " + i, 
Arrays.asList(expectedBuckets[i]),
+          testDataProvider.getBucketData(tableName, 
BucketCodec.V1.encode(options.bucket(i)) + ""));
+    }
+
+    //Try to do a rebalancing compaction
+    executeStatementOnDriver("ALTER TABLE " + tableName + " COMPACT 
'rebalance'", driver);
+    runWorker(conf);
+
+    //Check if the compaction succeed
+    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
+    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
+    List<ShowCompactResponseElement> compacts = rsp.getCompacts();
+    Assert.assertEquals("Expecting 1 rows and found " + compacts.size(), 1, 
compacts.size());
+    Assert.assertEquals("Expecting compaction state 'ready for cleaning' and 
found:" + compacts.get(0).getState(),
+        "ready for cleaning", compacts.get(0).getState());

Review Comment:
   In the current form not, since it's expecting the compaction in succeeded 
state, not `ready for cleaning`. However, I think it's a good idea to 
generalize it to accept the expected state as well through a second param.





Issue Time Tracking
-------------------

    Worklog Id:     (was: 827080)
    Time Spent: 2h 40m  (was: 2.5h)

> Query based Rebalance compaction on full acid tables
> ----------------------------------------------------
>
>                 Key: HIVE-26716
>                 URL: https://issues.apache.org/jira/browse/HIVE-26716
>             Project: Hive
>          Issue Type: Sub-task
>          Components: Hive
>            Reporter: László Végh
>            Assignee: László Végh
>            Priority: Major
>              Labels: ACID, compaction, pull-request-available
>          Time Spent: 2h 40m
>  Remaining Estimate: 0h
>
> Support rebalancing compaction on fully ACID tables.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to