Repository: hive
Updated Branches:
  refs/heads/master 9a59592e5 -> 624bfa61f


HIVE-17900 analyze stats on columns triggered by Compactor generates malformed SQL with > 1 partition column (Eugene Koifman, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/624bfa61
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/624bfa61
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/624bfa61

Branch: refs/heads/master
Commit: 624bfa61f584e00b37c83b2167ab941a6e0d5656
Parents: 9a59592
Author: Eugene Koifman <ekoif...@hortonworks.com>
Authored: Wed Nov 29 11:59:53 2017 -0800
Committer: Eugene Koifman <ekoif...@hortonworks.com>
Committed: Wed Nov 29 11:59:53 2017 -0800

----------------------------------------------------------------------
 .../hadoop/hive/ql/txn/compactor/Worker.java    |  5 +-
 .../apache/hadoop/hive/ql/TestTxnCommands2.java |  4 --
 .../apache/hadoop/hive/ql/TestTxnNoBuckets.java | 76 ++++++++++++++++++++
 3 files changed, 79 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/624bfa61/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
index d3be11a..aea04aa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
@@ -273,8 +273,9 @@ public class Worker extends CompactorThread {
           sb.append(" partition(");
           Map<String, String> partitionColumnValues = Warehouse.makeEscSpecFromName(ci.partName);
           for(Map.Entry<String, String> ent : partitionColumnValues.entrySet()) {
-            sb.append(ent.getKey()).append("='").append(ent.getValue()).append("'");
+            sb.append(ent.getKey()).append("='").append(ent.getValue()).append("',");
           }
+          sb.setLength(sb.length() - 1); //remove trailing ,
           sb.append(")");
         }
         catch(MetaException ex) {
@@ -285,7 +286,7 @@ public class Worker extends CompactorThread {
       for(String colName : columnList) {
         sb.append(colName).append(",");
       }
-      sb.setLength(sb.length() - 1);//remove trailing ,
+      sb.setLength(sb.length() - 1); //remove trailing ,
       LOG.info("running '" + sb.toString() + "'");
       Driver d = new Driver(conf, userName);
       SessionState localSession = null;
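
For context, a minimal standalone sketch of the string-building pattern this hunk changes (illustrative only; the class and method names below are not Hive code). The old loop appended each key='value' pair with no separator, so a spec with more than one partition column produced an unparsable clause such as partition(p='1'q='2'); the fixed loop appends a trailing comma per pair and then trims the last one, yielding partition(p='1',q='2').

// Illustrative sketch, not part of Hive; mimics the StringBuilder pattern
// in Worker.java after this patch for a non-empty partition spec.
import java.util.LinkedHashMap;
import java.util.Map;

public class PartitionClauseSketch {
  static String partitionClause(Map<String, String> partitionColumnValues) {
    StringBuilder sb = new StringBuilder(" partition(");
    for (Map.Entry<String, String> ent : partitionColumnValues.entrySet()) {
      // append "key='value'," for every column, including the last one
      sb.append(ent.getKey()).append("='").append(ent.getValue()).append("',");
    }
    sb.setLength(sb.length() - 1); // remove trailing ,
    sb.append(")");
    return sb.toString();
  }

  public static void main(String[] args) {
    Map<String, String> spec = new LinkedHashMap<>();
    spec.put("p", "1");
    spec.put("q", "2");
    // Before the fix the pairs ran together: partition(p='1'q='2')
    // With the fix:                          partition(p='1',q='2')
    System.out.println(partitionClause(spec));
  }
}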

http://git-wip-us.apache.org/repos/asf/hive/blob/624bfa61/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
index 2e73e48..17d976a 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
@@ -69,10 +69,6 @@ import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-/**
- * TODO: this should be merged with TestTxnCommands once that is checked in
- * specifically the tests; the supporting code here is just a clone of TestTxnCommands
- */
 public class TestTxnCommands2 {
   static final private Logger LOG = LoggerFactory.getLogger(TestTxnCommands2.class);
   protected static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") +

http://git-wip-us.apache.org/repos/asf/hive/blob/624bfa61/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
index f0d9ff2..7f5e091 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
@@ -18,12 +18,15 @@
 package org.apache.hadoop.hive.ql;
 
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
 import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
 import org.apache.hadoop.hive.metastore.txn.TxnStore;
 import org.apache.hadoop.hive.metastore.txn.TxnUtils;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.BucketCodec;
+import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
 import org.junit.Assert;
 import org.junit.Before;
@@ -34,8 +37,10 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.File;
+import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 public class TestTxnNoBuckets extends TxnCommandsBaseForTests {
@@ -640,5 +645,76 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver
     }
     Assert.assertTrue("Din't find expected 'vectorized' in plan", !vectorized);
   }
+  /**
+   * HIVE-17900
+   */
+  @Test
+  public void testCompactStatsGather() throws Exception {
+    hiveConf.setVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
+    runStatementOnDriver("drop table if exists T");
+    runStatementOnDriver("create table T(a int, b int) partitioned by (p int, 
q int) " +
+      "stored as orc TBLPROPERTIES ('transactional'='true')");
+
+    int[][] targetVals = {{4, 1, 1}, {4, 2, 2}, {4, 3, 1}, {4, 4, 2}};
+    //we only recompute stats after major compact if they existed before
+    runStatementOnDriver("insert into T partition(p=1,q) " + 
makeValuesClause(targetVals));
+    runStatementOnDriver("analyze table T  partition(p=1) compute statistics 
for columns");
+
+    IMetaStoreClient hms = Hive.get().getMSC();
+    List<String> partNames = new ArrayList<>();
+    partNames.add("p=1/q=2");
+    List<String> colNames = new ArrayList<>();
+    colNames.add("a");
+    Map<String, List<ColumnStatisticsObj>> map = hms.getPartitionColumnStatistics("default",
+      "T", partNames, colNames);
+    Assert.assertEquals(4, map.get(partNames.get(0)).get(0).getStatsData().getLongStats().getHighValue());
+
+
+    int[][] targetVals2 = {{5, 1, 1}, {5, 2, 2}, {5, 3, 1}, {5, 4, 2}};
+    runStatementOnDriver("insert into T partition(p=1,q) " + 
makeValuesClause(targetVals2));
+
+    String query = "select ROW__ID, p, q, a, b, INPUT__FILE__NAME from T order 
by p, q, a, b";
+    List<String> rs = runStatementOnDriver(query);
+    String[][] expected = {
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000015_0000015_0000/bucket_00000"},
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000015_0000015_0000/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000017_0000017_0000/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000017_0000017_0000/bucket_00000"},
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/delta_0000015_0000015_0000/bucket_00000"},
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/delta_0000015_0000015_0000/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/delta_0000017_0000017_0000/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/delta_0000017_0000017_0000/bucket_00000"}
+    };
+    checkExpected(rs, expected, "insert data");
+
+    //run major compaction
+    runStatementOnDriver("alter table T partition(p=1,q=2) compact 'major'");
+    TestTxnCommands2.runWorker(hiveConf);
+
+    query = "select ROW__ID, p, q, a, b, INPUT__FILE__NAME from T order by p, 
q, a, b";
+    rs = runStatementOnDriver(query);
+    String[][] expected2 = {
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000015_0000015_0000/bucket_00000"},
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000015_0000015_0000/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000017_0000017_0000/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000017_0000017_0000/bucket_00000"},
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/base_0000017/bucket_00000"},
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/base_0000017/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/base_0000017/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/base_0000017/bucket_00000"}
+    };
+    checkExpected(rs, expected2, "after major compaction");
+
+    //check status of compaction job
+    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
+    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
+    Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
+    Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
+    Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local"));
+
+    //now check that stats were updated
+    map = hms.getPartitionColumnStatistics("default","T", partNames, colNames);
+    Assert.assertEquals("", 5, 
map.get(partNames.get(0)).get(0).getStatsData().getLongStats().getHighValue());
+  }
 }
 
