Repository: hive
Updated Branches:
  refs/heads/master 9a59592e5 -> 624bfa61f
HIVE-17900 analyze stats on columns triggered by Compactor generates malformed SQL with > 1 partition column (Eugene Koifman, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/624bfa61
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/624bfa61
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/624bfa61

Branch: refs/heads/master
Commit: 624bfa61f584e00b37c83b2167ab941a6e0d5656
Parents: 9a59592
Author: Eugene Koifman <ekoif...@hortonworks.com>
Authored: Wed Nov 29 11:59:53 2017 -0800
Committer: Eugene Koifman <ekoif...@hortonworks.com>
Committed: Wed Nov 29 11:59:53 2017 -0800

----------------------------------------------------------------------
 .../hadoop/hive/ql/txn/compactor/Worker.java    |  5 +-
 .../apache/hadoop/hive/ql/TestTxnCommands2.java |  4 --
 .../apache/hadoop/hive/ql/TestTxnNoBuckets.java | 76 ++++++++++++++++++++
 3 files changed, 79 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/624bfa61/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
index d3be11a..aea04aa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
@@ -273,8 +273,9 @@ public class Worker extends CompactorThread {
         sb.append(" partition(");
         Map<String, String> partitionColumnValues = Warehouse.makeEscSpecFromName(ci.partName);
         for(Map.Entry<String, String> ent : partitionColumnValues.entrySet()) {
-          sb.append(ent.getKey()).append("='").append(ent.getValue()).append("'");
+          sb.append(ent.getKey()).append("='").append(ent.getValue()).append("',");
         }
+        sb.setLength(sb.length() - 1); //remove trailing ,
         sb.append(")");
       }
       catch(MetaException ex) {
@@ -285,7 +286,7 @@ public class Worker extends CompactorThread {
     for(String colName : columnList) {
       sb.append(colName).append(",");
     }
-    sb.setLength(sb.length() - 1);//remove trailing ,
+    sb.setLength(sb.length() - 1); //remove trailing ,
     LOG.info("running '" + sb.toString() + "'");
     Driver d = new Driver(conf, userName);
     SessionState localSession = null;

http://git-wip-us.apache.org/repos/asf/hive/blob/624bfa61/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
index 2e73e48..17d976a 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
@@ -69,10 +69,6 @@ import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-/**
- * TODO: this should be merged with TestTxnCommands once that is checked in
- * specifically the tests; the supporting code here is just a clone of TestTxnCommands
- */
 public class TestTxnCommands2 {
   static final private Logger LOG = LoggerFactory.getLogger(TestTxnCommands2.class);
   protected static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") +
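Note on the Worker.java change above: the compactor's stats gatherer assembles the partition clause of the analyze-stats statement it runs after a compaction by concatenating key='value' pairs. Previously no separator was emitted between pairs, so a partition with more than one partition column produced a clause like partition(p='1'q='2'), which does not parse; the fix appends a comma after every pair and trims the trailing one. A minimal standalone sketch of that pattern (the class name and partition values below are illustrative, not part of the patch):

import java.util.LinkedHashMap;
import java.util.Map;

// Standalone illustration of the fixed string building shown in the Worker.java hunk above.
// The partition column names and values are made up for this example.
public class PartitionSpecSketch {
  public static void main(String[] args) {
    Map<String, String> partitionColumnValues = new LinkedHashMap<>();
    partitionColumnValues.put("p", "1");
    partitionColumnValues.put("q", "2");

    StringBuilder sb = new StringBuilder("partition(");
    for (Map.Entry<String, String> ent : partitionColumnValues.entrySet()) {
      sb.append(ent.getKey()).append("='").append(ent.getValue()).append("',");
    }
    sb.setLength(sb.length() - 1); //remove trailing ,
    sb.append(")");

    System.out.println(sb); // prints: partition(p='1',q='2')
  }
}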
http://git-wip-us.apache.org/repos/asf/hive/blob/624bfa61/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
index f0d9ff2..7f5e091 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
@@ -18,12 +18,15 @@
 package org.apache.hadoop.hive.ql;
 
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
 import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
 import org.apache.hadoop.hive.metastore.txn.TxnStore;
 import org.apache.hadoop.hive.metastore.txn.TxnUtils;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.BucketCodec;
+import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
 import org.junit.Assert;
 import org.junit.Before;
@@ -34,8 +37,10 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.File;
+import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 public class TestTxnNoBuckets extends TxnCommandsBaseForTests {
@@ -640,5 +645,76 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver
     }
     Assert.assertTrue("Din't find expected 'vectorized' in plan", !vectorized);
   }
+  /**
+   * HIVE-17900
+   */
+  @Test
+  public void testCompactStatsGather() throws Exception {
+    hiveConf.setVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
+    runStatementOnDriver("drop table if exists T");
+    runStatementOnDriver("create table T(a int, b int) partitioned by (p int, q int) " +
+      "stored as orc TBLPROPERTIES ('transactional'='true')");
+
+    int[][] targetVals = {{4, 1, 1}, {4, 2, 2}, {4, 3, 1}, {4, 4, 2}};
+    //we only recompute stats after major compact if they existed before
+    runStatementOnDriver("insert into T partition(p=1,q) " + makeValuesClause(targetVals));
+    runStatementOnDriver("analyze table T partition(p=1) compute statistics for columns");
+
+    IMetaStoreClient hms = Hive.get().getMSC();
+    List<String> partNames = new ArrayList<>();
+    partNames.add("p=1/q=2");
+    List<String> colNames = new ArrayList<>();
+    colNames.add("a");
+    Map<String, List<ColumnStatisticsObj>> map = hms.getPartitionColumnStatistics("default",
+      "T", partNames, colNames);
+    Assert.assertEquals(4, map.get(partNames.get(0)).get(0).getStatsData().getLongStats().getHighValue());
+
+
+    int[][] targetVals2 = {{5, 1, 1}, {5, 2, 2}, {5, 3, 1}, {5, 4, 2}};
+    runStatementOnDriver("insert into T partition(p=1,q) " + makeValuesClause(targetVals2));
+
+    String query = "select ROW__ID, p, q, a, b, INPUT__FILE__NAME from T order by p, q, a, b";
+    List<String> rs = runStatementOnDriver(query);
+    String[][] expected = {
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000015_0000015_0000/bucket_00000"},
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000015_0000015_0000/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000017_0000017_0000/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000017_0000017_0000/bucket_00000"},
{"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/delta_0000015_0000015_0000/bucket_00000"}, + {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/delta_0000015_0000015_0000/bucket_00000"}, + {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/delta_0000017_0000017_0000/bucket_00000"}, + {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/delta_0000017_0000017_0000/bucket_00000"} + }; + checkExpected(rs, expected, "insert data"); + + //run major compaction + runStatementOnDriver("alter table T partition(p=1,q=2) compact 'major'"); + TestTxnCommands2.runWorker(hiveConf); + + query = "select ROW__ID, p, q, a, b, INPUT__FILE__NAME from T order by p, q, a, b"; + rs = runStatementOnDriver(query); + String[][] expected2 = { + {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000015_0000015_0000/bucket_00000"}, + {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000015_0000015_0000/bucket_00000"}, + {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000017_0000017_0000/bucket_00000"}, + {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000017_0000017_0000/bucket_00000"}, + {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/base_0000017/bucket_00000"}, + {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/base_0000017/bucket_00000"}, + {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/base_0000017/bucket_00000"}, + {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/base_0000017/bucket_00000"} + }; + checkExpected(rs, expected2, "after major compaction"); + + //check status of compaction job + TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf); + ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest()); + Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize()); + Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState()); + Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local")); + + //now check that stats were updated + map = hms.getPartitionColumnStatistics("default","T", partNames, colNames); + Assert.assertEquals("", 5, map.get(partNames.get(0)).get(0).getStatsData().getLongStats().getHighValue()); + } }