This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 2d923cb HIVE-25580: Increase the performance of getTableColumnStatistics and getPartitionColumnStatistics (Peter Vary reviewed by David Mollitor and Zoltan Haindrich) (#2692) 2d923cb is described below commit 2d923cbd38fff830cde31d7b643a8c28d775379f Author: pvary <pv...@cloudera.com> AuthorDate: Mon Oct 11 13:23:09 2021 +0200 HIVE-25580: Increase the performance of getTableColumnStatistics and getPartitionColumnStatistics (Peter Vary reviewed by David Mollitor and Zoltan Haindrich) (#2692) --- .../apache/hadoop/hive/metastore/ObjectStore.java | 8 +++-- .../hadoop/hive/metastore/TestObjectStore.java | 35 ++++++++++++++++++---- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 164cd5b..590884c 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -9848,8 +9848,10 @@ public class ObjectStore implements RawStore, Configurable { try { openTransaction(); query = pm.newQuery(MTableColumnStatistics.class); + query.setFilter("tableName == t1 && dbName == t2 && catName == t3"); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); query.setResult("DISTINCT engine"); - Collection names = (Collection) query.execute(); + Collection names = (Collection) query.execute(tableName, dbName, catName); List<String> engines = new ArrayList<>(); for (Iterator i = names.iterator(); i.hasNext();) { engines.add((String) i.next()); @@ -9954,8 +9956,10 @@ public class ObjectStore implements RawStore, Configurable { try { openTransaction(); query = pm.newQuery(MPartitionColumnStatistics.class); 
+ query.setFilter("tableName == t1 && dbName == t2 && catName == t3"); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); query.setResult("DISTINCT engine"); - Collection names = (Collection) query.execute(); + Collection names = (Collection) query.execute(tableName, dbName, catName); List<String> engines = new ArrayList<>(); for (Iterator i = names.iterator(); i.hasNext();) { engines.add((String) i.next()); diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java index bcfac9d..379dcba 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java @@ -24,7 +24,6 @@ import com.google.common.collect.ImmutableSet; import org.apache.hadoop.hive.metastore.ObjectStore.RetryingExecutor; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; -import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.Catalog; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; @@ -44,6 +43,7 @@ import org.apache.hadoop.hive.metastore.api.InvalidInputException; import org.apache.hadoop.hive.metastore.api.InvalidObjectException; import org.apache.hadoop.hive.metastore.api.ListPackageRequest; import org.apache.hadoop.hive.metastore.api.ListStoredProcedureRequest; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.NotificationEvent; @@ -602,6 +602,28 @@ 
public class TestObjectStore { checkBackendTableSize("SERDES", 1); // Table has a serde } + @Test + public void testGetPartitionStatistics() throws Exception { + createPartitionedTable(true, true); + + List<List<ColumnStatistics>> stat; + try (AutoCloseable c = deadline()) { + stat = objectStore.getPartitionColumnStatistics(DEFAULT_CATALOG_NAME, DB1, TABLE1, + Arrays.asList("test_part_col=a0", "test_part_col=a1", "test_part_col=a2"), + Arrays.asList("test_part_col")); + } + + Assert.assertEquals(1, stat.size()); + Assert.assertEquals(3, stat.get(0).size()); + Assert.assertEquals(ENGINE, stat.get(0).get(0).getEngine()); + Assert.assertEquals(1, stat.get(0).get(0).getStatsObj().size()); + Assert.assertTrue(stat.get(0).get(0).getStatsObj().get(0).getStatsData().isSetLongStats()); + Assert.assertEquals(1, stat.get(0).get(0).getStatsObj().get(0).getStatsData().getLongStats().getNumNulls()); + Assert.assertEquals(2, stat.get(0).get(0).getStatsObj().get(0).getStatsData().getLongStats().getNumDVs()); + Assert.assertEquals(3, stat.get(0).get(0).getStatsObj().get(0).getStatsData().getLongStats().getLowValue()); + Assert.assertEquals(4, stat.get(0).get(0).getStatsObj().get(0).getStatsData().getLongStats().getHighValue()); + } + /** * Creates DB1 database, TABLE1 table with 3 partitions. * @param withPrivileges Should we create privileges as well @@ -683,11 +705,12 @@ public class TestObjectStore { stats.setEngine(ENGINE); ColumnStatisticsData data = new ColumnStatisticsData(); - BooleanColumnStatsData boolStats = new BooleanColumnStatsData(); - boolStats.setNumTrues(0); - boolStats.setNumFalses(0); - boolStats.setNumNulls(0); - data.setBooleanStats(boolStats); + LongColumnStatsData longStats = new LongColumnStatsData(); + longStats.setNumNulls(1); + longStats.setNumDVs(2); + longStats.setLowValue(3); + longStats.setHighValue(4); + data.setLongStats(longStats); ColumnStatisticsObj partStats = new ColumnStatisticsObj("test_part_col", "int", data); statsObjList.add(partStats);