asolimando commented on code in PR #3137:
URL: https://github.com/apache/hive/pull/3137#discussion_r1039397581


##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java:
##########
@@ -51,73 +52,94 @@ public ColumnStatisticsObj 
aggregate(List<ColStatsObjWithSourceInfo> colStatsWit
     checkStatisticsList(colStatsWithSourceInfo);
 
     ColumnStatisticsObj statsObj = null;
-    String colType = null;
+    String colType;
     String colName = null;
     // check if all the ColumnStatisticsObjs contain stats and all the ndv are
     // bitvectors
     boolean doAllPartitionContainStats = partNames.size() == 
colStatsWithSourceInfo.size();
     NumDistinctValueEstimator ndvEstimator = null;
+    KllHistogramEstimator histogramEstimator = null;
+    boolean areAllNDVEstimatorsMergeable = true;
+    boolean areAllHistogramEstimatorsMergeable = true;
     for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
       ColumnStatisticsObj cso = csp.getColStatsObj();
       if (statsObj == null) {
         colName = cso.getColName();
         colType = cso.getColType();
         statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, 
colType,
             cso.getStatsData().getSetField());
-        LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName,
-            doAllPartitionContainStats);
+        LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName, 
doAllPartitionContainStats);
       }
-      LongColumnStatsDataInspector longColumnStatsData = 
longInspectorFromStats(cso);
-      if (longColumnStatsData.getNdvEstimator() == null) {
-        ndvEstimator = null;
-        break;
-      } else {
-        // check if all of the bit vectors can merge
-        NumDistinctValueEstimator estimator = 
longColumnStatsData.getNdvEstimator();
+      LongColumnStatsDataInspector columnStatsData = 
longInspectorFromStats(cso);
+
+      // check if we can merge NDV estimators
+      if (columnStatsData.getNdvEstimator() == null) {
+        areAllNDVEstimatorsMergeable = false;
+      } else if (areAllNDVEstimatorsMergeable) {
+        NumDistinctValueEstimator estimator = 
columnStatsData.getNdvEstimator();
         if (ndvEstimator == null) {
           ndvEstimator = estimator;
         } else {
-          if (ndvEstimator.canMerge(estimator)) {
-            continue;
-          } else {
-            ndvEstimator = null;
-            break;
+          if (!ndvEstimator.canMerge(estimator)) {
+            areAllNDVEstimatorsMergeable = false;
+          }
+        }
+      }
+      // check if we can merge histogram estimators
+      if (columnStatsData.getHistogramEstimator() == null) {

Review Comment:
   You are right, I have double checked and indeed it can be simplified as you 
suggest, I have added those fours lines you have cited right before the 
`return` statement of `aggregate` method and it's enough.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to