godfreyhe commented on code in PR #20595:
URL: https://github.com/apache/flink/pull/20595#discussion_r948904096


##########
flink-table/flink-table-common/src/test/java/org/apache/flink/table/plan/stats/TableStatsTest.java:
##########
@@ -55,23 +63,29 @@ void testMergeLackColumnStats() {
         TableStats stats2 = new TableStats(32, colStats2);
 
         Map<String, ColumnStats> colStatsMerge = new HashMap<>();
-        colStatsMerge.put("a", new ColumnStats(7L, 20L, 7D, 23, 35, 2));
-        assertThat(stats1.merge(stats2)).isEqualTo(new TableStats(62, colStatsMerge));
+        colStatsMerge.put("a", new ColumnStats(4L, 20L, 7D, 23, 35, 2));
+        assertThat(stats1.merge(stats2, null)).isEqualTo(new TableStats(62, colStatsMerge));
+        // Test column stats merge when column 'a' is a partition key: the merged NDV of
+        // partition-key columns uses sum instead of max.
+        Map<String, ColumnStats> colStatsMerge2 = new HashMap<>();
+        colStatsMerge2.put("a", new ColumnStats(7L, 20L, 7D, 23, 35, 2));
+        assertThat(stats1.merge(stats2, new HashSet<>(Collections.singletonList("a"))))
+                .isEqualTo(new TableStats(62, colStatsMerge2));

Review Comment:
   The test does not cover merging non-partition columns
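
   For example (a sketch only; the non-partition column 'b' and its expected merged values are
   hypothetical and would need to match the stats the test puts into colStats1 and colStats2),
   the expected map could also carry a non-partition column so that the max-based NDV merge is
   exercised in the same assertion:

       colStatsMerge2.put("b", new ColumnStats(10L, 5L, 4D, 16, 100, 0));
       assertThat(stats1.merge(stats2, new HashSet<>(Collections.singletonList("a"))))
               .isEqualTo(new TableStats(62, colStatsMerge2));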



##########
flink-table/flink-table-planner/src/main/scala/org/apache/flink/table/planner/plan/optimize/program/FlinkRecomputeStatisticsProgram.java:
##########
@@ -207,12 +209,24 @@ private Optional<TableStats> getPartitionStats(
             return Optional.of(
                     convertToAccumulatedTableStates(
                             catalog.bulkGetPartitionStatistics(tablePath, partitionSpecs),
-                            catalog.bulkGetPartitionColumnStatistics(tablePath, partitionSpecs)));
+                            catalog.bulkGetPartitionColumnStatistics(tablePath, partitionSpecs),
+                            getPartitionKeys(partitionSpecs)));
         } catch (PartitionNotExistException e) {
             return Optional.empty();
         }
     }
 
+    private static Set<String> getPartitionKeys(List<CatalogPartitionSpec> catalogPartitionSpecs) {
+        Set<String> partitionKeys = new HashSet<>();
+        for (CatalogPartitionSpec catalogPartitionSpec : catalogPartitionSpecs) {
+            Map<String, String> partitionSpec = catalogPartitionSpec.getPartitionSpec();
+            for (Map.Entry<String, String> entry : partitionSpec.entrySet()) {
+                partitionKeys.add(entry.getKey());
+            }

Review Comment:
   partitionKeys.addAll(partitionSpec.keySet());
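
   With that, the whole helper could collapse to the following (a sketch; behavior unchanged):

       private static Set<String> getPartitionKeys(List<CatalogPartitionSpec> catalogPartitionSpecs) {
           Set<String> partitionKeys = new HashSet<>();
           for (CatalogPartitionSpec catalogPartitionSpec : catalogPartitionSpecs) {
               // a partition spec maps partition column name -> value; only the keys are needed
               partitionKeys.addAll(catalogPartitionSpec.getPartitionSpec().keySet());
           }
           return partitionKeys;
       }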



##########
flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/utils/CatalogTableStatisticsConverter.java:
##########
@@ -80,7 +82,16 @@ public static TableStats convertToAccumulatedTableStates(
                     CatalogTableStatisticsConverter.convertToTableStats(
                             catalogTableStatistics, catalogColumnStatistics));
         }
-        return tableStats.stream().reduce(TableStats::merge).orElse(TableStats.UNKNOWN);
+
+        if (tableStats.isEmpty()) {
+            return TableStats.UNKNOWN;
+        }
+
+        TableStats resultTableStats = tableStats.get(0);
+        for (int i = 1; i < tableStats.size(); i++) {
+            resultTableStats = resultTableStats.merge(tableStats.get(i), partitionKeys);
+        }
+        return resultTableStats;

Review Comment:
   return tableStats.stream()
           .reduce((s1, s2) -> s1.merge(s2, partitionKeys))
           .orElse(TableStats.UNKNOWN);
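
   Note that the lambda is needed here (rather than the previous TableStats::merge method
   reference) because the two-argument merge also takes the partition-key set.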



##########
flink-connectors/flink-connector-hive/src/main/java/org/apache/flink/table/catalog/hive/util/HiveStatsUtil.java:
##########
@@ -312,18 +313,25 @@ private static Long getPartitionColumnNullCount(
                                                                     p.getParameters())
                                                             .getRowCount()))
                             .collect(Collectors.toList());
-            Optional<TableStats> optionalTableStats =
-                    catalogTableStatistics.stream().reduce(TableStats::merge);
-            if (!optionalTableStats.isPresent()) {

Review Comment:
   return tableStats.stream()
                   .reduce((s1, s2) -> s1.merge(s2, partitionKeys))
                   .orElse(TableStats.UNKNOWN);
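
   Adapted to the names visible in this hunk (a sketch; it assumes the reduced list is the
   catalogTableStatistics built above and that the partition-key set is available in this
   method, e.g. as a hypothetical partitionKeys parameter):

       return catalogTableStatistics.stream()
               .reduce((s1, s2) -> s1.merge(s2, partitionKeys))
               .orElse(TableStats.UNKNOWN);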


