GitHub user parthchandra commented on a diff in the pull request: https://github.com/apache/drill/pull/949#discussion_r140033471 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java --- @@ -819,63 +827,64 @@ private void init() throws IOException { } } rowGroupInfo.setEndpointByteMap(endpointByteMap); + rowGroupInfo.setColumns(rg.getColumns()); rgIndex++; rowGroupInfos.add(rowGroupInfo); } } this.endpointAffinities = AffinityCreator.getAffinityMap(rowGroupInfos); + updatePartitionColTypeMap(); + } + private void updatePartitionColTypeMap() { columnValueCounts = Maps.newHashMap(); this.rowCount = 0; boolean first = true; - for (ParquetFileMetadata file : parquetTableMetadata.getFiles()) { - for (RowGroupMetadata rowGroup : file.getRowGroups()) { - long rowCount = rowGroup.getRowCount(); - for (ColumnMetadata column : rowGroup.getColumns()) { - SchemaPath schemaPath = SchemaPath.getCompoundPath(column.getName()); - Long previousCount = columnValueCounts.get(schemaPath); - if (previousCount != null) { - if (previousCount != GroupScan.NO_COLUMN_STATS) { - if (column.getNulls() != null) { - Long newCount = rowCount - column.getNulls(); - columnValueCounts.put(schemaPath, columnValueCounts.get(schemaPath) + newCount); - } - } - } else { + for (RowGroupInfo rowGroup : this.rowGroupInfos) { --- End diff -- Isn't this doing the same thing as the original code? Isn't rowGroupInfos built from the RowGroupMetadata in the files?
---