[GitHub] [drill] vvysotskyi commented on a change in pull request #1810: DRILL-7271: Refactor Metadata interfaces and classes to contain all needed information for the File based Metastore

GitBox Fri, 21 Jun 2019 08:43:34 -0700

vvysotskyi commented on a change in pull request #1810: DRILL-7271: Refactor 
Metadata interfaces and classes to contain all needed information for the File 
based Metastore
URL: https://github.com/apache/drill/pull/1810#discussion_r296288223


 ##########
 File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetTableMetadataUtils.java
 ##########
 @@ -148,112 +142,71 @@ private ParquetTableMetadataUtils() {
   public static RowGroupMetadata 
getRowGroupMetadata(MetadataBase.ParquetTableMetadataBase tableMetadata,
       MetadataBase.RowGroupMetadata rowGroupMetadata, int rgIndexInFile, Path 
location) {
     Map<SchemaPath, ColumnStatistics> columnsStatistics = 
getRowGroupColumnStatistics(tableMetadata, rowGroupMetadata);
-    Map<StatisticsKind, Object> rowGroupStatistics = new HashMap<>();
-    rowGroupStatistics.put(TableStatisticsKind.ROW_COUNT, 
rowGroupMetadata.getRowCount());
-    rowGroupStatistics.put(() -> ExactStatisticsConstants.START, 
rowGroupMetadata.getStart());
-    rowGroupStatistics.put(() -> ExactStatisticsConstants.LENGTH, 
rowGroupMetadata.getLength());
+    List<StatisticsHolder> rowGroupStatistics = new ArrayList<>();
+    rowGroupStatistics.add(new 
StatisticsHolder<>(rowGroupMetadata.getRowCount(), 
TableStatisticsKind.ROW_COUNT));
+    rowGroupStatistics.add(new StatisticsHolder<>(rowGroupMetadata.getStart(), 
new BaseStatisticsKind(ExactStatisticsConstants.START, true)));
+    rowGroupStatistics.add(new 
StatisticsHolder<>(rowGroupMetadata.getLength(), new 
BaseStatisticsKind(ExactStatisticsConstants.LENGTH, true)));
 
     Map<SchemaPath, TypeProtos.MajorType> columns = 
getRowGroupFields(tableMetadata, rowGroupMetadata);
 
     TupleSchema schema = new TupleSchema();
     columns.forEach((schemaPath, majorType) -> 
MetadataUtils.addColumnMetadata(schema, schemaPath, majorType));
 
-    return new RowGroupMetadata(
-        schema, columnsStatistics, rowGroupStatistics, 
rowGroupMetadata.getHostAffinity(), rgIndexInFile, location);
-  }
+    MetadataInfo metadataInfo = new MetadataInfo(MetadataType.ROW_GROUP, 
MetadataInfo.GENERAL_INFO_KEY, null);
 
-  /**
-   * Merges list of specified metadata into the map of {@link 
ColumnStatistics} with columns as keys.
-   *
-   * @param <T>                 type of metadata to collect
-   * @param metadataList        list of metadata to be merged
-   * @param columns             set of columns whose statistics should be 
merged
-   * @param statisticsToCollect kinds of statistics that should be collected
-   * @param parquetTableMetadata ParquetTableMetadata object to fetch the 
non-interesting columns
-   * @return list of merged metadata
-   */
-  @SuppressWarnings("unchecked")
-  public static <T extends BaseMetadata> Map<SchemaPath, ColumnStatistics> 
mergeColumnsStatistics(
-          Collection<T> metadataList, Set<SchemaPath> columns, 
List<CollectableColumnStatisticsKind> statisticsToCollect, 
MetadataBase.ParquetTableMetadataBase parquetTableMetadata) {
-    Map<SchemaPath, ColumnStatistics> columnsStatistics = new HashMap<>();
-
-    for (SchemaPath column : columns) {
-      List<ColumnStatistics> statisticsList = new ArrayList<>();
-      for (T metadata : metadataList) {
-        ColumnStatistics statistics = 
metadata.getColumnsStatistics().get(column);
-        if (statistics == null) {
-          // schema change happened, set statistics which represents all nulls
-          statistics = new ColumnStatisticsImpl(
-              ImmutableMap.of(ColumnStatisticsKind.NULLS_COUNT, 
metadata.getStatistic(TableStatisticsKind.ROW_COUNT)),
-              getNaturalNullsFirstComparator());
-        }
-        statisticsList.add(statistics);
-      }
-      Map<StatisticsKind, Object> statisticsMap = new HashMap<>();
-      for (CollectableColumnStatisticsKind statisticsKind : 
statisticsToCollect) {
-        Object mergedStatistic = 
statisticsKind.mergeStatistics(statisticsList);
-        statisticsMap.put(statisticsKind, mergedStatistic);
-      }
-      columnsStatistics.put(column, new ColumnStatisticsImpl(statisticsMap, 
statisticsList.iterator().next().getValueComparator()));
-    }
-    return columnsStatistics;
+    return new RowGroupMetadata(TableInfo.UNKNOWN_TABLE_INFO, metadataInfo,
+        schema, columnsStatistics, rowGroupStatistics, 
rowGroupMetadata.getHostAffinity(), rgIndexInFile, location);
   }
 
   /**
    * Returns {@link FileMetadata} instance received by merging specified 
{@link RowGroupMetadata} list.
    *
    * @param rowGroups list of {@link RowGroupMetadata} to be merged
-   * @param tableName name of the table
-   * @param parquetTableMetadata the source of column metadata for 
non-interesting column's statistics
    * @return {@link FileMetadata} instance
    */
-  public static FileMetadata getFileMetadata(List<RowGroupMetadata> rowGroups, 
String tableName,
-      MetadataBase.ParquetTableMetadataBase parquetTableMetadata) {
+  public static FileMetadata getFileMetadata(List<RowGroupMetadata> rowGroups) 
{
     if (rowGroups.isEmpty()) {
       return null;
     }
-    Map<StatisticsKind, Object> fileStatistics = new HashMap<>();
-    fileStatistics.put(TableStatisticsKind.ROW_COUNT, 
TableStatisticsKind.ROW_COUNT.mergeStatistics(rowGroups));
+    List<StatisticsHolder> fileStatistics = new ArrayList<>();
+    fileStatistics.add(new 
StatisticsHolder<>(TableStatisticsKind.ROW_COUNT.mergeStatistics(rowGroups), 
TableStatisticsKind.ROW_COUNT));
+
+    RowGroupMetadata rowGroupMetadata = rowGroups.iterator().next();
+    TupleMetadata schema = rowGroupMetadata.getSchema();
+
+    Set<SchemaPath> columns = rowGroupMetadata.getColumnsStatistics().keySet();
 
-    TupleMetadata schema = rowGroups.iterator().next().getSchema();
+    MetadataInfo metadataInfo = new MetadataInfo(MetadataType.FILE, 
MetadataInfo.GENERAL_INFO_KEY, null);
 
-    return new FileMetadata(rowGroups.iterator().next().getLocation(), schema,
-      mergeColumnsStatistics(rowGroups, 
rowGroups.iterator().next().getColumnsStatistics().keySet(), 
PARQUET_STATISTICS, parquetTableMetadata),
-      fileStatistics, tableName, -1);
+    return new FileMetadata(rowGroupMetadata.getTableInfo(), metadataInfo, 
rowGroupMetadata.getPath(), schema,
+        TableMetadataUtils.mergeColumnsStatistics(rowGroups, columns, 
PARQUET_COLUMN_STATISTICS),
+        fileStatistics, BaseTableMetadata.NON_DEFINED_LAST_MODIFIED_TIME);
   }
 
   /**
    * Returns {@link PartitionMetadata} instance received by merging specified 
{@link FileMetadata} list.
    *
    * @param partitionColumn partition column
    * @param files           list of files to be merged
-   * @param tableName       name of the table
    * @return {@link PartitionMetadata} instance
    */
-  public static PartitionMetadata getPartitionMetadata(SchemaPath 
partitionColumn, List<FileMetadata> files, String tableName) {
+  public static PartitionMetadata getPartitionMetadata(SchemaPath 
partitionColumn, List<FileMetadata> files) {
     Set<Path> locations = new HashSet<>();
     Set<SchemaPath> columns = new HashSet<>();
 
     for (FileMetadata file : files) {
       columns.addAll(file.getColumnsStatistics().keySet());
-      locations.add(file.getLocation());
+      locations.add(file.getPath());
     }
 
-    Map<StatisticsKind, Object> partStatistics = new HashMap<>();
-    partStatistics.put(TableStatisticsKind.ROW_COUNT, 
TableStatisticsKind.ROW_COUNT.mergeStatistics(files));
+    FileMetadata fileMetadata = files.iterator().next();
 
-    return new PartitionMetadata(partitionColumn, 
files.iterator().next().getSchema(),
-        mergeColumnsStatistics(files, columns, PARQUET_STATISTICS, null), 
partStatistics, locations, tableName, -1);
-  }
+    MetadataInfo metadataInfo = new MetadataInfo(MetadataType.PARTITION, 
MetadataInfo.GENERAL_INFO_KEY, null);
 
-  /**
-   * Returns "natural order" comparator which threads nulls as min values.
-   *
-   * @param <T> type to compare
-   * @return "natural order" comparator
-   */
-  public static <T extends Comparable<T>> Comparator<T> 
getNaturalNullsFirstComparator() {
-    return Comparator.nullsFirst(Comparator.naturalOrder());
+    return new PartitionMetadata(fileMetadata.getTableInfo(), metadataInfo, 
partitionColumn, fileMetadata.getSchema(),
 
 Review comment:
   Done.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [drill] vvysotskyi commented on a change in pull request #1810: DRILL-7271: Refactor Metadata interfaces and classes to contain all needed information for the File based Metastore

Reply via email to