vvysotskyi commented on a change in pull request #1810: DRILL-7271: Refactor 
Metadata interfaces and classes to contain all needed information for the File 
based Metastore
URL: https://github.com/apache/drill/pull/1810#discussion_r296253667
 
 

 ##########
 File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/BaseParquetMetadataProvider.java
 ##########
 @@ -361,6 +365,102 @@ public FileMetadata getFileMetadata(Path location) {
     return new ArrayList<>(getFilesMetadataMap().values());
   }
 
+  @Override
+  public List<SegmentMetadata> getSegmentsMetadata() {
+    return new ArrayList<>(getSegmentsMetadataMap().values());
+  }
+
+  @Override
+  public Map<Path, SegmentMetadata> getSegmentsMetadataMap() {
+    if (segments == null) {
+      if (entries.isEmpty() || !collectMetadata) {
+        return Collections.emptyMap();
+      }
+
+      segments = new LinkedHashMap<>();
+
+      Path fileLocation = getFilesMetadata().iterator().next().getPath();
+      int levelsCount = fileLocation.depth() - tableLocation.depth();
+
+      Map<Path, FileMetadata> filesMetadata = getFilesMetadataMap();
+      int segmentsIndex = levelsCount - 1;
+      Map<Path, SegmentMetadata> segmentMetadata = 
getSegmentsForMetadata(filesMetadata,
+          SchemaPath.getSimplePath(MetadataInfo.DEFAULT_COLUMN_PREFIX + 
segmentsIndex));
+      segments.putAll(segmentMetadata);
+      for (int i = segmentsIndex - 1; i >= 0; i--) {
+        String segmentColumn = MetadataInfo.DEFAULT_COLUMN_PREFIX + i;
+        segmentMetadata = getMetadataForSegments(segmentMetadata,
+            SchemaPath.getSimplePath(segmentColumn));
+        segments.putAll(segmentMetadata);
+      }
+
+    }
+    return segments;
+  }
+
+  private static <T extends BaseMetadata & LocationProvider> Map<Path, 
SegmentMetadata> getSegmentsForMetadata(
+      Map<Path, T> metadata, SchemaPath column) {
+    Multimap<Path, T> metadataMultimap = LinkedListMultimap.create();
+    metadata.forEach((key, value) -> metadataMultimap.put(key.getParent(), 
value));
+
+    Map<Path, SegmentMetadata> result = new HashMap<>();
+    metadataMultimap.asMap().forEach((key, value) -> result.put(key, 
combineToSegmentMetadata(value, column)));
+
+    return result;
+  }
+
+  private static Map<Path, SegmentMetadata> getMetadataForSegments(Map<Path, 
SegmentMetadata> metadata, SchemaPath column) {
+    Multimap<Path, SegmentMetadata> metadataMultimap = 
LinkedListMultimap.create();
+    metadata.forEach((key, value) -> metadataMultimap.put(key.getParent(), 
value));
+
+    Map<Path, SegmentMetadata> result = new HashMap<>();
+    metadataMultimap.asMap().forEach((key, value) -> result.put(key, 
combineSegmentMetadata(value, column)));
+
+    return result;
+  }
+
+  private static <T extends BaseMetadata & LocationProvider> SegmentMetadata 
combineToSegmentMetadata(Collection<T> metadataList, SchemaPath column) {
+    List<Path> metadataLocations = metadataList.stream()
+        .map(metadata -> metadata.getPath()) // used lambda instead of method reference due to JDK-8141508
+        .collect(Collectors.toList());
+    return combineToSegmentMetadata(metadataList, column, metadataLocations);
+  }
+
+  private static SegmentMetadata 
combineSegmentMetadata(Collection<SegmentMetadata> metadataList, SchemaPath 
column) {
+    List<Path> metadataLocations = metadataList.stream()
+        .flatMap(metadata -> metadata.getLocations().stream())
+        .collect(Collectors.toList());
+
+    return combineToSegmentMetadata(metadataList, column, metadataLocations);
+  }
+
+  /**
+   * Returns {@link SegmentMetadata} which is combined metadata of list of specified metadata
+   *
+   * @param metadataList      metadata to combine
+   * @param column            segment column
+   * @param metadataLocations locations of metadata combined in resulting segment
+   * @param <T>               type of metadata to combine
+   * @return {@link SegmentMetadata} from combined metadata
+   */
+  private static <T extends BaseMetadata & LocationProvider> SegmentMetadata 
combineToSegmentMetadata(Collection<T> metadataList,
+      SchemaPath column, List<Path> metadataLocations) {
+    List<StatisticsHolder> segmentStatistics =
+        Collections.singletonList(
+            new StatisticsHolder<>(
+                TableStatisticsKind.ROW_COUNT.mergeStatistics(metadataList),
+                TableStatisticsKind.ROW_COUNT));
+    // this code is used only to collect segment metadata to be used only during filtering,
+    // so metadata identifier is not required here and in other places in this class
+    MetadataInfo metadataInfo = new MetadataInfo(MetadataType.SEGMENT, 
MetadataInfo.GENERAL_INFO_KEY, null);
+    T firstMetadata = metadataList.iterator().next();
+
+    return new SegmentMetadata(firstMetadata.getTableInfo(), metadataInfo, 
column, firstMetadata.getSchema(),
+        metadataList.iterator().next().getPath().getParent(),
 
 Review comment:
   Done.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Reply via email to