vvysotskyi commented on a change in pull request #1810: DRILL-7271: Refactor Metadata interfaces and classes to contain all needed information for the File based Metastore URL: https://github.com/apache/drill/pull/1810#discussion_r296253667
########## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/BaseParquetMetadataProvider.java ########## @@ -361,6 +365,102 @@ public FileMetadata getFileMetadata(Path location) { return new ArrayList<>(getFilesMetadataMap().values()); } + @Override + public List<SegmentMetadata> getSegmentsMetadata() { + return new ArrayList<>(getSegmentsMetadataMap().values()); + } + + @Override + public Map<Path, SegmentMetadata> getSegmentsMetadataMap() { + if (segments == null) { + if (entries.isEmpty() || !collectMetadata) { + return Collections.emptyMap(); + } + + segments = new LinkedHashMap<>(); + + Path fileLocation = getFilesMetadata().iterator().next().getPath(); + int levelsCount = fileLocation.depth() - tableLocation.depth(); + + Map<Path, FileMetadata> filesMetadata = getFilesMetadataMap(); + int segmentsIndex = levelsCount - 1; + Map<Path, SegmentMetadata> segmentMetadata = getSegmentsForMetadata(filesMetadata, + SchemaPath.getSimplePath(MetadataInfo.DEFAULT_COLUMN_PREFIX + segmentsIndex)); + segments.putAll(segmentMetadata); + for (int i = segmentsIndex - 1; i >= 0; i--) { + String segmentColumn = MetadataInfo.DEFAULT_COLUMN_PREFIX + i; + segmentMetadata = getMetadataForSegments(segmentMetadata, + SchemaPath.getSimplePath(segmentColumn)); + segments.putAll(segmentMetadata); + } + + } + return segments; + } + + private static <T extends BaseMetadata & LocationProvider> Map<Path, SegmentMetadata> getSegmentsForMetadata( + Map<Path, T> metadata, SchemaPath column) { + Multimap<Path, T> metadataMultimap = LinkedListMultimap.create(); + metadata.forEach((key, value) -> metadataMultimap.put(key.getParent(), value)); + + Map<Path, SegmentMetadata> result = new HashMap<>(); + metadataMultimap.asMap().forEach((key, value) -> result.put(key, combineToSegmentMetadata(value, column))); + + return result; + } + + private static Map<Path, SegmentMetadata> getMetadataForSegments(Map<Path, SegmentMetadata> metadata, SchemaPath column) { + Multimap<Path, SegmentMetadata> metadataMultimap = LinkedListMultimap.create(); + metadata.forEach((key, value) -> metadataMultimap.put(key.getParent(), value)); + + Map<Path, SegmentMetadata> result = new HashMap<>(); + metadataMultimap.asMap().forEach((key, value) -> result.put(key, combineSegmentMetadata(value, column))); + + return result; + } + + private static <T extends BaseMetadata & LocationProvider> SegmentMetadata combineToSegmentMetadata(Collection<T> metadataList, SchemaPath column) { + List<Path> metadataLocations = metadataList.stream() + .map(metadata -> metadata.getPath()) // used lambda instead of method reference due to JDK-8141508 + .collect(Collectors.toList()); + return combineToSegmentMetadata(metadataList, column, metadataLocations); + } + + private static SegmentMetadata combineSegmentMetadata(Collection<SegmentMetadata> metadataList, SchemaPath column) { + List<Path> metadataLocations = metadataList.stream() + .flatMap(metadata -> metadata.getLocations().stream()) + .collect(Collectors.toList()); + + return combineToSegmentMetadata(metadataList, column, metadataLocations); + } + + /** + * Returns {@link SegmentMetadata} which is combined metadata of list of specified metadata + * + * @param metadataList metadata to combine + * @param column segment column + * @param metadataLocations locations of metadata combined in resulting segment + * @param <T> type of metadata to combine + * @return {@link SegmentMetadata} from combined metadata + */ + private static <T extends BaseMetadata & LocationProvider> SegmentMetadata combineToSegmentMetadata(Collection<T> metadataList, + SchemaPath column, List<Path> metadataLocations) { + List<StatisticsHolder> segmentStatistics = + Collections.singletonList( + new StatisticsHolder<>( + TableStatisticsKind.ROW_COUNT.mergeStatistics(metadataList), + TableStatisticsKind.ROW_COUNT)); + // this code is used only to collect segment metadata to be used only during filtering, + // so metadata identifier is not required here and in other places in this class + MetadataInfo metadataInfo = new MetadataInfo(MetadataType.SEGMENT, MetadataInfo.GENERAL_INFO_KEY, null); + T firstMetadata = metadataList.iterator().next(); + + return new SegmentMetadata(firstMetadata.getTableInfo(), metadataInfo, column, firstMetadata.getSchema(), + metadataList.iterator().next().getPath().getParent(), Review comment: Done. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services