[ https://issues.apache.org/jira/browse/DRILL-7330?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17062738#comment-17062738 ]
ASF GitHub Bot commented on DRILL-7330: --------------------------------------- vvysotskyi commented on pull request #2026: DRILL-7330: Implement metadata usage for all format plugins URL: https://github.com/apache/drill/pull/2026#discussion_r395113673 ########## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyGroupScan.java ########## @@ -333,21 +338,73 @@ public boolean canPushdownProjects(List<SchemaPath> columns) { } @JsonProperty + @JsonIgnore(value = false) public TupleMetadata getSchema() { return getTableMetadata().getSchema(); } @Override - @JsonIgnore - public TableMetadata getTableMetadata() { - if (tableMetadata == null) { - tableMetadata = metadataProvider.getTableMetadata(); + public AnalyzeInfoProvider getAnalyzeInfoProvider() { + return new AnalyzeFileInfoProviderImpl(formatPlugin.getName()); + } + + @Override + protected GroupScanWithMetadataFilterer<?> getFilterer() { + return new EasyGroupScanFilterer(this); + } + + @Override + protected FileTableMetadataProviderBuilder<?> tableMetadataProviderBuilder(MetadataProviderManager source) { + if (source.usesMetastore()) { + return new MetastoreFileTableMetadataProvider.Builder<>((MetastoreMetadataProviderManager) source); + } else { + return defaultTableMetadataProviderBuilder(source); } - return tableMetadata; } @Override - public TableMetadataProvider getMetadataProvider() { - return metadataProvider; + protected FileTableMetadataProviderBuilder<?> defaultTableMetadataProviderBuilder(MetadataProviderManager source) { + return new SimpleFileTableMetadataProvider.Builder(source); + } + + /** + * Implementation of GroupScanWithMetadataFilterer which uses {@link EasyGroupScan} as source and + * builds {@link EasyGroupScan} instance with filtered metadata. 
+ */ + private static class EasyGroupScanFilterer extends GroupScanWithMetadataFilterer<EasyGroupScanFilterer> { + + EasyGroupScanFilterer(EasyGroupScan source) { + super(source); + } + + @Override + public AbstractGroupScanWithMetadata<?> build() { + EasyGroupScan newScan = new EasyGroupScan((EasyGroupScan) source); + newScan.tableMetadata = tableMetadata; + // updates common row count and nulls counts for every column + if (newScan.getTableMetadata() != null && files != null && newScan.getFilesMetadata().size() != files.size()) { + newScan.tableMetadata = TableMetadataUtils.updateRowCount(newScan.getTableMetadata(), files.values()); + } + newScan.partitions = partitions; + newScan.segments = segments; + newScan.files = files; + newScan.matchAllMetadata = matchAllMetadata; + newScan.nonInterestingColumnsMetadata = nonInterestingColumnsMetadata; + + newScan.fileSet = newScan.getFilesMetadata().keySet(); + newScan.selection = FileSelection.create(null, new ArrayList<>(newScan.fileSet), newScan.selectionRoot); + try { + newScan.initFromSelection(newScan.selection, newScan.formatPlugin); + } catch (IOException e) { + throw new RuntimeException("Failed to initialize scan from the selection.", e); + } + + return newScan; + } + + @Override + protected EasyGroupScanFilterer self() { + return this; + } Review comment: Its parent class is `AbstractGroupScanWithMetadata.GroupScanWithMetadataFilterer`, which is used in the `AbstractGroupScanWithMetadata.applyFilter()` method. This class is extended by `AbstractParquetGroupScan.RowGroupScanFilterer`, where additional methods are provided. `AbstractParquetGroupScan.RowGroupScanFilterer` is used in `AbstractParquetGroupScan.applyFilter()`. Another example of such an approach is the `BaseMetadataBuilder` class. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Implement metadata usage for text format plugin > ----------------------------------------------- > > Key: DRILL-7330 > URL: https://issues.apache.org/jira/browse/DRILL-7330 > Project: Apache Drill > Issue Type: Sub-task > Reporter: Arina Ielchiieva > Assignee: Vova Vysotskyi > Priority: Major > Fix For: 1.18.0 > > > 1. Change the current group scan to leverage Schema from Metastore; > 2. Use stats for enabling additional logical planning rules for text format > plugin. It will enable such optimizations as limit, filter push and so on. > + add possibility to pass schema through schema file (using path or table > root), inline. > + check for other enhancements in analyze command -- This message was sent by Atlassian Jira (v8.3.4#803005)