GitHub user dprofeta commented on a diff in the pull request: https://github.com/apache/drill/pull/976#discussion_r143403559 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java --- @@ -156,18 +160,39 @@ public ScanBatch getBatch(FragmentContext context, ParquetRowGroupScan rowGroupS return new ScanBatch(rowGroupScan, context, oContext, readers, implicitColumns); } - private static boolean isComplex(ParquetMetadata footer) { - MessageType schema = footer.getFileMetaData().getSchema(); + private static boolean isComplex(ParquetMetadata footer, List<SchemaPath> columns) { + if (Utilities.isStarQuery(columns)) { + MessageType schema = footer.getFileMetaData().getSchema(); - for (Type type : schema.getFields()) { - if (!type.isPrimitive()) { - return true; + for (Type type : schema.getFields()) { + if (!type.isPrimitive()) { + return true; + } } - } - for (ColumnDescriptor col : schema.getColumns()) { - if (col.getMaxRepetitionLevel() > 0) { - return true; + for (ColumnDescriptor col : schema.getColumns()) { + if (col.getMaxRepetitionLevel() > 0) { + return true; + } + } + return false; + } else { + for (SchemaPath column : columns) { + if (isColumnComplex(footer.getFileMetaData().getSchema(), column)) { + return true; + } } + return false; + } + } + + private static boolean isColumnComplex(GroupType grouptype, SchemaPath column) { + PathSegment.NameSegment root = column.getRootSegment(); + if (!grouptype.containsField(root.getPath().toLowerCase())) { + return false; + } + Type type = grouptype.getType(root.getPath().toLowerCase()); + if (type.isRepetition(Type.Repetition.REPEATED) || !type.isPrimitive()) { --- End diff -- Yes, sure. I originally wanted to check it in a loop, but ParquetRecordReader doesn't handle any nested types, so the loop is no longer needed. I just didn't refactor enough.
---