danielxjd commented on a change in pull request #12786:
URL: https://github.com/apache/beam/pull/12786#discussion_r485260306
##########
File path:
sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java
##########
@@ -336,36 +388,41 @@ public void processElement(
+ tracker.currentRestriction().getFrom()
+ " to "
+ tracker.currentRestriction().getTo());
- ParquetReadOptions options =
HadoopReadOptions.builder(getConfWithModelClass()).build();
- ParquetFileReader reader =
- ParquetFileReader.open(new
BeamParquetInputFile(file.openSeekable()), options);
+ Configuration conf = getConfWithModelClass();
GenericData model = null;
if (modelClass != null) {
model = (GenericData) modelClass.getMethod("get").invoke(null);
}
- ReadSupport<GenericRecord> readSupport = new
AvroReadSupport<GenericRecord>(model);
-
+ AvroReadSupport<GenericRecord> readSupport = new
AvroReadSupport<GenericRecord>(model);
+ if (requestSchemaString != null) {
+ AvroReadSupport.setRequestedProjection(
+ conf, new Schema.Parser().parse(requestSchemaString));
+ }
+ ParquetReadOptions options = HadoopReadOptions.builder(conf).build();
+ ParquetFileReader reader =
+ ParquetFileReader.open(new
BeamParquetInputFile(file.openSeekable()), options);
Filter filter = checkNotNull(options.getRecordFilter(), "filter");
Configuration hadoopConf = ((HadoopReadOptions) options).getConf();
+ for (String property : options.getPropertyNames()) {
+ hadoopConf.set(property, options.getProperty(property));
Review comment:
You are right, I should delete this part.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org