luocooong commented on code in PR #2515:
URL: https://github.com/apache/drill/pull/2515#discussion_r851624724
##########
contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5BatchReader.java:
##########
@@ -171,107 +168,109 @@ public HDF5ReaderConfig(HDF5FormatPlugin plugin, HDF5FormatConfig formatConfig)
     }
   }
 
-  public HDF5BatchReader(HDF5ReaderConfig readerConfig, int maxRecords) {
-    this.readerConfig = readerConfig;
-    this.maxRecords = maxRecords;
+  public HDF5BatchReader(HDF5ReaderConfig config, EasySubScan scan, FileSchemaNegotiator negotiator) {
+    errorContext = negotiator.parentErrorContext();
+    file = negotiator.file();
+    readerConfig = config;
     dataWriters = new ArrayList<>();
-    this.showMetadataPreview = readerConfig.formatConfig.showPreview();
-  }
+    showMetadataPreview = readerConfig.formatConfig.showPreview();
 
-  @Override
-  public boolean open(FileSchemaNegotiator negotiator) {
-    split = negotiator.split();
-    errorContext = negotiator.parentErrorContext();
     // Since the HDF file reader uses a stream to actually read the file, the file name from the
     // module is incorrect.
-    fileName = split.getPath().getName();
-    try {
-      openFile(negotiator);
-    } catch (IOException e) {
-      throw UserException
-        .dataReadError(e)
-        .addContext("Failed to close input file: %s", split.getPath())
-        .addContext(errorContext)
-        .build(logger);
-    }
+    fileName = file.split().getPath().getName();
 
-    ResultSetLoader loader;
-    if (readerConfig.defaultPath == null) {
-      // Get file metadata
-      List<HDF5DrillMetadata> metadata = getFileMetadata(hdfFile, new ArrayList<>());
-      metadataIterator = metadata.iterator();
-
-      // Schema for Metadata query
-      SchemaBuilder builder = new SchemaBuilder()
-        .addNullable(PATH_COLUMN_NAME, MinorType.VARCHAR)
-        .addNullable(DATA_TYPE_COLUMN_NAME, MinorType.VARCHAR)
-        .addNullable(FILE_NAME_COLUMN_NAME, MinorType.VARCHAR)
-        .addNullable(DATA_SIZE_COLUMN_NAME, MinorType.BIGINT)
-        .addNullable(IS_LINK_COLUMN_NAME, MinorType.BIT)
-        .addNullable(ELEMENT_COUNT_NAME, MinorType.BIGINT)
-        .addNullable(DATASET_DATA_TYPE_NAME, MinorType.VARCHAR)
-        .addNullable(DIMENSIONS_FIELD_NAME, MinorType.VARCHAR);
-
-      negotiator.tableSchema(builder.buildSchema(), false);
-
-      loader = negotiator.build();
-      dimensions = new int[0];
-      rowWriter = loader.writer();
-
-    } else {
-      // This is the case when the default path is specified. Since the user is explicitly asking for a dataset
-      // Drill can obtain the schema by getting the datatypes below and ultimately mapping that schema to columns
-      Dataset dataSet = hdfFile.getDatasetByPath(readerConfig.defaultPath);
-      dimensions = dataSet.getDimensions();
-
-      loader = negotiator.build();
-      rowWriter = loader.writer();
-      writerSpec = new WriterSpec(rowWriter, negotiator.providedSchema(),
-          negotiator.parentErrorContext());
-      if (dimensions.length <= 1) {
-        buildSchemaFor1DimensionalDataset(dataSet);
-      } else if (dimensions.length == 2) {
-        buildSchemaFor2DimensionalDataset(dataSet);
-      } else {
-        // Case for datasets of greater than 2D
-        // These are automatically flattened
-        buildSchemaFor2DimensionalDataset(dataSet);
+    { // Opens an HDF5 file

Review Comment:
   An interesting thing: with EVF2, the batch reader needs only a constructor to perform all of its initialization. That means a lot of code ends up in one function: initializing the final variables, opening the file, defining the schema, and so on. So I used bare code blocks to group these actions and make them easier to read.
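To make the pattern concrete, here is a minimal, self-contained sketch of a constructor grouped with bare blocks. The class and members below (BlockGroupedReader, schemaDescription, and so on) are hypothetical stand-ins for the EVF2 reader in the diff, not actual Drill code:

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;

public class BlockGroupedReader {
  private final String fileName;
  private final InputStream in;
  private String schemaDescription;

  public BlockGroupedReader(Path path) throws IOException {
    // Phase 1: initialize the final variables.
    fileName = path.getFileName().toString();

    { // Phase 2: open the file.
      in = Files.newInputStream(path);
    }

    { // Phase 3: define the schema. This hypothetical string stands in for
      // the SchemaBuilder chain in the diff. Locals declared inside a bare
      // block are scoped to that block, so later phases cannot reuse them.
      StringBuilder schema = new StringBuilder(fileName);
      schema.append(": <column definitions>");
      schemaDescription = schema.toString();
    }
  }
}

The bare blocks change nothing at runtime; they only add visual grouping and limit the scope of per-phase locals, which is the readability gain described in the comment above.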
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@drill.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org