[
https://issues.apache.org/jira/browse/DRILL-8188?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17521460#comment-17521460
]
ASF GitHub Bot commented on DRILL-8188:
---
paul-rogers commented on code in PR #2515:
URL: https://github.com/apache/drill/pull/2515#discussion_r849083600
##
contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5BatchReader.java:
##
@@ -171,107 +168,109 @@ public HDF5ReaderConfig(HDF5FormatPlugin plugin, HDF5FormatConfig formatConfig)
}
}
- public HDF5BatchReader(HDF5ReaderConfig readerConfig, int maxRecords) {
-this.readerConfig = readerConfig;
-this.maxRecords = maxRecords;
+ public HDF5BatchReader(HDF5ReaderConfig config, EasySubScan scan, FileSchemaNegotiator negotiator) {
+errorContext = negotiator.parentErrorContext();
+file = negotiator.file();
+readerConfig = config;
dataWriters = new ArrayList<>();
-this.showMetadataPreview = readerConfig.formatConfig.showPreview();
- }
+showMetadataPreview = readerConfig.formatConfig.showPreview();
- @Override
- public boolean open(FileSchemaNegotiator negotiator) {
-split = negotiator.split();
-errorContext = negotiator.parentErrorContext();
// Since the HDF file reader uses a stream to actually read the file, the file name from the
// module is incorrect.
-fileName = split.getPath().getName();
-try {
- openFile(negotiator);
-} catch (IOException e) {
- throw UserException
-.dataReadError(e)
-.addContext("Failed to close input file: %s", split.getPath())
-.addContext(errorContext)
-.build(logger);
-}
+fileName = file.split().getPath().getName();
-ResultSetLoader loader;
-if (readerConfig.defaultPath == null) {
- // Get file metadata
- List metadata = getFileMetadata(hdfFile, new ArrayList<>());
- metadataIterator = metadata.iterator();
-
- // Schema for Metadata query
- SchemaBuilder builder = new SchemaBuilder()
-.addNullable(PATH_COLUMN_NAME, MinorType.VARCHAR)
-.addNullable(DATA_TYPE_COLUMN_NAME, MinorType.VARCHAR)
-.addNullable(FILE_NAME_COLUMN_NAME, MinorType.VARCHAR)
-.addNullable(DATA_SIZE_COLUMN_NAME, MinorType.BIGINT)
-.addNullable(IS_LINK_COLUMN_NAME, MinorType.BIT)
-.addNullable(ELEMENT_COUNT_NAME, MinorType.BIGINT)
-.addNullable(DATASET_DATA_TYPE_NAME, MinorType.VARCHAR)
-.addNullable(DIMENSIONS_FIELD_NAME, MinorType.VARCHAR);
-
- negotiator.tableSchema(builder.buildSchema(), false);
-
- loader = negotiator.build();
- dimensions = new int[0];
- rowWriter = loader.writer();
-
-} else {
- // This is the case when the default path is specified. Since the user is explicitly asking for a dataset
- // Drill can obtain the schema by getting the datatypes below and ultimately mapping that schema to columns
- Dataset dataSet = hdfFile.getDatasetByPath(readerConfig.defaultPath);
- dimensions = dataSet.getDimensions();
-
- loader = negotiator.build();
- rowWriter = loader.writer();
- writerSpec = new WriterSpec(rowWriter, negotiator.providedSchema(),
- negotiator.parentErrorContext());
- if (dimensions.length <= 1) {
-buildSchemaFor1DimensionalDataset(dataSet);
- } else if (dimensions.length == 2) {
-buildSchemaFor2DimensionalDataset(dataSet);
- } else {
-// Case for datasets of greater than 2D
-// These are automatically flattened
-buildSchemaFor2DimensionalDataset(dataSet);
+{ // Opens an HDF5 file
Review Comment:
Is the nested block necessary? It is handy when we want to reuse variable names (as sometimes occurs in tests), but is it needed here?
##
contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5BatchReader.java:
##
@@ -410,8 +390,22 @@ public boolean next() {
return true;
}
+ @Override
+ public void close() {
+AutoCloseables.closeSilently(hdfFile, reader);
+/*
+ * The current implementation of the HDF5 reader creates a temp file which needs to be removed
+ * when the batch reader is closed. A possible future functionality might be to used to.
Review Comment:
seems an incomplete comment: "used to" do what?
##
contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5BatchReader.java:
##
@@ -410,8 +390,22 @@ public boolean next() {
return true;
}
+ @Override
+ public void close() {
+AutoCloseables.closeSilently(hdfFile, reader);
+/*
+ * The current implementation of the HDF5 reader creates a temp file which needs to be removed
+ * when the batch reader is closed. A possible future functionality might be to used to.
+ */
+boolean result = hdfFile.getFile().delete();
Review Comment:
There was a comment above about reading