arina-ielchiieva commented on a change in pull request #1836: DRILL-7156: 
Support empty Parquet files creation
URL: https://github.com/apache/drill/pull/1836#discussion_r313800525
 
 

 ##########
 File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java
 ##########
 @@ -486,11 +467,54 @@ public void abort() throws IOException {
 
   @Override
   public void cleanup() throws IOException {
-    flush();
+    flush(true);
 
     codecFactory.release();
   }
 
+  private void createParquetFileWriter() throws IOException {
+    assert parquetFileWriter == null;
+
+    Path path = new Path(location, prefix + "_" + index + ".parquet");
+    // to ensure that our writer was the first to create output file, we 
create empty file first and fail if file exists
+    Path firstCreatedPath = storageStrategy.createFileAndApply(fs, path);
+
+    // since parquet reader supports partitions, it means that several output 
files may be created
+    // if this writer was the one to create table folder, we store only folder 
and delete it with its content in case of abort
+    // if table location was created before, we store only files created by 
this writer and delete them in case of abort
+    addCleanUpLocation(fs, firstCreatedPath);
+
+    // since ParquetFileWriter will overwrite empty output file (append is not 
supported)
+    // we need to re-apply file permission
+    if (useSingleFSBlock) {
+      // Passing blockSize creates files with this blockSize instead of 
filesystem default blockSize.
+      // Currently, this is supported only by filesystems included in
+      // BLOCK_FS_SCHEMES (ParquetFileWriter.java in parquet-mr), which 
includes HDFS.
+      // For other filesystems, it uses default blockSize configured for the 
file system.
+      parquetFileWriter = new ParquetFileWriter(conf, schema, path, 
ParquetFileWriter.Mode.OVERWRITE, blockSize, 0);
+    } else {
+      parquetFileWriter = new ParquetFileWriter(conf, schema, path, 
ParquetFileWriter.Mode.OVERWRITE);
+    }
+    storageStrategy.applyToFile(fs, path);
+    parquetFileWriter.start();
+  }
+
+  private void flushParquetFileWriter() throws IOException {
+    assert parquetFileWriter != null;
 
 Review comment:
   Same here.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to