arina-ielchiieva commented on a change in pull request #1836: DRILL-7156: 
Support empty Parquet files creation
URL: https://github.com/apache/drill/pull/1836#discussion_r313803238
 
 

 ##########
 File path: 
exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriterEmptyFiles.java
 ##########
 @@ -43,47 +49,99 @@ public void testWriteEmptyFile() throws Exception {
     final File outputFile = 
FileUtils.getFile(dirTestWatcher.getDfsTestTmpDir(), outputFileName);
 
     test("CREATE TABLE dfs.tmp.%s AS SELECT * FROM cp.`employee.json` WHERE 
1=0", outputFileName);
+    Assert.assertTrue(outputFile.exists());
+  }
+
+  @Test
+  public void testWriteEmptyFileWithEmptySchema() throws Exception {
+    final String outputFileName = 
"testparquetwriteremptyfiles_testwriteemptyfileemptyschema";
+    final File outputFile = 
FileUtils.getFile(dirTestWatcher.getDfsTestTmpDir(), outputFileName);
+
+    test("CREATE TABLE dfs.tmp.%s AS SELECT * FROM cp.`empty.json`", 
outputFileName);
     Assert.assertFalse(outputFile.exists());
   }
 
   @Test
-  public void testMultipleWriters() throws Exception {
-    final String outputFile = 
"testparquetwriteremptyfiles_testmultiplewriters";
+  public void testWriteEmptySchemaChange() throws Exception {
+    final String outputFileName = 
"testparquetwriteremptyfiles_testwriteemptyschemachange";
+    final File outputFile = 
FileUtils.getFile(dirTestWatcher.getDfsTestTmpDir(), outputFileName);
 
-    runSQL("alter session set `planner.slice_target` = 1");
+    test("CREATE TABLE dfs.tmp.%s AS select id, a, b from 
dfs.`schemachange/multi/*.json` WHERE id = 0", outputFileName);
 
-    try {
-      final String query = "SELECT position_id FROM cp.`employee.json` WHERE 
position_id IN (15, 16) GROUP BY position_id";
+    // Only the last scan scheme is written
+    SchemaBuilder schemaBuilder = new SchemaBuilder()
+      .addNullable("id", TypeProtos.MinorType.BIGINT)
+      .addNullable("a", TypeProtos.MinorType.BIGINT)
+      .addNullable("b", TypeProtos.MinorType.BIT);
+    BatchSchema expectedSchema = new BatchSchemaBuilder()
+      .withSchemaBuilder(schemaBuilder)
+      .build();
 
-      test("CREATE TABLE dfs.tmp.%s AS %s", outputFile, query);
+    testBuilder()
+      .unOrdered()
+      .sqlQuery("select * from dfs.tmp.%s", outputFileName)
+      .schemaBaseLine(expectedSchema)
+      .go();
 
-      // this query will fail if an "empty" file was created
-      testBuilder()
-        .unOrdered()
-        .sqlQuery("SELECT * FROM dfs.tmp.%s", outputFile)
-        .sqlBaselineQuery(query)
-        .go();
-    } finally {
-      runSQL("alter session set `planner.slice_target` = " + 
ExecConstants.SLICE_TARGET_DEFAULT);
-    }
+    // Make sure that only 1 parquet file was created
+    Assert.assertEquals(1, outputFile.list((dir, name) -> 
name.endsWith("parquet")).length);
+  }
+
+  @Test
+  public void testEmptyFileSchema() throws Exception {
+    final String outputFileName = 
"testparquetwriteremptyfiles_testemptyfileschema";
+
+    test("CREATE TABLE dfs.tmp.%s AS SELECT * FROM cp.`employee.json` WHERE 
1=0", outputFileName);
+
+    // end_date column is null, so it missing in result schema.
+    SchemaBuilder schemaBuilder = new SchemaBuilder()
+            .addNullable("employee_id", TypeProtos.MinorType.BIGINT)
+            .addNullable("full_name", TypeProtos.MinorType.VARCHAR)
+            .addNullable("first_name", TypeProtos.MinorType.VARCHAR)
+            .addNullable("last_name", TypeProtos.MinorType.VARCHAR)
+            .addNullable("position_id", TypeProtos.MinorType.BIGINT)
+            .addNullable("position_title", TypeProtos.MinorType.VARCHAR)
+            .addNullable("store_id", TypeProtos.MinorType.BIGINT)
+            .addNullable("department_id", TypeProtos.MinorType.BIGINT)
+            .addNullable("birth_date", TypeProtos.MinorType.VARCHAR)
+            .addNullable("hire_date", TypeProtos.MinorType.VARCHAR)
+            .addNullable("salary", TypeProtos.MinorType.FLOAT8)
+            .addNullable("supervisor_id", TypeProtos.MinorType.BIGINT)
+            .addNullable("education_level", TypeProtos.MinorType.VARCHAR)
+            .addNullable("marital_status", TypeProtos.MinorType.VARCHAR)
+            .addNullable("gender", TypeProtos.MinorType.VARCHAR)
+            .addNullable("management_role", TypeProtos.MinorType.VARCHAR);
+    BatchSchema expectedSchema = new BatchSchemaBuilder()
+            .withSchemaBuilder(schemaBuilder)
+            .build();
+
+    testBuilder()
+            .unOrdered()
+            .sqlQuery("select * from dfs.tmp.%s", outputFileName)
+            .schemaBaseLine(expectedSchema)
+            .go();
   }
 
   @Test // see DRILL-2408
   public void testWriteEmptyFileAfterFlush() throws Exception {
-    final String outputFile = 
"testparquetwriteremptyfiles_test_write_empty_file_after_flush";
+    final String outputFileName = 
"testparquetwriteremptyfiles_test_write_empty_file_after_flush";
+    final File outputFile = 
FileUtils.getFile(dirTestWatcher.getDfsTestTmpDir(), outputFileName);
 
     try {
       // this specific value will force a flush just after the final row is 
written
       // this may cause the creation of a new "empty" parquet file
       test("ALTER SESSION SET `store.parquet.block-size` = 19926");
 
       final String query = "SELECT * FROM cp.`employee.json` LIMIT 100";
-      test("CREATE TABLE dfs.tmp.%s AS %s", outputFile, query);
+      test("CREATE TABLE dfs.tmp.%s AS %s", outputFileName, query);
+
+      // Make sure that only 1 parquet file was created
+      Assert.assertEquals(1, outputFile.list((dir, name) -> 
name.endsWith("parquet")).length);
 
 Review comment:
   Please use a static import for `assertEquals` instead of calling `Assert.assertEquals`.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to