This is an automated email from the ASF dual-hosted git repository.

mahesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 69e6a5a4151 HIVE-26394 : Query based compaction fails for table with more than 6 columns (Mahesh Kumar Behera, reviewed by Denys Kuzmenko)
69e6a5a4151 is described below

commit 69e6a5a4151100849d2b03b6b14b1605c3abc3f1
Author: mahesh kumar behera <mah...@apache.org>
AuthorDate: Mon Jul 18 17:52:29 2022 +0530

    HIVE-26394 : Query based compaction fails for table with more than 6 columns (Mahesh Kumar Behera, reviewed by Denys Kuzmenko)
---
 .../ql/txn/compactor/TestCrudCompactorOnTez.java   |  4 +--
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java      | 32 +++++++++++++++++++---
 .../hadoop/hive/ql/io/orc/OrcNewInputFormat.java   | 12 +++++---
 3 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
index cffb58bc41c..eb9f4c4e2a8 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
@@ -1484,8 +1484,8 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
     String tableName = "testMinorCompaction";
     executeStatementOnDriver("drop table if exists " + tableName, driver);
     executeStatementOnDriver(
-        "CREATE TABLE " + tableName + "(a INT, b STRING) " + " STORED AS ORC TBLPROPERTIES ('transactional'='true')",
-        driver);
+        "CREATE TABLE " + tableName + "(a INT, b STRING, c int, d int, e int, f int, j int, i int) "
+            + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
     CompactorTestUtil.runStreamingAPI(conf, dbName, tableName, Lists
         .newArrayList(new CompactorTestUtil.StreamingConnectionOption(false, false),
             new CompactorTestUtil.StreamingConnectionOption(true, false),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 0f1333b9a68..4b70ff5c5b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -251,12 +251,29 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,

     OrcRecordReader(Reader file, Configuration conf,
-                    FileSplit split) throws IOException {
+                    InputSplit inputSplit) throws IOException {
       this.file = file;
       numColumns = file.getSchema().getChildren().size();
+      FileSplit split = (FileSplit)inputSplit;
       this.offset = split.getStart();
       this.length = split.getLength();
-      this.reader = createReaderFromFile(file, conf, offset, length);
+
+      // In case of query based compaction, the ACID table location is used as the location of the external table.
+      // The assumption is that the table is treated as a external table. But as per file, the table is ACID and thus
+      // the file schema can not be used to judge if the table is original or not. It has to be as per the file split.
+
+      // CREATE temporary external table delete_delta_default_tmp_compactor_testminorcompaction_1657797233724_result(
+      // `operation` int, `originalTransaction` bigint, `bucket` int, `rowId` bigint, `currentTransaction` bigint,
+      // `row` struct<`a` :int, `b` :string, `c` :int, `d` :int, `e` :int, `f` :int, `j` :int, `i` :int>)
+      // clustered by (`bucket`) sorted by (`originalTransaction`, `bucket`, `rowId`) into 1 buckets stored as
+      // orc LOCATION 'file:/warehouse/testminorcompaction/delete_delta_0000001_0000006_v0000009'
+      // TBLPROPERTIES ('compactiontable'='true', 'bucketing_version'='2', 'transactional'='false')
+      if (inputSplit instanceof OrcSplit) {
+        this.reader = createReaderFromFile(file, conf, offset, length, ((OrcSplit) inputSplit).isOriginal());
+      } else {
+        this.reader = createReaderFromFile(file, conf, offset, length);
+      }
+
       this.stats = new SerDeStats();
     }

@@ -327,6 +344,14 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
   public static RecordReader createReaderFromFile(Reader file,
                                                   Configuration conf,
                                                   long offset, long length
+                                                  ) throws IOException {
+    return createReaderFromFile(file, conf, offset, length, isOriginal(file));
+  }
+
+  public static RecordReader createReaderFromFile(Reader file,
+                                                  Configuration conf,
+                                                  long offset, long length,
+                                                  boolean isOriginal
                                                   ) throws IOException {
     if (AcidUtils.isFullAcidScan(conf)) {
       raiseAcidTablesMustBeReadWithAcidReaderException(conf);
@@ -339,7 +364,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,

     Reader.Options options = new Reader.Options(conf).range(offset, length);
     options.schema(schema);
-    boolean isOriginal = isOriginal(file);
     if (schema == null) {
       schema = file.getSchema();
     }
@@ -1979,7 +2003,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,

     return new OrcRecordReader(OrcFile.createReader(
         ((FileSplit) inputSplit).getPath(), readerOptions),
-        conf, (FileSplit) inputSplit);
+        conf, inputSplit);
   }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
index 645f00602a9..acf600266d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
@@ -54,7 +54,7 @@ public class OrcNewInputFormat extends InputFormat<NullWritable, OrcStruct>{
     return new OrcRecordReader(OrcFile.createReader(path,
         OrcFile.readerOptions(conf)),
         ShimLoader.getHadoopShims().getConfiguration(context),
-        fileSplit.getStart(), fileSplit.getLength());
+        fileSplit.getStart(), fileSplit.getLength(), inputSplit);
   }

   private static class OrcRecordReader
@@ -65,11 +65,15 @@ public class OrcNewInputFormat extends InputFormat<NullWritable, OrcStruct>{
     private float progress = 0.0f;

     OrcRecordReader(Reader file, Configuration conf,
-                    long offset, long length) throws IOException {
+                    long offset, long length, InputSplit inputSplit) throws IOException {
       numColumns = file.getSchema().getChildren().size();
       value = new OrcStruct(numColumns);
-      this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset,
-          length);
+      if (inputSplit instanceof OrcNewSplit) {
+        this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset,
+            length, ((OrcNewSplit)inputSplit).isOriginal());
+      } else {
+        this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset, length);
+      }
     }

     @Override
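
For readers following along, here is a minimal sketch (not part of the commit) of why the patch moves the isOriginal decision from the file to the split: a full-ACID ORC file wraps the user columns inside the standard ACID struct, so the file schema alone looks the same whether the data is read through an ACID table or through the temporary external compaction table quoted in the code comment above. The TypeDescription API is from orc-core; the class name AcidSchemaSketch and the column list mirroring the test table are illustrative only.

    import org.apache.orc.TypeDescription;

    public class AcidSchemaSketch {
      public static void main(String[] args) {
        // Full-ACID ORC layout: five ACID metadata columns plus the wrapped user
        // row struct, mirroring the delete_delta schema quoted in the comment above.
        TypeDescription acidFile = TypeDescription.fromString(
            "struct<operation:int,originalTransaction:bigint,bucket:int,"
                + "rowId:bigint,currentTransaction:bigint,"
                + "row:struct<a:int,b:string,c:int,d:int,e:int,f:int,j:int,i:int>>");

        // An "original" (non-ACID) file stores the user columns at the top level.
        TypeDescription originalFile = TypeDescription.fromString(
            "struct<a:int,b:string,c:int,d:int,e:int,f:int,j:int,i:int>");

        // Both are plain structs; nothing in the type alone says "this is ACID
        // data", which is why the fix threads isOriginal from the OrcSplit /
        // OrcNewSplit instead of deriving it from the file schema.
        System.out.println(acidFile.getChildren().size());     // 6 (ACID wrapper)
        System.out.println(originalFile.getChildren().size()); // 8 (user columns)
      }
    }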