This is an automated email from the ASF dual-hosted git repository.

mahesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 69e6a5a4151 HIVE-26394 : Query based compaction fails for table with more than 6 columns (Mahesh Kumar Behera, reviewed by Denys Kuzmenko)
69e6a5a4151 is described below

commit 69e6a5a4151100849d2b03b6b14b1605c3abc3f1
Author: mahesh kumar behera <mah...@apache.org>
AuthorDate: Mon Jul 18 17:52:29 2022 +0530

    HIVE-26394 : Query based compaction fails for table with more than 6 columns (Mahesh Kumar Behera, reviewed by Denys Kuzmenko)
---
 .../ql/txn/compactor/TestCrudCompactorOnTez.java   |  4 +--
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java      | 32 +++++++++++++++++++---
 .../hadoop/hive/ql/io/orc/OrcNewInputFormat.java   | 12 +++++---
 3 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
index cffb58bc41c..eb9f4c4e2a8 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
@@ -1484,8 +1484,8 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
     String tableName = "testMinorCompaction";
     executeStatementOnDriver("drop table if exists " + tableName, driver);
     executeStatementOnDriver(
-        "CREATE TABLE " + tableName + "(a INT, b STRING) " + " STORED AS ORC TBLPROPERTIES ('transactional'='true')",
-        driver);
+        "CREATE TABLE " + tableName + "(a INT, b STRING, c int, d int, e int, f int, j int, i int) "
+            + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
     CompactorTestUtil.runStreamingAPI(conf, dbName, tableName, Lists
         .newArrayList(new CompactorTestUtil.StreamingConnectionOption(false, false),
             new CompactorTestUtil.StreamingConnectionOption(true, false),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 0f1333b9a68..4b70ff5c5b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -251,12 +251,29 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,

     OrcRecordReader(Reader file, Configuration conf,
-                    FileSplit split) throws IOException {
+                    InputSplit inputSplit) throws IOException {
       this.file = file;
       numColumns = file.getSchema().getChildren().size();
+      FileSplit split = (FileSplit)inputSplit;
       this.offset = split.getStart();
       this.length = split.getLength();
-      this.reader = createReaderFromFile(file, conf, offset, length);
+
+      // In case of query based compaction, the ACID table location is used as the location of the external table.
+      // The assumption is that the table is treated as a external table. But as per file, the table is ACID and thus
+      // the file schema can not be used to judge if the table is original or not. It has to be as per the file split.
+
+      // CREATE temporary external table delete_delta_default_tmp_compactor_testminorcompaction_1657797233724_result(
+      // `operation` int, `originalTransaction` bigint, `bucket` int, `rowId` bigint, `currentTransaction` bigint,
+      // `row` struct<`a` :int, `b` :string, `c` :int, `d` :int, `e` :int, `f` :int, `j` :int, `i` :int>)
+      // clustered by (`bucket`) sorted by (`originalTransaction`, `bucket`, `rowId`) into 1 buckets stored as
+      // orc LOCATION 'file:/warehouse/testminorcompaction/delete_delta_0000001_0000006_v0000009'
+      // TBLPROPERTIES ('compactiontable'='true', 'bucketing_version'='2', 'transactional'='false')
+      if (inputSplit instanceof OrcSplit) {
+        this.reader = createReaderFromFile(file, conf, offset, length, ((OrcSplit) inputSplit).isOriginal());
+      } else {
+        this.reader = createReaderFromFile(file, conf, offset, length);
+      }
+
       this.stats = new SerDeStats();
     }

@@ -327,6 +344,14 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
   public static RecordReader createReaderFromFile(Reader file,
                                                   Configuration conf,
                                                   long offset, long length
+                                                  ) throws IOException {
+    return createReaderFromFile(file, conf, offset, length, isOriginal(file));
+  }
+
+  public static RecordReader createReaderFromFile(Reader file,
+                                                  Configuration conf,
+                                                  long offset, long length,
+                                                  boolean isOriginal
                                                   ) throws IOException {
     if (AcidUtils.isFullAcidScan(conf)) {
       raiseAcidTablesMustBeReadWithAcidReaderException(conf);
@@ -339,7 +364,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,

     Reader.Options options = new Reader.Options(conf).range(offset, length);
     options.schema(schema);
-    boolean isOriginal = isOriginal(file);
     if (schema == null) {
       schema = file.getSchema();
     }
@@ -1979,7 +2003,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,

     return new OrcRecordReader(OrcFile.createReader(
         ((FileSplit) inputSplit).getPath(), readerOptions),
-        conf, (FileSplit) inputSplit);
+        conf, inputSplit);
   }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
index 645f00602a9..acf600266d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
@@ -54,7 +54,7 @@ public class OrcNewInputFormat extends InputFormat<NullWritable, OrcStruct>{
     return new OrcRecordReader(OrcFile.createReader(path,
         OrcFile.readerOptions(conf)),
         ShimLoader.getHadoopShims().getConfiguration(context),
-        fileSplit.getStart(), fileSplit.getLength());
+        fileSplit.getStart(), fileSplit.getLength(), inputSplit);
   }

   private static class OrcRecordReader
@@ -65,11 +65,15 @@ public class OrcNewInputFormat extends InputFormat<NullWritable, OrcStruct>{
     private float progress = 0.0f;

     OrcRecordReader(Reader file, Configuration conf,
-                    long offset, long length) throws IOException {
+                    long offset, long length, InputSplit inputSplit) throws IOException {
       numColumns = file.getSchema().getChildren().size();
       value = new OrcStruct(numColumns);
-      this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset,
-          length);
+      if (inputSplit instanceof OrcNewSplit) {
+        this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset,
+            length, ((OrcNewSplit)inputSplit).isOriginal());
+      } else {
+        this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset, length);
+      }
     }

     @Override
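
For readers following along, here is a minimal sketch (not part of the commit) of why the patch moves the isOriginal decision from the file to the split: a full-ACID ORC file wraps the user columns inside the standard ACID struct, so the file schema alone looks the same whether the data is read through an ACID table or through the temporary external compaction table quoted in the code comment above. The TypeDescription API is from orc-core; the class name AcidSchemaSketch and the column list mirroring the test table are illustrative only.

    import org.apache.orc.TypeDescription;

    public class AcidSchemaSketch {
      public static void main(String[] args) {
        // Full-ACID ORC layout: five ACID metadata columns plus the wrapped user
        // row struct, mirroring the delete_delta schema quoted in the comment above.
        TypeDescription acidFile = TypeDescription.fromString(
            "struct<operation:int,originalTransaction:bigint,bucket:int,"
                + "rowId:bigint,currentTransaction:bigint,"
                + "row:struct<a:int,b:string,c:int,d:int,e:int,f:int,j:int,i:int>>");

        // An "original" (non-ACID) file stores the user columns at the top level.
        TypeDescription originalFile = TypeDescription.fromString(
            "struct<a:int,b:string,c:int,d:int,e:int,f:int,j:int,i:int>");

        // Both are plain structs; nothing in the type alone says "this is ACID
        // data", which is why the fix threads isOriginal from the OrcSplit /
        // OrcNewSplit instead of deriving it from the file schema.
        System.out.println(acidFile.getChildren().size());     // 6 (ACID wrapper)
        System.out.println(originalFile.getChildren().size()); // 8 (user columns)
      }
    }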