This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

The following commit(s) were added to refs/heads/master by this push:
     new d7ee83d  HIVE-23733 : genIncludedColNames functionality for OrcInputFormat (Panos G via Ashutosh Chauhan)
d7ee83d is described below

commit d7ee83d0e23d74f0258ab3678bced016d4043db3
Author: Panos Garefalakis <pga...@cloudera.com>
AuthorDate: Sat Jun 20 21:59:16 2020 +0100

    HIVE-23733 : genIncludedColNames functionality for OrcInputFormat (Panos G via Ashutosh Chauhan)

    Adding getOriginalColumnNames as part of LLAP Includes interface

    Signed-off-by: Ashutosh Chauhan <ashut...@cloudera.com>
---
 .../hive/llap/io/api/impl/LlapRecordReader.java    |  6 ++++++
 .../hive/llap/io/decode/ColumnVectorProducer.java  |  1 +
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java      | 25 +++++++++++++++++++++
 3 files changed, 32 insertions(+)

diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
index a257a06..55a142e 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
@@ -742,6 +742,12 @@ class LlapRecordReader implements RecordReader<NullWritable, VectorizedRowBatch>
   }
 
   @Override
+  public String[] getOriginalColumnNames(TypeDescription fileSchema) {
+    return OrcInputFormat.genIncludedColNames(
+        fileSchema, filePhysicalColumnIds, acidStructColumnId);
+  }
+
+  @Override
   public String getQueryId() {
     return HiveConf.getVar(jobConf, HiveConf.ConfVars.HIVEQUERYID);
   }
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java
index e37379b..2a3d7fd 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java
@@ -49,6 +49,7 @@ public interface ColumnVectorProducer {
     List<Integer> getPhysicalColumnIds();
     List<Integer> getReaderLogicalColumnIds();
     TypeDescription[] getBatchReaderTypes(TypeDescription fileSchema);
+    String[] getOriginalColumnNames(TypeDescription fileSchema);
     String getQueryId();
     boolean isProbeDecodeEnabled();
     byte getProbeMjSmallTablePos();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index de962cd..1380185 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -421,6 +421,31 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
     return result;
   }
 
+  // Mostly dup of genIncludedColumns
+  public static String[] genIncludedColNames(TypeDescription fileSchema,
+      List<Integer> included, Integer recursiveStruct) {
+    String[] originalColNames = new String[included.size()];
+    List<TypeDescription> children = fileSchema.getChildren();
+    for (int columnNumber = 0; columnNumber < children.size(); ++columnNumber) {
+      int indexInBatchCols = included.indexOf(columnNumber);
+      if (indexInBatchCols >= 0) {
+        // child index and field index should be the same
+        originalColNames[indexInBatchCols] = fileSchema.getFieldNames().get(columnNumber);
+      } else if (recursiveStruct != null && recursiveStruct == columnNumber) {
+        // This assumes all struct cols immediately follow struct
+        List<TypeDescription> nestedChildren = children.get(columnNumber).getChildren();
+        for (int columnNumberDelta = 0; columnNumberDelta < nestedChildren.size(); ++columnNumberDelta) {
+          int columnNumberNested = columnNumber + 1 + columnNumberDelta;
+          int nestedIxInBatchCols = included.indexOf(columnNumberNested);
+          if (nestedIxInBatchCols >= 0) {
+            originalColNames[nestedIxInBatchCols] =
+                children.get(columnNumber).getFieldNames().get(columnNumberDelta);
+          }
+        }
+      }
+    }
+    return originalColNames;
+  }
+
   private static void addColumnToIncludes(TypeDescription child, boolean[] result) {
     for(int col = child.getId(); col <= child.getMaximumId(); ++col) {
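
For readers trying the new helper in isolation, here is a minimal usage sketch (not part of the commit). The schema string, the included-id list, and the printed result are illustrative assumptions; recursiveStruct is passed as null because this flat schema has no ACID row struct, whereas LlapRecordReader passes acidStructColumnId when reading transactional tables.

    import java.util.Arrays;
    import java.util.List;

    import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
    import org.apache.orc.TypeDescription;

    public class GenIncludedColNamesSketch {
      public static void main(String[] args) {
        // Hypothetical flat file schema: top-level column numbers are 0 (id), 1 (name), 2 (ts).
        TypeDescription fileSchema =
            TypeDescription.fromString("struct<id:int,name:string,ts:timestamp>");

        // Project only the first and third top-level columns.
        List<Integer> included = Arrays.asList(0, 2);

        // No ACID row struct in this schema, so the recursive-struct column id is null.
        String[] names = OrcInputFormat.genIncludedColNames(fileSchema, included, null);

        // Prints [id, ts]: each included physical column id is mapped back to its field name.
        System.out.println(Arrays.toString(names));
      }
    }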