This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

The following commit(s) were added to refs/heads/master by this push:
     new d7ee83d  HIVE-23733 : genIncludedColNames functionality for OrcInputFormat (Panos G via Ashutosh Chauhan)
d7ee83d is described below

commit d7ee83d0e23d74f0258ab3678bced016d4043db3
Author: Panos Garefalakis <pga...@cloudera.com>
AuthorDate: Sat Jun 20 21:59:16 2020 +0100

    HIVE-23733 : genIncludedColNames functionality for OrcInputFormat (Panos G via Ashutosh Chauhan)

    Adding getOriginalColumnNames as part of LLAP Includes interface

    Signed-off-by: Ashutosh Chauhan <ashut...@cloudera.com>
---
 .../hive/llap/io/api/impl/LlapRecordReader.java    |  6 ++++++
 .../hive/llap/io/decode/ColumnVectorProducer.java  |  1 +
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java      | 25 +++++++++++++++++++++
 3 files changed, 32 insertions(+)

diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
index a257a06..55a142e 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
@@ -742,6 +742,12 @@ class LlapRecordReader implements RecordReader<NullWritable, VectorizedRowBatch>
   }
 
   @Override
+  public String[] getOriginalColumnNames(TypeDescription fileSchema) {
+    return OrcInputFormat.genIncludedColNames(
+        fileSchema, filePhysicalColumnIds, acidStructColumnId);
+  }
+
+  @Override
   public String getQueryId() {
     return HiveConf.getVar(jobConf, HiveConf.ConfVars.HIVEQUERYID);
   }
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java
index e37379b..2a3d7fd 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java
@@ -49,6 +49,7 @@ public interface ColumnVectorProducer {
     List<Integer> getPhysicalColumnIds();
     List<Integer> getReaderLogicalColumnIds();
     TypeDescription[] getBatchReaderTypes(TypeDescription fileSchema);
+    String[] getOriginalColumnNames(TypeDescription fileSchema);
     String getQueryId();
     boolean isProbeDecodeEnabled();
     byte getProbeMjSmallTablePos();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index de962cd..1380185 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -421,6 +421,31 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
     return result;
   }
 
+  // Mostly dup of genIncludedColumns
+  public static String[] genIncludedColNames(TypeDescription fileSchema,
+      List<Integer> included, Integer recursiveStruct) {
+    String[] originalColNames = new String[included.size()];
+    List<TypeDescription> children = fileSchema.getChildren();
+    for (int columnNumber = 0; columnNumber < children.size(); ++columnNumber) {
+      int indexInBatchCols = included.indexOf(columnNumber);
+      if (indexInBatchCols >= 0) {
+        // child index and field index should be the same
+        originalColNames[indexInBatchCols] = fileSchema.getFieldNames().get(columnNumber);
+      } else if (recursiveStruct != null && recursiveStruct == columnNumber) {
+        // This assumes all struct cols immediately follow struct
+        List<TypeDescription> nestedChildren = children.get(columnNumber).getChildren();
+        for (int columnNumberDelta = 0; columnNumberDelta < nestedChildren.size(); ++columnNumberDelta) {
+          int columnNumberNested = columnNumber + 1 + columnNumberDelta;
+          int nestedIxInBatchCols = included.indexOf(columnNumberNested);
+          if (nestedIxInBatchCols >= 0) {
+            originalColNames[nestedIxInBatchCols] =
+                children.get(columnNumber).getFieldNames().get(columnNumberDelta);
+          }
+        }
+      }
+    }
+    return originalColNames;
+  }
+
   private static void addColumnToIncludes(TypeDescription child, boolean[] result) {
     for(int col = child.getId(); col <= child.getMaximumId(); ++col) {
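
For readers trying the new helper in isolation, here is a minimal usage sketch (not part of the commit). The schema string, the included-id list, and the printed result are illustrative assumptions; recursiveStruct is passed as null because this flat schema has no ACID row struct, whereas LlapRecordReader passes acidStructColumnId when reading transactional tables.

    import java.util.Arrays;
    import java.util.List;

    import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
    import org.apache.orc.TypeDescription;

    public class GenIncludedColNamesSketch {
      public static void main(String[] args) {
        // Hypothetical flat file schema: top-level column numbers are 0 (id), 1 (name), 2 (ts).
        TypeDescription fileSchema =
            TypeDescription.fromString("struct<id:int,name:string,ts:timestamp>");

        // Project only the first and third top-level columns.
        List<Integer> included = Arrays.asList(0, 2);

        // No ACID row struct in this schema, so the recursive-struct column id is null.
        String[] names = OrcInputFormat.genIncludedColNames(fileSchema, included, null);

        // Prints [id, ts]: each included physical column id is mapped back to its field name.
        System.out.println(Arrays.toString(names));
      }
    }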