[4/6] hive git commit: HIVE-19307: Support ArrowOutputStream in LlapOutputFormatService (Eric Wohlstadter, reviewed by Jason Dere)

2018-05-29 Thread vgarg
HIVE-19307: Support ArrowOutputStream in LlapOutputFormatService (Eric 
Wohlstadter, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f7f90a04
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f7f90a04
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f7f90a04

Branch: refs/heads/branch-3
Commit: f7f90a044499739a2bd6a3ea543f70cb59e3f870
Parents: 2726f30
Author: Jason Dere 
Authored: Tue May 15 14:25:40 2018 -0700
Committer: Vineet Garg 
Committed: Tue May 29 13:58:16 2018 -0700

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   3 +
 .../hadoop/hive/llap/LlapArrowRecordWriter.java |  70 +++
 .../hive/llap/LlapOutputFormatService.java  |  11 +-
 .../hive/llap/WritableByteChannelAdapter.java   | 125 +++
 .../hadoop/hive/ql/exec/FileSinkOperator.java   |  26 ++--
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  28 +++--
 .../hadoop/hive/ql/plan/FileSinkDesc.java   |  12 +-
 7 files changed, 251 insertions(+), 24 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/f7f90a04/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 128e892..8780374 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -397,6 +397,7 @@ public class HiveConf extends Configuration {
 llapDaemonVarsSetLocal.add(ConfVars.LLAP_VALIDATE_ACLS.varname);
 llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_LOGGER.varname);
 llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_AM_USE_FQDN.varname);
+llapDaemonVarsSetLocal.add(ConfVars.LLAP_OUTPUT_FORMAT_ARROW.varname);
   }
 
   /**
@@ -4160,6 +4161,8 @@ public class HiveConf extends Configuration {
 Constants.LLAP_LOGGER_NAME_RFA,
 Constants.LLAP_LOGGER_NAME_CONSOLE),
 "logger used for llap-daemons."),
+LLAP_OUTPUT_FORMAT_ARROW("hive.llap.output.format.arrow", false,
+  "Whether LLapOutputFormatService should output arrow batches"),
 
 HIVE_TRIGGER_VALIDATION_INTERVAL("hive.trigger.validation.interval", "500ms",
   new TimeValidator(TimeUnit.MILLISECONDS),

http://git-wip-us.apache.org/repos/asf/hive/blob/f7f90a04/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java b/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java
new file mode 100644
index 0000000..1b3a3eb
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.llap;
+
+import java.io.IOException;
+
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.hadoop.hive.ql.io.arrow.ArrowWrapperWritable;
+import org.apache.hadoop.io.Writable;
+import java.nio.channels.WritableByteChannel;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapred.Reporter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Writes Arrow batches to an {@link org.apache.arrow.vector.ipc.ArrowStreamWriter}.
+ * The byte stream will be formatted according to the Arrow Streaming format.
+ * Because ArrowStreamWriter is bound to a {@link org.apache.arrow.vector.VectorSchemaRoot}
+ * when it is created,
+ * calls to the {@link #write(Writable, Writable)} method only serve as a signal that
+ * a new batch has been loaded to the associated VectorSchemaRoot.
+ * Payload data for writing is indirectly made available by reference:
+ * ArrowStreamWriter -> VectorSchemaRoot -> List<FieldVector>
+ * i.e. both the key and value are ignored once a reference to the 

hive git commit: HIVE-19307: Support ArrowOutputStream in LlapOutputFormatService (Eric Wohlstadter, reviewed by Jason Dere)

2018-05-15 Thread jdere
Repository: hive
Updated Branches:
  refs/heads/master 4b418a4ae -> d04db94f6


HIVE-19307: Support ArrowOutputStream in LlapOutputFormatService (Eric 
Wohlstadter, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d04db94f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d04db94f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d04db94f

Branch: refs/heads/master
Commit: d04db94f6c21050edf5d782efcc23fe48192d624
Parents: 4b418a4
Author: Jason Dere 
Authored: Tue May 15 14:25:40 2018 -0700
Committer: Jason Dere 
Committed: Tue May 15 14:27:30 2018 -0700

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   3 +
 .../hadoop/hive/llap/LlapArrowRecordWriter.java |  70 +++
 .../hive/llap/LlapOutputFormatService.java  |  11 +-
 .../hive/llap/WritableByteChannelAdapter.java   | 125 +++
 .../hadoop/hive/ql/exec/FileSinkOperator.java   |  26 ++--
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  28 +++--
 .../hadoop/hive/ql/plan/FileSinkDesc.java   |  12 +-
 7 files changed, 251 insertions(+), 24 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/d04db94f/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 0a997a1..b12a7a4 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -397,6 +397,7 @@ public class HiveConf extends Configuration {
 llapDaemonVarsSetLocal.add(ConfVars.LLAP_VALIDATE_ACLS.varname);
 llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_LOGGER.varname);
 llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_AM_USE_FQDN.varname);
+llapDaemonVarsSetLocal.add(ConfVars.LLAP_OUTPUT_FORMAT_ARROW.varname);
   }
 
   /**
@@ -4165,6 +4166,8 @@ public class HiveConf extends Configuration {
 Constants.LLAP_LOGGER_NAME_RFA,
 Constants.LLAP_LOGGER_NAME_CONSOLE),
 "logger used for llap-daemons."),
+LLAP_OUTPUT_FORMAT_ARROW("hive.llap.output.format.arrow", false,
+  "Whether LLapOutputFormatService should output arrow batches"),
 
 HIVE_TRIGGER_VALIDATION_INTERVAL("hive.trigger.validation.interval", "500ms",
   new TimeValidator(TimeUnit.MILLISECONDS),

http://git-wip-us.apache.org/repos/asf/hive/blob/d04db94f/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java b/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java
new file mode 100644
index 0000000..1b3a3eb
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.llap;
+
+import java.io.IOException;
+
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.hadoop.hive.ql.io.arrow.ArrowWrapperWritable;
+import org.apache.hadoop.io.Writable;
+import java.nio.channels.WritableByteChannel;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapred.Reporter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Writes Arrow batches to an {@link org.apache.arrow.vector.ipc.ArrowStreamWriter}.
+ * The byte stream will be formatted according to the Arrow Streaming format.
+ * Because ArrowStreamWriter is bound to a {@link org.apache.arrow.vector.VectorSchemaRoot}
+ * when it is created,
+ * calls to the {@link #write(Writable, Writable)} method only serve as a signal that
+ * a new batch has been loaded to the associated VectorSchemaRoot.
+ * Payload data for writing is indirectly made available by reference:
+ *