[4/6] hive git commit: HIVE-19307: Support ArrowOutputStream in LlapOutputFormatService (Eric Wohlstadter, reviewed by Jason Dere)
HIVE-19307: Support ArrowOutputStream in LlapOutputFormatService (Eric Wohlstadter, reviewed by Jason Dere) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f7f90a04 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f7f90a04 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f7f90a04 Branch: refs/heads/branch-3 Commit: f7f90a044499739a2bd6a3ea543f70cb59e3f870 Parents: 2726f30 Author: Jason Dere Authored: Tue May 15 14:25:40 2018 -0700 Committer: Vineet Garg Committed: Tue May 29 13:58:16 2018 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 3 + .../hadoop/hive/llap/LlapArrowRecordWriter.java | 70 +++ .../hive/llap/LlapOutputFormatService.java | 11 +- .../hive/llap/WritableByteChannelAdapter.java | 125 +++ .../hadoop/hive/ql/exec/FileSinkOperator.java | 26 ++-- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 28 +++-- .../hadoop/hive/ql/plan/FileSinkDesc.java | 12 +- 7 files changed, 251 insertions(+), 24 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/f7f90a04/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 128e892..8780374 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -397,6 +397,7 @@ public class HiveConf extends Configuration { llapDaemonVarsSetLocal.add(ConfVars.LLAP_VALIDATE_ACLS.varname); llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_LOGGER.varname); llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_AM_USE_FQDN.varname); +llapDaemonVarsSetLocal.add(ConfVars.LLAP_OUTPUT_FORMAT_ARROW.varname); } /** @@ -4160,6 +4161,8 @@ public class HiveConf extends Configuration { Constants.LLAP_LOGGER_NAME_RFA, Constants.LLAP_LOGGER_NAME_CONSOLE), "logger used for llap-daemons."), 
+LLAP_OUTPUT_FORMAT_ARROW("hive.llap.output.format.arrow", false, + "Whether LLapOutputFormatService should output arrow batches"), HIVE_TRIGGER_VALIDATION_INTERVAL("hive.trigger.validation.interval", "500ms", new TimeValidator(TimeUnit.MILLISECONDS), http://git-wip-us.apache.org/repos/asf/hive/blob/f7f90a04/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java b/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java new file mode 100644 index 000..1b3a3eb --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.llap; + +import java.io.IOException; + +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.hadoop.hive.ql.io.arrow.ArrowWrapperWritable; +import org.apache.hadoop.io.Writable; +import java.nio.channels.WritableByteChannel; +import org.apache.hadoop.mapred.RecordWriter; +import org.apache.hadoop.mapred.Reporter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Writes Arrow batches to an {@link org.apache.arrow.vector.ipc.ArrowStreamWriter}. + * The byte stream will be formatted according to the Arrow Streaming format. + * Because ArrowStreamWriter is bound to a {@link org.apache.arrow.vector.VectorSchemaRoot} + * when it is created, + * calls to the {@link #write(Writable, Writable)} method only serve as a signal that + * a new batch has been loaded to the associated VectorSchemaRoot. + * Payload data for writing is indirectly made available by reference: + * ArrowStreamWriter -> VectorSchemaRoot -> List<FieldVector> + * i.e. both the key and value are ignored once a reference to the
hive git commit: HIVE-19307: Support ArrowOutputStream in LlapOutputFormatService (Eric Wohlstadter, reviewed by Jason Dere)
Repository: hive Updated Branches: refs/heads/master 4b418a4ae -> d04db94f6 HIVE-19307: Support ArrowOutputStream in LlapOutputFormatService (Eric Wohlstadter, reviewed by Jason Dere) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d04db94f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d04db94f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d04db94f Branch: refs/heads/master Commit: d04db94f6c21050edf5d782efcc23fe48192d624 Parents: 4b418a4 Author: Jason Dere Authored: Tue May 15 14:25:40 2018 -0700 Committer: Jason Dere Committed: Tue May 15 14:27:30 2018 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 3 + .../hadoop/hive/llap/LlapArrowRecordWriter.java | 70 +++ .../hive/llap/LlapOutputFormatService.java | 11 +- .../hive/llap/WritableByteChannelAdapter.java | 125 +++ .../hadoop/hive/ql/exec/FileSinkOperator.java | 26 ++-- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 28 +++-- .../hadoop/hive/ql/plan/FileSinkDesc.java | 12 +- 7 files changed, 251 insertions(+), 24 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/d04db94f/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 0a997a1..b12a7a4 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -397,6 +397,7 @@ public class HiveConf extends Configuration { llapDaemonVarsSetLocal.add(ConfVars.LLAP_VALIDATE_ACLS.varname); llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_LOGGER.varname); llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_AM_USE_FQDN.varname); +llapDaemonVarsSetLocal.add(ConfVars.LLAP_OUTPUT_FORMAT_ARROW.varname); } /** @@ -4165,6 +4166,8 @@ public class HiveConf extends Configuration { Constants.LLAP_LOGGER_NAME_RFA, Constants.LLAP_LOGGER_NAME_CONSOLE), "logger used for 
llap-daemons."), +LLAP_OUTPUT_FORMAT_ARROW("hive.llap.output.format.arrow", false, + "Whether LLapOutputFormatService should output arrow batches"), HIVE_TRIGGER_VALIDATION_INTERVAL("hive.trigger.validation.interval", "500ms", new TimeValidator(TimeUnit.MILLISECONDS), http://git-wip-us.apache.org/repos/asf/hive/blob/d04db94f/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java b/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java new file mode 100644 index 000..1b3a3eb --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.llap; + +import java.io.IOException; + +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.hadoop.hive.ql.io.arrow.ArrowWrapperWritable; +import org.apache.hadoop.io.Writable; +import java.nio.channels.WritableByteChannel; +import org.apache.hadoop.mapred.RecordWriter; +import org.apache.hadoop.mapred.Reporter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Writes Arrow batches to an {@link org.apache.arrow.vector.ipc.ArrowStreamWriter}. + * The byte stream will be formatted according to the Arrow Streaming format. + * Because ArrowStreamWriter is bound to a {@link org.apache.arrow.vector.VectorSchemaRoot} + * when it is created, + * calls to the {@link #write(Writable, Writable)} method only serve as a signal that + * a new batch has been loaded to the associated VectorSchemaRoot. + * Payload data for writing is indirectly made available by reference: + *