This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 8c88676  HIVE-23393 : LLapInputFormat reader policy for Random IO formats (Panos G via Ashutosh Chauhan)
8c88676 is described below

commit 8c88676a91115be56eee1a2fca5c9c2c3ee0402d
Author: Panagiotis Garefalakis <panga...@gmail.com>
AuthorDate: Sat May 9 10:17:42 2020 -0700

    HIVE-23393 : LLapInputFormat reader policy for Random IO formats (Panos G via Ashutosh Chauhan)

    Signed-off-by: Ashutosh Chauhan <hashut...@apache.org>
---
 .../apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java  | 11 +++++++++++
 ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java |  4 ++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
index ac1aca8..e184655 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
@@ -19,6 +19,9 @@

 package org.apache.hadoop.hive.llap.io.api.impl;

+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.S3AInputPolicy;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport;
 import org.apache.hadoop.hive.ql.io.BatchToRowInputFormat;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -62,6 +65,9 @@ import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hive.common.util.HiveStringUtils;

+import static org.apache.hadoop.hive.common.FileUtils.isS3a;
+import static org.apache.hadoop.hive.ql.io.HiveInputFormat.isRandomAccessInputFormat;
+
 public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowBatch>,
     VectorizedInputFormatInterface, SelfDescribingInputFormatInterface,
     AvoidSplitCombination {
@@ -100,6 +106,11 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB

     FileSplit fileSplit = (FileSplit) split;
     reporter.setStatus(fileSplit.toString());
+    FileSystem splitFileSystem = fileSplit.getPath().getFileSystem(job);
+    if (isS3a(splitFileSystem) && isRandomAccessInputFormat(sourceInputFormat)) {
+      LlapIoImpl.LOG.debug("Changing S3A input policy to RANDOM");
+      ((S3AFileSystem) splitFileSystem).setInputPolicy(S3AInputPolicy.Random);
+    }
     try {
       // At this entry point, we are going to assume that these are logical table columns.
       // Perhaps we should go thru the code and clean this up to be more explicit; for now, we
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index 218d665..62ef0c6 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -386,7 +386,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
    * @param inputFormat
    * @return
    */
-  private static boolean isRandomAccessInputFormat(InputFormat inputFormat) {
+  public static boolean isRandomAccessInputFormat(InputFormat inputFormat) {
     if (inputFormat instanceof OrcInputFormat ||
         inputFormat instanceof VectorizedParquetInputFormat) {
       return true;
@@ -449,7 +449,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>

       FileSystem splitFileSystem = splitPath.getFileSystem(job);
       if (isS3a(splitFileSystem) && isRandomAccessInputFormat(inputFormat)) {
-        LOG.debug("Changing S3A input policy to RANDOM for split {}", splitPath);
+        LOG.debug("Changing S3A input policy to RANDOM");
         ((S3AFileSystem) splitFileSystem).setInputPolicy(S3AInputPolicy.Random);
       }