[hive] branch master updated: HIVE-25521 - Fix concatenate file handling when files of different compressions are in same table/partition. (#2639) (Harish Jaiprakash, reviewed by Panagiotis Garefalakis)

2021-10-08 Thread ychena
This is an automated email from the ASF dual-hosted git repository.

ychena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new f0169cd  HIVE-25521 - Fix concatenate file handling when files of different compressions are in same table/partition. (#2639) (Harish Jaiprakash, reviewed by Panagiotis Garefalakis)
f0169cd is described below

commit f0169cdd472b08a77b7a72666899d904f6ce1e9a
Author: Harish Jaiprakash 
AuthorDate: Sat Oct 9 00:04:31 2021 +0530

HIVE-25521 - Fix concatenate file handling when files of different compressions are in same table/partition. (#2639) (Harish Jaiprakash, reviewed by Panagiotis Garefalakis)

* HIVE-25521 - Fix concatenate file handling when files of different compressions are in same table/partition.

* Split the test cases into two parts, and avoid creating the reader at all for ignored splits.
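
The ownership rule described above can be pictured with a small stand-alone sketch (a hypothetical helper, not code from this patch): a combined split may cover only a slice of an ORC file, and only the task whose slice begins at byte 0 merges that file's stripes.

    import org.apache.hadoop.mapred.FileSplit;

    // Hypothetical illustration of the ownership rule; not part of the commit.
    final class SplitOwnership {
      // A task concatenates a file only when its split starts at offset 0;
      // every other task skips the file (mirrors "skipFile = start > 0").
      static boolean ownsEntireFile(FileSplit split) {
        return split.getStart() == 0;
      }
    }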
---
 .../ql/io/orc/OrcFileStripeMergeRecordReader.java  | 67 +++---
 .../io/orc/TestOrcFileStripeMergeRecordReader.java | 34 +--
 2 files changed, 64 insertions(+), 37 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java
index 2ebfd29..e677842 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java
@@ -40,19 +40,23 @@ public class OrcFileStripeMergeRecordReader implements
   protected Iterator<StripeInformation> iter;
   protected List<OrcProto.StripeStatistics> stripeStatistics;
   private int stripeIdx;
-  private long start;
-  private long end;
   private boolean skipFile;
 
   public OrcFileStripeMergeRecordReader(Configuration conf, FileSplit split) throws IOException {
     path = split.getPath();
-    start = split.getStart();
-    end = start + split.getLength();
-    FileSystem fs = path.getFileSystem(conf);
-    this.reader = OrcFile.createReader(path, OrcFile.readerOptions(conf).filesystem(fs));
-    this.iter = reader.getStripes().iterator();
-    this.stripeIdx = 0;
-    this.stripeStatistics = ((ReaderImpl) reader).getOrcProtoStripeStatistics();
+    long start = split.getStart();
+    // if the combined split has only part of the file split, the entire file will be handled by the mapper that
+    // owns the start of file split.
+    skipFile = start > 0; // skip the file if start is not 0
+    if (!skipFile) {
+      FileSystem fs = path.getFileSystem(conf);
+      this.reader = OrcFile.createReader(path, OrcFile.readerOptions(conf).filesystem(fs));
+      this.iter = reader.getStripes().iterator();
+      this.stripeIdx = 0;
+      this.stripeStatistics = ((ReaderImpl) reader).getOrcProtoStripeStatistics();
+    } else {
+      reader = null;
+    }
   }
 
   public Class<?> getKeyClass() {
@@ -90,33 +94,27 @@ public class OrcFileStripeMergeRecordReader implements
       return true;
     }
 
-    while (iter.hasNext()) {
+    // file split starts with 0 and hence this mapper owns concatenate of all stripes in the file.
+    if (iter.hasNext()) {
       StripeInformation si = iter.next();
-
-      // if stripe offset is outside the split boundary then ignore the current
-      // stripe as it will be handled by some other mapper.
-      if (si.getOffset() >= start && si.getOffset() < end) {
-        valueWrapper.setStripeStatistics(stripeStatistics.get(stripeIdx++));
-        valueWrapper.setStripeInformation(si);
-        if (!iter.hasNext()) {
-          valueWrapper.setLastStripeInFile(true);
-          Map<String, ByteBuffer> userMeta = new HashMap<>();
-          for(String key: reader.getMetadataKeys()) {
-            userMeta.put(key, reader.getMetadataValue(key));
-          }
-          valueWrapper.setUserMetadata(userMeta);
+      valueWrapper.setStripeStatistics(stripeStatistics.get(stripeIdx));
+      valueWrapper.setStripeInformation(si);
+      if (!iter.hasNext()) {
+        valueWrapper.setLastStripeInFile(true);
+        Map<String, ByteBuffer> userMeta = new HashMap<>();
+        for(String key: reader.getMetadataKeys()) {
+          userMeta.put(key, reader.getMetadataValue(key));
         }
-        keyWrapper.setInputPath(path);
-        keyWrapper.setCompression(reader.getCompressionKind());
-        keyWrapper.setCompressBufferSize(reader.getCompressionSize());
-        keyWrapper.setFileVersion(reader.getFileVersion());
-        keyWrapper.setWriterVersion(reader.getWriterVersion());
-        keyWrapper.setRowIndexStride(reader.getRowIndexStride());
-        keyWrapper.setFileSchema(reader.getSchema());
-      } else {
-        stripeIdx++;
-        continue;
+        valueWrapper.setUserMetadata(userMeta);
       }
+      keyWrapper.setInputPath(path);
+      keyWrapper.setCompression(reader.getCompressionKind());
+      keyWrapper.setCompressBufferSize(reader.getCompressionSize());
+      keyWrapper

[hive] branch master updated: HIVE-25590: Able to create views referencing temporary tables and materialized views (Krisztian Kasa, reviewed by Zoltan Haindrich, Alessandro Solimando)

2021-10-08 Thread krisztiankasa
This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new ac40207  HIVE-25590: Able to create views referencing temporary tables and materialized views (Krisztian Kasa, reviewed by Zoltan Haindrich, Alessandro Solimando)
ac40207 is described below

commit ac4020789149115c71bcee9b85577fff7dd01f46
Author: Krisztian Kasa 
AuthorDate: Fri Oct 8 18:18:34 2021 +0200

HIVE-25590: Able to create views referencing temporary tables and materialized views (Krisztian Kasa, reviewed by Zoltan Haindrich, Alessandro Solimando)
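
A condensed sketch of the strengthened check (derived from the diff below, with the dummy-table guard omitted): the validation now walks the analyzer's resolved TableScanOperators rather than query-block aliases, so a temporary table or materialized view reached through a subquery or CTE is rejected as well.

    import org.apache.hadoop.hive.ql.exec.TableScanOperator;
    import org.apache.hadoop.hive.ql.metadata.Table;
    import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
    import org.apache.hadoop.hive.ql.parse.SemanticException;

    final class ViewSourceCheck {
      // Condensed from the patch; a sketch, not a drop-in replacement.
      static void rejectInvalidSources(SemanticAnalyzer analyzer) throws SemanticException {
        for (TableScanOperator ts : analyzer.getTopOps().values()) {
          Table table = ts.getConf().getTableMetadata();
          if (table.isTemporary()) {
            throw new SemanticException("View definition references temporary table " + table.getCompleteName());
          }
          if (table.isMaterializedView()) {
            throw new SemanticException("View definition references materialized view " + table.getCompleteName());
          }
        }
      }
    }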
---
 .../view/create/AbstractCreateViewAnalyzer.java| 21 +++
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 24 --
 .../queries/clientnegative/create_view_failure11.q |  5 +
 .../queries/clientnegative/create_view_failure12.q |  7 +++
 .../materialized_view_create_failure.q | 12 +++
 .../clientnegative/create_view_failure10.q.out |  2 +-
 ...failure10.q.out => create_view_failure11.q.out} |  2 +-
 ...failure10.q.out => create_view_failure12.q.out} | 10 -
 .../materialized_view_create_failure.q.out | 17 +++
 ...aterialized_view_no_transactional_rewrite.q.out |  2 +-
 10 files changed, 68 insertions(+), 34 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/AbstractCreateViewAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/AbstractCreateViewAnalyzer.java
index 1f9f103..bd0f6bf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/AbstractCreateViewAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/AbstractCreateViewAnalyzer.java
@@ -20,7 +20,6 @@ package org.apache.hadoop.hive.ql.ddl.view.create;
 
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -42,7 +41,7 @@ import org.apache.hadoop.hive.ql.plan.PlanUtils;
 /**
  * Abstract ancestor of analyzers that can create a view.
  */
-abstract class AbstractCreateViewAnalyzer extends BaseSemanticAnalyzer {
+public abstract class AbstractCreateViewAnalyzer extends BaseSemanticAnalyzer {
   AbstractCreateViewAnalyzer(QueryState queryState) throws SemanticException {
     super(queryState);
   }
@@ -85,26 +84,20 @@ abstract class AbstractCreateViewAnalyzer extends BaseSemanticAnalyzer {
     }
   }
 
-  protected void validateTablesUsed(SemanticAnalyzer analyzer) throws SemanticException {
+  public static void validateTablesUsed(SemanticAnalyzer analyzer) throws SemanticException {
     // Do not allow view to be defined on temp table or other materialized view
-    Set<String> tableAliases = analyzer.getQB().getTabAliases();
-    for (String alias : tableAliases) {
-      if (SemanticAnalyzer.DUMMY_TABLE.equals(alias)) {
+    for (TableScanOperator ts : analyzer.getTopOps().values()) {
+      Table table = ts.getConf().getTableMetadata();
+      if (SemanticAnalyzer.DUMMY_TABLE.equals(table.getTableName())) {
         continue;
       }
-      Table table = null;
-      try {
-        table = analyzer.getTableObjectByName(analyzer.getQB().getTabNameForAlias(alias));
-      } catch (HiveException ex) {
-        throw new SemanticException(ex);
-      }
 
       if (table.isTemporary()) {
-        throw new SemanticException("View definition references temporary table " + alias);
+        throw new SemanticException("View definition references temporary table " + table.getCompleteName());
       }
 
       if (table.isMaterializedView()) {
-        throw new SemanticException("View definition references materialized view " + alias);
+        throw new SemanticException("View definition references materialized view " + table.getCompleteName());
       }
     }
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index b210b94..b2dc3c0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -23,12 +23,12 @@ import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.DYNAMICPARTITIONCONV
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DEFAULT_STORAGE_HANDLER;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS;
 import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS;
+import static org.apache.hadoop.hive.ql.ddl.view.create.AbstractCreateViewAnalyzer.validateTablesUsed;
 import static org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.NON_FK_FILTERED;
 
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.security.AccessControlException;
-impor

[hive] branch master updated (38771b1 -> 853bf18)

2021-10-08 Thread pgaref
This is an automated email from the ASF dual-hosted git repository.

pgaref pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git.


from 38771b1  HIVE-25604: Iceberg should implement the authorization storage handler (Marton Bod, reviewed by Peter Vary)
 add 853bf18  HIVE-25362: LLAP: ensure tasks with locality have a chance to adjust delay (#2513) (Panos Garefalakis, reviewed by Laszlo Bodor)

No new revisions were added by this update.

Summary of changes:
 .../llap/tezplugins/LlapTaskSchedulerService.java  | 19 +++--
 .../tezplugins/TestLlapTaskSchedulerService.java   | 33 ++
 2 files changed, 44 insertions(+), 8 deletions(-)
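
For readers unfamiliar with the idea, delay scheduling in general works like this (a conceptual sketch only; the actual LlapTaskSchedulerService logic differs): a task with a locality preference declines non-local offers until its locality window expires.

    // Conceptual delay-scheduling sketch; not the LlapTaskSchedulerService code.
    final class LocalityDelay {
      private final long deadlineNanos;

      LocalityDelay(long delayMs) {
        // The locality window: how long the task holds out for a local slot.
        this.deadlineNanos = System.nanoTime() + delayMs * 1_000_000L;
      }

      boolean acceptNonLocal() {
        // Accept a non-local slot only once the window has closed.
        return System.nanoTime() >= deadlineNanos;
      }
    }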


[hive] branch branch-3.1 updated: HIVE-24797: Disable validate default values when parsing Avro schemas (#2699)

2021-10-08 Thread pvary
This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
 new a7d27c6  HIVE-24797: Disable validate default values when parsing Avro schemas (#2699)
a7d27c6 is described below

commit a7d27c60a4ce685bbd48abbae9a1409faa243b96
Author: Jacob Ilias Komissar 
AuthorDate: Fri Oct 8 09:25:55 2021 -0400

HIVE-24797: Disable validate default values when parsing Avro schemas (#2699)
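
A self-contained illustration of what the lenient parser tolerates, assuming Avro 1.8 or later where Schema.Parser.setValidateDefaults is available (the schema below is made up): a null default on a non-nullable int field parses with validation off and throws with it on.

    import org.apache.avro.Schema;

    public class AvroDefaultsDemo {
      public static void main(String[] args) {
        // A schema whose default does not type-check: null for an int field.
        // Schemas like this exist in the wild and must remain readable.
        String json = "{\"type\":\"record\",\"name\":\"R\",\"fields\":"
            + "[{\"name\":\"f\",\"type\":\"int\",\"default\":null}]}";

        // Lenient parse succeeds, matching the behaviour this patch selects.
        Schema lenient = new Schema.Parser().setValidateDefaults(false).parse(json);
        System.out.println(lenient.getField("f").name());

        // Strict parse rejects the same schema.
        try {
          new Schema.Parser().setValidateDefaults(true).parse(json);
        } catch (org.apache.avro.AvroTypeException expected) {
          System.out.println("strict parser rejected: " + expected.getMessage());
        }
      }
    }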
---
 .../org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
index 3b96d30..7addf4b 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
@@ -276,17 +276,20 @@ public class AvroSerdeUtils {
     return dec;
   }
 
+  private static Schema.Parser getSchemaParser() {
+    // HIVE-24797: Disable validate default values when parsing Avro schemas.
+    return new Schema.Parser().setValidateDefaults(false);
+  }
+
   public static Schema getSchemaFor(String str) {
-    Schema.Parser parser = new Schema.Parser();
-    Schema schema = parser.parse(str);
+    Schema schema = getSchemaParser().parse(str);
     return schema;
   }
 
   public static Schema getSchemaFor(File file) {
-    Schema.Parser parser = new Schema.Parser();
     Schema schema;
     try {
-      schema = parser.parse(file);
+      schema = getSchemaParser().parse(file);
     } catch (IOException e) {
       throw new RuntimeException("Failed to parse Avro schema from " + file.getName(), e);
     }
@@ -294,10 +297,9 @@ public class AvroSerdeUtils {
   }
 
   public static Schema getSchemaFor(InputStream stream) {
-    Schema.Parser parser = new Schema.Parser();
     Schema schema;
     try {
-      schema = parser.parse(stream);
+      schema = getSchemaParser().parse(stream);
     } catch (IOException e) {
       throw new RuntimeException("Failed to parse Avro schema", e);
     }


[hive] branch master updated: HIVE-25604: Iceberg should implement the authorization storage handler (Marton Bod, reviewed by Peter Vary)

2021-10-08 Thread mbod
This is an automated email from the ASF dual-hosted git repository.

mbod pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 38771b1  HIVE-25604: Iceberg should implement the authorization storage handler (Marton Bod, reviewed by Peter Vary)
38771b1 is described below

commit 38771b18af70ae1ac71f4a88f64a54d099625173
Author: Marton Bod 
AuthorDate: Fri Oct 8 12:21:43 2021 +0200

HIVE-25604: Iceberg should implement the authorization storage handler (Marton Bod, reviewed by Peter Vary)

Iceberg's StorageHandler should implement the HiveStorageAuthorizationHandler interface for authorization purposes. We'll use the iceberg table root location as the basis for permission handling.
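
A hedged usage sketch of the new hook (assuming it is invoked from the same package; the table location is made up): the authorization URI is simply the table's root location behind an iceberg:// scheme, so path-based policies can be applied to Iceberg tables.

    import java.net.URI;
    import java.net.URISyntaxException;
    import java.util.Map;
    import org.apache.iceberg.mr.Catalogs;
    import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;

    public class AuthUriDemo {
      public static void main(String[] args) throws URISyntaxException {
        // Illustrative location; real values come from the table properties.
        Map<String, String> props = ImmutableMap.of(Catalogs.LOCATION, "/warehouse/db/tbl");
        URI authUri = new HiveIcebergStorageHandler().getURIForAuth(props);
        System.out.println(authUri); // iceberg:///warehouse/db/tbl
      }
    }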
---
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java | 13 ++-
 .../mr/hive/TestHiveIcebergStorageHandler.java | 44 ++
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 7a357af..a4392b4 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -21,6 +21,8 @@ package org.apache.iceberg.mr.hive;
 
 import java.io.IOException;
 import java.io.Serializable;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@@ -43,6 +45,7 @@ import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveStorageAuthorizationHandler;
 import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
 import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
 import org.apache.hadoop.hive.ql.parse.PartitionTransformSpec;
@@ -84,9 +87,11 @@ import org.apache.iceberg.util.SerializationUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, HiveStorageHandler {
+public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, HiveStorageHandler,
+    HiveStorageAuthorizationHandler {
   private static final Logger LOG = LoggerFactory.getLogger(HiveIcebergStorageHandler.class);
 
+  private static final String ICEBERG_URI_PREFIX = "iceberg://";
   private static final Splitter TABLE_NAME_SPLITTER = Splitter.on("..");
   private static final String TABLE_NAME_SEPARATOR = "..";
 
@@ -360,6 +365,12 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     return IcebergMetadataTables.isValidMetaTable(metaTableName);
   }
 
+  @Override
+  public URI getURIForAuth(Map<String, String> tableProperties) throws URISyntaxException {
+    String tableLocation = tableProperties.get(Catalogs.LOCATION);
+    return new URI(ICEBERG_URI_PREFIX + tableLocation);
+  }
+
   private void setCommonJobConf(JobConf jobConf) {
 jobConf.set("tez.mrreader.config.update.properties", 
"hive.io.file.readcolumn.names,hive.io.file.readcolumn.ids");
   }
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandler.java
new file mode 100644
index 0000000..3d9c5ce
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandler.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.mr.hive;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Map;
+import org.apache.iceberg.mr.Catalogs;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.junit.Assert;
+