[1/2] hive git commit: HIVE-18386 : Create dummy materialized views registry and make it configurable (Jesus Camacho Rodriguez via Ashutosh Chauhan)

2018-01-17 Thread hashutosh
Repository: hive
Updated Branches:
  refs/heads/master a45becb16 -> 80e6f7b0f


http://git-wip-us.apache.org/repos/asf/hive/blob/80e6f7b0/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out
new file mode 100644
index 0000000..20b138c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out
@@ -0,0 +1,1027 @@
+PREHOOK: query: create table cmv_basetable (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cmv_basetable
+POSTHOOK: query: create table cmv_basetable (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cmv_basetable
+PREHOOK: query: insert into cmv_basetable values
+ (1, 'alfred', 10.30, 2),
+ (2, 'bob', 3.14, 3),
+ (2, 'bonnie', 172342.2, 3),
+ (3, 'calvin', 978.76, 3),
+ (3, 'charlie', 9.8, 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@cmv_basetable
+POSTHOOK: query: insert into cmv_basetable values
+ (1, 'alfred', 10.30, 2),
+ (2, 'bob', 3.14, 3),
+ (2, 'bonnie', 172342.2, 3),
+ (3, 'calvin', 978.76, 3),
+ (3, 'charlie', 9.8, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@cmv_basetable
+POSTHOOK: Lineage: cmv_basetable.a SCRIPT []
+POSTHOOK: Lineage: cmv_basetable.b SCRIPT []
+POSTHOOK: Lineage: cmv_basetable.c SCRIPT []
+POSTHOOK: Lineage: cmv_basetable.d SCRIPT []
+PREHOOK: query: analyze table cmv_basetable compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable
+PREHOOK: Output: default@cmv_basetable
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table cmv_basetable compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable
+POSTHOOK: Output: default@cmv_basetable
+#### A masked pattern was here ####
+PREHOOK: query: create table cmv_basetable_2 (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cmv_basetable_2
+POSTHOOK: query: create table cmv_basetable_2 (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cmv_basetable_2
+PREHOOK: query: insert into cmv_basetable_2 values
+ (1, 'alfred', 10.30, 2),
+ (3, 'calvin', 978.76, 3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@cmv_basetable_2
+POSTHOOK: query: insert into cmv_basetable_2 values
+ (1, 'alfred', 10.30, 2),
+ (3, 'calvin', 978.76, 3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@cmv_basetable_2
+POSTHOOK: Lineage: cmv_basetable_2.a SCRIPT []
+POSTHOOK: Lineage: cmv_basetable_2.b SCRIPT []
+POSTHOOK: Lineage: cmv_basetable_2.c SCRIPT []
+POSTHOOK: Lineage: cmv_basetable_2.d SCRIPT []
+PREHOOK: query: analyze table cmv_basetable_2 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable_2
+PREHOOK: Output: default@cmv_basetable_2
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table cmv_basetable_2 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable_2
+POSTHOOK: Output: default@cmv_basetable_2
+#### A masked pattern was here ####
+PREHOOK: query: EXPLAIN
+CREATE MATERIALIZED VIEW cmv_mat_view ENABLE REWRITE AS
+  SELECT cmv_basetable.a, cmv_basetable_2.c
+  FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+  WHERE cmv_basetable_2.c > 10.0
+  GROUP BY cmv_basetable.a, cmv_basetable_2.c
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: query: EXPLAIN
+CREATE MATERIALIZED VIEW cmv_mat_view ENABLE REWRITE AS
+  SELECT cmv_basetable.a, cmv_basetable_2.c
+  FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+  WHERE cmv_basetable_2.c > 10.0
+  GROUP BY cmv_basetable.a, cmv_basetable_2.c
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-2, Stage-0
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+#### A masked pattern was here ####
+  Edges:
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_

[2/2] hive git commit: HIVE-18386 : Create dummy materialized views registry and make it configurable (Jesus Camacho Rodriguez via Ashutosh Chauhan)

2018-01-17 Thread hashutosh
HIVE-18386 : Create dummy materialized views registry and make it configurable (Jesus Camacho Rodriguez via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/80e6f7b0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/80e6f7b0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/80e6f7b0

Branch: refs/heads/master
Commit: 80e6f7b0f13c134642763b0a692c3abbf3ffe106
Parents: a45becb
Author: Jesus Camacho Rodriguez 
Authored: Wed Jan 17 22:22:07 2018 -0800
Committer: Ashutosh Chauhan 
Committed: Wed Jan 17 22:22:07 2018 -0800

--
 .../org/apache/hadoop/hive/cli/CliDriver.java   |4 +
 .../org/apache/hadoop/hive/conf/HiveConf.java   |5 +
 data/conf/hive-site.xml |2 +-
 .../test/resources/testconfiguration.properties |2 +
 .../org/apache/hadoop/hive/ql/QTestUtil.java|4 +
 .../MaterializedViewRegistryPropertiesHook.java |   41 +
 .../MaterializedViewRegistryUpdateHook.java |5 +-
 .../apache/hadoop/hive/ql/metadata/Hive.java|3 +-
 .../metadata/HiveMaterializedViewsRegistry.java |   88 +-
 .../ql/metadata/SessionHiveMetaStoreClient.java |2 +-
 .../materialized_view_create_rewrite_dummy.q|   87 ++
 ...rialized_view_create_rewrite_rebuild_dummy.q |  148 +++
 ...materialized_view_create_rewrite_dummy.q.out |  517 +
 ...ized_view_create_rewrite_rebuild_dummy.q.out | 1027 ++
 .../apache/hive/service/server/HiveServer2.java |2 +-
 15 files changed, 1903 insertions(+), 34 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/80e6f7b0/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
--
diff --git a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
index 2cd3c95..54d17df 100644
--- a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
+++ b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
@@ -72,6 +72,7 @@ import org.apache.hadoop.hive.ql.QueryPlan;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper;
 import org.apache.hadoop.hive.ql.exec.tez.TezJobExecHelper;
+import org.apache.hadoop.hive.ql.metadata.HiveMaterializedViewsRegistry;
 import org.apache.hadoop.hive.ql.parse.HiveParser;
 import org.apache.hadoop.hive.ql.processors.CommandProcessor;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory;
@@ -760,6 +761,9 @@ public class CliDriver {
 
 ss.updateThreadName();
 
+// Create views registry
+HiveMaterializedViewsRegistry.get().init();
+
 // execute cli driver work
 try {
   return executeDriver(ss, conf, oproc);

http://git-wip-us.apache.org/repos/asf/hive/blob/80e6f7b0/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index f2e927f..d0e2ad2 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2430,6 +2430,11 @@ public class HiveConf extends Configuration {
 "Setting it to 0s disables the timeout."),
 
HIVE_SERVER2_PARALLEL_OPS_IN_SESSION("hive.server2.parallel.ops.in.session", 
true,
 "Whether to allow several parallel operations (such as SQL statements) 
in one session."),
+
HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL("hive.server2.materializedviews.registry.impl",
 "DEFAULT",
+new StringSet("DEFAULT", "DUMMY"),
+"The implementation that we should use for the materialized views 
registry. \n" +
+"  DEFAULT: Default cache for materialized views\n" +
+"  DUMMY: Do not cache materialized views and hence forward requests 
to metastore"),
 
 // HiveServer2 WebUI
 HIVE_SERVER2_WEBUI_BIND_HOST("hive.server2.webui.host", "0.0.0.0", "The 
host address the HiveServer2 WebUI will listen on"),
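To try the new knob end to end, here is a minimal, hypothetical sketch; the property name and its DEFAULT/DUMMY values come from the hunk above, but the demo class itself is not part of the patch:

import org.apache.hadoop.hive.conf.HiveConf;

public class RegistryImplDemo {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // DEFAULT keeps the HiveServer2-side cache of materialized views;
    // DUMMY disables caching and forwards every lookup to the metastore.
    conf.set("hive.server2.materializedviews.registry.impl", "DUMMY");
    System.out.println(conf.get("hive.server2.materializedviews.registry.impl"));
  }
}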

http://git-wip-us.apache.org/repos/asf/hive/blob/80e6f7b0/data/conf/hive-site.xml
--
diff --git a/data/conf/hive-site.xml b/data/conf/hive-site.xml
index 9f6aec1..01f83d1 100644
--- a/data/conf/hive-site.xml
+++ b/data/conf/hive-site.xml
@@ -141,7 +141,7 @@
 
 
   <name>hive.exec.pre.hooks</name>
-  <value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
+  <value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables, org.apache.hadoop.hive.ql.hooks.MaterializedViewRegistryPropertiesHook</value>
   <description>Pre Execute Hook for Tests</description>

hive git commit: HIVE-18473: Infer timezone information correctly in DruidSerde (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2018-01-17 Thread jcamacho
Repository: hive
Updated Branches:
  refs/heads/master 01816fca2 -> a45becb16


HIVE-18473: Infer timezone information correctly in DruidSerde (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a45becb1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a45becb1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a45becb1

Branch: refs/heads/master
Commit: a45becb1602573cae32673e32161b1a3aa10a86b
Parents: 01816fc
Author: Jesus Camacho Rodriguez 
Authored: Wed Jan 17 13:07:38 2018 -0800
Committer: Jesus Camacho Rodriguez 
Committed: Wed Jan 17 16:49:06 2018 -0800

--
 .../hadoop/hive/druid/serde/DruidSerDe.java | 44 ++-
 .../test/resources/testconfiguration.properties |  3 +-
 .../queries/clientpositive/druid_timestamptz.q  | 20 +
 .../druid/druid_timestamptz.q.out   | 80 
 4 files changed, 127 insertions(+), 20 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/a45becb1/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
--
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
index 4a7952e..5495822 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
@@ -121,6 +121,9 @@ public class DruidSerDe extends AbstractSerDe {
 final List<PrimitiveTypeInfo> columnTypes = new ArrayList<>();
 List<ObjectInspector> inspectors = new ArrayList<>();
 
+final TimestampLocalTZTypeInfo tsTZTypeInfo = new TimestampLocalTZTypeInfo(
+  configuration.get(HiveConf.ConfVars.HIVE_LOCAL_TIME_ZONE.varname));
+
 // Druid query
 String druidQuery = properties.getProperty(Constants.DRUID_QUERY_JSON);
 if (druidQuery == null) {
@@ -137,8 +140,9 @@
   "') not specified in create table; list of columns is : " +
   properties.getProperty(serdeConstants.LIST_COLUMNS));
 }
-columnTypes.addAll(Lists.transform(Utilities.getColumnTypes(properties),
-type -> TypeInfoFactory.getPrimitiveTypeInfo(type)
+columnTypes.addAll(Lists.transform(
+Lists.transform(Utilities.getColumnTypes(properties), type -> TypeInfoFactory.getPrimitiveTypeInfo(type)),
+e -> e instanceof TimestampLocalTZTypeInfo ? tsTZTypeInfo : e
 ));
 inspectors.addAll(Lists.transform(columnTypes,
 (Function) type -> PrimitiveObjectInspectorFactory
@@ -179,7 +183,7 @@
   if (columnInfo.getKey().equals(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN)) {
 // Special handling for timestamp column
 columnNames.add(columnInfo.getKey()); // field name
-PrimitiveTypeInfo type = TypeInfoFactory.timestampLocalTZTypeInfo; // field type
+PrimitiveTypeInfo type = tsTZTypeInfo; // field type
 columnTypes.add(type);
 inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type));
@@ -188,7 +192,7 @@
   columnNames.add(columnInfo.getKey()); // field name
   PrimitiveTypeInfo type = DruidSerDeUtils.convertDruidToHiveType(
   columnInfo.getValue().getType()); // field type
-  columnTypes.add(type);
+  columnTypes.add(type instanceof TimestampLocalTZTypeInfo ? tsTZTypeInfo : type);
   inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type));
 }
 columns = columnNames.toArray(new String[columnNames.size()]);
@@ -211,19 +215,21 @@
   List<String> propColumnNames = Utilities.getColumnNames(properties);
   List<String> propColumnTypes = Utilities.getColumnTypes(properties);
   for (int i = 0; i < propColumnNames.size(); i++) {
-mapColumnNamesTypes.put(
-propColumnNames.get(i),
-TypeInfoFactory.getPrimitiveTypeInfo(propColumnTypes.get(i)));
+PrimitiveTypeInfo type = TypeInfoFactory.getPrimitiveTypeInfo(propColumnTypes.get(i));
+if (type instanceof TimestampLocalTZTypeInfo) {
+  type = tsTZTypeInfo;
+}
+mapColumnNamesTypes.put(propColumnNames.get(i), type);
   }
 }
 
 switch (query.getType()) {
   case Query.TIMESERIES:
-inferSchema((TimeseriesQuery) que
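The net effect of the patch is that the SerDe derives the timestamp type from the session time zone rather than the fixed factory default. A minimal sketch of that derivation, reusing only the constructor call visible in the hunks above (the zone id and demo class are illustrative, not part of the patch):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo;

public class DruidTimeZoneDemo {
  public static void main(String[] args) {
    Configuration configuration = new Configuration();
    // hive.local.time.zone now drives how Druid's timestamp column is typed
    configuration.set(HiveConf.ConfVars.HIVE_LOCAL_TIME_ZONE.varname, "America/Los_Angeles");
    // same construction as in the patch: one shared type info per SerDe initialization
    TimestampLocalTZTypeInfo tsTZTypeInfo = new TimestampLocalTZTypeInfo(
        configuration.get(HiveConf.ConfVars.HIVE_LOCAL_TIME_ZONE.varname));
    System.out.println(tsTZTypeInfo.getTypeName());
  }
}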

hive git commit: HIVE-18430: Add new determinism category for runtime constants (current_date, current_timestamp) (Jason Dere, reviewed by Jesus Camacho Rodriguez)

2018-01-17 Thread jdere
Repository: hive
Updated Branches:
  refs/heads/master 790976907 -> 01816fca2


HIVE-18430: Add new determinism category for runtime constants (current_date, current_timestamp) (Jason Dere, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/01816fca
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/01816fca
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/01816fca

Branch: refs/heads/master
Commit: 01816fca2c2620f4f57a2b2a5c6ea404e1f0038f
Parents: 7909769
Author: Jason Dere 
Authored: Wed Jan 17 13:07:35 2018 -0800
Committer: Jason Dere 
Committed: Wed Jan 17 13:07:35 2018 -0800

--
 .../hadoop/hive/ql/exec/ExprNodeEvaluator.java  | 16 +
 .../hive/ql/exec/ExprNodeEvaluatorFactory.java  |  2 +-
 .../ql/exec/ExprNodeGenericFuncEvaluator.java   | 12 ++-
 .../hadoop/hive/ql/exec/FunctionRegistry.java   | 34 ++
 .../optimizer/ConstantPropagateProcFactory.java | 18 +-
 .../PrunerExpressionOperatorFactory.java|  2 +-
 .../calcite/rules/HiveExceptRewriteRule.java| 16 -
 .../calcite/rules/HiveIntersectRewriteRule.java |  2 +-
 .../optimizer/calcite/rules/PartitionPrune.java |  2 +-
 .../translator/SqlFunctionConverter.java| 23 +---
 .../ql/optimizer/pcr/PcrExprProcFactory.java|  2 +-
 .../hive/ql/optimizer/ppr/PartitionPruner.java  |  2 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  6 ++--
 .../hive/ql/parse/TypeCheckProcFactory.java |  2 +-
 .../hadoop/hive/ql/plan/ExprNodeDescUtils.java  |  7 ++--
 .../hive/ql/ppd/ExprWalkerProcFactory.java  |  2 +-
 .../apache/hadoop/hive/ql/stats/StatsUtils.java |  2 +-
 .../org/apache/hadoop/hive/ql/udf/UDFType.java  | 10 ++
 .../hadoop/hive/ql/udf/generic/GenericUDF.java  |  3 +-
 .../ql/udf/generic/GenericUDFCurrentDate.java   |  6 ++--
 .../udf/generic/GenericUDFCurrentTimestamp.java |  6 ++--
 .../ql/udf/generic/GenericUDFCurrentUser.java   |  4 ++-
 .../ql/udf/generic/GenericUDFLoggedInUser.java  |  4 ++-
 .../hive/ql/udf/generic/GenericUDFMacro.java|  7 
 .../hive/ql/udf/generic/UDFCurrentDB.java   |  5 ++-
 .../hive/ql/exec/TestFunctionRegistry.java  | 37 
 26 files changed, 182 insertions(+), 50 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/01816fca/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluator.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluator.java
index 375d65f..f8248a8 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluator.java
@@ -102,6 +102,22 @@ public abstract class ExprNodeEvaluator {
   }
 
   /**
+   * Return whether this node (or any children nodes) are runtime constants.
+   */
+  public boolean isRuntimeConstant() {
+return false;
+  }
+
+  /**
+   * Returns whether the expression, for a single query, returns the same result given
+   * the same arguments. This includes deterministic functions as well as runtime
+   * constants (which may not be deterministic across queries).
+   */
+  public boolean isConsistentWithinQuery() {
+return (isDeterministic() || isRuntimeConstant()) && !isStateful();
+  }
+
+  /**
* Return child evaluators if exist
*/
   public ExprNodeEvaluator[] getChildren() {

http://git-wip-us.apache.org/repos/asf/hive/blob/01816fca/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java
index cc40cae..9c0900b 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java
@@ -99,7 +99,7 @@ public final class ExprNodeEvaluatorFactory {
   }
 
  private static ExprNodeEvaluator iterate(ExprNodeEvaluator eval, EvaluatorContext context) {
-if (!(eval instanceof ExprNodeConstantEvaluator) && eval.isDeterministic()) {
+if (!(eval instanceof ExprNodeConstantEvaluator) && eval.isConsistentWithinQuery()) {
   ExprNodeEvaluator replace = context.getEvaluated(eval);
   if (replace != null) {
 return replace;
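A toy, self-contained illustration of the semantics behind the new flag: a runtime constant is captured once per query, so repeated evaluation within the query is consistent even though a later query may see a different value (all names below are hypothetical, not Hive APIs):

final class QueryScope {
  // captured once, like current_timestamp() at query compile time
  final long queryTimestamp = System.currentTimeMillis();
}

public class RuntimeConstantDemo {
  public static void main(String[] args) throws InterruptedException {
    QueryScope q1 = new QueryScope();
    long first = q1.queryTimestamp;
    Thread.sleep(5);
    long second = q1.queryTimestamp;     // same query: identical value
    QueryScope q2 = new QueryScope();    // next query: re-captured
    System.out.println(first == second);             // true
    System.out.println(first == q2.queryTimestamp);  // typically false
  }
}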

http://git-wip-us.apache.org/repos/asf/hive/blob/01816fca/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvalua

hive git commit: HIVE-18385: mergejoin fails with java.lang.IllegalStateException (Jason Dere, reviewed by Deepak Jaiswal)

2018-01-17 Thread jdere
Repository: hive
Updated Branches:
  refs/heads/master 456a65180 -> 790976907


HIVE-18385: mergejoin fails with java.lang.IllegalStateException (Jason Dere, reviewed by Deepak Jaiswal)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/79097690
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/79097690
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/79097690

Branch: refs/heads/master
Commit: 790976907070a2a737186543d81c71eb542a5337
Parents: 456a651
Author: Jason Dere 
Authored: Wed Jan 17 12:34:28 2018 -0800
Committer: Jason Dere 
Committed: Wed Jan 17 12:34:28 2018 -0800

--
 .../ql/exec/tez/DynamicValueRegistryTez.java|  3 ++-
 .../results/clientpositive/llap/mergejoin.q.out | 24 ++--
 2 files changed, 14 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/79097690/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicValueRegistryTez.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicValueRegistryTez.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicValueRegistryTez.java
index b7687c5..af736ed 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicValueRegistryTez.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicValueRegistryTez.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hive.ql.exec.DynamicValueRegistry;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.DynamicValue.NoDynamicValuesException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.serde2.Deserializer;
@@ -73,7 +74,7 @@ public class DynamicValueRegistryTez implements DynamicValueRegistry {
   @Override
   public Object getValue(String key) {
 if (!values.containsKey(key)) {
-  throw new IllegalStateException("Value does not exist in registry: " + key);
+  throw new NoDynamicValuesException("Value does not exist in registry: " + key);
 }
 }
 return values.get(key);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/79097690/ql/src/test/results/clientpositive/llap/mergejoin.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/mergejoin.q.out b/ql/src/test/results/clientpositive/llap/mergejoin.q.out
index ff0b1f1..c07afdf 100644
--- a/ql/src/test/results/clientpositive/llap/mergejoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/mergejoin.q.out
@@ -294,53 +294,53 @@ POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) par
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@srcbucket_mapjoin_part
-PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
+PREHOOK: query: load data local inpath '../../data/files/bmj/00_0' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
 PREHOOK: type: LOAD
 #### A masked pattern was here ####
 PREHOOK: Output: default@srcbucket_mapjoin
-POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
+POSTHOOK: query: load data local inpath '../../data/files/bmj/00_0' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@srcbucket_mapjoin
 POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08
-PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
+PREHOOK: query: load data local inpath '../../data/files/bmj1/01_0' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
 PREHOOK: type: LOAD
 #### A masked pattern was here ####
 PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08
-POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
+POSTHOOK: query: load data local inpath '../../data/files/bmj1/01_0' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08
-PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+PREHOOK: query: load data local inpath '../../data/files/bmj/00_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
 PREHOOK: type: LOAD
 #### A masked pattern was here ####
 

[1/2] hive git commit: HIVE-17495: CachedStore: prewarm improvement (avoid multiple sql calls to read partition column stats), refactoring and caching some aggregate stats (Vaibhav Gumashta reviewed by Daniel Dai)

2018-01-17 Thread vgumashta
Repository: hive
Updated Branches:
  refs/heads/master fbb3ed15f -> 456a65180


http://git-wip-us.apache.org/repos/asf/hive/blob/456a6518/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
--
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
index d12cdc0..e6823d3 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@@ -43,27 +44,24 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
   private static final Logger LOG = LoggerFactory.getLogger(LongColumnStatsAggregator.class);
 
   @Override
-  public ColumnStatisticsObj aggregate(String colName, List<String> partNames,
-  List<ColumnStatistics> css) throws MetaException {
+  public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWithSourceInfo,
+  List<String> partNames, boolean areAllPartsFound) throws MetaException {
 ColumnStatisticsObj statsObj = null;
-
+String colType = null;
+String colName = null;
 // check if all the ColumnStatisticsObjs contain stats and all the ndv are
 // bitvectors
-boolean doAllPartitionContainStats = partNames.size() == css.size();
-LOG.debug("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats);
+boolean doAllPartitionContainStats = partNames.size() == colStatsWithSourceInfo.size();
 NumDistinctValueEstimator ndvEstimator = null;
-String colType = null;
-for (ColumnStatistics cs : css) {
-  if (cs.getStatsObjSize() != 1) {
-throw new MetaException(
-"The number of columns should be exactly one in aggrStats, but found "
-+ cs.getStatsObjSize());
-  }
-  ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
+  ColumnStatisticsObj cso = csp.getColStatsObj();
   if (statsObj == null) {
+colName = cso.getColName();
 colType = cso.getColType();
-statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso
-.getStatsData().getSetField());
+statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType,
+cso.getStatsData().getSetField());
+LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName,
+doAllPartitionContainStats);
   }
   LongColumnStatsDataInspector longColumnStatsData =
   (LongColumnStatsDataInspector) cso.getStatsData().getLongStats();
@@ -91,13 +89,13 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
 }
 LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null));
 ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
-if (doAllPartitionContainStats || css.size() < 2) {
+if (doAllPartitionContainStats || colStatsWithSourceInfo.size() < 2) {
   LongColumnStatsDataInspector aggregateData = null;
   long lowerBound = 0;
   long higherBound = 0;
   double densityAvgSum = 0.0;
-  for (ColumnStatistics cs : css) {
-ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+  for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
+ColumnStatisticsObj cso = csp.getColStatsObj();
 LongColumnStatsDataInspector newData =
 (LongColumnStatsDataInspector) cso.getStatsData().getLongStats();
 lowerBound = Math.max(lowerBound, newData.getNumDVs());
@@ -155,9 +153,9 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
   if (ndvEstimator == null) {
 // if not every partition uses bitvector for ndv, we just fall back to
 // the traditional extrapolation methods.
-for (ColumnStatistics cs : css) {
-  String partName = cs.getStatsDesc().getPartName();
-  ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
+  ColumnStatisticsObj cso = csp.getColStatsObj();
+  String partName = cs
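To make the merge loop above concrete, a toy, self-contained version of the per-partition fold for long stats (the class and the simplified NDV handling are hypothetical; the real aggregator also merges NDV bit vectors and extrapolates when some partitions lack stats):

import java.util.Arrays;
import java.util.List;

public class LongStatsMergeDemo {
  static final class LongStats {
    final long low, high, numNulls, numDVs;
    LongStats(long low, long high, long numNulls, long numDVs) {
      this.low = low; this.high = high; this.numNulls = numNulls; this.numDVs = numDVs;
    }
  }

  static LongStats aggregate(List<LongStats> perPartition) {
    long low = Long.MAX_VALUE, high = Long.MIN_VALUE, nulls = 0, ndv = 0;
    for (LongStats s : perPartition) {
      low = Math.min(low, s.low);     // global minimum across partitions
      high = Math.max(high, s.high);  // global maximum across partitions
      nulls += s.numNulls;            // null counts are additive
      ndv = Math.max(ndv, s.numDVs);  // lower bound when bit vectors are absent
    }
    return new LongStats(low, high, nulls, ndv);
  }

  public static void main(String[] args) {
    LongStats merged = aggregate(Arrays.asList(
        new LongStats(1, 50, 0, 40), new LongStats(10, 99, 3, 25)));
    System.out.println(merged.low + ".." + merged.high
        + " nulls=" + merged.numNulls + " ndv=" + merged.numDVs);
  }
}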

[2/2] hive git commit: HIVE-17495: CachedStore: prewarm improvement (avoid multiple sql calls to read partition column stats), refactoring and caching some aggregate stats (Vaibhav Gumashta reviewed by Daniel Dai)

2018-01-17 Thread vgumashta
HIVE-17495: CachedStore: prewarm improvement (avoid multiple sql calls to read partition column stats), refactoring and caching some aggregate stats (Vaibhav Gumashta reviewed by Daniel Dai)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/456a6518
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/456a6518
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/456a6518

Branch: refs/heads/master
Commit: 456a65180dcb84f69f26b4c9b9265165ad16dfe4
Parents: fbb3ed1
Author: Vaibhav Gumashta 
Authored: Wed Jan 17 09:59:02 2018 -0800
Committer: Vaibhav Gumashta 
Committed: Wed Jan 17 09:59:02 2018 -0800

--
 .../listener/DummyRawStoreFailEvent.java|  15 +-
 .../hive/metastore/MetaStoreDirectSql.java  |  95 ++---
 .../hadoop/hive/metastore/ObjectStore.java  |  24 +-
 .../apache/hadoop/hive/metastore/RawStore.java  |  11 +-
 .../hadoop/hive/metastore/cache/CacheUtils.java |  12 +
 .../hive/metastore/cache/CachedStore.java   | 392 ++-
 .../hive/metastore/cache/SharedCache.java   | 167 +++-
 .../aggr/BinaryColumnStatsAggregator.java   |  23 +-
 .../aggr/BooleanColumnStatsAggregator.java  |  23 +-
 .../columnstats/aggr/ColumnStatsAggregator.java |   8 +-
 .../aggr/DateColumnStatsAggregator.java |  56 ++-
 .../aggr/DecimalColumnStatsAggregator.java  |  57 ++-
 .../aggr/DoubleColumnStatsAggregator.java   |  57 ++-
 .../aggr/LongColumnStatsAggregator.java |  56 +--
 .../aggr/StringColumnStatsAggregator.java   |  59 ++-
 .../hive/metastore/utils/MetaStoreUtils.java| 144 ---
 .../DummyRawStoreControlledCommit.java  |  16 +-
 .../DummyRawStoreForJdoConnection.java  |  16 +-
 18 files changed, 805 insertions(+), 426 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/456a6518/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
--
diff --git a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
index 1fca332..bc9ef62 100644
--- a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
+++ b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
@@ -33,7 +33,6 @@ import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
-import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.CurrentNotificationEventId;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -75,6 +74,7 @@ import org.apache.hadoop.hive.metastore.api.UnknownTableException;
 import org.apache.hadoop.hive.metastore.api.WMMapping;
 import org.apache.hadoop.hive.metastore.api.WMPool;
 import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo;
 import org.apache.thrift.TException;
 
 /**
@@ -976,12 +976,6 @@ public class DummyRawStoreFailEvent implements RawStore, Configurable {
   }
 
   @Override
-  public Map<String, List<ColumnStatisticsObj>> getColStatsForTablePartitions(String dbName,
-  String tableName) throws MetaException, NoSuchObjectException {
-return objectStore.getColStatsForTablePartitions(dbName, tableName);
-  }
-
-  @Override
   public String getMetastoreDbUuid() throws MetaException {
 throw new MetaException("getMetastoreDbUuid is not implemented");
   }
@@ -1092,4 +1086,11 @@ public class DummyRawStoreFailEvent implements RawStore, Configurable {
   String poolPath) throws NoSuchObjectException, InvalidOperationException, MetaException {
 objectStore.dropWMTriggerToPoolMapping(resourcePlanName, triggerName, 
poolPath);
   }
+
+  @Override
+  public List<ColStatsObjWithSourceInfo> getPartitionColStatsForDatabase(String dbName)
+  throws MetaException, NoSuchObjectException {
+// TODO Auto-generated method stub
+return null;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/456a6518/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
--
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index 786732f..47e2e6f 100644
--- a/standalone-metastore/src/mai

hive git commit: HIVE-18372: Create testing infra to test different HMS instances (Peter Vary, reviewed by Marta Kuczora, Vihang Karajgaonkar and Adam Szita)

2018-01-17 Thread pvary
Repository: hive
Updated Branches:
  refs/heads/master 17abdb211 -> fbb3ed15f


HIVE-18372: Create testing infra to test different HMS instances (Peter Vary, reviewed by Marta Kuczora, Vihang Karajgaonkar and Adam Szita)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fbb3ed15
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fbb3ed15
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fbb3ed15

Branch: refs/heads/master
Commit: fbb3ed15fd0f078369c1fcb2edd68326d272758a
Parents: 17abdb2
Author: Peter Vary 
Authored: Wed Jan 17 15:13:45 2018 +0100
Committer: Peter Vary 
Committed: Wed Jan 17 15:13:45 2018 +0100

--
 .../client/MetaStoreFactoryForTests.java| 111 
 .../hive/metastore/client/TestDatabases.java| 617 +++
 .../minihms/AbstractMetaStoreService.java   | 153 +
 .../minihms/ClusterMetaStoreForTests.java   |  32 +
 .../minihms/EmbeddedMetaStoreForTests.java  |  33 +
 .../hadoop/hive/metastore/minihms/MiniHMS.java  |  70 +++
 .../minihms/RemoteMetaStoreForTests.java|  43 ++
 7 files changed, 1059 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/fbb3ed15/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java
--
diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java
new file mode 100644
index 0000000..e2b1a68
--- /dev/null
+++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.client;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.DefaultPartitionExpressionProxy;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
+import org.apache.hadoop.hive.metastore.events.EventCleanerTask;
+import org.apache.hadoop.hive.metastore.minihms.AbstractMetaStoreService;
+import org.apache.hadoop.hive.metastore.minihms.MiniHMS;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Factory for creating specific
+ * {@link org.apache.hadoop.hive.metastore.minihms.AbstractMetaStoreService} implementations for
+ * tests.
+ */
+public class MetaStoreFactoryForTests {
+  private static final int DEFAULT_LIMIT_PARTITION_REQUEST = 100;
+
+  /**
+   * We would like to run the tests with 2 MetaStore configurations
+   * - Embedded - Where the MetaStore is running in the same thread, and does not use Thrift
+   * - Remote - Where the MetaStore is started in a different thread, and uses Thrift for
+   * communication
+   *
+   * Or if the test.hms.client.configs system property is set, it would return a single test
+   * MetaStoreService which uses these configs. In this case the MetaStore should be created
+   * manually or by an external application.
+   * @return The list of the test MetaStoreService implementations usable by @Parameterized
+   * .Parameters
+   */
+  public static List getMetaStores() throws Exception {
+List metaStores = new ArrayList();
+
+Configuration conf = MetastoreConf.newMetastoreConf();
+// set some values to use for getting conf. vars
+MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.METRICS_ENABLED, true);
+MetastoreConf.setLongVar(conf, MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX, 2);
+MetastoreConf.setLongVar(conf, MetastoreConf.ConfVars.LIMIT_PARTITION_REQUEST,
+DEFAULT_LIMIT_PARTITION_REQUEST);
+MetastoreConf.setVar(conf, MetastoreConf.ConfVars.EXPRESSION_PROXY_CLASS,
+DefaultPartitionExpressionProxy.class.getName());
+MetastoreConf.setVar(conf, MetastoreConf.ConfVars.TASK_THREADS_ALWAYS,
+EventCleanerTask.class.getName());
+
+// Do this o
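A hedged sketch of how such a factory is typically consumed from a JUnit 4 parameterized test; the test class below is hypothetical, and the element type of getMetaStores() is not visible in this excerpt, so it is treated as opaque:

import java.util.List;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.apache.hadoop.hive.metastore.client.MetaStoreFactoryForTests;

@RunWith(Parameterized.class)
public class MetaStoreFlavorsSketchTest {
  @Parameterized.Parameter
  public Object metaStore; // embedded, remote, or externally configured HMS wrapper

  @Parameterized.Parameters(name = "{0}")
  public static List<?> metaStores() throws Exception {
    // one parameter set per MetaStore flavor the factory decides to build
    return MetaStoreFactoryForTests.getMetaStores();
  }

  @Test
  public void runsOncePerFlavor() {
    // the same assertions execute against every MetaStore configuration
    System.out.println("testing against: " + metaStore);
  }
}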

hive git commit: HIVE-18411: Fix ArrayIndexOutOfBoundsException for VectorizedListColumnReader (Colin Ma, reviewed by Ferdinand Xu)

2018-01-17 Thread xuf
Repository: hive
Updated Branches:
  refs/heads/master 7942bc6c9 -> 17abdb211


HIVE-18411: Fix ArrayIndexOutOfBoundsException for VectorizedListColumnReader (Colin Ma, reviewed by Ferdinand Xu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/17abdb21
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/17abdb21
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/17abdb21

Branch: refs/heads/master
Commit: 17abdb211c1b2b749fc7d8265d31e6c5987cea4b
Parents: 7942bc6
Author: Ferdinand Xu 
Authored: Wed Jan 17 15:39:54 2018 +0800
Committer: Ferdinand Xu 
Committed: Wed Jan 17 15:39:54 2018 +0800

--
 .../vector/VectorizedListColumnReader.java  |  5 +++
 .../parquet/TestVectorizedListColumnReader.java | 33 
 2 files changed, 38 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/17abdb21/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java
index 12af77c..04fa129 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java
@@ -19,6 +19,7 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
@@ -52,6 +53,10 @@ public class VectorizedListColumnReader extends BaseVectorizedColumnReader {
   @Override
   public void readBatch(int total, ColumnVector column, TypeInfo columnType) throws IOException {
 ListColumnVector lcv = (ListColumnVector) column;
+// before readBatch, initial the size of offsets & lengths as the default value,
+// the actual size will be assigned in setChildrenInfo() after reading complete.
+lcv.offsets = new long[VectorizedRowBatch.DEFAULT_SIZE];
+lcv.lengths = new long[VectorizedRowBatch.DEFAULT_SIZE];
 // Because the length of ListColumnVector.child can't be known now,
 // the valueList will save all data for ListColumnVector temporary.
 List valueList = new ArrayList<>();
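The pre-sizing matters because a previous, smaller batch may have right-sized the vector's offsets and lengths arrays, so reusing them for a full batch overflows. A tiny, self-contained repro of that failure mode (plain arrays stand in for the Hive vector; sizes are illustrative):

public class ListOffsetsOverflowDemo {
  public static void main(String[] args) {
    final int defaultSize = 1024;   // mirrors VectorizedRowBatch.DEFAULT_SIZE
    long[] offsets = new long[176]; // shrunk by an earlier partial batch
    try {
      for (int i = 0; i < defaultSize; i++) {
        offsets[i] = i;             // throws once i reaches 176
      }
    } catch (ArrayIndexOutOfBoundsException e) {
      offsets = new long[defaultSize]; // the fix: reset to the default size first
      for (int i = 0; i < defaultSize; i++) {
        offsets[i] = i;
      }
      System.out.println("recovered by pre-sizing to " + offsets.length);
    }
  }
}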

http://git-wip-us.apache.org/repos/asf/hive/blob/17abdb21/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java
--
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java
index 8ea5d25..d241fc8 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java
@@ -166,6 +166,14 @@ public class TestVectorizedListColumnReader extends VectorizedColumnReaderTestBa
 removeFile();
   }
 
+  @Test
+  public void testVectorizedRowBatchSizeChange() throws Exception {
+removeFile();
+writeListData(initWriterFromFile(), false, 1200);
+testVectorizedRowBatchSizeChangeListRead();
+removeFile();
+  }
+
  private void testListReadAllType(boolean isDictionaryEncoding, int elementNum) throws Exception {
 testListRead(isDictionaryEncoding, "int", elementNum);
 testListRead(isDictionaryEncoding, "long", elementNum);
@@ -337,4 +345,29 @@ public class TestVectorizedListColumnReader extends VectorizedColumnReaderTestBa
   reader.close();
 }
   }
+
+  private void testVectorizedRowBatchSizeChangeListRead() throws Exception {
+Configuration conf = new Configuration();
+conf.set(IOConstants.COLUMNS, "list_binary_field_for_repeat_test");
+conf.set(IOConstants.COLUMNS_TYPES, "array<binary>");
+conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+VectorizedParquetRecordReader reader = createTestParquetReader(
+"message hive_schema {repeated binary list_binary_field_for_repeat_test;}", conf);
+VectorizedRowBatch previous = reader.createValue();
+try {
+  while (reader.next(NullWritable.get(), previous)) {
+ListColumnVector vector = (ListColumnVector) previous.cols[0];
+