hive git commit: HIVE-19009 : Retain and use runtime statistics during hs2 lifetime (Zoltan Haindrich via Ashutosh Chauhan)

2018-04-20 Thread kgyrtkirk
Repository: hive
Updated Branches:
  refs/heads/branch-3 a39b24660 -> d0769c573


HIVE-19009 : Retain and use runtime statistics during hs2 lifetime (Zoltan Haindrich via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
(cherry picked from commit 9f15e22f4aea99891a37aa1e54d490921e6e1174)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d0769c57
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d0769c57
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d0769c57

Branch: refs/heads/branch-3
Commit: d0769c573b482fe440fbd4dd3c68d43f9c8f9524
Parents: a39b246
Author: Zoltan Haindrich 
Authored: Tue Apr 3 08:51:00 2018 -0700
Committer: Zoltan Haindrich 
Committed: Fri Apr 20 16:12:54 2018 +0200

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  11 +-
 .../test/resources/testconfiguration.properties |   1 +
 .../org/apache/hadoop/hive/ql/QTestUtil.java|   3 +
 .../java/org/apache/hadoop/hive/ql/Context.java |  12 +-
 .../java/org/apache/hadoop/hive/ql/Driver.java  |   4 +
 .../hive/ql/optimizer/physical/Vectorizer.java  |  12 +-
 .../apache/hadoop/hive/ql/plan/JoinDesc.java|   2 +-
 .../hive/ql/plan/mapper/CachingStatsSource.java |  68 +
 .../hive/ql/plan/mapper/EmptyStatsSource.java   |  11 ++
 .../plan/mapper/SimpleRuntimeStatsSource.java   |   6 +
 .../hadoop/hive/ql/plan/mapper/StatsSource.java |   5 +-
 .../hive/ql/plan/mapper/StatsSources.java   | 122 
 .../hive/ql/reexec/IReExecutionPlugin.java  |   1 +
 .../hadoop/hive/ql/reexec/ReExecDriver.java |  20 ++-
 .../ql/reexec/ReExecutionOverlayPlugin.java |   4 +
 .../hadoop/hive/ql/reexec/ReOptimizePlugin.java |  48 +--
 .../signature/TestOperatorSignature.java|   9 +-
 .../ql/plan/mapping/TestCounterMapping.java |   1 -
 .../queries/clientpositive/runtime_stats_hs2.q  |  22 +++
 .../clientpositive/llap/runtime_stats_hs2.q.out | 141 +++
 20 files changed, 476 insertions(+), 27 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/d0769c57/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 7dd16e3..607f8ba 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -4257,10 +4257,19 @@ public class HiveConf extends Configuration {
 "comma separated list of plugin can be used:\n"
 + "  overlay: hiveconf subtree 'reexec.overlay' is used as an overlay in case of an execution errors out\n"
 + "  reoptimize: collects operator statistics during execution and recompile the query after a failure"),
+HIVE_QUERY_REEXECUTION_STATS_PERSISTENCE("hive.query.reexecution.stats.persist.scope", "query",
+new StringSet("query", "hiveserver", "metastore"),
+"Sets the persistence scope of runtime statistics\n"
++ "  query: runtime statistics are only used during re-execution\n"
++ "  hiveserver: runtime statistics are persisted in the hiveserver - all sessions share it"),
+
 HIVE_QUERY_MAX_REEXECUTION_COUNT("hive.query.reexecution.max.count", 1,
 "Maximum number of re-executions for a single query."),
 HIVE_QUERY_REEXECUTION_ALWAYS_COLLECT_OPERATOR_STATS("hive.query.reexecution.always.collect.operator.stats", false,
-"Used during testing"),
+"If sessionstats are enabled; this option can be used to collect statistics all the time"),
+HIVE_QUERY_REEXECUTION_STATS_CACHE_SIZE("hive.query.reexecution.stats.cache.size", 100_000,
+"Size of the runtime statistics cache. Unit is: OperatorStat entry; a query plan consist ~100"),
+
 
 HIVE_QUERY_RESULTS_CACHE_ENABLED("hive.query.results.cache.enabled", true,
 "If the query results cache is enabled. This will keep results of previously executed queries " +

http://git-wip-us.apache.org/repos/asf/hive/blob/d0769c57/itests/src/test/resources/testconfiguration.properties
--
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 183dc4c..d7c5877 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -514,6 +514,7 @@ minillaplocal.query.files=\
   retry_failure.q,\
   retry_failure_stat_changes.q,\
   retry_failure_oom.q,\
+  runtime_stats_hs2.q,\
   bucketsortoptimize_insert_2.q,\
   check_constraint.q,\
   cbo_gby.q,\


hive git commit: HIVE-19009 : Retain and use runtime statistics during hs2 lifetime (Zoltan Haindrich via Ashutosh Chauhan)

2018-04-19 Thread hashutosh
Repository: hive
Updated Branches:
  refs/heads/master 046bc646b -> 9f15e22f4


HIVE-19009 : Retain and use runtime statistics during hs2 lifetime (Zoltan Haindrich via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9f15e22f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9f15e22f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9f15e22f

Branch: refs/heads/master
Commit: 9f15e22f4aea99891a37aa1e54d490921e6e1174
Parents: 046bc64
Author: Zoltan Haindrich 
Authored: Tue Apr 3 08:51:00 2018 -0700
Committer: Ashutosh Chauhan 
Committed: Thu Apr 19 11:44:04 2018 -0700

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  11 +-
 .../test/resources/testconfiguration.properties |   1 +
 .../org/apache/hadoop/hive/ql/QTestUtil.java|  10 +-
 .../java/org/apache/hadoop/hive/ql/Context.java |  12 +-
 .../java/org/apache/hadoop/hive/ql/Driver.java  |   4 +
 .../hive/ql/optimizer/physical/Vectorizer.java  |  12 +-
 .../apache/hadoop/hive/ql/plan/JoinDesc.java|   2 +-
 .../hive/ql/plan/mapper/CachingStatsSource.java |  68 +
 .../hive/ql/plan/mapper/EmptyStatsSource.java   |  11 ++
 .../plan/mapper/SimpleRuntimeStatsSource.java   |   6 +
 .../hadoop/hive/ql/plan/mapper/StatsSource.java |   5 +-
 .../hive/ql/plan/mapper/StatsSources.java   | 122 
 .../hive/ql/reexec/IReExecutionPlugin.java  |   1 +
 .../hadoop/hive/ql/reexec/ReExecDriver.java |  20 ++-
 .../ql/reexec/ReExecutionOverlayPlugin.java |   4 +
 .../hadoop/hive/ql/reexec/ReOptimizePlugin.java |  48 +--
 .../signature/TestOperatorSignature.java|   9 +-
 .../ql/plan/mapping/TestCounterMapping.java |   1 -
 .../queries/clientpositive/runtime_stats_hs2.q  |  22 +++
 .../clientpositive/llap/runtime_stats_hs2.q.out | 141 +++
 20 files changed, 479 insertions(+), 31 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/9f15e22f/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 73492ff..536c7b4 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -4263,10 +4263,19 @@ public class HiveConf extends Configuration {
 "comma separated list of plugin can be used:\n"
 + "  overlay: hiveconf subtree 'reexec.overlay' is used as an overlay in case of an execution errors out\n"
 + "  reoptimize: collects operator statistics during execution and recompile the query after a failure"),
+HIVE_QUERY_REEXECUTION_STATS_PERSISTENCE("hive.query.reexecution.stats.persist.scope", "query",
+new StringSet("query", "hiveserver", "metastore"),
+"Sets the persistence scope of runtime statistics\n"
++ "  query: runtime statistics are only used during re-execution\n"
++ "  hiveserver: runtime statistics are persisted in the hiveserver - all sessions share it"),
+
 HIVE_QUERY_MAX_REEXECUTION_COUNT("hive.query.reexecution.max.count", 1,
 "Maximum number of re-executions for a single query."),
 HIVE_QUERY_REEXECUTION_ALWAYS_COLLECT_OPERATOR_STATS("hive.query.reexecution.always.collect.operator.stats", false,
-"Used during testing"),
+"If sessionstats are enabled; this option can be used to collect statistics all the time"),
+HIVE_QUERY_REEXECUTION_STATS_CACHE_SIZE("hive.query.reexecution.stats.cache.size", 100_000,
+"Size of the runtime statistics cache. Unit is: OperatorStat entry; a query plan consist ~100"),
+
 
 HIVE_QUERY_RESULTS_CACHE_ENABLED("hive.query.results.cache.enabled", true,
 "If the query results cache is enabled. This will keep results of previously executed queries " +

http://git-wip-us.apache.org/repos/asf/hive/blob/9f15e22f/itests/src/test/resources/testconfiguration.properties
--
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index d26f0cc..4e7c519 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -480,6 +480,7 @@ minillaplocal.query.files=\
   retry_failure.q,\
   retry_failure_stat_changes.q,\
   retry_failure_oom.q,\
+  runtime_stats_hs2.q,\
   bucketsortoptimize_insert_2.q,\
   check_constraint.q,\
   cbo_gby.q,\