HIVE-13376 : HoS emits too many logs with application state (Szehon, via Rui Li and Xuefu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9a0dabdf Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9a0dabdf Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9a0dabdf Branch: refs/heads/llap Commit: 9a0dabdf439e11cccf9aa02e5356ab21617e1f6e Parents: ac273b6 Author: Szehon Ho <sze...@cloudera.com> Authored: Fri Apr 1 11:47:52 2016 -0700 Committer: Szehon Ho <sze...@cloudera.com> Committed: Fri Apr 1 11:49:09 2016 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java | 9 +++++++++ 1 file changed, 9 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/9a0dabdf/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java index 2427321..b36c60e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java @@ -53,6 +53,7 @@ public class HiveSparkClientFactory { private static final String SPARK_DEFAULT_APP_NAME = "Hive on Spark"; private static final String SPARK_DEFAULT_SERIALIZER = "org.apache.spark.serializer.KryoSerializer"; private static final String SPARK_DEFAULT_REFERENCE_TRACKING = "false"; + private static final String SPARK_YARN_REPORT_INTERVAL = "spark.yarn.report.interval"; public static HiveSparkClient createHiveSparkClient(HiveConf hiveconf) throws Exception { Map<String, String> sparkConf = initiateSparkConf(hiveconf); @@ -187,6 +188,14 @@ public class HiveSparkClientFactory { } } + //The application reports tend to spam the hive logs. 
This is controlled by spark, and the default seems to be 1s. + //If it is not specified, set it to a much higher number. It can always be overridden by the user. + String sparkYarnReportInterval = sparkConf.get(SPARK_YARN_REPORT_INTERVAL); + if (sparkMaster.startsWith("yarn") && sparkYarnReportInterval == null) { + //the new version of spark also takes time-units, but old versions do not. + sparkConf.put(SPARK_YARN_REPORT_INTERVAL, "60000"); + } + return sparkConf; }