[ https://issues.apache.org/jira/browse/HIVE-25429?focusedWorklogId=641044&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-641044 ]

ASF GitHub Bot logged work on HIVE-25429:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 24/Aug/21 12:02
            Start Date: 24/Aug/21 12:02
    Worklog Time Spent: 10m 
      Work Description: klcopp commented on a change in pull request #2563:
URL: https://github.com/apache/hive/pull/2563#discussion_r694770338



##########
File path: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactionMetricsOnTez.java
##########
@@ -17,42 +17,59 @@
  */
 package org.apache.hadoop.hive.ql.txn.compactor;
 
-import com.codahale.metrics.Gauge;
 import org.apache.commons.lang3.RandomStringUtils;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
-import org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.CompactionType;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.metrics.MetricsConstants;
 import org.apache.hadoop.hive.ql.txn.compactor.metrics.DeltaFilesMetricReporter;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.junit.After;
 import org.junit.Assert;
 import org.junit.Test;
 
 import java.text.MessageFormat;
 import java.util.HashMap;
-import java.util.Map;
 import java.util.concurrent.TimeUnit;
 
-import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactionMetrics.equivalent;
-import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactionMetrics.gaugeToMap;
+import static org.apache.hadoop.hive.ql.txn.compactor.TestDeltaFilesMetrics.gaugeToMap;
+import static org.apache.hadoop.hive.ql.txn.compactor.TestDeltaFilesMetrics.verifyMetricsMatch;
 import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriver;
 
 public class TestCompactionMetricsOnTez extends CompactorOnTezTest {
 
-  @Test
-  public void testDeltaFilesMetric() throws Exception {
-    MetricsFactory.close();
-    HiveConf conf = driver.getConf();
+  /**
+   * Use {@link CompactorOnTezTest#setupWithConf(org.apache.hadoop.hive.conf.HiveConf)} when HiveConf is
+   * configured to your liking.
+   */
+  @Override
+  public void setup() {
+  }
 
-    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED, true);
-    MetricsFactory.init(conf);
+  @After
+  public void tearDown() {
+    DeltaFilesMetricReporter.close();
+  }
 
+  private void configureMetrics(HiveConf conf) {
     HiveConf.setIntVar(conf, HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_OBSOLETE_DELTA_NUM_THRESHOLD, 0);
     HiveConf.setIntVar(conf, HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_DELTA_NUM_THRESHOLD, 0);
     HiveConf.setTimeVar(conf, HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_REPORTING_INTERVAL, 1, TimeUnit.SECONDS);
     HiveConf.setTimeVar(conf, HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_DELTA_CHECK_THRESHOLD, 0, TimeUnit.SECONDS);
     HiveConf.setFloatVar(conf, HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_DELTA_PCT_THRESHOLD, 0.7f);
+  }
+
+  @Test
+  public void testDeltaFilesMetric() throws Exception {
+    HiveConf conf = new HiveConf();
+    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED, true);
+    configureMetrics(conf);
+    setupWithConf(conf);

Review comment:
       We need to be able to set different (conflicting) configs before 
setupWithConf is called. setup() would need to be parametrized. Do you know how 
to do that?
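
One possible approach, sketched roughly below (the class name, the Consumer<HiveConf> parameter and the @Before method are hypothetical, and it assumes the inherited setup() stays a no-op as in this patch), is JUnit 4's Parameterized runner with a per-run conf customizer that is applied before setupWithConf:

import java.util.Arrays;
import java.util.Collection;
import java.util.function.Consumer;

import org.apache.hadoop.hive.conf.HiveConf;
import org.junit.Before;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

@RunWith(Parameterized.class)
public class TestCompactionMetricsOnTezParams extends CompactorOnTezTest {

  // One entry per run: each customizer sets the (possibly conflicting) configs for that run.
  @Parameterized.Parameters
  public static Collection<Object[]> confCustomizers() {
    return Arrays.asList(new Object[][] {
        {(Consumer<HiveConf>) c -> HiveConf.setBoolVar(c, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED, true)},
        {(Consumer<HiveConf>) c -> HiveConf.setBoolVar(c, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED, false)}
    });
  }

  @Parameterized.Parameter
  public Consumer<HiveConf> confCustomizer;

  @Override
  public void setup() {
    // no-op, as in this patch; initialization happens below once the conf is customized
  }

  // Applies the per-run settings first, then runs the shared initialization.
  @Before
  public void setUpWithCustomizedConf() throws Exception {
    HiveConf conf = new HiveConf();
    confCustomizer.accept(conf);
    setupWithConf(conf);
  }
}

Each @Test in the class would then run once per customizer, so conflicting configs never have to coexist in a single setup().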

##########
File path: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactionMetricsOnTez.java
##########
@@ -105,12 +118,100 @@ public void testDeltaFilesMetric() throws Exception {
     executeStatementOnDriver("select avg(b) from " + tableName, driver);
     Thread.sleep(1000);
 
-    Assert.assertTrue(
-      equivalent(
-        new HashMap<String, String>() {{
+    verifyMetricsMatch(new HashMap<String, String>() {{
           put(tableName + Path.SEPARATOR + partitionToday, "1");
-        }}, gaugeToMap(MetricsConstants.COMPACTION_NUM_SMALL_DELTAS)));
+        }}, gaugeToMap(MetricsConstants.COMPACTION_NUM_SMALL_DELTAS));
+  }
 
-    DeltaFilesMetricReporter.close();
+  /**
+   * Queries shouldn't fail, but metrics should be 0, if tez.counters.max limit is passed.
+   * @throws Exception
+   */
+  @Test
+  public void testDeltaFilesMetricTezMaxCounters() throws Exception {
+    HiveConf conf = new HiveConf();
+    conf.setInt(TezConfiguration.TEZ_COUNTERS_MAX, 50);
+    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED, true);
+    configureMetrics(conf);
+    setupWithConf(conf);
+
+    MetricsFactory.close();
+    MetricsFactory.init(conf);
+    DeltaFilesMetricReporter.init(conf);
+
+    String tableName = "test_metrics";
+    CompactorOnTezTest.TestDataProvider testDataProvider = new CompactorOnTezTest.TestDataProvider();
+    testDataProvider.createFullAcidTable(tableName, true, false);
+    // Create 51 partitions
+    for (int i = 0; i < 51; i++) {
+      executeStatementOnDriver("insert into " + tableName + " values('1', " + i * i + ", '" + i + "')", driver);
+    }
+
+    // Touch all partitions
+    executeStatementOnDriver("select avg(b) from " + tableName, driver);
+    Thread.sleep(1000);
+
+    Assert.assertEquals(0, gaugeToMap(MetricsConstants.COMPACTION_NUM_DELTAS).size());
+    Assert.assertEquals(0, gaugeToMap(MetricsConstants.COMPACTION_NUM_OBSOLETE_DELTAS).size());
+    Assert.assertEquals(0, gaugeToMap(MetricsConstants.COMPACTION_NUM_SMALL_DELTAS).size());
+  }
+
+  /**
+   * Queries should succeed if additional acid metrics are disabled.
+   * @throws Exception
+   */
+  @Test
+  public void testDeltaFilesMetricWithMetricsDisabled() throws Exception {
+    HiveConf conf = new HiveConf();
+    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED, false);
+    MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.METASTORE_ACIDMETRICS_EXT_ON, true);
+    configureMetrics(conf);
+    super.setupWithConf(conf);
+
+    MetricsFactory.close();
+    MetricsFactory.init(conf);
+
+    String tableName = "test_metrics";
+    CompactorOnTezTest.TestDataProvider testDataProvider = new CompactorOnTezTest.TestDataProvider();
+    testDataProvider.createFullAcidTable(tableName, true, false);
+    testDataProvider.insertTestDataPartitioned(tableName);
+
+    executeStatementOnDriver("select avg(b) from " + tableName, driver);
+
+    try {
+      Assert.assertEquals(0, gaugeToMap(MetricsConstants.COMPACTION_NUM_DELTAS).size());

Review comment:
       Sure

##########
File path: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java
##########
@@ -272,30 +272,34 @@ private void prepare(InputInitializerContext initializerContext) throws IOExcept
         String groupName = null;
         String vertexName = null;
         if (inputInitializerContext != null) {
-          tezCounters = new TezCounters();
-          groupName = HiveInputCounters.class.getName();
-          vertexName = jobConf.get(Operator.CONTEXT_NAME_KEY, "");
-          counterName = Utilities.getVertexCounterName(HiveInputCounters.RAW_INPUT_SPLITS.name(), vertexName);
-          tezCounters.findCounter(groupName, counterName).increment(splits.length);
-          final List<Path> paths = Utilities.getInputPathsTez(jobConf, work);
-          counterName = Utilities.getVertexCounterName(HiveInputCounters.INPUT_DIRECTORIES.name(), vertexName);
-          tezCounters.findCounter(groupName, counterName).increment(paths.size());
-          final Set<String> files = new HashSet<>();
-          for (InputSplit inputSplit : splits) {
-            if (inputSplit instanceof FileSplit) {
-              final FileSplit fileSplit = (FileSplit) inputSplit;
-              final Path path = fileSplit.getPath();
-              // The assumption here is the path is a file. Only case this is different is ACID deltas.
-              // The isFile check is avoided here for performance reasons.
-              final String fileStr = path.toString();
-              if (!files.contains(fileStr)) {
-                files.add(fileStr);
+          try {
+            tezCounters = new TezCounters();
+            groupName = HiveInputCounters.class.getName();
+            vertexName = jobConf.get(Operator.CONTEXT_NAME_KEY, "");
+            counterName = Utilities.getVertexCounterName(HiveInputCounters.RAW_INPUT_SPLITS.name(), vertexName);
+            tezCounters.findCounter(groupName, counterName).increment(splits.length);
+            final List<Path> paths = Utilities.getInputPathsTez(jobConf, work);
+            counterName = Utilities.getVertexCounterName(HiveInputCounters.INPUT_DIRECTORIES.name(), vertexName);
+            tezCounters.findCounter(groupName, counterName).increment(paths.size());
+            final Set<String> files = new HashSet<>();
+            for (InputSplit inputSplit : splits) {
+              if (inputSplit instanceof FileSplit) {
+                final FileSplit fileSplit = (FileSplit) inputSplit;
+                final Path path = fileSplit.getPath();
+                // The assumption here is the path is a file. Only case this is different is ACID deltas.
+                // The isFile check is avoided here for performance reasons.
+                final String fileStr = path.toString();
+                if (!files.contains(fileStr)) {
+                  files.add(fileStr);
+                }
               }
             }
+            counterName = Utilities.getVertexCounterName(HiveInputCounters.INPUT_FILES.name(), vertexName);
+            tezCounters.findCounter(groupName, counterName).increment(files.size());
+            DeltaFilesMetricReporter.createCountersForAcidMetrics(tezCounters, jobConf);
+          } catch (Exception e) {

Review comment:
       Basically, the idea here is that we don't want the query to fail just 
because the tez counters aren't updated
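
For reference, a minimal self-contained sketch of that pattern (class, method and logger names here are illustrative, not the actual HiveSplitGenerator code): counter population is treated as best-effort, so any exception, e.g. Tez's LimitExceededException once tez.counters.max is hit, is logged and swallowed and split generation carries on.

import org.apache.hadoop.mapred.JobConf;
import org.apache.tez.common.counters.TezCounters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class AcidCountersBestEffort {
  private static final Logger LOG = LoggerFactory.getLogger(AcidCountersBestEffort.class);

  private AcidCountersBestEffort() {
  }

  // Returns the populated counters, or null if anything went wrong; the caller then simply
  // proceeds without the extra ACID delta counters instead of failing the query.
  public static TezCounters tryBuildAcidCounters(JobConf jobConf) {
    try {
      TezCounters counters = new TezCounters();
      // ... populate RAW_INPUT_SPLITS, INPUT_DIRECTORIES and INPUT_FILES, then register
      // the ACID delta counters, as HiveSplitGenerator.prepare() does ...
      return counters;
    } catch (Exception e) {
      LOG.warn("Could not populate Tez counters for ACID delta metrics; skipping", e);
      return null;
    }
  }
}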




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 641044)
    Time Spent: 1h 20m  (was: 1h 10m)

> Delta metrics collection may cause number of tez counters to exceed 
> tez.counters.max limit
> ------------------------------------------------------------------------------------------
>
>                 Key: HIVE-25429
>                 URL: https://issues.apache.org/jira/browse/HIVE-25429
>             Project: Hive
>          Issue Type: Sub-task
>          Components: Hive
>    Affects Versions: 4.0.0
>            Reporter: Karen Coppage
>            Assignee: Karen Coppage
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 1h 20m
>  Remaining Estimate: 0h
>
> There's a limit to the number of tez counters allowed (tez.counters.max). 
> Delta metrics collection (i.e. DeltaFilesMetricReporter) was creating 3 
> counters for each partition touched by a given query, which can result in a 
> huge number of counters. This is unnecessary because we're only interested 
> in the n partitions with the most deltas, so this change limits the number of 
> counters created to hive.txn.acid.metrics.max.cache.size*3.
> Also, when tez.counters.max is reached, a LimitExceededException is thrown; it 
> isn't caught on the Hive side and causes the query to fail. We should catch it 
> and skip delta metrics collection in that case.
> Also, make sure that metrics are only collected if 
> hive.metastore.acidmetrics.ext.on=true.
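
A rough illustration of the last two points (a sketch only; the class and method names are hypothetical, and the HiveConf.ConfVars constant for hive.txn.acid.metrics.max.cache.size is assumed to be HIVE_TXN_ACID_METRICS_MAX_CACHE_SIZE):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;

public final class DeltaMetricsGuards {

  private DeltaMetricsGuards() {
  }

  // Delta metrics are only collected when the extended ACID metrics are switched on.
  static boolean deltaMetricsEnabled(HiveConf conf) {
    return MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.METASTORE_ACIDMETRICS_EXT_ON);
  }

  // Three delta metrics (deltas, obsolete deltas, small deltas) with at most
  // hive.txn.acid.metrics.max.cache.size entries each, so the counter count is bounded
  // regardless of how many partitions a query touches.
  // NOTE: the ConfVars constant name below is assumed; the config key is hive.txn.acid.metrics.max.cache.size.
  static int maxDeltaCounters(HiveConf conf) {
    return HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_MAX_CACHE_SIZE) * 3;
  }
}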



--
This message was sent by Atlassian Jira
(v8.3.4#803005)
