[ 
https://issues.apache.org/jira/browse/HIVE-25429?focusedWorklogId=636845&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-636845
 ]

ASF GitHub Bot logged work on HIVE-25429:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 11/Aug/21 10:12
            Start Date: 11/Aug/21 10:12
    Worklog Time Spent: 10m 
      Work Description: klcopp commented on a change in pull request #2563:
URL: https://github.com/apache/hive/pull/2563#discussion_r686691938



##########
File path: 
ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/metrics/DeltaFilesMetricReporter.java
##########
@@ -136,67 +133,77 @@ public static synchronized void init(HiveConf conf) 
throws Exception {
 
   private void configure(HiveConf conf) throws Exception {
     acidMetricsExtEnabled = MetastoreConf.getBoolVar(conf, 
MetastoreConf.ConfVars.METASTORE_ACIDMETRICS_EXT_ON);
+    if (acidMetricsExtEnabled) {
 
-    deltasThreshold = HiveConf.getIntVar(conf, 
HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_DELTA_NUM_THRESHOLD);
-    obsoleteDeltasThreshold = HiveConf.getIntVar(conf, 
HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_OBSOLETE_DELTA_NUM_THRESHOLD);
-
-    initCachesForMetrics(conf);
-    initObjectsForMetrics();
+      initCachesForMetrics(conf);
+      initObjectsForMetrics();
 
-    long reportingInterval = HiveConf.getTimeVar(conf,
-      HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_REPORTING_INTERVAL, 
TimeUnit.SECONDS);
+      long reportingInterval =
+          HiveConf.getTimeVar(conf, 
HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_REPORTING_INTERVAL, TimeUnit.SECONDS);
 
-    ThreadFactory threadFactory =
-      new ThreadFactoryBuilder()
-        .setDaemon(true)
-        .setNameFormat("DeltaFilesMetricReporter %d")
-        .build();
-    executorService = 
Executors.newSingleThreadScheduledExecutor(threadFactory);
-    executorService.scheduleAtFixedRate(
-      new ReportingTask(), 0, reportingInterval, TimeUnit.SECONDS);
+      ThreadFactory threadFactory =
+          new 
ThreadFactoryBuilder().setDaemon(true).setNameFormat("DeltaFilesMetricReporter 
%d").build();
+      executorService = 
Executors.newSingleThreadScheduledExecutor(threadFactory);
+      executorService.scheduleAtFixedRate(new ReportingTask(), 0, 
reportingInterval, TimeUnit.SECONDS);
 
-    LOG.info("Started DeltaFilesMetricReporter thread");
+      LOG.info("Started DeltaFilesMetricReporter thread");
+    }
   }
 
   public void submit(TezCounters counters) {
     if (acidMetricsExtEnabled) {
-      updateMetrics(NUM_OBSOLETE_DELTAS,
-        obsoleteDeltaCache, obsoleteDeltaTopN, obsoleteDeltasThreshold,
-        counters);
-      updateMetrics(NUM_DELTAS,
-        deltaCache, deltaTopN, deltasThreshold,
-        counters);
-      updateMetrics(NUM_SMALL_DELTAS,
-        smallDeltaCache, smallDeltaTopN, deltasThreshold,
-        counters);
+      updateMetrics(NUM_OBSOLETE_DELTAS, obsoleteDeltaCache, 
obsoleteDeltaTopN, counters);
+      updateMetrics(NUM_DELTAS, deltaCache, deltaTopN, counters);
+      updateMetrics(NUM_SMALL_DELTAS, smallDeltaCache, smallDeltaTopN, 
counters);
     }
   }
 
+  /**
+   * Copy counters to caches.
+   */
   private void updateMetrics(DeltaFilesMetricType metric, Cache<String, 
Integer> cache, Queue<Pair<String, Integer>> topN,
-        int threshold, TezCounters counters) {
-    counters.getGroup(metric.value).forEach(counter -> {
-      Integer prev = cache.getIfPresent(counter.getName());
-      if (prev != null && prev != counter.getValue()) {
-        cache.invalidate(counter.getName());
+      TezCounters counters) {
+    try {
+      CounterGroup group = counters.getGroup(metric.value);
+      // if the group is empty, clear the cache
+      if (group.size() == 0) {
+        cache.invalidateAll();
+      } else {
+        // if there is no counter corresponding to a cache entry, remove from 
cache
+        ConcurrentMap<String, Integer> cacheMap = cache.asMap();

Review comment:
       As discussed offline, we will also collect input ReadEntities and update 
metrics based on those entities.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 636845)
    Time Spent: 40m  (was: 0.5h)

> Delta metrics collection may cause number of tez counters to exceed 
> tez.counters.max limit
> ------------------------------------------------------------------------------------------
>
>                 Key: HIVE-25429
>                 URL: https://issues.apache.org/jira/browse/HIVE-25429
>             Project: Hive
>          Issue Type: Sub-task
>          Components: Hive
>    Affects Versions: 4.0.0
>            Reporter: Karen Coppage
>            Assignee: Karen Coppage
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 40m
>  Remaining Estimate: 0h
>
> There's a limit to the number of Tez counters allowed (tez.counters.max). 
> Delta metrics collection (i.e. DeltaFilesMetricReporter) was creating 3 
> counters for each partition touched by a given query, which can result in a 
> huge number of counters. This is unnecessary because we're only interested 
> in the n partitions with the most deltas. This change limits the number 
> of counters created to hive.txn.acid.metrics.max.cache.size*3.
> Also, when tez.counters.max is reached, a LimitExceededException is thrown but 
> isn't caught on the Hive side, which causes the query to fail. We should catch 
> this exception and skip delta metrics collection in this case.
> Also, make sure that metrics are only collected if 
> hive.metastore.acidmetrics.ext.on is set to true.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to