klcopp commented on a change in pull request #3034:
URL: https://github.com/apache/hive/pull/3034#discussion_r819678060



##########
File path: ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
##########
@@ -288,14 +285,30 @@ private void clean(CompactionInfo ci, long minOpenTxnGLB, 
boolean metricsEnabled
       if (metricsEnabled) {
         
Metrics.getOrCreateCounter(MetricsConstants.COMPACTION_CLEANER_FAILURE_COUNTER).inc();
       }
-      txnHandler.markFailed(ci);
-    } finally {
+      handleCleanerAttemptFailure(ci);
+    }  finally {
       if (metricsEnabled) {
         perfLogger.perfLogEnd(CLASS_NAME, cleanerMetric);
       }
     }
   }
 
+  private void handleCleanerAttemptFailure(CompactionInfo ci) throws 
MetaException {
+    long defaultRetention = getTimeVar(conf, 
HIVE_COMPACTOR_CLEANER_RETRY_RETENTION_TIME, TimeUnit.MILLISECONDS);
+    int cleanAttempts = 0;

Review comment:
       Shouldn't cleanAttempts be initialized to 1, given that 
HIVE_COMPACTOR_CLEANER_MAX_RETRY_ATTEMPTS must be >= 1 because of its 
RangeValidator? (Speaking of which, it might be good to widen the range to (0, 10) 
as a kind of feature flag, but I'll leave that up to you.)

##########
File path: ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
##########
@@ -288,14 +285,30 @@ private void clean(CompactionInfo ci, long minOpenTxnGLB, 
boolean metricsEnabled
       if (metricsEnabled) {
         
Metrics.getOrCreateCounter(MetricsConstants.COMPACTION_CLEANER_FAILURE_COUNTER).inc();
       }
-      txnHandler.markFailed(ci);
-    } finally {
+      handleCleanerAttemptFailure(ci);
+    }  finally {
       if (metricsEnabled) {
         perfLogger.perfLogEnd(CLASS_NAME, cleanerMetric);
       }
     }
   }
 
+  private void handleCleanerAttemptFailure(CompactionInfo ci) throws 
MetaException {
+    long defaultRetention = getTimeVar(conf, 
HIVE_COMPACTOR_CLEANER_RETRY_RETENTION_TIME, TimeUnit.MILLISECONDS);
+    int cleanAttempts = 0;
+    if (ci.retryRetention > 0) {
+      cleanAttempts = (int)(Math.log(ci.retryRetention / defaultRetention) / 
Math.log(2)) + 1;
+    }
+    if (cleanAttempts >= getIntVar(conf, 
HIVE_COMPACTOR_CLEANER_MAX_RETRY_ATTEMPTS)) {
+      //Mark it as failed if the max attempt threshold is reached.
+      txnHandler.markFailed(ci);
+    } else {
+      //Calculate retry retention time and update record.
+      ci.retryRetention = (long)Math.pow(2, cleanAttempts) * defaultRetention;

Review comment:
       So assuming HIVE_COMPACTOR_CLEANER_RETRY_RETENTION_TIME==5m,  we try at 
CQ_COMMIT_TIME + 5m then CQ_COMMIT_TIME + 5^2 minutes then CQ_COMMIT_TIME + 5^3 
minutes?

##########
File path: 
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java
##########
@@ -353,11 +353,9 @@ public void markCompacted(CompactionInfo info) throws 
MetaException {
         if (minOpenTxnWaterMark > 0) {
           whereClause += " AND (\"CQ_NEXT_TXN_ID\" <= " + minOpenTxnWaterMark 
+ " OR \"CQ_NEXT_TXN_ID\" IS NULL)";
         }
-        if (retentionTime > 0) {
-          whereClause += " AND \"CQ_COMMIT_TIME\" < (" + getEpochFn(dbProduct) 
+ " - " + retentionTime + ")";
-        }
+        whereClause += " AND (\"CQ_COMMIT_TIME\" < (" + getEpochFn(dbProduct) 
+ " - CQ_RETRY_RETENTION - " + retentionTime + ") OR \"CQ_COMMIT_TIME\" IS 
NULL)";

Review comment:
       It would probably be best to fix the `if (retentionTime > 0)` removal in 
a separate ticket since it fixes an unrelated bug

##########
File path: 
standalone-metastore/metastore-server/src/main/sql/derby/hive-schema-4.0.0.derby.sql
##########
@@ -629,7 +629,8 @@ CREATE TABLE COMPACTION_QUEUE (
   CQ_INITIATOR_ID varchar(128),
   CQ_INITIATOR_VERSION varchar(128),
   CQ_WORKER_VERSION varchar(128),
-  CQ_CLEANER_START bigint
+  CQ_CLEANER_START bigint,
+  CQ_RETRY_RETENTION integer NOT NULL DEFAULT 0

Review comment:
       I'm a little iffy about declaring this as an integer vs. a bigint, since 
we store milliseconds and this value could be 2^10 * 
hive.compactor.cleaner.retry.retentionTime, which has no upper limit

##########
File path: ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
##########
@@ -288,14 +285,30 @@ private void clean(CompactionInfo ci, long minOpenTxnGLB, 
boolean metricsEnabled
       if (metricsEnabled) {
         
Metrics.getOrCreateCounter(MetricsConstants.COMPACTION_CLEANER_FAILURE_COUNTER).inc();
       }
-      txnHandler.markFailed(ci);
-    } finally {
+      handleCleanerAttemptFailure(ci);
+    }  finally {
       if (metricsEnabled) {
         perfLogger.perfLogEnd(CLASS_NAME, cleanerMetric);
       }
     }
   }
 
+  private void handleCleanerAttemptFailure(CompactionInfo ci) throws 
MetaException {
+    long defaultRetention = getTimeVar(conf, 
HIVE_COMPACTOR_CLEANER_RETRY_RETENTION_TIME, TimeUnit.MILLISECONDS);
+    int cleanAttempts = 0;
+    if (ci.retryRetention > 0) {
+      cleanAttempts = (int)(Math.log(ci.retryRetention / defaultRetention) / 
Math.log(2)) + 1;
+    }
+    if (cleanAttempts >= getIntVar(conf, 
HIVE_COMPACTOR_CLEANER_MAX_RETRY_ATTEMPTS)) {
+      //Mark it as failed if the max attempt threshold is reached.
+      txnHandler.markFailed(ci);
+    } else {
+      //Calculate retry retention time and update record.
+      ci.retryRetention = (long)Math.pow(2, cleanAttempts) * defaultRetention;

Review comment:
       I'm okay with this; I just can't remember whether the original intention was 
this, or e.g. that the 3rd attempt should be at CQ_COMMIT_TIME + 5 + 5^2 etc.

##########
File path: 
standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
##########
@@ -613,6 +613,13 @@ public static ConfVars getMetaConf(String name) {
         "metastore.compactor.enable.stats.compression",
         "metastore.compactor.enable.stats.compression", true,
         "Can be used to disable compression and ORC indexes for files produced 
by minor compaction."),
+    
HIVE_COMPACTOR_CLEANER_MAX_RETRY_ATTEMPTS("hive.compactor.cleaner.retry.maxattempts",
+            "hive.compactor.cleaner.retry.maxattempts", 5, new 
RangeValidator(1, 10),
+            "Maximum number of attempts to clean a table again after a failed 
cycle. Must be between 1 and 10."),
+    
HIVE_COMPACTOR_CLEANER_RETRY_RETENTION_TIME("hive.compactor.cleaner.retry.retentionTime",
+            "hive.compactor.cleaner.retry.retentionTime", 300, 
TimeUnit.SECONDS, new TimeValidator(TimeUnit.SECONDS),

Review comment:
       Cosmetic: retention.time instead of retentionTime fits the naming pattern 
better. Also, I'm not sure how case-insensitive Hive config names are




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to