[ https://issues.apache.org/jira/browse/HIVE-27020?focusedWorklogId=857251&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-857251 ]

ASF GitHub Bot logged work on HIVE-27020:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 16/Apr/23 18:13
            Start Date: 16/Apr/23 18:13
    Worklog Time Spent: 10m 
      Work Description: SourabhBadhya commented on code in PR #4091:
URL: https://github.com/apache/hive/pull/4091#discussion_r1167991398


##########
ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/handler/CompactionCleaner.java:
##########
@@ -259,49 +247,11 @@ private void cleanUsingAcidDir(CompactionInfo ci, String location, long minOpenT
      */
 
     // Creating 'reader' list since we are interested in the set of 'obsolete' files
-    ValidReaderWriteIdList validWriteIdList = getValidCleanerWriteIdList(ci, validTxnList);
-    LOG.debug("Cleaning based on writeIdList: {}", validWriteIdList);
-
-    Path path = new Path(location);
-    FileSystem fs = path.getFileSystem(conf);
-
-    // Collect all the files/dirs
-    Map<Path, AcidUtils.HdfsDirSnapshot> dirSnapshots = AcidUtils.getHdfsDirSnapshotsForCleaner(fs, path);
-    AcidDirectory dir = AcidUtils.getAcidState(fs, path, conf, validWriteIdList, Ref.from(false), false,
-            dirSnapshots);
+    ValidReaderWriteIdList validWriteIdList = getValidCleanerWriteIdListForCompactionCleaner(ci, validTxnList);
     Table table = metadataCache.computeIfAbsent(ci.getFullTableName(), () -> resolveTable(ci.dbname, ci.tableName));
-    boolean isDynPartAbort = CompactorUtil.isDynPartAbort(table, ci.partName);
-
-    List<Path> obsoleteDirs = CompactorUtil.getObsoleteDirs(dir, isDynPartAbort);
-    if (isDynPartAbort || dir.hasUncompactedAborts()) {
-      ci.setWriteIds(dir.hasUncompactedAborts(), dir.getAbortedWriteIds());
-    }
-
-    List<Path> deleted = fsRemover.clean(new CleanupRequestBuilder().setLocation(location)
-            .setDbName(ci.dbname).setFullPartitionName(ci.getFullPartitionName())
-            .setRunAs(ci.runAs).setObsoleteDirs(obsoleteDirs).setPurge(true)
-            .build());
-
-    if (!deleted.isEmpty()) {
-      AcidMetricService.updateMetricsFromCleaner(ci.dbname, ci.tableName, ci.partName, dir.getObsolete(), conf,
-              txnHandler);
-    }
-
-    // Make sure there are no leftovers below the compacted watermark
-    boolean success = false;
-    conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList().toString());
-    dir = AcidUtils.getAcidState(fs, path, conf, new ValidReaderWriteIdList(
-                    ci.getFullTableName(), new long[0], new BitSet(), ci.highestWriteId, Long.MAX_VALUE),
-            Ref.from(false), false, dirSnapshots);
+    LOG.debug("Cleaning based on writeIdList: {}", validWriteIdList);
 
-    List<Path> remained = subtract(CompactorUtil.getObsoleteDirs(dir, isDynPartAbort), deleted);
-    if (!remained.isEmpty()) {
-      LOG.warn("{} Remained {} obsolete directories from {}. {}",
-              idWatermark(ci), remained.size(), location, CompactorUtil.getDebugInfo(remained));
-    } else {
-      LOG.debug("{} All cleared below the watermark: {} from {}", idWatermark(ci), ci.highestWriteId, location);
-      success = true;
-    }
+    boolean success = cleanAndVerifyObsoleteDirectories(ci, location, validWriteIdList, table);

Review Comment:
   This is part of the cleanup task and will be removed when the config is 
removed.
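
   For context, here is a sketch of what the new shared helper could look like if it simply consolidates the logic deleted above. This is a reconstruction from the removed lines, not necessarily the PR's actual implementation; it assumes the enclosing TaskHandler already provides conf, fsRemover, txnHandler, LOG, idWatermark() and the statically imported subtract():

   // Sketch only: reassembles the removed logic behind the new call site.
   // Imports and the exception signature are simplified; the fields used here
   // (conf, fsRemover, txnHandler, LOG) are assumed to live on TaskHandler.
   protected boolean cleanAndVerifyObsoleteDirectories(CompactionInfo ci, String location,
       ValidReaderWriteIdList validWriteIdList, Table table) throws Exception {
     Path path = new Path(location);
     FileSystem fs = path.getFileSystem(conf);

     // Collect all the files/dirs once so both passes share the same snapshot
     Map<Path, AcidUtils.HdfsDirSnapshot> dirSnapshots = AcidUtils.getHdfsDirSnapshotsForCleaner(fs, path);
     AcidDirectory dir = AcidUtils.getAcidState(fs, path, conf, validWriteIdList, Ref.from(false), false,
             dirSnapshots);
     boolean isDynPartAbort = CompactorUtil.isDynPartAbort(table, ci.partName);

     List<Path> obsoleteDirs = CompactorUtil.getObsoleteDirs(dir, isDynPartAbort);
     if (isDynPartAbort || dir.hasUncompactedAborts()) {
       ci.setWriteIds(dir.hasUncompactedAborts(), dir.getAbortedWriteIds());
     }

     // Remove the obsolete directories through the shared FSRemover
     List<Path> deleted = fsRemover.clean(new CleanupRequestBuilder().setLocation(location)
             .setDbName(ci.dbname).setFullPartitionName(ci.getFullPartitionName())
             .setRunAs(ci.runAs).setObsoleteDirs(obsoleteDirs).setPurge(true)
             .build());

     if (!deleted.isEmpty()) {
       AcidMetricService.updateMetricsFromCleaner(ci.dbname, ci.tableName, ci.partName, dir.getObsolete(), conf,
               txnHandler);
     }

     // Verify there are no leftovers below the compacted watermark
     conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList().toString());
     dir = AcidUtils.getAcidState(fs, path, conf, new ValidReaderWriteIdList(
                     ci.getFullTableName(), new long[0], new BitSet(), ci.highestWriteId, Long.MAX_VALUE),
             Ref.from(false), false, dirSnapshots);

     List<Path> remained = subtract(CompactorUtil.getObsoleteDirs(dir, isDynPartAbort), deleted);
     if (!remained.isEmpty()) {
       LOG.warn("{} Remained {} obsolete directories from {}. {}",
               idWatermark(ci), remained.size(), location, CompactorUtil.getDebugInfo(remained));
       return false;
     }
     LOG.debug("{} All cleared below the watermark: {} from {}", idWatermark(ci), ci.highestWriteId, location);
     return true;
   }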



##########
ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/handler/TaskHandlerFactory.java:
##########
@@ -43,7 +44,14 @@ private TaskHandlerFactory() {
 
   public List<TaskHandler> getHandlers(HiveConf conf, TxnStore txnHandler, MetadataCache metadataCache,
                                                   boolean metricsEnabled, FSRemover fsRemover) {
-    return Arrays.asList(new CompactionCleaner(conf, txnHandler, metadataCache,
+    boolean useAbortHandler = MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.COMPACTOR_CLEAN_ABORTS_USING_CLEANER);
+    List<TaskHandler> taskHandlers = new ArrayList<>();
+    if (useAbortHandler) {

Review Comment:
   This is part of the cleanup task and will be removed together with the config. Until then it needs to stay here.
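
   For readers without the rest of the hunk, the gated factory plausibly resolves along these lines; the AbortedTxnCleaner class name and its constructor are assumptions drawn from this issue's description, not taken from the visible diff:

   // Sketch only: how the config gate could select the handler set.
   // Imports mirror the surrounding file; AbortedTxnCleaner is an assumed name.
   public List<TaskHandler> getHandlers(HiveConf conf, TxnStore txnHandler, MetadataCache metadataCache,
                                        boolean metricsEnabled, FSRemover fsRemover) {
     boolean useAbortHandler = MetastoreConf.getBoolVar(conf,
             MetastoreConf.ConfVars.COMPACTOR_CLEAN_ABORTS_USING_CLEANER);
     List<TaskHandler> taskHandlers = new ArrayList<>();
     if (useAbortHandler) {
       // With the flag on, aborted-transaction cleanup gets its own handler
       taskHandlers.add(new AbortedTxnCleaner(conf, txnHandler, metadataCache, metricsEnabled, fsRemover));
     }
     // Compaction cleanup is always handled
     taskHandlers.add(new CompactionCleaner(conf, txnHandler, metadataCache, metricsEnabled, fsRemover));
     return taskHandlers;
   }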



##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/AcidTxnInfo.java:
##########
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.txn;
+
+import org.apache.commons.lang3.builder.ToStringBuilder;
+import org.apache.hadoop.hive.common.ValidCompactorWriteIdList;
+import org.apache.hadoop.hive.metastore.api.TableValidWriteIds;
+
+import java.util.Set;
+
+/**
+ * A class used for encapsulating information of abort-cleanup activities and compaction activities.
+ */
+public class AcidTxnInfo {

Review Comment:
   Removed. Done





Issue Time Tracking
-------------------

    Worklog Id:     (was: 857251)
    Time Spent: 12h 40m  (was: 12.5h)

> Implement a separate handler to handle aborted transaction cleanup
> ------------------------------------------------------------------
>
>                 Key: HIVE-27020
>                 URL: https://issues.apache.org/jira/browse/HIVE-27020
>             Project: Hive
>          Issue Type: Sub-task
>            Reporter: Sourabh Badhya
>            Assignee: Sourabh Badhya
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 12h 40m
>  Remaining Estimate: 0h
>
> As described in the parent task, once the cleaner is separated into different
> entities, implement a separate handler that can create requests for
> aborted-transaction cleanup. This would move aborted-transaction cleanup
> exclusively to the cleaner.
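
A minimal skeleton of the separate handler described above might look as follows; the class name, constructor shape, and the getTasks() hook are assumptions modeled on the TaskHandlerFactory diff in this worklog, not the merged code:

    // Sketch only: shape of a dedicated abort-cleanup TaskHandler.
    // Imports mirror the other handler classes in the same package.
    class AbortedTxnCleaner extends TaskHandler {

      AbortedTxnCleaner(HiveConf conf, TxnStore txnHandler, MetadataCache metadataCache,
                        boolean metricsEnabled, FSRemover fsRemover) {
        super(conf, txnHandler, metadataCache, metricsEnabled, fsRemover);
      }

      @Override
      public List<Runnable> getTasks() throws MetaException {
        List<Runnable> tasks = new ArrayList<>();
        // 1. Query the TxnStore for tables/partitions that carry aborted write IDs
        //    (the concrete TxnStore call is part of the PR and omitted here).
        // 2. For each candidate, schedule a task that resolves the table via the
        //    metadataCache, locates the aborted delta directories, and removes them
        //    through fsRemover, mirroring CompactionCleaner's obsolete-dir cleanup.
        return tasks;
      }
    }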



--
This message was sent by Atlassian Jira
(v8.20.10#820010)
