[ 
https://issues.apache.org/jira/browse/HIVE-26414?focusedWorklogId=797139&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-797139
 ]

ASF GitHub Bot logged work on HIVE-26414:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 02/Aug/22 07:25
            Start Date: 02/Aug/22 07:25
    Worklog Time Spent: 10m 
      Work Description: deniskuzZ commented on code in PR #3457:
URL: https://github.com/apache/hive/pull/3457#discussion_r935205909


##########
ql/src/java/org/apache/hadoop/hive/ql/HiveQueryLifeTimeHook.java:
##########
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.CompactionRequest;
+import org.apache.hadoop.hive.metastore.api.CompactionType;
+import org.apache.hadoop.hive.metastore.txn.TxnUtils;
+import org.apache.hadoop.hive.ql.hooks.PrivateHookContext;
+import org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHook;
+import org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHookContext;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.thrift.TException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.io.IOException;
+
+import static 
org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.IF_PURGE;
+import static 
org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION;
+
+public class HiveQueryLifeTimeHook implements QueryLifeTimeHook {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(HiveQueryLifeTimeHook.class);
+
+  @Override
+  public void beforeCompile(QueryLifeTimeHookContext ctx) {
+
+  }
+
+  @Override
+  public void afterCompile(QueryLifeTimeHookContext ctx, boolean hasError) {
+
+  }
+
+  @Override
+  public void beforeExecution(QueryLifeTimeHookContext ctx) {
+
+  }
+
+  @Override
+  public void afterExecution(QueryLifeTimeHookContext ctx, boolean hasError) {
+    if (hasError) {
+      checkAndRollbackCTAS(ctx);
+    }
+  }
+
+  private void checkAndRollbackCTAS(QueryLifeTimeHookContext ctx) {
+    HiveConf conf = ctx.getHiveConf();
+    PrivateHookContext pCtx = (PrivateHookContext) ctx.getHookContext();
+    QueryPlan queryPlan = ctx.getHookContext().getQueryPlan();
+    if (queryPlan.getAcidSinks() != null && queryPlan.getAcidSinks().size() > 
0) {
+      FileSinkDesc fileSinkDesc = queryPlan.getAcidSinks().iterator().next();
+      Table table = fileSinkDesc.getTable();
+      long writeId = fileSinkDesc.getTableWriteId();
+      boolean isCTAS = 
ctx.getHookContext().getQueryPlan().getQueryProperties().isCTAS();
+      Path destinationPath = pCtx.getContext().getLocation();
+
+      if (destinationPath != null && table != null && isCTAS &&
+              HiveConf.getBoolVar(conf, HiveConf.ConfVars.TXN_CTAS_X_LOCK)) {
+        LOG.info("Performing cleanup as part of rollback: {}", 
table.getFullTableName().toString());
+        try {
+          CompactionRequest rqst = new CompactionRequest(table.getDbName(), 
table.getTableName(),
+                  CompactionType.MAJOR);
+          rqst.setRunas(TxnUtils.findUserToRunAs(destinationPath.toString(), 
table.getTTable(), conf));
+          rqst.putToProperties(META_TABLE_LOCATION, 
destinationPath.toString());
+          rqst.putToProperties(IF_PURGE, Boolean.toString(true));
+          boolean success = Hive.get(conf).getMSC().submitForCleanup(rqst, 
writeId,
+                  pCtx.getQueryState().getTxnManager().getCurrentTxnId());
+          if (success) {
+            LOG.info("The cleanup request has been submitted");
+          } else {
+            LOG.info("The cleanup request has not been submitted");
+          }
+        } catch (HiveException | IOException | InterruptedException | 
TException e) {

Review Comment:
   we shouldn't catch InterruptedException





Issue Time Tracking
-------------------

    Worklog Id:     (was: 797139)
    Time Spent: 12h 40m  (was: 12.5h)

> Aborted/Cancelled CTAS operations must initiate cleanup of uncommitted data
> ---------------------------------------------------------------------------
>
>                 Key: HIVE-26414
>                 URL: https://issues.apache.org/jira/browse/HIVE-26414
>             Project: Hive
>          Issue Type: Improvement
>            Reporter: Sourabh Badhya
>            Assignee: Sourabh Badhya
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 12h 40m
>  Remaining Estimate: 0h
>
> When a CTAS query fails before creation of the table and after writing the data, 
> the data remains in the directory and is not currently cleaned up by the 
> cleaner or any other mechanism. This is because the cleaner 
> requires a table corresponding to what it's cleaning. In order to overcome such a 
> situation, we can directly pass the relevant information to the cleaner so 
> that such uncommitted data is deleted.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to