[ https://issues.apache.org/jira/browse/HIVE-26414?focusedWorklogId=797139&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-797139 ]
ASF GitHub Bot logged work on HIVE-26414: ----------------------------------------- Author: ASF GitHub Bot Created on: 02/Aug/22 07:25 Start Date: 02/Aug/22 07:25 Worklog Time Spent: 10m Work Description: deniskuzZ commented on code in PR #3457: URL: https://github.com/apache/hive/pull/3457#discussion_r935205909 ########## ql/src/java/org/apache/hadoop/hive/ql/HiveQueryLifeTimeHook.java: ########## @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.CompactionRequest; +import org.apache.hadoop.hive.metastore.api.CompactionType; +import org.apache.hadoop.hive.metastore.txn.TxnUtils; +import org.apache.hadoop.hive.ql.hooks.PrivateHookContext; +import org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHook; +import org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHookContext; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.FileSinkDesc; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.io.IOException; + +import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.IF_PURGE; +import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION; + +public class HiveQueryLifeTimeHook implements QueryLifeTimeHook { + + private static final Logger LOG = LoggerFactory.getLogger(HiveQueryLifeTimeHook.class); + + @Override + public void beforeCompile(QueryLifeTimeHookContext ctx) { + + } + + @Override + public void afterCompile(QueryLifeTimeHookContext ctx, boolean hasError) { + + } + + @Override + public void beforeExecution(QueryLifeTimeHookContext ctx) { + + } + + @Override + public void afterExecution(QueryLifeTimeHookContext ctx, boolean hasError) { + if (hasError) { + checkAndRollbackCTAS(ctx); + } + } + + private void checkAndRollbackCTAS(QueryLifeTimeHookContext ctx) { + HiveConf conf = ctx.getHiveConf(); + PrivateHookContext pCtx = (PrivateHookContext) ctx.getHookContext(); + QueryPlan queryPlan = ctx.getHookContext().getQueryPlan(); + if (queryPlan.getAcidSinks() != null && queryPlan.getAcidSinks().size() > 0) { + FileSinkDesc fileSinkDesc = queryPlan.getAcidSinks().iterator().next(); + Table table = 
fileSinkDesc.getTable(); + long writeId = fileSinkDesc.getTableWriteId(); + boolean isCTAS = ctx.getHookContext().getQueryPlan().getQueryProperties().isCTAS(); + Path destinationPath = pCtx.getContext().getLocation(); + + if (destinationPath != null && table != null && isCTAS && + HiveConf.getBoolVar(conf, HiveConf.ConfVars.TXN_CTAS_X_LOCK)) { + LOG.info("Performing cleanup as part of rollback: {}", table.getFullTableName().toString()); + try { + CompactionRequest rqst = new CompactionRequest(table.getDbName(), table.getTableName(), + CompactionType.MAJOR); + rqst.setRunas(TxnUtils.findUserToRunAs(destinationPath.toString(), table.getTTable(), conf)); + rqst.putToProperties(META_TABLE_LOCATION, destinationPath.toString()); + rqst.putToProperties(IF_PURGE, Boolean.toString(true)); + boolean success = Hive.get(conf).getMSC().submitForCleanup(rqst, writeId, + pCtx.getQueryState().getTxnManager().getCurrentTxnId()); + if (success) { + LOG.info("The cleanup request has been submitted"); + } else { + LOG.info("The cleanup request has not been submitted"); + } + } catch (HiveException | IOException | InterruptedException | TException e) { Review Comment: we shouldn't catch InterruptedException Issue Time Tracking ------------------- Worklog Id: (was: 797139) Time Spent: 12h 40m (was: 12.5h) > Aborted/Cancelled CTAS operations must initiate cleanup of uncommitted data > --------------------------------------------------------------------------- > > Key: HIVE-26414 > URL: https://issues.apache.org/jira/browse/HIVE-26414 > Project: Hive > Issue Type: Improvement > Reporter: Sourabh Badhya > Assignee: Sourabh Badhya > Priority: Major > Labels: pull-request-available > Time Spent: 12h 40m > Remaining Estimate: 0h > > When a CTAS query fails before creation of table and after writing the data, > the data is present in the directory and not cleaned up currently by the > cleaner or any other mechanism currently. 
This is because the cleaner > requires a table corresponding to what it is cleaning. In order to handle such a > situation, we can directly pass the relevant information to the cleaner so > that such uncommitted data is deleted. -- This message was sent by Atlassian Jira (v8.20.10#820010)