Repository: hive Updated Branches: refs/heads/hive-14535 543e0b01a -> e3fb11671
HIVE-17479: Staging directories do not get cleaned up for update/delete queries (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan, Eugene Koifman) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1706a6b5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1706a6b5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1706a6b5 Branch: refs/heads/hive-14535 Commit: 1706a6b5d7f8df7cb84d0f84bc82674e11fe856d Parents: 50fb6f3 Author: Jesus Camacho Rodriguez <jcama...@apache.org> Authored: Wed Sep 13 11:21:43 2017 -0700 Committer: Jesus Camacho Rodriguez <jcama...@apache.org> Committed: Fri Sep 15 14:23:30 2017 -0700 ---------------------------------------------------------------------- .../java/org/apache/hadoop/hive/ql/Context.java | 18 +++++++++++++++++- .../ql/parse/UpdateDeleteSemanticAnalyzer.java | 18 ++++++++---------- 2 files changed, 25 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/1706a6b5/ql/src/java/org/apache/hadoop/hive/ql/Context.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java index 9183edf..f04aed4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java @@ -25,9 +25,11 @@ import java.net.URI; import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Random; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; @@ -101,6 +103,10 @@ public class Context { private TokenRewriteStream tokenRewriteStream; private final String executionId; + // Some statements, e.g., UPDATE, DELETE, or MERGE, get rewritten into different + // subqueries that create new contexts. We keep them here so we can clean them + // up when we are done. + private final Set<Context> rewrittenStatementContexts; // List of Locks for this query protected List<HiveLock> hiveLocks; @@ -254,9 +260,10 @@ public class Context { * Create a Context with a given executionId. ExecutionId, together with * user name and conf, will determine the temporary directory locations. */ - public Context(Configuration conf, String executionId) { + private Context(Configuration conf, String executionId) { this.conf = conf; this.executionId = executionId; + this.rewrittenStatementContexts = new HashSet<>(); // local & non-local tmp location is configurable. however it is the same across // all external file systems @@ -662,6 +669,11 @@ public class Context { } public void clear() throws IOException { + // First clear the other contexts created by this query + for (Context subContext : rewrittenStatementContexts) { + subContext.clear(); + } + // Then clear this context if (resDir != null) { try { FileSystem fs = resDir.getFileSystem(conf); @@ -841,6 +853,10 @@ public class Context { } } + public void addRewrittenStatementContext(Context context) { + rewrittenStatementContexts.add(context); + } + public void addCS(String path, ContentSummary cs) { pathToCS.put(path, cs); } http://git-wip-us.apache.org/repos/asf/hive/blob/1706a6b5/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java index 78c511b..1bca967 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java @@ -32,18 +32,15 @@ import org.antlr.runtime.TokenRewriteStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.metastore.TransactionalValidationListener; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.hooks.Entity; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; -import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.HiveUtils; @@ -284,22 +281,23 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer { } } /** - * Parse the newly generated SQL statment to get a new AST + * Parse the newly generated SQL statement to get a new AST */ private ReparseResult parseRewrittenQuery(StringBuilder rewrittenQueryStr, String originalQuery) throws SemanticException { + // Set dynamic partitioning to nonstrict so that queries do not need any partition + // references. + // todo: this may be a perf issue as it prevents the optimizer.. or not + HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict"); // Parse the rewritten query string Context rewrittenCtx; try { - // Set dynamic partitioning to nonstrict so that queries do not need any partition - // references. - // todo: this may be a perf issue as it prevents the optimizer.. or not - HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict"); rewrittenCtx = new Context(conf); - rewrittenCtx.setExplainConfig(ctx.getExplainConfig()); - rewrittenCtx.setIsUpdateDeleteMerge(true); + ctx.addRewrittenStatementContext(rewrittenCtx); } catch (IOException e) { throw new SemanticException(ErrorMsg.UPDATEDELETE_IO_ERROR.getMsg()); } + rewrittenCtx.setExplainConfig(ctx.getExplainConfig()); + rewrittenCtx.setIsUpdateDeleteMerge(true); rewrittenCtx.setCmd(rewrittenQueryStr.toString()); ASTNode rewrittenTree;