Repository: hive
Updated Branches:
  refs/heads/master 5971015ee -> 32e854ef1


HIVE-17496: Bootstrap repl is not cleaning up staging dirs (Tao Li, reviewed by Daniel Dai)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/32e854ef
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/32e854ef
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/32e854ef

Branch: refs/heads/master
Commit: 32e854ef1c25f21d53f7932723cfc76bf75a71cd
Parents: 5971015
Author: Daniel Dai <da...@hortonworks.com>
Authored: Tue Sep 19 00:14:49 2017 -0700
Committer: Daniel Dai <da...@hortonworks.com>
Committed: Tue Sep 19 00:14:49 2017 -0700

----------------------------------------------------------------------
 .../hive/ql/parse/TestReplicationScenarios.java | 60 ++++++++++++++++++++
 .../java/org/apache/hadoop/hive/ql/Context.java |  3 +
 .../ql/exec/repl/bootstrap/ReplLoadTask.java    |  5 +-
 .../exec/repl/bootstrap/load/util/PathInfo.java |  4 ++
 4 files changed, 70 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/32e854ef/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
index 4b687d6..3800e6a 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
@@ -18,8 +18,10 @@
 package org.apache.hadoop.hive.ql.parse;
 
 import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hive.cli.CliSessionState;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
@@ -3051,6 +3053,55 @@ public class TestReplicationScenarios {
     verifyIfTableNotExist(dbName + "_dupe", "acid_table_incremental", 
metaStoreClientMirror);
   }
 
+  @Test
+  public void testDeleteStagingDir() throws IOException {
+       String testName = "deleteStagingDir";
+       String dbName = createDB(testName, driver);
+       String tableName = "unptned";
+    run("CREATE TABLE " + dbName + "." + tableName + "(a string) STORED AS 
TEXTFILE", driver);
+
+    String[] unptn_data = new String[] {"one", "two"};
+    String unptn_locn = new Path(TEST_PATH , testName + 
"_unptn").toUri().getPath();
+    createTestDataFile(unptn_locn, unptn_data);
+    run("LOAD DATA LOCAL INPATH '" + unptn_locn + "' OVERWRITE INTO TABLE " + 
dbName + ".unptned", driver);
+    verifySetup("SELECT * from " + dbName + ".unptned", unptn_data, driver);
+
+    // Perform repl
+    advanceDumpDir();
+    run("REPL DUMP " + dbName, driver);
+    String replDumpLocn = getResult(0,0,driver);
+    // Reset the driver
+    driverMirror.close();
+    driverMirror.init();
+    run("REPL LOAD " + dbName + "_dupe FROM '" + replDumpLocn + "'", 
driverMirror);
+    // Calling close() explicitly to clean up the staging dirs
+    driverMirror.close();
+    // Check result
+    Path warehouse = new Path(System.getProperty("test.warehouse.dir", 
"/tmp"));
+    FileSystem fs = FileSystem.get(warehouse.toUri(), hconf);
+    try {
+      Path path = new Path(warehouse, dbName + "_dupe.db" + Path.SEPARATOR + 
tableName);
+      // First check if the table dir exists (could have been deleted for some 
reason in pre-commit tests)
+      if (!fs.exists(path))
+      {
+        return;
+      }
+      PathFilter filter = new PathFilter()
+      {
+        @Override
+        public boolean accept(Path path)
+        {
+          return path.getName().startsWith(HiveConf.getVar(hconf, 
HiveConf.ConfVars.STAGINGDIR));
+        }
+      };
+      FileStatus[] statuses = fs.listStatus(path, filter);
+      assertEquals(0, statuses.length);
+    } catch (IOException e) {
+      LOG.error("Failed to list files in: " + warehouse, e);
+      assert(false);
+    }
+  }
+
   private static String createDB(String name, Driver myDriver) {
     LOG.info("Testing " + name);
     String dbName = name + "_" + tid;
@@ -3298,6 +3349,15 @@ public class TestReplicationScenarios {
     }
   }
 
+  private void verifyIfDirNotExist(FileSystem fs, Path path, PathFilter 
filter){
+    try {
+      FileStatus[] statuses = fs.listStatus(path, filter);
+      assertEquals(0, statuses.length);
+    } catch (IOException e) {
+      assert(false);
+    }
+  }
+
   private void verifySetup(String cmd, String[] data, Driver myDriver) throws  
IOException {
     if (VERIFY_SETUP_STEPS){
       run(cmd, myDriver);

http://git-wip-us.apache.org/repos/asf/hive/blob/32e854ef/ql/src/java/org/apache/hadoop/hive/ql/Context.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java 
b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
index d6046d1..ed091ae 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -284,6 +284,9 @@ public class Context {
     opContext = new CompilationOpContext();
   }
 
+  public Map<String, Path> getFsScratchDirs() {
+    return fsScratchDirs;
+  }
 
   public Map<LoadTableDesc, WriteEntity> getLoadTableOutputMap() {
     return loadTableOutputMap;

http://git-wip-us.apache.org/repos/asf/hive/blob/32e854ef/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/ReplLoadTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/ReplLoadTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/ReplLoadTask.java
index 706d0b6..ca2e992 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/ReplLoadTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/ReplLoadTask.java
@@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.exec.repl.bootstrap;
 
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.Database;
-import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
@@ -42,7 +41,6 @@ import 
org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.table.TableContext;
 import org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.util.Context;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.parse.repl.ReplLogger;
-import org.apache.hadoop.hive.ql.plan.ImportTableDesc;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
 
 import java.io.Serializable;
@@ -209,6 +207,9 @@ public class ReplLoadTask extends Task<ReplLoadWork> implements Serializable {
       }
       this.childTasks = scope.rootTasks;
       LOG.info("Root Tasks / Total Tasks : {} / {} ", childTasks.size(), 
loadTaskTracker.numberOfTasks());
+
+      // Populate the driver context with the scratch dir info from the repl context, so that the temp dirs will be cleaned up later
+      driverContext.getCtx().getFsScratchDirs().putAll(context.pathInfo.getFsScratchDirs());
     } catch (Exception e) {
       LOG.error("failed replication", e);
       setException(e);

http://git-wip-us.apache.org/repos/asf/hive/blob/32e854ef/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/util/PathInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/util/PathInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/util/PathInfo.java
index f9f3750..f42f632 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/util/PathInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/util/PathInfo.java
@@ -44,6 +44,10 @@ public class PathInfo {
     stagingDir = HiveConf.getVar(hiveConf, HiveConf.ConfVars.STAGINGDIR);
   }
 
+  public Map<String, Path> getFsScratchDirs() {
+    return fsScratchDirs;
+  }
+
   Path computeStagingDir(Path inputPath) {
     final URI inputPathUri = inputPath.toUri();
     final String inputPathName = inputPathUri.getPath();

Reply via email to