Repository: hive Updated Branches: refs/heads/master 77d7d090b -> 419593e70
HIVE-18478: Data files deleted from temp table should not be recycled to CM path (Mahesh Kumar Behera, reviewed by Sankar Hariappan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/419593e7 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/419593e7 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/419593e7 Branch: refs/heads/master Commit: 419593e70565923eb52c704b2bb58007476aa404 Parents: 77d7d09 Author: Sankar Hariappan <sank...@apache.org> Authored: Thu Feb 1 09:16:55 2018 +0530 Committer: Sankar Hariappan <sank...@apache.org> Committed: Thu Feb 1 09:16:55 2018 +0530 ---------------------------------------------------------------------- .../hive/ql/parse/TestReplicationScenarios.java | 52 ++++++++++++++++++++ .../apache/hadoop/hive/ql/metadata/Hive.java | 16 +++--- .../ql/metadata/SessionHiveMetaStoreClient.java | 2 +- .../apache/hadoop/hive/metastore/Warehouse.java | 9 +++- .../minihms/AbstractMetaStoreService.java | 4 +- 5 files changed, 71 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/419593e7/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java ---------------------------------------------------------------------- diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java index 0062052..39d077a 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java @@ -22,6 +22,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.hive.cli.CliSessionState; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; @@ -3507,6 +3508,57 @@ public class TestReplicationScenarios { } } + @Test + public void testRecycleFileDropTempTable() throws IOException { + String dbName = createDB(testName.getMethodName(), driver); + + run("CREATE TABLE " + dbName + ".normal(a int)", driver); + run("INSERT INTO " + dbName + ".normal values (1)", driver); + run("DROP TABLE " + dbName + ".normal", driver); + + String cmDir = hconf.getVar(HiveConf.ConfVars.REPLCMDIR); + Path path = new Path(cmDir); + FileSystem fs = path.getFileSystem(hconf); + ContentSummary cs = fs.getContentSummary(path); + long fileCount = cs.getFileCount(); + + assertTrue(fileCount != 0); + + run("CREATE TABLE " + dbName + ".normal(a int)", driver); + run("INSERT INTO " + dbName + ".normal values (1)", driver); + + run("CREATE TEMPORARY TABLE " + dbName + ".temp(a int)", driver); + run("INSERT INTO " + dbName + ".temp values (2)", driver); + run("INSERT OVERWRITE TABLE " + dbName + ".temp select * from " + dbName + ".normal", driver); + + cs = fs.getContentSummary(path); + long fileCountAfter = cs.getFileCount(); + + assertTrue(fileCount == fileCountAfter); + + run("INSERT INTO " + dbName + ".temp values (3)", driver); + run("TRUNCATE TABLE " + dbName + ".temp", driver); + + cs = fs.getContentSummary(path); + fileCountAfter = cs.getFileCount(); + assertTrue(fileCount == fileCountAfter); + + run("INSERT INTO " + dbName + ".temp values (4)", driver); + run("ALTER TABLE " + dbName + ".temp RENAME to " + dbName + ".temp1", driver); + verifyRun("SELECT count(*) from " + dbName + ".temp1", new String[]{"1"}, driver); + + cs = fs.getContentSummary(path); + fileCountAfter = cs.getFileCount(); + assertTrue(fileCount == fileCountAfter); + + run("INSERT INTO " + dbName + ".temp1 values (5)", driver); + run("DROP TABLE " + dbName + ".temp1", driver); + + cs = fs.getContentSummary(path); + fileCountAfter = cs.getFileCount(); + assertTrue(fileCount == fileCountAfter); + } + private NotificationEvent createDummyEvent(String dbname, String tblname, long evid) { MessageFactory msgFactory = MessageFactory.getInstance(); Table t = new Table(); http://git-wip-us.apache.org/repos/asf/hive/blob/419593e7/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index c8299e2..3b97dac 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -1881,7 +1881,7 @@ public class Hive { // base_x. (there is Insert Overwrite and Load Data Overwrite) boolean isAutoPurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge")); replaceFiles(tbl.getPath(), loadPath, destPath, oldPartPath, getConf(), - isSrcLocal, isAutoPurge, newFiles, filter, isMmTableWrite?true:false); + isSrcLocal, isAutoPurge, newFiles, filter, isMmTableWrite?true:false, !tbl.isTemporary()); } else { FileSystem fs = tbl.getDataLocation().getFileSystem(conf); copyFiles(conf, loadPath, destPath, fs, isSrcLocal, isAcidIUDoperation, @@ -2427,7 +2427,7 @@ private void constructOneLBLocationMap(FileStatus fSta, //todo: should probably do the same for MM IOW boolean isAutopurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge")); replaceFiles(tblPath, loadPath, destPath, tblPath, - sessionConf, isSrcLocal, isAutopurge, newFiles, filter, isMmTable?true:false); + sessionConf, isSrcLocal, isAutopurge, newFiles, filter, isMmTable?true:false, !tbl.isTemporary()); } else { try { FileSystem fs = tbl.getDataLocation().getFileSystem(sessionConf); @@ -3963,7 +3963,7 @@ private void constructOneLBLocationMap(FileStatus fSta, */ protected void replaceFiles(Path tablePath, Path srcf, Path destf, Path oldPath, HiveConf conf, boolean isSrcLocal, boolean purge, List<Path> newFiles, PathFilter deletePathFilter, - boolean isMmTableOverwrite) throws HiveException { + boolean isMmTableOverwrite, boolean isNeedRecycle) throws HiveException { try { FileSystem destFs = destf.getFileSystem(conf); @@ -3984,7 +3984,7 @@ private void constructOneLBLocationMap(FileStatus fSta, if (oldPath != null) { // Note: we assume lbLevels is 0 here. Same as old code for non-MM. // For MM tables, this can only be a LOAD command. Does LOAD even support LB? - deleteOldPathForReplace(destf, oldPath, conf, purge, deletePathFilter, isMmTableOverwrite, 0); + deleteOldPathForReplace(destf, oldPath, conf, purge, deletePathFilter, isMmTableOverwrite, 0, isNeedRecycle); } // first call FileUtils.mkdir to make sure that destf directory exists, if not, it creates @@ -4030,7 +4030,7 @@ private void constructOneLBLocationMap(FileStatus fSta, } private void deleteOldPathForReplace(Path destPath, Path oldPath, HiveConf conf, boolean purge, - PathFilter pathFilter, boolean isMmTableOverwrite, int lbLevels) throws HiveException { + PathFilter pathFilter, boolean isMmTableOverwrite, int lbLevels, boolean isNeedRecycle) throws HiveException { Utilities.FILE_OP_LOGGER.debug("Deleting old paths for replace in " + destPath + " and old path " + oldPath); boolean isOldPathUnderDestf = false; @@ -4044,7 +4044,7 @@ private void constructOneLBLocationMap(FileStatus fSta, isOldPathUnderDestf = isSubDir(oldPath, destPath, oldFs, destFs, false); if (isOldPathUnderDestf || isMmTableOverwrite) { if (lbLevels == 0 || !isMmTableOverwrite) { - cleanUpOneDirectoryForReplace(oldPath, oldFs, pathFilter, conf, purge); + cleanUpOneDirectoryForReplace(oldPath, oldFs, pathFilter, conf, purge, isNeedRecycle); } } } catch (IOException e) { @@ -4061,8 +4061,8 @@ private void constructOneLBLocationMap(FileStatus fSta, private void cleanUpOneDirectoryForReplace(Path path, FileSystem fs, - PathFilter pathFilter, HiveConf conf, boolean purge) throws IOException, HiveException { - if (conf.getBoolVar(HiveConf.ConfVars.REPLCMENABLED)) { + PathFilter pathFilter, HiveConf conf, boolean purge, boolean isNeedRecycle) throws IOException, HiveException { + if (isNeedRecycle && conf.getBoolVar(HiveConf.ConfVars.REPLCMENABLED)) { recycleDirToCmPath(path, purge); } FileStatus[] statuses = fs.listStatus(path, pathFilter); http://git-wip-us.apache.org/repos/asf/hive/blob/419593e7/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java index a22d068..d79b6ed 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java @@ -643,7 +643,7 @@ public class SessionHiveMetaStoreClient extends HiveMetaStoreClient implements I if (envContext != null){ ifPurge = Boolean.parseBoolean(envContext.getProperties().get("ifPurge")); } - getWh().deleteDir(tablePath, true, ifPurge); + getWh().deleteDir(tablePath, true, ifPurge, false); } catch (Exception err) { LOG.error("Failed to delete temp table directory: " + tablePath, err); // Forgive error http://git-wip-us.apache.org/repos/asf/hive/blob/419593e7/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java index 2d52e0e..20c1060 100755 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java @@ -228,7 +228,14 @@ public class Warehouse { } public boolean deleteDir(Path f, boolean recursive, boolean ifPurge) throws MetaException { - cm.recycle(f, RecycleType.MOVE, ifPurge); + return deleteDir(f, recursive, ifPurge, true); + } + + public boolean deleteDir(Path f, boolean recursive, boolean ifPurge, boolean needCmRecycle) throws MetaException { + // no need to create the CM recycle file for temporary tables + if (needCmRecycle) { + cm.recycle(f, RecycleType.MOVE, ifPurge); + } FileSystem fs = getFs(f); return fsHandler.deleteDir(fs, f, recursive, ifPurge, conf); } http://git-wip-us.apache.org/repos/asf/hive/blob/419593e7/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/minihms/AbstractMetaStoreService.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/minihms/AbstractMetaStoreService.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/minihms/AbstractMetaStoreService.java index 1cc2843..b549c7c 100644 --- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/minihms/AbstractMetaStoreService.java +++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/minihms/AbstractMetaStoreService.java @@ -148,8 +148,8 @@ public abstract class AbstractMetaStoreService { * @throws MetaException IO failure */ public void cleanWarehouseDirs() throws MetaException { - warehouse.deleteDir(getWarehouseRoot(), true, true); - warehouse.deleteDir(trashDir, true, true); + warehouse.deleteDir(getWarehouseRoot(), true, true, false); + warehouse.deleteDir(trashDir, true, true, false); } /**