Repository: hive Updated Branches: refs/heads/master 9a1498439 -> d45d462f5
HIVE-17196: CM: ReplCopyTask should retain the original file names even if copied from CM path (Daniel Dai, reviewed by Thejas Nair) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d45d462f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d45d462f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d45d462f Branch: refs/heads/master Commit: d45d462f5447a55b1f3cbec1b9ba71ec47781bb1 Parents: 9a14984 Author: Daniel Dai <da...@hortonworks.com> Authored: Tue Sep 19 13:53:37 2017 -0700 Committer: Daniel Dai <da...@hortonworks.com> Committed: Tue Sep 19 13:53:37 2017 -0700 ---------------------------------------------------------------------- .../hive/metastore/TestReplChangeManager.java | 44 ++++++++++---------- .../hive/ql/parse/TestReplicationScenarios.java | 25 +++++++++++ .../hive/metastore/ReplChangeManager.java | 11 ++--- 3 files changed, 53 insertions(+), 27 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/d45d462f/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java ---------------------------------------------------------------------- diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java index d9e206c..6ade76d 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java @@ -175,19 +175,19 @@ public class TestReplChangeManager { // verify cm.recycle(db, table, part) api moves file to cmroot dir int ret = cm.recycle(part1Path, RecycleType.MOVE, false); Assert.assertEquals(ret, 1); - Path cmPart1Path = ReplChangeManager.getCMPath(hiveConf, path1Chksum); + Path cmPart1Path = ReplChangeManager.getCMPath(hiveConf, part1Path.getName(), path1Chksum); assertTrue(cmPart1Path.getFileSystem(hiveConf).exists(cmPart1Path)); // Verify dropPartition recycle part files client.dropPartition(dbName, tblName, Arrays.asList("20160102")); assertFalse(part2Path.getFileSystem(hiveConf).exists(part2Path)); - Path cmPart2Path = ReplChangeManager.getCMPath(hiveConf, path2Chksum); + Path cmPart2Path = ReplChangeManager.getCMPath(hiveConf, part2Path.getName(), path2Chksum); assertTrue(cmPart2Path.getFileSystem(hiveConf).exists(cmPart2Path)); // Verify dropTable recycle partition files client.dropTable(dbName, tblName); assertFalse(part3Path.getFileSystem(hiveConf).exists(part3Path)); - Path cmPart3Path = ReplChangeManager.getCMPath(hiveConf, path3Chksum); + Path cmPart3Path = ReplChangeManager.getCMPath(hiveConf, part3Path.getName(), path3Chksum); assertTrue(cmPart3Path.getFileSystem(hiveConf).exists(cmPart3Path)); client.dropDatabase(dbName, true, true); @@ -246,17 +246,17 @@ public class TestReplChangeManager { cm.recycle(filePath1, RecycleType.MOVE, false); assertFalse(filePath1.getFileSystem(hiveConf).exists(filePath1)); - Path cmPath1 = ReplChangeManager.getCMPath(hiveConf, fileChksum1); + Path cmPath1 = ReplChangeManager.getCMPath(hiveConf, filePath1.getName(), fileChksum1); assertTrue(cmPath1.getFileSystem(hiveConf).exists(cmPath1)); // Verify dropTable recycle table files client.dropTable(dbName, tblName); - Path cmPath2 = ReplChangeManager.getCMPath(hiveConf, fileChksum2); + Path cmPath2 = ReplChangeManager.getCMPath(hiveConf, filePath2.getName(), fileChksum2); assertFalse(filePath2.getFileSystem(hiveConf).exists(filePath2)); assertTrue(cmPath2.getFileSystem(hiveConf).exists(cmPath2)); - Path cmPath3 = ReplChangeManager.getCMPath(hiveConf, fileChksum3); + Path cmPath3 = ReplChangeManager.getCMPath(hiveConf, filePath3.getName(), fileChksum3); assertFalse(filePath3.getFileSystem(hiveConf).exists(filePath3)); assertTrue(cmPath3.getFileSystem(hiveConf).exists(cmPath3)); @@ -298,17 +298,17 @@ public class TestReplChangeManager { ReplChangeManager.getInstance(hiveConf).recycle(dirTbl2, RecycleType.MOVE, false); ReplChangeManager.getInstance(hiveConf).recycle(dirTbl3, RecycleType.MOVE, true); - assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum11))); - assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum12))); - assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum21))); - assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum22))); - assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum31))); - assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum32))); + assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, part11.getName(), fileChksum11))); + assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, part12.getName(), fileChksum12))); + assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, part21.getName(), fileChksum21))); + assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, part22.getName(), fileChksum22))); + assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, part31.getName(), fileChksum31))); + assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, part32.getName(), fileChksum32))); - fs.setTimes(ReplChangeManager.getCMPath(hiveConf, fileChksum11), now - 86400*1000*2, now - 86400*1000*2); - fs.setTimes(ReplChangeManager.getCMPath(hiveConf, fileChksum21), now - 86400*1000*2, now - 86400*1000*2); - fs.setTimes(ReplChangeManager.getCMPath(hiveConf, fileChksum31), now - 86400*1000*2, now - 86400*1000*2); - fs.setTimes(ReplChangeManager.getCMPath(hiveConf, fileChksum32), now - 86400*1000*2, now - 86400*1000*2); + fs.setTimes(ReplChangeManager.getCMPath(hiveConf, part11.getName(), fileChksum11), now - 86400*1000*2, now - 86400*1000*2); + fs.setTimes(ReplChangeManager.getCMPath(hiveConf, part21.getName(), fileChksum21), now - 86400*1000*2, now - 86400*1000*2); + fs.setTimes(ReplChangeManager.getCMPath(hiveConf, part31.getName(), fileChksum31), now - 86400*1000*2, now - 86400*1000*2); + fs.setTimes(ReplChangeManager.getCMPath(hiveConf, part32.getName(), fileChksum32), now - 86400*1000*2, now - 86400*1000*2); ReplChangeManager.scheduleCMClearer(hiveConf); @@ -321,12 +321,12 @@ public class TestReplChangeManager { if (end - start > 5000) { Assert.fail("timeout, cmroot has not been cleared"); } - if (!fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum11)) && - fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum12)) && - !fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum21)) && - fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum22)) && - !fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum31)) && - !fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum31))) { + if (!fs.exists(ReplChangeManager.getCMPath(hiveConf, part11.getName(), fileChksum11)) && + fs.exists(ReplChangeManager.getCMPath(hiveConf, part12.getName(), fileChksum12)) && + !fs.exists(ReplChangeManager.getCMPath(hiveConf, part21.getName(), fileChksum21)) && + fs.exists(ReplChangeManager.getCMPath(hiveConf, part22.getName(), fileChksum22)) && + !fs.exists(ReplChangeManager.getCMPath(hiveConf, part31.getName(), fileChksum31)) && + !fs.exists(ReplChangeManager.getCMPath(hiveConf, part32.getName(), fileChksum32))) { cleared = true; } } while (!cleared); http://git-wip-us.apache.org/repos/asf/hive/blob/d45d462f/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java ---------------------------------------------------------------------- diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java index 3800e6a..2e880c7 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java @@ -3102,6 +3102,31 @@ public class TestReplicationScenarios { } } + @Test + public void testCMConflict() throws IOException { + String testName = "cmConflict"; + String dbName = createDB(testName, driver); + + // Create table and insert two file of the same content + run("CREATE TABLE " + dbName + ".unptned(a string) STORED AS TEXTFILE", driver); + run("INSERT INTO TABLE " + dbName + ".unptned values('ten')", driver); + run("INSERT INTO TABLE " + dbName + ".unptned values('ten')", driver); + + // Bootstrap test + advanceDumpDir(); + run("REPL DUMP " + dbName, driver); + String replDumpLocn = getResult(0, 0,driver); + String replDumpId = getResult(0, 1, true, driver); + + // Drop two files so they are moved to CM + run("TRUNCATE TABLE " + dbName + ".unptned", driver); + + LOG.info("Bootstrap-Dump: Dumped to {} with id {}", replDumpLocn, replDumpId); + run("REPL LOAD " + dbName + "_dupe FROM '" + replDumpLocn + "'", driverMirror); + + verifyRun("SELECT count(*) from " + dbName + "_dupe.unptned", new String[]{"2"}, driverMirror); + } + private static String createDB(String name, Driver myDriver) { LOG.info("Testing " + name); String dbName = name + "_" + tid; http://git-wip-us.apache.org/repos/asf/hive/blob/d45d462f/metastore/src/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java index 88d6a7a..6cb5fa8 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java @@ -132,7 +132,7 @@ public class ReplChangeManager { } } else { String fileCheckSum = checksumFor(path, fs); - Path cmPath = getCMPath(hiveConf, fileCheckSum); + Path cmPath = getCMPath(hiveConf, path.getName(), fileCheckSum); // set timestamp before moving to cmroot, so we can // avoid race condition CM remove the file before setting @@ -237,11 +237,12 @@ public class ReplChangeManager { * to a deterministic location of cmroot. So user can retrieve the file back * with the original location plus checksum. * @param conf + * @param name original filename * @param checkSum checksum of the file, can be retrieved by {@link #checksumFor(Path, FileSystem)} * @return Path */ - static Path getCMPath(Configuration conf, String checkSum) throws IOException, MetaException { - String newFileName = checkSum; + static Path getCMPath(Configuration conf, String name, String checkSum) throws IOException, MetaException { + String newFileName = name + "_" + checkSum; int maxLength = conf.getInt(DFSConfigKeys.DFS_NAMENODE_MAX_COMPONENT_LENGTH_KEY, DFSConfigKeys.DFS_NAMENODE_MAX_COMPONENT_LENGTH_DEFAULT); @@ -269,14 +270,14 @@ public class ReplChangeManager { } if (!srcFs.exists(src)) { - return srcFs.getFileStatus(getCMPath(hiveConf, checksumString)); + return srcFs.getFileStatus(getCMPath(hiveConf, src.getName(), checksumString)); } String currentChecksumString = checksumFor(src, srcFs); if (currentChecksumString == null || checksumString.equals(currentChecksumString)) { return srcFs.getFileStatus(src); } else { - return srcFs.getFileStatus(getCMPath(hiveConf, checksumString)); + return srcFs.getFileStatus(getCMPath(hiveConf, src.getName(), checksumString)); } } catch (IOException e) { throw new MetaException(StringUtils.stringifyException(e));