HIVE-17196: CM: ReplCopyTask should retain the original file names even if 
copied from CM path (Daniel Dai, reviewed by Thejas Nair)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d45d462f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d45d462f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d45d462f

Branch: refs/heads/hive-14535
Commit: d45d462f5447a55b1f3cbec1b9ba71ec47781bb1
Parents: 9a14984
Author: Daniel Dai <da...@hortonworks.com>
Authored: Tue Sep 19 13:53:37 2017 -0700
Committer: Daniel Dai <da...@hortonworks.com>
Committed: Tue Sep 19 13:53:37 2017 -0700

----------------------------------------------------------------------
 .../hive/metastore/TestReplChangeManager.java   | 44 ++++++++++----------
 .../hive/ql/parse/TestReplicationScenarios.java | 25 +++++++++++
 .../hive/metastore/ReplChangeManager.java       | 11 ++---
 3 files changed, 53 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/d45d462f/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java
index d9e206c..6ade76d 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java
@@ -175,19 +175,19 @@ public class TestReplChangeManager {
     // verify cm.recycle(db, table, part) api moves file to cmroot dir
     int ret = cm.recycle(part1Path, RecycleType.MOVE, false);
     Assert.assertEquals(ret, 1);
-    Path cmPart1Path = ReplChangeManager.getCMPath(hiveConf, path1Chksum);
+    Path cmPart1Path = ReplChangeManager.getCMPath(hiveConf, part1Path.getName(), path1Chksum);
     assertTrue(cmPart1Path.getFileSystem(hiveConf).exists(cmPart1Path));
 
     // Verify dropPartition recycle part files
     client.dropPartition(dbName, tblName, Arrays.asList("20160102"));
     assertFalse(part2Path.getFileSystem(hiveConf).exists(part2Path));
-    Path cmPart2Path = ReplChangeManager.getCMPath(hiveConf, path2Chksum);
+    Path cmPart2Path = ReplChangeManager.getCMPath(hiveConf, part2Path.getName(), path2Chksum);
     assertTrue(cmPart2Path.getFileSystem(hiveConf).exists(cmPart2Path));
 
     // Verify dropTable recycle partition files
     client.dropTable(dbName, tblName);
     assertFalse(part3Path.getFileSystem(hiveConf).exists(part3Path));
-    Path cmPart3Path = ReplChangeManager.getCMPath(hiveConf, path3Chksum);
+    Path cmPart3Path = ReplChangeManager.getCMPath(hiveConf, part3Path.getName(), path3Chksum);
     assertTrue(cmPart3Path.getFileSystem(hiveConf).exists(cmPart3Path));
 
     client.dropDatabase(dbName, true, true);
@@ -246,17 +246,17 @@ public class TestReplChangeManager {
     cm.recycle(filePath1, RecycleType.MOVE, false);
     assertFalse(filePath1.getFileSystem(hiveConf).exists(filePath1));
 
-    Path cmPath1 = ReplChangeManager.getCMPath(hiveConf, fileChksum1);
+    Path cmPath1 = ReplChangeManager.getCMPath(hiveConf, filePath1.getName(), fileChksum1);
     assertTrue(cmPath1.getFileSystem(hiveConf).exists(cmPath1));
 
     // Verify dropTable recycle table files
     client.dropTable(dbName, tblName);
 
-    Path cmPath2 = ReplChangeManager.getCMPath(hiveConf, fileChksum2);
+    Path cmPath2 = ReplChangeManager.getCMPath(hiveConf, filePath2.getName(), fileChksum2);
     assertFalse(filePath2.getFileSystem(hiveConf).exists(filePath2));
     assertTrue(cmPath2.getFileSystem(hiveConf).exists(cmPath2));
 
-    Path cmPath3 = ReplChangeManager.getCMPath(hiveConf, fileChksum3);
+    Path cmPath3 = ReplChangeManager.getCMPath(hiveConf, filePath3.getName(), fileChksum3);
     assertFalse(filePath3.getFileSystem(hiveConf).exists(filePath3));
     assertTrue(cmPath3.getFileSystem(hiveConf).exists(cmPath3));
 
@@ -298,17 +298,17 @@ public class TestReplChangeManager {
     ReplChangeManager.getInstance(hiveConf).recycle(dirTbl2, RecycleType.MOVE, false);
     ReplChangeManager.getInstance(hiveConf).recycle(dirTbl3, RecycleType.MOVE, true);
 
-    assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum11)));
-    assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum12)));
-    assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum21)));
-    assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum22)));
-    assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum31)));
-    assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum32)));
+    assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, part11.getName(), fileChksum11)));
+    assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, part12.getName(), fileChksum12)));
+    assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, part21.getName(), fileChksum21)));
+    assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, part22.getName(), fileChksum22)));
+    assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, part31.getName(), fileChksum31)));
+    assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, part32.getName(), fileChksum32)));
 
-    fs.setTimes(ReplChangeManager.getCMPath(hiveConf, fileChksum11), now - 86400*1000*2, now - 86400*1000*2);
-    fs.setTimes(ReplChangeManager.getCMPath(hiveConf, fileChksum21), now - 86400*1000*2, now - 86400*1000*2);
-    fs.setTimes(ReplChangeManager.getCMPath(hiveConf, fileChksum31), now - 86400*1000*2, now - 86400*1000*2);
-    fs.setTimes(ReplChangeManager.getCMPath(hiveConf, fileChksum32), now - 86400*1000*2, now - 86400*1000*2);
+    fs.setTimes(ReplChangeManager.getCMPath(hiveConf, part11.getName(), fileChksum11), now - 86400*1000*2, now - 86400*1000*2);
+    fs.setTimes(ReplChangeManager.getCMPath(hiveConf, part21.getName(), fileChksum21), now - 86400*1000*2, now - 86400*1000*2);
+    fs.setTimes(ReplChangeManager.getCMPath(hiveConf, part31.getName(), fileChksum31), now - 86400*1000*2, now - 86400*1000*2);
+    fs.setTimes(ReplChangeManager.getCMPath(hiveConf, part32.getName(), fileChksum32), now - 86400*1000*2, now - 86400*1000*2);
 
     ReplChangeManager.scheduleCMClearer(hiveConf);
 
@@ -321,12 +321,12 @@ public class TestReplChangeManager {
       if (end - start > 5000) {
         Assert.fail("timeout, cmroot has not been cleared");
       }
-      if (!fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum11)) &&
-          fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum12)) &&
-          !fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum21)) &&
-          fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum22)) &&
-          !fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum31)) &&
-          !fs.exists(ReplChangeManager.getCMPath(hiveConf, fileChksum31))) {
+      if (!fs.exists(ReplChangeManager.getCMPath(hiveConf, part11.getName(), fileChksum11)) &&
+          fs.exists(ReplChangeManager.getCMPath(hiveConf, part12.getName(), fileChksum12)) &&
+          !fs.exists(ReplChangeManager.getCMPath(hiveConf, part21.getName(), fileChksum21)) &&
+          fs.exists(ReplChangeManager.getCMPath(hiveConf, part22.getName(), fileChksum22)) &&
+          !fs.exists(ReplChangeManager.getCMPath(hiveConf, part31.getName(), fileChksum31)) &&
+          !fs.exists(ReplChangeManager.getCMPath(hiveConf, part32.getName(), fileChksum32))) {
         cleared = true;
       }
     } while (!cleared);

http://git-wip-us.apache.org/repos/asf/hive/blob/d45d462f/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
index 3800e6a..2e880c7 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
@@ -3102,6 +3102,31 @@ public class TestReplicationScenarios {
     }
   }
 
+  @Test
+  public void testCMConflict() throws IOException {
+    String testName = "cmConflict";
+    String dbName = createDB(testName, driver);
+
+    // Create table and insert two file of the same content
+    run("CREATE TABLE " + dbName + ".unptned(a string) STORED AS TEXTFILE", driver);
+    run("INSERT INTO TABLE " + dbName + ".unptned values('ten')", driver);
+    run("INSERT INTO TABLE " + dbName + ".unptned values('ten')", driver);
+
+    // Bootstrap test
+    advanceDumpDir();
+    run("REPL DUMP " + dbName, driver);
+    String replDumpLocn = getResult(0, 0,driver);
+    String replDumpId = getResult(0, 1, true, driver);
+
+    // Drop two files so they are moved to CM
+    run("TRUNCATE TABLE " + dbName + ".unptned", driver);
+
+    LOG.info("Bootstrap-Dump: Dumped to {} with id {}", replDumpLocn, replDumpId);
+    run("REPL LOAD " + dbName + "_dupe FROM '" + replDumpLocn + "'", driverMirror);
+
+    verifyRun("SELECT count(*) from " + dbName + "_dupe.unptned", new String[]{"2"}, driverMirror);
+  }
+
   private static String createDB(String name, Driver myDriver) {
     LOG.info("Testing " + name);
     String dbName = name + "_" + tid;

http://git-wip-us.apache.org/repos/asf/hive/blob/d45d462f/metastore/src/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java
index 88d6a7a..6cb5fa8 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java
@@ -132,7 +132,7 @@ public class ReplChangeManager {
         }
       } else {
         String fileCheckSum = checksumFor(path, fs);
-        Path cmPath = getCMPath(hiveConf, fileCheckSum);
+        Path cmPath = getCMPath(hiveConf, path.getName(), fileCheckSum);
 
         // set timestamp before moving to cmroot, so we can
         // avoid race condition CM remove the file before setting
@@ -237,11 +237,12 @@ public class ReplChangeManager {
    *   to a deterministic location of cmroot. So user can retrieve the file back
    *   with the original location plus checksum.
    * @param conf
+   * @param name original filename
    * @param checkSum checksum of the file, can be retrieved by {@link #checksumFor(Path, FileSystem)}
    * @return Path
    */
-  static Path getCMPath(Configuration conf, String checkSum) throws IOException, MetaException {
-    String newFileName = checkSum;
+  static Path getCMPath(Configuration conf, String name, String checkSum) throws IOException, MetaException {
+    String newFileName = name + "_" + checkSum;
     int maxLength = conf.getInt(DFSConfigKeys.DFS_NAMENODE_MAX_COMPONENT_LENGTH_KEY,
         DFSConfigKeys.DFS_NAMENODE_MAX_COMPONENT_LENGTH_DEFAULT);
 
@@ -269,14 +270,14 @@ public class ReplChangeManager {
       }
 
       if (!srcFs.exists(src)) {
-        return srcFs.getFileStatus(getCMPath(hiveConf, checksumString));
+        return srcFs.getFileStatus(getCMPath(hiveConf, src.getName(), checksumString));
       }
 
       String currentChecksumString = checksumFor(src, srcFs);
       if (currentChecksumString == null || checksumString.equals(currentChecksumString)) {
         return srcFs.getFileStatus(src);
       } else {
-        return srcFs.getFileStatus(getCMPath(hiveConf, checksumString));
+        return srcFs.getFileStatus(getCMPath(hiveConf, src.getName(), checksumString));
       }
     } catch (IOException e) {
       throw new MetaException(StringUtils.stringifyException(e));

Reply via email to