Repository: hive
Updated Branches:
  refs/heads/master 91d25b48a -> 9945b5d5d


HIVE-16347 : HiveMetastoreChecker should skip listing partitions which are not 
valid when hive.msck.path.validation is set to skip or ignore (Vihang 
Karajgaonkar via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <hashut...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9945b5d5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9945b5d5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9945b5d5

Branch: refs/heads/master
Commit: 9945b5d5d284677a2bcf042f236b43c2647c50e4
Parents: 91d25b4
Author: Vihang Karajgaonkar <vih...@cloudera.com>
Authored: Mon Apr 3 14:36:46 2017 -0700
Committer: Ashutosh Chauhan <hashut...@apache.org>
Committed: Mon Apr 3 14:36:46 2017 -0700

----------------------------------------------------------------------
 .../hive/ql/metadata/HiveMetaStoreChecker.java  |  5 +-
 .../ql/metadata/TestHiveMetaStoreChecker.java   | 52 ++++++++++++++++++++
 2 files changed, 55 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/9945b5d5/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
index da24c70..4add836 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
@@ -498,9 +498,10 @@ public class HiveMetaStoreChecker {
             } else if 
(!parts[0].equalsIgnoreCase(partColNames.get(currentDepth))) {
               logOrThrowExceptionWithMsg(
                   "Unexpected partition key " + parts[0] + " found at " + 
nextPath);
+            } else {
+              // add sub-directory to the work queue if maxDepth is not yet 
reached
+              pendingPaths.add(new PathDepthInfo(nextPath, currentDepth + 1));
             }
-            // add sub-directory to the work queue if maxDepth is not yet 
reached
-            pendingPaths.add(new PathDepthInfo(nextPath, currentDepth + 1));
           }
         }
         if (currentDepth == partColNames.size()) {

http://git-wip-us.apache.org/repos/asf/hive/blob/9945b5d5/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
----------------------------------------------------------------------
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java 
b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
index d7fbbce..90e6781 100644
--- 
a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
@@ -235,6 +235,34 @@ public class TestHiveMetaStoreChecker {
     checker.checkMetastore(dbName, tableName, null, new CheckResult());
   }
 
+  /*
+   * skip mode should not throw exception when an invalid partition directory
+   * is found. It should just ignore it
+   */
+  @Test
+  public void testSkipInvalidPartitionKeyName()
+      throws HiveException, AlreadyExistsException, IOException {
+    hive.getConf().set(HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION.varname, 
"skip");
+    checker = new HiveMetaStoreChecker(hive);
+    Table table = createTestTable();
+    List<Partition> partitions = hive.getPartitions(table);
+    assertEquals(2, partitions.size());
+    // add a fake partition dir on fs
+    fs = partitions.get(0).getDataLocation().getFileSystem(hive.getConf());
+    Path fakePart =
+        new Path(table.getDataLocation().toString(), 
"fakedate=2009-01-01/fakecity=sanjose");
+    fs.mkdirs(fakePart);
+    fs.deleteOnExit(fakePart);
+    createPartitionsDirectoriesOnFS(table, 2);
+    CheckResult result = new CheckResult();
+    checker.checkMetastore(dbName, tableName, null, result);
+    assertEquals(Collections.<String> emptySet(), result.getTablesNotInMs());
+    assertEquals(Collections.<String> emptySet(), result.getTablesNotOnFs());
+    assertEquals(Collections.<String> emptySet(), 
result.getPartitionsNotOnFs());
+    // only 2 valid partitions should be added
+    assertEquals(2, result.getPartitionsNotInMs().size());
+  }
+
   private Table createTestTable() throws AlreadyExistsException, HiveException 
{
     Database db = new Database();
     db.setName(dbName);
@@ -487,6 +515,30 @@ public class TestHiveMetaStoreChecker {
     CheckResult result = new CheckResult();
     checker.checkMetastore(dbName, tableName, null, result);
   }
+
+  /*
+   * In skip mode msck should ignore invalid partitions instead of
+   * throwing exception
+   */
+  @Test
+  public void testSkipInvalidOrderForPartitionKeysOnFS()
+      throws AlreadyExistsException, HiveException, IOException {
+    hive.getConf().set(HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION.varname, 
"skip");
+    checker = new HiveMetaStoreChecker(hive);
+    Table testTable = createPartitionedTestTable(dbName, tableName, 2, 0);
+    // add invalid partition directories on the filesystem
+    createInvalidPartitionDirsOnFS(testTable, 2);
+    // add 2 valid partitions on the filesystem
+    createPartitionsDirectoriesOnFS(testTable, 2);
+    CheckResult result = new CheckResult();
+    checker.checkMetastore(dbName, tableName, null, result);
+    assertEquals(Collections.<String> emptySet(), result.getTablesNotInMs());
+    assertEquals(Collections.<String> emptySet(), result.getTablesNotOnFs());
+    assertEquals(Collections.<String> emptySet(), 
result.getPartitionsNotOnFs());
+    // only 2 valid partitions should be added
+    assertEquals(2, result.getPartitionsNotInMs().size());
+  }
+
   /*
    * Test if single-threaded implementation checker throws HiveException when 
there is a dummy
    * directory present in the nested level

Reply via email to