Repository: hive Updated Branches: refs/heads/master 91d25b48a -> 9945b5d5d
HIVE-16347 : HiveMetastoreChecker should skip listing partitions which are not valid when hive.msck.path.validation is set to skip or ignore (Vihang Karajgaonkar via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan <hashut...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9945b5d5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9945b5d5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9945b5d5 Branch: refs/heads/master Commit: 9945b5d5d284677a2bcf042f236b43c2647c50e4 Parents: 91d25b4 Author: Vihang Karajgaonkar <vih...@cloudera.com> Authored: Mon Apr 3 14:36:46 2017 -0700 Committer: Ashutosh Chauhan <hashut...@apache.org> Committed: Mon Apr 3 14:36:46 2017 -0700 ---------------------------------------------------------------------- .../hive/ql/metadata/HiveMetaStoreChecker.java | 5 +- .../ql/metadata/TestHiveMetaStoreChecker.java | 52 ++++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/9945b5d5/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java index da24c70..4add836 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java @@ -498,9 +498,10 @@ public class HiveMetaStoreChecker { } else if (!parts[0].equalsIgnoreCase(partColNames.get(currentDepth))) { logOrThrowExceptionWithMsg( "Unexpected partition key " + parts[0] + " found at " + nextPath); + } else { + // add sub-directory to the work queue if maxDepth is not yet reached + pendingPaths.add(new PathDepthInfo(nextPath, 
currentDepth + 1)); } - // add sub-directory to the work queue if maxDepth is not yet reached - pendingPaths.add(new PathDepthInfo(nextPath, currentDepth + 1)); } } if (currentDepth == partColNames.size()) { http://git-wip-us.apache.org/repos/asf/hive/blob/9945b5d5/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java index d7fbbce..90e6781 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java @@ -235,6 +235,34 @@ public class TestHiveMetaStoreChecker { checker.checkMetastore(dbName, tableName, null, new CheckResult()); } + /* + * skip mode should not throw an exception when an invalid partition directory + * is found. 
It should just ignore it + */ + @Test + public void testSkipInvalidPartitionKeyName() + throws HiveException, AlreadyExistsException, IOException { + hive.getConf().set(HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION.varname, "skip"); + checker = new HiveMetaStoreChecker(hive); + Table table = createTestTable(); + List<Partition> partitions = hive.getPartitions(table); + assertEquals(2, partitions.size()); + // add a fake partition dir on fs + fs = partitions.get(0).getDataLocation().getFileSystem(hive.getConf()); + Path fakePart = + new Path(table.getDataLocation().toString(), "fakedate=2009-01-01/fakecity=sanjose"); + fs.mkdirs(fakePart); + fs.deleteOnExit(fakePart); + createPartitionsDirectoriesOnFS(table, 2); + CheckResult result = new CheckResult(); + checker.checkMetastore(dbName, tableName, null, result); + assertEquals(Collections.<String> emptySet(), result.getTablesNotInMs()); + assertEquals(Collections.<String> emptySet(), result.getTablesNotOnFs()); + assertEquals(Collections.<String> emptySet(), result.getPartitionsNotOnFs()); + // only 2 valid partitions should be added + assertEquals(2, result.getPartitionsNotInMs().size()); + } + private Table createTestTable() throws AlreadyExistsException, HiveException { Database db = new Database(); db.setName(dbName); @@ -487,6 +515,30 @@ public class TestHiveMetaStoreChecker { CheckResult result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); } + + /* + * In skip mode msck should ignore invalid partitions instead of + * throwing an exception + */ + @Test + public void testSkipInvalidOrderForPartitionKeysOnFS() + throws AlreadyExistsException, HiveException, IOException { + hive.getConf().set(HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION.varname, "skip"); + checker = new HiveMetaStoreChecker(hive); + Table testTable = createPartitionedTestTable(dbName, tableName, 2, 0); + // add invalid partition directories on the filesystem + createInvalidPartitionDirsOnFS(testTable, 2); + // add 2 valid partitions on the 
filesystem + createPartitionsDirectoriesOnFS(testTable, 2); + CheckResult result = new CheckResult(); + checker.checkMetastore(dbName, tableName, null, result); + assertEquals(Collections.<String> emptySet(), result.getTablesNotInMs()); + assertEquals(Collections.<String> emptySet(), result.getTablesNotOnFs()); + assertEquals(Collections.<String> emptySet(), result.getPartitionsNotOnFs()); + // only 2 valid partitions should be added + assertEquals(2, result.getPartitionsNotInMs().size()); + } + /* * Test if single-threaded implementation checker throws HiveException when there is a dummy * directory present in the nested level