Repository: hive Updated Branches: refs/heads/master 0a81e1ec3 -> d81aed31d
HIVE-19050 : DBNotificationListener does not catch exceptions in the cleaner thread (Vihang Karajgaonkar reviewed by Peter Vary) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d81aed31 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d81aed31 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d81aed31 Branch: refs/heads/master Commit: d81aed31dec95d54711084264c7a3222a20e5530 Parents: 0a81e1e Author: Vihang Karajgaonkar <vih...@cloudera.com> Authored: Fri Apr 6 10:02:33 2018 -0700 Committer: Vihang Karajgaonkar <vih...@cloudera.com> Committed: Fri Apr 6 10:02:33 2018 -0700 ---------------------------------------------------------------------- .../listener/DbNotificationListener.java | 19 ++++++++++--- .../listener/DummyRawStoreFailEvent.java | 4 +++ .../listener/TestDbNotificationListener.java | 30 +++++++++++++++++++- 3 files changed, 48 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/d81aed31/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java ---------------------------------------------------------------------- diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java index 9480145..7f21573 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java @@ -104,7 +104,8 @@ public class DbNotificationListener extends TransactionalMetaStoreEventListener private Configuration conf; private MessageFactory msgFactory; - private synchronized void init(Configuration conf) throws MetaException { + //cleaner is a static object, use static synchronized to make sure its thread-safe + private static synchronized void init(Configuration conf) throws MetaException { if (cleaner == null) { cleaner = new CleanerThread(conf, RawStoreProxy.getProxy(conf, conf, @@ -116,7 +117,7 @@ public class DbNotificationListener extends TransactionalMetaStoreEventListener public DbNotificationListener(Configuration config) throws MetaException { super(config); conf = config; - init(conf); + DbNotificationListener.init(conf); msgFactory = MessageFactory.getInstance(); } @@ -724,7 +725,7 @@ public class DbNotificationListener extends TransactionalMetaStoreEventListener static private long sleepTime = 60000; CleanerThread(Configuration conf, RawStore rs) { - super("CleanerThread"); + super("DB-Notification-Cleaner"); this.rs = rs; setTimeToLive(MetastoreConf.getTimeVar(conf, ConfVars.EVENT_DB_LISTENER_TTL, TimeUnit.SECONDS)); @@ -734,7 +735,17 @@ public class DbNotificationListener extends TransactionalMetaStoreEventListener @Override public void run() { while (true) { - rs.cleanNotificationEvents(ttl); + try { + rs.cleanNotificationEvents(ttl); + } catch (Exception ex) { + //catching exceptions here makes sure that the thread doesn't die in case of unexpected + //exceptions + LOG.warn( + "Exception received while cleaning notifications. More details can be found in debug mode" + + ex.getMessage()); + LOG.debug(ex.getMessage(), ex); + } + LOG.debug("Cleaner thread done"); try { Thread.sleep(sleepTime); http://git-wip-us.apache.org/repos/asf/hive/blob/d81aed31/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java ---------------------------------------------------------------------- diff --git a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java index 4697f60..801de7a 100644 --- a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java +++ b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java @@ -853,6 +853,10 @@ public class DummyRawStoreFailEvent implements RawStore, Configurable { @Override public void cleanNotificationEvents(int olderThan) { + if (!shouldEventSucceed) { + //throw exception to simulate an issue with cleaner thread + throw new RuntimeException("Dummy exception while cleaning notifications"); + } objectStore.cleanNotificationEvents(olderThan); } http://git-wip-us.apache.org/repos/asf/hive/blob/d81aed31/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java ---------------------------------------------------------------------- diff --git a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java index 5459554..823312b 100644 --- a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java +++ b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java @@ -119,7 +119,7 @@ public class TestDbNotificationListener { private long firstEventId; private static List<String> testsToSkipForReplV1BackwardCompatTesting = - new ArrayList<>(Arrays.asList("cleanupNotifs", "sqlTempTable")); + new ArrayList<>(Arrays.asList("cleanupNotifs", "cleanupNotificationWithError", "sqlTempTable")); // Make sure we skip backward-compat checking for those tests that don't generate events private static ReplicationV1CompatRule bcompat = null; @@ -1370,4 +1370,32 @@ public class TestDbNotificationListener { LOG.info("second trigger done"); assertEquals(0, rsp2.getEventsSize()); } + + @Test + public void cleanupNotificationWithError() throws Exception { + Database db = new Database("cleanup1", "no description", "file:/tmp", emptyParameters); + msClient.createDatabase(db); + msClient.dropDatabase("cleanup1"); + + LOG.info("Pulling events immediately after createDatabase/dropDatabase"); + NotificationEventResponse rsp = msClient.getNextNotification(firstEventId, 0, null); + assertEquals(2, rsp.getEventsSize()); + //this simulates that cleaning thread will error out while cleaning the notifications + DummyRawStoreFailEvent.setEventSucceed(false); + // sleep for expiry time, and then fetch again + // sleep twice the TTL interval - things should have been cleaned by then. + Thread.sleep(EVENTS_TTL * 2 * 1000); + + LOG.info("Pulling events again after failing to cleanup"); + NotificationEventResponse rsp2 = msClient.getNextNotification(firstEventId, 0, null); + LOG.info("second trigger done"); + assertEquals(2, rsp2.getEventsSize()); + DummyRawStoreFailEvent.setEventSucceed(true); + Thread.sleep(EVENTS_TTL * 2 * 1000); + + LOG.info("Pulling events again after cleanup"); + rsp2 = msClient.getNextNotification(firstEventId, 0, null); + LOG.info("third trigger done"); + assertEquals(0, rsp2.getEventsSize()); + } }