HDFS-13112. Token expiration edits may cause log corruption or deadlock. Contributed by Daryn Sharp.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/47473952 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/47473952 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/47473952 Branch: refs/heads/HDFS-7240 Commit: 47473952e56b0380147d42f4110ad03c2276c961 Parents: a53d62a Author: Kihwal Lee <kih...@apache.org> Authored: Thu Feb 15 15:32:42 2018 -0600 Committer: Kihwal Lee <kih...@apache.org> Committed: Thu Feb 15 15:32:42 2018 -0600 ---------------------------------------------------------------------- .../DelegationTokenSecretManager.java | 53 ++++++++++++++------ .../hdfs/server/namenode/FSNamesystem.java | 17 ++++--- .../hdfs/server/namenode/FSNamesystemLock.java | 7 +++ .../org/apache/hadoop/hdfs/util/RwLock.java | 5 +- .../namenode/TestSecurityTokenEditLog.java | 24 ++++++++- 5 files changed, 83 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/47473952/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java index b7f89a8..3547c96 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java @@ -21,7 +21,6 @@ package org.apache.hadoop.hdfs.security.token.delegation; import java.io.DataInput; import java.io.DataOutputStream; import java.io.IOException; -import java.io.InterruptedIOException; import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.Iterator; @@ -366,34 +365,58 @@ public class DelegationTokenSecretManager @Override //AbstractDelegationTokenManager protected void logUpdateMasterKey(DelegationKey key) throws IOException { - synchronized (noInterruptsLock) { + try { // The edit logging code will fail catastrophically if it // is interrupted during a logSync, since the interrupt // closes the edit log files. Doing this inside the - // above lock and then checking interruption status - // prevents this bug. - if (Thread.interrupted()) { - throw new InterruptedIOException( - "Interrupted before updating master key"); + // fsn lock will prevent being interrupted when stopping + // the secret manager. + namesystem.readLockInterruptibly(); + try { + // this monitor isn't necessary if stopped while holding write lock + // but for safety, guard against a stop with read lock. + synchronized (noInterruptsLock) { + if (Thread.currentThread().isInterrupted()) { + return; // leave flag set so secret monitor exits. + } + namesystem.logUpdateMasterKey(key); + } + } finally { + namesystem.readUnlock(); } - namesystem.logUpdateMasterKey(key); + } catch (InterruptedException ie) { + // AbstractDelegationTokenManager may crash if an exception is thrown. + // The interrupt flag will be detected when it attempts to sleep. + Thread.currentThread().interrupt(); } } @Override //AbstractDelegationTokenManager protected void logExpireToken(final DelegationTokenIdentifier dtId) throws IOException { - synchronized (noInterruptsLock) { + try { // The edit logging code will fail catastrophically if it // is interrupted during a logSync, since the interrupt // closes the edit log files. Doing this inside the - // above lock and then checking interruption status - // prevents this bug. - if (Thread.interrupted()) { - throw new InterruptedIOException( - "Interrupted before expiring delegation token"); + // fsn lock will prevent being interrupted when stopping + // the secret manager. + namesystem.readLockInterruptibly(); + try { + // this monitor isn't necessary if stopped while holding write lock + // but for safety, guard against a stop with read lock. + synchronized (noInterruptsLock) { + if (Thread.currentThread().isInterrupted()) { + return; // leave flag set so secret monitor exits. + } + namesystem.logExpireDelegationToken(dtId); + } + } finally { + namesystem.readUnlock(); } - namesystem.logExpireDelegationToken(dtId); + } catch (InterruptedException ie) { + // AbstractDelegationTokenManager may crash if an exception is thrown. + // The interrupt flag will be detected when it attempts to sleep. + Thread.currentThread().interrupt(); } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/47473952/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 6c27d7e..b0973a9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -1580,6 +1580,10 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, this.fsLock.readLock(); } @Override + public void readLockInterruptibly() throws InterruptedException { + this.fsLock.readLockInterruptibly(); + } + @Override public void readUnlock() { this.fsLock.readUnlock(); } @@ -5675,9 +5679,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, assert !isInSafeMode() : "this should never be called while in safemode, since we stop " + "the DT manager before entering safemode!"; - // No need to hold FSN lock since we don't access any internal - // structures, and this is stopped before the FSN shuts itself - // down, etc. + // edit log rolling is not thread-safe and must be protected by the + // fsn lock. not updating namespace so read lock is sufficient. + assert hasReadLock(); getEditLog().logUpdateMasterKey(key); getEditLog().logSync(); } @@ -5691,9 +5695,10 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, assert !isInSafeMode() : "this should never be called while in safemode, since we stop " + "the DT manager before entering safemode!"; - // No need to hold FSN lock since we don't access any internal - // structures, and this is stopped before the FSN shuts itself - // down, etc. + // edit log rolling is not thread-safe and must be protected by the + // fsn lock. not updating namespace so read lock is sufficient. + assert hasReadLock(); + // do not logSync so expiration edits are batched getEditLog().logCancelDelegationToken(id); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/47473952/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java index 32c7efa..900f8a2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java @@ -145,6 +145,13 @@ class FSNamesystemLock { } } + public void readLockInterruptibly() throws InterruptedException { + coarseLock.readLock().lockInterruptibly(); + if (coarseLock.getReadHoldCount() == 1) { + readLockHeldTimeStampNanos.set(timer.monotonicNowNanos()); + } + } + public void readUnlock() { readUnlock(OP_NAME_OTHER); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/47473952/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java index e36f0f7..deaeaa4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java @@ -21,7 +21,10 @@ package org.apache.hadoop.hdfs.util; public interface RwLock { /** Acquire read lock. */ public void readLock(); - + + /** Acquire read lock, unless interrupted while waiting */ + void readLockInterruptibly() throws InterruptedException; + /** Release read lock. */ public void readUnlock(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/47473952/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java index 5aa19bb..c43c909 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java @@ -24,6 +24,7 @@ import java.io.File; import java.io.IOException; import java.net.URI; import java.util.Iterator; +import java.util.concurrent.atomic.AtomicReference; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -37,7 +38,11 @@ import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; +import org.junit.Assert; import org.junit.Test; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + import static org.mockito.Mockito.*; /** @@ -180,8 +185,25 @@ public class TestSecurityTokenEditLog { Text renewer = new Text(UserGroupInformation.getCurrentUser().getUserName()); FSImage fsImage = mock(FSImage.class); FSEditLog log = mock(FSEditLog.class); - doReturn(log).when(fsImage).getEditLog(); + doReturn(log).when(fsImage).getEditLog(); + // verify that the namesystem read lock is held while logging token + // expirations. the namesystem is not updated, so write lock is not + // necessary, but the lock is required because edit log rolling is not + // thread-safe. + final AtomicReference<FSNamesystem> fsnRef = new AtomicReference<>(); + doAnswer( + new Answer<Void>() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + // fsn claims read lock if either read or write locked. + Assert.assertTrue(fsnRef.get().hasReadLock()); + Assert.assertFalse(fsnRef.get().hasWriteLock()); + return null; + } + } + ).when(log).logCancelDelegationToken(any(DelegationTokenIdentifier.class)); FSNamesystem fsn = new FSNamesystem(conf, fsImage); + fsnRef.set(fsn); DelegationTokenSecretManager dtsm = fsn.getDelegationTokenSecretManager(); try { --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org