Github user franz1981 commented on a diff in the pull request:
https://github.com/apache/activemq-artemis/pull/2287#discussion_r231513243
--- Diff:
artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/FileLockNodeManager.java
---
@@ -299,36 +301,57 @@ protected FileLock tryLock(final long lockPos) throws IOException {
protected FileLock lock(final long lockPosition) throws Exception {
long start = System.currentTimeMillis();
+ boolean isRecurringFailure = false;
while (!interrupted) {
- FileLock lock = tryLock(lockPosition);
-
- if (lock == null) {
- try {
- Thread.sleep(500);
- } catch (InterruptedException e) {
- return null;
- }
-
- if (lockAcquisitionTimeout != -1 && (System.currentTimeMillis() - start) > lockAcquisitionTimeout) {
- throw new Exception("timed out waiting for lock");
+ try {
+ FileLock lock = tryLock(lockPosition);
+ isRecurringFailure = false;
+
+ if (lock == null) {
+ logger.debug("lock is null");
+ try {
+ Thread.sleep(500);
+ } catch (InterruptedException e) {
+ return null;
+ }
+
+ if (lockAcquisitionTimeout != -1 && (System.currentTimeMillis() - start) > lockAcquisitionTimeout) {
+ throw new Exception("timed out waiting for lock");
+ }
+ } else {
+ return lock;
}
- } else {
- return lock;
+ } catch (IOException e) {
+ // IOException during trylock() may be a temporary issue, e.g. NFS volume not being accessible
+ logger.log(isRecurringFailure ? Logger.Level.DEBUG : Logger.Level.WARN,
+ "Failure when accessing a lock file", e);
+ isRecurringFailure = true;
+ Thread.sleep(LOCK_ACCESS_FAILURE_WAIT_TIME);
}
}
// todo this is here because sometimes channel.lock throws a resource deadlock exception but trylock works,
// need to investigate further and review
- FileLock lock;
+ FileLock lock = null;
--- End diff --
Same thing as the comment above.
---