GitHub user franz1981 commented on a diff in the pull request:

https://github.com/apache/activemq-artemis/pull/2287#discussion_r231513243

--- Diff: artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/FileLockNodeManager.java ---
@@ -299,36 +301,57 @@ protected FileLock tryLock(final long lockPos) throws IOException {
    protected FileLock lock(final long lockPosition) throws Exception {
       long start = System.currentTimeMillis();
+      boolean isRecurringFailure = false;
       while (!interrupted) {
-         FileLock lock = tryLock(lockPosition);
-
-         if (lock == null) {
-            try {
-               Thread.sleep(500);
-            } catch (InterruptedException e) {
-               return null;
-            }
-
-            if (lockAcquisitionTimeout != -1 && (System.currentTimeMillis() - start) > lockAcquisitionTimeout) {
-               throw new Exception("timed out waiting for lock");
+         try {
+            FileLock lock = tryLock(lockPosition);
+            isRecurringFailure = false;
+
+            if (lock == null) {
+               logger.debug("lock is null");
+               try {
+                  Thread.sleep(500);
+               } catch (InterruptedException e) {
+                  return null;
+               }
+
+               if (lockAcquisitionTimeout != -1 && (System.currentTimeMillis() - start) > lockAcquisitionTimeout) {
+                  throw new Exception("timed out waiting for lock");
+               }
+            } else {
+               return lock;
             }
-         } else {
-            return lock;
+         } catch (IOException e) {
+            // IOException during trylock() may be a temporary issue, e.g. NFS volume not being accessible
+            logger.log(isRecurringFailure ? Logger.Level.DEBUG : Logger.Level.WARN,
+                       "Failure when accessing a lock file", e);
+            isRecurringFailure = true;
+            Thread.sleep(LOCK_ACCESS_FAILURE_WAIT_TIME);
          }
       }
       // todo this is here because sometimes channel.lock throws a resource deadlock exception but trylock works,
       // need to investigate further and review
-      FileLock lock;
+      FileLock lock = null;
--- End diff --

Same thing as the comment above.
---