Github user franz1981 commented on a diff in the pull request:

    https://github.com/apache/activemq-artemis/pull/2287#discussion_r231513243
  
    --- Diff: artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/FileLockNodeManager.java ---
    @@ -299,36 +301,57 @@ protected FileLock tryLock(final long lockPos) throws IOException {
     
        protected FileLock lock(final long lockPosition) throws Exception {
           long start = System.currentTimeMillis();
    +      boolean isRecurringFailure = false;
     
           while (!interrupted) {
    -         FileLock lock = tryLock(lockPosition);
    -
    -         if (lock == null) {
    -            try {
    -               Thread.sleep(500);
    -            } catch (InterruptedException e) {
    -               return null;
    -            }
    -
    -            if (lockAcquisitionTimeout != -1 && (System.currentTimeMillis() - start) > lockAcquisitionTimeout) {
    -               throw new Exception("timed out waiting for lock");
    +         try {
    +            FileLock lock = tryLock(lockPosition);
    +            isRecurringFailure = false;
    +
    +            if (lock == null) {
    +               logger.debug("lock is null");
    +               try {
    +                  Thread.sleep(500);
    +               } catch (InterruptedException e) {
    +                  return null;
    +               }
    +
    +               if (lockAcquisitionTimeout != -1 && (System.currentTimeMillis() - start) > lockAcquisitionTimeout) {
    +                  throw new Exception("timed out waiting for lock");
    +               }
    +            } else {
    +               return lock;
                 }
    -         } else {
    -            return lock;
    +         } catch (IOException e) {
    +            // IOException during trylock() may be a temporary issue, e.g. NFS volume not being accessible
    +            logger.log(isRecurringFailure ? Logger.Level.DEBUG : Logger.Level.WARN,
    +                    "Failure when accessing a lock file", e);
    +            isRecurringFailure = true;
    +            Thread.sleep(LOCK_ACCESS_FAILURE_WAIT_TIME);
              }
           }
     
           // todo this is here because sometimes channel.lock throws a resource deadlock exception but trylock works,
           // need to investigate further and review
    -      FileLock lock;
    +      FileLock lock = null;
    --- End diff --
    
    Same thing as the comment above.


---

Reply via email to