[ https://issues.apache.org/jira/browse/OOZIE-3113?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Satish Subhashrao Saley updated OOZIE-3113: ------------------------------------------- Description: ZK Lock might not get released if release call fails. Oozie should do exponential retry for some time before giving up. {code} 2017-10-25 03:07:45,787 WARN ZKLocksService:523 [pool-12-thread-74] - SERVER[localhost] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[123-123--oozie_saley-C] ACTION[123-123--oozie_saley-C@67] Could not release lock: KeeperErrorCode = ConnectionLoss for /oozie/localhost-oozie/locks/123-123--oozie_saley-C/1234__WRIT__0000000111 org.apache.zookeeper.KeeperException$ConnectionLossException: KeeperErrorCode = ConnectionLoss for /oozie/localhost-oozie/locks/123-123--oozie_saley-C/1234__WRIT__0000000111 at org.apache.zookeeper.KeeperException.create(KeeperException.java:99) at org.apache.zookeeper.KeeperException.create(KeeperException.java:51) at org.apache.zookeeper.ZooKeeper.delete(ZooKeeper.java:873) at org.apache.curator.framework.imps.DeleteBuilderImpl$5.call(DeleteBuilderImpl.java:239) at org.apache.curator.framework.imps.DeleteBuilderImpl$5.call(DeleteBuilderImpl.java:234) at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107) at org.apache.curator.framework.imps.DeleteBuilderImpl.pathInForeground(DeleteBuilderImpl.java:230) at org.apache.curator.framework.imps.DeleteBuilderImpl.forPath(DeleteBuilderImpl.java:215) at org.apache.curator.framework.imps.DeleteBuilderImpl.forPath(DeleteBuilderImpl.java:42) at org.apache.curator.framework.recipes.locks.LockInternals.deleteOurPath(LockInternals.java:345) at org.apache.curator.framework.recipes.locks.LockInternals.releaseLock(LockInternals.java:123) at org.apache.curator.framework.recipes.locks.InterProcessMutex.release(InterProcessMutex.java:142) at org.apache.oozie.service.ZKLocksService$ZKLockToken.release(ZKLocksService.java:206) at org.apache.oozie.command.XCommand.releaseLock(XCommand.java:233) at org.apache.oozie.command.XCommand.call(XCommand.java:305) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:178) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) {code} was: ZK Lock might not get released if release call fails. Oozie should do exponential retry for some time before giving up. {code} 2017-10-25 03:07:45,787 WARN ZKLocksService:523 [pool-12-thread-74] - SERVER[localhost] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[123-123--oozie_saley-C] ACTION[123-123--oozie_saley-C@67] Could not release lock: KeeperErrorCode = ConnectionLoss for /oozie/localhost-oozie/locks/123-123--oozie_saley-C/1234__WRIT__0000000111 org.apache.zookeeper.KeeperException$ConnectionLossException: KeeperErrorCode = ConnectionLoss for /oozie/phazontan-oozie/locks/123-123--oozie_saley-C/1234__WRIT__0000000111 at org.apache.zookeeper.KeeperException.create(KeeperException.java:99) at org.apache.zookeeper.KeeperException.create(KeeperException.java:51) at org.apache.zookeeper.ZooKeeper.delete(ZooKeeper.java:873) at org.apache.curator.framework.imps.DeleteBuilderImpl$5.call(DeleteBuilderImpl.java:239) at org.apache.curator.framework.imps.DeleteBuilderImpl$5.call(DeleteBuilderImpl.java:234) at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107) at org.apache.curator.framework.imps.DeleteBuilderImpl.pathInForeground(DeleteBuilderImpl.java:230) at org.apache.curator.framework.imps.DeleteBuilderImpl.forPath(DeleteBuilderImpl.java:215) at org.apache.curator.framework.imps.DeleteBuilderImpl.forPath(DeleteBuilderImpl.java:42) at org.apache.curator.framework.recipes.locks.LockInternals.deleteOurPath(LockInternals.java:345) at org.apache.curator.framework.recipes.locks.LockInternals.releaseLock(LockInternals.java:123) at org.apache.curator.framework.recipes.locks.InterProcessMutex.release(InterProcessMutex.java:142) at org.apache.oozie.service.ZKLocksService$ZKLockToken.release(ZKLocksService.java:206) at org.apache.oozie.command.XCommand.releaseLock(XCommand.java:233) at org.apache.oozie.command.XCommand.call(XCommand.java:305) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:178) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) {code} > Retry for ZK lock release > ------------------------- > > Key: OOZIE-3113 > URL: https://issues.apache.org/jira/browse/OOZIE-3113 > Project: Oozie > Issue Type: Bug > Reporter: Satish Subhashrao Saley > Assignee: Satish Subhashrao Saley > Attachments: OOZIE-3113-1.patch > > > ZK Lock might not get released if release call fails. Oozie should do > exponential retry for some time before giving up. > {code} > 2017-10-25 03:07:45,787 WARN ZKLocksService:523 [pool-12-thread-74] - > SERVER[localhost] USER[-] GROUP[-] TOKEN[-] APP[-] > JOB[123-123--oozie_saley-C] ACTION[123-123--oozie_saley-C@67] Could not > release lock: KeeperErrorCode = ConnectionLoss for > /oozie/localhost-oozie/locks/123-123--oozie_saley-C/1234__WRIT__0000000111 > org.apache.zookeeper.KeeperException$ConnectionLossException: KeeperErrorCode > = ConnectionLoss for > /oozie/localhost-oozie/locks/123-123--oozie_saley-C/1234__WRIT__0000000111 > at > org.apache.zookeeper.KeeperException.create(KeeperException.java:99) > at > org.apache.zookeeper.KeeperException.create(KeeperException.java:51) > at org.apache.zookeeper.ZooKeeper.delete(ZooKeeper.java:873) > at > org.apache.curator.framework.imps.DeleteBuilderImpl$5.call(DeleteBuilderImpl.java:239) > at > org.apache.curator.framework.imps.DeleteBuilderImpl$5.call(DeleteBuilderImpl.java:234) > at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107) > at > org.apache.curator.framework.imps.DeleteBuilderImpl.pathInForeground(DeleteBuilderImpl.java:230) > at > org.apache.curator.framework.imps.DeleteBuilderImpl.forPath(DeleteBuilderImpl.java:215) > at > org.apache.curator.framework.imps.DeleteBuilderImpl.forPath(DeleteBuilderImpl.java:42) > at > org.apache.curator.framework.recipes.locks.LockInternals.deleteOurPath(LockInternals.java:345) > at > org.apache.curator.framework.recipes.locks.LockInternals.releaseLock(LockInternals.java:123) > at > org.apache.curator.framework.recipes.locks.InterProcessMutex.release(InterProcessMutex.java:142) > at > org.apache.oozie.service.ZKLocksService$ZKLockToken.release(ZKLocksService.java:206) > at org.apache.oozie.command.XCommand.releaseLock(XCommand.java:233) > at org.apache.oozie.command.XCommand.call(XCommand.java:305) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:178) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029)