[
https://issues.apache.org/jira/browse/CURATOR-196?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Kezhu Wang closed CURATOR-196.
------------------------------
Resolution: Not A Problem
{{NodeExistsException}} is expected if the target node already exists. You can
use {{CreateBuilder.orSetData}} if you don't want such an exception.
> this.client.create().creatingParentsIfNeeded() throw Puzzling EXCEPTION
> ------------------------------------------------------------------------
>
> Key: CURATOR-196
> URL: https://issues.apache.org/jira/browse/CURATOR-196
> Project: Apache Curator
> Issue Type: Bug
> Components: Framework
> Affects Versions: 2.6.0
> Environment: RedHat
> Reporter: HuanWang
> Priority: Major
>
> Scene One: In a single test, I want to register the worker with ZooKeeper. The code is as below:
> {code}
> private void startWorker() {
> try {
> LOG.info("Start With Worker IP:" + this.workerIP);
>
>
> this.client.makeDir(SuperionConstant.ZOOKEEPER_WORKER_MONITOR_PATH);
>
> this.client.makeDir(SuperionConstant.ZOOKEEPER_WORKER_PATH);
>
> this.workerMonitorPath =
> SuperionConstant.ZOOKEEPER_WORKER_MONITOR_PATH + "/" + this.workerIP;
> /** Ephemeral Node: /workersMonitor/192.168.0.2 */
> this.client.createEphemeralNode(this.workerMonitorPath);
>
>
> this.workerPath =
> SuperionConstant.ZOOKEEPER_WORKER_PATH + "/" + this.workerIP;
> /** worker Node: /workers/192.168.0.2 */
> this.client.makeDir(this.workerPath);
>
> String workerStatePath = this.workerPath + "/" +
> "state";
> /** Persistent Node: /workers/192.168.0.2/state */
> this.client.makeDir(workerStatePath);
>
> /** Persistent Node:
> /workers/192.168.0.2/state/ProcessID */
> String workerStatePidPath = workerStatePath + "/" +
> "ProcessID";
> this.client.writeInt32(workerStatePidPath, workerPID);
>
>
> //this.client.makeDir(SuperionConstant.ZOOKEEPER_JOB_PATH);
> /** Persistent Node: /jobs/tmp */
>
> this.client.makeDir(SuperionConstant.ZOOKEEPER_JOB_TMP_PATH);
> /** Persistent Node: /jobs/state */
>
> this.client.makeDir(SuperionConstant.ZOOKEEPER_JOB_STATE_PATH);
>
> //register the worker in Zookeeper success
>
> this.containerManager.setBlockNewContainerRequests(false);
> } catch (Exception e) {
> String errorMsg = "Worker Register Error Happen, Maker
> Sure Zookeeper Server Can Be Connected";
> LOG.error(errorMsg, e);
> throw new SuperionRuntimeException(errorMsg,e);
> }
> }
> {code}
> ==========================================================
> the function I use is creatingParentsIfNeeded().
> ==========================================================
> {code}
> public synchronized void writeData(String path,byte data[]) throws Exception {
> System.out.println(path+" : writeData");
> if(this.client.checkExists().forPath(path)!=null) {
> //node exit
> System.out.println(path+" : checkExist");
> this.client.setData().forPath(path, data);
> } else {
> //node not exit, create new
> System.out.println(path+ " : node not exit");
> this.client.create().creatingParentsIfNeeded()
> .withMode(CreateMode.PERSISTENT).forPath(path, data);
> // this.client.create().withMode(CreateMode.PERSISTENT).forPath(path, data);
> System.out.println(path+ " : creatingParentsIfNeeded");
> }
> {code}
> ======================================================
> But sometimes (not every time) it throws NodeExistsException:
> =======================================================
> {code}
> 2015-03-31 15:29:49,452 INFO [main-EventThread] state.ConnectionStateManager
> (ConnectionStateManager.java:postState(228)) - State change: CONNECTED
> /workersMonitor : checkExist
> /workers : writeData
> /workers : checkExist
> /workers/10.24.76.52 : writeData
> /workers/10.24.76.52 : node not exit
> /workers/10.24.76.52 : creatingParentsIfNeeded
> /workers/10.24.76.52/state : writeData
> /workers/10.24.76.52/state : node not exit
> 2015-03-31 15:29:50,508 ERROR [main] zookeeper.ZookeeperService
> (ZookeeperService.java:startWorker(331)) - Worker Register Error Happen,
> Maker Sure Zookeeper Server Can Be Connected
> org.apache.zookeeper.KeeperException$NodeExistsException: KeeperErrorCode =
> NodeExists for /workers/10.24.76.52/state
> at org.apache.zookeeper.KeeperException.create(KeeperException.java:119)
> at org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
> at org.apache.zookeeper.ZooKeeper.create(ZooKeeper.java:783)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl$11.call(CreateBuilderImpl.java:688)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl$11.call(CreateBuilderImpl.java:672)
> at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.pathInForeground(CreateBuilderImpl.java:668)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.protectedPathInForeground(CreateBuilderImpl.java:453)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.forPath(CreateBuilderImpl.java:443)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.forPath(CreateBuilderImpl.java:44)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperClient.writeData(ZookeeperClient.java:125)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperClient.makeDir(ZookeeperClient.java:169)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService.startWorker(ZookeeperService.java:315)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService.serviceStart(ZookeeperService.java:86)
> at
> org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
> at
> org.apache.hadoop.service.CompositeService.serviceStart(CompositeService.java:121)
> at
> com.suning.cybertron.superion.worker.containermanager.ContainerManagerImpl.serviceStart(ContainerManagerImpl.java:230)
> at
> org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
> at
> org.apache.hadoop.service.CompositeService.serviceStart(CompositeService.java:121)
> at
> com.suning.cybertron.superion.worker.Worker.serviceStart(Worker.java:143)
> at
> org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
> at
> com.suning.cybertron.superion.worker.Worker.initAndStartNodeManager(Worker.java:182)
> at com.suning.cybertron.superion.worker.Worker.main(Worker.java:227)
> 2015-03-31 15:29:50,510 INFO [main] service.AbstractService
> (AbstractService.java:noteFailure(272)) - Service
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService
> failed in state STARTED; cause:
> com.suning.cybertron.superion.exception.SuperionRuntimeException: Worker
> Register Error Happen, Maker Sure Zookeeper Server Can Be Connected
> com.suning.cybertron.superion.exception.SuperionRuntimeException: Worker
> Register Error Happen, Maker Sure Zookeeper Server Can Be Connected
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService.startWorker(ZookeeperService.java:332)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService.serviceStart(ZookeeperService.java:86)
> at
> org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
> at
> org.apache.hadoop.service.CompositeService.serviceStart(CompositeService.java:121)
> at
> com.suning.cybertron.superion.worker.containermanager.ContainerManagerImpl.serviceStart(ContainerManagerImpl.java:230)
> at
> org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
> at
> org.apache.hadoop.service.CompositeService.serviceStart(CompositeService.java:121)
> at
> com.suning.cybertron.superion.worker.Worker.serviceStart(Worker.java:143)
> at
> org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
> at
> com.suning.cybertron.superion.worker.Worker.initAndStartNodeManager(Worker.java:182)
> at com.suning.cybertron.superion.worker.Worker.main(Worker.java:227)
> Caused by: org.apache.zookeeper.KeeperException$NodeExistsException:
> KeeperErrorCode = NodeExists for /workers/10.24.76.52/state
> at org.apache.zookeeper.KeeperException.create(KeeperException.java:119)
> at org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
> at org.apache.zookeeper.ZooKeeper.create(ZooKeeper.java:783)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl$11.call(CreateBuilderImpl.java:688)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl$11.call(CreateBuilderImpl.java:672)
> at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.pathInForeground(CreateBuilderImpl.java:668)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.protectedPathInForeground(CreateBuilderImpl.java:453)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.forPath(CreateBuilderImpl.java:443)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.forPath(CreateBuilderImpl.java:44)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperClient.writeData(ZookeeperClient.java:125)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperClient.makeDir(ZookeeperClient.java:169)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService.startWorker(ZookeeperService.java:315)
> ... 10 more
> 2015-03-31 15:29:50,557 INFO [main] zookeeper.ZooKeeper
> (ZooKeeper.java:close(684)) - Session: 0x34a75c727c204a4 closed
> 2015-03-31 15:29:50,557 INFO [main-EventThread] zookeeper.ClientCnxn
> (ClientCnxn.java:run(512)) - EventThread shut down
> 2015-03-31 15:29:50,558 INFO [main] service.AbstractService
> (AbstractService.java:noteFailure(272)) - Service
> com.suning.cybertron.superion.worker.containermanager.ContainerManagerImpl
> failed in state STARTED; cause:
> com.suning.cybertron.superion.exception.SuperionRuntimeException: Worker
> Register Error Happen, Maker Sure Zookeeper Server Can Be Connected
> com.suning.cybertron.superion.exception.SuperionRuntimeException: Worker
> Register Error Happen, Maker Sure Zookeeper Server Can Be Connected
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService.startWorker(ZookeeperService.java:332)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService.serviceStart(ZookeeperService.java:86)
> at
> org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
> at
> org.apache.hadoop.service.CompositeService.serviceStart(CompositeService.java:121)
> at
> com.suning.cybertron.superion.worker.containermanager.ContainerManagerImpl.serviceStart(ContainerManagerImpl.java:230)
> at
> org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
> at
> org.apache.hadoop.service.CompositeService.serviceStart(CompositeService.java:121)
> at
> com.suning.cybertron.superion.worker.Worker.serviceStart(Worker.java:143)
> at
> org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
> at
> com.suning.cybertron.superion.worker.Worker.initAndStartNodeManager(Worker.java:182)
> at com.suning.cybertron.superion.worker.Worker.main(Worker.java:227)
> Caused by: org.apache.zookeeper.KeeperException$NodeExistsException:
> KeeperErrorCode = NodeExists for /workers/10.24.76.52/state
> at org.apache.zookeeper.KeeperException.create(KeeperException.java:119)
> at org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
> at org.apache.zookeeper.ZooKeeper.create(ZooKeeper.java:783)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl$11.call(CreateBuilderImpl.java:688)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl$11.call(CreateBuilderImpl.java:672)
> at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.pathInForeground(CreateBuilderImpl.java:668)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.protectedPathInForeground(CreateBuilderImpl.java:453)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.forPath(CreateBuilderImpl.java:443)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.forPath(CreateBuilderImpl.java:44)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperClient.writeData(ZookeeperClient.java:125)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperClient.makeDir(ZookeeperClient.java:169)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService.startWorker(ZookeeperService.java:315)
> ... 10 more
> 2015-03-31 15:29:50,561 INFO [main] monitor.ContainersMonitorImpl
> (ContainersMonitorImpl.java:isEnabled(168)) - Neither virutal-memory nor
> physical-memory monitoring is needed. Not running the monitor-thread
> 2015-03-31 15:29:50,562 INFO [main] service.AbstractService
> (AbstractService.java:noteFailure(272)) - Service NodeManager failed in state
> STARTED; cause:
> com.suning.cybertron.superion.exception.SuperionRuntimeException: Worker
> Register Error Happen, Maker Sure Zookeeper Server Can Be Connected
> com.suning.cybertron.superion.exception.SuperionRuntimeException: Worker
> Register Error Happen, Maker Sure Zookeeper Server Can Be Connected
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService.startWorker(ZookeeperService.java:332)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService.serviceStart(ZookeeperService.java:86)
> at
> org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
> at
> org.apache.hadoop.service.CompositeService.serviceStart(CompositeService.java:121)
> at
> com.suning.cybertron.superion.worker.containermanager.ContainerManagerImpl.serviceStart(ContainerManagerImpl.java:230)
> at
> org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
> at
> org.apache.hadoop.service.CompositeService.serviceStart(CompositeService.java:121)
> at
> com.suning.cybertron.superion.worker.Worker.serviceStart(Worker.java:143)
> at
> org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
> at
> com.suning.cybertron.superion.worker.Worker.initAndStartNodeManager(Worker.java:182)
> at com.suning.cybertron.superion.worker.Worker.main(Worker.java:227)
> Caused by: org.apache.zookeeper.KeeperException$NodeExistsException:
> KeeperErrorCode = NodeExists for /workers/10.24.76.52/state
> at org.apache.zookeeper.KeeperException.create(KeeperException.java:119)
> at org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
> at org.apache.zookeeper.ZooKeeper.create(ZooKeeper.java:783)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl$11.call(CreateBuilderImpl.java:688)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl$11.call(CreateBuilderImpl.java:672)
> at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.pathInForeground(CreateBuilderImpl.java:668)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.protectedPathInForeground(CreateBuilderImpl.java:453)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.forPath(CreateBuilderImpl.java:443)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.forPath(CreateBuilderImpl.java:44)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperClient.writeData(ZookeeperClient.java:125)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperClient.makeDir(ZookeeperClient.java:169)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService.startWorker(ZookeeperService.java:315)
> ... 10 more
> 2015-03-31 15:29:50,562 INFO [Public Localizer]
> localizer.ResourceLocalizationService
> (ResourceLocalizationService.java:run(642)) - Public cache exiting
> 2015-03-31 15:29:50,563 INFO [main] impl.MetricsSystemImpl
> (MetricsSystemImpl.java:stop(200)) - Stopping Worker metrics system...
> 2015-03-31 15:29:50,564 INFO [main] impl.MetricsSystemImpl
> (MetricsSystemImpl.java:stop(206)) - Worker metrics system stopped.
> 2015-03-31 15:29:50,564 INFO [main] impl.MetricsSystemImpl
> (MetricsSystemImpl.java:shutdown(572)) - Worker metrics system shutdown
> complete.
> 2015-03-31 15:29:50,564 FATAL [main] worker.Worker
> (Worker.java:initAndStartNodeManager(184)) - Error starting NodeManager
> com.suning.cybertron.superion.exception.SuperionRuntimeException: Worker
> Register Error Happen, Maker Sure Zookeeper Server Can Be Connected
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService.startWorker(ZookeeperService.java:332)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService.serviceStart(ZookeeperService.java:86)
> at
> org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
> at
> org.apache.hadoop.service.CompositeService.serviceStart(CompositeService.java:121)
> at
> com.suning.cybertron.superion.worker.containermanager.ContainerManagerImpl.serviceStart(ContainerManagerImpl.java:230)
> at
> org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
> at
> org.apache.hadoop.service.CompositeService.serviceStart(CompositeService.java:121)
> at
> com.suning.cybertron.superion.worker.Worker.serviceStart(Worker.java:143)
> at
> org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
> at
> com.suning.cybertron.superion.worker.Worker.initAndStartNodeManager(Worker.java:182)
> at com.suning.cybertron.superion.worker.Worker.main(Worker.java:227)
> Caused by: org.apache.zookeeper.KeeperException$NodeExistsException:
> KeeperErrorCode = NodeExists for /workers/10.24.76.52/state
> at org.apache.zookeeper.KeeperException.create(KeeperException.java:119)
> at org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
> at org.apache.zookeeper.ZooKeeper.create(ZooKeeper.java:783)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl$11.call(CreateBuilderImpl.java:688)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl$11.call(CreateBuilderImpl.java:672)
> at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.pathInForeground(CreateBuilderImpl.java:668)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.protectedPathInForeground(CreateBuilderImpl.java:453)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.forPath(CreateBuilderImpl.java:443)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.forPath(CreateBuilderImpl.java:44)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperClient.writeData(ZookeeperClient.java:125)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperClient.makeDir(ZookeeperClient.java:169)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService.startWorker(ZookeeperService.java:315)
> ... 10 more
> {code}
> ===================================================
> Scene Two: When starting job :
> ===================================================
> {code}
> private void startJob(ZookeeperEvent zookeeperEvent) {
>
> StartJobZookeeperEvent startJobZookeeperEvent =
> (StartJobZookeeperEvent) zookeeperEvent;
> String jobInstanceId = startJobZookeeperEvent
>
> .getStartContainerRequest().getContainerId().getApplicationId()
> .getJobInstanceZKId();
>
> String jobTmpEphemeral =
> SuperionConstant.ZOOKEEPER_JOB_TMP_PATH + "/" + jobInstanceId;
> String jobStatePersistent =
> SuperionConstant.ZOOKEEPER_JOB_STATE_PATH + "/" + jobInstanceId;
>
> String jobStateWorkerIP = jobStatePersistent + "/" +
> SuperionConstant.JobState.WorkerIP;
> String jobStateJobStatus = jobStatePersistent + "/" +
> SuperionConstant.JobState.JobStatus;
> String jobStateJobErrorMsg = jobStatePersistent + "/" +
> SuperionConstant.JobState.JobErrorMsg;
> String jobStateCreateTime = jobStatePersistent + "/" +
> SuperionConstant.JobState.CreateTime;
>
> try {
> /** Ephemeral Node: /job/tmp/jobInstanceId */
> this.client.createEphemeralNode(jobTmpEphemeral);
> if(this.client.checkExists(jobTmpEphemeral) == null)
> throw new Exception("ephemeral
> node["+jobTmpEphemeral+"] create fail");
>
> /** update job state----------------- */
> /** Persistent Node: /jobs/state/jobInstanceId */
> this.client.makeDir(jobStatePersistent);
> /** Persistent Node: /jobs/state/jobInstanceId/WorkerIP
> */
> this.client.writeString(jobStateWorkerIP,
> this.workerIP);
>
> /** Persistent Node:
> /jobs/state/jobInstanceId/CreateTime */
> this.client.writeInt64(jobStateCreateTime,
> System.currentTimeMillis());
> /* start container request */
> StartContainerResponse response =
> this.containerManager.startContainers(
>
> startJobZookeeperEvent.getStartContainerRequest());
>
> int jobStatusInt = SuperionConstant.JOB_STATUS_TAKED;
>
> //TODO whtest
>
> if(!response.isSuccess()) {
> // jobStatusInt =
> SuperionConstant.JOB_STATUS_PARAMETER_CHECK_ERROR;
>
> LOG.error(startJobZookeeperEvent.getStartContainerRequest().getContainerId().toString()
> + " start exception",
> response.getFailureReason());
> String jobErrorMsg =
> response.getFailureReason().getMessage();
> throw new
> Exception(jobErrorMsg,response.getFailureReason());
> /** Persistent Node:
> /jobs/state/jobInstanceId/JobErrorMsg */
> // this.client.writeString(jobStateJobErrorMsg,
> jobErrorMsg);
>
> }
>
> /** Persistent Node:
> /jobs/state/jobInstanceId/JobStatus */
> this.client.writeInt32(jobStateJobStatus, jobStatusInt);
> } catch (Exception e) {
> LOG.error("exception happened when start job" , e);
>
> if(e instanceof KeeperException.NodeExistsException){
> /*
> * node exit exception when
> /job/tmp/jobInstanceId create
> * if /job/tmp/jobInstanceId create then return
> * */
> KeeperException.NodeExistsException nodeExists
> = (KeeperException.NodeExistsException)e;
> String existsPath = nodeExists.getPath();
>
> if(existsPath != null &&
> existsPath.startsWith(SuperionConstant.ZOOKEEPER_JOB_TMP_PATH)) {
> return;
> }
> }
> try{
> String jobErrorMsg = e.getMessage();
> /** Persistent Node:
> /jobs/state/jobInstanceId/JobErrorMsg */
> this.client.writeString(jobStateJobErrorMsg,
> jobErrorMsg);
> /** Persistent Node:
> /jobs/state/jobInstanceId/JobStatus */
> this.client.writeInt32(jobStateJobStatus,
> SuperionConstant.JOB_STATUS_PARAMETER_CHECK_ERROR);
> } catch(Exception ignoreE) {
> LOG.warn("Ignore Exception", ignoreE);//ignore
> } finally {
> try {
>
> this.client.deleteEphemeralNode(jobTmpEphemeral);
> } catch(Exception exception) {
> LOG.warn("Ignore Exception",
> exception);//ignore
> }
> }
> }
> }
> {code}
> ====================================================
> When we looked at the logs, we found that some jobs (not every one) threw the exception:
> ==================================================
> {code}
> ource_visiblity as resource9_2_ from job_depend_resource jobdependr0_ where
> jobdependr0_.job_id=?
> 2015-03-28 00:01:58,651 INFO [AsyncDispatcher event handler]
> containermanager.ContainerManagerImpl
> (ContainerManagerImpl.java:startContainerInternal(319)) - Start request for
> container_20150327000156_5755_0299_0144_ by user bicbt
> 2015-03-28 00:01:58,652 INFO [AsyncDispatcher event handler]
> containermanager.ContainerManagerImpl
> (ContainerManagerImpl.java:startContainerInternal(343)) - Creating a new
> application reference for app application_20150327000156_5755
> 2015-03-28 00:01:58,652 INFO [AsyncDispatcher event handler]
> worker.WorkerAuditLogger (WorkerAuditLogger.java:logSuccess(98)) - USER=bicbt
> OPERATION=Start Container Request TARGET=ContainerManageImpl
> RESULT=SUCCESS APPID=application_20150327000156_5755
> CONTAINERID=container_20150327000156_5755_0299_0144_
> 2015-03-28 00:01:58,675 ERROR [AsyncDispatcher event handler]
> zookeeper.ZookeeperService (ZookeeperService.java:startJob(178)) - exception
> happened when start job
> org.apache.zookeeper.KeeperException$NodeExistsException: KeeperErrorCode =
> NodeExists for /jobs/state/1_299_20150328000156_144_0/JobStatus
> at
> org.apache.zookeeper.KeeperException.create(KeeperException.java:119)
> at
> org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
> at org.apache.zookeeper.ZooKeeper.create(ZooKeeper.java:783)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl$11.call(CreateBuilderImpl.java:688)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl$11.call(CreateBuilderImpl.java:672)
> at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.pathInForeground(CreateBuilderImpl.java:668)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.protectedPathInForeground(CreateBuilderImpl.java:453)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.forPath(CreateBuilderImpl.java:443)
> at
> org.apache.curator.framework.imps.CreateBuilderImpl.forPath(CreateBuilderImpl.java:44)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperClient.writeData(ZookeeperClient.java:119)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperClient.writeInt32(ZookeeperClient.java:126)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService.startJob(ZookeeperService.java:176)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService.handle(ZookeeperService.java:104)
> at
> com.suning.cybertron.superion.worker.containermanager.zookeeper.ZookeeperService.handle(ZookeeperService.java:30)
> at
> com.suning.cybertron.superion.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:138)
> at
> com.suning.cybertron.superion.event.AsyncDispatcher$1.run(AsyncDispatcher.java:85)
> at java.lang.Thread.run(Thread.java:745)
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)