We are using helix-0.7.1 All these logs are from participants, not from controller.
-Shawn On Wed, Apr 5, 2017 at 7:09 PM, kishore g <g.kish...@gmail.com> wrote: > Hi Shawn, > > Are the logs on the participant or controller? what is the helix version? > > > > On Wed, Apr 5, 2017 at 6:36 PM, Neutron Sharc <neutronsh...@gmail.com> > wrote: > >> Hi all, >> >> We are testing a failure recovery scenario where I have many resources >> spanning many participants. I shutdown all participants and helix >> admins, wait a while, then add each participant back into cluster. >> (zookeeper is on a separate cluster, not affected by shtudown) During >> the recovery, it seems controller generates too many messages, and >> there so many exceptions. Below are some examples. >> >> Are these exceptions expected? Any comments are highly appreciated. >> Thanks. >> >> >> [ERROR 2017-04-05 14:26:17,734 >> org.apache.helix.manager.zk.ZkBaseDataAccessor:303] Exception while >> updating path: /yy_cluster_name/INSTANCES/P60505029461/ERRORS/ >> 1002d87a25a0589/USER_DEFINE_MSG/15ae0bd8-10 >> 1f-4af3-acc3-36a486af4f4c >> org.I0Itec.zkclient.exception.ZkInterruptedException: >> java.lang.InterruptedException >> at org.I0Itec.zkclient.ZkClient.retryUntilConnected(ZkClient. >> java:687) >> at org.apache.helix.manager.zk.ZkClient.readData(ZkClient.java:240) >> at org.I0Itec.zkclient.ZkClient.readData(ZkClient.java:761) >> at org.apache.helix.manager.zk.ZkBaseDataAccessor.doUpdate( >> ZkBaseDataAccessor.java:273) >> at org.apache.helix.manager.zk.ZkBaseDataAccessor.update( >> ZkBaseDataAccessor.java:245) >> at org.apache.helix.manager.zk.ZKHelixDataAccessor.updateProperty( >> ZKHelixDataAccessor.java:150) >> at org.apache.helix.util.StatusUpdateUtil.publishErrorRecord( >> StatusUpdateUtil.java:501) >> at org.apache.helix.util.StatusUpdateUtil. >> publishStatusUpdateRecord(StatusUpdateUtil.java:435) >> at org.apache.helix.util.StatusUpdateUtil. >> logMessageStatusUpdateRecord(StatusUpdateUtil.java:334) >> at org.apache.helix.util.StatusUpdateUtil.logError( >> StatusUpdateUtil.java:342) >> at org.apache.helix.messaging.handling.HelixTask.call( >> HelixTask.java:163) >> at org.apache.helix.messaging.handling.HelixTask.call( >> HelixTask.java:42) >> at java.util.concurrent.FutureTask.run(FutureTask.java:266) >> at java.util.concurrent.ThreadPoolExecutor.runWorker( >> ThreadPoolExecutor.java:1142) >> at java.util.concurrent.ThreadPoolExecutor$Worker.run( >> ThreadPoolExecutor.java:617) >> at java.lang.Thread.run(Thread.java:745) >> Caused by: java.lang.InterruptedException >> at java.lang.Object.wait(Native Method) >> at java.lang.Object.wait(Object.java:502) >> at org.apache.zookeeper.ClientCnxn.submitRequest( >> ClientCnxn.java:1344) >> at org.apache.zookeeper.ZooKeeper.getData(ZooKeeper.java:925) >> at org.apache.zookeeper.ZooKeeper.getData(ZooKeeper.java:956) >> at org.I0Itec.zkclient.ZkConnection.readData(ZkConnection.java:103) >> at org.apache.helix.manager.zk.ZkClient$4.call(ZkClient.java:244) >> at org.apache.helix.manager.zk.ZkClient$4.call(ZkClient.java:240) >> at org.I0Itec.zkclient.ZkClient.retryUntilConnected(ZkClient. >> java:675) >> ... 15 more >> >> >> [ERROR 2017-04-05 14:26:17,676 >> org.apache.helix.messaging.handling.HelixTask:162] Exception after >> executing a message, msgId: >> 35e73c64-8fd3-4fb8-b0b8-419eacfa91a0org.I0Itec.zkclient.exception. >> ZkInterrupte >> dException: java.lang.InterruptedException >> org.I0Itec.zkclient.exception.ZkInterruptedException: >> java.lang.InterruptedException >> at org.I0Itec.zkclient.ZkClient.retryUntilConnected(ZkClient. >> java:687) >> at org.apache.helix.manager.zk.ZkClient.getChildren(ZkClient. >> java:212) >> at org.I0Itec.zkclient.ZkClient.deleteRecursive(ZkClient.java:505) >> at org.apache.helix.manager.zk.ZkBaseDataAccessor.remove( >> ZkBaseDataAccessor.java:537) >> at org.apache.helix.manager.zk.ZKHelixDataAccessor.removeProperty( >> ZKHelixDataAccessor.java:271) >> at org.apache.helix.messaging.handling.HelixTask. >> removeMessageFromZk(HelixTask.java:187) >> at org.apache.helix.messaging.handling.HelixTask.call( >> HelixTask.java:150) >> at org.apache.helix.messaging.handling.HelixTask.call( >> HelixTask.java:42) >> at java.util.concurrent.FutureTask.run(FutureTask.java:266) >> at java.util.concurrent.ThreadPoolExecutor.runWorker( >> ThreadPoolExecutor.java:1142) >> at java.util.concurrent.ThreadPoolExecutor$Worker.run( >> ThreadPoolExecutor.java:617) >> at java.lang.Thread.run(Thread.java:745) >> Caused by: java.lang.InterruptedException >> at java.lang.Object.wait(Native Method) >> at java.lang.Object.wait(Object.java:502) >> at org.apache.zookeeper.ClientCnxn.submitRequest( >> ClientCnxn.java:1344) >> at org.apache.zookeeper.ZooKeeper.getChildren(ZooKeeper.java:1247) >> at org.apache.zookeeper.ZooKeeper.getChildren(ZooKeeper.java:1277) >> at org.I0Itec.zkclient.ZkConnection.getChildren( >> ZkConnection.java:99) >> at org.apache.helix.manager.zk.ZkClient$3.call(ZkClient.java:215) >> at org.apache.helix.manager.zk.ZkClient$3.call(ZkClient.java:212) >> at org.I0Itec.zkclient.ZkClient.retryUntilConnected(ZkClient. >> java:675) >> ... 11 more >> >> [ERROR 2017-04-05 14:26:18,321 >> org.apache.helix.messaging.handling.HelixTask:102] Exception while >> executing a message. java.lang.NullPointerException msgId: >> eb79d5b0-4210-479e-b010-076764199059 type: USER >> _DEFINE_MSG >> java.lang.NullPointerException >> at com.hcd.hcdadmin.CustomMessageHandlerFactory$ >> CustomMessageHandler.handleMessage(CustomMessageHandlerFactory.java:149) >> at org.apache.helix.messaging.handling.HelixTask.call( >> HelixTask.java:85) >> at org.apache.helix.messaging.handling.HelixTask.call( >> HelixTask.java:42) >> at java.util.concurrent.FutureTask.run(FutureTask.java:266) >> at java.util.concurrent.ThreadPoolExecutor.runWorker( >> ThreadPoolExecutor.java:1142) >> at java.util.concurrent.ThreadPoolExecutor$Worker.run( >> ThreadPoolExecutor.java:617) >> at java.lang.Thread.run(Thread.java:745) >> >> [3:06] >> [ERROR 2017-04-05 14:26:15,958 >> org.apache.helix.messaging.handling.HelixTask:102] Exception while >> executing a message. java.lang.NullPointerException msgId: >> 5b05cd82-c596-4329-8484-28ac3ef40e80 type: USER >> _DEFINE_MSG >> java.lang.NullPointerException >> at com.hcd.hcdadmin.CustomMessageHandlerFactory$ >> CustomMessageHandler.handleMessage(CustomMessageHandlerFactory.java:149) >> at org.apache.helix.messaging.handling.HelixTask.call( >> HelixTask.java:85) >> at org.apache.helix.messaging.handling.HelixTask.call( >> HelixTask.java:42) >> at java.util.concurrent.FutureTask.run(FutureTask.java:266) >> at java.util.concurrent.ThreadPoolExecutor.runWorker( >> ThreadPoolExecutor.java:1142) >> at java.util.concurrent.ThreadPoolExecutor$Worker.run( >> ThreadPoolExecutor.java:617) >> at java.lang.Thread.run(Thread.java:745) >>