[ https://issues.apache.org/jira/browse/AMBARI-25613?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17275849#comment-17275849 ]
Suraj Naik commented on AMBARI-25613: ------------------------------------- [~aonishuk], [~ihorlukianov] please review my PR > Concurrent Host Modification exception while sending INSTALL/START Host > request > ------------------------------------------------------------------------------- > > Key: AMBARI-25613 > URL: https://issues.apache.org/jira/browse/AMBARI-25613 > Project: Ambari > Issue Type: Bug > Components: ambari-server > Affects Versions: 2.7.6 > Reporter: Suraj Naik > Priority: Major > Time Spent: 20m > Remaining Estimate: 0h > > java.lang.RuntimeException: START Host request submission failed: > java.lang.RuntimeException: Update Host request submission failed: > java.util.ConcurrentModificationException > at > org.apache.ambari.server.topology.AmbariContext.startHost(AmbariContext.java:497) > at > org.apache.ambari.server.topology.ClusterTopologyImpl.startHost(ClusterTopologyImpl.java:268) > at > org.apache.ambari.server.topology.tasks.StartHostTask.runTask(StartHostTask.java:51) > at > org.apache.ambari.server.topology.tasks.TopologyHostTask.run(TopologyHostTask.java:55) > at > org.apache.ambari.server.topology.HostOfferResponse$1.run(HostOfferResponse.java:85) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: java.lang.RuntimeException: Update Host request submission failed: > java.util.ConcurrentModificationException > at > org.apache.ambari.server.controller.internal.HostComponentResourceProvider$4.invoke(HostComponentResourceProvider.java:865) > at > org.apache.ambari.server.controller.internal.HostComponentResourceProvider$4.invoke(HostComponentResourceProvider.java:852) > at > org.apache.ambari.server.controller.internal.AbstractResourceProvider.invokeWithRetry(AbstractResourceProvider.java:465) > at > 
org.apache.ambari.server.controller.internal.AbstractResourceProvider.modifyResources(AbstractResourceProvider.java:346) > at > org.apache.ambari.server.controller.internal.HostComponentResourceProvider.doUpdateResources(HostComponentResourceProvider.java:852) > at > org.apache.ambari.server.controller.internal.HostComponentResourceProvider.start(HostComponentResourceProvider.java:492) > at > org.apache.ambari.server.topology.AmbariContext.startHost(AmbariContext.java:494) > at > org.apache.ambari.server.topology.ClusterTopologyImpl.startHost(ClusterTopologyImpl.java:268) > at > org.apache.ambari.server.topology.tasks.StartHostTask.runTask(StartHostTask.java:51) > at > org.apache.ambari.server.topology.tasks.TopologyHostTask.run(TopologyHostTask.java:55) > at > org.apache.ambari.server.topology.HostOfferResponse$1.run(HostOfferResponse.java:85) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: java.util.ConcurrentModificationException: NA > at java.util.HashMap$HashIterator.nextNode(HashMap.java:1445) > at java.util.HashMap$EntryIterator.next(HashMap.java:1479) > at java.util.HashMap$EntryIterator.next(HashMap.java:1477) > at java.util.HashMap.putMapEntries(HashMap.java:512) > at java.util.HashMap.<init>(HashMap.java:490) > at > org.apache.ambari.server.topology.HostRequest.getPhysicalTaskMapping(HostRequest.java:458) > at > org.apache.ambari.server.topology.LogicalRequest.getStageSummaries(LogicalRequest.java:286) > at > org.apache.ambari.server.topology.TopologyManager.getPendingHostComponents(TopologyManager.java:823) > at > org.apache.ambari.server.utils.StageUtils.getClusterHostInfo(StageUtils.java:306) > at > org.apache.ambari.server.controller.AmbariManagementControllerImpl.doStageCreation(AmbariManagementControllerImpl.java:2788) > at > 
org.apache.ambari.server.controller.AmbariManagementControllerImpl.addStages(AmbariManagementControllerImpl.java:3513) > at > org.apache.ambari.server.controller.internal.HostComponentResourceProvider.updateHostComponents(HostComponentResourceProvider.java:707) > at > org.apache.ambari.server.controller.internal.HostComponentResourceProvider$4.invoke(HostComponentResourceProvider.java:857) > at > org.apache.ambari.server.controller.internal.HostComponentResourceProvider$4.invoke(HostComponentResourceProvider.java:852) > at > org.apache.ambari.server.controller.internal.AbstractResourceProvider.invokeWithRetry(AbstractResourceProvider.java:465) > at > org.apache.ambari.server.controller.internal.AbstractResourceProvider.modifyResources(AbstractResourceProvider.java:346) > at > org.apache.ambari.server.controller.internal.HostComponentResourceProvider.doUpdateResources(HostComponentResourceProvider.java:852) > at > org.apache.ambari.server.controller.internal.HostComponentResourceProvider.start(HostComponentResourceProvider.java:492) > at > org.apache.ambari.server.topology.AmbariContext.startHost(AmbariContext.java:494) > at > org.apache.ambari.server.topology.ClusterTopologyImpl.startHost(ClusterTopologyImpl.java:268) > at > org.apache.ambari.server.topology.tasks.StartHostTask.runTask(StartHostTask.java:51) > at > org.apache.ambari.server.topology.tasks.TopologyHostTask.run(TopologyHostTask.java:55) > at > org.apache.ambari.server.topology.HostOfferResponse$1.run(HostOfferResponse.java:85) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > > > My teammate [~ramkrishna] did some analysis on this one by adding logs and > latches and found that, although installation and registration are done > in parallel, each thread tries to get the entire cluster’s view of the current > physical tasks. 
So it is bound to happen that while a registration is > in progress, another thread can call getPhysicalTaskMapping(), leading to a > ConcurrentModificationException (CME). > > > > -- This message was sent by Atlassian Jira (v8.3.4#803005)