[ https://issues.apache.org/jira/browse/YARN-9155?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Zac Zhou updated YARN-9155: --------------------------- Summary: Can't re-run a submarine job, if the previous job with the same service name has finished (was: Can't re-run a submarine job, if a previous job with the same service name has finished) > Can't re-run a submarine job, if the previous job with the same service name > has finished > ----------------------------------------------------------------------------------------- > > Key: YARN-9155 > URL: https://issues.apache.org/jira/browse/YARN-9155 > Project: Hadoop YARN > Issue Type: Sub-task > Reporter: Zac Zhou > Assignee: Zac Zhou > Priority: Major > > Yarn native service doesn't clean up its HDFS service path when it is > finished. > So if we don't execute "yarn app -destroy " command before the next run of a > submarine job. we would get the following exception: > 2018-12-24 11:38:02,493 ERROR > org.apache.hadoop.yarn.service.utils.CoreFileSystem: Dir > /user/hadoop/****/services/distributed-tf-gpu-ml4/${service_name}.json > exists: hdfs://mldev/user/hadoop/****** > /services/distributed-tf-gpu-ml4/${service_name}.json 8472 > 2018-12-24 11:38:02,494 ERROR > org.apache.hadoop.yarn.service.webapp.ApiServer: Failed to create service > ${service_name}: {} > java.lang.reflect.UndeclaredThrowableException > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1748) > at > org.apache.hadoop.yarn.service.webapp.ApiServer.createService(ApiServer.java:131) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > com.sun.jersey.spi.container.JavaMethodInvokerFactory$1.invoke(JavaMethodInvokerFactory.java:60) > at > com.sun.jersey.server.impl.model.method.dispatch.AbstractResourceMethodDispatchProvider$ResponseOu > tInvoker._dispatch(AbstractResourceMethodDispatchProvider.java:205) > at > com.sun.jersey.server.impl.model.method.dispatch.ResourceJavaMethodDispatcher.dispatch(ResourceJav > aMethodDispatcher.java:75) > at > com.sun.jersey.server.impl.uri.rules.HttpMethodRule.accept(HttpMethodRule.java:302) > at > com.sun.jersey.server.impl.uri.rules.RightHandPathRule.accept(RightHandPathRule.java:147) > at > com.sun.jersey.server.impl.uri.rules.ResourceClassRule.accept(ResourceClassRule.java:108) > at > com.sun.jersey.server.impl.uri.rules.RightHandPathRule.accept(RightHandPathRule.java:147) > at > com.sun.jersey.server.impl.uri.rules.RootResourceClassesRule.accept(RootResourceClassesRule.java:8 > 4) > at > com.sun.jersey.server.impl.application.WebApplicationImpl._handleRequest(WebApplicationImpl.java:1 > 542) > at > com.sun.jersey.server.impl.application.WebApplicationImpl._handleRequest(WebApplicationImpl.java:1 > 473) > at > com.sun.jersey.server.impl.application.WebApplicationImpl.handleRequest(WebApplicationImpl.java:14 > 19) > at > com.sun.jersey.server.impl.application.WebApplicationImpl.handleRequest(WebApplicationImpl.java:14 > 09) > at > com.sun.jersey.spi.container.servlet.WebComponent.service(WebComponent.java:409) > at > com.sun.jersey.spi.container.servlet.ServletContainer.service(ServletContainer.java:558) > at > com.sun.jersey.spi.container.servlet.ServletContainer.service(ServletContainer.java:733) > at javax.servlet.http.HttpServlet.service(HttpServlet.java:790) > at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:848) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1772) > at > com.google.inject.servlet.FilterChainInvocation.doFilter(FilterChainInvocation.java:89) > at > com.sun.jersey.spi.container.servlet.ServletContainer.doFilter(ServletContainer.java:941) > at > com.sun.jersey.spi.container.servlet.ServletContainer.doFilter(ServletContainer.java:875) > at > org.apache.hadoop.yarn.server.resourcemanager.webapp.RMWebAppFilter.doFilter(RMWebAppFilter.java:1 > 79) > at > com.sun.jersey.spi.container.servlet.ServletContainer.doFilter(ServletContainer.java:829) > at > com.google.inject.servlet.FilterChainInvocation.doFilter(FilterChainInvocation.java:82) > at > com.google.inject.servlet.ManagedFilterPipeline.dispatch(ManagedFilterPipeline.java:119) > at com.google.inject.servlet.GuiceFilter$1.call(GuiceFilter.java:133) > at com.google.inject.servlet.GuiceFilter$1.call(GuiceFilter.java:130) > at com.google.inject.servlet.GuiceFilter$Context.call(GuiceFilter.java:203) > at com.google.inject.servlet.GuiceFilter.doFilter(GuiceFilter.java:130) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759) > at > org.apache.hadoop.security.http.XFrameOptionsFilter.doFilter(XFrameOptionsFilter.java:57) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759) > at > org.apache.hadoop.security.authentication.server.AuthenticationFilter.doFilter(AuthenticationFilte > r.java:644) > at > org.apache.hadoop.security.authentication.server.AuthenticationFilter.doFilter(AuthenticationFilte > r.java:592) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759) > at > org.apache.hadoop.http.HttpServer2$QuotingInputFilter.doFilter(HttpServer2.java:1610) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759) > at org.apache.hadoop.http.NoCacheFilter.doFilter(NoCacheFilter.java:45) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759) > at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:539) > at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:333) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:283) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:108) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceC > onsume.java:303) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume. > java:148) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:136) > at > org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:671) > at > org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:589) > at java.lang.Thread.run(Thread.java:745) > Caused by: org.apache.hadoop.yarn.service.exceptions.SliderException: Service > Instance dir already exists: /u > ser/hadoop/********/services/distributed-tf-gpu-ml4/${service_name}.json > at > org.apache.hadoop.yarn.service.utils.CoreFileSystem.verifyDirectoryNonexistent(CoreFileSystem.java > :260) > at > org.apache.hadoop.yarn.service.client.ServiceClient.checkAppNotExistOnHdfs(ServiceClient.java:1181 > ) > at > org.apache.hadoop.yarn.service.client.ServiceClient.actionCreate(ServiceClient.java:484) > at org.apache.hadoop.yarn.service.webapp.ApiServer$2.run(ApiServer.java:137) > at org.apache.hadoop.yarn.service.webapp.ApiServer$2.run(ApiServer.java:131) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730) > ... 67 more -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: yarn-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: yarn-issues-h...@hadoop.apache.org