Hi all – we have a simple Jenkins install (single node, HDFS, YARN) and we’re
attempting to automatically install DT and launch an app. However, we’re running
into several issues, including errors and the Gateway seeming to shut down
on its own. Error stacks from the gateway are below; we would really appreciate
some help debugging this.
We are launching with:
'''
${WORKSPACE}/datatorrent/current/bin/dtgateway stop || true
export DTBIN="datatorrent-dist-3.3.0-RC5.bin"
wget -N http://mdcilabvlt01.kdc.capitalone.com/cerberus/${DTBIN}
chmod +x ${DTBIN}
./${DTBIN} -B ${WORKSPACE}/datatorrent -s ${WORKSPACE}/conf/dt-site.xml
${WORKSPACE}/datatorrent/current/bin/dtgateway restart
yarn application -list 2>/dev/null| tail -n-1 | cut -d" " -f1 | awk '{print "yarn application -kill "$1}' | sh -x || true
./datatorrent/current/bin/dtcli -e "launch target/DTApp-citadel-1.0-SNAPSHOT.jar -conf src/main/resources/META-INF/properties_jenkins.xml" > appid
export DT_APPID=`cat appid | cut -d "\"" -f4`
datatorrent/current/bin/dtcli -e "connect $DT_APPID"
'''
And dt-site.xml is as follows:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<configuration>
<!-- WARNING: Do not edit this file. Your changes will be overwritten. -->
<!-- Written by dtgateway on 2016-05-04T16:46:00-->
<!-- Setting dt.version = 3.3.1-dt20160309 -->
<property>
<name>dt.attr.GATEWAY_CONNECT_ADDRESS</name>
<value>10.24.28.21:9090</value>
<description>Automatically set by DTGateway</description>
</property>
<property>
<name>dt.dfsRootDirectory</name>
<value>/user/jenkins/datatorrent</value>
<description>Automatically set by DTGateway</description>
</property>
<property>
<name>dt.configStatus</name>
<value>complete</value>
</property>
<property>
<name>dt.version</name>
<value>3.3.1-dt20160309</value>
</property>
<!-- WARNING: Do not edit this file. Your changes will be overwritten. -->
</configuration>
2016-05-04 17:57:06,091 ERROR com.datatorrent.gateway.b: DFS Directory cannot
be written to with exception:
org.apache.hadoop.ipc.RemoteException(java.io.IOException): Requested
replication factor of 3 exceeds maximum of 1 for
/user/jenkins/datatorrent/dtcheck_1462399026089.tmp from 10.24.28.21
at
org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.verifyReplication(BlockManager.java:982)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2600)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2559)
at
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:592)
at
org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:110)
at
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:395)
at
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1060)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2086)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2082)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1671)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2080)
at org.apache.hadoop.ipc.Client.call(Client.java:1472)
at org.apache.hadoop.ipc.Client.call(Client.java:1403)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:230)
at com.sun.proxy.$Proxy14.create(Unknown Source)
at
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.create(ClientNamenodeProtocolTranslatorPB.java:295)
at sun.reflect.GeneratedMethodAccessor32.invoke(Unknown Source)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at
org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:252)
at
org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:104)
at com.sun.proxy.$Proxy15.create(Unknown Source)
at
org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1867)
at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1737)
at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1662)
at
org.apache.hadoop.hdfs.DistributedFileSystem$6.doCall(DistributedFileSystem.java:404)
at
org.apache.hadoop.hdfs.DistributedFileSystem$6.doCall(DistributedFileSystem.java:400)
at
org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at
org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:400)
at
org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:343)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:917)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:898)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:795)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:784)
at com.datatorrent.stram.client.FSAgent.createFile(FSAgent.java:70)
at com.datatorrent.gateway.b.I(lc:138)
at com.datatorrent.gateway.f.F(vc:215)
at com.datatorrent.gateway.f.I(vc:169)
at com.datatorrent.gateway.A.run(zc:86)
at java.lang.Thread.run(Thread.java:745)
2016-05-04 17:57:07,555 WARN com.datatorrent.gateway.W: Cannot update license
registry for the number of nodes
com.datatorrent.a.I
at com.datatorrent.a.l.I(g:138)
at com.datatorrent.a.e.I(q:108)
at com.datatorrent.gateway.W.I(hd:393)
at com.datatorrent.gateway.W.I(hd:278)
at com.datatorrent.gateway.W.I(hd:653)
at com.datatorrent.gateway.W.I(hd:101)
at com.datatorrent.gateway.x.run(hd:78)
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
2016-05-04 18:00:39,070 WARN com.datatorrent.gateway.W: Cannot update license
registry for the number of nodes
com.datatorrent.a.I: Failed to replace a bad datanode on the existing pipeline
due to no more good datanodes being available to try. (Nodes:
current=[DatanodeInfoWithStorage[10.24.28.21:50010,DS-7c8ba6d2-bbdb-4e69-9462-abcbafab378d,DISK]],
original=[DatanodeInfoWithStorage[10.24.28.21:50010,DS-7c8ba6d2-bbdb-4e69-9462-abcbafab378d,DISK]]).
The current failed datanode replacement policy is DEFAULT, and a client may
configure this via 'dfs.client.block.write.replace-datanode-on-failure.policy'
in its configuration.
at com.datatorrent.a.l.I(g:138)
at com.datatorrent.a.e.I(q:108)
at com.datatorrent.gateway.W.I(hd:393)
at com.datatorrent.gateway.W.I(hd:278)
at com.datatorrent.gateway.W.I(hd:653)
at com.datatorrent.gateway.W.I(hd:101)
at com.datatorrent.gateway.x.run(hd:78)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
2016-05-04 18:00:44,070 WARN com.datatorrent.gateway.W: Cannot update license
registry for the number of nodes
com.datatorrent.a.I: Failed to replace a bad datanode on the existing pipeline
due to no more good datanodes being available to try. (Nodes:
current=[DatanodeInfoWithStorage[10.24.28.21:50010,DS-7c8ba6d2-bbdb-4e69-9462-abcbafab378d,DISK]],
original=[DatanodeInfoWithStorage[10.24.28.21:50010,DS-7c8ba6d2-bbdb-4e69-9462-abcbafab378d,DISK]]).
The current failed datanode replacement policy is DEFAULT, and a client may
configure this via 'dfs.client.block.write.replace-datanode-on-failure.policy'
in its configuration.
at com.datatorrent.a.l.I(g:138)
at com.datatorrent.a.e.I(q:108)
at com.datatorrent.gateway.W.I(hd:393)
at com.datatorrent.gateway.W.I(hd:278)
at com.datatorrent.gateway.W.I(hd:653)
at com.datatorrent.gateway.W.I(hd:101)
at com.datatorrent.gateway.x.run(hd:78)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
2016-05-04 18:00:49,070 WARN com.datatorrent.gateway.W: Cannot update license
registry for the number of nodes
com.datatorrent.a.I: Failed to replace a bad datanode on the existing pipeline
due to no more good datanodes being available to try. (Nodes:
current=[DatanodeInfoWithStorage[10.24.28.21:50010,DS-7c8ba6d2-bbdb-4e69-9462-abcbafab378d,DISK]],
original=[DatanodeInfoWithStorage[10.24.28.21:50010,DS-7c8ba6d2-bbdb-4e69-9462-abcbafab378d,DISK]]).
The current failed datanode replacement policy is DEFAULT, and a client may
configure this via 'dfs.client.block.write.replace-datanode-on-failure.policy'
in its configuration.
at com.datatorrent.a.l.I(g:138)
at com.datatorrent.a.e.I(q:108)
at com.datatorrent.gateway.W.I(hd:393)
at com.datatorrent.gateway.W.I(hd:278)
at com.datatorrent.gateway.W.I(hd:653)
at com.datatorrent.gateway.W.I(hd:101)
at com.datatorrent.gateway.x.run(hd:78)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
________________________________________________________
The information contained in this e-mail is confidential and/or proprietary to
Capital One and/or its affiliates and may only be used solely in performance of
work or services for Capital One. The information transmitted herewith is
intended only for use by the individual or entity to which it is addressed. If
the reader of this message is not the intended recipient, you are hereby
notified that any review, retransmission, dissemination, distribution, copying
or other use of, or taking of any action in reliance upon this information is
strictly prohibited. If you have received this communication in error, please
contact the sender and delete the material from your computer.