[ https://issues.apache.org/jira/browse/HDDS-2356?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16963875#comment-16963875 ]
Li Cheng commented on HDDS-2356: -------------------------------- I tried multiprocessing in python to fetch data locally and write to remote ozone cluster thru s3 gateway and boto3 (aws python client). Shows an RPC error: 2019-10-31 17:44:33,123 [qtp1383524016-25] ERROR (OzoneManagerProtocolClientSideTranslatorPB.java:268) - Failed to connect to OM. Attempted 10 retries a nd 10 failovers 2019-10-31 17:44:33,124 [qtp1383524016-25] ERROR (ObjectEndpoint.java:186) - Exception occurred in PutObject java.io.IOException: Failed on local exception: org.apache.hadoop.ipc.RpcException: RPC response exceeds maximum data length; Host Details : local host is: "VM_50_210_centos/127.0.0.1"; destination host is: "9.134.50.210":9862; at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:816) at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1515) at org.apache.hadoop.ipc.Client.call(Client.java:1457) at org.apache.hadoop.ipc.Client.call(Client.java:1367) at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:228) at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:116) at com.sun.proxy.$Proxy81.submitRequest(Unknown Source) at sun.reflect.GeneratedMethodAccessor7.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:497) at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422) at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165) at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157) at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95) at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359) at com.sun.proxy.$Proxy81.submitRequest(Unknown Source) at org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolClientSideTranslatorPB.submitRequest(OzoneManagerProtocolClientSideTranslatorPB.java:338) at org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolClientSideTranslatorPB.openKey(OzoneManagerProtocolClientSideTranslatorPB.java:723) at sun.reflect.GeneratedMethodAccessor34.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:497) at org.apache.hadoop.hdds.tracing.TraceAllMethod.invoke(TraceAllMethod.java:66) at com.sun.proxy.$Proxy82.openKey(Unknown Source) at org.apache.hadoop.ozone.client.rpc.RpcClient.createKey(RpcClient.java:614) at org.apache.hadoop.ozone.client.OzoneBucket.createKey(OzoneBucket.java:325) at org.apache.hadoop.ozone.s3.endpoint.ObjectEndpoint.put(ObjectEndpoint.java:173) at sun.reflect.GeneratedMethodAccessor26.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:497) at org.glassfish.jersey.server.model.internal.ResourceMethodInvocationHandlerFactory.lambda$static$0(ResourceMethodInvocationHandlerFactory.java:76) at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher$1.run(AbstractJavaResourceMethodDispatcher.java:148) at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.invoke(AbstractJavaResourceMethodDispatcher.java:191) at org.glassfish.jersey.server.model.internal.JavaResourceMethodDispatcherProvider$ResponseOutInvoker.doDispatch(JavaResourceMethodDispatcherProvider.java:200) at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.dispatch(AbstractJavaResourceMethodDispatcher.java:103) at org.glassfish.jersey.server.model.ResourceMethodInvoker.invoke(ResourceMethodInvoker.java:493) at org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:415) at org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:104) at org.glassfish.jersey.server.ServerRuntime$1.run(ServerRuntime.java:277) at org.glassfish.jersey.internal.Errors$1.call(Errors.java:272) at org.glassfish.jersey.internal.Errors$1.call(Errors.java:268) at org.glassfish.jersey.internal.Errors.process(Errors.java:316) at org.glassfish.jersey.internal.Errors.process(Errors.java:298) at org.glassfish.jersey.internal.Errors.process(Errors.java:268) at org.glassfish.jersey.process.internal.RequestScope.runInScope(RequestScope.java:289) at org.glassfish.jersey.server.ServerRuntime.process(ServerRuntime.java:256) at org.glassfish.jersey.server.ApplicationHandler.handle(ApplicationHandler.java:703) at org.glassfish.jersey.servlet.WebComponent.serviceImpl(WebComponent.java:416) at org.glassfish.jersey.servlet.WebComponent.service(WebComponent.java:370) at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:389) at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:342) at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:229) at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:840) at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1780) at org.apache.hadoop.http.HttpServer2$QuotingInputFilter.doFilter(HttpServer2.java:1609) at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1767) at org.apache.hadoop.http.NoCacheFilter.doFilter(NoCacheFilter.java:45) at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1767) at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:583) at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:513) at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) at org.eclipse.jetty.server.Server.handle(Server.java:539) at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:333) at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:283) at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:108) at org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:136) at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:671) at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:589) at java.lang.Thread.run(Thread.java:745) Caused by: org.apache.hadoop.ipc.RpcException: RPC response exceeds maximum data length at org.apache.hadoop.ipc.Client$IpcStreams.readResponse(Client.java:1830) at org.apache.hadoop.ipc.Client$Connection.receiveRpcResponse(Client.java:1173) at org.apache.hadoop.ipc.Client$Connection.run(Client.java:1069) > Multipart upload report errors while writing to ozone Ratis pipeline > -------------------------------------------------------------------- > > Key: HDDS-2356 > URL: https://issues.apache.org/jira/browse/HDDS-2356 > Project: Hadoop Distributed Data Store > Issue Type: Bug > Components: Ozone Manager > Affects Versions: 0.4.1 > Environment: Env: 4 VMs in total: 3 Datanodes on 3 VMs, 1 OM & 1 SCM > on a separate VM > Reporter: Li Cheng > Assignee: Bharat Viswanadham > Priority: Blocker > Fix For: 0.5.0 > > Attachments: image-2019-10-31-18-56-56-177.png > > > Env: 4 VMs in total: 3 Datanodes on 3 VMs, 1 OM & 1 SCM on a separate VM, say > it's VM0. > I use goofys as a fuse and enable ozone S3 gateway to mount ozone to a path > on VM0, while reading data from VM0 local disk and write to mount path. The > dataset has various sizes of files from 0 byte to GB-level and it has a > number of ~50,000 files. > The writing is slow (1GB for ~10 mins) and it stops after around 4GB. As I > look at hadoop-root-om-VM_50_210_centos.out log, I see OM throwing errors > related with Multipart upload. This error eventually causes the writing to > terminate and OM to be closed. > > 2019-10-28 11:44:34,079 [qtp1383524016-70] ERROR - Error in Complete > Multipart Upload Request for bucket: ozone-test, key: > 20191012/plc_1570863541668_927 > 8 > MISMATCH_MULTIPART_LIST org.apache.hadoop.ozone.om.exceptions.OMException: > Complete Multipart Upload Failed: volume: > s3c89e813c80ffcea9543004d57b2a1239bucket: > ozone-testkey: 20191012/plc_1570863541668_9278 > at > org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolClientSideTranslatorPB.handleError(OzoneManagerProtocolClientSideTranslatorPB.java:732) > at > org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolClientSideTranslatorPB.completeMultipartUpload(OzoneManagerProtocolClientSideTranslatorPB > .java:1104) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:497) > at > org.apache.hadoop.hdds.tracing.TraceAllMethod.invoke(TraceAllMethod.java:66) > at com.sun.proxy.$Proxy82.completeMultipartUpload(Unknown Source) > at > org.apache.hadoop.ozone.client.rpc.RpcClient.completeMultipartUpload(RpcClient.java:883) > at > org.apache.hadoop.ozone.client.OzoneBucket.completeMultipartUpload(OzoneBucket.java:445) > at > org.apache.hadoop.ozone.s3.endpoint.ObjectEndpoint.completeMultipartUpload(ObjectEndpoint.java:498) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:497) > at > org.glassfish.jersey.server.model.internal.ResourceMethodInvocationHandlerFactory.lambda$static$0(ResourceMethodInvocationHandlerFactory.java:76) > at > org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher$1.run(AbstractJavaResourceMethodDispatcher.java:148) > at > org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.invoke(AbstractJavaResourceMethodDispatcher.java:191) > at > org.glassfish.jersey.server.model.internal.JavaResourceMethodDispatcherProvider$ResponseOutInvoker.doDispatch(JavaResourceMethodDispatcherProvider.java:200) > at > org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.dispatch(AbstractJavaResourceMethodDispatcher.java:103) > at > org.glassfish.jersey.server.model.ResourceMethodInvoker.invoke(ResourceMethodInvoker.java:493) > > The following errors has been resolved in > https://issues.apache.org/jira/browse/HDDS-2322. > 2019-10-24 16:01:59,527 [OMDoubleBufferFlushThread] ERROR - Terminating with > exit status 2: OMDoubleBuffer flush > threadOMDoubleBufferFlushThreadencountered Throwable error > java.util.ConcurrentModificationException > at java.util.TreeMap.forEach(TreeMap.java:1004) > at > org.apache.hadoop.ozone.om.helpers.OmMultipartKeyInfo.getProto(OmMultipartKeyInfo.java:111) > at > org.apache.hadoop.ozone.om.codec.OmMultipartKeyInfoCodec.toPersistedFormat(OmMultipartKeyInfoCodec.java:38) > at > org.apache.hadoop.ozone.om.codec.OmMultipartKeyInfoCodec.toPersistedFormat(OmMultipartKeyInfoCodec.java:31) > at > org.apache.hadoop.hdds.utils.db.CodecRegistry.asRawData(CodecRegistry.java:68) > at > org.apache.hadoop.hdds.utils.db.TypedTable.putWithBatch(TypedTable.java:125) > at > org.apache.hadoop.ozone.om.response.s3.multipart.S3MultipartUploadCommitPartResponse.addToDBBatch(S3MultipartUploadCommitPartResponse.java:112) > at > org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.lambda$flushTransactions$0(OzoneManagerDoubleBuffer.java:137) > at java.util.Iterator.forEachRemaining(Iterator.java:116) > at > org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer.flushTransactions(OzoneManagerDoubleBuffer.java:135) > at java.lang.Thread.run(Thread.java:745) > 2019-10-24 16:01:59,629 [shutdown-hook-0] INFO - SHUTDOWN_MSG: -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org