[ https://issues.apache.org/jira/browse/CLOUDSTACK-9348?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15243337#comment-15243337 ]
ASF GitHub Bot commented on CLOUDSTACK-9348: -------------------------------------------- Github user bhaisaab commented on a diff in the pull request: https://github.com/apache/cloudstack/pull/1493#discussion_r59915385 --- Diff: utils/src/main/java/com/cloud/utils/nio/Link.java --- @@ -453,115 +449,192 @@ public static SSLContext initSSLContext(boolean isClient) throws GeneralSecurity return sslContext; } - public static void doHandshake(SocketChannel ch, SSLEngine sslEngine, boolean isClient) throws IOException { - if (s_logger.isTraceEnabled()) { - s_logger.trace("SSL: begin Handshake, isClient: " + isClient); + public static ByteBuffer enlargeBuffer(ByteBuffer buffer, final int sessionProposedCapacity) { + if (buffer == null || sessionProposedCapacity < 0) { + return buffer; } - - SSLEngineResult engResult; - SSLSession sslSession = sslEngine.getSession(); - HandshakeStatus hsStatus; - ByteBuffer in_pkgBuf = ByteBuffer.allocate(sslSession.getPacketBufferSize() + 40); - ByteBuffer in_appBuf = ByteBuffer.allocate(sslSession.getApplicationBufferSize() + 40); - ByteBuffer out_pkgBuf = ByteBuffer.allocate(sslSession.getPacketBufferSize() + 40); - ByteBuffer out_appBuf = ByteBuffer.allocate(sslSession.getApplicationBufferSize() + 40); - int count; - ch.socket().setSoTimeout(60 * 1000); - InputStream inStream = ch.socket().getInputStream(); - // Use readCh to make sure the timeout on reading is working - ReadableByteChannel readCh = Channels.newChannel(inStream); - - if (isClient) { - hsStatus = SSLEngineResult.HandshakeStatus.NEED_WRAP; + if (sessionProposedCapacity > buffer.capacity()) { + buffer = ByteBuffer.allocate(sessionProposedCapacity); } else { - hsStatus = SSLEngineResult.HandshakeStatus.NEED_UNWRAP; + buffer = ByteBuffer.allocate(buffer.capacity() * 2); } + return buffer; + } - while (hsStatus != SSLEngineResult.HandshakeStatus.FINISHED) { - if (s_logger.isTraceEnabled()) { - s_logger.trace("SSL: Handshake status " + hsStatus); + public static ByteBuffer handleBufferUnderflow(final SSLEngine engine, ByteBuffer buffer) { + if (engine == null || buffer == null) { + return buffer; + } + if (buffer.position() < buffer.limit()) { + return buffer; + } + ByteBuffer replaceBuffer = enlargeBuffer(buffer, engine.getSession().getPacketBufferSize()); + buffer.flip(); + replaceBuffer.put(buffer); + return replaceBuffer; + } + + private static boolean doHandshakeUnwrap(final SocketChannel socketChannel, final SSLEngine sslEngine, + ByteBuffer peerAppData, ByteBuffer peerNetData, final int appBufferSize) throws IOException { + if (socketChannel == null || sslEngine == null || peerAppData == null || peerNetData == null || appBufferSize < 0) { + return false; + } + if (socketChannel.read(peerNetData) < 0) { + if (sslEngine.isInboundDone() && sslEngine.isOutboundDone()) { + return false; } - engResult = null; - if (hsStatus == SSLEngineResult.HandshakeStatus.NEED_WRAP) { - out_pkgBuf.clear(); - out_appBuf.clear(); - out_appBuf.put("Hello".getBytes()); - engResult = sslEngine.wrap(out_appBuf, out_pkgBuf); - out_pkgBuf.flip(); - int remain = out_pkgBuf.limit(); - while (remain != 0) { - remain -= ch.write(out_pkgBuf); - if (remain < 0) { - throw new IOException("Too much bytes sent?"); - } - } - } else if (hsStatus == SSLEngineResult.HandshakeStatus.NEED_UNWRAP) { - in_appBuf.clear(); - // One packet may contained multiply operation - if (in_pkgBuf.position() == 0 || !in_pkgBuf.hasRemaining()) { - in_pkgBuf.clear(); - count = 0; - try { - count = readCh.read(in_pkgBuf); - } catch (SocketTimeoutException ex) { - if (s_logger.isTraceEnabled()) { - s_logger.trace("Handshake reading time out! Cut the connection"); - } - count = -1; - } - if (count == -1) { - throw new IOException("Connection closed with -1 on reading size."); - } - in_pkgBuf.flip(); + try { + sslEngine.closeInbound(); + } catch (SSLException e) { + s_logger.warn("This SSL engine was forced to close inbound due to end of stream."); + } + sslEngine.closeOutbound(); + // After closeOutbound the engine will be set to WRAP state, + // in order to try to send a close message to the client. + return true; + } + peerNetData.flip(); + SSLEngineResult result = null; + try { + result = sslEngine.unwrap(peerNetData, peerAppData); + peerNetData.compact(); + } catch (SSLException sslException) { + s_logger.error("SSL error occurred while processing unwrap data: " + sslException.getMessage()); + sslEngine.closeOutbound(); + return true; + } + switch (result.getStatus()) { + case OK: + break; + case BUFFER_OVERFLOW: + // Will occur when peerAppData's capacity is smaller than the data derived from peerNetData's unwrap. + peerAppData = enlargeBuffer(peerAppData, appBufferSize); + break; + case BUFFER_UNDERFLOW: + // Will occur either when no data was read from the peer or when the peerNetData buffer + // was too small to hold all peer's data. + peerNetData = handleBufferUnderflow(sslEngine, peerNetData); + break; + case CLOSED: + if (sslEngine.isOutboundDone()) { + return false; + } else { + sslEngine.closeOutbound(); + break; } - engResult = sslEngine.unwrap(in_pkgBuf, in_appBuf); - ByteBuffer tmp_pkgBuf = ByteBuffer.allocate(sslSession.getPacketBufferSize() + 40); - int loop_count = 0; - while (engResult.getStatus() == SSLEngineResult.Status.BUFFER_UNDERFLOW) { - // The client is too slow? Cut it and let it reconnect - if (loop_count > 10) { - throw new IOException("Too many times in SSL BUFFER_UNDERFLOW, disconnect guest."); - } - // We need more packets to complete this operation - if (s_logger.isTraceEnabled()) { - s_logger.trace("SSL: Buffer underflowed, getting more packets"); - } - tmp_pkgBuf.clear(); - count = ch.read(tmp_pkgBuf); - if (count == -1) { - throw new IOException("Connection closed with -1 on reading size."); - } - tmp_pkgBuf.flip(); - - in_pkgBuf.mark(); - in_pkgBuf.position(in_pkgBuf.limit()); - in_pkgBuf.limit(in_pkgBuf.limit() + tmp_pkgBuf.limit()); - in_pkgBuf.put(tmp_pkgBuf); - in_pkgBuf.reset(); + default: + throw new IllegalStateException("Invalid SSL status: " + result.getStatus()); + } + return true; + } - in_appBuf.clear(); - engResult = sslEngine.unwrap(in_pkgBuf, in_appBuf); - loop_count++; + private static boolean doHandshakeWrap(final SocketChannel socketChannel, final SSLEngine sslEngine, + ByteBuffer myAppData, ByteBuffer myNetData, ByteBuffer peerNetData, + final int netBufferSize) throws IOException { + if (socketChannel == null || sslEngine == null || myNetData == null || peerNetData == null + || myAppData == null || netBufferSize < 0) { + return false; + } + myNetData.clear(); + SSLEngineResult result = null; + try { + result = sslEngine.wrap(myAppData, myNetData); + } catch (SSLException sslException) { + s_logger.error("SSL error occurred while processing wrap data: " + sslException.getMessage()); + sslEngine.closeOutbound(); + return true; + } + switch (result.getStatus()) { + case OK : + myNetData.flip(); + while (myNetData.hasRemaining()) { + socketChannel.write(myNetData); } - } else if (hsStatus == SSLEngineResult.HandshakeStatus.NEED_TASK) { - Runnable run; - while ((run = sslEngine.getDelegatedTask()) != null) { - if (s_logger.isTraceEnabled()) { - s_logger.trace("SSL: Running delegated task!"); + break; + case BUFFER_OVERFLOW: + // Will occur if there is not enough space in myNetData buffer to write all the data + // that would be generated by the method wrap. Since myNetData is set to session's packet + // size we should not get to this point because SSLEngine is supposed to produce messages + // smaller or equal to that, but a general handling would be the following: + myNetData = enlargeBuffer(myNetData, netBufferSize); + break; + case BUFFER_UNDERFLOW: + throw new SSLException("Buffer underflow occurred after a wrap. We should not reach here."); + case CLOSED: + try { + myNetData.flip(); + while (myNetData.hasRemaining()) { + socketChannel.write(myNetData); } - run.run(); + // At this point the handshake status will probably be NEED_UNWRAP + // so we make sure that peerNetData is clear to read. + peerNetData.clear(); + } catch (Exception e) { + s_logger.error("Failed to send server's CLOSE message due to socket channel's failure."); + } + break; + default: + throw new IllegalStateException("Invalid SSL status: " + result.getStatus()); + } + return true; + } + + public static boolean doHandshake(final SocketChannel socketChannel, final SSLEngine sslEngine, final boolean isClient) throws IOException { + if (socketChannel == null || sslEngine == null) { + return false; + } + final int appBufferSize = sslEngine.getSession().getApplicationBufferSize(); + final int netBufferSize = sslEngine.getSession().getPacketBufferSize(); + ByteBuffer myAppData = ByteBuffer.allocate(appBufferSize); + ByteBuffer peerAppData = ByteBuffer.allocate(appBufferSize); + ByteBuffer myNetData = ByteBuffer.allocate(netBufferSize); + ByteBuffer peerNetData = ByteBuffer.allocate(netBufferSize); + + long startTimeMills = System.currentTimeMillis(); + + HandshakeStatus handshakeStatus = sslEngine.getHandshakeStatus(); + while (handshakeStatus != SSLEngineResult.HandshakeStatus.FINISHED + && handshakeStatus != SSLEngineResult.HandshakeStatus.NOT_HANDSHAKING) { + long timeTaken = System.currentTimeMillis() - startTimeMills; + if (timeTaken > 60000) { + if (isClient && timeTaken > 120000) { + s_logger.warn("SSL Handshake has taken more than 120s to connect to remote host: " + socketChannel.getRemoteAddress() + --- End diff -- I re-checked, I had introduce this for test; while in previous code both server/client side handshakes assumed 60s timeout so I'll keep that single value for failing fast. > CloudStack Server degrades when a lot of connections on port 8250 > ----------------------------------------------------------------- > > Key: CLOUDSTACK-9348 > URL: https://issues.apache.org/jira/browse/CLOUDSTACK-9348 > Project: CloudStack > Issue Type: Bug > Security Level: Public(Anyone can view this level - this is the > default.) > Reporter: Rohit Yadav > Assignee: Rohit Yadav > Fix For: 4.9.0 > > > An intermittent issue was found with a large CloudStack deployment, where > servers could not keep agents connected on port 8250. > All connections are handled by accept() in NioConnection: > https://github.com/apache/cloudstack/blob/master/utils/src/main/java/com/cloud/utils/nio/NioConnection.java#L125 > A new connection is handled by accept() which does blocking SSL handshake. A > good fix would be to make this non-blocking and handle expensive tasks in > separate threads/pool. This way the main IO loop won't be blocked and can > continue to serve other agents/clients. -- This message was sent by Atlassian JIRA (v6.3.4#6332)