Thanks for your quick response. See inline. On Mon, Nov 24, 2014 at 7:31 AM, Matt Caswell via RT <r...@openssl.org> wrote:
> On Sun Nov 23 19:09:46 2014, prav...@viptela.com wrote: > > This happens when the server is unreachable. The client when it is trying > > to resend the client_hello is barfing on fragment->frag value. Is this > > known issue ? Let me know if you need any more info. > > > > Not consistently reproducible. Please let us know if I can work around > this > > issue. > > > Curious. > > Can you confirm the OpenSSL version and platform that you are using? > *[viptela.com <http://viptela.com>] Version: 1.0.1j, Platform: mips64 * > > The only way I can see for frag->fragment to be NULL is if s->init_num is 0 > *[viptela.com <http://viptela.com>] Yes that is correct.* *(gdb) frame 1* *#1 0x000000fff7e35530 in dtls1_retransmit_message (s=0x1206dd750, seq=0, frag_off=0, found=0xffffdfec60) at d1_both.c:1289* *1289 memcpy(s->init_buf->data, frag->fragment,* *(gdb) set pr pr* *(gdb) set height 0* *(gdb) p *s* *$1 = {* * version = 65279,* * type = 4096,* * method = 0xfff7e74e28 <DTLSv1_client_method_data.14990>,* * rbio = 0x120869800,* * wbio = 0x1205ce130,* * bbio = 0x1205ce130,* * rwstate = 1,* * in_handshake = 1,* * handshake_func = 0xfff7e29a88 <dtls1_connect>,* * server = 0,* * new_session = 0,* * quiet_shutdown = 0,* * shutdown = 0,* * state = 4384,* * rstate = 240,* * init_buf = 0x1205ce110,* * init_msg = 0x0,* * init_num = 0,* * init_off = 0,* * packet = 0x1205bb063 "",* * packet_length = 0,* * s2 = 0x0,* * s3 = 0x120774a60,* * d1 = 0x1207560c0,* * read_ahead = 1,* * msg_callback = 0x0,* * msg_callback_arg = 0x0,* * hit = 0,* * param = 0x120860b20,* * cipher_list = 0x0,* * cipher_list_by_id = 0x0,* * mac_flags = 0,* * enc_read_ctx = 0x0,* * read_hash = 0x0,* * expand = 0x0,* * enc_write_ctx = 0x0,* * write_hash = 0x0,* * compress = 0x0,* * cert = 0x120869880,* * sid_ctx_length = 0,* * sid_ctx = '\000' <repeats 31 times>,* * session = 0x1206a6730,* * generate_session_id = 0x0,* * verify_mode = 5,* * verify_callback = 0x12000f7b0 <vdaemon_verify_callback>,* * info_callback = 0x0,* * error = 0,* * error_code = 0,* * psk_client_callback = 0x0,* * psk_server_callback = 0x0,* * ctx = 0x12078b450,* * debug = 0,* * verify_result = 0,* * ex_data = {* * sk = 0x0,* * dummy = 0* * },* * client_CA = 0x0,* * references = 1,* * options = 4100,* * mode = 0,* * max_cert_list = 102400,* * first_packet = 0,* * client_version = 65279,* * max_send_fragment = 16384,* * tlsext_debug_cb = 0x0,* * tlsext_debug_arg = 0x0,* * tlsext_hostname = 0x0,* * servername_done = 0,* * tlsext_status_type = -1,* * tlsext_status_expected = 0,* * tlsext_ocsp_ids = 0x0,* * tlsext_ocsp_exts = 0x0,* * tlsext_ocsp_resp = 0x0,* * tlsext_ocsp_resplen = -1,* * tlsext_ticket_expected = 0,* * tlsext_ecpointformatlist_length = 0,* * tlsext_ecpointformatlist = 0x0,* * tlsext_ellipticcurvelist_length = 0,* * tlsext_ellipticcurvelist = 0x0,* * tlsext_opaque_prf_input = 0x0,* * tlsext_opaque_prf_input_len = 0,* * tlsext_session_ticket = 0x0,* * tls_session_ticket_ext_cb = 0x0,* * tls_session_ticket_ext_cb_arg = 0x0,* * tls_session_secret_cb = 0x0,* * tls_session_secret_cb_arg = 0x0,* * initial_ctx = 0x12078b450,* * next_proto_negotiated = 0x0,* * next_proto_negotiated_len = 0 '\000',* * srtp_profiles = 0x0,* * srtp_profile = 0x0,* * tlsext_heartbeat = 0,* * tlsext_hb_pending = 0,* * tlsext_hb_seq = 0,* * renegotiate = 0,* * srp_ctx = {* * SRP_cb_arg = 0x0,* * TLS_ext_srp_username_callback = 0x0,* * SRP_verify_param_callback = 0x0,* * SRP_give_srp_client_pwd_callback = 0x0,* * login = 0x0,* * N = 0x0,* * g = 0x0,* * s = 0x0,* * B = 0x0,* * A = 0x0,* * a = 0x0,* * b = 0x0,* * v = 0x0,* * info = 0x0,* * strength = 1024,* * srp_Mask = 0* * }* *}* *(gdb) * when the message is buffered in the first place. Messages get buffered in > dtls1_buffer_message in d1_both.c: > > frag = dtls1_hm_fragment_new(s->init_num, 0); > if (!frag) > return 0; > > memcpy(frag->fragment, s->init_buf->data, s->init_num); > > If init_num is 0 then the memcpy does nothing and so will not fail if > frag->fragment is NULL. > dtls1_hm_fragment_new does this: > > unsigned char *buf = NULL; > unsigned char *bitmask = NULL; > > frag = (hm_fragment *)OPENSSL_malloc(sizeof(hm_fragment)); > if ( frag == NULL) > return NULL; > > if (frag_len) > { > buf = (unsigned char *)OPENSSL_malloc(frag_len); > if ( buf == NULL) > { > OPENSSL_free(frag); > return NULL; > } > } > > /* zero length fragment gets zero frag->fragment */ > frag->fragment = buf; > > So if s->init_num is 0 then frag_len is 0 and frag->fragment gets set to > NULL. > > *[viptela.com <http://viptela.com>] * *(gdb) p frag* *$2 = (hm_fragment *) 0x1205ce0a0* *(gdb) p *frag* *$3 = {* * msg_header = {* * type = 1 '\001',* * msg_len = 55,* * seq = 0,* * frag_off = 0,* * frag_len = 55,* * is_ccs = 0,* * saved_retransmit_state = {* * enc_write_ctx = 0x0,* * write_hash = 0x0,* * compress = 0x0,* * session = 0x1206a6730,* * epoch = 0* * }* * },* * fragment = 0x0,* * reassembly = 0x0* *} * dtls1_buffer_message gets called from a number of places (client side): > - In dtls1_client_hello > - In dtls1_send_client_key_exchange > - In dtls1_send_client_verify > - In dtls1_send_client_certificate > - In dtls1_send_finished > - In dtls1_send_change_cipher_spec > > Based on your stack trace and description it seems likely that the one > we're > interested in is dtls1_client_hello which does this: > d=p= &(buf[DTLS1_HM_HEADER_LENGTH]); /* DTLS1_HM_HEADER_LENGTH is 12 */ > ... > /* Do lots of stuff that increments p but doesn't change buf that I can > see */ > ... > /* number of bytes to write */ > s->init_num=p-buf; > s->init_off=0; > > /* buffer the message to handle re-xmits */ > dtls1_buffer_message(s, 0); > > So at the moment I can't see how frag->fragment is ending up being NULL in > your > situation. > > *[viptela.com <http://viptela.com>]* *Agreed. All good points. Just another data point, is that we ran valgrind on another node, saw a leak in this related code. See if this helps you.* *==697== HEAP SUMMARY: ==697== in use at exit: 1,282,108 bytes in 20,788 blocks ==697== total heap usage: 664,349 allocs, 643,561 frees, 105,419,006 bytes allocated ==697== ==697== 120 bytes in 1 blocks are definitely lost in loss record 27 of 96 ==697== at 0x4A05F80: malloc (vg_replace_malloc.c:296) ==697== by 0x5BFBC86: default_malloc_ex (mem.c:79) ==697== by 0x5BFC315: CRYPTO_malloc (mem.c:308) ==697== by 0x5955875: dtls1_hm_fragment_new (d1_both.c:199) ==697== by 0x5956817: dtls1_reassemble_fragment (d1_both.c:625) ==697== by 0x595720A: dtls1_get_message_fragment (d1_both.c:852) ==697== by 0x5956174: dtls1_get_message (d1_both.c:443) ==697== by 0x59504DA: dtls1_get_hello_verify (d1_clnt.c:918) ==697== by 0x594F5AB: dtls1_connect (d1_clnt.c:360) ==697== by 0x595B591: SSL_connect (ssl_lib.c:949) ==697== by 0x430409: ssl_connect_timer_cb (vdaemon_peer.c:303) ==697== by 0x48573E: timer_exec_pri (timer.c:612) ==697== ==697== LEAK SUMMARY: ==697== definitely lost: 120 bytes in 1 blocks ==697== indirectly lost: 0 bytes in 0 blocks ==697== possibly lost: 0 bytes in 0 blocks ==697== still reachable: 1,281,988 bytes in 20,787 blocks ==697== suppressed: 0 bytes in 0 blocks ==697== Reachable blocks (those to which a pointer was found) are not shown. ==697== To see them, rerun with: --leak-check=full --show-leak-kinds=all ==697== ==697== For counts of detected and suppressed errors, rerun with: -v ==697== Use --track-origins=yes to see where uninitialised values come from * *==697== ERROR SUMMARY: 126394 errors from 117 contexts (suppressed: 1 from 1)* > If I sent you some instrumented code would you be able to apply it and see > if > that helps us narrow down what's going on? > *[viptela.com <http://viptela.com>] * *Ofcourse. But as I mentioned earlier, we dont know the likelyhood of this happening again. Please send me any instrumented patch. We will keep trying.* *Regards* *Praveen Kariyanahalli* > > Thanks > > Matt > > -- Regards -Praveen