On Wed, Jun 14, 2017 at 06:53:52PM +0200, Knut Omang wrote: > If an offset of ports is specified to the inet_listen_saddr function(), > and two or more processes tries to bind from these ports at the same time, > occasionally more than one process may be able to bind to the same > port. The condition is detected by listen() but too late to avoid a failure. > > This function is called by socket_listen() and used > by all socket listening code in QEMU, so all cases where any form of dynamic > port selection is used should be subject to this issue. > > Add code to close and re-establish the socket when this > condition is observed, hiding the race condition from the user. > > This has been developed and tested by means of the > test-listen unit test in the previous commit. > Enable the test for make check now that it passes. > > Signed-off-by: Knut Omang <knut.om...@oracle.com> > Reviewed-by: Bhavesh Davda <bhavesh.da...@oracle.com> > Reviewed-by: Yuval Shaia <yuval.sh...@oracle.com> > Reviewed-by: Girish Moodalbail <girish.moodalb...@oracle.com> > --- > tests/Makefile.include | 2 +- > util/qemu-sockets.c | 159 ++++++++++++++++++++++++++++-------------- > 2 files changed, 108 insertions(+), 53 deletions(-) > > diff --git a/tests/Makefile.include b/tests/Makefile.include > index 22bb97e..c38f94e 100644 > --- a/tests/Makefile.include > +++ b/tests/Makefile.include > @@ -127,7 +127,7 @@ check-unit-y += tests/test-bufferiszero$(EXESUF) > gcov-files-check-bufferiszero-y = util/bufferiszero.c > check-unit-y += tests/test-uuid$(EXESUF) > check-unit-y += tests/ptimer-test$(EXESUF) > -#check-unit-y += tests/test-listen$(EXESUF) > +check-unit-y += tests/test-listen$(EXESUF) > gcov-files-ptimer-test-y = hw/core/ptimer.c > check-unit-y += tests/test-qapi-util$(EXESUF) > gcov-files-test-qapi-util-y = qapi/qapi-util.c > diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c > index 852773d..7b118b4 100644 > --- a/util/qemu-sockets.c > +++ b/util/qemu-sockets.c > @@ -149,6 +149,94 
@@ int inet_ai_family_from_address(InetSocketAddress *addr, > return PF_UNSPEC; > } > > +static int create_fast_reuse_socket(struct addrinfo *e, Error **errp) > +{ > + int slisten = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol); > + if (slisten < 0) { > + if (!e->ai_next) { > + error_setg_errno(errp, errno, "Failed to create socket"); > + } > + return -1; > + } > + > + socket_set_fast_reuse(slisten); > + return slisten; > +} > + > +static int try_bind(int socket, InetSocketAddress *saddr, struct addrinfo *e) > +{ > +#ifndef IPV6_V6ONLY > + return bind(socket, e->ai_addr, e->ai_addrlen); > +#else > + /* > + * Deals with first & last cases in matrix in comment > + * for inet_ai_family_from_address(). > + */ > + int v6only = > + ((!saddr->has_ipv4 && !saddr->has_ipv6) || > + (saddr->has_ipv4 && saddr->ipv4 && > + saddr->has_ipv6 && saddr->ipv6)) ? 0 : 1; > + int stat; > + > + rebind: > + if (e->ai_family == PF_INET6) { > + qemu_setsockopt(socket, IPPROTO_IPV6, IPV6_V6ONLY, &v6only, > + sizeof(v6only)); > + } > + > + stat = bind(socket, e->ai_addr, e->ai_addrlen); > + if (!stat) { > + return 0; > + } > + > + /* If we got EADDRINUSE from an IPv6 bind & v6only is unset, > + * it could be that the IPv4 port is already claimed, so retry > + * with v6only set > + */ > + if (e->ai_family == PF_INET6 && errno == EADDRINUSE && !v6only) { > + v6only = 1; > + goto rebind; > + } > + return stat; > +#endif > +} > + > +static int try_bind_listen(int *socket, InetSocketAddress *saddr, > + struct addrinfo *e, int port, Error **errp) > +{ > + int s = *socket; > + int ret; > + > + inet_setport(e, port); > + ret = try_bind(s, saddr, e); > + if (ret) { > + if (errno != EADDRINUSE) { > + error_setg_errno(errp, errno, "Failed to bind socket"); > + } > + return errno; > + } > + if (listen(s, 1) == 0) { > + return 0; > + } > + if (errno == EADDRINUSE) { > + /* We got to bind the socket to a port but someone else managed > + * to bind to the same port and beat us to listen on it! 
> + * Recreate the socket and return EADDRINUSE to preserve the > + * expected state by the caller: > + */ > + closesocket(s); > + s = create_fast_reuse_socket(e, errp); > + if (s < 0) { > + return errno; > + } > + *socket = s; > + errno = EADDRINUSE; > + return errno; > + } > + error_setg_errno(errp, errno, "Failed to listen on socket"); > + return errno; > +} > + > static int inet_listen_saddr(InetSocketAddress *saddr, > int port_offset, > bool update_addr, > @@ -158,7 +246,9 @@ static int inet_listen_saddr(InetSocketAddress *saddr, > char port[33]; > char uaddr[INET6_ADDRSTRLEN+1]; > char uport[33]; > - int slisten, rc, port_min, port_max, p; > + int rc, port_min, port_max, p; > + int slisten = 0; > + int saved_errno = 0; > Error *err = NULL; > > memset(&ai,0, sizeof(ai)); > @@ -210,75 +300,40 @@ static int inet_listen_saddr(InetSocketAddress *saddr, > return -1; > } > > - /* create socket + bind */ > + /* create socket + bind/listen */ > for (e = res; e != NULL; e = e->ai_next) { > getnameinfo((struct sockaddr*)e->ai_addr,e->ai_addrlen, > uaddr,INET6_ADDRSTRLEN,uport,32, > NI_NUMERICHOST | NI_NUMERICSERV); > - slisten = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol); > + > + slisten = create_fast_reuse_socket(e, &err); > if (slisten < 0) { > - if (!e->ai_next) { > - error_setg_errno(errp, errno, "Failed to create socket"); > - } > continue; > } > > - socket_set_fast_reuse(slisten); > - > port_min = inet_getport(e); > port_max = saddr->has_to ? saddr->to + port_offset : port_min; > for (p = port_min; p <= port_max; p++) { > -#ifdef IPV6_V6ONLY > - /* > - * Deals with first & last cases in matrix in comment > - * for inet_ai_family_from_address(). > - */ > - int v6only = > - ((!saddr->has_ipv4 && !saddr->has_ipv6) || > - (saddr->has_ipv4 && saddr->ipv4 && > - saddr->has_ipv6 && saddr->ipv6)) ? 
0 : 1; > -#endif > - inet_setport(e, p); > -#ifdef IPV6_V6ONLY > - rebind: > - if (e->ai_family == PF_INET6) { > - qemu_setsockopt(slisten, IPPROTO_IPV6, IPV6_V6ONLY, &v6only, > - sizeof(v6only)); > - } > -#endif > - if (bind(slisten, e->ai_addr, e->ai_addrlen) == 0) { > - goto listen; > - } > - > -#ifdef IPV6_V6ONLY > - /* If we got EADDRINUSE from an IPv6 bind & V6ONLY is unset, > - * it could be that the IPv4 port is already claimed, so retry > - * with V6ONLY set > - */ > - if (e->ai_family == PF_INET6 && errno == EADDRINUSE && !v6only) { > - v6only = 1; > - goto rebind; > - } > -#endif > - > - if (p == port_max) { > - if (!e->ai_next) { > - error_setg_errno(errp, errno, "Failed to bind socket"); > - } > + int eno = try_bind_listen(&slisten, saddr, e, p, &err); > + if (!eno) { > + goto listen_ok; > + } else if (eno != EADDRINUSE) { > + goto listen_failed; > } > } > + } > + error_setg_errno(errp, errno, "Failed to find available port"); > + > +listen_failed: > + saved_errno = errno; > + if (slisten >= 0) { > closesocket(slisten); > } > freeaddrinfo(res); > + errno = saved_errno; > return -1; > > -listen: > - if (listen(slisten,1) != 0) { > - error_setg_errno(errp, errno, "Failed to listen on socket"); > - closesocket(slisten); > - freeaddrinfo(res); > - return -1; > - } > +listen_ok: > if (update_addr) { > g_free(saddr->host); > saddr->host = g_strdup(uaddr);
I find this patch rather hard to review for correctness, because it mixes a huge amount of code movement / refactoring with the bug fix. Can you split this up into 2 (or possibly more) patches, so we have 1 (or more) no-functional-change refactoring steps, and then the bug fix on its own? Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|