Author: whu Date: Wed May 20 11:03:59 2020 New Revision: 361275 URL: https://svnweb.freebsd.org/changeset/base/361275
Log: HyperV socket implementation for FreeBSD This change adds Hyper-V socket feature in FreeBSD. New socket address family AF_HYPERV and its kernel support are added. Submitted by: Wei Hu <w...@microsoft.com> Reviewed by: Dexuan Cui <de...@microsoft.com> Relnotes: yes Sponsored by: Microsoft Differential Revision: https://reviews.freebsd.org/D24061 Added: head/sys/dev/hyperv/hvsock/ head/sys/dev/hyperv/hvsock/hv_sock.c (contents, props changed) head/sys/dev/hyperv/hvsock/hv_sock.h (contents, props changed) head/sys/modules/hyperv/hvsock/ head/sys/modules/hyperv/hvsock/Makefile (contents, props changed) Modified: head/sys/conf/files.x86 head/sys/dev/hyperv/include/vmbus.h head/sys/dev/hyperv/vmbus/vmbus.c head/sys/dev/hyperv/vmbus/vmbus_br.c head/sys/dev/hyperv/vmbus/vmbus_brvar.h head/sys/dev/hyperv/vmbus/vmbus_chan.c head/sys/dev/hyperv/vmbus/vmbus_chanvar.h head/sys/dev/hyperv/vmbus/vmbus_reg.h head/sys/modules/hyperv/Makefile head/sys/sys/socket.h Modified: head/sys/conf/files.x86 ============================================================================== --- head/sys/conf/files.x86 Wed May 20 11:01:10 2020 (r361274) +++ head/sys/conf/files.x86 Wed May 20 11:03:59 2020 (r361275) @@ -133,6 +133,7 @@ dev/hwpmc/hwpmc_core.c optional hwpmc dev/hwpmc/hwpmc_uncore.c optional hwpmc dev/hwpmc/hwpmc_tsc.c optional hwpmc dev/hwpmc/hwpmc_x86.c optional hwpmc +dev/hyperv/hvsock/hv_sock.c optional hyperv dev/hyperv/input/hv_kbd.c optional hyperv dev/hyperv/input/hv_kbdc.c optional hyperv dev/hyperv/pcib/vmbus_pcib.c optional hyperv pci Added: head/sys/dev/hyperv/hvsock/hv_sock.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/dev/hyperv/hvsock/hv_sock.c Wed May 20 11:03:59 2020 (r361275) @@ -0,0 +1,1748 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2020 Microsoft Corp. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/domain.h> +#include <sys/lock.h> +#include <sys/kernel.h> +#include <sys/types.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/sysproto.h> +#include <sys/systm.h> +#include <sys/sockbuf.h> +#include <sys/sx.h> +#include <sys/uio.h> + +#include <net/vnet.h> + +#include <dev/hyperv/vmbus/vmbus_reg.h> + +#include "hv_sock.h" + +#define HVSOCK_DBG_NONE 0x0 +#define HVSOCK_DBG_INFO 0x1 +#define HVSOCK_DBG_ERR 0x2 +#define HVSOCK_DBG_VERBOSE 0x3 + + +SYSCTL_NODE(_net, OID_AUTO, hvsock, CTLFLAG_RD, 0, "HyperV socket"); + +static int hvs_dbg_level; +SYSCTL_INT(_net_hvsock, OID_AUTO, hvs_dbg_level, CTLFLAG_RWTUN, &hvs_dbg_level, + 0, "hyperv socket debug level: 0 = none, 1 = info, 2 = error, 3 = verbose"); + + +#define HVSOCK_DBG(level, ...) 
do { \ + if (hvs_dbg_level >= (level)) \ + printf(__VA_ARGS__); \ + } while (0) + +MALLOC_DEFINE(M_HVSOCK, "hyperv_socket", "hyperv socket control structures"); + +/* The MTU is 16KB per host side's design */ +#define HVSOCK_MTU_SIZE (1024 * 16) +#define HVSOCK_SEND_BUF_SZ (PAGE_SIZE - sizeof(struct vmpipe_proto_header)) + +#define HVSOCK_HEADER_LEN (sizeof(struct hvs_pkt_header)) + +#define HVSOCK_PKT_LEN(payload_len) (HVSOCK_HEADER_LEN + \ + roundup2(payload_len, 8) + \ + sizeof(uint64_t)) + + +static struct domain hv_socket_domain; + +/* + * HyperV Transport sockets + */ +static struct pr_usrreqs hvs_trans_usrreqs = { + .pru_attach = hvs_trans_attach, + .pru_bind = hvs_trans_bind, + .pru_listen = hvs_trans_listen, + .pru_accept = hvs_trans_accept, + .pru_connect = hvs_trans_connect, + .pru_peeraddr = hvs_trans_peeraddr, + .pru_sockaddr = hvs_trans_sockaddr, + .pru_soreceive = hvs_trans_soreceive, + .pru_sosend = hvs_trans_sosend, + .pru_disconnect = hvs_trans_disconnect, + .pru_close = hvs_trans_close, + .pru_detach = hvs_trans_detach, + .pru_shutdown = hvs_trans_shutdown, + .pru_abort = hvs_trans_abort, +}; + +/* + * Definitions of protocols supported in HyperV socket domain + */ +static struct protosw hv_socket_protosw[] = { +{ + .pr_type = SOCK_STREAM, + .pr_domain = &hv_socket_domain, + .pr_protocol = HYPERV_SOCK_PROTO_TRANS, + .pr_flags = PR_CONNREQUIRED, + .pr_init = hvs_trans_init, + .pr_usrreqs = &hvs_trans_usrreqs, +}, +}; + +static struct domain hv_socket_domain = { + .dom_family = AF_HYPERV, + .dom_name = "hyperv", + .dom_protosw = hv_socket_protosw, + .dom_protoswNPROTOSW = &hv_socket_protosw[nitems(hv_socket_protosw)] +}; + +VNET_DOMAIN_SET(hv_socket_); + +#define MAX_PORT ((uint32_t)0xFFFFFFFF) +#define MIN_PORT ((uint32_t)0x0) + +/* 00000000-facb-11e6-bd58-64006a7986d3 */ +static const struct hyperv_guid srv_id_template = { + .hv_guid = { + 0x00, 0x00, 0x00, 0x00, 0xcb, 0xfa, 0xe6, 0x11, + 0xbd, 0x58, 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3 } +}; + 
+static int hvsock_br_callback(void *, int, void *); +static uint32_t hvsock_canread_check(struct hvs_pcb *); +static uint32_t hvsock_canwrite_check(struct hvs_pcb *); +static int hvsock_send_data(struct vmbus_channel *chan, + struct uio *uio, uint32_t to_write, struct sockbuf *sb); + + + +/* Globals */ +static struct sx hvs_trans_socks_sx; +static struct mtx hvs_trans_socks_mtx; +static LIST_HEAD(, hvs_pcb) hvs_trans_bound_socks; +static LIST_HEAD(, hvs_pcb) hvs_trans_connected_socks; +static uint32_t previous_auto_bound_port; + +static void +hvsock_print_guid(struct hyperv_guid *guid) +{ + unsigned char *p = (unsigned char *)guid; + + HVSOCK_DBG(HVSOCK_DBG_INFO, + "0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x\n", + *(unsigned int *)p, + *((unsigned short *) &p[4]), + *((unsigned short *) &p[6]), + p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); +} + +static bool +is_valid_srv_id(const struct hyperv_guid *id) +{ + return !memcmp(&id->hv_guid[4], + &srv_id_template.hv_guid[4], sizeof(struct hyperv_guid) - 4); +} + +static unsigned int +get_port_by_srv_id(const struct hyperv_guid *srv_id) +{ + return *((const unsigned int *)srv_id); +} + +static void +set_port_by_srv_id(struct hyperv_guid *srv_id, unsigned int port) +{ + *((unsigned int *)srv_id) = port; +} + + +/* + * Unlink pcb from the bound and/or connected socket list, as selected by + * the HVS_LIST_BOUND / HVS_LIST_CONNECTED bits in 'list'. A NULL pcb is + * ignored. Each selected list is searched first, so a pcb that is not on + * that list is left untouched. + * NOTE(review): the locked wrapper hvs_remove_socket_from_list() takes + * hvs_trans_socks_mtx around this path; presumably every caller must + * hold that mutex - confirm. + */ +static void +__hvs_remove_pcb_from_list(struct hvs_pcb *pcb, unsigned char list) +{ + struct hvs_pcb *p = NULL; + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb); + + if (!pcb) + return; + + if (list & HVS_LIST_BOUND) { + LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next) { + if (p == pcb) { + LIST_REMOVE(p, bound_next); + /* + * Stop here: p is no longer linked, so + * LIST_FOREACH must not advance through it. + */ + break; + } + } + } + + if (list & HVS_LIST_CONNECTED) { + LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next) { + if (p == pcb) { + LIST_REMOVE(p, connected_next); + /* Same reason as above. */ + break; + } + } + } +} + +static void +__hvs_remove_socket_from_list(struct socket *so, unsigned char list) +{ + struct hvs_pcb *pcb = so2hvspcb(so); + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb); +
+ __hvs_remove_pcb_from_list(pcb, list); +} + +static void +__hvs_insert_socket_on_list(struct socket *so, unsigned char list) +{ + struct hvs_pcb *pcb = so2hvspcb(so); + + if (list & HVS_LIST_BOUND) + LIST_INSERT_HEAD(&hvs_trans_bound_socks, + pcb, bound_next); + + if (list & HVS_LIST_CONNECTED) + LIST_INSERT_HEAD(&hvs_trans_connected_socks, + pcb, connected_next); +} + +void +hvs_remove_socket_from_list(struct socket *so, unsigned char list) +{ + if (!so || !so->so_pcb) { + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: socket or so_pcb is null\n", __func__); + return; + } + + mtx_lock(&hvs_trans_socks_mtx); + __hvs_remove_socket_from_list(so, list); + mtx_unlock(&hvs_trans_socks_mtx); +} + +static void +hvs_insert_socket_on_list(struct socket *so, unsigned char list) +{ + if (!so || !so->so_pcb) { + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: socket or so_pcb is null\n", __func__); + return; + } + + mtx_lock(&hvs_trans_socks_mtx); + __hvs_insert_socket_on_list(so, list); + mtx_unlock(&hvs_trans_socks_mtx); +} + +static struct socket * +__hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list) +{ + struct hvs_pcb *p = NULL; + + if (list & HVS_LIST_BOUND) + LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next) + if (p->so != NULL && + addr->hvs_port == p->local_addr.hvs_port) + return p->so; + + if (list & HVS_LIST_CONNECTED) + LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next) + if (p->so != NULL && + addr->hvs_port == p->local_addr.hvs_port) + return p->so; + + return NULL; +} + +static struct socket * +hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list) +{ + struct socket *s = NULL; + + mtx_lock(&hvs_trans_socks_mtx); + s = __hvs_find_socket_on_list(addr, list); + mtx_unlock(&hvs_trans_socks_mtx); + + return s; +} + +static inline void +hvs_addr_set(struct sockaddr_hvs *addr, unsigned int port) +{ + memset(addr, 0, sizeof(*addr)); + addr->sa_family = AF_HYPERV; + addr->hvs_port = port; +} + +void +hvs_addr_init(struct sockaddr_hvs 
*addr, const struct hyperv_guid *svr_id) +{ + hvs_addr_set(addr, get_port_by_srv_id(svr_id)); +} + +int +hvs_trans_lock(void) +{ + sx_xlock(&hvs_trans_socks_sx); + return (0); +} + +void +hvs_trans_unlock(void) +{ + sx_xunlock(&hvs_trans_socks_sx); +} + +void +hvs_trans_init(void) +{ + /* Skip initialization of globals for non-default instances. */ + if (!IS_DEFAULT_VNET(curvnet)) + return; + + if (vm_guest != VM_GUEST_HV) + return; + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: HyperV Socket hvs_trans_init called\n", __func__); + + /* Initialize Globals */ + previous_auto_bound_port = MAX_PORT; + sx_init(&hvs_trans_socks_sx, "hvs_trans_sock_sx"); + mtx_init(&hvs_trans_socks_mtx, + "hvs_trans_socks_mtx", NULL, MTX_DEF); + LIST_INIT(&hvs_trans_bound_socks); + LIST_INIT(&hvs_trans_connected_socks); +} + +/* + * Allocate a zeroed hvs_pcb and link it with the socket (pcb->so and + * so->so_pcb point at each other on success). + * Called in two cases: + * 1) When user calls socket(); + * 2) When we accept new incoming connection and call sonewconn(). + * Returns 0 on success, ESOCKTNOSUPPORT if the socket is not SOCK_STREAM, + * EPROTONOSUPPORT if proto is neither 0 nor HYPERV_SOCK_PROTO_TRANS, + * EISCONN if a pcb is already attached, or ENOMEM if the M_NOWAIT + * allocation fails. + */ +int +hvs_trans_attach(struct socket *so, int proto, struct thread *td) +{ + struct hvs_pcb *pcb = so2hvspcb(so); + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: HyperV Socket hvs_trans_attach called\n", __func__); + + if (so->so_type != SOCK_STREAM) + return (ESOCKTNOSUPPORT); + + if (proto != 0 && proto != HYPERV_SOCK_PROTO_TRANS) + return (EPROTONOSUPPORT); + + if (pcb != NULL) + return (EISCONN); + pcb = malloc(sizeof(struct hvs_pcb), M_HVSOCK, M_NOWAIT | M_ZERO); + if (pcb == NULL) + return (ENOMEM); + + pcb->so = so; + so->so_pcb = (void *)pcb; + + return (0); +} + +void +hvs_trans_detach(struct socket *so) +{ + struct hvs_pcb *pcb; + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: HyperV Socket hvs_trans_detach called\n", __func__); + + (void) hvs_trans_lock(); + pcb = so2hvspcb(so); + if (pcb == NULL) { + hvs_trans_unlock(); + return; + } + + if (SOLISTENING(so)) { + bzero(pcb, sizeof(*pcb)); + free(pcb, M_HVSOCK); + } + + so->so_pcb = NULL; + + hvs_trans_unlock(); +} + +int +hvs_trans_bind(struct socket *so, struct sockaddr *addr, struct thread
*td) +{ + struct hvs_pcb *pcb = so2hvspcb(so); + struct sockaddr_hvs *sa = (struct sockaddr_hvs *) addr; + int error = 0; + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: HyperV Socket hvs_trans_bind called\n", __func__); + + if (sa == NULL) { + return (EINVAL); + } + + if (pcb == NULL) { + return (EINVAL); + } + + if (sa->sa_family != AF_HYPERV) { + HVSOCK_DBG(HVSOCK_DBG_ERR, + "%s: Not supported, sa_family is %u\n", + __func__, sa->sa_family); + return (EAFNOSUPPORT); + } + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: binding port = 0x%x\n", __func__, sa->hvs_port); + + mtx_lock(&hvs_trans_socks_mtx); + if (__hvs_find_socket_on_list(sa, + HVS_LIST_BOUND | HVS_LIST_CONNECTED)) { + error = EADDRINUSE; + } else { + /* + * The address is available for us to bind. + * Add socket to the bound list. + */ + hvs_addr_set(&pcb->local_addr, sa->hvs_port); + hvs_addr_set(&pcb->remote_addr, HVADDR_PORT_ANY); + __hvs_insert_socket_on_list(so, HVS_LIST_BOUND); + } + mtx_unlock(&hvs_trans_socks_mtx); + + return (error); +} + +int +hvs_trans_listen(struct socket *so, int backlog, struct thread *td) +{ + struct hvs_pcb *pcb = so2hvspcb(so); + struct socket *bound_so; + int error; + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: HyperV Socket hvs_trans_listen called\n", __func__); + + if (pcb == NULL) + return (EINVAL); + + /* Check if the address is already bound and it was by us. 
*/ + bound_so = hvs_find_socket_on_list(&pcb->local_addr, HVS_LIST_BOUND); + if (bound_so == NULL || bound_so != so) { + HVSOCK_DBG(HVSOCK_DBG_ERR, + "%s: Address not bound or not by us.\n", __func__); + return (EADDRNOTAVAIL); + } + + SOCK_LOCK(so); + error = solisten_proto_check(so); + if (error == 0) + solisten_proto(so, backlog); + SOCK_UNLOCK(so); + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: HyperV Socket listen error = %d\n", __func__, error); + return (error); +} + +int +hvs_trans_accept(struct socket *so, struct sockaddr **nam) +{ + struct hvs_pcb *pcb = so2hvspcb(so); + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: HyperV Socket hvs_trans_accept called\n", __func__); + + if (pcb == NULL) + return (EINVAL); + + *nam = sodupsockaddr((struct sockaddr *) &pcb->remote_addr, + M_NOWAIT); + + return ((*nam == NULL) ? ENOMEM : 0); +} + +int +hvs_trans_connect(struct socket *so, struct sockaddr *nam, struct thread *td) +{ + struct hvs_pcb *pcb = so2hvspcb(so); + struct sockaddr_hvs *raddr = (struct sockaddr_hvs *)nam; + bool found_auto_bound_port = false; + int i, error = 0; + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: HyperV Socket hvs_trans_connect called, remote port is %x\n", + __func__, raddr->hvs_port); + + if (pcb == NULL) + return (EINVAL); + + /* Verify the remote address */ + if (raddr == NULL) + return (EINVAL); + if (raddr->sa_family != AF_HYPERV) + return (EAFNOSUPPORT); + + mtx_lock(&hvs_trans_socks_mtx); + if (so->so_state & + (SS_ISCONNECTED|SS_ISDISCONNECTING|SS_ISCONNECTING)) { + HVSOCK_DBG(HVSOCK_DBG_ERR, + "%s: socket connect in progress\n", + __func__); + error = EINPROGRESS; + goto out; + } + + /* + * Find an available port for us to auto bind the local + * address. 
+ */ + hvs_addr_set(&pcb->local_addr, 0); + + for (i = previous_auto_bound_port - 1; + i != previous_auto_bound_port; i --) { + if (i == MIN_PORT) + i = MAX_PORT; + + pcb->local_addr.hvs_port = i; + + if (__hvs_find_socket_on_list(&pcb->local_addr, + HVS_LIST_BOUND | HVS_LIST_CONNECTED) == NULL) { + found_auto_bound_port = true; + previous_auto_bound_port = i; + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: found local bound port is %x\n", + __func__, pcb->local_addr.hvs_port); + break; + } + } + + if (found_auto_bound_port == true) { + /* Found available port for auto bound, put on list */ + __hvs_insert_socket_on_list(so, HVS_LIST_BOUND); + /* Set VM service ID */ + pcb->vm_srv_id = srv_id_template; + set_port_by_srv_id(&pcb->vm_srv_id, pcb->local_addr.hvs_port); + /* Set host service ID and remote port */ + pcb->host_srv_id = srv_id_template; + set_port_by_srv_id(&pcb->host_srv_id, raddr->hvs_port); + hvs_addr_set(&pcb->remote_addr, raddr->hvs_port); + + /* Change the socket state to SS_ISCONNECTING */ + soisconnecting(so); + } else { + HVSOCK_DBG(HVSOCK_DBG_ERR, + "%s: No local port available for auto bound\n", + __func__); + error = EADDRINUSE; + } + + HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect vm_srv_id is "); + hvsock_print_guid(&pcb->vm_srv_id); + HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect host_srv_id is "); + hvsock_print_guid(&pcb->host_srv_id); + +out: + mtx_unlock(&hvs_trans_socks_mtx); + + if (found_auto_bound_port == true) + vmbus_req_tl_connect(&pcb->vm_srv_id, &pcb->host_srv_id); + + return (error); +} + +int +hvs_trans_disconnect(struct socket *so) +{ + struct hvs_pcb *pcb; + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: HyperV Socket hvs_trans_disconnect called\n", __func__); + + (void) hvs_trans_lock(); + pcb = so2hvspcb(so); + if (pcb == NULL) { + hvs_trans_unlock(); + return (EINVAL); + } + + /* If socket is already disconnected, skip this */ + if ((so->so_state & SS_ISDISCONNECTED) == 0) + soisdisconnecting(so); + + hvs_trans_unlock(); + + return (0); +} + +#define 
SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT) +struct hvs_callback_arg { + struct uio *uio; + struct sockbuf *sb; +}; + +int +hvs_trans_soreceive(struct socket *so, struct sockaddr **paddr, + struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) +{ + struct hvs_pcb *pcb = so2hvspcb(so); + struct sockbuf *sb; + ssize_t orig_resid; + uint32_t canread, to_read; + int flags, error = 0; + struct hvs_callback_arg cbarg; + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: HyperV Socket hvs_trans_soreceive called\n", __func__); + + if (so->so_type != SOCK_STREAM) + return (EINVAL); + if (pcb == NULL) + return (EINVAL); + + if (flagsp != NULL) + flags = *flagsp &~ MSG_EOR; + else + flags = 0; + + if (flags & MSG_PEEK) + return (EOPNOTSUPP); + + /* If no space to copy out anything */ + if (uio->uio_resid == 0 || uio->uio_rw != UIO_READ) + return (EINVAL); + + sb = &so->so_rcv; + + orig_resid = uio->uio_resid; + + /* Prevent other readers from entering the socket. */ + error = sblock(sb, SBLOCKWAIT(flags)); + if (error) { + HVSOCK_DBG(HVSOCK_DBG_ERR, + "%s: sblock returned error = %d\n", __func__, error); + return (error); + } + + SOCKBUF_LOCK(sb); + + cbarg.uio = uio; + cbarg.sb = sb; + /* + * If the socket is closing, there might still be some data + * in rx br to read. However we need to make sure + * the channel is still open. 
+ */ + if ((sb->sb_state & SBS_CANTRCVMORE) && + (so->so_state & SS_ISDISCONNECTED)) { + /* Other thread already closed the channel */ + error = EPIPE; + goto out; + } + + while (true) { + while (uio->uio_resid > 0 && + (canread = hvsock_canread_check(pcb)) > 0) { + to_read = MIN(canread, uio->uio_resid); + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: to_read = %u, skip = %u\n", __func__, to_read, + (unsigned int)(sizeof(struct hvs_pkt_header) + + pcb->recv_data_off)); + + error = vmbus_chan_recv_peek_call(pcb->chan, to_read, + sizeof(struct hvs_pkt_header) + pcb->recv_data_off, + hvsock_br_callback, (void *)&cbarg); + /* + * It is possible the socket was disconnected because + * we released the lock in hvsock_br_callback. So we + * need to check the state to make sure it is not + * disconnected. + */ + if (error || so->so_state & SS_ISDISCONNECTED) { + break; + } + + pcb->recv_data_len -= to_read; + pcb->recv_data_off += to_read; + } + + if (error) + break; + + /* Abort if socket has reported problems. */ + if (so->so_error) { + if (so->so_error == ESHUTDOWN && + orig_resid > uio->uio_resid) { + /* + * Although we got a FIN, we also received + * some data in this round. Deliver it + * to the user. + */ + error = 0; + } else { + if (so->so_error != ESHUTDOWN) + error = so->so_error; + } + + break; + } + + /* Cannot receive more. */ + if (sb->sb_state & SBS_CANTRCVMORE) + break; + + /* We are done if buffer has been filled */ + if (uio->uio_resid == 0) + break; + + if (!(flags & MSG_WAITALL) && orig_resid > uio->uio_resid) + break; + + /* Buffer ring is empty and we shall not block */ + if ((so->so_state & SS_NBIO) || + (flags & (MSG_DONTWAIT|MSG_NBIO))) { + if (orig_resid == uio->uio_resid) { + /* We have not read anything */ + error = EAGAIN; + } + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: non blocked read return, error %d.\n", + __func__, error); + break; + } + + /* + * Wait and block until (more) data comes in. + * Note: Drops the sockbuf lock during wait.
+ */ + error = sbwait(sb); + + if (error) + break; + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: wake up from sbwait, read available is %u\n", + __func__, vmbus_chan_read_available(pcb->chan)); + } + +out: + SOCKBUF_UNLOCK(sb); + + sbunlock(sb); + + /* We received a FIN (so_error is ESHUTDOWN) */ + if (so->so_error == ESHUTDOWN) { + if (so->so_snd.sb_state & SBS_CANTSENDMORE) { + /* Send has already closed */ + soisdisconnecting(so); + } else { + /* Just close the receive side */ + socantrcvmore(so); + } + } + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: returning error = %d, so_error = %d\n", + __func__, error, so->so_error); + + return (error); +} + +int +hvs_trans_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, + struct mbuf *top, struct mbuf *controlp, int flags, struct thread *td) +{ + struct hvs_pcb *pcb = so2hvspcb(so); + struct sockbuf *sb; + ssize_t orig_resid; + uint32_t canwrite, to_write; + int error = 0; + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: HyperV Socket hvs_trans_sosend called, uio_resid = %lu\n", + __func__, uio->uio_resid); + + if (so->so_type != SOCK_STREAM) + return (EINVAL); + if (pcb == NULL) + return (EINVAL); + + /* Reject an empty request or a uio that is not set up for writing */ + if (uio->uio_resid == 0 || uio->uio_rw != UIO_WRITE) + return (EINVAL); + + sb = &so->so_snd; + + orig_resid = uio->uio_resid; + + /* Prevent other writers from entering the socket.
*/ + error = sblock(sb, SBLOCKWAIT(flags)); + if (error) { + HVSOCK_DBG(HVSOCK_DBG_ERR, + "%s: sblock returned error = %d\n", __func__, error); + return (error); + } + + SOCKBUF_LOCK(sb); + + if ((sb->sb_state & SBS_CANTSENDMORE) || + so->so_error == ESHUTDOWN) { + error = EPIPE; + goto out; + } + + while (uio->uio_resid > 0) { + canwrite = hvsock_canwrite_check(pcb); + if (canwrite == 0) { + /* We have sent some data */ + if (orig_resid > uio->uio_resid) + break; + /* + * We have not sent any data and it is + * non-blocking I/O + */ + if (so->so_state & SS_NBIO || + (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) { + error = EWOULDBLOCK; + break; + } else { + /* + * We are here because there is no space on + * send buffer ring. Signal the other side + * to read and free more space. + * Sleep until space is available to send. + * Note: Drops the sockbuf lock during wait. + */ + error = sbwait(sb); + + if (error) + break; + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: wake up from sbwait, space avail on " + "tx ring is %u\n", + __func__, + vmbus_chan_write_available(pcb->chan)); + + continue; + } + } + to_write = MIN(canwrite, uio->uio_resid); + to_write = MIN(to_write, HVSOCK_SEND_BUF_SZ); + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: canwrite is %u, to_write = %u\n", __func__, + canwrite, to_write); + error = hvsock_send_data(pcb->chan, uio, to_write, sb); + + if (error) + break; + } + +out: + SOCKBUF_UNLOCK(sb); + sbunlock(sb); + + return (error); +} + +int +hvs_trans_peeraddr(struct socket *so, struct sockaddr **nam) +{ + struct hvs_pcb *pcb = so2hvspcb(so); + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: HyperV Socket hvs_trans_peeraddr called\n", __func__); + + if (pcb == NULL) + return (EINVAL); + + *nam = sodupsockaddr((struct sockaddr *) &pcb->remote_addr, M_NOWAIT); + + return ((*nam == NULL)?
ENOMEM : 0); +} + +int +hvs_trans_sockaddr(struct socket *so, struct sockaddr **nam) +{ + struct hvs_pcb *pcb = so2hvspcb(so); + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: HyperV Socket hvs_trans_sockaddr called\n", __func__); + + if (pcb == NULL) + return (EINVAL); + + *nam = sodupsockaddr((struct sockaddr *) &pcb->local_addr, M_NOWAIT); + + return ((*nam == NULL)? ENOMEM : 0); +} + +void +hvs_trans_close(struct socket *so) +{ + struct hvs_pcb *pcb; + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: HyperV Socket hvs_trans_close called\n", __func__); + + (void) hvs_trans_lock(); + pcb = so2hvspcb(so); + if (!pcb) { + hvs_trans_unlock(); + return; + } + + if (so->so_state & SS_ISCONNECTED) { + /* Send a FIN to peer */ + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: hvs_trans_close sending a FIN to host\n", __func__); + (void) hvsock_send_data(pcb->chan, NULL, 0, NULL); + } + + if (so->so_state & + (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) + soisdisconnected(so); + + pcb->chan = NULL; + pcb->so = NULL; + + if (SOLISTENING(so)) { + mtx_lock(&hvs_trans_socks_mtx); + /* Remove from bound list */ + __hvs_remove_socket_from_list(so, HVS_LIST_BOUND); + mtx_unlock(&hvs_trans_socks_mtx); + } + + hvs_trans_unlock(); + + return; +} + +void +hvs_trans_abort(struct socket *so) +{ + struct hvs_pcb *pcb = so2hvspcb(so); + + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: HyperV Socket hvs_trans_abort called\n", __func__); + + (void) hvs_trans_lock(); + if (pcb == NULL) { + hvs_trans_unlock(); + return; + } + + if (SOLISTENING(so)) { + mtx_lock(&hvs_trans_socks_mtx); + /* Remove from bound list */ + __hvs_remove_socket_from_list(so, HVS_LIST_BOUND); + mtx_unlock(&hvs_trans_socks_mtx); + } + + if (so->so_state & SS_ISCONNECTED) { + (void) sodisconnect(so); + } + hvs_trans_unlock(); + + return; +} + +int +hvs_trans_shutdown(struct socket *so) +{ + struct hvs_pcb *pcb = so2hvspcb(so); *** DIFF OUTPUT TRUNCATED AT 1000 LINES *** _______________________________________________ svn-src-all@freebsd.org 
mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"