Author: whu
Date: Wed May 20 11:03:59 2020
New Revision: 361275
URL: https://svnweb.freebsd.org/changeset/base/361275

Log:
  HyperV socket implementation for FreeBSD
  
  This change adds Hyper-V socket feature in FreeBSD. New socket address
  family AF_HYPERV and its kernel support are added.
  
  Submitted by: Wei Hu <w...@microsoft.com>
  Reviewed by:  Dexuan Cui <de...@microsoft.com>
  Relnotes:     yes
  Sponsored by: Microsoft
  Differential Revision:        https://reviews.freebsd.org/D24061

Added:
  head/sys/dev/hyperv/hvsock/
  head/sys/dev/hyperv/hvsock/hv_sock.c   (contents, props changed)
  head/sys/dev/hyperv/hvsock/hv_sock.h   (contents, props changed)
  head/sys/modules/hyperv/hvsock/
  head/sys/modules/hyperv/hvsock/Makefile   (contents, props changed)
Modified:
  head/sys/conf/files.x86
  head/sys/dev/hyperv/include/vmbus.h
  head/sys/dev/hyperv/vmbus/vmbus.c
  head/sys/dev/hyperv/vmbus/vmbus_br.c
  head/sys/dev/hyperv/vmbus/vmbus_brvar.h
  head/sys/dev/hyperv/vmbus/vmbus_chan.c
  head/sys/dev/hyperv/vmbus/vmbus_chanvar.h
  head/sys/dev/hyperv/vmbus/vmbus_reg.h
  head/sys/modules/hyperv/Makefile
  head/sys/sys/socket.h

Modified: head/sys/conf/files.x86
==============================================================================
--- head/sys/conf/files.x86     Wed May 20 11:01:10 2020        (r361274)
+++ head/sys/conf/files.x86     Wed May 20 11:03:59 2020        (r361275)
@@ -133,6 +133,7 @@ dev/hwpmc/hwpmc_core.c              optional        hwpmc
 dev/hwpmc/hwpmc_uncore.c       optional        hwpmc
 dev/hwpmc/hwpmc_tsc.c          optional        hwpmc
 dev/hwpmc/hwpmc_x86.c          optional        hwpmc
+dev/hyperv/hvsock/hv_sock.c                            optional        hyperv
 dev/hyperv/input/hv_kbd.c                              optional        hyperv
 dev/hyperv/input/hv_kbdc.c                             optional        hyperv
 dev/hyperv/pcib/vmbus_pcib.c                           optional        hyperv pci

Added: head/sys/dev/hyperv/hvsock/hv_sock.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/dev/hyperv/hvsock/hv_sock.c        Wed May 20 11:03:59 2020        (r361275)
@@ -0,0 +1,1748 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/domain.h>
+#include <sys/lock.h>
+#include <sys/kernel.h>
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/sockbuf.h>
+#include <sys/sx.h>
+#include <sys/uio.h>
+
+#include <net/vnet.h>
+
+#include <dev/hyperv/vmbus/vmbus_reg.h>
+
+#include "hv_sock.h"
+
+#define HVSOCK_DBG_NONE                        0x0
+#define HVSOCK_DBG_INFO                        0x1
+#define HVSOCK_DBG_ERR                 0x2
+#define HVSOCK_DBG_VERBOSE             0x3
+
+
+SYSCTL_NODE(_net, OID_AUTO, hvsock, CTLFLAG_RD, 0, "HyperV socket");
+
+static int hvs_dbg_level;
+SYSCTL_INT(_net_hvsock, OID_AUTO, hvs_dbg_level, CTLFLAG_RWTUN, &hvs_dbg_level,
+    0, "hyperv socket debug level: 0 = none, 1 = info, 2 = error, 3 = verbose");
+
+
+/*
+ * Conditional debug print: forwards the remaining arguments to printf()
+ * only when the hvs_dbg_level tunable is at least `level`.  Wrapped in
+ * do { } while (0) so it can be used as a single statement.
+ */
+#define HVSOCK_DBG(level, ...) do {                                    \
+       if (hvs_dbg_level >= (level))                                   \
+               printf(__VA_ARGS__);                                    \
+       } while (0)
+
+MALLOC_DEFINE(M_HVSOCK, "hyperv_socket", "hyperv socket control structures");
+
+/* The MTU is 16KB per host side's design */
+#define HVSOCK_MTU_SIZE                (1024 * 16)
+/* Largest payload sent per call: one page minus the vmpipe protocol header */
+#define HVSOCK_SEND_BUF_SZ     (PAGE_SIZE - sizeof(struct vmpipe_proto_header))
+
+/* Size of the per-packet header (struct hvs_pkt_header) */
+#define HVSOCK_HEADER_LEN      (sizeof(struct hvs_pkt_header))
+
+/*
+ * Length accounted for one packet carrying payload_len bytes: the packet
+ * header, the payload rounded up to a multiple of 8, plus one uint64_t.
+ */
+#define HVSOCK_PKT_LEN(payload_len)    (HVSOCK_HEADER_LEN + \
+                                        roundup2(payload_len, 8) + \
+                                        sizeof(uint64_t))
+
+
+/*
+ * Tentative definition: the protosw table below takes the address of the
+ * domain before the domain's initializer appears further down.
+ */
+static struct domain           hv_socket_domain;
+
+/*
+ * HyperV Transport sockets
+ *
+ * User-request dispatch table wiring each socket operation to the
+ * corresponding hvs_trans_* handler.
+ */
+static struct pr_usrreqs       hvs_trans_usrreqs = {
+       .pru_attach =           hvs_trans_attach,
+       .pru_bind =             hvs_trans_bind,
+       .pru_listen =           hvs_trans_listen,
+       .pru_accept =           hvs_trans_accept,
+       .pru_connect =          hvs_trans_connect,
+       .pru_peeraddr =         hvs_trans_peeraddr,
+       .pru_sockaddr =         hvs_trans_sockaddr,
+       .pru_soreceive =        hvs_trans_soreceive,
+       .pru_sosend =           hvs_trans_sosend,
+       .pru_disconnect =       hvs_trans_disconnect,
+       .pru_close =            hvs_trans_close,
+       .pru_detach =           hvs_trans_detach,
+       .pru_shutdown =         hvs_trans_shutdown,
+       .pru_abort =            hvs_trans_abort,
+};
+
+/*
+ * Definitions of protocols supported in HyperV socket domain
+ *
+ * A single entry: a connection-required SOCK_STREAM protocol.
+ */
+static struct protosw          hv_socket_protosw[] = {
+{
+       .pr_type =              SOCK_STREAM,
+       .pr_domain =            &hv_socket_domain,
+       .pr_protocol =          HYPERV_SOCK_PROTO_TRANS,
+       .pr_flags =             PR_CONNREQUIRED,
+       .pr_init =              hvs_trans_init,
+       .pr_usrreqs =           &hvs_trans_usrreqs,
+},
+};
+
+/* The AF_HYPERV domain; dom_protoswNPROTOSW points one past the table end. */
+static struct domain           hv_socket_domain = {
+       .dom_family =           AF_HYPERV,
+       .dom_name =             "hyperv",
+       .dom_protosw =          hv_socket_protosw,
+       .dom_protoswNPROTOSW =  &hv_socket_protosw[nitems(hv_socket_protosw)]
+};
+
+VNET_DOMAIN_SET(hv_socket_);
+
+/* Bounds of the 32-bit port space walked when auto-binding in connect */
+#define MAX_PORT                       ((uint32_t)0xFFFFFFFF)
+#define MIN_PORT                       ((uint32_t)0x0)
+
+/*
+ * 00000000-facb-11e6-bd58-64006a7986d3
+ *
+ * Template service GUID.  The first four bytes are overwritten with a port
+ * number by set_port_by_srv_id(); is_valid_srv_id() compares only the
+ * remaining twelve bytes against this template.
+ */
+static const struct hyperv_guid srv_id_template = {
+       .hv_guid = {
+           0x00, 0x00, 0x00, 0x00, 0xcb, 0xfa, 0xe6, 0x11,
+           0xbd, 0x58, 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3 }
+};
+
+/* Forward declarations of helpers used before their definitions */
+static int             hvsock_br_callback(void *, int, void *);
+static uint32_t                hvsock_canread_check(struct hvs_pcb *);
+static uint32_t                hvsock_canwrite_check(struct hvs_pcb *);
+static int             hvsock_send_data(struct vmbus_channel *chan,
+    struct uio *uio, uint32_t to_write, struct sockbuf *sb);
+
+
+
+/*
+ * Globals
+ *
+ * hvs_trans_socks_sx is the lock taken by hvs_trans_lock()/hvs_trans_unlock();
+ * hvs_trans_socks_mtx is held around accesses to the two pcb lists below.
+ * previous_auto_bound_port remembers the last port chosen by auto-bind.
+ */
+static struct sx               hvs_trans_socks_sx;
+static struct mtx              hvs_trans_socks_mtx;
+static LIST_HEAD(, hvs_pcb)    hvs_trans_bound_socks;
+static LIST_HEAD(, hvs_pcb)    hvs_trans_connected_socks;
+static uint32_t                        previous_auto_bound_port;
+
+/*
+ * Log a GUID at HVSOCK_DBG_INFO level as eleven hexadecimal fields: bytes
+ * 0-3 as one 32-bit value, bytes 4-5 and 6-7 as 16-bit values (all in host
+ * byte order), then bytes 8..15 individually.
+ *
+ * The multi-byte fields are copied out with memcpy() instead of being read
+ * through casted pointers, so nothing is assumed about the alignment of
+ * *guid and no object is accessed through an incompatible pointer type.
+ */
+static void
+hvsock_print_guid(struct hyperv_guid *guid)
+{
+       const unsigned char *p = (const unsigned char *)guid;
+       unsigned int data1;
+       unsigned short data2, data3;
+
+       memcpy(&data1, &p[0], sizeof(data1));
+       memcpy(&data2, &p[4], sizeof(data2));
+       memcpy(&data3, &p[6], sizeof(data3));
+
+       HVSOCK_DBG(HVSOCK_DBG_INFO,
+           "0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x\n",
+           data1, data2, data3,
+           p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
+}
+
+/*
+ * Return true when every byte of *id after the first four matches the
+ * corresponding byte of srv_id_template.  The first four bytes are not
+ * compared.
+ */
+static bool
+is_valid_srv_id(const struct hyperv_guid *id)
+{
+       const size_t skip = 4;
+
+       return (memcmp(&id->hv_guid[skip], &srv_id_template.hv_guid[skip],
+           sizeof(struct hyperv_guid) - skip) == 0);
+}
+
+/*
+ * Return the port number stored in the first sizeof(unsigned int) bytes of
+ * the service GUID, in host byte order.  The bytes are copied with memcpy()
+ * so that no alignment of *srv_id is assumed.
+ */
+static unsigned int
+get_port_by_srv_id(const struct hyperv_guid *srv_id)
+{
+       unsigned int port;
+
+       memcpy(&port, srv_id, sizeof(port));
+       return (port);
+}
+
+/*
+ * Store port, in host byte order, into the first sizeof(unsigned int) bytes
+ * of the service GUID; the remaining bytes are left untouched.  The bytes
+ * are copied with memcpy() so that no alignment of *srv_id is assumed.
+ */
+static void
+set_port_by_srv_id(struct hyperv_guid *srv_id, unsigned int port)
+{
+       memcpy(srv_id, &port, sizeof(port));
+}
+
+
+/*
+ * Unlink pcb from the bound and/or connected list, as selected by the
+ * HVS_LIST_BOUND / HVS_LIST_CONNECTED bits in list.  A list is only
+ * modified if pcb is actually found on it; a NULL pcb is ignored.
+ *
+ * This function takes no lock itself.
+ *
+ * Each walk stops as soon as the entry has been unlinked: LIST_FOREACH
+ * advances through the current element's link, so it must not keep
+ * iterating from an element that LIST_REMOVE has just taken off the list.
+ */
+static void
+__hvs_remove_pcb_from_list(struct hvs_pcb *pcb, unsigned char list)
+{
+       struct hvs_pcb *p;
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb);
+
+       if (pcb == NULL)
+               return;
+
+       if (list & HVS_LIST_BOUND) {
+               LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next) {
+                       if (p == pcb) {
+                               LIST_REMOVE(pcb, bound_next);
+                               break;
+                       }
+               }
+       }
+
+       if (list & HVS_LIST_CONNECTED) {
+               LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next) {
+                       if (p == pcb) {
+                               LIST_REMOVE(pcb, connected_next);
+                               break;
+                       }
+               }
+       }
+}
+
+/*
+ * Socket-keyed wrapper around __hvs_remove_pcb_from_list(): fetches the
+ * pcb attached to so and unlinks it from the lists selected by list.
+ * Takes no lock itself.
+ */
+static void
+__hvs_remove_socket_from_list(struct socket *so, unsigned char list)
+{
+       struct hvs_pcb *sock_pcb;
+
+       sock_pcb = so2hvspcb(so);
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, sock_pcb);
+       __hvs_remove_pcb_from_list(sock_pcb, list);
+}
+
+/*
+ * Insert the pcb attached to so at the head of the bound and/or connected
+ * list, as selected by the HVS_LIST_BOUND / HVS_LIST_CONNECTED bits in
+ * list.  Takes no lock itself.
+ */
+static void
+__hvs_insert_socket_on_list(struct socket *so, unsigned char list)
+{
+       struct hvs_pcb *sock_pcb = so2hvspcb(so);
+       const bool on_bound = (list & HVS_LIST_BOUND) != 0;
+       const bool on_connected = (list & HVS_LIST_CONNECTED) != 0;
+
+       if (on_bound) {
+               LIST_INSERT_HEAD(&hvs_trans_bound_socks,
+                   sock_pcb, bound_next);
+       }
+
+       if (on_connected) {
+               LIST_INSERT_HEAD(&hvs_trans_connected_socks,
+                   sock_pcb, connected_next);
+       }
+}
+
+/*
+ * Locked entry point for removing a socket from the pcb lists selected by
+ * list.  A NULL socket, or a socket with no pcb attached, is only logged
+ * (at verbose level) and otherwise ignored.
+ */
+void
+hvs_remove_socket_from_list(struct socket *so, unsigned char list)
+{
+       if (so != NULL && so->so_pcb != NULL) {
+               mtx_lock(&hvs_trans_socks_mtx);
+               __hvs_remove_socket_from_list(so, list);
+               mtx_unlock(&hvs_trans_socks_mtx);
+               return;
+       }
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: socket or so_pcb is null\n", __func__);
+}
+
+/*
+ * Locked entry point for inserting a socket on the pcb lists selected by
+ * list.  A NULL socket, or a socket with no pcb attached, is only logged
+ * (at verbose level) and otherwise ignored.
+ */
+static void
+hvs_insert_socket_on_list(struct socket *so, unsigned char list)
+{
+       if (so != NULL && so->so_pcb != NULL) {
+               mtx_lock(&hvs_trans_socks_mtx);
+               __hvs_insert_socket_on_list(so, list);
+               mtx_unlock(&hvs_trans_socks_mtx);
+               return;
+       }
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: socket or so_pcb is null\n", __func__);
+}
+
+/*
+ * Search the selected lists (bound first, then connected) for a pcb whose
+ * local port equals addr->hvs_port and which still has a socket attached.
+ * Returns that socket, or NULL when no such pcb exists.  Takes no lock
+ * itself.
+ */
+static struct socket *
+__hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
+{
+       struct hvs_pcb *cur;
+
+       if ((list & HVS_LIST_BOUND) != 0) {
+               LIST_FOREACH(cur, &hvs_trans_bound_socks, bound_next) {
+                       if (cur->so == NULL)
+                               continue;
+                       if (cur->local_addr.hvs_port == addr->hvs_port)
+                               return (cur->so);
+               }
+       }
+
+       if ((list & HVS_LIST_CONNECTED) != 0) {
+               LIST_FOREACH(cur, &hvs_trans_connected_socks, connected_next) {
+                       if (cur->so == NULL)
+                               continue;
+                       if (cur->local_addr.hvs_port == addr->hvs_port)
+                               return (cur->so);
+               }
+       }
+
+       return (NULL);
+}
+
+/*
+ * Locked wrapper around __hvs_find_socket_on_list(): performs the lookup
+ * with hvs_trans_socks_mtx held and returns its result.
+ */
+static struct socket *
+hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
+{
+       struct socket *found;
+
+       mtx_lock(&hvs_trans_socks_mtx);
+       found = __hvs_find_socket_on_list(addr, list);
+       mtx_unlock(&hvs_trans_socks_mtx);
+
+       return (found);
+}
+
+/*
+ * Reset *addr to an all-zero AF_HYPERV socket address carrying the given
+ * port.
+ */
+static inline void
+hvs_addr_set(struct sockaddr_hvs *addr, unsigned int port)
+{
+       memset(addr, 0, sizeof(struct sockaddr_hvs));
+       addr->hvs_port = port;
+       addr->sa_family = AF_HYPERV;
+}
+
+/*
+ * Initialize *addr as an AF_HYPERV address whose port is the one encoded
+ * in the service GUID svr_id.
+ */
+void
+hvs_addr_init(struct sockaddr_hvs *addr, const struct hyperv_guid *svr_id)
+{
+       const unsigned int port = get_port_by_srv_id(svr_id);
+
+       hvs_addr_set(addr, port);
+}
+
+/*
+ * Acquire hvs_trans_socks_sx exclusively.  Always returns 0.
+ */
+int
+hvs_trans_lock(void)
+{
+
+       sx_xlock(&hvs_trans_socks_sx);
+
+       return (0);
+}
+
+/*
+ * Release the exclusive hold on hvs_trans_socks_sx taken by
+ * hvs_trans_lock().
+ */
+void
+hvs_trans_unlock(void)
+{
+
+       sx_xunlock(&hvs_trans_socks_sx);
+}
+
+/*
+ * Protocol init hook.  Sets up the file-scope locks, the two pcb lists and
+ * the auto-bind port cursor.  Does nothing for non-default vnet instances
+ * or when the machine is not a Hyper-V guest.
+ */
+void
+hvs_trans_init(void)
+{
+       /* Globals are initialized only by the default vnet, on Hyper-V. */
+       if (!IS_DEFAULT_VNET(curvnet) || vm_guest != VM_GUEST_HV)
+               return;
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: HyperV Socket hvs_trans_init called\n", __func__);
+
+       /* Lists and port cursor */
+       LIST_INIT(&hvs_trans_bound_socks);
+       LIST_INIT(&hvs_trans_connected_socks);
+       previous_auto_bound_port = MAX_PORT;
+
+       /* Locks */
+       sx_init(&hvs_trans_socks_sx, "hvs_trans_sock_sx");
+       mtx_init(&hvs_trans_socks_mtx,
+           "hvs_trans_socks_mtx", NULL, MTX_DEF);
+}
+
+/*
+ * Attach hook: allocate a zeroed pcb and link it with the socket.
+ *
+ * Called in two cases:
+ * 1) When user calls socket();
+ * 2) When we accept a new incoming connection and call sonewconn().
+ *
+ * Returns ESOCKTNOSUPPORT for non-stream sockets, EPROTONOSUPPORT for a
+ * protocol other than 0 or HYPERV_SOCK_PROTO_TRANS, EISCONN if a pcb is
+ * already attached, ENOMEM if the allocation fails, and 0 on success.
+ */
+int
+hvs_trans_attach(struct socket *so, int proto, struct thread *td)
+{
+       struct hvs_pcb *cur_pcb = so2hvspcb(so);
+       struct hvs_pcb *new_pcb;
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: HyperV Socket hvs_trans_attach called\n", __func__);
+
+       if (so->so_type != SOCK_STREAM)
+               return (ESOCKTNOSUPPORT);
+
+       if (!(proto == 0 || proto == HYPERV_SOCK_PROTO_TRANS))
+               return (EPROTONOSUPPORT);
+
+       if (cur_pcb != NULL)
+               return (EISCONN);
+
+       /* Non-sleeping allocation; the caller gets ENOMEM on failure. */
+       new_pcb = malloc(sizeof(struct hvs_pcb), M_HVSOCK, M_NOWAIT | M_ZERO);
+       if (new_pcb == NULL)
+               return (ENOMEM);
+
+       /* Cross-link socket and pcb. */
+       new_pcb->so = so;
+       so->so_pcb = (void *)new_pcb;
+
+       return (0);
+}
+
+/*
+ * Detach hook: break the socket -> pcb link under the transport lock.
+ *
+ * The pcb itself is zeroed and freed here only when the socket is a
+ * listening socket; otherwise only so->so_pcb is cleared.
+ * NOTE(review): presumably non-listening pcbs are released elsewhere
+ * (not visible from this function) - confirm.
+ */
+void
+hvs_trans_detach(struct socket *so)
+{
+       struct hvs_pcb *pcb;
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: HyperV Socket hvs_trans_detach called\n", __func__);
+
+       (void) hvs_trans_lock();
+
+       pcb = so2hvspcb(so);
+       if (pcb != NULL) {
+               if (SOLISTENING(so)) {
+                       bzero(pcb, sizeof(*pcb));
+                       free(pcb, M_HVSOCK);
+               }
+
+               so->so_pcb = NULL;
+       }
+
+       hvs_trans_unlock();
+}
+
+/*
+ * Bind hook: claim the port in addr as the socket's local port.
+ *
+ * Returns EINVAL when addr or the pcb is missing, EAFNOSUPPORT when addr is
+ * not an AF_HYPERV address, EADDRINUSE when a socket on the bound or
+ * connected list already uses the port, and 0 after the socket has been
+ * put on the bound list.
+ */
+int
+hvs_trans_bind(struct socket *so, struct sockaddr *addr, struct thread *td)
+{
+       struct hvs_pcb *pcb = so2hvspcb(so);
+       struct sockaddr_hvs *hvs_addr = (struct sockaddr_hvs *) addr;
+       struct socket *owner;
+       int error;
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: HyperV Socket hvs_trans_bind called\n", __func__);
+
+       if (hvs_addr == NULL || pcb == NULL)
+               return (EINVAL);
+
+       if (hvs_addr->sa_family != AF_HYPERV) {
+               HVSOCK_DBG(HVSOCK_DBG_ERR,
+                   "%s: Not supported, sa_family is %u\n",
+                   __func__, hvs_addr->sa_family);
+               return (EAFNOSUPPORT);
+       }
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: binding port = 0x%x\n", __func__, hvs_addr->hvs_port);
+
+       /* Lookup and insertion happen under one hold of the list mutex. */
+       mtx_lock(&hvs_trans_socks_mtx);
+       owner = __hvs_find_socket_on_list(hvs_addr,
+           HVS_LIST_BOUND | HVS_LIST_CONNECTED);
+       if (owner == NULL) {
+               /* Port is free: record the addresses and publish the socket. */
+               hvs_addr_set(&pcb->local_addr, hvs_addr->hvs_port);
+               hvs_addr_set(&pcb->remote_addr, HVADDR_PORT_ANY);
+               __hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
+               error = 0;
+       } else {
+               error = EADDRINUSE;
+       }
+       mtx_unlock(&hvs_trans_socks_mtx);
+
+       return (error);
+}
+
+/*
+ * Listen hook: move a bound socket into the listening state.
+ *
+ * Returns EINVAL when no pcb is attached, EADDRNOTAVAIL when the socket's
+ * local port is not on the bound list or is held by a different socket,
+ * otherwise the result of solisten_proto_check() (0 on success).
+ */
+int
+hvs_trans_listen(struct socket *so, int backlog, struct thread *td)
+{
+       struct hvs_pcb *pcb = so2hvspcb(so);
+       struct socket *owner;
+       int error;
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: HyperV Socket hvs_trans_listen called\n", __func__);
+
+       if (pcb == NULL)
+               return (EINVAL);
+
+       /* The local address must already be bound, and bound by this socket. */
+       owner = hvs_find_socket_on_list(&pcb->local_addr, HVS_LIST_BOUND);
+       if (!(owner != NULL && owner == so)) {
+               HVSOCK_DBG(HVSOCK_DBG_ERR,
+                   "%s: Address not bound or not by us.\n", __func__);
+               return (EADDRNOTAVAIL);
+       }
+
+       SOCK_LOCK(so);
+       error = solisten_proto_check(so);
+       if (error == 0)
+               solisten_proto(so, backlog);
+       SOCK_UNLOCK(so);
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: HyperV Socket listen error = %d\n", __func__, error);
+       return (error);
+}
+
+/*
+ * Accept hook: hand back a freshly allocated copy of the peer address.
+ *
+ * Returns EINVAL when no pcb is attached and ENOMEM when the address copy
+ * cannot be allocated (in which case *nam is NULL); 0 otherwise.
+ */
+int
+hvs_trans_accept(struct socket *so, struct sockaddr **nam)
+{
+       struct hvs_pcb *pcb = so2hvspcb(so);
+       struct sockaddr *peer;
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: HyperV Socket hvs_trans_accept called\n", __func__);
+
+       if (pcb == NULL)
+               return (EINVAL);
+
+       peer = sodupsockaddr((struct sockaddr *) &pcb->remote_addr,
+           M_NOWAIT);
+       *nam = peer;
+       if (peer == NULL)
+               return (ENOMEM);
+
+       return (0);
+}
+
+/*
+ * Connect hook: auto-bind a free local port, derive the VM and host
+ * service GUIDs from the local and remote ports, mark the socket as
+ * connecting and ask vmbus to open the connection.
+ *
+ * Returns EINVAL when the pcb or nam is missing, EAFNOSUPPORT when nam is
+ * not an AF_HYPERV address, EINPROGRESS when the socket is already
+ * connected, connecting or disconnecting, EADDRINUSE when no local port
+ * could be found, and 0 once the connect request has been issued.
+ */
+int
+hvs_trans_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+       struct hvs_pcb *pcb = so2hvspcb(so);
+       struct sockaddr_hvs *raddr = (struct sockaddr_hvs *)nam;
+       bool found_auto_bound_port = false;
+       uint32_t port;
+       int error = 0;
+
+       if (pcb == NULL)
+               return (EINVAL);
+
+       /* Verify the remote address */
+       if (raddr == NULL)
+               return (EINVAL);
+
+       /* raddr is only dereferenced once it is known to be non-NULL. */
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: HyperV Socket hvs_trans_connect called, remote port is %x\n",
+           __func__, raddr->hvs_port);
+
+       if (raddr->sa_family != AF_HYPERV)
+               return (EAFNOSUPPORT);
+
+       mtx_lock(&hvs_trans_socks_mtx);
+       if (so->so_state &
+           (SS_ISCONNECTED|SS_ISDISCONNECTING|SS_ISCONNECTING)) {
+               HVSOCK_DBG(HVSOCK_DBG_ERR,
+                   "%s: socket connect in progress\n",
+                   __func__);
+               error = EINPROGRESS;
+               goto out;
+       }
+
+       /*
+        * Find an available port for us to auto bind the local
+        * address.
+        *
+        * Candidates are tried downwards from the port below the one
+        * chosen last time; MIN_PORT is never handed out, the scan
+        * wraps from it to MAX_PORT.  The counter is unsigned like the
+        * port values it is compared against, so the wrap-around is
+        * well defined.
+        */
+       hvs_addr_set(&pcb->local_addr, 0);
+
+       for (port = previous_auto_bound_port - 1;
+           port != previous_auto_bound_port; port--) {
+               if (port == MIN_PORT)
+                       port = MAX_PORT;
+
+               pcb->local_addr.hvs_port = port;
+
+               if (__hvs_find_socket_on_list(&pcb->local_addr,
+                   HVS_LIST_BOUND | HVS_LIST_CONNECTED) == NULL) {
+                       found_auto_bound_port = true;
+                       previous_auto_bound_port = port;
+                       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+                           "%s: found local bound port is %x\n",
+                           __func__, pcb->local_addr.hvs_port);
+                       break;
+               }
+       }
+
+       if (found_auto_bound_port) {
+               /* Found available port for auto bound, put on list */
+               __hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
+               /* Set VM service ID */
+               pcb->vm_srv_id = srv_id_template;
+               set_port_by_srv_id(&pcb->vm_srv_id, pcb->local_addr.hvs_port);
+               /* Set host service ID and remote port */
+               pcb->host_srv_id = srv_id_template;
+               set_port_by_srv_id(&pcb->host_srv_id, raddr->hvs_port);
+               hvs_addr_set(&pcb->remote_addr, raddr->hvs_port);
+
+               /* Change the socket state to SS_ISCONNECTING */
+               soisconnecting(so);
+       } else {
+               HVSOCK_DBG(HVSOCK_DBG_ERR,
+                   "%s: No local port available for auto bound\n",
+                   __func__);
+               error = EADDRINUSE;
+       }
+
+       HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect vm_srv_id is ");
+       hvsock_print_guid(&pcb->vm_srv_id);
+       HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect host_srv_id is ");
+       hvsock_print_guid(&pcb->host_srv_id);
+
+out:
+       mtx_unlock(&hvs_trans_socks_mtx);
+
+       /* The connect request is issued after the list mutex is dropped. */
+       if (found_auto_bound_port)
+               vmbus_req_tl_connect(&pcb->vm_srv_id, &pcb->host_srv_id);
+
+       return (error);
+}
+
+/*
+ * Disconnect hook: under the transport lock, mark the socket as
+ * disconnecting unless it is already flagged disconnected.
+ *
+ * Returns EINVAL when no pcb is attached, 0 otherwise.
+ */
+int
+hvs_trans_disconnect(struct socket *so)
+{
+       struct hvs_pcb *pcb;
+       int error = 0;
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: HyperV Socket hvs_trans_disconnect called\n", __func__);
+
+       (void) hvs_trans_lock();
+
+       pcb = so2hvspcb(so);
+       if (pcb == NULL) {
+               error = EINVAL;
+       } else if (!(so->so_state & SS_ISDISCONNECTED)) {
+               /* Nothing to do for a socket that is already disconnected. */
+               soisdisconnecting(so);
+       }
+
+       hvs_trans_unlock();
+
+       return (error);
+}
+
+/* Map MSG_DONTWAIT in f to the wait flag passed to sblock(): 0 or SBL_WAIT */
+#define SBLOCKWAIT(f)  (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
+/*
+ * Argument bundle handed to hvsock_br_callback() by the receive path:
+ * the destination uio and the socket's receive sockbuf.
+ */
+struct hvs_callback_arg {
+       struct uio *uio;
+       struct sockbuf *sb;
+};
+
+/*
+ * Receive hook for AF_HYPERV stream sockets.
+ *
+ * Copies data from the pcb's vmbus channel into uio through
+ * vmbus_chan_recv_peek_call() with hvsock_br_callback as the copy routine.
+ * paddr, mp0 and controlp are not used by this function.
+ *
+ * Returns EINVAL for a non-stream socket, a missing pcb, an empty uio or a
+ * uio that is not a read request; EOPNOTSUPP for MSG_PEEK; EPIPE when the
+ * receive side is shut and the socket is disconnected; EAGAIN when nothing
+ * was read and the call must not block; otherwise an error from sblock(),
+ * sbwait(), the channel read or so->so_error, or 0.
+ */
+int
+hvs_trans_soreceive(struct socket *so, struct sockaddr **paddr,
+    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
+{
+       struct hvs_pcb *pcb = so2hvspcb(so);
+       struct sockbuf *sb;
+       ssize_t orig_resid;
+       uint32_t canread, to_read;
+       int flags, error = 0;
+       struct hvs_callback_arg cbarg;
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: HyperV Socket hvs_trans_soreceive called\n", __func__);
+
+       if (so->so_type != SOCK_STREAM)
+               return (EINVAL);
+       if (pcb == NULL)
+               return (EINVAL);
+
+       /* Take the caller's flags, with MSG_EOR masked off. */
+       if (flagsp != NULL)
+               flags = *flagsp &~ MSG_EOR;
+       else
+               flags = 0;
+
+       /* Peeking is not implemented. */
+       if (flags & MSG_PEEK)
+               return (EOPNOTSUPP);
+
+       /* If no space to copy out anything */
+       if (uio->uio_resid == 0 || uio->uio_rw != UIO_READ)
+               return (EINVAL);
+
+       sb = &so->so_rcv;
+
+       /* Remembered so we can later tell whether any data was copied. */
+       orig_resid = uio->uio_resid;
+
+       /* Prevent other readers from entering the socket. */
+       error = sblock(sb, SBLOCKWAIT(flags));
+       if (error) {
+               HVSOCK_DBG(HVSOCK_DBG_ERR,
+                   "%s: sblock returned error = %d\n", __func__, error);
+               return (error);
+       }
+
+       SOCKBUF_LOCK(sb);
+
+       cbarg.uio = uio;
+       cbarg.sb = sb;
+       /*
+        * If the socket is closing, there might still be some data
+        * in rx br to read. However we need to make sure
+        * the channel is still open.
+        */
+       if ((sb->sb_state & SBS_CANTRCVMORE) &&
+           (so->so_state & SS_ISDISCONNECTED)) {
+               /* Other thread already closed the channel */
+               error = EPIPE;
+               goto out;
+       }
+
+       while (true) {
+               /*
+                * Drain what hvsock_canread_check() reports as readable,
+                * bounded by the space left in uio.  Each read starts past
+                * the packet header plus the part already consumed
+                * (recv_data_off).
+                */
+               while (uio->uio_resid > 0 &&
+                   (canread = hvsock_canread_check(pcb)) > 0) {
+                       to_read = MIN(canread, uio->uio_resid);
+                       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+                           "%s: to_read = %u, skip = %u\n", __func__, to_read,
+                           (unsigned int)(sizeof(struct hvs_pkt_header) +
+                           pcb->recv_data_off));
+
+                       error = vmbus_chan_recv_peek_call(pcb->chan, to_read,
+                           sizeof(struct hvs_pkt_header) + pcb->recv_data_off,
+                           hvsock_br_callback, (void *)&cbarg);
+                       /*
+                        * It is possible the socket is disconnected because
+                        * we released the lock in hvsock_br_callback. So we
+                        * need to check the state to make sure it is not
+                        * disconnected.
+                        */
+                       if (error || so->so_state & SS_ISDISCONNECTED) {
+                               break;
+                       }
+
+                       /* Account for the bytes just copied out. */
+                       pcb->recv_data_len -= to_read;
+                       pcb->recv_data_off += to_read;
+               }
+
+               if (error)
+                       break;
+
+               /* Abort if socket has reported problems. */
+               if (so->so_error) {
+                       if (so->so_error == ESHUTDOWN &&
+                           orig_resid > uio->uio_resid) {
+                               /*
+                                * Although we got a FIN, we also received
+                                * some data in this round. Deliver it
+                                * to the user.
+                                */
+                               error = 0;
+                       } else {
+                               /* ESHUTDOWN itself is not reported as an error. */
+                               if (so->so_error != ESHUTDOWN)
+                                       error = so->so_error;
+                       }
+
+                       break;
+               }
+
+               /* Cannot receive more. */
+               if (sb->sb_state & SBS_CANTRCVMORE)
+                       break;
+
+               /* We are done if buffer has been filled */
+               if (uio->uio_resid == 0)
+                       break;
+
+               /* Without MSG_WAITALL, any amount of data ends the call. */
+               if (!(flags & MSG_WAITALL) && orig_resid > uio->uio_resid)
+                       break;
+
+               /* Buffer ring is empty and we shall not block */
+               if ((so->so_state & SS_NBIO) ||
+                   (flags & (MSG_DONTWAIT|MSG_NBIO))) {
+                       if (orig_resid == uio->uio_resid) {
+                               /* We have not read anything */
+                               error = EAGAIN;
+                       }
+                       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+                           "%s: non blocked read return, error %d.\n",
+                           __func__, error);
+                       break;
+               }
+
+               /*
+                * Wait and block until (more) data comes in.
+                * Note: Drops the sockbuf lock during wait.
+                */
+               error = sbwait(sb);
+
+               if (error)
+                       break;
+
+               HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+                   "%s: wake up from sbwait, read available is %u\n",
+                   __func__, vmbus_chan_read_available(pcb->chan));
+       }
+
+out:
+       SOCKBUF_UNLOCK(sb);
+
+       sbunlock(sb);
+
+       /* We received a FIN in this call */
+       if (so->so_error == ESHUTDOWN) {
+               if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
+                       /* Send has already closed */
+                       soisdisconnecting(so);
+               } else {
+                       /* Just close the receive side */
+                       socantrcvmore(so);
+               }
+       }
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: returning error = %d, so_error = %d\n",
+           __func__, error, so->so_error);
+
+       return (error);
+}
+
+/*
+ * Send hook for AF_HYPERV stream sockets.
+ *
+ * Writes the contents of uio to the pcb's vmbus channel through
+ * hvsock_send_data(), at most HVSOCK_SEND_BUF_SZ bytes per call and never
+ * more than hvsock_canwrite_check() reports as writable.  addr, top,
+ * controlp and td are not used by this function.
+ *
+ * Returns EINVAL for a non-stream socket, a missing pcb, an empty uio or a
+ * uio that is not a write request; EPIPE when the send side is shut or the
+ * socket error is ESHUTDOWN; EWOULDBLOCK when nothing could be sent and
+ * the call must not block; otherwise an error from sblock(), sbwait() or
+ * hvsock_send_data(), or 0.  When the ring fills up after part of the data
+ * was sent, the loop stops and the function returns 0 with the unsent
+ * remainder left in uio.
+ */
+int
+hvs_trans_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
+    struct mbuf *top, struct mbuf *controlp, int flags, struct thread *td)
+{
+       struct hvs_pcb *pcb = so2hvspcb(so);
+       struct sockbuf *sb;
+       ssize_t orig_resid;
+       uint32_t canwrite, to_write;
+       int error = 0;
+
+       /* uio_resid is handled as ssize_t here, hence %zd rather than %lu. */
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: HyperV Socket hvs_trans_sosend called, uio_resid = %zd\n",
+           __func__, uio->uio_resid);
+
+       if (so->so_type != SOCK_STREAM)
+               return (EINVAL);
+       if (pcb == NULL)
+               return (EINVAL);
+
+       /* If nothing to send */
+       if (uio->uio_resid == 0 || uio->uio_rw != UIO_WRITE)
+               return (EINVAL);
+
+       sb = &so->so_snd;
+
+       /* Remembered so we can later tell whether any data was sent. */
+       orig_resid = uio->uio_resid;
+
+       /* Prevent other writers from entering the socket. */
+       error = sblock(sb, SBLOCKWAIT(flags));
+       if (error) {
+               HVSOCK_DBG(HVSOCK_DBG_ERR,
+                   "%s: sblock returned error = %d\n", __func__, error);
+               return (error);
+       }
+
+       SOCKBUF_LOCK(sb);
+
+       if ((sb->sb_state & SBS_CANTSENDMORE) ||
+           so->so_error == ESHUTDOWN) {
+               error = EPIPE;
+               goto out;
+       }
+
+       while (uio->uio_resid > 0) {
+               canwrite = hvsock_canwrite_check(pcb);
+               if (canwrite == 0) {
+                       /* We have sent some data */
+                       if (orig_resid > uio->uio_resid)
+                               break;
+                       /*
+                        * We have not sent any data and it is
+                        * non-blocked io
+                        */
+                       if (so->so_state & SS_NBIO ||
+                           (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
+                               error = EWOULDBLOCK;
+                               break;
+                       } else {
+                               /*
+                                * We are here because there is no space on
+                                * send buffer ring. Signal the other side
+                                * to read and free more space.
+                                * Sleep wait until space is available to send.
+                                * Note: Drops the sockbuf lock during wait.
+                                */
+                               error = sbwait(sb);
+
+                               if (error)
+                                       break;
+
+                               HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+                                   "%s: wake up from sbwait, space avail on "
+                                   "tx ring is %u\n",
+                                   __func__,
+                                   vmbus_chan_write_available(pcb->chan));
+
+                               continue;
+                       }
+               }
+               /* Clamp to ring space, remaining data and the per-send cap. */
+               to_write = MIN(canwrite, uio->uio_resid);
+               to_write = MIN(to_write, HVSOCK_SEND_BUF_SZ);
+
+               HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+                   "%s: canwrite is %u, to_write = %u\n", __func__,
+                   canwrite, to_write);
+               error = hvsock_send_data(pcb->chan, uio, to_write, sb);
+
+               if (error)
+                       break;
+       }
+
+out:
+       SOCKBUF_UNLOCK(sb);
+       sbunlock(sb);
+
+       return (error);
+}
+
+/*
+ * Peer-address hook: return a freshly allocated copy of the pcb's remote
+ * address in *nam.
+ *
+ * Returns EINVAL when no pcb is attached and ENOMEM when the copy cannot
+ * be allocated (in which case *nam is NULL); 0 otherwise.
+ */
+int
+hvs_trans_peeraddr(struct socket *so, struct sockaddr **nam)
+{
+       struct hvs_pcb *pcb = so2hvspcb(so);
+       struct sockaddr *copy;
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: HyperV Socket hvs_trans_peeraddr called\n", __func__);
+
+       if (pcb == NULL)
+               return (EINVAL);
+
+       copy = sodupsockaddr((struct sockaddr *) &pcb->remote_addr, M_NOWAIT);
+       *nam = copy;
+       if (copy == NULL)
+               return (ENOMEM);
+
+       return (0);
+}
+
+/*
+ * Local-address hook: return a freshly allocated copy of the pcb's local
+ * address in *nam.
+ *
+ * Returns EINVAL when no pcb is attached and ENOMEM when the copy cannot
+ * be allocated (in which case *nam is NULL); 0 otherwise.
+ */
+int
+hvs_trans_sockaddr(struct socket *so, struct sockaddr **nam)
+{
+       struct hvs_pcb *pcb = so2hvspcb(so);
+       struct sockaddr *copy;
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: HyperV Socket hvs_trans_sockaddr called\n", __func__);
+
+       if (pcb == NULL)
+               return (EINVAL);
+
+       copy = sodupsockaddr((struct sockaddr *) &pcb->local_addr, M_NOWAIT);
+       *nam = copy;
+       if (copy == NULL)
+               return (ENOMEM);
+
+       return (0);
+}
+
+/*
+ * Close hook.  Under the transport lock: send a zero-length packet (FIN)
+ * to the peer if the socket is connected, mark a connected / connecting /
+ * disconnecting socket as disconnected, clear the pcb's channel and socket
+ * back-pointers, and take a listening socket off the bound list.
+ * Does nothing when no pcb is attached.
+ */
+void
+hvs_trans_close(struct socket *so)
+{
+       const int active_states =
+           SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING;
+       struct hvs_pcb *pcb;
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: HyperV Socket hvs_trans_close called\n", __func__);
+
+       (void) hvs_trans_lock();
+       pcb = so2hvspcb(so);
+       if (pcb == NULL) {
+               hvs_trans_unlock();
+               return;
+       }
+
+       if ((so->so_state & SS_ISCONNECTED) != 0) {
+               /* Send a FIN to peer */
+               HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+                   "%s: hvs_trans_close sending a FIN to host\n", __func__);
+               (void) hvsock_send_data(pcb->chan, NULL, 0, NULL);
+       }
+
+       if ((so->so_state & active_states) != 0)
+               soisdisconnected(so);
+
+       /* The pcb no longer refers to the channel or to this socket. */
+       pcb->chan = NULL;
+       pcb->so = NULL;
+
+       if (SOLISTENING(so)) {
+               /* Remove from bound list */
+               mtx_lock(&hvs_trans_socks_mtx);
+               __hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
+               mtx_unlock(&hvs_trans_socks_mtx);
+       }
+
+       hvs_trans_unlock();
+}
+
+/*
+ * Abort hook.  Under the transport lock: take a listening socket off the
+ * bound list and disconnect a connected socket via sodisconnect().
+ * Does nothing when no pcb is attached.
+ *
+ * Note that the pcb pointer is read before the transport lock is taken.
+ */
+void
+hvs_trans_abort(struct socket *so)
+{
+       struct hvs_pcb *pcb = so2hvspcb(so);
+
+       HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+           "%s: HyperV Socket hvs_trans_abort called\n", __func__);
+
+       (void) hvs_trans_lock();
+
+       if (pcb != NULL) {
+               if (SOLISTENING(so)) {
+                       /* Remove from bound list */
+                       mtx_lock(&hvs_trans_socks_mtx);
+                       __hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
+                       mtx_unlock(&hvs_trans_socks_mtx);
+               }
+
+               if ((so->so_state & SS_ISCONNECTED) != 0)
+                       (void) sodisconnect(so);
+       }
+
+       hvs_trans_unlock();
+}
+
+int
+hvs_trans_shutdown(struct socket *so)
+{
+       struct hvs_pcb *pcb = so2hvspcb(so);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to