Author: hselasky
Date: Thu Oct 22 09:09:53 2020
New Revision: 366930
URL: https://svnweb.freebsd.org/changeset/base/366930

Log:
  Factor out generic IP over infiniband, IPoIB, definitions and code
  into net/if_infiniband.c and net/infiniband.h . No functional change
  intended.
  
  Differential Revision:        https://reviews.freebsd.org/D26254
  Reviewed by:          melifaro@
  MFC after:            1 week
  Sponsored by:         Mellanox Technologies // NVIDIA Networking

Added:
  head/sys/modules/if_infiniband/
  head/sys/modules/if_infiniband/Makefile   (contents, props changed)
  head/sys/net/if_infiniband.c   (contents, props changed)
  head/sys/net/infiniband.h   (contents, props changed)
Modified:
  head/sys/conf/files
  head/sys/modules/Makefile
  head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
  head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
  head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
  head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files Thu Oct 22 08:40:25 2020        (r366929)
+++ head/sys/conf/files Thu Oct 22 09:09:53 2020        (r366930)
@@ -4571,6 +4571,7 @@ compat/lindebugfs/lindebugfs.c                    
optional lindebugfs \
        compile-with "${LINUXKPI_C}"
 
 # OpenFabrics Enterprise Distribution (Infiniband)
+net/if_infiniband.c                                    optional ofed
 ofed/drivers/infiniband/core/ib_addr.c                 optional ofed   \
        compile-with "${OFED_C}"
 ofed/drivers/infiniband/core/ib_agent.c                        optional ofed   
\

Modified: head/sys/modules/Makefile
==============================================================================
--- head/sys/modules/Makefile   Thu Oct 22 08:40:25 2020        (r366929)
+++ head/sys/modules/Makefile   Thu Oct 22 09:09:53 2020        (r366930)
@@ -154,6 +154,7 @@ SUBDIR=     \
        ${_if_gif} \
        ${_if_gre} \
        ${_if_me} \
+       if_infiniband \
        if_lagg \
        ${_if_ndis} \
        ${_if_stf} \

Added: head/sys/modules/if_infiniband/Makefile
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/modules/if_infiniband/Makefile     Thu Oct 22 09:09:53 2020        
(r366930)
@@ -0,0 +1,10 @@
+# $FreeBSD$
+
+.PATH: ${SRCTOP}/sys/net
+
+KMOD=  if_infiniband
+SRCS=  if_infiniband.c \
+       opt_inet.h \
+       opt_inet6.h
+
+.include <bsd.kmod.mk>

Added: head/sys/net/if_infiniband.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/net/if_infiniband.c        Thu Oct 22 09:09:53 2020        
(r366930)
@@ -0,0 +1,538 @@
+/*-
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/eventhandler.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/devctl.h>
+#include <sys/module.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/route.h>
+#include <net/ethernet.h>
+#include <net/infiniband.h>
+#include <net/bpf.h>
+#include <net/if_llatbl.h>
+#include <net/netisr.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/if_media.h>
+
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip6.h>
+
+#include <netinet6/in6_var.h>
+#include <netinet6/nd6.h>
+
+#include <security/mac/mac_framework.h>
+
+#ifdef INET
+static inline void
+infiniband_ipv4_multicast_map(uint32_t addr,
+    const uint8_t *broadcast, uint8_t *buf)
+{
+       uint8_t scope;
+
+       addr = ntohl(addr);
+       scope = broadcast[5] & 0xF;
+
+       buf[0] = 0;
+       buf[1] = 0xff;
+       buf[2] = 0xff;
+       buf[3] = 0xff;
+       buf[4] = 0xff;
+       buf[5] = 0x10 | scope;
+       buf[6] = 0x40;
+       buf[7] = 0x1b;
+       buf[8] = broadcast[8];
+       buf[9] = broadcast[9];
+       buf[10] = 0;
+       buf[11] = 0;
+       buf[12] = 0;
+       buf[13] = 0;
+       buf[14] = 0;
+       buf[15] = 0;
+       buf[16] = (addr >> 24) & 0xff;
+       buf[17] = (addr >> 16) & 0xff;
+       buf[18] = (addr >> 8) & 0xff;
+       buf[19] = addr & 0xff;
+}
+#endif
+
+#ifdef INET6
+static inline void
+infiniband_ipv6_multicast_map(const struct in6_addr *addr,
+    const uint8_t *broadcast, uint8_t *buf)
+{
+       uint8_t scope;
+
+       scope = broadcast[5] & 0xF;
+
+       buf[0] = 0;
+       buf[1] = 0xff;
+       buf[2] = 0xff;
+       buf[3] = 0xff;
+       buf[4] = 0xff;
+       buf[5] = 0x10 | scope;
+       buf[6] = 0x60;
+       buf[7] = 0x1b;
+       buf[8] = broadcast[8];
+       buf[9] = broadcast[9];
+       memcpy(&buf[10], &addr->s6_addr[6], 10);
+}
+#endif
+
+/*
+ * This is for clients that have an infiniband_header in the mbuf.
+ */
+void
+infiniband_bpf_mtap(struct ifnet *ifp, struct mbuf *mb)
+{
+       struct infiniband_header *ibh;
+       struct ether_header eh;
+  
+       if (mb->m_len < sizeof(*ibh))
+               return;
+
+       ibh = mtod(mb, struct infiniband_header *);
+       eh.ether_type = ibh->ib_protocol;
+       memset(eh.ether_shost, 0, ETHER_ADDR_LEN);
+       memcpy(eh.ether_dhost, ibh->ib_hwaddr + 4, ETHER_ADDR_LEN);
+       mb->m_data += sizeof(*ibh);
+       mb->m_len -= sizeof(*ibh);
+       mb->m_pkthdr.len -= sizeof(*ibh);
+       bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
+       mb->m_data -= sizeof(*ibh);
+       mb->m_len += sizeof(*ibh);
+       mb->m_pkthdr.len += sizeof(*ibh);
+}
+
+/*
+ * Infiniband output routine.
+ */
+static int
+infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr 
*dst,
+    struct route *ro)
+{
+       uint8_t edst[INFINIBAND_ADDR_LEN];
+#if defined(INET) || defined(INET6)
+       struct llentry *lle = NULL;
+#endif
+       struct infiniband_header *ibh;
+       int error = 0;
+       uint16_t type;
+       bool is_gw;
+
+       NET_EPOCH_ASSERT();
+
+       is_gw = ((ro != NULL) && (ro->ro_flags & RT_HAS_GW) != 0);
+
+#ifdef MAC
+       error = mac_ifnet_check_transmit(ifp, m);
+       if (error)
+               goto bad;
+#endif
+
+       M_PROFILE(m);
+       if (ifp->if_flags & IFF_MONITOR) {
+               error = ENETDOWN;
+               goto bad;
+       }
+       if (!((ifp->if_flags & IFF_UP) &&
+           (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
+               error = ENETDOWN;
+               goto bad;
+       }
+
+       switch (dst->sa_family) {
+       case AF_LINK:
+               goto output;
+#ifdef INET
+       case AF_INET:
+               if (lle != NULL && (lle->la_flags & LLE_VALID)) {
+                       memcpy(edst, lle->ll_addr, sizeof(edst));
+               } else if (m->m_flags & M_MCAST) {
+                       infiniband_ipv4_multicast_map(
+                           ((const struct sockaddr_in *)dst)->sin_addr.s_addr,
+                           ifp->if_broadcastaddr, edst);
+               } else {
+                       error = arpresolve(ifp, is_gw, m, dst, edst, NULL, 
NULL);
+                       if (error) {
+                               if (error == EWOULDBLOCK)
+                                       error = 0;
+                               m = NULL;       /* mbuf is consumed by resolver 
*/
+                               goto bad;
+                       }
+               }
+               type = htons(ETHERTYPE_IP);
+               break;
+       case AF_ARP: {
+               struct arphdr *ah;
+
+               if (m->m_len < sizeof(*ah)) {
+                       error = EINVAL;
+                       goto bad;
+               }
+
+               ah = mtod(m, struct arphdr *);
+
+               if (m->m_len < arphdr_len(ah)) {
+                       error = EINVAL;
+                       goto bad;
+               }
+               ah->ar_hrd = htons(ARPHRD_INFINIBAND);
+
+               switch (ntohs(ah->ar_op)) {
+               case ARPOP_REVREQUEST:
+               case ARPOP_REVREPLY:
+                       type = htons(ETHERTYPE_REVARP);
+                       break;
+               case ARPOP_REQUEST:
+               case ARPOP_REPLY:
+               default:
+                       type = htons(ETHERTYPE_ARP);
+                       break;
+               }
+
+               if (m->m_flags & M_BCAST) {
+                       memcpy(edst, ifp->if_broadcastaddr, 
INFINIBAND_ADDR_LEN);
+               } else {
+                       if (ah->ar_hln != INFINIBAND_ADDR_LEN) {
+                               error = EINVAL;
+                               goto bad;
+                       }
+                       memcpy(edst, ar_tha(ah), INFINIBAND_ADDR_LEN);
+               }
+               break;
+       }
+#endif
+#ifdef INET6
+       case AF_INET6: {
+               const struct ip6_hdr *ip6;
+
+               ip6 = mtod(m, const struct ip6_hdr *);
+               if (m->m_len < sizeof(*ip6)) {
+                       error = EINVAL;
+                       goto bad;
+               } else if (lle != NULL && (lle->la_flags & LLE_VALID)) {
+                       memcpy(edst, lle->ll_addr, sizeof(edst));
+               } else if (m->m_flags & M_MCAST) {
+                       infiniband_ipv6_multicast_map(
+                           &((const struct sockaddr_in6 *)dst)->sin6_addr,
+                           ifp->if_broadcastaddr, edst);
+               } else if (ip6->ip6_nxt == IPPROTO_ICMPV6) {
+                       memcpy(edst, ifp->if_broadcastaddr, 
INFINIBAND_ADDR_LEN);
+               } else {
+                       error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, 
NULL);
+                       if (error) {
+                               if (error == EWOULDBLOCK)
+                                       error = 0;
+                               m = NULL;       /* mbuf is consumed by resolver 
*/
+                               goto bad;
+                       }
+               }
+               type = htons(ETHERTYPE_IPV6);
+               break;
+       }
+#endif
+       default:
+               error = EAFNOSUPPORT;
+               goto bad;
+       }
+
+       /*
+        * Add local net header.  If no space in first mbuf,
+        * allocate another.
+        */
+       M_PREPEND(m, INFINIBAND_HDR_LEN, M_NOWAIT);
+       if (m == NULL) {
+               error = ENOBUFS;
+               goto bad;
+       }
+       ibh = mtod(m, struct infiniband_header *);
+
+       ibh->ib_protocol = type;
+       memcpy(ibh->ib_hwaddr, edst, sizeof(edst));
+
+       /*
+        * Queue message on interface, update output statistics if
+        * successful, and start output if interface not yet active.
+        */
+output:
+       return (ifp->if_transmit(ifp, m));
+bad:
+       if (m != NULL)
+               m_freem(m);
+       return (error);
+}
+
+/*
+ * Process a received Infiniband packet.
+ */
+static void
+infiniband_input(struct ifnet *ifp, struct mbuf *m)
+{
+       struct infiniband_header *ibh;
+       struct epoch_tracker et;
+       int isr;
+
+       CURVNET_SET_QUIET(ifp->if_vnet);
+
+       if ((ifp->if_flags & IFF_UP) == 0) {
+               if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+               m_freem(m);
+               goto done;
+       }
+
+       ibh = mtod(m, struct infiniband_header *);
+
+       /*
+        * Reset layer specific mbuf flags to avoid confusing upper
+        * layers:
+        */
+       m->m_flags &= ~M_VLANTAG;
+       m_clrprotoflags(m);
+
+       if (INFINIBAND_IS_MULTICAST(ibh->ib_hwaddr)) {
+               if (memcmp(ibh->ib_hwaddr, ifp->if_broadcastaddr,
+                   ifp->if_addrlen) == 0)
+                       m->m_flags |= M_BCAST;
+               else
+                       m->m_flags |= M_MCAST;
+               if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
+       }
+
+       /* Let BPF have it before we strip the header. */
+       INFINIBAND_BPF_MTAP(ifp, m);
+
+       /* Allow monitor mode to claim this frame, after stats are updated. */
+       if (ifp->if_flags & IFF_MONITOR) {
+               m_freem(m);
+               goto done;
+       }
+
+       /* Direct packet to correct FIB based on interface config. */
+       M_SETFIB(m, ifp->if_fib);
+
+       /*
+        * Dispatch frame to upper layer.
+        */
+       switch (ibh->ib_protocol) {
+#ifdef INET
+       case htons(ETHERTYPE_IP):
+               isr = NETISR_IP;
+               break;
+
+       case htons(ETHERTYPE_ARP):
+               if (ifp->if_flags & IFF_NOARP) {
+                       /* Discard packet if ARP is disabled on interface */
+                       m_freem(m);
+                       goto done;
+               }
+               isr = NETISR_ARP;
+               break;
+#endif
+#ifdef INET6
+       case htons(ETHERTYPE_IPV6):
+               isr = NETISR_IPV6;
+               break;
+#endif
+       default:
+               if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+               m_freem(m);
+               goto done;
+       }
+
+       /* Strip off the Infiniband header. */
+       m_adj(m, INFINIBAND_HDR_LEN);
+
+#ifdef MAC
+       /*
+        * Tag the mbuf with an appropriate MAC label before any other
+        * consumers can get to it.
+        */
+       mac_ifnet_create_mbuf(ifp, m);
+#endif
+       /* Allow monitor mode to claim this frame, after stats are updated. */
+       NET_EPOCH_ENTER(et);
+       netisr_dispatch(isr, m);
+       NET_EPOCH_EXIT(et);
+done:
+       CURVNET_RESTORE();
+}
+
+static int
+infiniband_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
+    struct sockaddr *sa)
+{
+       struct sockaddr_dl *sdl;
+#ifdef INET
+       struct sockaddr_in *sin;
+#endif
+#ifdef INET6
+       struct sockaddr_in6 *sin6;
+#endif
+       uint8_t *e_addr;
+
+       switch (sa->sa_family) {
+       case AF_LINK:
+               /*
+                * No mapping needed. Just check that it's a valid MC address.
+                */
+               sdl = (struct sockaddr_dl *)sa;
+               e_addr = LLADDR(sdl);
+               if (!INFINIBAND_IS_MULTICAST(e_addr))
+                       return (EADDRNOTAVAIL);
+               *llsa = NULL;
+               return 0;
+
+#ifdef INET
+       case AF_INET:
+               sin = (struct sockaddr_in *)sa;
+               if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+                       return (EADDRNOTAVAIL);
+               sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
+               sdl->sdl_alen = INFINIBAND_ADDR_LEN;
+               e_addr = LLADDR(sdl);
+               infiniband_ipv4_multicast_map(sin->sin_addr.s_addr, 
ifp->if_broadcastaddr,
+                   e_addr);
+               *llsa = (struct sockaddr *)sdl;
+               return (0);
+#endif
+#ifdef INET6
+       case AF_INET6:
+               sin6 = (struct sockaddr_in6 *)sa;
+               /*
+                * An IP6 address of 0 means listen to all of the
+                * multicast address used for IP6. This has no meaning
+                * in infiniband.
+                */
+               if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+                       return (EADDRNOTAVAIL);
+               if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
+                       return (EADDRNOTAVAIL);
+               sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
+               sdl->sdl_alen = INFINIBAND_ADDR_LEN;
+               e_addr = LLADDR(sdl);
+               infiniband_ipv6_multicast_map(&sin6->sin6_addr, 
ifp->if_broadcastaddr, e_addr);
+               *llsa = (struct sockaddr *)sdl;
+               return (0);
+#endif
+       default:
+               return (EAFNOSUPPORT);
+       }
+}
+
+void
+infiniband_ifattach(struct ifnet *ifp, const uint8_t *lla, const uint8_t *llb)
+{
+       struct sockaddr_dl *sdl;
+       struct ifaddr *ifa;
+       int i;
+
+       ifp->if_addrlen = INFINIBAND_ADDR_LEN;
+       ifp->if_hdrlen = INFINIBAND_HDR_LEN;
+       ifp->if_mtu = INFINIBAND_MTU;
+       if_attach(ifp);
+       ifp->if_output = infiniband_output;
+       ifp->if_input = infiniband_input;
+       ifp->if_resolvemulti = infiniband_resolvemulti;
+
+       if (ifp->if_baudrate == 0)
+               ifp->if_baudrate = IF_Gbps(10); /* default value */
+       if (llb != NULL)
+               ifp->if_broadcastaddr = llb;
+
+       ifa = ifp->if_addr;
+       KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
+       sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+       sdl->sdl_type = IFT_INFINIBAND;
+       sdl->sdl_alen = ifp->if_addrlen;
+
+       if (lla != NULL) {
+               memcpy(LLADDR(sdl), lla, ifp->if_addrlen);
+
+               if (ifp->if_hw_addr != NULL)
+                       memcpy(ifp->if_hw_addr, lla, ifp->if_addrlen);
+       } else {
+               lla = LLADDR(sdl);
+       }
+
+       /* Attach ethernet compatible network device */
+       bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
+
+       /* Announce Infiniband MAC address if non-zero. */
+       for (i = 0; i < ifp->if_addrlen; i++)
+               if (lla[i] != 0)
+                       break;
+       if (i != ifp->if_addrlen)
+               if_printf(ifp, "Infiniband address: %20D\n", lla, ":");
+
+       /* Add necessary bits are setup; announce it now. */
+       EVENTHANDLER_INVOKE(infiniband_ifattach_event, ifp);
+
+       if (IS_DEFAULT_VNET(curvnet))
+               devctl_notify("INFINIBAND", ifp->if_xname, "IFATTACH", NULL);
+}
+
+/*
+ * Perform common duties while detaching an Infiniband interface
+ */
+void
+infiniband_ifdetach(struct ifnet *ifp)
+{
+       bpfdetach(ifp);
+       if_detach(ifp);
+}
+
+static int
+infiniband_modevent(module_t mod, int type, void *data)
+{
+       switch (type) {
+       case MOD_LOAD:
+       case MOD_UNLOAD:
+               return (0);
+       default:
+               return (EOPNOTSUPP);
+       }
+}
+
+static moduledata_t infiniband_mod = {
+       .name = "if_infiniband",
+       .evhand = &infiniband_modevent,
+};
+
+DECLARE_MODULE(if_infiniband, infiniband_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
+MODULE_VERSION(if_infiniband, 1);

Added: head/sys/net/infiniband.h
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/net/infiniband.h   Thu Oct 22 09:09:53 2020        (r366930)
@@ -0,0 +1,80 @@
+/*-
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __INFINIBAND_H__
+#define        __INFINIBAND_H__
+
+#include <sys/cdefs.h>
+#include <sys/stdint.h>
+
+#define        INFINIBAND_ADDR_LEN     20      /* bytes */
+#define        INFINIBAND_MTU          1500    /* bytes - default value */
+
+#define        INFINIBAND_ENC_LEN      4       /* bytes */
+#define        INFINIBAND_HDR_LEN \
+    (INFINIBAND_ADDR_LEN + INFINIBAND_ENC_LEN)
+
+#define        INFINIBAND_IS_MULTICAST(addr) \
+    ((addr)[4] == 0xff)
+
+#define        INFINIBAND_BPF_MTAP(_ifp, _m)                   \
+do {                                                   \
+       if (bpf_peers_present((_ifp)->if_bpf)) {        \
+               M_ASSERTVALID(_m);                      \
+               infiniband_bpf_mtap(_ifp, _m);          \
+       }                                               \
+} while (0)
+
+struct infiniband_header {
+       uint8_t ib_hwaddr[INFINIBAND_ADDR_LEN];
+       uint16_t ib_protocol;           /* big endian */
+       uint16_t ib_reserved;           /* zero */
+} __packed;
+
+struct infiniband_address {
+       uint8_t octet[INFINIBAND_ADDR_LEN];
+} __packed;
+
+#ifdef _KERNEL
+
+#include <sys/_eventhandler.h>
+
+struct ifnet;
+struct mbuf;
+
+extern void infiniband_ifattach(struct ifnet *, const uint8_t *hwaddr, const 
uint8_t *bcaddr);
+extern void infiniband_ifdetach(struct ifnet *);
+extern void infiniband_bpf_mtap(struct ifnet *, struct mbuf *);
+
+/* new infiniband interface attached event */
+typedef void (*infiniband_ifattach_event_handler_t)(void *, struct ifnet *);
+
+EVENTHANDLER_DECLARE(infiniband_ifattach_event, 
infiniband_ifattach_event_handler_t);
+
+#endif
+
+#endif                                 /* __INFINIBAND_H__ */

Modified: head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
==============================================================================
--- head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h  Thu Oct 22 08:40:25 
2020        (r366929)
+++ head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h  Thu Oct 22 09:09:53 
2020        (r366930)
@@ -438,16 +438,7 @@ struct ipoib_path {
 
 extern struct workqueue_struct *ipoib_workqueue;
 
-#define IPOIB_MTAP_PROTO(_ifp, _m, _proto)                     \
-do {                                                           \
-       if (bpf_peers_present((_ifp)->if_bpf)) {                \
-               M_ASSERTVALID(_m);                              \
-               ipoib_mtap_proto((_ifp), (_m), (_proto));       \
-       }                                                       \
-} while (0)
-
 /* functions */
-void ipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto);
 void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
 void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
 
@@ -463,8 +454,6 @@ int ipoib_open(struct ipoib_dev_priv *priv);
 int ipoib_add_pkey_attr(struct ipoib_dev_priv *priv);
 int ipoib_add_umcast_attr(struct ipoib_dev_priv *priv);
 
-void ipoib_demux(struct ifnet *ifp, struct mbuf *m, u_short proto);
-
 void ipoib_send(struct ipoib_dev_priv *priv, struct mbuf *mb,
                struct ipoib_ah *address, u32 qpn);
 void ipoib_reap_ah(struct work_struct *work);
@@ -540,7 +529,7 @@ int ipoib_poll_tx(struct ipoib_dev_priv *priv, bool do
 
 void ipoib_dma_unmap_rx(struct ipoib_dev_priv *priv, struct ipoib_rx_buf 
*rx_req);
 void ipoib_dma_mb(struct ipoib_dev_priv *priv, struct mbuf *mb, unsigned int 
length);
-struct mbuf *ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct 
ipoib_rx_buf *rx_req, int size);
+struct mbuf *ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct 
ipoib_rx_buf *rx_req, int align, int size);
 
 
 void ipoib_set_ethtool_ops(struct ifnet *dev);

Modified: head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c       Thu Oct 22 
08:40:25 2020        (r366929)
+++ head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c       Thu Oct 22 
09:09:53 2020        (r366930)
@@ -153,7 +153,7 @@ static struct mbuf *
 ipoib_cm_alloc_rx_mb(struct ipoib_dev_priv *priv, struct ipoib_cm_rx_buf 
*rx_req)
 {
        return ipoib_alloc_map_mb(priv, (struct ipoib_rx_buf *)rx_req,
-           priv->cm.max_cm_mtu);
+           sizeof(struct ipoib_pseudoheader), priv->cm.max_cm_mtu);
 }
 
 static void ipoib_cm_free_rx_ring(struct ipoib_dev_priv *priv,
@@ -484,10 +484,7 @@ void ipoib_cm_handle_rx_wc(struct ipoib_dev_priv *priv
        struct mbuf *mb, *newmb;
        struct ipoib_cm_rx *p;
        int has_srq;
-       u_short proto;
 
-       CURVNET_SET_QUIET(dev->if_vnet);
-
        ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
                       wr_id, wc->status);
 
@@ -561,16 +558,24 @@ void ipoib_cm_handle_rx_wc(struct ipoib_dev_priv *priv
 
        ipoib_dma_mb(priv, mb, wc->byte_len);
 
-       if_inc_counter(dev, IFCOUNTER_IPACKETS, 1);
-       if_inc_counter(dev, IFCOUNTER_IBYTES, mb->m_pkthdr.len);
-
        mb->m_pkthdr.rcvif = dev;
-       proto = *mtod(mb, uint16_t *);
-       m_adj(mb, IPOIB_ENCAP_LEN);
 
-       IPOIB_MTAP_PROTO(dev, mb, proto);
-       ipoib_demux(dev, mb, ntohs(proto));
+       M_PREPEND(mb, sizeof(struct ipoib_pseudoheader), M_NOWAIT);
+       if (likely(mb != NULL)) {
+               struct ipoib_header *ibh;
 
+               if_inc_counter(dev, IFCOUNTER_IPACKETS, 1);
+               if_inc_counter(dev, IFCOUNTER_IBYTES, mb->m_pkthdr.len);
+
+               /* fixup destination infiniband address */
+               ibh = mtod(mb, struct ipoib_header *);
+               memset(ibh->hwaddr, 0, 4);
+               memcpy(ibh->hwaddr + 4, priv->local_gid.raw, sizeof(union 
ib_gid));
+
+               dev->if_input(dev, mb);
+       } else {
+               if_inc_counter(dev, IFCOUNTER_IERRORS, 1);
+       }
 repost:
        if (has_srq) {
                if (unlikely(ipoib_cm_post_receive_srq(priv, wr_id)))
@@ -587,7 +592,6 @@ repost:
                }
        }
 done:
-       CURVNET_RESTORE();
        return;
 }
 

Modified: head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c       Thu Oct 22 
08:40:25 2020        (r366929)
+++ head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c       Thu Oct 22 
09:09:53 2020        (r366930)
@@ -112,17 +112,19 @@ ipoib_dma_mb(struct ipoib_dev_priv *priv, struct mbuf 
 
 struct mbuf *
 ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req,
-    int size)
+    int align, int size)
 {
        struct mbuf *mb, *m;
        int i, j;
 
        rx_req->mb = NULL;
-       mb = m_getm2(NULL, size, M_NOWAIT, MT_DATA, M_PKTHDR);
+       mb = m_getm2(NULL, align + size, M_NOWAIT, MT_DATA, M_PKTHDR);
        if (mb == NULL)
                return (NULL);
        for (i = 0, m = mb; m != NULL; m = m->m_next, i++) {
-               m->m_len = M_SIZE(m);
+               m->m_len = M_SIZE(m) - align;
+               m->m_data += align;
+               align = 0;
                mb->m_pkthdr.len += m->m_len;
                rx_req->mapping[i] = ib_dma_map_single(priv->ca,
                    mtod(m, void *), m->m_len, DMA_FROM_DEVICE);
@@ -174,7 +176,7 @@ ipoib_alloc_rx_mb(struct ipoib_dev_priv *priv, int id)
 {
 
        return ipoib_alloc_map_mb(priv, &priv->rx_ring[id],
-           priv->max_ib_mtu + IB_GRH_BYTES);
+           0, priv->max_ib_mtu + IB_GRH_BYTES);
 }
 
 static int ipoib_ib_post_receives(struct ipoib_dev_priv *priv)

Modified: head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c     Thu Oct 22 
08:40:25 2020        (r366929)
+++ head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c     Thu Oct 22 
09:09:53 2020        (r366930)
@@ -40,21 +40,16 @@ __FBSDID("$FreeBSD$");
 #include "ipoib.h"
 #include <sys/eventhandler.h>
 
-static int ipoib_resolvemulti(struct ifnet *, struct sockaddr **,
-               struct sockaddr *);
-
-
 #include <linux/module.h>
 
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/vmalloc.h>
 
-#include <linux/if_arp.h>      /* For ARPHRD_xxx */
 #include <linux/if_vlan.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
 
+#include <net/infiniband.h>
+
 #include <rdma/ib_cache.h>
 
 MODULE_AUTHOR("Roland Dreier");
@@ -98,19 +93,8 @@ static struct net_device *ipoib_get_net_dev_by_params(
                const union ib_gid *gid, const struct sockaddr *addr,
                void *client_data);
 static void ipoib_start(struct ifnet *dev);
-static int ipoib_output(struct ifnet *ifp, struct mbuf *m,
-           const struct sockaddr *dst, struct route *ro);
 static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data);
-static void ipoib_input(struct ifnet *ifp, struct mbuf *m);
 
-#define        IPOIB_MTAP(_ifp, _m)                                    \
-do {                                                           \
-       if (bpf_peers_present((_ifp)->if_bpf)) {                \
-               M_ASSERTVALID(_m);                              \
-               ipoib_mtap_mb((_ifp), (_m));                    \
-       }                                                       \
-} while (0)
-
 static struct unrhdr *ipoib_unrhdr;
 
 static void
@@ -136,37 +120,6 @@ ipoib_unrhdr_uninit(void *arg)
 }
 SYSUNINIT(ipoib_unrhdr_uninit, SI_SUB_KLD - 1, SI_ORDER_ANY, 
ipoib_unrhdr_uninit, NULL);
 
-/*
- * This is for clients that have an ipoib_header in the mbuf.
- */
-static void
-ipoib_mtap_mb(struct ifnet *ifp, struct mbuf *mb)
-{
-       struct ipoib_header *ih;
-       struct ether_header eh;
-
-       ih = mtod(mb, struct ipoib_header *);
-       eh.ether_type = ih->proto;
-       bcopy(ih->hwaddr, &eh.ether_dhost, ETHER_ADDR_LEN);
-       bzero(&eh.ether_shost, ETHER_ADDR_LEN);
-       mb->m_data += sizeof(struct ipoib_header);
-       mb->m_len -= sizeof(struct ipoib_header);
-       bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
-       mb->m_data -= sizeof(struct ipoib_header);
-       mb->m_len += sizeof(struct ipoib_header);
-}
-
-void
-ipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto)
-{
-       struct ether_header eh;
-
-       eh.ether_type = proto;
-       bzero(&eh.ether_shost, ETHER_ADDR_LEN);
-       bzero(&eh.ether_dhost, ETHER_ADDR_LEN);
-       bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
-}
-
 static struct ib_client ipoib_client = {
        .name   = "ipoib",
        .add    = ipoib_add_one,
@@ -787,7 +740,7 @@ ipoib_start_locked(struct ifnet *dev, struct ipoib_dev
                IFQ_DRV_DEQUEUE(&dev->if_snd, mb);
                if (mb == NULL)
                        break;
-               IPOIB_MTAP(dev, mb);
+               INFINIBAND_BPF_MTAP(dev, mb);
                ipoib_send_one(priv, mb);
        }
 }
@@ -875,8 +828,7 @@ ipoib_detach(struct ipoib_dev_priv *priv)
        dev = priv->dev;
        if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
                priv->gone = 1;
-               bpfdetach(dev);
-               if_detach(dev);
+               infiniband_ifdetach(dev);
                if_free(dev);
                free_unr(ipoib_unrhdr, priv->unit);
        } else
@@ -935,7 +887,6 @@ struct ipoib_dev_priv *
 ipoib_intf_alloc(const char *name)
 {
        struct ipoib_dev_priv *priv;
-       struct sockaddr_dl *sdl;
        struct ifnet *dev;
 
        priv = ipoib_priv_alloc();
@@ -953,24 +904,17 @@ ipoib_intf_alloc(const char *name)
        }
        if_initname(dev, name, priv->unit);
        dev->if_flags = IFF_BROADCAST | IFF_MULTICAST;
-       dev->if_addrlen = INFINIBAND_ALEN;
-       dev->if_hdrlen = IPOIB_HEADER_LEN;
-       if_attach(dev);
+
+       infiniband_ifattach(dev, NULL, priv->broadcastaddr);
+
        dev->if_init = ipoib_init;
        dev->if_ioctl = ipoib_ioctl;
        dev->if_start = ipoib_start;
-       dev->if_output = ipoib_output;
-       dev->if_input = ipoib_input;
-       dev->if_resolvemulti = ipoib_resolvemulti;
-       dev->if_baudrate = IF_Gbps(10);
-       dev->if_broadcastaddr = priv->broadcastaddr;
+
        dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2;
-       sdl = (struct sockaddr_dl *)dev->if_addr->ifa_addr;
-       sdl->sdl_type = IFT_INFINIBAND;
-       sdl->sdl_alen = dev->if_addrlen;
+
        priv->dev = dev;
        if_link_state_change(dev, LINK_STATE_DOWN);
-       bpfattach(dev, DLT_EN10MB, ETHER_HDR_LEN);
 
        return dev->if_softc;
 }
@@ -1165,7 +1109,6 @@ ipoib_match_dev_addr(const struct sockaddr *addr, stru
        struct ifaddr *ifa;
        int retval = 0;
 
-       CURVNET_SET(dev->if_vnet);
        NET_EPOCH_ENTER(et);
        CK_STAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) {
                if (ifa->ifa_addr == NULL ||
@@ -1179,7 +1122,6 @@ ipoib_match_dev_addr(const struct sockaddr *addr, stru
                }
        }
        NET_EPOCH_EXIT(et);
-       CURVNET_RESTORE();
 
        return (retval);
 }
@@ -1475,286 +1417,6 @@ ipoib_cleanup_module(void)
        ib_sa_unregister_client(&ipoib_sa_client);
        destroy_workqueue(ipoib_workqueue);
 }
-
-/*
- * Infiniband output routine.
- */
-static int
-ipoib_output(struct ifnet *ifp, struct mbuf *m,
-       const struct sockaddr *dst, struct route *ro)
-{
-       u_char edst[INFINIBAND_ALEN];
-#if defined(INET) || defined(INET6)
-       struct llentry *lle = NULL;
-#endif
-       struct ipoib_header *eh;
-       int error = 0, is_gw = 0;
-       short type;
-
-       NET_EPOCH_ASSERT();
-
-       if (ro != NULL)
-               is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
-#ifdef MAC
-       error = mac_ifnet_check_transmit(ifp, m);
-       if (error)
-               goto bad;
-#endif
-
-       M_PROFILE(m);
-       if (ifp->if_flags & IFF_MONITOR) {
-               error = ENETDOWN;
-               goto bad;
-       }
-       if (!((ifp->if_flags & IFF_UP) &&
-           (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
-               error = ENETDOWN;
-               goto bad;
-       }
-
-       switch (dst->sa_family) {
-#ifdef INET
-       case AF_INET:
-               if (lle != NULL && (lle->la_flags & LLE_VALID))
-                       memcpy(edst, lle->ll_addr, sizeof(edst));
-               else if (m->m_flags & M_MCAST)
-                       ip_ib_mc_map(((struct sockaddr_in 
*)dst)->sin_addr.s_addr, ifp->if_broadcastaddr, edst);
-               else
-                       error = arpresolve(ifp, is_gw, m, dst, edst, NULL, 
NULL);
-               if (error)
-                       return (error == EWOULDBLOCK ? 0 : error);
-               type = htons(ETHERTYPE_IP);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to