Author: zec
Date: Sun Aug 30 07:34:32 2020
New Revision: 364973
URL: https://svnweb.freebsd.org/changeset/base/364973

Log:
  Driver for 4x10Gb Ethernet reference NIC FPGA design for NetFPGA SUME
  development board.
  
  Submitted by: Denis Salopek <denis.salopek AT fer.hr>
  Reported by:  zec, bz (src); rgrimes, bcr (manpages)
  MFC after:    7 days
  Sponsored by: Google Summer of Code 2020
  Differential Revision:        https://reviews.freebsd.org/D26074

Added:
  head/share/man/man4/sume.4   (contents, props changed)
  head/sys/dev/sume/
  head/sys/dev/sume/adapter.h   (contents, props changed)
  head/sys/dev/sume/if_sume.c   (contents, props changed)
  head/sys/modules/sume/
  head/sys/modules/sume/Makefile   (contents, props changed)
Modified:
  head/share/man/man4/Makefile
  head/sys/conf/files.amd64
  head/sys/modules/Makefile

Modified: head/share/man/man4/Makefile
==============================================================================
--- head/share/man/man4/Makefile        Sun Aug 30 02:26:43 2020        
(r364972)
+++ head/share/man/man4/Makefile        Sun Aug 30 07:34:32 2020        
(r364973)
@@ -514,6 +514,7 @@ MAN=        aac.4 \
        ste.4 \
        stf.4 \
        stge.4 \
+       ${_sume.4} \
        ${_superio.4} \
        sym.4 \
        syncache.4 \
@@ -851,6 +852,7 @@ _qlxgbe.4=  qlxgbe.4
 _qlnxe.4=      qlnxe.4
 _sfxge.4=      sfxge.4
 _smartpqi.4=   smartpqi.4
+_sume.4=       sume.4
 _vmd.4=                vmd.4
 
 MLINKS+=qlxge.4 if_qlxge.4
@@ -858,6 +860,7 @@ MLINKS+=qlxgb.4 if_qlxgb.4
 MLINKS+=qlxgbe.4 if_qlxgbe.4
 MLINKS+=qlnxe.4 if_qlnxe.4
 MLINKS+=sfxge.4 if_sfxge.4
+MLINKS+=sume.4 if_sume.4
 
 .if ${MK_BHYVE} != "no"
 _bhyve.4=      bhyve.4

Added: head/share/man/man4/sume.4
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/share/man/man4/sume.4  Sun Aug 30 07:34:32 2020        (r364973)
@@ -0,0 +1,98 @@
+.\"-
+.\" SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+.\"
+.\" Copyright (c) 2020 Denis Salopek
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+.\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+.\" LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd August 30, 2020
+.Dt SUME 4
+.Os
+.Sh NAME
+.Nm sume
+.Nd "NetFPGA SUME 4x10Gb Ethernet driver"
+.Sh SYNOPSIS
+To compile this driver into the kernel, place the following lines
+in your kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "device sume"
+.Ed
+.Pp
+Alternatively, to load the driver as a module at boot time, place
+the following line in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+if_sume_load="YES"
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+driver provides support for NetFPGA SUME Virtex-7 FPGA Development Board
+with the reference NIC bitstream loaded onto it.
+The HDL design for the reference NIC project uses the RIFFA based DMA
+engine to communicate with the host machine over PCIe.
+Every packet is transmitted to / from the board via a single DMA
+transaction, taking up to two or three interrupts per one transaction
+which yields low performance.
+.Pp
+There is no support for Jumbo frames as the hardware is capable of
+dealing only with frames with maximum size of 1514 bytes.
+The hardware does not support multicast filtering, provides no checksums,
+and offers no other offloading.
+.Sh SEE ALSO
+.Xr arp 4 ,
+.Xr netgraph 4 ,
+.Xr netintro 4 ,
+.Xr ng_ether 4 ,
+.Xr vlan 4 ,
+.Xr ifconfig 8
+.Sh AUTHORS
+The Linux
+.Nm
+driver was originally written by
+.An -nosplit
+.An Bjoern A. Zeeb .
+The
+.Fx version and this manual page were written by
+.An Denis Salopek
+as a GSoC project.
+More information about the project can be found here:
+.Pa https://wiki.freebsd.org/SummerOfCode2020Projects/NetFPGA_SUME_Driver
+.Sh BUGS
+The reference NIC hardware design provides no mechanism for quiescing
+inbound traffic from interfaces configured as DOWN.
+All packets from administratively disabled interfaces are transferred to
+main memory, leaving the driver with the task of dropping such packets,
+thus consuming PCI bandwidth, interrupts and CPU cycles in vain.
+.Pp
+Pre-built FPGA bitstream from the NetFPGA project may not work correctly.
+At higher RX packet rates, the newly incoming packets can overwrite the
+ones in an internal FIFO so the packets would arrive in main memory
+corrupted, until a physical reset of the board.
+.Pp
+Occasionally, the driver can get stuck in a non-IDLE TX state due to
+a missed interrupt.
+The driver includes a watchdog function which monitors for such a
+condition and resets the board automatically.
+For more details, visit the NetFPGA SUME project site.

Modified: head/sys/conf/files.amd64
==============================================================================
--- head/sys/conf/files.amd64   Sun Aug 30 02:26:43 2020        (r364972)
+++ head/sys/conf/files.amd64   Sun Aug 30 07:34:32 2020        (r364973)
@@ -355,6 +355,7 @@ dev/smartpqi/smartpqi_response.c    optional        smartpqi
 dev/smartpqi/smartpqi_sis.c     optional       smartpqi
 dev/smartpqi/smartpqi_tag.c     optional       smartpqi
 dev/speaker/spkr.c             optional        speaker
+dev/sume/if_sume.c             optional        sume
 dev/superio/superio.c          optional        superio isa
 dev/syscons/apm/apm_saver.c    optional        apm_saver apm
 dev/syscons/scvesactl.c                optional        sc vga vesa

Added: head/sys/dev/sume/adapter.h
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/dev/sume/adapter.h Sun Aug 30 07:34:32 2020        (r364973)
@@ -0,0 +1,242 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2015 Bjoern A. Zeeb
+ * Copyright (c) 2020 Denis Salopek
+ *
+ * This software was developed by SRI International and the University of
+ * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-11-C-0249
+ * ("MRC2"), as part of the DARPA MRC research programme.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $FreeBSD$ */
+
+#define        DEFAULT_ETHER_ADDRESS           "\02SUME\00"
+#define        SUME_ETH_DEVICE_NAME            "sume"
+#define        MAX_IFC_NAME_LEN                8
+
+#define        SUME_NPORTS                     4
+
+#define        SUME_IOCTL_CMD_WRITE_REG        (SIOCGPRIVATE_0)
+#define        SUME_IOCTL_CMD_READ_REG         (SIOCGPRIVATE_1)
+
+#define        SUME_LOCK(adapter)              mtx_lock(&adapter->lock);
+#define        SUME_UNLOCK(adapter)            mtx_unlock(&adapter->lock);
+
+/* Currently SUME only uses 2 fixed channels for all port traffic and regs. */
+#define        SUME_RIFFA_CHANNEL_DATA         0
+#define        SUME_RIFFA_CHANNEL_REG          1
+#define        SUME_RIFFA_CHANNELS             2
+
+/* RIFFA constants. */
+#define        RIFFA_MAX_CHNLS                 12
+#define        RIFFA_MAX_BUS_WIDTH_PARAM       4
+#define        RIFFA_SG_BUF_SIZE               (4*1024)
+#define        RIFFA_SG_ELEMS                  200
+
+/* RIFFA register offsets. */
+#define        RIFFA_RX_SG_LEN_REG_OFF         0x0
+#define        RIFFA_RX_SG_ADDR_LO_REG_OFF     0x1
+#define        RIFFA_RX_SG_ADDR_HI_REG_OFF     0x2
+#define        RIFFA_RX_LEN_REG_OFF            0x3
+#define        RIFFA_RX_OFFLAST_REG_OFF        0x4
+#define        RIFFA_TX_SG_LEN_REG_OFF         0x5
+#define        RIFFA_TX_SG_ADDR_LO_REG_OFF     0x6
+#define        RIFFA_TX_SG_ADDR_HI_REG_OFF     0x7
+#define        RIFFA_TX_LEN_REG_OFF            0x8
+#define        RIFFA_TX_OFFLAST_REG_OFF        0x9
+#define        RIFFA_INFO_REG_OFF              0xA
+#define        RIFFA_IRQ_REG0_OFF              0xB
+#define        RIFFA_IRQ_REG1_OFF              0xC
+#define        RIFFA_RX_TNFR_LEN_REG_OFF       0xD
+#define        RIFFA_TX_TNFR_LEN_REG_OFF       0xE
+
+#define        RIFFA_CHNL_REG(c, o)            ((c << 4) + o)
+
+/*
+ * RIFFA state machine;
+ * rather than using complex circular buffers for 1 transaction.
+ */
+#define        SUME_RIFFA_CHAN_STATE_IDLE      0x01
+#define        SUME_RIFFA_CHAN_STATE_READY     0x02
+#define        SUME_RIFFA_CHAN_STATE_READ      0x04
+#define        SUME_RIFFA_CHAN_STATE_LEN       0x08
+
+/* Accessor macros. */
+#define        SUME_OFFLAST                    ((0 << 1) | (1 & 0x01))
+#define        SUME_RIFFA_LAST(offlast)        ((offlast) & 0x01)
+#define        SUME_RIFFA_OFFSET(offlast)      ((uint64_t)((offlast) >> 1) << 
2)
+#define        SUME_RIFFA_LEN(len)             ((uint64_t)(len) << 2)
+
+#define        SUME_RIFFA_LO_ADDR(addr)        (addr & 0xFFFFFFFF)
+#define        SUME_RIFFA_HI_ADDR(addr)        ((addr >> 32) & 0xFFFFFFFF)
+
+/* Vector bits. */
+#define        SUME_MSI_RXQUE                  (1 << 0)
+#define        SUME_MSI_RXBUF                  (1 << 1)
+#define        SUME_MSI_RXDONE                 (1 << 2)
+#define        SUME_MSI_TXBUF                  (1 << 3)
+#define        SUME_MSI_TXDONE                 (1 << 4)
+
+/* Invalid vector. */
+#define        SUME_INVALID_VECT               0xc0000000
+
+/* Module register data (packet counters, link status...) */
+#define        SUME_MOD0_REG_BASE              0x44040000
+#define        SUME_MOD_REG(port)              (SUME_MOD0_REG_BASE + 0x10000 * 
port)
+
+#define        SUME_RESET_OFFSET               0x8
+#define        SUME_PKTIN_OFFSET               0x18
+#define        SUME_PKTOUT_OFFSET              0x1c
+#define        SUME_STATUS_OFFSET              0x48
+
+#define        SUME_RESET_ADDR(p)              (SUME_MOD_REG(p) + 
SUME_RESET_OFFSET)
+#define        SUME_STAT_RX_ADDR(p)            (SUME_MOD_REG(p) + 
SUME_PKTIN_OFFSET)
+#define        SUME_STAT_TX_ADDR(p)            (SUME_MOD_REG(p) + 
SUME_PKTOUT_OFFSET)
+#define        SUME_STATUS_ADDR(p)             (SUME_MOD_REG(p) + 
SUME_STATUS_OFFSET)
+
+#define        SUME_LINK_STATUS(val)           ((val >> 12) & 0x1)
+
+/* Various bits and pieces. */
+#define        SUME_RIFFA_MAGIC                0xcafe
+#define        SUME_MR_WRITE                   0x1f
+#define        SUME_MR_READ                    0x00
+#define        SUME_INIT_RTAG                  -3
+#define        SUME_DPORT_MASK                 0xaa
+#define        SUME_MIN_PKT_SIZE               (ETHER_MIN_LEN - ETHER_CRC_LEN)
+
+struct irq {
+       uint32_t                rid;
+       struct resource         *res;
+       void                    *tag;
+} __aligned(CACHE_LINE_SIZE);
+
+struct nf_stats {
+       uint64_t                hw_rx_packets;
+       uint64_t                hw_tx_packets;
+       uint64_t                ifc_down_bytes;
+       uint64_t                ifc_down_packets;
+       uint64_t                rx_bytes;
+       uint64_t                rx_dropped;
+       uint64_t                rx_packets;
+       uint64_t                tx_bytes;
+       uint64_t                tx_dropped;
+       uint64_t                tx_packets;
+};
+
+struct riffa_chnl_dir {
+       uint32_t                state;
+       bus_dma_tag_t           ch_tag;
+       bus_dmamap_t            ch_map;
+       char                    *buf_addr;      /* bouncebuf addresses+len. */
+       bus_addr_t              buf_hw_addr;    /* -- " -- mapped. */
+       uint32_t                num_sg;
+       uint32_t                event;          /* Used for modreg r/w */
+       uint32_t                len;            /* words */
+       uint32_t                offlast;
+       uint32_t                recovery;
+       uint32_t                rtag;
+};
+
+struct sume_ifreq {
+       uint32_t                addr;
+       uint32_t                val;
+};
+
+struct nf_priv {
+       struct sume_adapter     *adapter;
+       struct ifmedia          media;
+       struct nf_stats         stats;
+       uint32_t                unit;
+       uint32_t                port;
+       uint32_t                link_up;
+};
+
+struct sume_adapter {
+       struct mtx              lock;
+       uint32_t                running;
+       uint32_t                rid;
+       struct riffa_chnl_dir   **recv;
+       struct riffa_chnl_dir   **send;
+       device_t                dev;
+       struct ifnet            *ifp[SUME_NPORTS];
+       struct resource         *bar0_addr;
+       bus_space_tag_t         bt;
+       bus_space_handle_t      bh;
+       bus_size_t              bar0_len;
+       struct irq              irq;
+       struct callout          timer;
+       struct task             stat_task;
+       struct taskqueue        *tq;
+       uint64_t                bytes_err;
+       uint64_t                packets_err;
+       uint32_t                last_ifc;
+       uint32_t                num_sg;
+       uint32_t                sg_buf_size;
+       uint32_t                sume_debug;
+       uint32_t                wd_counter;
+};
+
+/* SUME metadata:
+ * sport - not used for RX. For TX, set to 0x02, 0x08, 0x20, 0x80, depending on
+ *     the sending interface (nf0, nf1, nf2 or nf3).
+ * dport - For RX, is set to 0x02, 0x08, 0x20, 0x80, depending on the receiving
+ *     interface (nf0, nf1, nf2 or nf3). For TX, set to 0x01, 0x04, 0x10, 0x40,
+ *     depending on the sending HW interface (nf0, nf1, nf2 or nf3).
+ * plen - length of the send/receive packet data (in bytes)
+ * magic - SUME hardcoded magic number which should be 0xcafe
+ * t1, t1 - could be used for timestamping by SUME
+ */
+struct nf_metadata {
+       uint16_t                sport;
+       uint16_t                dport;
+       uint16_t                plen;
+       uint16_t                magic;
+       uint32_t                t1;
+       uint32_t                t2;
+};
+
+/* Used for ioctl communication with the rwaxi program used to read/write SUME
+ *    internally defined register data.
+ * addr - address of the SUME module register to read/write
+ * val - value to write/read to/from the register
+ * rtag - returned on read: transaction tag, for syncronization
+ * optype - 0x1f when writing, 0x00 for reading
+ */
+struct nf_regop_data {
+       uint32_t                addr;
+       uint32_t                val;
+       uint32_t                rtag;
+       uint32_t                optype;
+};
+
+/* Our bouncebuffer "descriptor". This holds our physical address (lower and
+ * upper values) of the beginning of the DMA data to RX/TX. The len is number
+ * of words to transmit.
+ */
+struct nf_bb_desc {
+       uint32_t                lower;
+       uint32_t                upper;
+       uint32_t                len;
+};

Added: head/sys/dev/sume/if_sume.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/dev/sume/if_sume.c Sun Aug 30 07:34:32 2020        (r364973)
@@ -0,0 +1,1602 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2015 Bjoern A. Zeeb
+ * Copyright (c) 2020 Denis Salopek
+ *
+ * This software was developed by SRI International and the University of
+ * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-11-C-0249
+ * ("MRC2"), as part of the DARPA MRC research programme.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/endian.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+
+#include <net/if.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+
+#include <machine/bus.h>
+
+#include "adapter.h"
+
+#define        PCI_VENDOR_ID_XILINX    0x10ee
+#define        PCI_DEVICE_ID_SUME      0x7028
+
+/* SUME bus driver interface */
+static int sume_probe(device_t);
+static int sume_attach(device_t);
+static int sume_detach(device_t);
+
+static device_method_t sume_methods[] = {
+       DEVMETHOD(device_probe,         sume_probe),
+       DEVMETHOD(device_attach,        sume_attach),
+       DEVMETHOD(device_detach,        sume_detach),
+       DEVMETHOD_END
+};
+
+static driver_t sume_driver = {
+       "sume",
+       sume_methods,
+       sizeof(struct sume_adapter)
+};
+
+/*
+ * The DMA engine for SUME generates interrupts for each RX/TX transaction.
+ * Depending on the channel (0 if packet transaction, 1 if register 
transaction)
+ * the used bits of the interrupt vector will be the lowest or the second 
lowest
+ * 5 bits.
+ *
+ * When receiving packets from SUME (RX):
+ * (1) SUME received a packet on one of the interfaces.
+ * (2) SUME generates an interrupt vector, bit 00001 is set (channel 0 - new RX
+ *     transaction).
+ * (3) We read the length of the incoming packet and the offset along with the
+ *     'last' flag from the SUME registers.
+ * (4) We prepare for the DMA transaction by setting the bouncebuffer on the
+ *     address buf_addr. For now, this is how it's done:
+ *     - First 3*sizeof(uint32_t) bytes are: lower and upper 32 bits of 
physical
+ *     address where we want the data to arrive (buf_addr[0] and buf_addr[1]),
+ *     and length of incoming data (buf_addr[2]).
+ *     - Data will start right after, at buf_addr+3*sizeof(uint32_t). The
+ *     physical address buf_hw_addr is a block of contiguous memory mapped to
+ *     buf_addr, so we can set the incoming data's physical address 
(buf_addr[0]
+ *     and buf_addr[1]) to buf_hw_addr+3*sizeof(uint32_t).
+ * (5) We notify SUME that the bouncebuffer is ready for the transaction by
+ *     writing the lower/upper physical address buf_hw_addr to the SUME
+ *     registers RIFFA_TX_SG_ADDR_LO_REG_OFF and RIFFA_TX_SG_ADDR_HI_REG_OFF as
+ *     well as the number of segments to the register RIFFA_TX_SG_LEN_REG_OFF.
+ * (6) SUME generates an interrupt vector, bit 00010 is set (channel 0 -
+ *     bouncebuffer received).
+ * (7) SUME generates an interrupt vector, bit 00100 is set (channel 0 -
+ *     transaction is done).
+ * (8) SUME can do both steps (6) and (7) using the same interrupt.
+ * (8) We read the first 16 bytes (metadata) of the received data and note the
+ *     incoming interface so we can later forward it to the right one in the OS
+ *     (sume0, sume1, sume2 or sume3).
+ * (10) We create an mbuf and copy the data from the bouncebuffer to the mbuf
+ *     and set the mbuf rcvif to the incoming interface.
+ * (11) We forward the mbuf to the appropriate interface via ifp->if_input.
+ *
+ * When sending packets to SUME (TX):
+ * (1) The OS calls sume_if_start() function on TX.
+ * (2) We get the mbuf packet data and copy it to the
+ *     buf_addr+3*sizeof(uint32_t) + metadata 16 bytes.
+ * (3) We create the metadata based on the output interface and copy it to the
+ *     buf_addr+3*sizeof(uint32_t).
+ * (4) We write the offset/last and length of the packet to the SUME registers
+ *     RIFFA_RX_OFFLAST_REG_OFF and RIFFA_RX_LEN_REG_OFF.
+ * (5) We fill the bouncebuffer by filling the first 3*sizeof(uint32_t) bytes
+ *     with the physical address and length just as in RX step (4).
+ * (6) We notify SUME that the bouncebuffer is ready by writing to SUME
+ *     registers RIFFA_RX_SG_ADDR_LO_REG_OFF, RIFFA_RX_SG_ADDR_HI_REG_OFF and
+ *     RIFFA_RX_SG_LEN_REG_OFF just as in RX step (5).
+ * (7) SUME generates an interrupt vector, bit 01000 is set (channel 0 -
+ *     bouncebuffer is read).
+ * (8) SUME generates an interrupt vector, bit 10000 is set (channel 0 -
+ *     transaction is done).
+ * (9) SUME can do both steps (7) and (8) using the same interrupt.
+ *
+ * Internal registers
+ * Every module in the SUME hardware has its own set of internal registers
+ * (IDs, for debugging and statistic purposes, etc.). Their base addresses are
+ * defined in 'projects/reference_nic/hw/tcl/reference_nic_defines.tcl' and the
+ * offsets to different memory locations of every module are defined in their
+ * corresponding folder inside the library. These registers can be RO/RW and
+ * there is a special method to fetch/change this data over 1 or 2 DMA
+ * transactions. For writing, by calling the sume_module_reg_write(). For
+ * reading, by calling the sume_module_reg_write() and then
+ * sume_module_reg_read(). Check those functions for more information.
+ */
+
+MALLOC_DECLARE(M_SUME);
+MALLOC_DEFINE(M_SUME, "sume", "NetFPGA SUME device driver");
+
+static void check_tx_queues(struct sume_adapter *);
+static void sume_fill_bb_desc(struct sume_adapter *, struct riffa_chnl_dir *,
+    uint64_t);
+
+static struct unrhdr *unr;
+
+static struct {
+       uint16_t device;
+       char *desc;
+} sume_pciids[] = {
+       {PCI_DEVICE_ID_SUME, "NetFPGA SUME reference NIC"},
+};
+
+static inline uint32_t
+read_reg(struct sume_adapter *adapter, int offset)
+{
+
+       return (bus_space_read_4(adapter->bt, adapter->bh, offset << 2));
+}
+
+static inline void
+write_reg(struct sume_adapter *adapter, int offset, uint32_t val)
+{
+
+       bus_space_write_4(adapter->bt, adapter->bh, offset << 2, val);
+}
+
+static int
+sume_probe(device_t dev)
+{
+       int i;
+       uint16_t v = pci_get_vendor(dev);
+       uint16_t d = pci_get_device(dev);
+
+       if (v != PCI_VENDOR_ID_XILINX)
+               return (ENXIO);
+
+       for (i = 0; i < nitems(sume_pciids); i++) {
+               if (d == sume_pciids[i].device) {
+                       device_set_desc(dev, sume_pciids[i].desc);
+                       return (BUS_PROBE_DEFAULT);
+               }
+       }
+
+       return (ENXIO);
+}
+
+/*
+ * Building mbuf for packet received from SUME. We expect to receive 'len'
+ * bytes of data (including metadata) written from the bouncebuffer address
+ * buf_addr+3*sizeof(uint32_t). Metadata will tell us which SUME interface
+ * received the packet (sport will be 1, 2, 4 or 8), the packet length (plen),
+ * and the magic word needs to be 0xcafe. When we have the packet data, we
+ * create an mbuf and copy the data to it using m_copyback() function, set the
+ * correct interface to rcvif and return the mbuf to be later sent to the OS
+ * with if_input.
+ */
+static struct mbuf *
+sume_rx_build_mbuf(struct sume_adapter *adapter, uint32_t len)
+{
+       struct nf_priv *nf_priv;
+       struct mbuf *m;
+       struct ifnet *ifp = NULL;
+       int np;
+       uint16_t dport, plen, magic;
+       device_t dev = adapter->dev;
+       uint8_t *indata = (uint8_t *)
+           adapter->recv[SUME_RIFFA_CHANNEL_DATA]->buf_addr +
+           sizeof(struct nf_bb_desc);
+       struct nf_metadata *mdata = (struct nf_metadata *) indata;
+
+       /* The metadata header is 16 bytes. */
+       if (len < sizeof(struct nf_metadata)) {
+               device_printf(dev, "short frame (%d)\n", len);
+               adapter->packets_err++;
+               adapter->bytes_err += len;
+               return (NULL);
+       }
+
+       dport = le16toh(mdata->dport);
+       plen = le16toh(mdata->plen);
+       magic = le16toh(mdata->magic);
+
+       if (sizeof(struct nf_metadata) + plen > len ||
+           magic != SUME_RIFFA_MAGIC) {
+               device_printf(dev, "corrupted packet (%zd + %d > %d || magic "
+                   "0x%04x != 0x%04x)\n", sizeof(struct nf_metadata), plen,
+                   len, magic, SUME_RIFFA_MAGIC);
+               return (NULL);
+       }
+
+       /* We got the packet from one of the even bits */
+       np = (ffs(dport & SUME_DPORT_MASK) >> 1) - 1;
+       if (np > SUME_NPORTS) {
+               device_printf(dev, "invalid destination port 0x%04x (%d)\n",
+                   dport, np);
+               adapter->packets_err++;
+               adapter->bytes_err += plen;
+               return (NULL);
+       }
+       ifp = adapter->ifp[np];
+       nf_priv = ifp->if_softc;
+       nf_priv->stats.rx_packets++;
+       nf_priv->stats.rx_bytes += plen;
+
+       /* If the interface is down, well, we are done. */
+       if (!(ifp->if_flags & IFF_UP)) {
+               nf_priv->stats.ifc_down_packets++;
+               nf_priv->stats.ifc_down_bytes += plen;
+               return (NULL);
+       }
+
+       if (adapter->sume_debug)
+               printf("Building mbuf with length: %d\n", plen);
+
+       m = m_getm(NULL, plen, M_NOWAIT, MT_DATA);
+       if (m == NULL) {
+               adapter->packets_err++;
+               adapter->bytes_err += plen;
+               return (NULL);
+       }
+
+       /* Copy the data in at the right offset. */
+       m_copyback(m, 0, plen, (void *) (indata + sizeof(struct nf_metadata)));
+       m->m_pkthdr.rcvif = ifp;
+
+       return (m);
+}
+
+/*
+ * SUME interrupt handler for when we get a valid interrupt from the board.
+ * Theoretically, we can receive interrupt for any of the available channels,
+ * but RIFFA DMA uses only 2: 0 and 1, so we use only vect0. The vector is a 32
+ * bit number, using 5 bits for every channel, the least significant bits
+ * correspond to channel 0 and the next 5 bits correspond to channel 1. Vector
+ * bits for RX/TX are:
+ * RX
+ * bit 0 - new transaction from SUME
+ * bit 1 - SUME received our bouncebuffer address
+ * bit 2 - SUME copied the received data to our bouncebuffer, transaction done
+ * TX
+ * bit 3 - SUME received our bouncebuffer address
+ * bit 4 - SUME copied the data from our bouncebuffer, transaction done
+ *
+ * There are two finite state machines (one for TX, one for RX). We loop
+ * through channels 0 and 1 to check and our current state and which interrupt
+ * bit is set.
+ * TX
+ * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the first TX transaction.
+ * SUME_RIFFA_CHAN_STATE_READY: we prepared (filled with data) the bouncebuffer
+ * and triggered the SUME for the TX transaction. Waiting for interrupt bit 3
+ * to go to the next state.
+ * SUME_RIFFA_CHAN_STATE_READ: waiting for interrupt bit 4 (for SUME to send
+ * our packet). Then we get the length of the sent data and go back to the
+ * IDLE state.
+ * RX
+ * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the interrupt bit 0 (new RX
+ * transaction). When we get it, we prepare our bouncebuffer for reading and
+ * trigger the SUME to start the transaction. Go to the next state.
+ * SUME_RIFFA_CHAN_STATE_READY: waiting for the interrupt bit 1 (SUME got our
+ * bouncebuffer). Go to the next state.
+ * SUME_RIFFA_CHAN_STATE_READ: SUME copied data and our bouncebuffer is ready,
+ * we can build the mbuf and go back to the IDLE state.
+ */
+static void
+sume_intr_handler(void *arg)
+{
+       struct sume_adapter *adapter = arg;
+       uint32_t vect, vect0, len;
+       int ch, loops;
+       device_t dev = adapter->dev;
+       struct mbuf *m = NULL;
+       struct ifnet *ifp = NULL;
+       struct riffa_chnl_dir *send, *recv;
+
+       SUME_LOCK(adapter);
+
+       vect0 = read_reg(adapter, RIFFA_IRQ_REG0_OFF);
+       if ((vect0 & SUME_INVALID_VECT) != 0) {
+               SUME_UNLOCK(adapter);
+               return;
+       }
+
+       /*
+        * We only have one interrupt for all channels and no way
+        * to quickly lookup for which channel(s) we got an interrupt?
+        */
+       for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
+               vect = vect0 >> (5 * ch);
+               send = adapter->send[ch];
+               recv = adapter->recv[ch];
+
+               loops = 0;
+               while ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
+                   loops <= 5) {
+                       if (adapter->sume_debug)
+                               device_printf(dev, "TX ch %d state %u vect = "
+                                   "0x%08x\n", ch, send->state, vect);
+                       switch (send->state) {
+                       case SUME_RIFFA_CHAN_STATE_IDLE:
+                               break;
+                       case SUME_RIFFA_CHAN_STATE_READY:
+                               if (!(vect & SUME_MSI_TXBUF)) {
+                                       device_printf(dev, "ch %d unexpected "
+                                           "interrupt in send+3 state %u: "
+                                           "vect = 0x%08x\n", ch, send->state,
+                                           vect);
+                                       send->recovery = 1;
+                                       break;
+                               }
+                               send->state = SUME_RIFFA_CHAN_STATE_READ;
+                               vect &= ~SUME_MSI_TXBUF;
+                               break;
+                       case SUME_RIFFA_CHAN_STATE_READ:
+                               if (!(vect & SUME_MSI_TXDONE)) {
+                                       device_printf(dev, "ch %d unexpected "
+                                           "interrupt in send+4 state %u: "
+                                           "vect = 0x%08x\n", ch, send->state,
+                                           vect);
+                                       send->recovery = 1;
+                                       break;
+                               }
+                               send->state = SUME_RIFFA_CHAN_STATE_LEN;
+
+                               len = read_reg(adapter, RIFFA_CHNL_REG(ch,
+                                   RIFFA_RX_TNFR_LEN_REG_OFF));
+                               if (ch == SUME_RIFFA_CHANNEL_DATA) {
+                                       send->state =
+                                           SUME_RIFFA_CHAN_STATE_IDLE;
+                                       check_tx_queues(adapter);
+                               } else if (ch == SUME_RIFFA_CHANNEL_REG)
+                                       wakeup(&send->event);
+                               else {
+                                       device_printf(dev, "ch %d unexpected "
+                                           "interrupt in send+4 state %u: "
+                                           "vect = 0x%08x\n", ch, send->state,
+                                           vect);
+                                       send->recovery = 1;
+                               }
+                               vect &= ~SUME_MSI_TXDONE;
+                               break;
+                       case SUME_RIFFA_CHAN_STATE_LEN:
+                               break;
+                       default:
+                               device_printf(dev, "unknown TX state!\n");
+                       }
+                       loops++;
+               }
+
+               if ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
+                   send->recovery)
+                       device_printf(dev, "ch %d ignoring vect = 0x%08x "
+                           "during TX; not in recovery; state = %d loops = "
+                           "%d\n", ch, vect, send->state, loops);
+
+               loops = 0;
+               while ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
+                   SUME_MSI_RXDONE)) && loops < 5) {
+                       if (adapter->sume_debug)
+                               device_printf(dev, "RX ch %d state %u vect = "
+                                   "0x%08x\n", ch, recv->state, vect);
+                       switch (recv->state) {
+                       case SUME_RIFFA_CHAN_STATE_IDLE:
+                               if (!(vect & SUME_MSI_RXQUE)) {
+                                       device_printf(dev, "ch %d unexpected "
+                                           "interrupt in recv+0 state %u: "
+                                           "vect = 0x%08x\n", ch, recv->state,
+                                           vect);
+                                       recv->recovery = 1;
+                                       break;
+                               }
+                               uint32_t max_ptr;
+
+                               /* Clear recovery state. */
+                               recv->recovery = 0;
+
+                               /* Get offset and length. */
+                               recv->offlast = read_reg(adapter,
+                                   RIFFA_CHNL_REG(ch,
+                                   RIFFA_TX_OFFLAST_REG_OFF));
+                               recv->len = read_reg(adapter, RIFFA_CHNL_REG(ch,
+                                   RIFFA_TX_LEN_REG_OFF));
+
+                               /* Boundary checks. */
+                               max_ptr = (uint32_t)((uintptr_t)recv->buf_addr
+                                   + SUME_RIFFA_OFFSET(recv->offlast)
+                                   + SUME_RIFFA_LEN(recv->len) - 1);
+                               if (max_ptr <
+                                   (uint32_t)((uintptr_t)recv->buf_addr))
+                                       device_printf(dev, "receive buffer "
+                                           "wrap-around overflow.\n");
+                               if (SUME_RIFFA_OFFSET(recv->offlast) +
+                                   SUME_RIFFA_LEN(recv->len) >
+                                   adapter->sg_buf_size)
+                                       device_printf(dev, "receive buffer too"
+                                           " small.\n");
+
+                               /* Fill the bouncebuf "descriptor". */
+                               sume_fill_bb_desc(adapter, recv,
+                                   SUME_RIFFA_LEN(recv->len));
+
+                               bus_dmamap_sync(recv->ch_tag, recv->ch_map,
+                                   BUS_DMASYNC_PREREAD |
+                                   BUS_DMASYNC_PREWRITE);
+                               write_reg(adapter, RIFFA_CHNL_REG(ch,
+                                   RIFFA_TX_SG_ADDR_LO_REG_OFF),
+                                   SUME_RIFFA_LO_ADDR(recv->buf_hw_addr));
+                               write_reg(adapter, RIFFA_CHNL_REG(ch,
+                                   RIFFA_TX_SG_ADDR_HI_REG_OFF),
+                                   SUME_RIFFA_HI_ADDR(recv->buf_hw_addr));
+                               write_reg(adapter, RIFFA_CHNL_REG(ch,
+                                   RIFFA_TX_SG_LEN_REG_OFF),
+                                   4 * recv->num_sg);
+                               bus_dmamap_sync(recv->ch_tag, recv->ch_map,
+                                   BUS_DMASYNC_POSTREAD |
+                                   BUS_DMASYNC_POSTWRITE);
+
+                               recv->state = SUME_RIFFA_CHAN_STATE_READY;
+                               vect &= ~SUME_MSI_RXQUE;
+                               break;
+                       case SUME_RIFFA_CHAN_STATE_READY:
+                               if (!(vect & SUME_MSI_RXBUF)) {
+                                       device_printf(dev, "ch %d unexpected "
+                                           "interrupt in recv+1 state %u: "
+                                           "vect = 0x%08x\n", ch, recv->state,
+                                           vect);
+                                       recv->recovery = 1;
+                                       break;
+                               }
+                               recv->state = SUME_RIFFA_CHAN_STATE_READ;
+                               vect &= ~SUME_MSI_RXBUF;
+                               break;
+                       case SUME_RIFFA_CHAN_STATE_READ:
+                               if (!(vect & SUME_MSI_RXDONE)) {
+                                       device_printf(dev, "ch %d unexpected "
+                                           "interrupt in recv+2 state %u: "
+                                           "vect = 0x%08x\n", ch, recv->state,
+                                           vect);
+                                       recv->recovery = 1;
+                                       break;
+                               }
+                               len = read_reg(adapter, RIFFA_CHNL_REG(ch,
+                                   RIFFA_TX_TNFR_LEN_REG_OFF));
+
+                               /* Remember, len and recv->len are words. */
+                               if (ch == SUME_RIFFA_CHANNEL_DATA) {
+                                       m = sume_rx_build_mbuf(adapter, 
+                                           len << 2);
+                                       recv->state =
+                                           SUME_RIFFA_CHAN_STATE_IDLE;
+                               } else if (ch == SUME_RIFFA_CHANNEL_REG)
+                                       wakeup(&recv->event);
+                               else {
+                                       device_printf(dev, "ch %d unexpected "
+                                           "interrupt in recv+2 state %u: "
+                                           "vect = 0x%08x\n", ch, recv->state,
+                                           vect);
+                                       recv->recovery = 1;
+                               }
+                               vect &= ~SUME_MSI_RXDONE;
+                               break;
+                       case SUME_RIFFA_CHAN_STATE_LEN:
+                               break;
+                       default:
+                               device_printf(dev, "unknown RX state!\n");
+                       }
+                       loops++;
+               }
+
+               if ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
+                   SUME_MSI_RXDONE)) && recv->recovery) {
+                       device_printf(dev, "ch %d ignoring vect = 0x%08x "
+                           "during RX; not in recovery; state = %d, loops = "
+                           "%d\n", ch, vect, recv->state, loops);
+
+                       /* Clean the unfinished transaction. */
+                       if (ch == SUME_RIFFA_CHANNEL_REG &&
+                           vect & SUME_MSI_RXDONE) {
+                               read_reg(adapter, RIFFA_CHNL_REG(ch,
+                                   RIFFA_TX_TNFR_LEN_REG_OFF));
+                               recv->recovery = 0;
+                       }
+               }
+       }
+       SUME_UNLOCK(adapter);
+
+       if (m != NULL) {
+               ifp = m->m_pkthdr.rcvif;
+               (*ifp->if_input)(ifp, m);
+       }
+}
+
+/*
+ * As we cannot disable interrupt generation, ignore early interrupts by 
waiting
+ * for the adapter to go into the 'running' state.
+ */
+static int
+sume_intr_filter(void *arg)
+{
+       struct sume_adapter *adapter = arg;
+
+       if (adapter->running == 0)
+               return (FILTER_STRAY);
+
+       return (FILTER_SCHEDULE_THREAD);
+}
+
+static int
+sume_probe_riffa_pci(struct sume_adapter *adapter)
+{
+       device_t dev = adapter->dev;
+       int error, count, capmem;
+       uint32_t reg, devctl, linkctl;
+
+       pci_enable_busmaster(dev);
+
+       adapter->rid = PCIR_BAR(0);
+       adapter->bar0_addr = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
+           &adapter->rid, RF_ACTIVE);
+       if (adapter->bar0_addr == NULL) {
+               device_printf(dev, "unable to allocate bus resource: "
+                   "BAR0 address\n");
+               return (ENXIO);
+       }
+       adapter->bt = rman_get_bustag(adapter->bar0_addr);
+       adapter->bh = rman_get_bushandle(adapter->bar0_addr);
+       adapter->bar0_len = rman_get_size(adapter->bar0_addr);
+       if (adapter->bar0_len != 1024) {
+               device_printf(dev, "BAR0 resource length %lu != 1024\n",
+                   adapter->bar0_len);
+               return (ENXIO);
+       }
+
+       count = pci_msi_count(dev);
+       error = pci_alloc_msi(dev, &count);
+       if (error) {
+               device_printf(dev, "unable to allocate bus resource: PCI "
+                   "MSI\n");
+               return (error);
+       }
+
+       adapter->irq.rid = 1; /* Should be 1, thus says pci_alloc_msi() */
+       adapter->irq.res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
+           &adapter->irq.rid, RF_SHAREABLE | RF_ACTIVE);
+       if (adapter->irq.res == NULL) {
+               device_printf(dev, "unable to allocate bus resource: IRQ "
+                   "memory\n");
+               return (ENXIO);
+       }
+
+       error = bus_setup_intr(dev, adapter->irq.res, INTR_MPSAFE |
+           INTR_TYPE_NET, sume_intr_filter, sume_intr_handler, adapter,
+           &adapter->irq.tag);
+       if (error) {
+               device_printf(dev, "failed to setup interrupt for rid %d, name"
+                   " %s: %d\n", adapter->irq.rid, "SUME_INTR", error);
+               return (ENXIO);
+       }
+
+       if (pci_find_cap(dev, PCIY_EXPRESS, &capmem) != 0) {
+               device_printf(dev, "PCI not PCIe capable\n");

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to