On Fri, Nov 18, 2016 at 15:37 +0100, Mike Belopuhov wrote:
> The remaining diff after the interrupt change was committed.  I have
> kept KERNEL_LOCK/UNLOCK dance since those functions can run in
> parallel with similar code triggered by ifconfig so better safe than
> sorry.
> 
> I still need to go through changes in ixgbe_initialize_receive_units.
> One thing that is obvious is that Intel driver calls RSS setup even
> if one queue is configured which we haven't done before.  My and
> others tests don't show any regression regarding this however.
> 
> "Wait for a last completion before clearing buffers" change in the
> ixgbe_clear_tx_pending was committed upstream without any explanation
> but "looks safe".
> 
> FCRTH related change also looks strange, needs to get checked against
> documentation.
>

Quoting the spec for 82599: "The content of the Flow Control Receive
Threshold High (FCRTH) register determines at what point the 82599
transmits the first PAUSE frame."  It then mentions that FCRTL (the low
threshold), when enabled, controls the sending of XON messages.

There are two modes of operation regarding flow control: the regular
one and Priority Flow Control, which we don't enable.  Then, depending
on a few other features (DCB, Flow Director) and whether or not we
enable jumbo frames, the FCRTH is set relative to the packet buffer
size (which is 384k for X540 and 512k for 82599).

The way FCRTH is calculated in *our* case (no DCB, no flow director,
etc) is basically Rx packet buffer size minus the delay value which
is somewhere in the range of [24..60]k.  But the changed code below
is a branch that handles the default, not-configured case: it sets
FCRTL to 0, disables XON and the low threshold, and seems to be setting
FCRTH to a "default" value, which might or might not be poorly
chosen.  However, it definitely makes more sense than the "-32" we
have there now.

> Ditto regarding changes regarding PHY power and ixgbe_handle_mod.
> Hrvoje has already reported that X550 SFP doesn't seem to be able to
> detect different SFP+ modules when replugged as opposed to X520.
> 

The ixgbe_handle_mod change is relevant when you swap different SFP+
modules (fiber, copper, etc.); it also handles the unknown PHY
case by dropping out early.  I intend to commit this as one of
the last changes so that it will get a bit more testing.

The PHY power change is explained here:
https://svnweb.freebsd.org/base?view=revision&revision=295093
"This fixes link not detected on X540-AT2 after booting to Linux
which turns the phy power off on detach" -- explains why we
haven't seen this while some users have reported weird
problems with X540...

diff --git sys/dev/pci/files.pci sys/dev/pci/files.pci
index a6b91fb..34ce9bf 100644
--- sys/dev/pci/files.pci
+++ sys/dev/pci/files.pci
@@ -351,20 +351,21 @@ file      dev/pci/ixgb_ee.c               ixgb
 file   dev/pci/ixgb_hw.c               ixgb
 
 # Intel 82598 10GbE
 device ix: ether, ifnet, ifmedia
 attach ix at pci
 file   dev/pci/if_ix.c                 ix
 file   dev/pci/ixgbe.c                 ix
 file   dev/pci/ixgbe_82598.c           ix
 file   dev/pci/ixgbe_82599.c           ix
 file   dev/pci/ixgbe_x540.c            ix
+file   dev/pci/ixgbe_x550.c            ix
 file   dev/pci/ixgbe_phy.c             ix
 
 # Neterion Xframe 10 Gigabit ethernet 
 device xge: ether, ifnet, ifmedia
 attach xge  at pci
 file   dev/pci/if_xge.c                xge
 
 # NetXen NX2031/NX2035 10Gb Ethernet
 device nxe: ether, ifnet, ifmedia
 attach nxe at pci
diff --git sys/dev/pci/if_ix.c sys/dev/pci/if_ix.c
index c41a443..755d40d 100644
--- sys/dev/pci/if_ix.c
+++ sys/dev/pci/if_ix.c
@@ -64,24 +64,34 @@ const struct pci_matchid ixgbe_devices[] = {
        { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82598_DA_DUAL },
        { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_KX4 },
        { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_KX4_MEZZ },
        { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_XAUI },
        { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_COMBO_BP },
        { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_BPLANE_FCOE },
        { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_CX4 },
        { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_T3_LOM },
        { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_SFP },
        { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_SFP_EM },
+       { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_SFP_SF_QP },
        { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_SFP_SF2 },
        { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_SFP_FCOE },
        { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599EN_SFP },
+       { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_QSFP_SF_QP },
        { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X540T },
+       { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X540T1 },
+       { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550T },
+       { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550T1 },
+       { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_X_KX4 },
+       { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_X_KR },
+       { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_X_SFP },
+       { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_X_10G_T },
+       { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_X_1G_T },
 };
 
 /*********************************************************************
  *  Function prototypes
  *********************************************************************/
 int    ixgbe_probe(struct device *, void *, void *);
 void   ixgbe_attach(struct device *, struct device *, void *);
 int    ixgbe_detach(struct device *, int);
 void   ixgbe_start(struct ifnet *);
 int    ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
@@ -108,20 +118,21 @@ int       ixgbe_setup_transmit_ring(struct tx_ring *);
 void   ixgbe_initialize_transmit_units(struct ix_softc *);
 void   ixgbe_free_transmit_structures(struct ix_softc *);
 void   ixgbe_free_transmit_buffers(struct tx_ring *);
 
 int    ixgbe_allocate_receive_buffers(struct rx_ring *);
 int    ixgbe_setup_receive_structures(struct ix_softc *);
 int    ixgbe_setup_receive_ring(struct rx_ring *);
 void   ixgbe_initialize_receive_units(struct ix_softc *);
 void   ixgbe_free_receive_structures(struct ix_softc *);
 void   ixgbe_free_receive_buffers(struct rx_ring *);
+void   ixgbe_initialize_rss_mapping(struct ix_softc *);
 int    ixgbe_rxfill(struct rx_ring *);
 void   ixgbe_rxrefill(void *);
 
 void   ixgbe_enable_intr(struct ix_softc *);
 void   ixgbe_disable_intr(struct ix_softc *);
 void   ixgbe_update_stats_counters(struct ix_softc *);
 int    ixgbe_txeof(struct tx_ring *);
 int    ixgbe_rxeof(struct ix_queue *);
 void   ixgbe_rx_checksum(uint32_t, struct mbuf *, uint32_t);
 void   ixgbe_iff(struct ix_softc *);
@@ -272,20 +283,28 @@ ixgbe_attach(struct device *parent, struct device *self, void *aux)
        /* Detect and set physical type */
        ixgbe_setup_optics(sc);
 
        bcopy(sc->hw.mac.addr, sc->arpcom.ac_enaddr,
            IXGBE_ETH_LENGTH_OF_ADDRESS);
 
        error = ixgbe_allocate_legacy(sc);
        if (error)
                goto err_late;
 
+       /* Enable the optics for 82599 SFP+ fiber */
+       if (sc->hw.phy.multispeed_fiber && sc->hw.mac.ops.enable_tx_laser)
+               sc->hw.mac.ops.enable_tx_laser(&sc->hw);
+
+       /* Enable power to the phy */
+       if (hw->phy.ops.set_phy_power)
+               hw->phy.ops.set_phy_power(&sc->hw, TRUE);
+
        /* Setup OS specific network interface */
        ixgbe_setup_interface(sc);
 
        /* Initialize statistics */
        ixgbe_update_stats_counters(sc);
 
        /* Get the PCI-E bus info and determine LAN ID */
        hw->mac.ops.get_bus_info(hw);
 
        /* Set an initial default flow control value */
@@ -721,20 +740,24 @@ ixgbe_init(void *arg)
                        return;
                }
        }
 
        /* Setup interrupt moderation */
        itr = (4000000 / IXGBE_INTS_PER_SEC) & 0xff8;
        if (sc->hw.mac.type != ixgbe_mac_82598EB)
                itr |= IXGBE_EITR_LLI_MOD | IXGBE_EITR_CNT_WDIS;
        IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(0), itr);
 
+       /* Enable power to the phy */
+       if (sc->hw.phy.ops.set_phy_power)
+               sc->hw.phy.ops.set_phy_power(&sc->hw, TRUE);
+
        /* Config/Enable Link */
        ixgbe_config_link(sc);
 
        /* Hardware Packet Buffer & Flow Control setup */
        ixgbe_config_delay_values(sc);
 
        /* Initialize the FC settings */
        sc->hw.mac.ops.start_hw(&sc->hw);
 
        /* And now turn on interrupts */
@@ -2531,22 +2554,21 @@ fail:
  *
  **********************************************************************/
 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
 
 void
 ixgbe_initialize_receive_units(struct ix_softc *sc)
 {
        struct rx_ring  *rxr = sc->rx_rings;
        struct ixgbe_hw *hw = &sc->hw;
        uint32_t        bufsz, fctrl, srrctl, rxcsum;
-       uint32_t        reta, mrqc = 0, hlreg;
-       uint32_t        random[10];
+       uint32_t        hlreg;
        int             i;
 
        /*
         * Make sure receives are disabled while
         * setting up the descriptor ring
         */
        ixgbe_disable_rx(hw);
 
        /* Enable broadcasts */
        fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
@@ -2585,66 +2607,100 @@ ixgbe_initialize_receive_units(struct ix_softc *sc)
 
        if (sc->hw.mac.type != ixgbe_mac_82598EB) {
                uint32_t psrtype = IXGBE_PSRTYPE_TCPHDR |
                              IXGBE_PSRTYPE_UDPHDR |
                              IXGBE_PSRTYPE_IPV4HDR |
                              IXGBE_PSRTYPE_IPV6HDR;
                IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
        }
 
        rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
-       rxcsum &= ~IXGBE_RXCSUM_PCSD;
-
-       /* Setup RSS */
-       if (sc->num_queues > 1) {
-               int j;
-               reta = 0;
-               /* set up random bits */
-               arc4random_buf(&random, sizeof(random));
-
-               /* Set up the redirection table */
-               for (i = 0, j = 0; i < 128; i++, j++) {
-                       if (j == sc->num_queues)
-                               j = 0;
-                       reta = (reta << 8) | (j * 0x11);
-                       if ((i & 3) == 3)
-                               IXGBE_WRITE_REG(&sc->hw, IXGBE_RETA(i >> 2), 
reta);
-               }
 
-               /* Now fill our hash function seeds */
-               for (i = 0; i < 10; i++)
-                       IXGBE_WRITE_REG(&sc->hw, IXGBE_RSSRK(i), random[i]);
-
-               /* Perform hash on these packet types */
-               mrqc = IXGBE_MRQC_RSSEN
-                   | IXGBE_MRQC_RSS_FIELD_IPV4
-                   | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
-                   | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
-                   | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
-                   | IXGBE_MRQC_RSS_FIELD_IPV6_EX
-                   | IXGBE_MRQC_RSS_FIELD_IPV6
-                   | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
-                   | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
-                   | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
-               IXGBE_WRITE_REG(&sc->hw, IXGBE_MRQC, mrqc);
+       ixgbe_initialize_rss_mapping(sc);
 
+       if (sc->num_queues > 1) {
                /* RSS and RX IPP Checksum are mutually exclusive */
                rxcsum |= IXGBE_RXCSUM_PCSD;
        }
 
        /* This is useful for calculating UDP/IP fragment checksums */
        if (!(rxcsum & IXGBE_RXCSUM_PCSD))
                rxcsum |= IXGBE_RXCSUM_IPPCSE;
 
        IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
 }
 
+void
+ixgbe_initialize_rss_mapping(struct ix_softc *sc)
+{
+       struct ixgbe_hw *hw = &sc->hw;
+       uint32_t reta = 0, mrqc, rss_key[10];
+       int i, j, queue_id, table_size, index_mult;
+
+       /* set up random bits */
+       arc4random_buf(&rss_key, sizeof(rss_key));
+
+       /* Set multiplier for RETA setup and table size based on MAC */
+       index_mult = 0x1;
+       table_size = 128;
+       switch (sc->hw.mac.type) {
+       case ixgbe_mac_82598EB:
+               index_mult = 0x11;
+               break;
+       case ixgbe_mac_X550:
+       case ixgbe_mac_X550EM_x:
+               table_size = 512;
+               break;
+       default:
+               break;
+       }
+
+       /* Set up the redirection table */
+       for (i = 0, j = 0; i < table_size; i++, j++) {
+               if (j == sc->num_queues) j = 0;
+               queue_id = (j * index_mult);
+               /*
+                * The low 8 bits are for hash value (n+0);
+                * The next 8 bits are for hash value (n+1), etc.
+                */
+               reta = reta >> 8;
+               reta = reta | ( ((uint32_t) queue_id) << 24);
+               if ((i & 3) == 3) {
+                       if (i < 128)
+                               IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
+                       else
+                               IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32),
+                                   reta);
+                       reta = 0;
+               }
+       }
+
+       /* Now fill our hash function seeds */
+       for (i = 0; i < 10; i++)
+               IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]);
+
+       /*
+        * Disable UDP - IP fragments aren't currently being handled
+        * and so we end up with a mix of 2-tuple and 4-tuple
+        * traffic.
+        */
+       mrqc = IXGBE_MRQC_RSSEN
+            | IXGBE_MRQC_RSS_FIELD_IPV4
+            | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
+            | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
+            | IXGBE_MRQC_RSS_FIELD_IPV6_EX
+            | IXGBE_MRQC_RSS_FIELD_IPV6
+            | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
+       ;
+       IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+}
+
 /*********************************************************************
  *
  *  Free all receive rings.
  *
  **********************************************************************/
 void
 ixgbe_free_receive_structures(struct ix_softc *sc)
 {
        struct rx_ring *rxr;
        int             i;
@@ -3106,34 +3162,55 @@ ixgbe_configure_ivars(struct ix_softc *sc)
 #endif
 }
 
 /*
  * SFP module interrupts handler
  */
 void
 ixgbe_handle_mod(struct ix_softc *sc)
 {
        struct ixgbe_hw *hw = &sc->hw;
+       enum ixgbe_phy_type orig_type = hw->phy.type;
        uint32_t err;
 
+       /* Check to see if the PHY type changed */
+       if (hw->phy.ops.identify) {
+               hw->phy.type = ixgbe_phy_unknown;
+               hw->phy.ops.identify(hw);
+       }
+
+       if (hw->phy.type != orig_type) {
+               if (hw->phy.type == ixgbe_phy_none) {
+                       hw->phy.sfp_type = ixgbe_sfp_type_unknown;
+                       goto out;
+               }
+
+               /* Try to do the initialization that was skipped before */
+               if (hw->phy.ops.init)
+                       hw->phy.ops.init(hw);
+               if (hw->phy.ops.reset)
+                       hw->phy.ops.reset(hw);
+       }
+
        err = hw->phy.ops.identify_sfp(hw);
        if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
                printf("%s: Unsupported SFP+ module type was detected!\n",
                    sc->dev.dv_xname);
                return;
        }
        err = hw->mac.ops.setup_sfp(hw);
        if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
                printf("%s: Setup failure - unsupported SFP+ module type!\n",
                    sc->dev.dv_xname);
                return;
        }
+ out:
        /* Set the optics type so system reports correctly */
        ixgbe_setup_optics(sc);
 
        ixgbe_handle_msf(sc);
 }
 
 
 /*
  * MSF (multispeed fiber) interrupts handler
  */
diff --git sys/dev/pci/ixgbe.c sys/dev/pci/ixgbe.c
index 6304f04..c466b3f 100644
--- sys/dev/pci/ixgbe.c
+++ sys/dev/pci/ixgbe.c
@@ -2280,24 +2280,25 @@ int32_t ixgbe_fc_enable_generic(struct ixgbe_hw *hw)
                if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
                    hw->fc.high_water[i]) {
                        fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE;
                        IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), fcrtl);
                        fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN;
                } else {
                        IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), 0);
                        /*
                         * In order to prevent Tx hangs when the internal Tx
                         * switch is enabled we must set the high water mark
-                        * to the maximum FCRTH value.  This allows the Tx
-                        * switch to function even under heavy Rx workloads.
+                        * to the Rx packet buffer size - 24KB.  This allows
+                        * the Tx switch to function even under heavy Rx
+                        * workloads.
                         */
-                       fcrth = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)) - 32;
+                       fcrth = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)) - 24576;
                }
 
                IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(i), fcrth);
        }
 
        /* Configure pause time (2 TCs per register) */
        reg = hw->fc.pause_time * 0x00010001;
        for (i = 0; i < (IXGBE_DCB_MAX_TRAFFIC_CLASS / 2); i++)
                IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg);
 
@@ -3562,37 +3563,55 @@ int32_t ixgbe_host_interface_command(struct ixgbe_hw *hw, uint32_t *buffer,
 /**
  * ixgbe_clear_tx_pending - Clear pending TX work from the PCIe fifo
  * @hw: pointer to the hardware structure
  *
  * The 82599 and x540 MACs can experience issues if TX work is still pending
  * when a reset occurs.  This function prevents this by flushing the PCIe
  * buffers on the system.
  **/
 void ixgbe_clear_tx_pending(struct ixgbe_hw *hw)
 {
-       uint32_t gcr_ext, hlreg0;
+       uint32_t gcr_ext, hlreg0, i, poll;
+       uint16_t value;
 
        /*
         * If double reset is not requested then all transactions should
         * already be clear and as such there is no work to do
         */
        if (!(hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED))
                return;
 
        /*
         * Set loopback enable to prevent any transmits from being sent
         * should the link come up.  This assumes that the RXCTRL.RXEN bit
         * has already been cleared.
         */
        hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
        IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0 | IXGBE_HLREG0_LPBK);
 
+       /* Wait for a last completion before clearing buffers */
+       IXGBE_WRITE_FLUSH(hw);
+       msec_delay(3);
+
+       /*
+        * Before proceeding, make sure that the PCIe block does not have
+        * transactions pending.
+        */
+       poll = ixgbe_pcie_timeout_poll(hw);
+       for (i = 0; i < poll; i++) {
+               usec_delay(100);
+               value = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_DEVICE_STATUS);
+               if (!(value & IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING))
+                       goto out;
+       }
+
+out:
        /* initiate cleaning flow for buffers in the PCIe transaction layer */
        gcr_ext = IXGBE_READ_REG(hw, IXGBE_GCR_EXT);
        IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT,
                        gcr_ext | IXGBE_GCR_EXT_BUFFERS_CLEAR);
 
        /* Flush all writes and allow 20usec for all transactions to clear */
        IXGBE_WRITE_FLUSH(hw);
        usec_delay(20);
 
        /* restore previous register values */
@@ -3928,20 +3947,26 @@ int32_t ixgbe_init_shared_code(struct ixgbe_hw *hw)
        switch (hw->mac.type) {
        case ixgbe_mac_82598EB:
                status = ixgbe_init_ops_82598(hw);
                break;
        case ixgbe_mac_82599EB:
                status = ixgbe_init_ops_82599(hw);
                break;
        case ixgbe_mac_X540:
                status = ixgbe_init_ops_X540(hw);
                break;
+       case ixgbe_mac_X550:
+               status = ixgbe_init_ops_X550(hw);
+               break;
+       case ixgbe_mac_X550EM_x:
+               status = ixgbe_init_ops_X550EM(hw);
+               break;
        default:
                status = IXGBE_ERR_DEVICE_NOT_SUPPORTED;
                break;
        }
        hw->mac.max_link_up_time = IXGBE_LINK_UP_TIME;
 
        return status;
 }
 
 /**

Reply via email to