When using DPDK rings (dpdkr port type), packet buffers get shared to consumers of the rings (e.g. Virtual Machines). The packet buffers also include the RSS hash. This is a hash of a number of fields in the packet and is used in order to do a fast lookup in the EMC.
However, if a consumer of the packet modifies the packet without regenerating the RSS hash, the EMC will use the same hash for lookup even though the packet may belong to a different flow. This would cause unnecessary collisions in the EMC reducing performance in the presence of multiple flows. To avoid receiving an incorrect RSS hash on reception from a DPDK ring, the RSS hash needs to be reset on transmission. This will reduce performance of the forwarding path as the RSS hash will need to be calculated for every packet received from a dpdkr but will behave correctly in the presence of a large number of flows that get modified by the consumer of a DPDK ring. Reviewed-by: Daniele Di Proietto <diproiet...@vmware.com> Signed-off-by: Mark D. Gray <mark.d.g...@intel.com> --- lib/netdev-dpdk.c | 34 ++++++++++++++++++++++------------ 1 files changed, 22 insertions(+), 12 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index f69154b..f5ebb47 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -558,11 +558,11 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no, netdev_->n_rxq = NR_QUEUE; if (type == DPDK_DEV_ETH) { - netdev_dpdk_alloc_txq(netdev, NR_QUEUE); - err = dpdk_eth_dev_init(netdev); - if (err) { - goto unlock; - } + netdev_dpdk_alloc_txq(netdev, NR_QUEUE); + err = dpdk_eth_dev_init(netdev); + if (err) { + goto unlock; + } } list_push_back(&dpdk_list, &netdev->list_node); @@ -906,10 +906,10 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts, int tx_pkts, i; if (OVS_UNLIKELY(!is_vhost_running(virtio_dev))) { - ovs_mutex_lock(&vhost_dev->mutex); - vhost_dev->stats.tx_dropped+= cnt; - ovs_mutex_unlock(&vhost_dev->mutex); - goto out; + ovs_mutex_lock(&vhost_dev->mutex); + vhost_dev->stats.tx_dropped+= cnt; + ovs_mutex_unlock(&vhost_dev->mutex); + goto out; } /* There is vHost TX single queue, So we need to lock it for TX. 
*/ @@ -923,9 +923,9 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts, out: if (may_steal) { - for (i = 0; i < cnt; i++) { - dp_packet_delete(pkts[i]); - } + for (i = 0; i < cnt; i++) { + dp_packet_delete(pkts[i]); + } } } @@ -1064,6 +1064,7 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid, for (i = 0; i < cnt; i++) { int size = dp_packet_size(pkts[i]); + if (OVS_UNLIKELY(size > dev->max_packet_len)) { if (next_tx_idx != i) { dpdk_queue_pkts(dev, qid, @@ -1745,6 +1746,15 @@ netdev_dpdk_ring_send(struct netdev *netdev, int qid OVS_UNUSED, struct dp_packet **pkts, int cnt, bool may_steal) { struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + unsigned i; + + /* When using 'dpdkr' and sending to a DPDK ring, we want to ensure that the + * rss hash field is clear. This is because the same mbuf may be modified by + * the consumer of the ring and returned into the datapath without recalculating + * the RSS hash. */ + for (i = 0; i < cnt; i++) { + dp_packet_set_dp_hash(pkts[i], 0); + } /* DPDK Rings have a single TX queue, Therefore needs locking. */ rte_spinlock_lock(&dev->txq_lock); -- 1.7.4.1 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev