On 27.06.2017 18:46, Billy O'Mahony wrote:
> Previously if there is no available (non-isolated) pmd on the numa node
> for a port then the port is not polled at all. This can result in a
> non-operational system until such time as nics are physically
> repositioned. It is preferable to operate with a pmd on the 'wrong' numa
> node albeit with lower performance. Local pmds are still chosen when
> available.
> 
> Signed-off-by: Billy O'Mahony <billy.o.mah...@intel.com>
> Signed-off-by: Ilya Maximets <i.maxim...@samsung.com>
> Co-authored-by: Ilya Maximets <i.maxim...@samsung.com>
> ---
> v7: Incorporate review comments on docs and implementation
> v6: Change 'port' to 'queue' in a warning msg
> v5: Fix warning msg; Update same in docs
> v4: Fix a checkpatch error
> v3: Fix warning messages not appearing when using multiqueue
> v2: Add details of warning messages into docs
> 
>  Documentation/intro/install/dpdk.rst | 18 +++++++++++++---
>  lib/dpif-netdev.c                    | 42 
> ++++++++++++++++++++++++++++++++----
>  2 files changed, 53 insertions(+), 7 deletions(-)
> 
> diff --git a/Documentation/intro/install/dpdk.rst 
> b/Documentation/intro/install/dpdk.rst
> index e83f852..a760fb6 100644
> --- a/Documentation/intro/install/dpdk.rst
> +++ b/Documentation/intro/install/dpdk.rst
> @@ -449,7 +449,7 @@ affinitized accordingly.
>  
>    A poll mode driver (pmd) thread handles the I/O of all DPDK interfaces
>    assigned to it. A pmd thread shall poll the ports for incoming packets,
> -  switch the packets and send to tx port.  pmd thread is CPU bound, and needs
> +  switch the packets and send to tx port.  A pmd thread is CPU bound, and 
> needs
>    to be affinitized to isolated cores for optimum performance.
>  
>    By setting a bit in the mask, a pmd thread is created and pinned to the
> @@ -458,8 +458,20 @@ affinitized accordingly.
>        $ ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0x4
>  
>    .. note::
> -    pmd thread on a NUMA node is only created if there is at least one DPDK
> -    interface from that NUMA node added to OVS.
> +    While pmd threads are created based on pmd-cpu-mask, a thread only
> +    starts consuming CPU cycles if there is at least one receive queue
> +    assigned to the pmd.
> +
> +  .. note::
> +
> +    On NUMA systems PCI devices are also local to a NUMA node.  Unbound Rx
> +    queues for a PCI device will be assigned to a pmd on its local NUMA node
> +    if pmd-cpu-mask has created a pmd thread on that NUMA node.  If not, the
> +    queue will be assigned to a pmd on a remote NUMA node.  This will result
> +    in reduced maximum throughput on that device.  

And possibly on other devices assigned to that pmd thread.

>       In case such a queue assignment
> +    is made a warning message will be logged: "There's no available (non-
> +    isolated) pmd thread on numa node N. Queue Q on port P will be assigned 
> to
> +    the pmd on core C (numa node N'). Expect reduced performance."
>  
>  - QEMU vCPU thread Affinity
>  
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 4e29085..38a0fd3 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -3195,6 +3195,23 @@ rr_numa_list_lookup(struct rr_numa_list *rr, int 
> numa_id)
>      return NULL;
>  }
>  
> +/* Returns next NUMA from rr list in round-robin fashion. Returns the first
> + * NUMA node if 'NULL' or the last node passed, and 'NULL' if list is empty. 
> */
> +static struct rr_numa *
> +rr_numa_list_next(struct rr_numa_list *rr, const struct rr_numa *numa)
> +{
> +    struct hmap_node *node = NULL;
> +
> +    if (numa) {
> +        node = hmap_next(&rr->numas, &numa->node);
> +    }
> +    if (!node) {
> +        node = hmap_first(&rr->numas);
> +    }
> +
> +    return (node) ? CONTAINER_OF(node, struct rr_numa, node) : NULL;
> +}
> +
>  static void
>  rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
>  {
> @@ -3249,6 +3266,7 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) 
> OVS_REQUIRES(dp->port_mutex)
>  {
>      struct dp_netdev_port *port;
>      struct rr_numa_list rr;
> +    struct rr_numa * non_local_numa = NULL;

The '*' should be placed next to the variable name, i.e. "struct rr_numa *non_local_numa".

>  
>      rr_numa_list_populate(dp, &rr);
>  
> @@ -3262,7 +3280,6 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) 
> OVS_REQUIRES(dp->port_mutex)
>  
>          numa_id = netdev_get_numa_id(port->netdev);
>          numa = rr_numa_list_lookup(&rr, numa_id);
> -
>          for (int qid = 0; qid < port->n_rxq; qid++) {
>              struct dp_netdev_rxq *q = &port->rxqs[qid];

I prefer to keep that blank line.

> @@ -3281,11 +3298,28 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) 
> OVS_REQUIRES(dp->port_mutex)
>                  }
>              } else if (!pinned && q->core_id == OVS_CORE_UNSPEC) {
>                  if (!numa) {
> -                    VLOG_WARN("There's no available (non isolated) pmd 
> thread "
> +                    /* There are no pmds on the queue's local NUMA node.
> +                       Round-robin on the NUMA nodes that do have pmds. */
> +                    non_local_numa = rr_numa_list_next(&rr, non_local_numa);
> +                    if (!non_local_numa) {
> +                        VLOG_ERR("There is no available (non-isolated) pmd "
> +                                 "thread for port \'%s\' queue %d. This 
> queue "
> +                                 "will not be polled. Is pmd-cpu-mask set to 
> "
> +                                 "zero? Or are all PMDs isolated to other "
> +                                 "queues?", netdev_get_name(port->netdev),
> +                                 qid);
> +                        continue;
> +                    }
> +                    q->pmd = rr_numa_get_pmd(non_local_numa);
> +                    VLOG_WARN("There's no available (non-isolated) pmd 
> thread "
>                                "on numa node %d. Queue %d on port \'%s\' will 
> "
> -                              "not be polled.",
> -                              numa_id, qid, netdev_get_name(port->netdev));
> +                              "be assigned to the pmd on core %d "
> +                              "(numa node %d). Expect reduced performance.",
> +                              numa_id, qid, netdev_get_name(port->netdev),
> +                              q->pmd->core_id, q->pmd->numa_id);
>                  } else {
> +                    /* Assign queue to the next (round-robin) PMD on it's 
> local
> +                       NUMA node. */
>                      q->pmd = rr_numa_get_pmd(numa);
>                  }
>              }
> 
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to