Re: [ovs-dev] [PATCH v8 08/16] dpif-netdev: Add pmd thread local port cache for transmission.

2016-04-22 Thread Daniele Di Proietto
Hi Ilya,

thanks for reporting this.  I've decided to handle tx port cache
in dp_netdev_set_pmds_on_numa() and dp_netdev_set_nonpmd().

I've sent an updated version here:

http://openvswitch.org/pipermail/dev/2016-April/070064.html

Thanks,

Daniele

On 22/04/2016 08:15, "Ilya Maximets"  wrote:

>Without this we will lose connection to non-pmd ports:
>--
>diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>index 26ef612..be1f291 100644
>--- a/lib/dpif-netdev.c
>+++ b/lib/dpif-netdev.c
>@@ -3359,9 +3359,7 @@ dp_netdev_reset_pmd_threads(struct dp_netdev *dp)
> struct hmapx_node *node;
> 
> HMAP_FOR_EACH (port, node, &dp->ports) {
>-if (netdev_is_pmd(port->netdev)) {
>-dp_netdev_add_port_to_pmds__(dp, port, &to_reload);
>-}
>+dp_netdev_add_port_to_pmds__(dp, port, &to_reload);
> }
> 
> HMAPX_FOR_EACH (node, &to_reload) {
>--
>
>Best regards, Ilya Maximets.
>
>On 20.04.2016 01:28, diproiettod at vmware.com (Daniele Di Proietto) wrote:
>> A future commit will stop using RCU for 'dp->ports' and use a mutex for
>> reading/writing them.  To avoid taking a mutex in dp_execute_cb(), which
>> is called in the fast path, this commit introduces a pmd thread local
>> cache of ports.
>> 
>> The downside is that every port add/remove now needs to synchronize with
>> every pmd thread.
>> 
>> Among the advantages, keeping a per-thread port mapping could allow
>> greater control over the txq assignment.
>> 
>> Signed-off-by: Daniele Di Proietto 
>> ---
>>  lib/dpif-netdev.c | 249 +++---
>>  1 file changed, 179 insertions(+), 70 deletions(-)
>> 
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index cedaf39..bd2249e 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -184,6 +184,7 @@ static bool dpcls_lookup(const struct dpcls *cls,
>>   *
>>   *dp_netdev_mutex (global)
>>   *port_mutex
>> + *non_pmd_mutex
>>   */
>>  struct dp_netdev {
>>  const struct dpif_class *const class;
>> @@ -379,6 +380,13 @@ struct rxq_poll {
>>  struct ovs_list node;
>>  };
>>  
>> +/* Contained by struct dp_netdev_pmd_thread's 'port_cache' or 'tx_ports'. */
>> +struct tx_port {
>> +odp_port_t port_no;
>> +struct netdev *netdev;
>> +struct hmap_node node;
>> +};
>> +
>>  /* PMD: Poll modes drivers.  PMD accesses devices via polling to eliminate
>>   * the performance overhead of interrupt processing.  Therefore netdev can
>>   * not implement rx-wait for these devices.  dpif-netdev needs to poll
>> @@ -405,8 +413,8 @@ struct dp_netdev_pmd_thread {
>>  
>>  /* Per thread exact-match cache.  Note, the instance for cpu core
>>   * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
>> - * need to be protected (e.g. by 'dp_netdev_mutex').  All other
>> - * instances will only be accessed by its own pmd thread. */
>> + * need to be protected by 'non_pmd_mutex'.  Every other instance
>> + * will only be accessed by its own pmd thread. */
>>  struct emc_cache flow_cache;
>>  
>>  /* Classifier and Flow-Table.
>> @@ -435,10 +443,20 @@ struct dp_netdev_pmd_thread {
>>  atomic_int tx_qid;  /* Queue id used by this pmd thread to
>>   * send packets on all netdevs */
>>  
>> -struct ovs_mutex poll_mutex;/* Mutex for poll_list. */
>> +struct ovs_mutex port_mutex;/* Mutex for 'poll_list' and 'tx_ports'. */
>>  /* List of rx queues to poll. */
>>  struct ovs_list poll_list OVS_GUARDED;
>> -int poll_cnt;   /* Number of elemints in poll_list. */
>> +/* Number of elements in 'poll_list' */
>> +int poll_cnt;
>> +/* Map of 'tx_port's used for transmission.  Written by the main thread,
>> + * read by the pmd thread. */
>> +struct hmap tx_ports OVS_GUARDED;
>> +
>> +/* Map of 'tx_port' used in the fast path. This is a thread-local copy of
>> + * 'tx_ports'. The instance for cpu core NON_PMD_CORE_ID can be accessed
>> + * by multiple threads, and thusly need to be protected by 'non_pmd_mutex'.
>> + * Every other instance will only be accessed by its own pmd thread. */
>> +struct hmap port_cache;
>>  
>>  /* Only a pmd thread can write on its own 'cycles' and 'stats'.
>>   * The main thread keeps 'stats_zero' and 'cycles_zero' as base
>> @@ -494,7 +512,7 @@ dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos);
>>  static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
>>  static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id);
>>  static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id);
>> -static void dp_netdev_pmd_clear_poll_list(struct dp_netdev_pmd_thread *pmd);
>> +static void dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd);
>>  static void 

Re: [ovs-dev] [PATCH v8 08/16] dpif-netdev: Add pmd thread local port cache for transmission.

2016-04-22 Thread Ilya Maximets
Without this we will lose connection to non-pmd ports:
--
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 26ef612..be1f291 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3359,9 +3359,7 @@ dp_netdev_reset_pmd_threads(struct dp_netdev *dp)
 struct hmapx_node *node;
 
 HMAP_FOR_EACH (port, node, &dp->ports) {
-if (netdev_is_pmd(port->netdev)) {
-dp_netdev_add_port_to_pmds__(dp, port, &to_reload);
-}
+dp_netdev_add_port_to_pmds__(dp, port, &to_reload);
 }
 
 HMAPX_FOR_EACH (node, &to_reload) {
--
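
Reconstructed as plain code (an editor's sketch, not part of the patch: the
declarations and dp_netdev_add_port_to_pmds__() come from the hunk context
above, and dp_netdev_reload_pmd__() is assumed to be the existing pmd reload
helper), dp_netdev_reset_pmd_threads() after this change reads roughly:

/* Re-add every port, pmd-driven or not, to the pmd threads' tx port
 * maps, then reload each affected thread so it rebuilds 'port_cache'. */
struct dp_netdev_port *port;
struct hmapx to_reload = HMAPX_INITIALIZER(&to_reload);
struct hmapx_node *node;

HMAP_FOR_EACH (port, node, &dp->ports) {
    dp_netdev_add_port_to_pmds__(dp, port, &to_reload);
}

HMAPX_FOR_EACH (node, &to_reload) {
    struct dp_netdev_pmd_thread *pmd = node->data;

    dp_netdev_reload_pmd__(pmd);    /* Assumed existing reload helper. */
}

hmapx_destroy(&to_reload);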

Best regards, Ilya Maximets.

On 20.04.2016 01:28, diproiettod at vmware.com (Daniele Di Proietto) wrote:
> A future commit will stop using RCU for 'dp->ports' and use a mutex for
> reading/writing them.  To avoid taking a mutex in dp_execute_cb(), which
> is called in the fast path, this commit introduces a pmd thread local
> cache of ports.
> 
> The downside is that every port add/remove now needs to synchronize with
> every pmd thread.
> 
> Among the advantages, keeping a per-thread port mapping could allow
> greater control over the txq assignment.
> 
> Signed-off-by: Daniele Di Proietto 
> ---
>  lib/dpif-netdev.c | 249 +++---
>  1 file changed, 179 insertions(+), 70 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index cedaf39..bd2249e 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -184,6 +184,7 @@ static bool dpcls_lookup(const struct dpcls *cls,
>   *
>   *dp_netdev_mutex (global)
>   *port_mutex
> + *non_pmd_mutex
>   */
>  struct dp_netdev {
>  const struct dpif_class *const class;
> @@ -379,6 +380,13 @@ struct rxq_poll {
>  struct ovs_list node;
>  };
>  
> +/* Contained by struct dp_netdev_pmd_thread's 'port_cache' or 'tx_ports'. */
> +struct tx_port {
> +odp_port_t port_no;
> +struct netdev *netdev;
> +struct hmap_node node;
> +};
> +
>  /* PMD: Poll modes drivers.  PMD accesses devices via polling to eliminate
>   * the performance overhead of interrupt processing.  Therefore netdev can
>   * not implement rx-wait for these devices.  dpif-netdev needs to poll
> @@ -405,8 +413,8 @@ struct dp_netdev_pmd_thread {
>  
>  /* Per thread exact-match cache.  Note, the instance for cpu core
>   * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
> - * need to be protected (e.g. by 'dp_netdev_mutex').  All other
> - * instances will only be accessed by its own pmd thread. */
> + * need to be protected by 'non_pmd_mutex'.  Every other instance
> + * will only be accessed by its own pmd thread. */
>  struct emc_cache flow_cache;
>  
>  /* Classifier and Flow-Table.
> @@ -435,10 +443,20 @@ struct dp_netdev_pmd_thread {
>  atomic_int tx_qid;  /* Queue id used by this pmd thread to
>   * send packets on all netdevs */
>  
> -struct ovs_mutex poll_mutex;/* Mutex for poll_list. */
> +struct ovs_mutex port_mutex;/* Mutex for 'poll_list' and 'tx_ports'. */
>  /* List of rx queues to poll. */
>  struct ovs_list poll_list OVS_GUARDED;
> -int poll_cnt;   /* Number of elemints in poll_list. */
> +/* Number of elements in 'poll_list' */
> +int poll_cnt;
> +/* Map of 'tx_port's used for transmission.  Written by the main thread,
> + * read by the pmd thread. */
> +struct hmap tx_ports OVS_GUARDED;
> +
> +/* Map of 'tx_port' used in the fast path. This is a thread-local copy of
> + * 'tx_ports'. The instance for cpu core NON_PMD_CORE_ID can be accessed
> + * by multiple threads, and thusly need to be protected by 'non_pmd_mutex'.
> + * Every other instance will only be accessed by its own pmd thread. */
> +struct hmap port_cache;
>  
>  /* Only a pmd thread can write on its own 'cycles' and 'stats'.
>   * The main thread keeps 'stats_zero' and 'cycles_zero' as base
> @@ -494,7 +512,7 @@ dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos);
>  static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
>  static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id);
>  static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id);
> -static void dp_netdev_pmd_clear_poll_list(struct dp_netdev_pmd_thread *pmd);
> +static void dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd);
>  static void dp_netdev_del_port_from_all_pmds(struct dp_netdev *dp,
>   struct dp_netdev_port *port);
>  static void
> @@ -508,6 +526,8 @@ static void dp_netdev_reset_pmd_threads(struct dp_netdev *dp);
>  static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd);
>  static void dp_netdev_pmd_unref(struct dp_netdev_pmd_thread *pmd);
>  static void dp_netdev_pmd_flow_flush(struct dp_netdev_pmd_thread 

[ovs-dev] [PATCH v8 08/16] dpif-netdev: Add pmd thread local port cache for transmission.

2016-04-19 Thread Daniele Di Proietto
A future commit will stop using RCU for 'dp->ports' and use a mutex for
reading/writing them.  To avoid taking a mutex in dp_execute_cb(), which
is called in the fast path, this commit introduces a pmd thread local
cache of ports.

The downside is that every port add/remove now needs to synchronize with
every pmd thread.

Among the advantages, keeping a per-thread port mapping could allow
greater control over the txq assignment.
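
To make the scheme concrete, here is a minimal standalone sketch (an
editor's illustration, not code from the patch: plain arrays stand in for
OVS's 'struct hmap', and of the names below only 'tx_port', 'tx_ports',
'port_cache' and pmd_load_cached_ports() correspond to names the patch
introduces):

#include <stdint.h>
#include <string.h>

#define MAX_PORTS 64

struct tx_port {
    uint32_t port_no;            /* odp_port_t in the real code. */
    void *netdev;                /* struct netdev * in the real code. */
};

struct pmd_thread {
    /* Written by the main thread under the pmd's 'port_mutex'. */
    struct tx_port tx_ports[MAX_PORTS];
    int n_tx_ports;

    /* Thread-local copy, read by the fast path without any locking. */
    struct tx_port port_cache[MAX_PORTS];
    int n_cached;
};

/* Runs in the pmd thread itself after the main thread requests a reload,
 * so nothing else touches 'port_cache' concurrently. */
static void
pmd_load_cached_ports(struct pmd_thread *pmd)
{
    memcpy(pmd->port_cache, pmd->tx_ports,
           pmd->n_tx_ports * sizeof pmd->tx_ports[0]);
    pmd->n_cached = pmd->n_tx_ports;
}

/* Fast path (dp_execute_cb() in the datapath): only thread-local memory
 * is touched, so no mutex is taken per packet. */
static struct tx_port *
pmd_tx_port_cache_lookup(struct pmd_thread *pmd, uint32_t port_no)
{
    for (int i = 0; i < pmd->n_cached; i++) {
        if (pmd->port_cache[i].port_no == port_no) {
            return &pmd->port_cache[i];
        }
    }
    return NULL;
}

The main thread only ever mutates 'tx_ports' and then synchronizes with the
pmd thread, which copies it into 'port_cache'; that synchronization on every
port add/remove is the cost, and the lock-free per-packet lookup is the gain.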

Signed-off-by: Daniele Di Proietto 
---
 lib/dpif-netdev.c | 249 +++---
 1 file changed, 179 insertions(+), 70 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index cedaf39..bd2249e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -184,6 +184,7 @@ static bool dpcls_lookup(const struct dpcls *cls,
  *
  *dp_netdev_mutex (global)
  *port_mutex
+ *non_pmd_mutex
  */
 struct dp_netdev {
 const struct dpif_class *const class;
@@ -379,6 +380,13 @@ struct rxq_poll {
 struct ovs_list node;
 };
 
+/* Contained by struct dp_netdev_pmd_thread's 'port_cache' or 'tx_ports'. */
+struct tx_port {
+odp_port_t port_no;
+struct netdev *netdev;
+struct hmap_node node;
+};
+
 /* PMD: Poll modes drivers.  PMD accesses devices via polling to eliminate
  * the performance overhead of interrupt processing.  Therefore netdev can
  * not implement rx-wait for these devices.  dpif-netdev needs to poll
@@ -405,8 +413,8 @@ struct dp_netdev_pmd_thread {
 
 /* Per thread exact-match cache.  Note, the instance for cpu core
  * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
- * need to be protected (e.g. by 'dp_netdev_mutex').  All other
- * instances will only be accessed by its own pmd thread. */
+ * need to be protected by 'non_pmd_mutex'.  Every other instance
+ * will only be accessed by its own pmd thread. */
 struct emc_cache flow_cache;
 
 /* Classifier and Flow-Table.
@@ -435,10 +443,20 @@ struct dp_netdev_pmd_thread {
 atomic_int tx_qid;  /* Queue id used by this pmd thread to
  * send packets on all netdevs */
 
-struct ovs_mutex poll_mutex;/* Mutex for poll_list. */
+struct ovs_mutex port_mutex;/* Mutex for 'poll_list' and 'tx_ports'. */
 /* List of rx queues to poll. */
 struct ovs_list poll_list OVS_GUARDED;
-int poll_cnt;   /* Number of elemints in poll_list. */
+/* Number of elements in 'poll_list' */
+int poll_cnt;
+/* Map of 'tx_port's used for transmission.  Written by the main thread,
+ * read by the pmd thread. */
+struct hmap tx_ports OVS_GUARDED;
+
+/* Map of 'tx_port' used in the fast path. This is a thread-local copy of
+ * 'tx_ports'. The instance for cpu core NON_PMD_CORE_ID can be accessed
+ * by multiple threads, and thusly need to be protected by 'non_pmd_mutex'.
+ * Every other instance will only be accessed by its own pmd thread. */
+struct hmap port_cache;
 
 /* Only a pmd thread can write on its own 'cycles' and 'stats'.
  * The main thread keeps 'stats_zero' and 'cycles_zero' as base
@@ -494,7 +512,7 @@ dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos);
 static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
 static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id);
 static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id);
-static void dp_netdev_pmd_clear_poll_list(struct dp_netdev_pmd_thread *pmd);
+static void dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd);
 static void dp_netdev_del_port_from_all_pmds(struct dp_netdev *dp,
  struct dp_netdev_port *port);
 static void
@@ -508,6 +526,8 @@ static void dp_netdev_reset_pmd_threads(struct dp_netdev *dp);
 static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd);
 static void dp_netdev_pmd_unref(struct dp_netdev_pmd_thread *pmd);
 static void dp_netdev_pmd_flow_flush(struct dp_netdev_pmd_thread *pmd);
+static void pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
+OVS_REQUIRES(pmd->port_mutex);
 
 static inline bool emc_entry_alive(struct emc_entry *ce);
 static void emc_clear_entry(struct emc_entry *ce);
@@ -690,7 +710,7 @@ pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
 ds_put_format(reply, "pmd thread numa_id %d core_id %u:\n",
   pmd->numa_id, pmd->core_id);
 
-ovs_mutex_lock(&pmd->poll_mutex);
+ovs_mutex_lock(&pmd->port_mutex);
 LIST_FOR_EACH (poll, node, &pmd->poll_list) {
 const char *name = netdev_get_name(poll->port->netdev);
 
@@ -704,7 +724,7 @@ pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
 ds_put_format(reply, " %d", netdev_rxq_get_queue_id(poll->rx));
 prev_name = name;
 }
-ovs_mutex_unlock(&pmd->poll_mutex);
+