Re: [ovs-dev] [ovs-dev,V2] netdev-dpdk: fix memory leak

2016-08-08 Thread Kavanagh, Mark B
>
>Thanks for the report, I didn't realize that the callback could come in the 
>same thread.
>

Likewise - thanks for the catch, Ilya.

>I sent a patch that I believe should fix the deadlock here:

Thanks for resolving the issue, Daniele.

>
>http://openvswitch.org/pipermail/dev/2016-August/077315.html
>
>2016-08-05 7:48 GMT-07:00 Ilya Maximets :
>On 04.08.2016 12:49, Mark Kavanagh wrote:
>> DPDK v16.07 introduces the ability to free memzones.
>> Up until this point, DPDK memory pools created in OVS could
>> not be destroyed, thus incurring a memory leak.
>>
>> Leverage the DPDK v16.07 rte_mempool API to free DPDK
>> mempools when their associated reference count reaches 0 (this
>> indicates that the memory pool is no longer in use).
>>
>> Signed-off-by: Mark Kavanagh 
>> ---
>>
>> v2->v1: rebase to head of master, and remove 'RFC' tag
>>
>>  lib/netdev-dpdk.c | 29 +++--
>>  1 file changed, 15 insertions(+), 14 deletions(-)
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index aaac0d1..ffcd35c 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -506,7 +506,7 @@ dpdk_mp_get(int socket_id, int mtu) 
>> OVS_REQUIRES(dpdk_mutex)
>>  }
>>
>>  static void
>> -dpdk_mp_put(struct dpdk_mp *dmp)
>> +dpdk_mp_put(struct dpdk_mp *dmp) OVS_REQUIRES(dpdk_mutex)
>>  {
>>
>>      if (!dmp) {
>> @@ -514,15 +514,12 @@ dpdk_mp_put(struct dpdk_mp *dmp)
>>      }
>>
>>      dmp->refcount--;
>> -    ovs_assert(dmp->refcount >= 0);
>>
>> -#if 0
>> -    /* I could not find any API to destroy mp. */
>> -    if (dmp->refcount == 0) {
>> -        list_delete(dmp->list_node);
>> -        /* destroy mp-pool. */
>> -    }
>> -#endif
>> +    if (OVS_UNLIKELY(!dmp->refcount)) {
>> +        ovs_list_remove(>list_node);
>> +        rte_mempool_free(dmp->mp);
>> +     }
>> +
>>  }
>>
>>  static void
>> @@ -928,16 +925,18 @@ netdev_dpdk_destruct(struct netdev *netdev)
>>  {
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>>
>> +    ovs_mutex_lock(_mutex);
>>      ovs_mutex_lock(>mutex);
>> +
>>      rte_eth_dev_stop(dev->port_id);
>>      free(ovsrcu_get_protected(struct ingress_policer *,
>>                                >ingress_policer));
>> -    ovs_mutex_unlock(>mutex);
>>
>> -    ovs_mutex_lock(_mutex);
>>      rte_free(dev->tx_q);
>>      ovs_list_remove(>list_node);
>>      dpdk_mp_put(dev->dpdk_mp);
>> +
>> +    ovs_mutex_unlock(>mutex);
>>      ovs_mutex_unlock(_mutex);
>>  }
>>
>> @@ -946,6 +945,9 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
>>  {
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>>
>> +    ovs_mutex_lock(_mutex);
>> +    ovs_mutex_lock(>mutex);
>> +
>>      /* Guest becomes an orphan if still attached. */
>>      if (netdev_dpdk_get_vid(dev) >= 0) {
>>          VLOG_ERR("Removing port '%s' while vhost device still attached.",
>> @@ -961,15 +963,14 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
>>          fatal_signal_remove_file_to_unlink(dev->vhost_id);
>>      }
>>
>> -    ovs_mutex_lock(>mutex);
>>      free(ovsrcu_get_protected(struct ingress_policer *,
>>                                >ingress_policer));
>> -    ovs_mutex_unlock(>mutex);
>>
>> -    ovs_mutex_lock(_mutex);
>>      rte_free(dev->tx_q);
>>      ovs_list_remove(>list_node);
>>      dpdk_mp_put(dev->dpdk_mp);
>> +
>> +    ovs_mutex_unlock(>mutex);
>>      ovs_mutex_unlock(_mutex);
>>  }
>I agree that locking here was wrong but this change introduces issue because
>'rte_vhost_driver_unregister()' may call 'destroy_device()' and OVS will be 
>aborted
>on attempt to lock 'dpdk_mutex' again:
>
>VHOST_CONFIG: free connfd = 37 for device '/vhost1'
>ovs-vswitchd: lib/netdev-dpdk.c:2305: pthread_mutex_lock failed (Resource 
>deadlock avoided)
>
>Program received signal SIGABRT, Aborted.
>0x007fb7ad6d38 in raise () from /lib64/libc.so.6
>(gdb) bt
>#0  0x007fb7ad6d38 in raise () from /lib64/libc.so.6
>#1  0x007fb7ad8aa8 in abort () from /lib64/libc.so.6
>#2  0x00692be0 in ovs_abort_valist at lib/util.c:335
>#3  0x00692ba0 in ovs_abort at lib/util.c:327
>#4  0x00651800 in ovs_mutex_lock_at (l_=0x899ab0 , 
>where=0x78a458
>"lib/netdev-dpdk.c:2305") at lib/ovs-thread.c:76
>#5  0x006c0190 in destroy_device (vid=0) at lib/netdev-dpdk.c:2305
>#6  0x004ea850 in vhost_destroy_device ()
>#7  0x004ee578 in rte_vhost_driver_unregister ()
>#8  0x006bc8c8 in netdev_dpdk_vhost_destruct (netdev=0x7f6bffed00) at 
>lib/netdev-
>dpdk.c:944
>#9  0x005e4ad4 in netdev_unref (dev=0x7f6bffed00) at lib/netdev.c:499
>#10 0x005e4b9c in netdev_close (netdev=0x7f6bffed00) at 
>lib/netdev.c:523
>[...]
>#20 0x0053ad94 in main (argc=7, argv=0x7ff318) at 
>vswitchd/ovs-vswitchd.c:112
>
>May be reproduced by removing port while virtio still attached.
>This blocks reconnection feature and deletion of port while QEMU still 
>attached.
>
>Someone should fix 

Re: [ovs-dev] [ovs-dev,V2] netdev-dpdk: fix memory leak

2016-08-05 Thread Daniele Di Proietto
Thanks for the report, I didn't realize that the callback could come in the
same thread.

I sent a patch that I believe should fix the deadlock here:

http://openvswitch.org/pipermail/dev/2016-August/077315.html

2016-08-05 7:48 GMT-07:00 Ilya Maximets :

> On 04.08.2016 12:49, Mark Kavanagh wrote:
> > DPDK v16.07 introduces the ability to free memzones.
> > Up until this point, DPDK memory pools created in OVS could
> > not be destroyed, thus incurring a memory leak.
> >
> > Leverage the DPDK v16.07 rte_mempool API to free DPDK
> > mempools when their associated reference count reaches 0 (this
> > indicates that the memory pool is no longer in use).
> >
> > Signed-off-by: Mark Kavanagh 
> > ---
> >
> > v2->v1: rebase to head of master, and remove 'RFC' tag
> >
> >  lib/netdev-dpdk.c | 29 +++--
> >  1 file changed, 15 insertions(+), 14 deletions(-)
> >
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> > index aaac0d1..ffcd35c 100644
> > --- a/lib/netdev-dpdk.c
> > +++ b/lib/netdev-dpdk.c
> > @@ -506,7 +506,7 @@ dpdk_mp_get(int socket_id, int mtu)
> OVS_REQUIRES(dpdk_mutex)
> >  }
> >
> >  static void
> > -dpdk_mp_put(struct dpdk_mp *dmp)
> > +dpdk_mp_put(struct dpdk_mp *dmp) OVS_REQUIRES(dpdk_mutex)
> >  {
> >
> >  if (!dmp) {
> > @@ -514,15 +514,12 @@ dpdk_mp_put(struct dpdk_mp *dmp)
> >  }
> >
> >  dmp->refcount--;
> > -ovs_assert(dmp->refcount >= 0);
> >
> > -#if 0
> > -/* I could not find any API to destroy mp. */
> > -if (dmp->refcount == 0) {
> > -list_delete(dmp->list_node);
> > -/* destroy mp-pool. */
> > -}
> > -#endif
> > +if (OVS_UNLIKELY(!dmp->refcount)) {
> > +ovs_list_remove(>list_node);
> > +rte_mempool_free(dmp->mp);
> > + }
> > +
> >  }
> >
> >  static void
> > @@ -928,16 +925,18 @@ netdev_dpdk_destruct(struct netdev *netdev)
> >  {
> >  struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >
> > +ovs_mutex_lock(_mutex);
> >  ovs_mutex_lock(>mutex);
> > +
> >  rte_eth_dev_stop(dev->port_id);
> >  free(ovsrcu_get_protected(struct ingress_policer *,
> >>ingress_policer));
> > -ovs_mutex_unlock(>mutex);
> >
> > -ovs_mutex_lock(_mutex);
> >  rte_free(dev->tx_q);
> >  ovs_list_remove(>list_node);
> >  dpdk_mp_put(dev->dpdk_mp);
> > +
> > +ovs_mutex_unlock(>mutex);
> >  ovs_mutex_unlock(_mutex);
> >  }
> >
> > @@ -946,6 +945,9 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
> >  {
> >  struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >
> > +ovs_mutex_lock(_mutex);
> > +ovs_mutex_lock(>mutex);
> > +
> >  /* Guest becomes an orphan if still attached. */
> >  if (netdev_dpdk_get_vid(dev) >= 0) {
> >  VLOG_ERR("Removing port '%s' while vhost device still
> attached.",
> > @@ -961,15 +963,14 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
> >  fatal_signal_remove_file_to_unlink(dev->vhost_id);
> >  }
> >
> > -ovs_mutex_lock(>mutex);
> >  free(ovsrcu_get_protected(struct ingress_policer *,
> >>ingress_policer));
> > -ovs_mutex_unlock(>mutex);
> >
> > -ovs_mutex_lock(_mutex);
> >  rte_free(dev->tx_q);
> >  ovs_list_remove(>list_node);
> >  dpdk_mp_put(dev->dpdk_mp);
> > +
> > +ovs_mutex_unlock(>mutex);
> >  ovs_mutex_unlock(_mutex);
> >  }
>
> I agree that locking here was wrong but this change introduces issue
> because
> 'rte_vhost_driver_unregister()' may call 'destroy_device()' and OVS will
> be aborted
> on attempt to lock 'dpdk_mutex' again:
>
> VHOST_CONFIG: free connfd = 37 for device '/vhost1'
> ovs-vswitchd: lib/netdev-dpdk.c:2305: pthread_mutex_lock failed (Resource
> deadlock avoided)
>
> Program received signal SIGABRT, Aborted.
> 0x007fb7ad6d38 in raise () from /lib64/libc.so.6
> (gdb) bt
> #0  0x007fb7ad6d38 in raise () from /lib64/libc.so.6
> #1  0x007fb7ad8aa8 in abort () from /lib64/libc.so.6
> #2  0x00692be0 in ovs_abort_valist at lib/util.c:335
> #3  0x00692ba0 in ovs_abort at lib/util.c:327
> #4  0x00651800 in ovs_mutex_lock_at (l_=0x899ab0 ,
> where=0x78a458 "lib/netdev-dpdk.c:2305") at lib/ovs-thread.c:76
> #5  0x006c0190 in destroy_device (vid=0) at lib/netdev-dpdk.c:2305
> #6  0x004ea850 in vhost_destroy_device ()
> #7  0x004ee578 in rte_vhost_driver_unregister ()
> #8  0x006bc8c8 in netdev_dpdk_vhost_destruct (netdev=0x7f6bffed00)
> at lib/netdev-dpdk.c:944
> #9  0x005e4ad4 in netdev_unref (dev=0x7f6bffed00) at
> lib/netdev.c:499
> #10 0x005e4b9c in netdev_close (netdev=0x7f6bffed00) at
> lib/netdev.c:523
> [...]
> #20 0x0053ad94 in main (argc=7, argv=0x7ff318) at
> vswitchd/ovs-vswitchd.c:112
>
> May be reproduced by removing port while virtio still attached.
> This blocks reconnection feature and deletion of port while QEMU still
> 

Re: [ovs-dev] [ovs-dev,V2] netdev-dpdk: fix memory leak

2016-08-05 Thread Ilya Maximets
On 04.08.2016 12:49, Mark Kavanagh wrote:
> DPDK v16.07 introduces the ability to free memzones.
> Up until this point, DPDK memory pools created in OVS could
> not be destroyed, thus incurring a memory leak.
> 
> Leverage the DPDK v16.07 rte_mempool API to free DPDK
> mempools when their associated reference count reaches 0 (this
> indicates that the memory pool is no longer in use).
> 
> Signed-off-by: Mark Kavanagh 
> ---
> 
> v2->v1: rebase to head of master, and remove 'RFC' tag
> 
>  lib/netdev-dpdk.c | 29 +++--
>  1 file changed, 15 insertions(+), 14 deletions(-)
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index aaac0d1..ffcd35c 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -506,7 +506,7 @@ dpdk_mp_get(int socket_id, int mtu) 
> OVS_REQUIRES(dpdk_mutex)
>  }
>  
>  static void
> -dpdk_mp_put(struct dpdk_mp *dmp)
> +dpdk_mp_put(struct dpdk_mp *dmp) OVS_REQUIRES(dpdk_mutex)
>  {
>  
>  if (!dmp) {
> @@ -514,15 +514,12 @@ dpdk_mp_put(struct dpdk_mp *dmp)
>  }
>  
>  dmp->refcount--;
> -ovs_assert(dmp->refcount >= 0);
>  
> -#if 0
> -/* I could not find any API to destroy mp. */
> -if (dmp->refcount == 0) {
> -list_delete(dmp->list_node);
> -/* destroy mp-pool. */
> -}
> -#endif
> +if (OVS_UNLIKELY(!dmp->refcount)) {
> +ovs_list_remove(>list_node);
> +rte_mempool_free(dmp->mp);
> + }
> +
>  }
>  
>  static void
> @@ -928,16 +925,18 @@ netdev_dpdk_destruct(struct netdev *netdev)
>  {
>  struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>  
> +ovs_mutex_lock(_mutex);
>  ovs_mutex_lock(>mutex);
> +
>  rte_eth_dev_stop(dev->port_id);
>  free(ovsrcu_get_protected(struct ingress_policer *,
>>ingress_policer));
> -ovs_mutex_unlock(>mutex);
>  
> -ovs_mutex_lock(_mutex);
>  rte_free(dev->tx_q);
>  ovs_list_remove(>list_node);
>  dpdk_mp_put(dev->dpdk_mp);
> +
> +ovs_mutex_unlock(>mutex);
>  ovs_mutex_unlock(_mutex);
>  }
>  
> @@ -946,6 +945,9 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
>  {
>  struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>  
> +ovs_mutex_lock(_mutex);
> +ovs_mutex_lock(>mutex);
> +
>  /* Guest becomes an orphan if still attached. */
>  if (netdev_dpdk_get_vid(dev) >= 0) {
>  VLOG_ERR("Removing port '%s' while vhost device still attached.",
> @@ -961,15 +963,14 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
>  fatal_signal_remove_file_to_unlink(dev->vhost_id);
>  }
>  
> -ovs_mutex_lock(>mutex);
>  free(ovsrcu_get_protected(struct ingress_policer *,
>>ingress_policer));
> -ovs_mutex_unlock(>mutex);
>  
> -ovs_mutex_lock(_mutex);
>  rte_free(dev->tx_q);
>  ovs_list_remove(>list_node);
>  dpdk_mp_put(dev->dpdk_mp);
> +
> +ovs_mutex_unlock(>mutex);
>  ovs_mutex_unlock(_mutex);
>  }

I agree that locking here was wrong but this change introduces issue because
'rte_vhost_driver_unregister()' may call 'destroy_device()' and OVS will be 
aborted
on attempt to lock 'dpdk_mutex' again:

VHOST_CONFIG: free connfd = 37 for device '/vhost1'
ovs-vswitchd: lib/netdev-dpdk.c:2305: pthread_mutex_lock failed (Resource 
deadlock avoided)

Program received signal SIGABRT, Aborted.
0x007fb7ad6d38 in raise () from /lib64/libc.so.6
(gdb) bt
#0  0x007fb7ad6d38 in raise () from /lib64/libc.so.6
#1  0x007fb7ad8aa8 in abort () from /lib64/libc.so.6
#2  0x00692be0 in ovs_abort_valist at lib/util.c:335
#3  0x00692ba0 in ovs_abort at lib/util.c:327
#4  0x00651800 in ovs_mutex_lock_at (l_=0x899ab0 , 
where=0x78a458 "lib/netdev-dpdk.c:2305") at lib/ovs-thread.c:76
#5  0x006c0190 in destroy_device (vid=0) at lib/netdev-dpdk.c:2305
#6  0x004ea850 in vhost_destroy_device ()
#7  0x004ee578 in rte_vhost_driver_unregister ()
#8  0x006bc8c8 in netdev_dpdk_vhost_destruct (netdev=0x7f6bffed00) at 
lib/netdev-dpdk.c:944
#9  0x005e4ad4 in netdev_unref (dev=0x7f6bffed00) at lib/netdev.c:499
#10 0x005e4b9c in netdev_close (netdev=0x7f6bffed00) at lib/netdev.c:523
[...]
#20 0x0053ad94 in main (argc=7, argv=0x7ff318) at 
vswitchd/ovs-vswitchd.c:112

May be reproduced by removing port while virtio still attached.
This blocks reconnection feature and deletion of port while QEMU still attached.

Someone should fix this. Any thoughts?

Best regards, Ilya Maximets.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev