On Fri, 28 Jun 2024 00:32:40 +0000 Mina Almasry wrote:
> +/* Protected by rtnl_lock() */
> +static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
> +
> +void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
> +{
> +     struct netdev_rx_queue *rxq;
> +     unsigned long xa_idx;
> +     unsigned int rxq_idx;
> +
> +     if (!binding)
> +             return;

nit: I don't see how it can happen, no defensive programming, please

> +     if (binding->list.next)
> +             list_del(&binding->list);
> +
> +     xa_for_each(&binding->bound_rxq_list, xa_idx, rxq) {

nit: s/bound_rxq_list/bound_rxqs/ ? it's not a list

> +             if (rxq->mp_params.mp_priv == binding) {
> +                     /* We hold the rtnl_lock while binding/unbinding
> +                      * dma-buf, so we can't race with another thread that
> +                      * is also modifying this value. However, the page_pool
> +                      * may read this config while it's creating its
> +                      * rx-queues. WRITE_ONCE() here to match the
> +                      * READ_ONCE() in the page_pool.
> +                      */
> +                     WRITE_ONCE(rxq->mp_params.mp_priv, NULL);

Is this really sufficient in terms of locking? @binding is not
RCU-protected, nor is the reader guaranteed to be in an RCU critical
section. Actually, the "reader" tries to take a ref and use this
struct, so it's not even a pure reader.

Let's add a lock or use one of the existing locks

Or, perhaps time to add a mutex to struct net_device
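
Rough sketch of what I mean -- the field name and the get helper are
made up here, not a final design:

	/* in struct net_device: protects rx queue mp_params and the
	 * binding they point at (name "mp_mutex" invented for the sketch)
	 */
	struct mutex	mp_mutex;

	/* writer side (bind/unbind), still under rtnl_lock(): */
	mutex_lock(&dev->mp_mutex);
	rxq->mp_params.mp_priv = binding;
	mutex_unlock(&dev->mp_mutex);

	/* the "reader" that also needs to take a ref: */
	mutex_lock(&dev->mp_mutex);
	binding = rxq->mp_params.mp_priv;
	if (binding)
		net_devmem_dmabuf_binding_get(binding); /* or whatever the get helper is */
	mutex_unlock(&dev->mp_mutex);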

> +                     rxq_idx = get_netdev_rx_queue_index(rxq);
> +
> +                     netdev_rx_queue_restart(binding->dev, rxq_idx);
> +             }
> +     }
> +
> +     xa_erase(&net_devmem_dmabuf_bindings, binding->id);
> +
> +     net_devmem_dmabuf_binding_put(binding);
> +}
> +
> +int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
> +                                 struct net_devmem_dmabuf_binding *binding)
> +{
> +     struct netdev_rx_queue *rxq;
> +     u32 xa_idx;
> +     int err;
> +
> +     if (rxq_idx >= dev->num_rx_queues)
> +             return -ERANGE;
> +
> +     rxq = __netif_get_rx_queue(dev, rxq_idx);
> +     if (rxq->mp_params.mp_priv)
> +             return -EEXIST;

Makes me wonder - do we need an API to unbind, or do we assume the
application will only have one binding per socket and will close it
every time? I guess that's fine to leave as a future extension.

> +     err = xa_alloc(&binding->bound_rxq_list, &xa_idx, rxq, xa_limit_32b,
> +                    GFP_KERNEL);
> +     if (err)
> +             return err;
> +
> +     /* We hold the rtnl_lock while binding/unbinding dma-buf, so we can't
> +      * race with another thread that is also modifying this value. However,
> +      * the driver may read this config while it's creating its rx-queues.
> +      * WRITE_ONCE() here to match the READ_ONCE() in the driver.
> +      */
> +     WRITE_ONCE(rxq->mp_params.mp_priv, binding);
> +
> +     err = netdev_rx_queue_restart(dev, rxq_idx);
> +     if (err)
> +             goto err_xa_erase;
> +
> +     return 0;
> +
> +err_xa_erase:
> +     WRITE_ONCE(rxq->mp_params.mp_priv, NULL);
> +     xa_erase(&binding->bound_rxq_list, xa_idx);
> +
> +     return err;
> +}
> +
> +int net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
> +                        struct net_devmem_dmabuf_binding **out)
> +{
> +     struct net_devmem_dmabuf_binding *binding;
> +     static u32 id_alloc_next;
> +     struct scatterlist *sg;
> +     struct dma_buf *dmabuf;
> +     unsigned int sg_idx, i;
> +     unsigned long virtual;
> +     int err;
> +
> +     dmabuf = dma_buf_get(dmabuf_fd);
> +     if (IS_ERR(dmabuf))
> +             return -EBADFD;

nit: I think error pointers are nicer than **out parameters :(
     you can ERR_CAST() all the DMABUF errors
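
     i.e. something along these lines (sketch only, most of the error
     unwinding elided):

	struct net_devmem_dmabuf_binding *
	net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd)
	{
		struct net_devmem_dmabuf_binding *binding;
		struct dma_buf *dmabuf;

		dmabuf = dma_buf_get(dmabuf_fd);
		if (IS_ERR(dmabuf))
			return ERR_CAST(dmabuf);

		binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
				       dev_to_node(&dev->dev));
		if (!binding) {
			dma_buf_put(dmabuf);
			return ERR_PTR(-ENOMEM);
		}
		...
		return binding;
	}

     and in the caller:

	out_binding = net_devmem_bind_dmabuf(netdev, dmabuf_fd);
	if (IS_ERR(out_binding)) {
		err = PTR_ERR(out_binding);
		goto err_unlock;
	}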

> +     binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
> +                            dev_to_node(&dev->dev));
> +     if (!binding) {
> +             err = -ENOMEM;
> +             goto err_put_dmabuf;
> +     }
> +
> +     binding->dev = dev;
> +
> +     err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
> +                           binding, xa_limit_32b, &id_alloc_next,
> +                           GFP_KERNEL);
> +     if (err < 0)
> +             goto err_free_binding;
> +
> +     xa_init_flags(&binding->bound_rxq_list, XA_FLAGS_ALLOC);
> +
> +     refcount_set(&binding->ref, 1);
> +
> +     binding->dmabuf = dmabuf;
> +
> +     binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent);
> +     if (IS_ERR(binding->attachment)) {
> +             err = PTR_ERR(binding->attachment);
> +             goto err_free_id;
> +     }

> -/* Stub */
>  int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
>  {
> -     return 0;
> +     struct nlattr *tb[ARRAY_SIZE(netdev_queue_dmabuf_nl_policy)];
> +     struct net_devmem_dmabuf_binding *out_binding;
> +     struct list_head *sock_binding_list;
> +     u32 ifindex, dmabuf_fd, rxq_idx;
> +     struct net_device *netdev;
> +     struct sk_buff *rsp;
> +     struct nlattr *attr;
> +     int rem, err = 0;
> +     void *hdr;
> +
> +     if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
> +         GENL_REQ_ATTR_CHECK(info, NETDEV_A_BIND_DMABUF_DMABUF_FD) ||
> +         GENL_REQ_ATTR_CHECK(info, NETDEV_A_BIND_DMABUF_QUEUES))
> +             return -EINVAL;
> +
> +     ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
> +     dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_BIND_DMABUF_DMABUF_FD]);
> +
> +     rtnl_lock();
> +
> +     netdev = __dev_get_by_index(genl_info_net(info), ifindex);
> +     if (!netdev) {

 || !netif_device_present(netdev)

> +             err = -ENODEV;
> +             goto err_unlock;
> +     }
> +
> +     err = net_devmem_bind_dmabuf(netdev, dmabuf_fd, &out_binding);
> +     if (err)
> +             goto err_unlock;
> +
> +     nla_for_each_attr(attr, genlmsg_data(info->genlhdr),
> +                       genlmsg_len(info->genlhdr), rem) {
> +
> +             if (nla_type(attr) != NETDEV_A_BIND_DMABUF_QUEUES)
> +                     continue;

nit: nla_for_each_attr_type()
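
     i.e. something like:

	nla_for_each_attr_type(attr, NETDEV_A_BIND_DMABUF_QUEUES,
			       genlmsg_data(info->genlhdr),
			       genlmsg_len(info->genlhdr), rem) {
		err = nla_parse_nested(...);
		...
	}

     so the nla_type() check / continue inside the loop goes away.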

> +             err = nla_parse_nested(
> +                     tb, ARRAY_SIZE(netdev_queue_dmabuf_nl_policy) - 1, attr,
> +                     netdev_queue_dmabuf_nl_policy, info->extack);
> +             if (err < 0)
> +                     goto err_unbind;
> +
> +             rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_DMABUF_IDX]);
> +
> +             err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx,
> +                                                   out_binding);
> +             if (err)
> +                     goto err_unbind;
> +     }
> +
> +     sock_binding_list = genl_sk_priv_get(&netdev_nl_family,
> +                                          NETLINK_CB(skb).sk);
> +     if (IS_ERR(sock_binding_list)) {
> +             err = PTR_ERR(sock_binding_list);
> +             goto err_unbind;
> +     }
> +
> +     list_add(&out_binding->list, sock_binding_list);
> +
> +     rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
> +     if (!rsp) {
> +             err = -ENOMEM;
> +             goto err_unbind;
> +     }
> +
> +     hdr = genlmsg_iput(rsp, info);
> +     if (!hdr) {
> +             err = -EMSGSIZE;
> +             goto err_genlmsg_free;
> +     }

I'd move genl_sk_priv_get(), genlmsg_new() and genlmsg_iput() before we
take rtnl_lock(), but I admit it's a bit late for this sort of
feedback.. :)
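
Roughly this ordering (same calls as in the patch, just moved up):

	sock_binding_list = genl_sk_priv_get(&netdev_nl_family,
					     NETLINK_CB(skb).sk);
	if (IS_ERR(sock_binding_list))
		return PTR_ERR(sock_binding_list);

	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!rsp)
		return -ENOMEM;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr) {
		err = -EMSGSIZE;
		goto err_genlmsg_free;
	}

	rtnl_lock();
	/* ... bind, fill the id attr, unlock, genlmsg_reply() ... */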

> +     nla_put_u32(rsp, NETDEV_A_BIND_DMABUF_DMABUF_ID, out_binding->id);
> +     genlmsg_end(rsp, hdr);
> +
> +     rtnl_unlock();
> +
> +     return genlmsg_reply(rsp, info);
> +
> +err_genlmsg_free:
> +     nlmsg_free(rsp);
> +err_unbind:
> +     net_devmem_unbind_dmabuf(out_binding);
> +err_unlock:
> +     rtnl_unlock();
> +     return err;
>  }
