> -----Original Message-----
> From: Koikkara Reeny, Shibin <shibin.koikkara.re...@intel.com>
> Sent: Wednesday, December 14, 2022 11:41 PM
> To: dev@dpdk.org; Burakov, Anatoly <anatoly.bura...@intel.com>;
> Richardson, Bruce <bruce.richard...@intel.com>
> Cc: Loftus, Ciara <ciara.lof...@intel.com>; Zhang, Qi Z
> <qi.z.zh...@intel.com>; Koikkara Reeny, Shibin
> <shibin.koikkara.re...@intel.com>
> Subject: [PATCH v2] net/af_xdp: AF_XDP PMD CNI Integration
> 
> Integrate support for the AF_XDP CNI and device plugin [1] so that the DPDK
> AF_XDP PMD can work in an unprivileged container environment.
> Part of the AF_XDP PMD initialization process involves loading an eBPF
> program onto the given netdev. This operation requires privileges, which
> prevents the PMD from being able to work in an unprivileged container
> (without root access). The plugin CNI handles the program loading. CNI open
> Unix Domain Socket (UDS) and waits listening for a client to make requests
> over that UDS. The client(DPDK) connects and a "handshake" occurs, then
> the File Descriptor which points to the XSKMAP associated with the loaded
> eBPF program is handed over to the client. The client can then proceed with
> creating an AF_XDP socket and inserting the socket into the XSKMAP pointed
> to by the FD received on the UDS.
> 
> A new vdev arg "use_cni" is created to indicate user wishes to run the PMD in
> unprivileged mode and to receive the XSKMAP FD from the CNI.
> When this flag is set, the XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD libbpf
> flag should be used when creating the socket, which tells libbpf not to load
> the default libbpf program on the netdev. We tell libbpf not to do this
> because the loading is handled by the CNI in this scenario.
> 
> [1]: https://github.com/intel/afxdp-plugins-for-kubernetes
> 
> Signed-off-by: Shibin Koikkara Reeny <shibin.koikkara.re...@intel.com>
> ---
>  drivers/net/af_xdp/rte_eth_af_xdp.c | 337
> +++++++++++++++++++++++++++-
>  1 file changed, 325 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c
> b/drivers/net/af_xdp/rte_eth_af_xdp.c
> index b6ec9bf490..196d98ad97 100644
> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
> @@ -7,6 +7,7 @@
>  #include <string.h>
>  #include <netinet/in.h>
>  #include <net/if.h>
> +#include <sys/un.h>
>  #include <sys/socket.h>
>  #include <sys/ioctl.h>
>  #include <linux/if_ether.h>
> @@ -81,6 +82,24 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype,
> NOTICE);
> 
>  #define ETH_AF_XDP_MP_KEY "afxdp_mp_send_fds"
> 
> +#define MAX_LONG_OPT_SZ                      64
> +#define UDS_MAX_FD_NUM                       2
> +#define UDS_MAX_CMD_LEN                      64
> +#define UDS_MAX_CMD_RESP             128
> +#define UDS_XSK_MAP_FD_MSG           "/xsk_map_fd"
> +#define UDS_SOCK                     "/tmp/afxdp.sock"
> +#define UDS_CONNECT_MSG                      "/connect"
> +#define UDS_HOST_OK_MSG                      "/host_ok"
> +#define UDS_HOST_NAK_MSG             "/host_nak"
> +#define UDS_VERSION_MSG                      "/version"
> +#define UDS_XSK_MAP_FD_MSG           "/xsk_map_fd"
> +#define UDS_XSK_SOCKET_MSG           "/xsk_socket"
> +#define UDS_FD_ACK_MSG                       "/fd_ack"
> +#define UDS_FD_NAK_MSG                       "/fd_nak"
> +#define UDS_FIN_MSG                  "/fin"
> +#define UDS_FIN_ACK_MSG                      "/fin_ack"
> +
> +
>  static int afxdp_dev_count;
> 
>  /* Message header to synchronize fds via IPC */ @@ -151,6 +170,7 @@
> struct pmd_internals {
>       char prog_path[PATH_MAX];
>       bool custom_prog_configured;
>       bool force_copy;
> +     bool use_cni;
>       struct bpf_map *map;
> 
>       struct rte_ether_addr eth_addr;
> @@ -170,6 +190,7 @@ struct pmd_process_private {
>  #define ETH_AF_XDP_PROG_ARG                  "xdp_prog"
>  #define ETH_AF_XDP_BUDGET_ARG                        "busy_budget"
>  #define ETH_AF_XDP_FORCE_COPY_ARG            "force_copy"
> +#define ETH_AF_XDP_USE_CNI_ARG                       "use_cni"
> 
>  static const char * const valid_arguments[] = {
>       ETH_AF_XDP_IFACE_ARG,
> @@ -179,8 +200,8 @@ static const char * const valid_arguments[] = {
>       ETH_AF_XDP_PROG_ARG,
>       ETH_AF_XDP_BUDGET_ARG,
>       ETH_AF_XDP_FORCE_COPY_ARG,
> -     NULL
> -};
> +     ETH_AF_XDP_USE_CNI_ARG,
> +     NULL};
> 
>  static const struct rte_eth_link pmd_link = {
>       .link_speed = RTE_ETH_SPEED_NUM_10G,
> @@ -1129,7 +1150,8 @@ xsk_umem_info *xdp_umem_configure(struct
> pmd_internals *internals,
>               ret = xsk_umem__create(&umem->umem, base_addr,
> umem_size,
>                               &rxq->fq, &rxq->cq, &usr_config);
>               if (ret) {
> -                     AF_XDP_LOG(ERR, "Failed to create umem\n");
> +                     AF_XDP_LOG(ERR, "Failed to create umem [%d]:
> [%s]\n",
> +                                errno, strerror(errno));
>                       goto err;
>               }
>               umem->buffer = base_addr;
> @@ -1314,6 +1336,245 @@ configure_preferred_busy_poll(struct
> pkt_rx_queue *rxq)
>       return 0;
>  }
> 
> +static int
> +init_uds_sock(struct sockaddr_un *server) {
> +     int sock;
> +
> +     sock = socket(AF_UNIX, SOCK_SEQPACKET, 0);
> +     if (sock < 0) {
> +             AF_XDP_LOG(ERR, "Failed to opening stream socket\n");
> +             return -1;
> +     }
> +
> +     server->sun_family = AF_UNIX;
> +     strlcpy(server->sun_path, UDS_SOCK, sizeof(server->sun_path));
> +
> +     if (connect(sock, (struct sockaddr *)server, sizeof(struct
> sockaddr_un)) < 0) {

        seems the server address is hard coded as  "/tmp/afxdp.sock", is any 
spec we follows, or should we parse this as a devargs?
        better add some comment or external link that help to explain this

Reply via email to