Implement the initialization logic for AF_PACKET-based netdev
endpoints in filter-redirector:

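1. filter_redirector_netdev_setup(): Creates an AF_PACKET socket and
   binds it to the host interface behind the specified netdev. in_netdev
   must be a TAP netdev; for a non-TAP out_netdev the string itself is
   used as the host interface name. For in_netdev, the socket is used
   to receive packets; for out_netdev, it is used to send.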

2. filter_redirector_netdev_read(): Async handler for reading packets
   from the in_netdev AF_PACKET socket. Packets are forwarded through
   the redirector chain.

3. Update cleanup to properly close the AF_PACKET sockets, and free
   the in_netdev receive buffer in the instance finalizer.

4. Modify the allow_send_when_stopped logic to consider both chardev
   and netdev output endpoints, and to only enable it when the
   redirector is active (status=on).

5. VM state change handler now manages the AF_PACKET read handler
   activation based on VM running state and enable_when_stopped.

Signed-off-by: Cindy Lu <[email protected]>
---
 net/filter-mirror.c | 241 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 224 insertions(+), 17 deletions(-)

diff --git a/net/filter-mirror.c b/net/filter-mirror.c
index 37035f3892..f8001612ec 100644
--- a/net/filter-mirror.c
+++ b/net/filter-mirror.c
@@ -26,6 +26,13 @@
 #include "qemu/sockets.h"
 #include "block/aio-wait.h"
 #include "system/runstate.h"
+#include "net/tap.h"
+#include "net/tap_int.h"
+
+#include <sys/socket.h>
+#include <net/if.h>
+#include <linux/if_packet.h>
+#include <netinet/if_ether.h>
 
 typedef struct MirrorState MirrorState;
 DECLARE_INSTANCE_CHECKER(MirrorState, FILTER_MIRROR,
@@ -42,6 +49,10 @@ struct MirrorState {
     char *outdev;
     char *in_netdev;
     char *out_netdev;
+    NetClientState *out_net;
+    int in_netfd;
+    uint8_t *in_netbuf;
+    int out_netfd;
     CharFrontend chr_in;
     CharFrontend chr_out;
     SocketReadState rs;
@@ -172,6 +183,17 @@ static int redirector_chr_can_read(void *opaque)
     return REDIRECTOR_MAX_LEN;
 }
 
+/* Tell whether the in_netdev input path should currently be serviced. */
+static bool filter_redirector_input_active(NetFilterState *nf, bool enable)
+{
+    MirrorState *s = FILTER_REDIRECTOR(nf);
+    bool vm_permits;
+
+    /* Disabled: never. Enabled: while the VM runs, or enable_when_stopped. */
+    vm_permits = enable && (runstate_is_running() || s->enable_when_stopped);
+    return vm_permits;
+}
+
 static void redirector_chr_read(void *opaque, const uint8_t *buf, int size)
 {
     NetFilterState *nf = opaque;
@@ -208,6 +230,40 @@ static void redirector_chr_event(void *opaque, QEMUChrEvent event)
     }
 }
 
+/*
+ * fd read handler for the in_netdev AF_PACKET socket: drain every queued
+ * frame and hand the PACKET_OUTGOING ones to redirector_to_filter().
+ */
+static void filter_redirector_netdev_read(void *opaque)
+{
+    NetFilterState *nf = opaque;
+    MirrorState *s = FILTER_REDIRECTOR(nf);
+    struct sockaddr_ll from;
+    socklen_t from_len = sizeof(from);
+    ssize_t nread;
+
+    if (s->in_netfd < 0 || !s->in_netbuf) {
+        return;
+    }
+
+    /* Stop on the first empty or failed read; errno is examined below. */
+    while ((nread = recvfrom(s->in_netfd, s->in_netbuf, REDIRECTOR_MAX_LEN, 0,
+                             (struct sockaddr *)&from, &from_len)) > 0) {
+        if (from.sll_pkttype == PACKET_OUTGOING) {
+            redirector_to_filter(nf, s->in_netbuf, nread);
+        }
+        from_len = sizeof(from);
+    }
+
+    if (nread >= 0 || errno == EAGAIN || errno == EWOULDBLOCK ||
+        errno == EINTR) {
+        return;
+    }
+
+    error_report("filter redirector read in_netdev failed(%s)",
+                 strerror(errno));
+}
+
 static ssize_t filter_mirror_receive_iov(NetFilterState *nf,
                                          NetClientState *sender,
                                          unsigned flags,
@@ -268,7 +324,19 @@ static void filter_redirector_cleanup(NetFilterState *nf)
 
     qemu_chr_fe_deinit(&s->chr_in, false);
     qemu_chr_fe_deinit(&s->chr_out, false);
-    qemu_del_vm_change_state_handler(s->vmsentry);
+    if (s->vmsentry) {
+        qemu_del_vm_change_state_handler(s->vmsentry);
+        s->vmsentry = NULL;
+    }
+    if (s->in_netfd >= 0) {
+        qemu_set_fd_handler(s->in_netfd, NULL, NULL, NULL);
+        close(s->in_netfd);
+        s->in_netfd = -1;
+    }
+    if (s->out_netfd >= 0) {
+        close(s->out_netfd);
+        s->out_netfd = -1;
+    }
 
     if (nf->netdev) {
         nf->netdev->allow_send_when_stopped = 0;
@@ -320,13 +388,13 @@ filter_redirector_refresh_allow_send_when_stopped(NetFilterState *nf)
 
     /*
      * Allow sending when stopped if enable_when_stopped is set and we have
-     * an outdev. This must be independent of nf->on (status) so that packets
-     * can still flow through the filter chain to other filters even when this
-     * redirector is disabled. Otherwise, tap_send() will disable read_poll
-     * when qemu_can_send_packet() returns false, preventing further packet
-     * processing.
+     * a redirector output endpoint and the redirector is enabled.
+     * Keeping this active while redirector status=off can unexpectedly
+     * drain packets in migration stop windows and perturb vhost ring state.
      */
-    nc->allow_send_when_stopped = (s->enable_when_stopped && s->outdev);
+    nc->allow_send_when_stopped = (nf->on &&
+                                   s->enable_when_stopped &&
+                                   (s->outdev || s->out_netdev));
 }
 
 static void filter_redirector_vm_state_change(void *opaque, bool running,
@@ -335,8 +403,16 @@ static void filter_redirector_vm_state_change(void *opaque, bool running,
     NetFilterState *nf = opaque;
     MirrorState *s = FILTER_REDIRECTOR(nf);
     NetClientState *nc = nf->netdev;
+    bool active = filter_redirector_input_active(nf, nf->on);
+
+    if (s->in_netfd >= 0) {
+        qemu_set_fd_handler(s->in_netfd,
+                            active ? filter_redirector_netdev_read : NULL,
+                            NULL,
+                            active ? nf : NULL);
+    }
 
-    if (!running && s->enable_when_stopped && nc->info->read_poll) {
+    if (!running && nc && s->enable_when_stopped && nc->info->read_poll) {
         nc->info->read_poll(nc, true);
     }
 }
@@ -362,21 +438,127 @@ static void filter_redirector_maybe_enable_read_poll(NetFilterState *nf)
     }
 }
 
+/*
+ * Open an AF_PACKET socket bound to the host interface behind in_netdev
+ * (TAP only) or out_netdev (TAP, else the string is the interface name).
+ * Success: fd saved in s->in_netfd (with s->in_netbuf allocated) or in
+ * s->out_netfd/s->out_net, returns true. Failure: sets errp, returns false.
+ */
+static bool filter_redirector_netdev_setup(MirrorState *s, Error **errp)
+{
+    const bool is_input = s->in_netdev != NULL;
+    const char *role = is_input ? "in_netdev" : "out_netdev";
+    const char *id = is_input ? s->in_netdev : s->out_netdev;
+    struct sockaddr_ll sll = { 0 };
+    char ifname[IFNAMSIZ] = { 0 };
+    int ifindex;
+    NetClientState *nc;
+    int fd;
+
+    if (!id) {
+        error_setg(errp, "neither in_netdev nor out_netdev is set");
+        return false;
+    }
+
+    nc = qemu_find_netdev(id);
+    if (!nc) {
+        error_setg(errp, "%s '%s' not found", role, id);
+        return false;
+    }
+
+    if (nc->info->type == NET_CLIENT_DRIVER_TAP) {
+        int tapfd = tap_get_fd(nc);
+
+        if (tapfd < 0 || tap_fd_get_ifname(tapfd, ifname) != 0) {
+            error_setg(errp, "failed to resolve TAP ifname for %s '%s'",
+                       role, id);
+            return false;
+        }
+    } else if (is_input) {
+        error_setg(errp, "in_netdev '%s' must be a TAP netdev", id);
+        return false;
+    } else {
+        /*
+         * Non-TAP out_netdev: the string doubles as the host interface
+         * name. Reject names that do not fit instead of truncating them,
+         * which could bind the socket to a different interface.
+         */
+        if (strlen(id) >= sizeof(ifname)) {
+            error_setg(errp, "out_netdev '%s' is too long for an "
+                       "interface name", id);
+            return false;
+        }
+        snprintf(ifname, sizeof(ifname), "%s", id);
+    }
+
+    ifindex = if_nametoindex(ifname);
+    if (!ifindex) {
+        error_setg_errno(errp, errno,
+                         "failed to resolve ifindex for '%s'", ifname);
+        return false;
+    }
+
+    fd = qemu_socket(AF_PACKET, SOCK_RAW | SOCK_NONBLOCK, htons(ETH_P_ALL));
+    if (fd < 0) {
+        error_setg_errno(errp, errno, "failed to create AF_PACKET socket");
+        return false;
+    }
+
+    sll.sll_family = AF_PACKET;
+    sll.sll_ifindex = ifindex;
+    sll.sll_protocol = htons(ETH_P_ALL);
+    if (bind(fd, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
+        error_setg_errno(errp, errno,
+                         "failed to bind AF_PACKET socket for ifname '%s'",
+                         ifname);
+        close(fd);
+        return false;
+    }
+
+    if (is_input) {
+        s->in_netfd = fd;
+        g_free(s->in_netbuf);
+        s->in_netbuf = g_malloc(REDIRECTOR_MAX_LEN);
+    } else {
+        s->out_netfd = fd;
+        s->out_net = nc;
+    }
+    return true;
+}
+
 static void filter_redirector_setup(NetFilterState *nf, Error **errp)
 {
     MirrorState *s = FILTER_REDIRECTOR(nf);
     Chardev *chr;
 
-    if (!s->indev && !s->outdev) {
-        error_setg(errp, "filter redirector needs 'indev' or "
-                   "'outdev' at least one property set");
+    if (!s->indev && !s->outdev && !s->in_netdev && !s->out_netdev) {
+        error_setg(errp, "filter redirector needs at least one of "
+                   "'indev', 'outdev', 'in_netdev', or 'out_netdev'");
+        return;
+    }
+
+    if (s->indev && s->in_netdev) {
+        error_setg(errp, "'indev' and 'in_netdev' cannot both be set "
+                   "for filter redirector");
+        return;
+    }
+
+    if (s->outdev && s->out_netdev) {
+        error_setg(errp, "'outdev' and 'out_netdev' cannot both be set "
+                   "for filter redirector");
+        return;
+    }
+
+    if (s->in_netdev && s->out_netdev) {
+        error_setg(errp, "'in_netdev' and 'out_netdev' cannot both be set "
+                   "for filter redirector");
+        return;
+    }
+
+    if (s->indev && s->outdev && !strcmp(s->indev, s->outdev)) {
+        error_setg(errp, "'indev' and 'outdev' could not be same "
+                   "for filter redirector");
         return;
-    } else if (s->indev && s->outdev) {
-        if (!strcmp(s->indev, s->outdev)) {
-            error_setg(errp, "'indev' and 'outdev' could not be same "
-                       "for filter redirector");
-            return;
-        }
     }
 
     net_socket_rs_init(&s->rs, redirector_rs_finalize, s->vnet_hdr);
@@ -412,9 +594,23 @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp)
         }
     }
 
+    if (s->out_netdev || s->in_netdev) {
+        if (!filter_redirector_netdev_setup(s, errp)) {
+            return;
+        }
+    }
+
     s->vmsentry = qemu_add_vm_change_state_handler(
         filter_redirector_vm_state_change, nf);
 
+    if (s->in_netfd >= 0) {
+        bool active = filter_redirector_input_active(nf, nf->on);
+
+        qemu_set_fd_handler(s->in_netfd,
+                            active ? filter_redirector_netdev_read : NULL,
+                            NULL,
+                            active ? nf : NULL);
+    }
     filter_redirector_maybe_enable_read_poll(nf);
 
     filter_redirector_refresh_allow_send_when_stopped(nf);
@@ -423,6 +619,7 @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp)
 static void filter_redirector_status_changed(NetFilterState *nf, Error **errp)
 {
     MirrorState *s = FILTER_REDIRECTOR(nf);
+    bool active = filter_redirector_input_active(nf, nf->on);
 
     if (s->indev) {
         if (nf->on) {
@@ -435,6 +632,13 @@ static void filter_redirector_status_changed(NetFilterState *nf, Error **errp)
         }
     }
 
+    if (s->in_netfd >= 0) {
+        qemu_set_fd_handler(s->in_netfd,
+                            active ? filter_redirector_netdev_read : NULL,
+                            NULL,
+                            active ? nf : NULL);
+    }
+
     if (nf->on) {
         filter_redirector_maybe_enable_read_poll(nf);
     }
@@ -665,6 +869,8 @@ static void filter_redirector_init(Object *obj)
     MirrorState *s = FILTER_REDIRECTOR(obj);
 
     s->vnet_hdr = false;
+    s->in_netfd = -1;
+    s->out_netfd = -1;
 }
 
 static void filter_mirror_fini(Object *obj)
@@ -682,6 +888,7 @@ static void filter_redirector_fini(Object *obj)
     g_free(s->outdev);
     g_free(s->in_netdev);
     g_free(s->out_netdev);
+    g_free(s->in_netbuf);
 }
 
 static const TypeInfo filter_redirector_info = {
-- 
2.52.0


Reply via email to