On Tue, Nov 11, 2025 at 11:40:13AM -0500, Aaron Conole wrote:
> Adrián Moreno <[email protected]> writes:
>
> > On Tue, Sep 30, 2025 at 12:46:58PM -0400, Aaron Conole wrote:
> >> This commit wires up support for the socket() action, which will
> >> create a recirculation on the datapath to bypass as many extra
> >> recirculations as possible before the output.  The rough idea is
> >> that a data packet can 'bypass' certain parts of the networking
> >> stack and experience a faster delivery.  This makes some of the
> >> tracking features (for example, simple tcpdump type operations)
> >> much more complicated because the underlying datapath will not
> >> use the complete routing / sockets layer in datapath, but it
> >> instead directly will queue the data portion of the packet to
> >> the socket buffer.  For non-data packets, the socket() action
> >> will hit the 'else' directive and take the recirc chain.
> >>
> >> One area that still needs enhancement is the ct() path.  The
> >> conntrack portions are only doing the socket call after the
> >> recirculation chain.  However, the proper way to do this is
> >> to rewrite the original recirculation state to hit the socket
> >> path first.  That will require quite a bit more context and
> >> logic in the compose ct action, so it isn't done here, and
> >> it isn't recommended to use this with a really complex flow
> >> pipeline yet (since this is still a WIP).  Future work will
> >> focus on this area so that an existing flow after freezing
> >> that looks like:
> >>
> >>   recirc_id(0),eth(...),eth_type(...),ipv4(...),
> >>     actions=ct(commit,nat(dst=1.2.3.4)),recirc(0x1)
> >>
> >> would be rewritten as:
> >>
> >>   recirc_id(0),eth(...),eth_type(...),ipv4(...),...
> >>     actions=socket(netns=...,inode=...,else(recirc(1))
> >>   recirc_id(0x1),eth(...)...
> >>     actions=ct(commit,nat(dst=1.2.3.4)),recirc(0x2)
> >>
> >> And then the corresponding output could get generated later.
> >>
> >> Signed-off-by: Aaron Conole <[email protected]>
> >> ---
> >>  lib/dpif-netdev.c            |  24 +++++-
> >>  lib/netdev-dummy.c           |  73 +++++++++++++++++
> >>  ofproto/ofproto-dpif-rid.h   |   1 +
> >>  ofproto/ofproto-dpif-xlate.c | 153 +++++++++++++++++++++++++++++++++++
> >>  ofproto/ofproto-dpif-xlate.h |   9 +++
> >>  tests/ofproto-dpif.at        |  81 +++++++++++++++++++
> >>  6 files changed, 340 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> >> index 1013d736eb..8c0c46c560 100644
> >> --- a/lib/dpif-netdev.c
> >> +++ b/lib/dpif-netdev.c
> >> @@ -9186,6 +9186,29 @@ dp_execute_cb(void *aux_, struct dp_packet_batch 
> >> *packets_,
> >>      uint32_t packet_count, packets_dropped;
> >>
> >>      switch ((enum ovs_action_attr)type) {
> >> +    case OVS_ACTION_ATTR_SOCKET: {
> >> +        /* Since socket isn't supported, but there's a fallback branch, 
> >> we will
> >> +         * execute the fallback side of the socket call.  In the future, 
> >> it
> >> +         * may be feasible to implement via an AF_XDP type socket. */
> >> +        static const struct nl_policy ovs_sock_act_policy[] = {
> >> +            [OVS_SOCKET_ACTION_ATTR_NETNS_ID] = { .type = NL_A_U32 },
> >> +            [OVS_SOCKET_ACTION_ATTR_INODE]    = { .type = NL_A_U64 },
> >> +            [OVS_SOCKET_ACTION_ATTR_ACTIONS]  = { .type = NL_A_NESTED },
> >> +        };
> >> +        struct nlattr *sock_act[ARRAY_SIZE(ovs_sock_act_policy)];
> >> +        if (!nl_parse_nested(a, ovs_sock_act_policy, sock_act,
> >> +                             ARRAY_SIZE(sock_act))) {
> >> +            VLOG_ERR("Unable to parse socket type.");
> >> +            return;
> >> +        }
> >> +
> >> +        struct nlattr *acts = sock_act[OVS_SOCKET_ACTION_ATTR_ACTIONS];
> >> +        dp_netdev_execute_actions(aux->pmd, packets_, should_steal,
> >> +                                  aux->flow, nl_attr_get(acts),
> >> +                                  nl_attr_get_size(acts));
> >> +        return;
> >> +    }
> >> +
> >>      case OVS_ACTION_ATTR_OUTPUT:
> >>          dp_execute_output_action(pmd, packets_, should_steal,
> >>                                   nl_attr_get_odp_port(a));
> >> @@ -9495,7 +9518,6 @@ dp_execute_cb(void *aux_, struct dp_packet_batch 
> >> *packets_,
> >>      case OVS_ACTION_ATTR_ADD_MPLS:
> >>      case OVS_ACTION_ATTR_DEC_TTL:
> >>      case OVS_ACTION_ATTR_PSAMPLE:
> >> -    case OVS_ACTION_ATTR_SOCKET:
> >>      case __OVS_ACTION_ATTR_MAX:
> >>          OVS_NOT_REACHED();
> >>      }
> >> diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
> >> index b72820fcc5..244c58869f 100644
> >> --- a/lib/netdev-dummy.c
> >> +++ b/lib/netdev-dummy.c
> >> @@ -174,6 +174,9 @@ struct netdev_dummy {
> >>
> >>      /* Set the segment size for netdev TSO support. */
> >>      int ol_tso_segsz OVS_GUARDED;
> >> +
> >> +    /* Socket lookup functionality state. */
> >> +    bool socket_lookup_enabled OVS_GUARDED;
> >>  };
> >>
> >>  /* Max 'recv_queue_len' in struct netdev_dummy. */
> >> @@ -739,6 +742,7 @@ netdev_dummy_construct(struct netdev *netdev_)
> >>      netdev->requested_n_rxq = netdev_->n_rxq;
> >>      netdev->requested_n_txq = netdev_->n_txq;
> >>      netdev->numa_id = 0;
> >> +    netdev->socket_lookup_enabled = false;
> >>
> >>      memset(&netdev->custom_stats, 0, sizeof(netdev->custom_stats));
> >>
> >> @@ -1796,6 +1800,71 @@ exit:
> >>      return error ? -1 : 0;
> >>  }
> >>
> >> +static int
> >> +netdev_dummy_get_target_ns(const struct netdev *netdev OVS_UNUSED,
> >> +                           int *target_ns)
> >> +{
> >> +    /* For dummy devices, return a fixed hash value instead of real
> >> +     * namespace ID. */
> >> +    *target_ns = 0x12345678;
> >> +    return 0;
> >> +}
> >> +
> >> +static int
> >> +netdev_dummy_set_socket_lookup_enabled(struct netdev *netdev_, bool 
> >> enabled)
> >> +{
> >> +    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
> >> +
> >> +    ovs_mutex_lock(&netdev->mutex);
> >> +    netdev->socket_lookup_enabled = enabled;
> >> +    ovs_mutex_unlock(&netdev->mutex);
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static bool
> >> +netdev_dummy_get_socket_lookup_enabled(const struct netdev *netdev_)
> >> +{
> >> +    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
> >> +    bool enabled;
> >> +
> >> +    ovs_mutex_lock(&netdev->mutex);
> >> +    enabled = netdev->socket_lookup_enabled;
> >> +    ovs_mutex_unlock(&netdev->mutex);
> >> +
> >> +    return enabled;
> >> +}
> >> +
> >> +static int
> >> +netdev_dummy_get_socket_inode(const struct netdev *netdev OVS_UNUSED,
> >> +                              int proto, int af,
> >> +                              const void *src,
> >> +                              ovs_be16 sport,
> >> +                              const void *dst,
> >> +                              ovs_be16 dport,
> >> +                              uint64_t *inode_out, uint64_t *netns_out)
> >> +{
> >> +    /* For dummy devices, return hashed values instead of real 
> >> inode/netns */
> >> +    if (proto != IPPROTO_TCP) {
> >> +        return ENOENT;
> >> +    }
> >> +
> >> +    if (af == AF_INET) {
> >> +        uint64_t inode_hash = hash_2words(*(uint32_t *) src,
> >> +                                          *(uint32_t *) dst);
> >> +        inode_hash = inode_hash << 32;
> >> +        inode_hash |=
> >> +            ((OVS_FORCE uint16_t) sport << 16) |
> >> +            ((OVS_FORCE uint16_t) dport);
> >> +
> >> +        *inode_out = inode_hash;
> >> +        *netns_out = 0x12345678;
> >> +    } else {
> >> +        return ENOENT;
> >> +    }
> >> +    return 0;
> >> +}
> >> +
> >>  #define NETDEV_DUMMY_CLASS_COMMON                       \
> >>      .run = netdev_dummy_run,                            \
> >>      .wait = netdev_dummy_wait,                          \
> >> @@ -1822,6 +1891,10 @@ exit:
> >>      .dump_queue_stats = netdev_dummy_dump_queue_stats,  \
> >>      .get_addr_list = netdev_dummy_get_addr_list,        \
> >>      .update_flags = netdev_dummy_update_flags,          \
> >> +    .get_target_ns = netdev_dummy_get_target_ns,        \
> >> +    .set_socket_lookup_enabled = netdev_dummy_set_socket_lookup_enabled, \
> >> +    .get_socket_lookup_enabled = netdev_dummy_get_socket_lookup_enabled, \
> >> +    .get_socket_inode = netdev_dummy_get_socket_inode,  \
> >>      .rxq_alloc = netdev_dummy_rxq_alloc,                \
> >>      .rxq_construct = netdev_dummy_rxq_construct,        \
> >>      .rxq_destruct = netdev_dummy_rxq_destruct,          \
> >> diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h
> >> index 4df630c62b..f899a83260 100644
> >> --- a/ofproto/ofproto-dpif-rid.h
> >> +++ b/ofproto/ofproto-dpif-rid.h
> >> @@ -155,6 +155,7 @@ struct frozen_state {
> >>      bool conntracked;             /* Conntrack occurred prior to freeze. 
> >> */
> >>      bool was_mpls;                /* MPLS packet */
> >>      struct uuid xport_uuid;       /* UUID of 1st port packet received on. 
> >> */
> >> +    bool socket_attempt;          /* A socket output was already 
> >> attempted. */
> >>
> >>      /* Actions to be translated when thawing. */
> >>      struct ofpact *ofpacts;
> >> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> >> index 2c8197fb73..2ac160507c 100644
> >> --- a/ofproto/ofproto-dpif-xlate.c
> >> +++ b/ofproto/ofproto-dpif-xlate.c
> >> @@ -4456,6 +4456,146 @@ terminate_native_tunnel(struct xlate_ctx *ctx, 
> >> const struct xport *xport,
> >>      return *tnl_port != ODPP_NONE;
> >>  }
> >>
> >> +static bool should_track_socket_flows(struct xlate_ctx *ctx,
> >> +                                      const struct xport *xport)
> >> +{
> >> +    struct flow *flow = &ctx->xin->flow;
> >> +
> >> +    /* Only for valid ports (also need to check the addresses involved). 
> >> */
> >> +    return ((xport && xport->netdev &&
> >> +             netdev_get_socket_lookup_enabled(xport->netdev)) &&
> >> +            (flow->dl_type == htons(ETH_TYPE_IP) ||
> >> +             flow->dl_type == htons(ETH_TYPE_IPV6)) &&
> >> +            (flow->nw_proto == IPPROTO_TCP) &&
> >> +            (flow->tp_src && flow->tp_dst));
> >> +}
> >> +
> >> +static void
> >> +unwildcard_socket_flow(struct xlate_ctx *ctx)
> >> +{
> >> +    struct flow_wildcards *wc = ctx->wc;
> >> +    struct flow *flow = &ctx->xin->flow;
> >> +
> >> +    /* Protocol and TCP ports are common */
> >> +    memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
> >> +    memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
> >> +    memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
> >> +
> >> +    /* IP address fields */
> >> +    if (flow->dl_type == htons(ETH_TYPE_IP)) {
> >> +        /* IPv4 */
> >> +        wc->masks.nw_src = OVS_BE32_MAX;
> >> +        wc->masks.nw_dst = OVS_BE32_MAX;
> >> +    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
> >> +        /* IPv6 */
> >> +        memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src);
> >> +        memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
> >> +    }
> >> +
> >> +    /* Eth fields. */
> >> +    memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type);
> >> +    memset(&wc->masks.dl_src, 0xff, ETH_ADDR_LEN);
> >> +    memset(&wc->masks.dl_dst, 0xff, ETH_ADDR_LEN);
> >> +}
> >> +
> >> +static void
> >> +compose_socket_action_with_fallback(struct ofpbuf *actions, uint32_t 
> >> netns,
> >> +                                    uint64_t inode, uint32_t recirc_id)
> >> +{
> >> +    size_t socket_offset, actions_offset;
> >> +
> >> +    socket_offset = nl_msg_start_nested(actions, OVS_ACTION_ATTR_SOCKET);
> >> +    nl_msg_put_u32(actions, OVS_SOCKET_ACTION_ATTR_NETNS_ID, netns);
> >> +    nl_msg_put_u64(actions, OVS_SOCKET_ACTION_ATTR_INODE, inode);
> >> +    actions_offset = nl_msg_start_nested(actions,
> >> +                                         OVS_SOCKET_ACTION_ATTR_ACTIONS);
> >> +    nl_msg_put_u32(actions, OVS_ACTION_ATTR_RECIRC, recirc_id);
> >> +    nl_msg_end_nested(actions, actions_offset);
> >> +    nl_msg_end_nested(actions, socket_offset);
> >> +}
> >> +
> >> +static void
> >> +generate_and_compose_socket_action(struct xlate_ctx *ctx,
> >> +                                   struct netdev *netdev, uint32_t 
> >> recirc_id)
> >> +{
> >> +    struct flow flow = ctx->base_flow; /* make a copy of the flow. */
> >> +    int af = (flow.dl_type == htons(ETH_TYPE_IP)) ? AF_INET : AF_INET6;
> >> +    uint64_t netns, inode;
> >> +    int error;
> >> +
> >> +    if (af == AF_INET) {
> >> +        error = netdev_get_socket_inode(netdev, flow.nw_proto, af,
> >> +                                       &flow.nw_src, flow.tp_src,
> >> +                                       &flow.nw_dst, flow.tp_dst,
> >> +                                       &inode, &netns);
> >> +    } else {
> >> +        error = netdev_get_socket_inode(netdev, flow.nw_proto, af,
> >> +                                       &flow.ipv6_src, flow.tp_src,
> >> +                                       &flow.ipv6_dst, flow.tp_dst,
> >> +                                       &inode, &netns);
> >> +    }
> >> +
> >> +    if (error) {
> >> +        VLOG_DBG("Socket lookup failed for flow: %s", 
> >> ovs_strerror(error));
> >> +        return;
> >> +    }
> >
> > This could be problematic. If we fail to find the socket maybe we should
> > go with the fallback (i.e: recirc) or even run the "output" normally.
>
> Right - this part needs to be built out more.
>
> > Which makes me think: how would this work with non-ct pipelines? i.e: if
> > all TCP packets match the same openflow flows.
> > The first packet (SYN) will not find a valid socket and the subsequent will
> > match the already installed dp flows so we won't have a chance to make
> > the lookup again and install the right "socket" action.
>
> What do you mean?  We narrow the flow during the socket installation.
> But I guess you mean when there isn't a socket already existing.
>

Exactly, I meant when the socket does not exist during the first packet
(SYN).


> The current strategy I'm working on is to have the socket action itself
> take a 5 tuple, then the kernel DP will look up on each packet until it
> finds it, and populate the internal reference.  This could be installed
> with the narrowed flow as already done, and that means we wouldn't have
> cast a wide flow net.  That solves the case that no socket exists at the
> time we install the flow.
>
> I don't see how ct vs. non-ct matters for this, though.  That's a
> separate issue (whether to even allow this kind of 'offload') that isn't
> really going to matter whether a CT flow is present.

My point about ct was related to the socket not existing during SYN. On
a ct-based pipeline, you will typically have a flow for "+new" which
will process the SYN (and skip the "socket" action) and another flow for
"+est" which will succeed in finding a socket and install the socket action.

>
> As for the CT side, that makes it more difficult, because we need to go
> back and rebuild the correct frozen contexts when we install a socket()
> action.  That one is a bit trickier because the recirc system isn't
> really meant to do that right now.  It does make the sock(try,commit)
> primitives seem more attractive, but I'd like to avoid that because it
> doesn't match with how other subsystems like eBPF do socket maps
> and it means we need the kernel side to manage socket references and
> have yet another table that we need to take extreme care with.
>

Yes, recirc replaying sounds more complicated but at the same time it
seems to be _the_ performance bottleneck, so I think it's worth a try.

I guess a first step would be to narrow down the previous flow(s) based
on the next ones to avoid having multiple connections per recirc_id.
However, if the socket information is going to be provided via a
5-tuple, NAT would break the whole thing.

> >> +
> >> +    if (ctx->conntracked) {
> >> +        ctx->base_flow.tp_src = ctx->base_flow.ct_tp_src;
> >> +        ctx->base_flow.tp_dst = ctx->base_flow.ct_tp_dst;
> >> +
> >> +        if (af == AF_INET) {
> >> +            ctx->base_flow.nw_src = ctx->base_flow.ct_nw_src;
> >> +            ctx->base_flow.nw_dst = ctx->base_flow.ct_nw_dst;
> >> +        } else {
> >> +            memcpy(&ctx->base_flow.ipv6_src, &ctx->base_flow.ct_ipv6_src,
> >> +                   sizeof(ctx->base_flow.ipv6_src));
> >> +            memcpy(&ctx->base_flow.ipv6_dst, &ctx->base_flow.ct_ipv6_dst,
> >> +                   sizeof(ctx->base_flow.ipv6_dst));
> >> +        }
> >> +    }
> >> +
> >> +    /* Build flow key */
> >> +    compose_socket_action_with_fallback(ctx->odp_actions, netns, inode,
> >> +                                        recirc_id);
> >> +}
> >> +
> >> +static void
> >> +xlate_socket_action(struct xlate_ctx *ctx, const struct xport *xport,
> >> +                    bool is_last_action OVS_UNUSED)
> >> +{
> >> +    struct frozen_state state = {
> >> +        .table_id = ctx->table_id,
> >> +        .ofproto_uuid = ctx->xbridge->ofproto->uuid,
> >> +        .stack = ctx->stack.data,
> >> +        .stack_size = ctx->stack.size,
> >> +        .mirrors = ctx->mirrors,
> >> +        .conntracked = ctx->conntracked,
> >> +        .was_mpls = ctx->was_mpls,
> >> +        .ofpacts = NULL,
> >> +        .ofpacts_len = 0,
> >> +        .action_set = NULL,
> >> +        .action_set_len = 0,
> >> +        .userdata = ctx->pause ? CONST_CAST(uint8_t 
> >> *,ctx->pause->userdata)
> >> +                               : NULL,
> >> +        .userdata_len = ctx->pause ? ctx->pause->userdata_len : 0,
> >> +    };
> >> +    frozen_metadata_from_flow(&state.metadata, &ctx->xin->flow);
> >> +    state.socket_attempt = true;
> >> +
> >> +    /* Allocate the fallback recirc ID, and update the state. */
> >> +    uint32_t recirc_id = recirc_alloc_id_ctx(&state);
> >> +    if (!recirc_id) {
> >> +        xlate_report_error(ctx, "Failed to allocate recirculation id");
> >> +        ctx->error = XLATE_NO_RECIRCULATION_CONTEXT;
> >> +        return;
> >> +    }
> >> +    recirc_refs_add(&ctx->xout->recircs, recirc_id);
> >> +
> >> +    unwildcard_socket_flow(ctx);
> >> +    generate_and_compose_socket_action(ctx, xport->netdev, recirc_id);
> >> +}
> >> +
> >>  static void
> >>  compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
> >>                          const struct xlate_bond_recirc *xr, bool 
> >> check_stp,
> >> @@ -4563,6 +4703,17 @@ compose_output_action__(struct xlate_ctx *ctx, 
> >> ofp_port_t ofp_port,
> >>          /* Commit accumulated flow updates before output. */
> >>          xlate_commit_actions(ctx);
> >>
> >> +        /* Check if we should enable socket flow tracking for this port */
> >> +        if ((!ctx->xin->frozen_state ||
> >> +             !ctx->xin->frozen_state->socket_attempt) &&
> >> +            should_track_socket_flows(ctx, xport)) {
> >> +            xlate_report(ctx, OFT_DETAIL,
> >> +                         "Socket flow tracking enabled for port %d", 
> >> ofp_port);
> >> +            /* Compose the action. */
> >> +            xlate_socket_action(ctx, xport, is_last_action);
> >> +            return;
> >> +        }
> >> +
> >>          if (xr && bond_use_lb_output_action(xport->xbundle->bond)) {
> >>              /*
> >>               * If bond mode is balance-tcp and optimize balance tcp is 
> >> enabled
> >> @@ -8712,6 +8863,8 @@ xlate_resume(struct ofproto_dpif *ofproto,
> >>          .mirrors = pin->mirrors,
> >>          .conntracked = pin->conntracked,
> >>          .xport_uuid = UUID_ZERO,
> >> +        .socket_attempt = true, /* After a pin message ignore socket()
> >> +                                 * calls (for now).*/
> >>
> >>          /* When there are no actions, xlate_actions() will search the flow
> >>           * table.  We don't want it to do that (we want it to resume), so
> >> diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h
> >> index d973a634ac..79ed645444 100644
> >> --- a/ofproto/ofproto-dpif-xlate.h
> >> +++ b/ofproto/ofproto-dpif-xlate.h
> >> @@ -65,6 +65,12 @@ struct xlate_out {
> >>      /* Keep track of the last action whose purpose is purely 
> >> observational.
> >>       * e.g: IPFIX, sFlow, local sampling. */
> >>      uint32_t last_observe_offset;
> >> +
> >> +    /* Let's the upcall handler know that a socket action should be used
> >> +     * along with any output action. */
> >> +    bool use_socket_action;
> >> +    const struct netdev *socket_outport;
> >> +    uint32_t socket_recirc_id;
> >>  };
> >>
> >>  struct xlate_in {
> >> @@ -244,6 +250,9 @@ bool xlate_delete_static_mac_entry(const struct 
> >> ofproto_dpif *,
> >>  void xlate_set_support(const struct ofproto_dpif *,
> >>                         const struct dpif_backer_support *);
> >>
> >> +bool xlate_socket_flow_revalidation(const struct flow *,
> >> +                                    const struct xlate_out *);
> >> +
> >>  void xlate_txn_start(void);
> >>  void xlate_txn_commit(void);
> >>
> >> diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
> >> index a0cd4a5cec..65ce440030 100644
> >> --- a/tests/ofproto-dpif.at
> >> +++ b/tests/ofproto-dpif.at
> >> @@ -13324,3 +13324,84 @@ AT_CHECK([ovs-appctl coverage/read-counter 
> >> revalidate_missing_dp_flow], [0],
> >>
> >>  OVS_VSWITCHD_STOP(["/failed to flow_del (No such file or directory)/d"])
> >>  AT_CLEANUP
> >> +
> >> +AT_SETUP([ofproto-dpif - socket action with TCP packets])
> >> +OVS_VSWITCHD_START
> >> +add_of_ports br0 1 2
> >> +
> >> +# Enable socket offload on port 1
> >> +AT_CHECK([ovs-vsctl set interface p2 other_config:socket-offload=true])
> >> +
> >> +# Add flow rule
> >> +AT_CHECK([ovs-ofctl add-flow br0 'in_port=1,actions=output:2'])
> >> +
> >> +# Test established TCP connection (ACK flag)
> >> +m4_define([TCP_ACK_PKT], [m4_join([,],
> >> +    [eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800)],
> >> +    [ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,tos=0,ttl=64,frag=no)],
> >> +    [tcp(src=1234,dst=80),tcp_flags(ack)])])
> >> +
> >> +# Send TCP ACK packet and trace it
> >> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'TCP_ACK_PKT'], [0], 
> >> [stdout])
> >> +
> >> +# Check that socket action is generated for established connections too
> >> +AT_CHECK([ovs-appctl dpctl/dump-flows --names | strip_used | strip_stats 
> >> | dnl
> >> +          strip_duration | strip_dp_hash | sort], [0], [dnl
> >> +flow-dump from the main thread:
> >>
> > +recirc_id(0),in_port(p1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,frag=no),tcp(src=1234,dst=80),
> > packets:0, bytes:0, used:never,
> > actions:socket(netns=305419896,inode=18446744072938082304,else(recirc(0x1)))
> >> +recirc_id(0x1),in_port(p1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no),
> >>  packets:0, bytes:0, used:never, actions:p2
> >> +])
> >> +
> >> +# Test FIN packet
> >> +m4_define([TCP_FIN_PKT], [m4_join([,],
> >> +    [eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800)],
> >> +    [ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,tos=0,ttl=64,frag=no)],
> >> +    [tcp(src=1234,dst=80),tcp_flags(fin)])])
> >> +
> >> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'TCP_FIN_PKT'], [0], 
> >> [stdout])
> >> +AT_CHECK([ovs-appctl dpctl/dump-flows --names | strip_used | strip_stats 
> >> | dnl
> >> +          strip_duration | strip_dp_hash | sort], [0], [dnl
> >> +flow-dump from the main thread:
> >>
> > +recirc_id(0),in_port(p1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,frag=no),tcp(src=1234,dst=80),
> > packets:0, bytes:0, used:0.0s, flags:F,
> > actions:socket(netns=305419896,inode=18446744072938082304,else(recirc(0x1)))
> >> +recirc_id(0x1),in_port(p1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no),
> >>  packets:0, bytes:0, used:0.0s, flags:F, actions:p2
> >> +])
> >> +
> >> +OVS_VSWITCHD_STOP
> >> +AT_CLEANUP
> >> +
> >> +AT_SETUP([ofproto-dpif - socket action after CT NAT])
> >> +OVS_VSWITCHD_START
> >> +
> >> +add_of_ports br0 1 2
> >> +
> >> +AT_DATA([flows.txt], [dnl
> >> +table=0,priority=10,ip,in_port=1,tcp,action=ct(commit,table=1,nat(dst=1.2.3.4))
> >> +table=1,priority=10,ip,in_port=1,tcp,action=2
> >> +])
> >> +AT_CHECK([ovs-ofctl --protocols=OpenFlow10 add-flows br0 flows.txt])
> >> +
> >> +# Enable socket offload on port 1
> >> +AT_CHECK([ovs-vsctl set interface p2 other_config:socket-offload=true])
> >> +
> >> +# Test established TCP connection (ACK flag)
> >> +m4_define([TCP_ACK_PKT], [m4_join([,],
> >> +    [eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800)],
> >> +    [ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,tos=0,ttl=64,frag=no)],
> >> +    [tcp(src=1234,dst=80),tcp_flags(ack)])])
> >> +
> >> +AT_CHECK([ovs-appctl vlog/set ofproto_dpif_xlate:dbg])
> >> +
> >> +# Send TCP SYN packet and trace it
> >> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'TCP_ACK_PKT'], [0], 
> >> [stdout])
> >> +
> >> +# Check that socket action is generated for established connections too
> >> +AT_CHECK([ovs-appctl dpctl/dump-flows --names | strip_used | strip_stats 
> >> | dnl
> >> +          strip_duration | strip_dp_hash | sort], [0], [dnl
> >> +flow-dump from the main thread:
> >>
> > +recirc_id(0),in_port(p1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,frag=no),
> > packets:0, bytes:0, used:never,
> > actions:ct(commit,nat(dst=1.2.3.4)),recirc(0x1)
> >>
> > +recirc_id(0x1),in_port(p1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=10.0.0.1,dst=1.2.3.4,proto=6,frag=no),tcp(src=1234,dst=80),
> > packets:0, bytes:0, used:never,
> > actions:socket(netns=305419896,inode=18446744072938082304,else(recirc(0x2)))
> >> +recirc_id(0x2),in_port(p1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,frag=no),
> >>  packets:0, bytes:0, used:never, actions:p2
> >> +])
> >> +
> >> +OVS_VSWITCHD_STOP
> >> +AT_CLEANUP
> >> --
> >> 2.51.0
> >>
>

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to