Adrián Moreno <[email protected]> writes:

> On Tue, Sep 30, 2025 at 12:46:58PM -0400, Aaron Conole wrote:
>> This commit wires up support for the socket() action, which will
>> create a recirculation on the datapath to bypass as many extra
>> recirculations as possible before the output.  The rough idea is
>> that a data packet can 'bypass' certain parts of the networking
>> stack and experience a faster delivery.  This makes some of the
>> tracking features (for example, simple tcpdump type operations)
>> much more complicated because the underlying datapath will not
>> use the complete routing / sockets layer in datapath, but it
>> instead directly will queue the data portion of the packet to
>> the socket buffer.  For non-data packets, the socket() action
>> will hit the 'else' directive and take the recirc chain.
>>
>> One area that still needs enhancement is the ct() path.  The
>> conntrack portions are only doing the socket call after the
>> recirculation chain.  However, the proper way to do this is
>> to rewrite the original recirculation state to hit the socket
>> path first.  That will require quite a bit more context and
>> logic in the compose ct action, so it isn't done here, and
>> it isn't recommended to use this with a really complex flow
>> pipeline yet (since this is still a WIP).  Future work will
>> focus on this area so that an existing flow after freezing
>> that looks like:
>>
>>   recirc_id(0),eth(...),eth_type(...),ipv4(...),
>>     actions=ct(commit,nat(dst=1.2.3.4)),recirc(0x1)
>>
>> would be rewritten as:
>>
>>   recirc_id(0),eth(...),eth_type(...),ipv4(...),...
>>     actions=socket(netns=...,inode=...,else(recirc(1))
>>   recirc_id(0x1),eth(...)...
>>     actions=ct(commit,nat(dst=1.2.3.4)),recirc(0x2)
>>
>> And then the corresponding output could get generated later.
>>
>> Signed-off-by: Aaron Conole <[email protected]>
>> ---
>>  lib/dpif-netdev.c            |  24 +++++-
>>  lib/netdev-dummy.c           |  73 +++++++++++++++++
>>  ofproto/ofproto-dpif-rid.h   |   1 +
>>  ofproto/ofproto-dpif-xlate.c | 153 +++++++++++++++++++++++++++++++++++
>>  ofproto/ofproto-dpif-xlate.h |   9 +++
>>  tests/ofproto-dpif.at        |  81 +++++++++++++++++++
>>  6 files changed, 340 insertions(+), 1 deletion(-)
>>
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index 1013d736eb..8c0c46c560 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -9186,6 +9186,29 @@ dp_execute_cb(void *aux_, struct dp_packet_batch 
>> *packets_,
>>      uint32_t packet_count, packets_dropped;
>>
>>      switch ((enum ovs_action_attr)type) {
>> +    case OVS_ACTION_ATTR_SOCKET: {
>> +        /* Since socket isn't supported, but there's a fallback branch, we 
>> will
>> +         * execute the fallback side of the socket call.  In the future, it
>> +         * may be feasible to implement via an AF_XDP type socket. */
>> +        static const struct nl_policy ovs_sock_act_policy[] = {
>> +            [OVS_SOCKET_ACTION_ATTR_NETNS_ID] = { .type = NL_A_U32 },
>> +            [OVS_SOCKET_ACTION_ATTR_INODE]    = { .type = NL_A_U64 },
>> +            [OVS_SOCKET_ACTION_ATTR_ACTIONS]  = { .type = NL_A_NESTED },
>> +        };
>> +        struct nlattr *sock_act[ARRAY_SIZE(ovs_sock_act_policy)];
>> +        if (!nl_parse_nested(a, ovs_sock_act_policy, sock_act,
>> +                             ARRAY_SIZE(sock_act))) {
>> +            VLOG_ERR("Unable to parse socket type.");
>> +            return;
>> +        }
>> +
>> +        struct nlattr *acts = sock_act[OVS_SOCKET_ACTION_ATTR_ACTIONS];
>> +        dp_netdev_execute_actions(aux->pmd, packets_, should_steal,
>> +                                  aux->flow, nl_attr_get(acts),
>> +                                  nl_attr_get_size(acts));
>> +        return;
>> +    }
>> +
>>      case OVS_ACTION_ATTR_OUTPUT:
>>          dp_execute_output_action(pmd, packets_, should_steal,
>>                                   nl_attr_get_odp_port(a));
>> @@ -9495,7 +9518,6 @@ dp_execute_cb(void *aux_, struct dp_packet_batch 
>> *packets_,
>>      case OVS_ACTION_ATTR_ADD_MPLS:
>>      case OVS_ACTION_ATTR_DEC_TTL:
>>      case OVS_ACTION_ATTR_PSAMPLE:
>> -    case OVS_ACTION_ATTR_SOCKET:
>>      case __OVS_ACTION_ATTR_MAX:
>>          OVS_NOT_REACHED();
>>      }
>> diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
>> index b72820fcc5..244c58869f 100644
>> --- a/lib/netdev-dummy.c
>> +++ b/lib/netdev-dummy.c
>> @@ -174,6 +174,9 @@ struct netdev_dummy {
>>
>>      /* Set the segment size for netdev TSO support. */
>>      int ol_tso_segsz OVS_GUARDED;
>> +
>> +    /* Socket lookup functionality state. */
>> +    bool socket_lookup_enabled OVS_GUARDED;
>>  };
>>
>>  /* Max 'recv_queue_len' in struct netdev_dummy. */
>> @@ -739,6 +742,7 @@ netdev_dummy_construct(struct netdev *netdev_)
>>      netdev->requested_n_rxq = netdev_->n_rxq;
>>      netdev->requested_n_txq = netdev_->n_txq;
>>      netdev->numa_id = 0;
>> +    netdev->socket_lookup_enabled = false;
>>
>>      memset(&netdev->custom_stats, 0, sizeof(netdev->custom_stats));
>>
>> @@ -1796,6 +1800,71 @@ exit:
>>      return error ? -1 : 0;
>>  }
>>
>> +static int
>> +netdev_dummy_get_target_ns(const struct netdev *netdev OVS_UNUSED,
>> +                           int *target_ns)
>> +{
>> +    /* For dummy devices, return a fixed hash value instead of real
>> +     * namespace ID. */
>> +    *target_ns = 0x12345678;
>> +    return 0;
>> +}
>> +
>> +static int
>> +netdev_dummy_set_socket_lookup_enabled(struct netdev *netdev_, bool enabled)
>> +{
>> +    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
>> +
>> +    ovs_mutex_lock(&netdev->mutex);
>> +    netdev->socket_lookup_enabled = enabled;
>> +    ovs_mutex_unlock(&netdev->mutex);
>> +
>> +    return 0;
>> +}
>> +
>> +static bool
>> +netdev_dummy_get_socket_lookup_enabled(const struct netdev *netdev_)
>> +{
>> +    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
>> +    bool enabled;
>> +
>> +    ovs_mutex_lock(&netdev->mutex);
>> +    enabled = netdev->socket_lookup_enabled;
>> +    ovs_mutex_unlock(&netdev->mutex);
>> +
>> +    return enabled;
>> +}
>> +
>> +static int
>> +netdev_dummy_get_socket_inode(const struct netdev *netdev OVS_UNUSED,
>> +                              int proto, int af,
>> +                              const void *src,
>> +                              ovs_be16 sport,
>> +                              const void *dst,
>> +                              ovs_be16 dport,
>> +                              uint64_t *inode_out, uint64_t *netns_out)
>> +{
>> +    /* For dummy devices, return hashed values instead of real inode/netns 
>> */
>> +    if (proto != IPPROTO_TCP) {
>> +        return ENOENT;
>> +    }
>> +
>> +    if (af == AF_INET) {
>> +        uint64_t inode_hash = hash_2words(*(uint32_t *) src,
>> +                                          *(uint32_t *) dst);
>> +        inode_hash = inode_hash << 32;
>> +        inode_hash |=
>> +            ((OVS_FORCE uint16_t) sport << 16) |
>> +            ((OVS_FORCE uint16_t) dport);
>> +
>> +        *inode_out = inode_hash;
>> +        *netns_out = 0x12345678;
>> +    } else {
>> +        return ENOENT;
>> +    }
>> +    return 0;
>> +}
>> +
>>  #define NETDEV_DUMMY_CLASS_COMMON                       \
>>      .run = netdev_dummy_run,                            \
>>      .wait = netdev_dummy_wait,                          \
>> @@ -1822,6 +1891,10 @@ exit:
>>      .dump_queue_stats = netdev_dummy_dump_queue_stats,  \
>>      .get_addr_list = netdev_dummy_get_addr_list,        \
>>      .update_flags = netdev_dummy_update_flags,          \
>> +    .get_target_ns = netdev_dummy_get_target_ns,        \
>> +    .set_socket_lookup_enabled = netdev_dummy_set_socket_lookup_enabled, \
>> +    .get_socket_lookup_enabled = netdev_dummy_get_socket_lookup_enabled, \
>> +    .get_socket_inode = netdev_dummy_get_socket_inode,  \
>>      .rxq_alloc = netdev_dummy_rxq_alloc,                \
>>      .rxq_construct = netdev_dummy_rxq_construct,        \
>>      .rxq_destruct = netdev_dummy_rxq_destruct,          \
>> diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h
>> index 4df630c62b..f899a83260 100644
>> --- a/ofproto/ofproto-dpif-rid.h
>> +++ b/ofproto/ofproto-dpif-rid.h
>> @@ -155,6 +155,7 @@ struct frozen_state {
>>      bool conntracked;             /* Conntrack occurred prior to freeze. */
>>      bool was_mpls;                /* MPLS packet */
>>      struct uuid xport_uuid;       /* UUID of 1st port packet received on. */
>> +    bool socket_attempt;          /* A socket output was already attempted. 
>> */
>>
>>      /* Actions to be translated when thawing. */
>>      struct ofpact *ofpacts;
>> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
>> index 2c8197fb73..2ac160507c 100644
>> --- a/ofproto/ofproto-dpif-xlate.c
>> +++ b/ofproto/ofproto-dpif-xlate.c
>> @@ -4456,6 +4456,146 @@ terminate_native_tunnel(struct xlate_ctx *ctx, const 
>> struct xport *xport,
>>      return *tnl_port != ODPP_NONE;
>>  }
>>
>> +static bool should_track_socket_flows(struct xlate_ctx *ctx,
>> +                                      const struct xport *xport)
>> +{
>> +    struct flow *flow = &ctx->xin->flow;
>> +
>> +    /* Only for valid ports (also need to check the addresses involved). */
>> +    return ((xport && xport->netdev &&
>> +             netdev_get_socket_lookup_enabled(xport->netdev)) &&
>> +            (flow->dl_type == htons(ETH_TYPE_IP) ||
>> +             flow->dl_type == htons(ETH_TYPE_IPV6)) &&
>> +            (flow->nw_proto == IPPROTO_TCP) &&
>> +            (flow->tp_src && flow->tp_dst));
>> +}
>> +
>> +static void
>> +unwildcard_socket_flow(struct xlate_ctx *ctx)
>> +{
>> +    struct flow_wildcards *wc = ctx->wc;
>> +    struct flow *flow = &ctx->xin->flow;
>> +
>> +    /* Protocol and TCP ports are common */
>> +    memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
>> +    memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
>> +    memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
>> +
>> +    /* IP address fields */
>> +    if (flow->dl_type == htons(ETH_TYPE_IP)) {
>> +        /* IPv4 */
>> +        wc->masks.nw_src = OVS_BE32_MAX;
>> +        wc->masks.nw_dst = OVS_BE32_MAX;
>> +    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
>> +        /* IPv6 */
>> +        memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src);
>> +        memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
>> +    }
>> +
>> +    /* Eth fields. */
>> +    memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type);
>> +    memset(&wc->masks.dl_src, 0xff, ETH_ADDR_LEN);
>> +    memset(&wc->masks.dl_dst, 0xff, ETH_ADDR_LEN);
>> +}
>> +
>> +static void
>> +compose_socket_action_with_fallback(struct ofpbuf *actions, uint32_t netns,
>> +                                    uint64_t inode, uint32_t recirc_id)
>> +{
>> +    size_t socket_offset, actions_offset;
>> +
>> +    socket_offset = nl_msg_start_nested(actions, OVS_ACTION_ATTR_SOCKET);
>> +    nl_msg_put_u32(actions, OVS_SOCKET_ACTION_ATTR_NETNS_ID, netns);
>> +    nl_msg_put_u64(actions, OVS_SOCKET_ACTION_ATTR_INODE, inode);
>> +    actions_offset = nl_msg_start_nested(actions,
>> +                                         OVS_SOCKET_ACTION_ATTR_ACTIONS);
>> +    nl_msg_put_u32(actions, OVS_ACTION_ATTR_RECIRC, recirc_id);
>> +    nl_msg_end_nested(actions, actions_offset);
>> +    nl_msg_end_nested(actions, socket_offset);
>> +}
>> +
>> +static void
>> +generate_and_compose_socket_action(struct xlate_ctx *ctx,
>> +                                   struct netdev *netdev, uint32_t 
>> recirc_id)
>> +{
>> +    struct flow flow = ctx->base_flow; /* make a copy of the flow. */
>> +    int af = (flow.dl_type == htons(ETH_TYPE_IP)) ? AF_INET : AF_INET6;
>> +    uint64_t netns, inode;
>> +    int error;
>> +
>> +    if (af == AF_INET) {
>> +        error = netdev_get_socket_inode(netdev, flow.nw_proto, af,
>> +                                       &flow.nw_src, flow.tp_src,
>> +                                       &flow.nw_dst, flow.tp_dst,
>> +                                       &inode, &netns);
>> +    } else {
>> +        error = netdev_get_socket_inode(netdev, flow.nw_proto, af,
>> +                                       &flow.ipv6_src, flow.tp_src,
>> +                                       &flow.ipv6_dst, flow.tp_dst,
>> +                                       &inode, &netns);
>> +    }
>> +
>> +    if (error) {
>> +        VLOG_DBG("Socket lookup failed for flow: %s", ovs_strerror(error));
>> +        return;
>> +    }
>
> This could be problematic. If we fail to find the socket maybe we should
> go with the fallback (i.e: recirc) or even run the "output" normally.

Right - this part needs to be built out more.

> Which makes me think: how would this work with non-ct pipelines? i.e: if
> all TCP packets match the same openflow flows.
> The first packet (SYN) will not find a valid socket and the subsequent will
> match the already installed dp flows so we won't have a chance to make
> the lookup again and install the right "socket" action.

What do you mean?  We narrow the flow during the socket installation.
But I guess you mean the case where no socket exists yet.

The current strategy I'm working on is to have the socket action itself
take a 5-tuple; the kernel DP will then look up the socket on each packet
until it finds one, and populate the internal reference.  This could be
installed with the narrowed flow as already done, which means we wouldn't
have to cast a wide flow net.  That solves the case where no socket exists
at the time we install the flow.

I don't see how ct vs. non-ct matters for this, though.  That's a
separate issue (whether to even allow this kind of 'offload') that
doesn't really depend on whether a CT flow is present.

As for the CT side, that makes it more difficult, because we need to go
back and rebuild the correct frozen contexts when we install a socket()
action.  That one is a bit trickier because the recirc system isn't
really meant to do that right now.  It does make the sock(try,commit)
primitives seem more attractive, but I'd like to avoid that because it
doesn't match how other subsystems like eBPF do socket maps
and it means we need the kernel side to manage socket references and
have yet another table that we need to take extreme care with.

> Thanks.
> Adrián
>
>> +
>> +    if (ctx->conntracked) {
>> +        ctx->base_flow.tp_src = ctx->base_flow.ct_tp_src;
>> +        ctx->base_flow.tp_dst = ctx->base_flow.ct_tp_dst;
>> +
>> +        if (af == AF_INET) {
>> +            ctx->base_flow.nw_src = ctx->base_flow.ct_nw_src;
>> +            ctx->base_flow.nw_dst = ctx->base_flow.ct_nw_dst;
>> +        } else {
>> +            memcpy(&ctx->base_flow.ipv6_src, &ctx->base_flow.ct_ipv6_src,
>> +                   sizeof(ctx->base_flow.ipv6_src));
>> +            memcpy(&ctx->base_flow.ipv6_dst, &ctx->base_flow.ct_ipv6_dst,
>> +                   sizeof(ctx->base_flow.ipv6_dst));
>> +        }
>> +    }
>> +
>> +    /* Build flow key */
>> +    compose_socket_action_with_fallback(ctx->odp_actions, netns, inode,
>> +                                        recirc_id);
>> +}
>> +
>> +static void
>> +xlate_socket_action(struct xlate_ctx *ctx, const struct xport *xport,
>> +                    bool is_last_action OVS_UNUSED)
>> +{
>> +    struct frozen_state state = {
>> +        .table_id = ctx->table_id,
>> +        .ofproto_uuid = ctx->xbridge->ofproto->uuid,
>> +        .stack = ctx->stack.data,
>> +        .stack_size = ctx->stack.size,
>> +        .mirrors = ctx->mirrors,
>> +        .conntracked = ctx->conntracked,
>> +        .was_mpls = ctx->was_mpls,
>> +        .ofpacts = NULL,
>> +        .ofpacts_len = 0,
>> +        .action_set = NULL,
>> +        .action_set_len = 0,
>> +        .userdata = ctx->pause ? CONST_CAST(uint8_t *,ctx->pause->userdata)
>> +                               : NULL,
>> +        .userdata_len = ctx->pause ? ctx->pause->userdata_len : 0,
>> +    };
>> +    frozen_metadata_from_flow(&state.metadata, &ctx->xin->flow);
>> +    state.socket_attempt = true;
>> +
>> +    /* Allocate the fallback recirc ID, and update the state. */
>> +    uint32_t recirc_id = recirc_alloc_id_ctx(&state);
>> +    if (!recirc_id) {
>> +        xlate_report_error(ctx, "Failed to allocate recirculation id");
>> +        ctx->error = XLATE_NO_RECIRCULATION_CONTEXT;
>> +        return;
>> +    }
>> +    recirc_refs_add(&ctx->xout->recircs, recirc_id);
>> +
>> +    unwildcard_socket_flow(ctx);
>> +    generate_and_compose_socket_action(ctx, xport->netdev, recirc_id);
>> +}
>> +
>>  static void
>>  compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
>>                          const struct xlate_bond_recirc *xr, bool check_stp,
>> @@ -4563,6 +4703,17 @@ compose_output_action__(struct xlate_ctx *ctx, 
>> ofp_port_t ofp_port,
>>          /* Commit accumulated flow updates before output. */
>>          xlate_commit_actions(ctx);
>>
>> +        /* Check if we should enable socket flow tracking for this port */
>> +        if ((!ctx->xin->frozen_state ||
>> +             !ctx->xin->frozen_state->socket_attempt) &&
>> +            should_track_socket_flows(ctx, xport)) {
>> +            xlate_report(ctx, OFT_DETAIL,
>> +                         "Socket flow tracking enabled for port %d", 
>> ofp_port);
>> +            /* Compose the action. */
>> +            xlate_socket_action(ctx, xport, is_last_action);
>> +            return;
>> +        }
>> +
>>          if (xr && bond_use_lb_output_action(xport->xbundle->bond)) {
>>              /*
>>               * If bond mode is balance-tcp and optimize balance tcp is 
>> enabled
>> @@ -8712,6 +8863,8 @@ xlate_resume(struct ofproto_dpif *ofproto,
>>          .mirrors = pin->mirrors,
>>          .conntracked = pin->conntracked,
>>          .xport_uuid = UUID_ZERO,
>> +        .socket_attempt = true, /* After a pin message ignore socket()
>> +                                 * calls (for now).*/
>>
>>          /* When there are no actions, xlate_actions() will search the flow
>>           * table.  We don't want it to do that (we want it to resume), so
>> diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h
>> index d973a634ac..79ed645444 100644
>> --- a/ofproto/ofproto-dpif-xlate.h
>> +++ b/ofproto/ofproto-dpif-xlate.h
>> @@ -65,6 +65,12 @@ struct xlate_out {
>>      /* Keep track of the last action whose purpose is purely observational.
>>       * e.g: IPFIX, sFlow, local sampling. */
>>      uint32_t last_observe_offset;
>> +
>> +    /* Let's the upcall handler know that a socket action should be used
>> +     * along with any output action. */
>> +    bool use_socket_action;
>> +    const struct netdev *socket_outport;
>> +    uint32_t socket_recirc_id;
>>  };
>>
>>  struct xlate_in {
>> @@ -244,6 +250,9 @@ bool xlate_delete_static_mac_entry(const struct 
>> ofproto_dpif *,
>>  void xlate_set_support(const struct ofproto_dpif *,
>>                         const struct dpif_backer_support *);
>>
>> +bool xlate_socket_flow_revalidation(const struct flow *,
>> +                                    const struct xlate_out *);
>> +
>>  void xlate_txn_start(void);
>>  void xlate_txn_commit(void);
>>
>> diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
>> index a0cd4a5cec..65ce440030 100644
>> --- a/tests/ofproto-dpif.at
>> +++ b/tests/ofproto-dpif.at
>> @@ -13324,3 +13324,84 @@ AT_CHECK([ovs-appctl coverage/read-counter 
>> revalidate_missing_dp_flow], [0],
>>
>>  OVS_VSWITCHD_STOP(["/failed to flow_del (No such file or directory)/d"])
>>  AT_CLEANUP
>> +
>> +AT_SETUP([ofproto-dpif - socket action with TCP packets])
>> +OVS_VSWITCHD_START
>> +add_of_ports br0 1 2
>> +
>> +# Enable socket offload on port 1
>> +AT_CHECK([ovs-vsctl set interface p2 other_config:socket-offload=true])
>> +
>> +# Add flow rule
>> +AT_CHECK([ovs-ofctl add-flow br0 'in_port=1,actions=output:2'])
>> +
>> +# Test established TCP connection (ACK flag)
>> +m4_define([TCP_ACK_PKT], [m4_join([,],
>> +    [eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800)],
>> +    [ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,tos=0,ttl=64,frag=no)],
>> +    [tcp(src=1234,dst=80),tcp_flags(ack)])])
>> +
>> +# Send TCP ACK packet and trace it
>> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'TCP_ACK_PKT'], [0], [stdout])
>> +
>> +# Check that socket action is generated for established connections too
>> +AT_CHECK([ovs-appctl dpctl/dump-flows --names | strip_used | strip_stats | 
>> dnl
>> +          strip_duration | strip_dp_hash | sort], [0], [dnl
>> +flow-dump from the main thread:
>>
> +recirc_id(0),in_port(p1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,frag=no),tcp(src=1234,dst=80),
> packets:0, bytes:0, used:never,
> actions:socket(netns=305419896,inode=18446744072938082304,else(recirc(0x1)))
>> +recirc_id(0x1),in_port(p1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no),
>>  packets:0, bytes:0, used:never, actions:p2
>> +])
>> +
>> +# Test FIN packet
>> +m4_define([TCP_FIN_PKT], [m4_join([,],
>> +    [eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800)],
>> +    [ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,tos=0,ttl=64,frag=no)],
>> +    [tcp(src=1234,dst=80),tcp_flags(fin)])])
>> +
>> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'TCP_FIN_PKT'], [0], [stdout])
>> +AT_CHECK([ovs-appctl dpctl/dump-flows --names | strip_used | strip_stats | 
>> dnl
>> +          strip_duration | strip_dp_hash | sort], [0], [dnl
>> +flow-dump from the main thread:
>>
> +recirc_id(0),in_port(p1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,frag=no),tcp(src=1234,dst=80),
> packets:0, bytes:0, used:0.0s, flags:F,
> actions:socket(netns=305419896,inode=18446744072938082304,else(recirc(0x1)))
>> +recirc_id(0x1),in_port(p1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no),
>>  packets:0, bytes:0, used:0.0s, flags:F, actions:p2
>> +])
>> +
>> +OVS_VSWITCHD_STOP
>> +AT_CLEANUP
>> +
>> +AT_SETUP([ofproto-dpif - socket action after CT NAT])
>> +OVS_VSWITCHD_START
>> +
>> +add_of_ports br0 1 2
>> +
>> +AT_DATA([flows.txt], [dnl
>> +table=0,priority=10,ip,in_port=1,tcp,action=ct(commit,table=1,nat(dst=1.2.3.4))
>> +table=1,priority=10,ip,in_port=1,tcp,action=2
>> +])
>> +AT_CHECK([ovs-ofctl --protocols=OpenFlow10 add-flows br0 flows.txt])
>> +
>> +# Enable socket offload on port 1
>> +AT_CHECK([ovs-vsctl set interface p2 other_config:socket-offload=true])
>> +
>> +# Test established TCP connection (ACK flag)
>> +m4_define([TCP_ACK_PKT], [m4_join([,],
>> +    [eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800)],
>> +    [ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,tos=0,ttl=64,frag=no)],
>> +    [tcp(src=1234,dst=80),tcp_flags(ack)])])
>> +
>> +AT_CHECK([ovs-appctl vlog/set ofproto_dpif_xlate:dbg])
>> +
>> +# Send TCP SYN packet and trace it
>> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'TCP_ACK_PKT'], [0], [stdout])
>> +
>> +# Check that socket action is generated for established connections too
>> +AT_CHECK([ovs-appctl dpctl/dump-flows --names | strip_used | strip_stats | 
>> dnl
>> +          strip_duration | strip_dp_hash | sort], [0], [dnl
>> +flow-dump from the main thread:
>>
> +recirc_id(0),in_port(p1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,frag=no),
> packets:0, bytes:0, used:never,
> actions:ct(commit,nat(dst=1.2.3.4)),recirc(0x1)
>>
> +recirc_id(0x1),in_port(p1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=10.0.0.1,dst=1.2.3.4,proto=6,frag=no),tcp(src=1234,dst=80),
> packets:0, bytes:0, used:never,
> actions:socket(netns=305419896,inode=18446744072938082304,else(recirc(0x2)))
>> +recirc_id(0x2),in_port(p1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,frag=no),
>>  packets:0, bytes:0, used:never, actions:p2
>> +])
>> +
>> +OVS_VSWITCHD_STOP
>> +AT_CLEANUP
>> --
>> 2.51.0
>>

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to