OVS allows overlapping megaflows, as long as the actions of these megaflows are equal. However, the current implementation of action modification relies on flow_lookup instead of ufid, this could result in looking up a wrong megaflow and make the ukeys and megaflows inconsistent
Just like the test case in the patch, at first we have a rule with the prefix: 10.1.2.0/24 and we will get a megaflow with prefixes 10.1.2.2/24 when a packet with IP 10.1.2.2 is received. Then suppose we change the rule into 10.1.0.0/16. OVS prefers to keep the 10.1.2.2/24 megaflow and just changes its action instead of extending the prefix into 10.1.2.2/16. then suppose we have a 10.1.0.2 packet, since it misses the megaflow, this time, we will have an overlapping megaflow with the right prefix: 10.1.0.2/16 now we have two megaflows: 10.1.2.2/24 10.1.0.2/16 last, suppose we have changed the ruleset again. The revalidator this time still decides to change the actions of both megaflows instead of deleting them. The dpif_netdev_flow_put will search the megaflow to modify with unmasked keys, however it might lookup the wrong megaflow as the key 10.1.2.2 matches both 10.1.2.2/24 and 10.1.0.2/16! This patch changes the megaflow lookup code in modification path into relying the ufid to find the correct megaflow instead of key lookup. Signed-off-by: Peng He <hepeng.0...@bytedance.com> --- lib/dpif-netdev.c | 62 ++++++++++++++++++++++++++--------------------- tests/pmd.at | 48 ++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 28 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 70b953ae6..b90ed1542 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -4198,36 +4198,43 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd, memset(stats, 0, sizeof *stats); } - ovs_mutex_lock(&pmd->flow_mutex); - netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL); - if (!netdev_flow) { - if (put->flags & DPIF_FP_CREATE) { + if (put->flags & DPIF_FP_CREATE) { + ovs_mutex_lock(&pmd->flow_mutex); + netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL); + if (!netdev_flow) { dp_netdev_flow_add(pmd, match, ufid, put->actions, put->actions_len, ODPP_NONE); } else { - error = ENOENT; + error = EEXIST; } + ovs_mutex_unlock(&pmd->flow_mutex); } else { + netdev_flow = dp_netdev_pmd_find_flow(pmd, ufid, + put->key, put->key_len); + if (put->flags & DPIF_FP_MODIFY) { - struct dp_netdev_actions *new_actions; - struct dp_netdev_actions *old_actions; + if (!netdev_flow) { + error = ENOENT; + } else { + struct dp_netdev_actions *new_actions; + struct dp_netdev_actions *old_actions; - new_actions = dp_netdev_actions_create(put->actions, - put->actions_len); + new_actions = dp_netdev_actions_create(put->actions, + put->actions_len); - old_actions = dp_netdev_flow_get_actions(netdev_flow); - ovsrcu_set(&netdev_flow->actions, new_actions); + old_actions = dp_netdev_flow_get_actions(netdev_flow); + ovsrcu_set(&netdev_flow->actions, new_actions); - queue_netdev_flow_put(pmd, netdev_flow, match, - put->actions, put->actions_len, - DP_NETDEV_FLOW_OFFLOAD_OP_MOD); - log_netdev_flow_change(netdev_flow, match, old_actions, - put->actions, put->actions_len); + queue_netdev_flow_put(pmd, netdev_flow, match, + put->actions, put->actions_len, + DP_NETDEV_FLOW_OFFLOAD_OP_MOD); + log_netdev_flow_change(netdev_flow, match, old_actions, + put->actions, put->actions_len); - if (stats) { - get_dpif_flow_status(pmd->dp, netdev_flow, stats, NULL); - } - if (put->flags & DPIF_FP_ZERO_STATS) { + if (stats) { + get_dpif_flow_status(pmd->dp, netdev_flow, stats, NULL); + } + if (put->flags & DPIF_FP_ZERO_STATS) { /* XXX: The userspace datapath uses thread local statistics * (for flows), which should be updated only by the owning * thread. Since we cannot write on stats memory here, @@ -4237,18 +4244,17 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd, * - Should the need arise, this operation can be implemented * by keeping a base value (to be update here) for each * counter, and subtracting it before outputting the stats */ - error = EOPNOTSUPP; + error = EOPNOTSUPP; + } + ovsrcu_postpone(dp_netdev_actions_free, old_actions); } - - ovsrcu_postpone(dp_netdev_actions_free, old_actions); - } else if (put->flags & DPIF_FP_CREATE) { - error = EEXIST; } else { - /* Overlapping flow. */ - error = EINVAL; + if (netdev_flow) { + /* Overlapping flow. */ + error = EINVAL; + } } } - ovs_mutex_unlock(&pmd->flow_mutex); return error; } diff --git a/tests/pmd.at b/tests/pmd.at index 48f3d432d..85211a30f 100644 --- a/tests/pmd.at +++ b/tests/pmd.at @@ -1300,3 +1300,51 @@ OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD load based sleeps OVS_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([PMD - revalidator wrongly modify userspace megaflows]) + +OVS_VSWITCHD_START( +[add-port br0 p1 \ + -- set interface p1 type=dummy-pmd \ + -- set bridge br0 datapath-type=dummy \ + -- add-port br0 p2 \ + -- set interface p2 type=dummy-pmd -- +]) + +dnl Add one openflow rule and generate a megaflow. +AT_CHECK([ovs-ofctl add-flow br0 'table=0,in_port=p1,ip,nw_dst=10.1.2.0/24,actions=p2']) +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'ipv4(src=10.0.0.1,dst=10.1.2.2),tcp(src=1,dst=2)']) + +AT_CHECK([ovs-appctl dpctl/dump-flows | sed 's/.*core: [[0-9]]*//'], [0], [ +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=10.1.2.2/255.255.255.0,frag=no), packets:0, bytes:0, used:never, actions:2 +]) + +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'ipv4(src=10.0.0.1,dst=10.1.2.2),tcp(src=1,dst=2)']) +dnl Replace openflow rules, trigger the revalidation. +AT_CHECK([echo 'table=0,in_port=p1,ip,nw_dst=10.1.0.0/16 actions=ct(commit)' | dnl +ovs-ofctl --bundle replace-flows br0 -]) +AT_CHECK([ovs-appctl revalidator/wait]) + +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'ipv4(src=10.0.0.1,dst=10.1.0.2),tcp(src=1,dst=2)']) +AT_CHECK([ovs-appctl dpctl/dump-flows | sed 's/.*core: [[0-9]]*//' | strip_xout_keep_actions], [0], [ +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=10.1.0.2/255.255.0.0,frag=no), packets:0, bytes:0, used:never, actions:ct(commit) +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=10.1.2.2/255.255.255.0,frag=no), packets:0, bytes:0, used:0.0s, actions:ct(commit) +]) + +dnl Hold the prefix 10.1.2.2/24 by another 10s. +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'ipv4(src=10.0.0.1,dst=10.1.2.2),tcp(src=1,dst=2)']) +dnl Send more 10.1.0.2 to make 10.1.0.0/16 tuple preprend 10.1.2.0/24 tuple in pvector of subtable. +for i in `seq 0 256`;do +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'ipv4(src=10.0.0.1,dst=10.1.0.2),tcp(src=1,dst=2)']) +done + +AT_CHECK([echo 'table=0,in_port=p1,ip,nw_dst=10.1.0.0/16 actions=p2' | dnl +ovs-ofctl --bundle replace-flows br0 -]) + +AT_CHECK([ovs-appctl dpctl/dump-flows | sed 's/.*core: [[0-9]]*//' | strip_xout_keep_actions], [0], [ +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=10.1.0.2/255.255.0.0,frag=no), packets:0, bytes:0, used:0.0s, actions:2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=10.1.2.2/255.255.255.0,frag=no), packets:0, bytes:0, used:0.0s, actions:2 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP -- 2.25.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev