Ports have a new layer3 attribute, which is set if they send/receive L3 packets. The packet_type included in struct dp_packet and struct flow is taken into account in ofproto-dpif. The classical L2 match fields (dl_src, dl_dst, dl_type, vlan_tci, vlan_vid, and vlan_pcp) now have Ethernet as a prerequisite.
A dummy ethernet header is pushed to L3 packets received from L3 ports before the pipeline processing starts. The ethernet header is popped before sending a packet to a L3 port. For datapath ports that can receive L2 or L3 packets, the packet_type becomes part of the flow key for datapath flows and is handled appropriately in dpif-netdev. Signed-off-by: Lorand Jakab <loja...@cisco.com> Signed-off-by: Simon Horman <simon.hor...@netronome.com> Signed-off-by: Jiri Benc <jb...@redhat.com> Signed-off-by: Yi Yang <yi.y.y...@intel.com> Signed-off-by: Jan Scheurich <jan.scheur...@ericsson.com> Co-authored-by: Zoltan Balogh <zoltan.bal...@ericsson.com> --- build-aux/extract-ofp-fields | 1 + datapath/linux/compat/include/linux/openvswitch.h | 2 + include/openvswitch/match.h | 1 + include/openvswitch/meta-flow.h | 15 +- lib/dpif-netdev.c | 57 ++++---- lib/dpif-netlink.c | 2 +- lib/match.c | 19 ++- lib/meta-flow.c | 2 + lib/netdev-vport.c | 8 +- lib/netdev.h | 1 + lib/odp-execute.c | 2 + lib/odp-util.c | 159 ++++++++++++++++++---- lib/odp-util.h | 6 +- lib/packets.h | 1 - ofproto/ofproto-dpif-sflow.c | 1 + ofproto/ofproto-dpif-upcall.c | 4 +- ofproto/ofproto-dpif-xlate.c | 50 ++++++- ofproto/ofproto-dpif.c | 6 +- 18 files changed, 251 insertions(+), 86 deletions(-) diff --git a/build-aux/extract-ofp-fields b/build-aux/extract-ofp-fields index 40f1bb2..fc6ce1e 100755 --- a/build-aux/extract-ofp-fields +++ b/build-aux/extract-ofp-fields @@ -39,6 +39,7 @@ FORMATTING = {"decimal": ("MFS_DECIMAL", 1, 8), "TCP flags": ("MFS_TCP_FLAGS", 2, 2)} PREREQS = {"none": "MFP_NONE", + "Ethernet": "MFP_ETHERNET", "ARP": "MFP_ARP", "VLAN VID": "MFP_VLAN_VID", "IPv4": "MFP_IPV4", diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h index d78d8a8..9e4f0b1 100644 --- a/datapath/linux/compat/include/linux/openvswitch.h +++ b/datapath/linux/compat/include/linux/openvswitch.h @@ -361,6 +361,8 @@ enum ovs_key_attr { /* Only used within 
kernel data path. */ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ovs_tunnel_info */ #endif + + OVS_KEY_ATTR_PACKET_TYPE, /* be32 packet type */ __OVS_KEY_ATTR_MAX }; diff --git a/include/openvswitch/match.h b/include/openvswitch/match.h index 0b5f050..4f4e096 100644 --- a/include/openvswitch/match.h +++ b/include/openvswitch/match.h @@ -99,6 +99,7 @@ void match_set_ct_mark(struct match *, uint32_t ct_mark); void match_set_ct_mark_masked(struct match *, uint32_t ct_mark, uint32_t mask); void match_set_ct_label(struct match *, ovs_u128 ct_label); void match_set_ct_label_masked(struct match *, ovs_u128 ct_label, ovs_u128 mask); +void match_set_packet_type(struct match *, ovs_be32 packet_type); void match_set_skb_priority(struct match *, uint32_t skb_priority); void match_set_dl_type(struct match *, ovs_be16); void match_set_dl_src(struct match *, const struct eth_addr ); diff --git a/include/openvswitch/meta-flow.h b/include/openvswitch/meta-flow.h index d5c0971..9daca36 100644 --- a/include/openvswitch/meta-flow.h +++ b/include/openvswitch/meta-flow.h @@ -854,7 +854,7 @@ enum OVS_PACKED_ENUM mf_field_id { * Type: MAC. * Maskable: bitwise. * Formatting: Ethernet. - * Prerequisites: none. + * Prerequisites: Ethernet. * Access: read/write. * NXM: NXM_OF_ETH_SRC(2) since v1.1. * OXM: OXM_OF_ETH_SRC(4) since OF1.2 and v1.7. @@ -870,7 +870,7 @@ enum OVS_PACKED_ENUM mf_field_id { * Type: MAC. * Maskable: bitwise. * Formatting: Ethernet. - * Prerequisites: none. + * Prerequisites: Ethernet. * Access: read/write. * NXM: NXM_OF_ETH_DST(1) since v1.1. * OXM: OXM_OF_ETH_DST(3) since OF1.2 and v1.7. @@ -889,7 +889,7 @@ enum OVS_PACKED_ENUM mf_field_id { * Type: be16. * Maskable: no. * Formatting: hexadecimal. - * Prerequisites: none. + * Prerequisites: Ethernet. * Access: read-only. * NXM: NXM_OF_ETH_TYPE(3) since v1.1. * OXM: OXM_OF_ETH_TYPE(5) since OF1.2 and v1.7. @@ -919,7 +919,7 @@ enum OVS_PACKED_ENUM mf_field_id { * Type: be16. * Maskable: bitwise. * Formatting: hexadecimal. 
- * Prerequisites: none. + * Prerequisites: Ethernet. * Access: read/write. * NXM: NXM_OF_VLAN_TCI(4) since v1.1. * OXM: none. @@ -935,7 +935,7 @@ enum OVS_PACKED_ENUM mf_field_id { * Type: be16 (low 12 bits). * Maskable: no. * Formatting: decimal. - * Prerequisites: none. + * Prerequisites: Ethernet. * Access: read/write. * NXM: none. * OXM: none. @@ -953,7 +953,7 @@ enum OVS_PACKED_ENUM mf_field_id { * Type: be16 (low 12 bits). * Maskable: bitwise. * Formatting: decimal. - * Prerequisites: none. + * Prerequisites: Ethernet. * Access: read/write. * NXM: none. * OXM: OXM_OF_VLAN_VID(6) since OF1.2 and v1.7. @@ -969,7 +969,7 @@ enum OVS_PACKED_ENUM mf_field_id { * Type: u8 (low 3 bits). * Maskable: no. * Formatting: decimal. - * Prerequisites: none. + * Prerequisites: Ethernet. * Access: read/write. * NXM: none. * OXM: none. @@ -1676,6 +1676,7 @@ enum OVS_PACKED_ENUM mf_prereqs { MFP_NONE, /* L2 requirements. */ + MFP_ETHERNET, MFP_ARP, MFP_VLAN_VID, MFP_IPV4, diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 973814c..ee40969 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1782,24 +1782,6 @@ netdev_flow_key_clone(struct netdev_flow_key *dst, offsetof(struct netdev_flow_key, mf) + src->len); } -/* Slow. */ -static void -netdev_flow_key_from_flow(struct netdev_flow_key *dst, - const struct flow *src) -{ - struct dp_packet packet; - uint64_t buf_stub[512 / 8]; - - dp_packet_use_stub(&packet, buf_stub, sizeof buf_stub); - pkt_metadata_from_flow(&packet.md, src); - flow_compose(&packet, src); - miniflow_extract(&packet, &dst->mf); - dp_packet_uninit(&packet); - - dst->len = netdev_flow_key_size(miniflow_n_values(&dst->mf)); - dst->hash = 0; /* Not computed yet. */ -} - /* Initialize a netdev_flow_key 'mask' from 'match'. 
*/ static inline void netdev_flow_mask_init(struct netdev_flow_key *mask, @@ -2254,9 +2236,9 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, struct ds ds = DS_EMPTY_INITIALIZER; struct ofpbuf key_buf, mask_buf; struct odp_flow_key_parms odp_parms = { - .flow = &match->flow, - .mask = &match->wc.masks, - .support = dp_netdev_support, + .flow = &match->flow, + .mask = &match->wc.masks, + .support = dp_netdev_support, }; ofpbuf_init(&key_buf, 0); @@ -2270,8 +2252,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, odp_format_ufid(ufid, &ds); ds_put_cstr(&ds, " "); odp_flow_format(key_buf.data, key_buf.size, - mask_buf.data, mask_buf.size, - NULL, &ds, false); + mask_buf.data, mask_buf.size, + NULL, &ds, false); ds_put_cstr(&ds, ", actions:"); format_odp_actions(&ds, actions, actions_len); @@ -2279,6 +2261,17 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, ofpbuf_uninit(&key_buf); ofpbuf_uninit(&mask_buf); + + /* Add a printout of the actual match isntalled. */ + struct match m; + ds_clear(&ds); + ds_put_cstr(&ds, "flow match: "); + miniflow_expand(&flow->cr.flow.mf, &m.flow); + miniflow_expand(&flow->cr.mask->mf, &m.wc.masks); + match_format(&m, &ds, OFP_DEFAULT_PRIORITY); + + VLOG_DBG_RL(&upcall_rl, "%s", ds_cstr(&ds)); + ds_destroy(&ds); } @@ -2315,8 +2308,12 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd, error = ENOENT; } } else { - if (put->flags & DPIF_FP_MODIFY - && flow_equal(&match->flow, &netdev_flow->flow)) { + if (put->flags & DPIF_FP_MODIFY) { + /* Removed the additional check + * flow_equal(&match.flow, &netdev_flow->flow) as a) the + * dpcls lookup is sufficient to uniquely identify a flow + * and b) it caused false negatives because the flow in + * netdev->flow may not properly be masked. 
*/ struct dp_netdev_actions *new_actions; struct dp_netdev_actions *old_actions; @@ -2358,7 +2355,7 @@ static int dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put) { struct dp_netdev *dp = get_dp_netdev(dpif); - struct netdev_flow_key key; + struct netdev_flow_key key, mask; struct dp_netdev_pmd_thread *pmd; struct match match; ovs_u128 ufid; @@ -2385,9 +2382,10 @@ dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put) } /* Must produce a netdev_flow_key for lookup. - * This interface is no longer performance critical, since it is not used - * for upcall processing any more. */ - netdev_flow_key_from_flow(&key, &match.flow); + * Use the same method as employed to create the key when adding + * the flow to the dplcs to make sure they match. */ + netdev_flow_mask_init(&mask, &match); + netdev_flow_key_init_masked(&key, &match.flow, &mask); if (put->pmd_id == PMD_ID_NULL) { if (cmap_count(&dp->poll_threads) == 0) { @@ -4109,6 +4107,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, key->hash = dpif_netdev_packet_get_rss_hash(packet, &key->mf); flow = emc_lookup(flow_cache, key); + if (OVS_LIKELY(flow)) { dp_netdev_queue_batches(packet, flow, &key->mf, batches, n_batches); diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 8b21d41..a70d11c 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -1562,7 +1562,7 @@ dpif_netlink_encode_execute(int dp_ifindex, const struct dpif_execute *d_exec, dp_packet_size(d_exec->packet)); key_ofs = nl_msg_start_nested(buf, OVS_PACKET_ATTR_KEY); - odp_key_from_pkt_metadata(buf, &d_exec->packet->md); + odp_key_from_dp_packet(buf, d_exec->packet); nl_msg_end_nested(buf, key_ofs); nl_msg_put_unspec(buf, OVS_PACKET_ATTR_ACTIONS, diff --git a/lib/match.c b/lib/match.c index 57529f5..2e28b7b 100644 --- a/lib/match.c +++ b/lib/match.c @@ -384,6 +384,13 @@ match_set_ct_label_masked(struct match *match, ovs_u128 value, ovs_u128 mask) } void +match_set_packet_type(struct match *match, 
ovs_be32 packet_type) +{ + match->flow.packet_type = packet_type; + match->wc.masks.packet_type = OVS_BE32_MAX; +} + +void match_set_dl_type(struct match *match, ovs_be16 dl_type) { match->wc.masks.dl_type = OVS_BE16_MAX; @@ -1137,6 +1144,10 @@ match_format(const struct match *match, struct ds *s, int priority) format_ct_label_masked(s, &f->ct_label, &wc->masks.ct_label); } + if (wc->masks.packet_type) { + ds_put_format(s, "packet_type=0x%08"PRIx32",", ntohl(f->packet_type)); + } + if (wc->masks.dl_type) { skip_type = true; if (f->dl_type == htons(ETH_TYPE_IP)) { @@ -1233,8 +1244,12 @@ match_format(const struct match *match, struct ds *s, int priority) ntohs(f->vlan_tci), ntohs(wc->masks.vlan_tci)); } } - format_eth_masked(s, "dl_src", f->dl_src, wc->masks.dl_src); - format_eth_masked(s, "dl_dst", f->dl_dst, wc->masks.dl_dst); + if (!eth_addr_is_zero(wc->masks.dl_src)) { + format_eth_masked(s, "dl_src", f->dl_src, wc->masks.dl_src); + } + if (!eth_addr_is_zero(wc->masks.dl_dst)) { + format_eth_masked(s, "dl_dst", f->dl_dst, wc->masks.dl_dst); + } if (!skip_type && wc->masks.dl_type) { ds_put_format(s, "%sdl_type=%s0x%04"PRIx16",", colors.param, colors.end, ntohs(f->dl_type)); diff --git a/lib/meta-flow.c b/lib/meta-flow.c index b92950b..7ff872b 100644 --- a/lib/meta-flow.c +++ b/lib/meta-flow.c @@ -386,6 +386,8 @@ mf_are_prereqs_ok(const struct mf_field *mf, const struct flow *flow, switch (mf->prereqs) { case MFP_NONE: return true; + case MFP_ETHERNET: + return is_ethernet(flow, wc); case MFP_ARP: return (flow->dl_type == htons(ETH_TYPE_ARP) || flow->dl_type == htons(ETH_TYPE_RARP)); diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 2d0aa43..a6c1e23 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -97,9 +97,13 @@ netdev_vport_is_patch(const struct netdev *netdev) bool netdev_vport_is_layer3(const struct netdev *dev) { - const char *type = netdev_get_type(dev); + if (is_vport_class(netdev_get_class(dev))) { + struct netdev_vport *vport = 
netdev_vport_cast(dev); + + return vport->tnl_cfg.is_layer3; + } - return (!strcmp("lisp", type)); + return false; } static bool diff --git a/lib/netdev.h b/lib/netdev.h index d6c07c1..416d2b7 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -100,6 +100,7 @@ struct netdev_tunnel_config { bool csum; bool dont_fragment; + bool is_layer3; }; void netdev_run(void); diff --git a/lib/odp-execute.c b/lib/odp-execute.c index 4ed4475..e394f44 100644 --- a/lib/odp-execute.c +++ b/lib/odp-execute.c @@ -375,6 +375,7 @@ odp_execute_set_action(struct dp_packet *packet, const struct nlattr *a) break; case OVS_KEY_ATTR_UNSPEC: + case OVS_KEY_ATTR_PACKET_TYPE: case OVS_KEY_ATTR_ENCAP: case OVS_KEY_ATTR_ETHERTYPE: case OVS_KEY_ATTR_IN_PORT: @@ -471,6 +472,7 @@ odp_execute_masked_set_action(struct dp_packet *packet, break; case OVS_KEY_ATTR_TUNNEL: /* Masked data not supported for tunnel. */ + case OVS_KEY_ATTR_PACKET_TYPE: case OVS_KEY_ATTR_UNSPEC: case OVS_KEY_ATTR_CT_STATE: case OVS_KEY_ATTR_CT_ZONE: diff --git a/lib/odp-util.c b/lib/odp-util.c index f97c4eb..1ddf658 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -170,6 +170,7 @@ ovs_key_attr_to_string(enum ovs_key_attr attr, char *namebuf, size_t bufsize) case OVS_KEY_ATTR_MPLS: return "mpls"; case OVS_KEY_ATTR_DP_HASH: return "dp_hash"; case OVS_KEY_ATTR_RECIRC_ID: return "recirc_id"; + case OVS_KEY_ATTR_PACKET_TYPE: return "packet_type"; case __OVS_KEY_ATTR_MAX: default: @@ -1915,6 +1916,7 @@ static const struct attr_len_tbl ovs_flow_key_attr_lens[OVS_KEY_ATTR_MAX + 1] = [OVS_KEY_ATTR_CT_ZONE] = { .len = 2 }, [OVS_KEY_ATTR_CT_MARK] = { .len = 4 }, [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, + [OVS_KEY_ATTR_PACKET_TYPE] = { .len = 4 }, }; /* Returns the correct length of the payload for a flow key attribute of the @@ -2885,6 +2887,13 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, } break; + case OVS_KEY_ATTR_PACKET_TYPE: + ds_put_format(ds, "0x%08"PRIx32, 
ntohl(nl_attr_get_be32(a))); + if (!is_exact) { + ds_put_format(ds, "/0x%08"PRIx32, ntohl(nl_attr_get_be32(ma))); + } + break; + case OVS_KEY_ATTR_ETHERNET: { const struct ovs_key_ethernet *mask = ma ? nl_attr_get(ma) : NULL; const struct ovs_key_ethernet *key = nl_attr_get(a); @@ -4368,9 +4377,10 @@ odp_flow_key_from_flow__(const struct odp_flow_key_parms *parms, bool export_mask, struct ofpbuf *buf) { struct ovs_key_ethernet *eth_key; - size_t encap; + size_t encap = 0; const struct flow *flow = parms->flow; - const struct flow *data = export_mask ? parms->mask : parms->flow; + const struct flow *mask = parms->mask; + const struct flow *data = export_mask ? mask : flow; nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, data->skb_priority); @@ -4406,23 +4416,32 @@ odp_flow_key_from_flow__(const struct odp_flow_key_parms *parms, nl_msg_put_odp_port(buf, OVS_KEY_ATTR_IN_PORT, data->in_port.odp_port); } - eth_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_ETHERNET, - sizeof *eth_key); - get_ethernet_key(data, eth_key); + if (export_mask || flow->packet_type != PT_ETH) { + nl_msg_put_be32(buf, OVS_KEY_ATTR_PACKET_TYPE, data->packet_type); + } - if (flow->vlan_tci != htons(0) || flow->dl_type == htons(ETH_TYPE_VLAN)) { - if (export_mask) { - nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, OVS_BE16_MAX); + /* Conditionally add L2 attributes for Ethernet packets */ + if (flow->packet_type == PT_ETH) { + eth_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_ETHERNET, + sizeof *eth_key); + get_ethernet_key(data, eth_key); + + if (flow->vlan_tci != htons(0) || + flow->dl_type == htons(ETH_TYPE_VLAN)) { + if (export_mask) { + nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, OVS_BE16_MAX); + } else { + nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, + htons(ETH_TYPE_VLAN)); + } + nl_msg_put_be16(buf, OVS_KEY_ATTR_VLAN, data->vlan_tci); + encap = nl_msg_start_nested(buf, OVS_KEY_ATTR_ENCAP); + if (flow->vlan_tci == htons(0)) { + goto unencap; + } } else { - nl_msg_put_be16(buf, 
OVS_KEY_ATTR_ETHERTYPE, htons(ETH_TYPE_VLAN)); + encap = 0; } - nl_msg_put_be16(buf, OVS_KEY_ATTR_VLAN, data->vlan_tci); - encap = nl_msg_start_nested(buf, OVS_KEY_ATTR_ENCAP); - if (flow->vlan_tci == htons(0)) { - goto unencap; - } - } else { - encap = 0; } if (ntohs(flow->dl_type) < ETH_TYPE_MIN) { @@ -4568,8 +4587,10 @@ odp_flow_key_from_mask(const struct odp_flow_key_parms *parms, /* Generate ODP flow key from the given packet metadata */ void -odp_key_from_pkt_metadata(struct ofpbuf *buf, const struct pkt_metadata *md) +odp_key_from_dp_packet(struct ofpbuf *buf, const struct dp_packet *packet) { + const struct pkt_metadata *md = &packet->md; + nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, md->skb_priority); if (flow_tnl_dst_is_set(&md->tunnel)) { @@ -4598,18 +4619,28 @@ odp_key_from_pkt_metadata(struct ofpbuf *buf, const struct pkt_metadata *md) if (md->in_port.odp_port != ODPP_NONE) { nl_msg_put_odp_port(buf, OVS_KEY_ATTR_IN_PORT, md->in_port.odp_port); } + + /* Add OVS_KEY_ATTR_ETHERNET for non-Ethernet packets */ + if (PT_NS(packet->packet_type) == OFPHTN_ETHERTYPE) { + nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, PT_NS_TYPE(packet->packet_type)); + } } /* Generate packet metadata from the given ODP flow key. 
*/ void -odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len, - struct pkt_metadata *md) +odp_key_to_dp_packet(const struct nlattr *key, size_t key_len, + struct dp_packet *packet) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); const struct nlattr *nla; + struct pkt_metadata *md = &packet->md; + ovs_be32 packet_type = PT_UNKNOWN; + ovs_be16 ethertype = 0; size_t left; uint32_t wanted_attrs = 1u << OVS_KEY_ATTR_PRIORITY | 1u << OVS_KEY_ATTR_SKB_MARK | 1u << OVS_KEY_ATTR_TUNNEL | - 1u << OVS_KEY_ATTR_IN_PORT; + 1u << OVS_KEY_ATTR_IN_PORT | 1u << OVS_KEY_ATTR_ETHERTYPE | + 1u << OVS_KEY_ATTR_ETHERNET; pkt_metadata_init(md, ODPP_NONE); @@ -4674,14 +4705,32 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len, md->in_port.odp_port = nl_attr_get_odp_port(nla); wanted_attrs &= ~(1u << OVS_KEY_ATTR_IN_PORT); break; + case OVS_KEY_ATTR_ETHERNET: + /* Presence of OVS_KEY_ATTR_ETHERNET indicates Ethernet packet. */ + packet_type = PT_ETH; + wanted_attrs &= ~(1u << OVS_KEY_ATTR_ETHERNET); + break; + case OVS_KEY_ATTR_ETHERTYPE: + ethertype = nl_attr_get_be16(nla); + wanted_attrs &= ~(1u << OVS_KEY_ATTR_ETHERTYPE); + break; default: break; } if (!wanted_attrs) { - return; /* Have everything. */ + break; /* Have everything. */ } } + + if (packet_type == PT_ETH){ + dp_packet_set_packet_type(packet, PT_ETH); + } else if (packet_type == PT_UNKNOWN && ethertype != 0) { + dp_packet_set_packet_type(packet, + PACKET_TYPE(OFPHTN_ETHERTYPE, ntohs(ethertype))); + } else { + VLOG_ERR_RL(&rl, "Packet without ETHERTYPE. Unknown packet_type.\n"); + } } uint32_t @@ -4845,7 +4894,21 @@ parse_ethertype(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], *expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERTYPE; } else { if (!is_mask) { - flow->dl_type = htons(FLOW_DL_TYPE_NONE); + /* Default ethertype for well-known L3 packets. */ + /* XXX: Needed??? 
*/ + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IPV4)) { + flow->dl_type = htons(ETH_TYPE_IP); + } else if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IPV6)) { + flow->dl_type = htons(ETH_TYPE_IPV6); + } else if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_MPLS)) { + flow->dl_type = htons(ETH_TYPE_MPLS); + } else { + flow->dl_type = htons(FLOW_DL_TYPE_NONE); + } + } else if (src_flow->packet_type != PT_ETH) { + /* dl_type is mandatory for non-Ethernet packets */ + /* XXX: Needed??? */ + flow->dl_type = htons(0xffff); } else if (ntohs(src_flow->dl_type) < ETH_TYPE_MIN) { /* See comments in odp_flow_key_from_flow__(). */ VLOG_ERR_RL(&rl, "mask expected for non-Ethernet II frame"); @@ -5253,23 +5316,36 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, flow->in_port.odp_port = ODPP_NONE; } - /* Ethernet header. */ + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_PACKET_TYPE)) { + flow->packet_type + = nl_attr_get_be32(attrs[OVS_KEY_ATTR_PACKET_TYPE]); + expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_PACKET_TYPE; + } else if (!is_mask) { + flow->packet_type = PT_ETH; + } + + /* Check for Ethernet header. */ if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ETHERNET)) { const struct ovs_key_ethernet *eth_key; eth_key = nl_attr_get(attrs[OVS_KEY_ATTR_ETHERNET]); put_ethernet_key(eth_key, flow); - if (is_mask) { - expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET; + if (!is_mask) { + flow->packet_type = PT_ETH; } - } - if (!is_mask) { expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET; } + else if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ETHERTYPE)) { + ovs_be16 ethertype = nl_attr_get_be16(attrs[OVS_KEY_ATTR_ETHERTYPE]); + if (!is_mask) { + flow->packet_type = PACKET_TYPE(OFPHTN_ETHERTYPE, ntohs(ethertype)); + } + expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERTYPE; + } /* Get Ethertype or 802.1Q TPID or FLOW_DL_TYPE_NONE. 
*/ if (!parse_ethertype(attrs, present_attrs, &expected_attrs, flow, - src_flow)) { + src_flow)) { return ODP_FIT_ERROR; } @@ -5561,6 +5637,31 @@ commit_set_ether_addr_action(const struct flow *flow, struct flow *base_flow, } static void +commit_ether_action(const struct flow *flow, struct flow *base_flow, + struct ofpbuf *odp_actions, struct flow_wildcards *wc, + bool use_masked) +{ + if (flow->packet_type == PT_ETH) { + if (base_flow->packet_type != PT_ETH) { + odp_put_push_eth_action(odp_actions, &flow->dl_src, &flow->dl_dst, + flow->dl_type); + base_flow->packet_type = flow->packet_type; + base_flow->dl_src = flow->dl_src; + base_flow->dl_dst = flow->dl_dst; + base_flow->dl_type = flow->dl_type; + } else { + commit_set_ether_addr_action(flow, base_flow, odp_actions, wc, + use_masked); + } + } else { + if (base_flow->packet_type == PT_ETH) { + odp_put_pop_eth_action(odp_actions); + base_flow->packet_type = flow->packet_type; + } + } +} + +static void pop_vlan(struct flow *base, struct ofpbuf *odp_actions, struct flow_wildcards *wc) { @@ -6021,7 +6122,7 @@ commit_odp_actions(const struct flow *flow, struct flow *base, enum slow_path_reason slow1, slow2; bool mpls_done = false; - commit_set_ether_addr_action(flow, base, odp_actions, wc, use_masked); + commit_ether_action(flow, base, odp_actions, wc, use_masked); /* Make packet a non-MPLS packet before committing L3/4 actions, * which would otherwise do nothing. */ if (eth_type_mpls(base->dl_type) && !eth_type_mpls(flow->dl_type)) { diff --git a/lib/odp-util.h b/lib/odp-util.h index c4cb509..78f3a57 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ -213,9 +213,9 @@ uint32_t odp_flow_key_hash(const struct nlattr *, size_t); /* Estimated space needed for metadata. 
*/ enum { ODP_KEY_METADATA_SIZE = 9 * 8 }; -void odp_key_from_pkt_metadata(struct ofpbuf *, const struct pkt_metadata *); -void odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len, - struct pkt_metadata *md); +void odp_key_from_dp_packet(struct ofpbuf *, const struct dp_packet *); +void odp_key_to_dp_packet(const struct nlattr *key, size_t key_len, + struct dp_packet *md); /* How well a kernel-provided flow key (a sequence of OVS_KEY_ATTR_* * attributes) matches OVS userspace expectations. diff --git a/lib/packets.h b/lib/packets.h index a1ce5ea..2c6384d 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -127,7 +127,6 @@ pkt_metadata_init(struct pkt_metadata *md, odp_port_t port) memset(md, 0, offsetof(struct pkt_metadata, in_port)); md->tunnel.ip_dst = 0; md->tunnel.ipv6_dst = in6addr_any; - md->in_port.odp_port = port; } diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c index beb4b1f..ba969bb 100644 --- a/ofproto/ofproto-dpif-sflow.c +++ b/ofproto/ofproto-dpif-sflow.c @@ -1026,6 +1026,7 @@ sflow_read_set_action(const struct nlattr *attr, case OVS_KEY_ATTR_CT_MARK: case OVS_KEY_ATTR_CT_LABELS: case OVS_KEY_ATTR_UNSPEC: + case OVS_KEY_ATTR_PACKET_TYPE: case __OVS_KEY_ATTR_MAX: default: break; diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 660383f..96a89b7 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -1381,8 +1381,8 @@ handle_upcalls(struct udpif *udpif, struct upcall *upcalls, op->dop.type = DPIF_OP_EXECUTE; op->dop.u.execute.packet = CONST_CAST(struct dp_packet *, packet); op->dop.u.execute.flow = upcall->flow; - odp_key_to_pkt_metadata(upcall->key, upcall->key_len, - &op->dop.u.execute.packet->md); + odp_key_to_dp_packet(upcall->key, upcall->key_len, + op->dop.u.execute.packet); op->dop.u.execute.actions = upcall->odp_actions.data; op->dop.u.execute.actions_len = upcall->odp_actions.size; op->dop.u.execute.needs_help = (upcall->xout.slow & SLOW_ACTION) != 0; 
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 525cdcd..72dabf8 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -161,6 +161,7 @@ struct xport { bool may_enable; /* May be enabled in bonds. */ bool is_tunnel; /* Is a tunnel port. */ + bool is_layer3; /* Is a layer 3 port. */ struct cfm *cfm; /* CFM handle or null. */ struct bfd *bfd; /* BFD handle or null. */ @@ -872,6 +873,7 @@ xlate_xport_set(struct xport *xport, odp_port_t odp_port, xport->state = state; xport->stp_port_no = stp_port_no; xport->is_tunnel = is_tunnel; + xport->is_layer3 = netdev_vport_is_layer3(netdev); xport->may_enable = may_enable; xport->odp_port = odp_port; @@ -2517,7 +2519,7 @@ xlate_normal(struct xlate_ctx *ctx) /* Learn source MAC. */ bool is_grat_arp = is_gratuitous_arp(flow, wc); - if (ctx->xin->allow_side_effects) { + if (ctx->xin->allow_side_effects && !in_port->is_layer3) { update_learning_table(ctx, in_xbundle, flow->dl_src, vlan, is_grat_arp); } @@ -3132,6 +3134,17 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, } } + if (flow->packet_type == PT_ETH && xport->is_layer3 ) { + /* Ethernet packet to L3 outport -> pop ethernet header. */ + flow->packet_type = PACKET_TYPE(OFPHTN_ETHERTYPE, ntohs(flow->dl_type)); + } + else if (flow->packet_type != PT_ETH && !xport->is_layer3) { + /* L2 outport and non-ethernet packet_type -> add dummy eth header. 
*/ + flow->packet_type = PT_ETH; + memset(&flow->dl_dst, 0,ETH_ADDR_LEN); + memset(&flow->dl_src, 0,ETH_ADDR_LEN); + } + if (xport->peer) { const struct xport *peer = xport->peer; struct flow old_flow = ctx->xin->flow; @@ -3829,6 +3842,11 @@ execute_controller_action(struct xlate_ctx *ctx, int len, odp_execute_actions(NULL, &batch, false, ctx->odp_actions->data, ctx->odp_actions->size, NULL); + if (dp_packet_packet_type(packet) != PT_ETH) { + dp_packet_delete(packet); + return; + } + /* A packet sent by an action in a table-miss rule is considered an * explicit table miss. OpenFlow before 1.3 doesn't have that concept so * it will get translated back to OFPR_ACTION for those versions. */ @@ -5644,6 +5662,15 @@ xlate_wc_finish(struct xlate_ctx *ctx) * use non-header fields as part of the cache. */ flow_wildcards_clear_non_packet_fields(ctx->wc); + /* Wildcard ethernet addresses if the original packet type was not + * Ethernet. + * XXX: This is a work-around. ofproto shouldn't unwildcard the Ethernet + * addresses at all. */ + if (ctx->xin->upcall_flow->packet_type != PT_ETH) { + memset(&ctx->wc->masks.dl_dst, 0, ETH_ADDR_LEN); + memset(&ctx->wc->masks.dl_src, 0, ETH_ADDR_LEN); + } + /* ICMPv4 and ICMPv6 have 8-bit "type" and "code" fields. struct flow * uses the low 8 bits of the 16-bit tp_src and tp_dst members to * represent these fields. The datapath interface, on the other hand, @@ -5866,6 +5893,22 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) } ctx.wc->masks.tunnel.metadata.tab = flow->tunnel.metadata.tab; + /* Get the proximate input port of the packet. (If xin->frozen_state, + * flow->in_port is the ultimate input port of the packet.) */ + struct xport *in_port = get_ofp_port(xbridge, + ctx.base_flow.in_port.ofp_port); + + /* XXX: ONLY FOR NON-PTAP BRIDGE! */ + if (flow->packet_type != PT_ETH && in_port && in_port->is_layer3 && + ctx.table_id == 0) { + /* Add dummy Ethernet header to non-L2 packet if it's coming from a + * L3 port. 
So all packets will be L2 packets for lookup. + * The dl_type has already been set from the packet_type. */ + flow->packet_type = PT_ETH; + memset(&flow->dl_src, 0, ETH_ADDR_LEN); + memset(&flow->dl_dst, 0, ETH_ADDR_LEN); + } + if (!xin->ofpacts && !ctx.rule) { ctx.rule = rule_dpif_lookup_from_table( ctx.xbridge->ofproto, ctx.xin->tables_version, flow, ctx.wc, @@ -5885,11 +5928,6 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) xlate_report_table(&ctx, ctx.rule, ctx.table_id); } - /* Get the proximate input port of the packet. (If xin->frozen_state, - * flow->in_port is the ultimate input port of the packet.) */ - struct xport *in_port = get_ofp_port(xbridge, - ctx.base_flow.in_port.ofp_port); - /* Tunnel stats only for not-thawed packets. */ if (!xin->frozen_state && in_port && in_port->is_tunnel) { if (ctx.xin->resubmit_stats) { diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index d7743d9..9ff2588 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -137,7 +137,6 @@ struct ofport_dpif { struct lldp *lldp; /* lldp, if any. */ bool may_enable; /* May be enabled in bonds. */ bool is_tunnel; /* This port is a tunnel. */ - bool is_layer3; /* This is a layer 3 port. */ long long int carrier_seq; /* Carrier status changes. */ struct ofport_dpif *peer; /* Peer if patch port. 
*/ @@ -1655,7 +1654,6 @@ port_construct(struct ofport *port_) port->qdscp = NULL; port->n_qdscp = 0; port->carrier_seq = netdev_get_carrier_resets(netdev); - port->is_layer3 = netdev_vport_is_layer3(netdev); if (netdev_vport_is_patch(netdev)) { /* By bailing out here, we don't submit the port to the sFlow module @@ -2674,7 +2672,7 @@ bundle_update(struct ofbundle *bundle) bundle->floodable = true; LIST_FOR_EACH (port, bundle_node, &bundle->ports) { if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD - || port->is_layer3 + || netdev_vport_is_layer3(port->up.netdev) || (bundle->ofproto->stp && !stp_forward_in_state(port->stp_state)) || (bundle->ofproto->rstp && !rstp_forward_in_state(port->rstp_state))) { bundle->floodable = false; @@ -2723,7 +2721,7 @@ bundle_add_port(struct ofbundle *bundle, ofp_port_t ofp_port, port->bundle = bundle; ovs_list_push_back(&bundle->ports, &port->bundle_node); if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD - || port->is_layer3 + || netdev_vport_is_layer3(port->up.netdev) || (bundle->ofproto->stp && !stp_forward_in_state(port->stp_state)) || (bundle->ofproto->rstp && !rstp_forward_in_state(port->rstp_state))) { bundle->floodable = false; -- 1.9.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev