Jan, which patch is the final solution you mentioned? Has it been merged into net-next, or is it not ready at all? From my understanding, the final solution you mentioned will also take a long time to be merged. Since it only adds a packet_type match field, it won't have any big impact on the current patchset from a user perspective.
-----Original Message----- From: Jan Scheurich [mailto:jan.scheur...@web.de] Sent: Friday, December 30, 2016 6:59 PM To: Yang, Yi Y <yi.y.y...@intel.com>; d...@openvswitch.org Cc: Simon Horman <simon.hor...@netronome.com>; Jiri Benc <jb...@redhat.com> Subject: Re: [ovs-dev] [PATCH v2 11/17] userspace: add non-tap (l3) support to GRE vports This patch is not in line with the ongoing work to support L3 tunnels on legacy (non packet type-aware) OVS bridges as specified in https://docs.google.com/document/d/1oWMYUH8sjZJzWa72o2q9kU0N6pNE-rwZcLH3-kbbDR8/edit?usp=sharing To avoid extensive rework, we suggest to replace the patch with the final solution based on explicit packet_type fields in dp_packet and struct flow. Regards, Jan On 2016-12-28 13:26, Yi Yang wrote: > Add support for layer 3 GRE vports (non-tap aka non-VTEP). > > This makes use of a vport mode configuration for the existing > (tap/VTEP) GRE vports. > > In order to differentiate packets for two different types of GRE > vports a new flow key attribute, OVS_KEY_ATTR_NEXT_BASE_LAYER, is > used. It is intended that this attribute is only used in userspace as > there appears to be no need for it to be used in the kernel datapath. > > It is envisaged that this attribute may be used for other > encapsulation protocols that support both layer3 and layer2 inner-packets. 
> > Signed-off-by: Simon Horman <simon.hor...@netronome.com> > Signed-off-by: Jiri Benc <jb...@redhat.com> > Signed-off-by: Yi Yang <yi.y.y...@intel.com> > --- > datapath/linux/compat/include/linux/openvswitch.h | 3 ++ > include/openvswitch/flow.h | 12 ++++-- > lib/flow.c | 34 ++++++++++++---- > lib/match.c | 6 ++- > lib/netdev-linux.c | 3 +- > lib/netdev-native-tnl.c | 26 +++++++++--- > lib/netdev-vport.c | 22 ++++++++-- > lib/netdev.h | 1 + > lib/nx-match.c | 2 +- > lib/odp-execute.c | 2 + > lib/odp-util.c | 22 ++++++++++ > lib/odp-util.h | 4 +- > lib/ofp-util.c | 2 +- > lib/tnl-ports.c | 49 > +++++++++++++++++------ > lib/tnl-ports.h | 3 +- > ofproto/ofproto-dpif-rid.h | 2 +- > ofproto/ofproto-dpif-sflow.c | 1 + > ofproto/ofproto-dpif-xlate.c | 2 +- > ofproto/ofproto-dpif.c | 2 + > ofproto/tunnel.c | 4 +- > tests/tunnel-push-pop-ipv6.at | 12 ++++-- > tests/tunnel-push-pop.at | 26 ++++++++++-- > vswitchd/vswitch.xml | 13 ++++++ > 23 files changed, 202 insertions(+), 51 deletions(-) > > diff --git a/datapath/linux/compat/include/linux/openvswitch.h > b/datapath/linux/compat/include/linux/openvswitch.h > index af4ee5c..e477d35 100644 > --- a/datapath/linux/compat/include/linux/openvswitch.h > +++ b/datapath/linux/compat/include/linux/openvswitch.h > @@ -360,6 +360,9 @@ enum ovs_key_attr { > #ifdef __KERNEL__ > /* Only used within kernel data path. */ > OVS_KEY_ATTR_TUNNEL_INFO, /* struct ovs_tunnel_info */ > +#else > + /* Only used within user-space data path. */ > + OVS_KEY_ATTR_NEXT_BASE_LAYER, /* base layer of encapsulated packet > +*/ > #endif > __OVS_KEY_ATTR_MAX > }; > diff --git a/include/openvswitch/flow.h b/include/openvswitch/flow.h > index 93ed37e..46ef87e 100644 > --- a/include/openvswitch/flow.h > +++ b/include/openvswitch/flow.h > @@ -23,7 +23,7 @@ > /* This sequence number should be incremented whenever anything involving > flows > * or the wildcarding of flows changes. 
This will cause build assertion > * failures in places which likely need to be updated. */ -#define > FLOW_WC_SEQ 37 > +#define FLOW_WC_SEQ 38 > > /* Number of Open vSwitch extension 32-bit registers. */ > #define FLOW_N_REGS 16 > @@ -138,6 +138,10 @@ struct flow { > ovs_be16 tp_dst; /* TCP/UDP/SCTP destination port/ICMP code. > */ > ovs_be32 igmp_group_ip4; /* IGMP group IPv4 address. > * Keep last for BUILD_ASSERT_DECL > below. */ > + > + uint8_t next_base_layer; /* Fields of encapsulated packet, if any, > + * start at this layer */ > + uint8_t pad4[7]; > }; > BUILD_ASSERT_DECL(sizeof(struct flow) % sizeof(uint64_t) == 0); > BUILD_ASSERT_DECL(sizeof(struct flow_tnl) % sizeof(uint64_t) == 0); > @@ -145,9 +149,9 @@ BUILD_ASSERT_DECL(sizeof(struct flow_tnl) % > sizeof(uint64_t) == 0); > #define FLOW_U64S (sizeof(struct flow) / sizeof(uint64_t)) > > /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */ > -BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) + sizeof(uint32_t) > - == sizeof(struct flow_tnl) + 256 > - && FLOW_WC_SEQ == 37); > +BUILD_ASSERT_DECL(OFFSETOFEND(struct flow, pad4) > + == sizeof(struct flow_tnl) + 264 > + && FLOW_WC_SEQ == 38); > > /* Incremental points at which flow classification may be performed in > * segments. > diff --git a/lib/flow.c b/lib/flow.c > index ac22d55..a65e154 100644 > --- a/lib/flow.c > +++ b/lib/flow.c > @@ -125,7 +125,7 @@ struct mf_ctx { > * away. Some GCC versions gave warnings on ALWAYS_INLINE, so these are > * defined as macros. */ > > -#if (FLOW_WC_SEQ != 37) > +#if (FLOW_WC_SEQ != 38) > #define MINIFLOW_ASSERT(X) ovs_assert(X) > BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime " > "assertions enabled. 
Consider updating FLOW_WC_SEQ after " > @@ -846,6 +846,20 @@ miniflow_extract(struct dp_packet *packet, struct > miniflow *dst) > miniflow_push_be16(mf, tp_dst, htons(icmp->icmp6_code)); > miniflow_pad_to_64(mf, tp_dst); > } > + } else if (OVS_LIKELY(nw_proto == IPPROTO_GRE)) { > + if (OVS_LIKELY(size >= sizeof(struct gre_base_hdr))) { > + const struct gre_base_hdr *gre = data_pull(&data, &size, > + sizeof *gre); > + if (gre->protocol == htons(ETH_TYPE_TEB)) { > + /* No need to store a zero value for next_base_layer > + * in the miniflow which would cost an extra word of > + * storage. */ > + BUILD_ASSERT(LAYER_2 == 0); > + } else { > + miniflow_push_uint8(mf, next_base_layer, LAYER_3); > + miniflow_pad_to_64(mf, next_base_layer); > + } > + } > } > } > out: > @@ -894,7 +908,7 @@ flow_get_metadata(const struct flow *flow, struct match > *flow_metadata) > { > int i; > > - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); > + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 38); > > match_init_catchall(flow_metadata); > if (flow->tunnel.tun_id != htonll(0)) { @@ -1304,7 +1318,7 @@ > void flow_wildcards_init_for_packet(struct flow_wildcards *wc, > memset(&wc->masks, 0x0, sizeof wc->masks); > > /* Update this function whenever struct flow changes. */ > - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); > + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 38); > > if (flow_tnl_dst_is_set(&flow->tunnel)) { > if (flow->tunnel.flags & FLOW_TNL_F_KEY) { @@ -1425,7 > +1439,7 @@ void > flow_wc_map(const struct flow *flow, struct flowmap *map) > { > /* Update this function whenever struct flow changes. 
*/ > - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); > + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 38); > > flowmap_init(map); > > @@ -1473,6 +1487,8 @@ flow_wc_map(const struct flow *flow, struct > flowmap *map) > > if (OVS_UNLIKELY(flow->nw_proto == IPPROTO_IGMP)) { > FLOWMAP_SET(map, igmp_group_ip4); > + } else if (OVS_UNLIKELY(flow->nw_proto == IPPROTO_GRE)) { > + FLOWMAP_SET(map, next_base_layer); > } else { > FLOWMAP_SET(map, tcp_flags); > } > @@ -1491,6 +1507,8 @@ flow_wc_map(const struct flow *flow, struct flowmap > *map) > FLOWMAP_SET(map, nd_target); > FLOWMAP_SET(map, arp_sha); > FLOWMAP_SET(map, arp_tha); > + } else if (OVS_UNLIKELY(flow->nw_proto == IPPROTO_GRE)) { > + FLOWMAP_SET(map, next_base_layer); > } else { > FLOWMAP_SET(map, tcp_flags); > } > @@ -1512,7 +1530,7 @@ void > flow_wildcards_clear_non_packet_fields(struct flow_wildcards *wc) > { > /* Update this function whenever struct flow changes. */ > - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); > + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 38); > > memset(&wc->masks.metadata, 0, sizeof wc->masks.metadata); > memset(&wc->masks.regs, 0, sizeof wc->masks.regs); @@ -1657,7 > +1675,7 @@ flow_wildcards_set_xxreg_mask(struct flow_wildcards *wc, int idx, > uint32_t > miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis) > { > - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); > + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 38); > uint32_t hash = basis; > > if (flow) { > @@ -1704,7 +1722,7 @@ ASSERT_SEQUENTIAL(ipv6_src, ipv6_dst); > uint32_t > flow_hash_5tuple(const struct flow *flow, uint32_t basis) > { > - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); > + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 38); > uint32_t hash = basis; > > if (flow) { > @@ -2172,7 +2190,7 @@ flow_push_mpls(struct flow *flow, int n, > ovs_be16 mpls_eth_type, > > if (clear_flow_L3) { > /* Clear all L3 and L4 fields and dp_hash. 
*/ > - BUILD_ASSERT(FLOW_WC_SEQ == 37); > + BUILD_ASSERT(FLOW_WC_SEQ == 38); > memset((char *) flow + FLOW_SEGMENT_2_ENDS_AT, 0, > sizeof(struct flow) - FLOW_SEGMENT_2_ENDS_AT); > flow->dp_hash = 0; > diff --git a/lib/match.c b/lib/match.c index c551e57..65d5b8e 100644 > --- a/lib/match.c > +++ b/lib/match.c > @@ -1082,7 +1082,7 @@ match_format(const struct match *match, struct > ds *s, int priority) > > int i; > > - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); > + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 38); > > if (priority != OFP_DEFAULT_PRIORITY) { > ds_put_format(s, "%spriority=%s%d,", @@ -1358,6 +1358,10 @@ > match_format(const struct match *match, struct ds *s, int priority) > TCP_FLAGS(OVS_BE16_MAX)); > } > > + if (wc->masks.next_base_layer) { > + ds_put_format(s, "next_base_layer=%"PRIu8",", f->next_base_layer); > + } > + > if (s->length > start_len) { > ds_chomp(s, ','); > } > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index > 30189b4..786e727 100644 > --- a/lib/netdev-linux.c > +++ b/lib/netdev-linux.c > @@ -5507,7 +5507,8 @@ get_etheraddr(const char *netdev_name, struct eth_addr > *ea) > return error; > } > hwaddr_family = ifr.ifr_hwaddr.sa_family; > - if (hwaddr_family != AF_UNSPEC && hwaddr_family != ARPHRD_ETHER) { > + if (hwaddr_family != AF_UNSPEC && hwaddr_family != ARPHRD_ETHER && > + hwaddr_family != ARPHRD_NONE) { > VLOG_INFO("%s device has unknown hardware address family %d", > netdev_name, hwaddr_family); > return EINVAL; > diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c index > ce2582f..bf52e9d 100644 > --- a/lib/netdev-native-tnl.c > +++ b/lib/netdev-native-tnl.c > @@ -153,6 +153,9 @@ netdev_tnl_push_ip_header(struct dp_packet > *packet, > > memcpy(eth, header, size); > > + dp_packet_reset_offsets(packet); > + packet->l3_ofs = sizeof (struct eth_header); > + > if (netdev_tnl_is_header_ipv6(header)) { > ip6 = netdev_tnl_ipv6_hdr(eth); > *ip_tot_size -= IPV6_HEADER_LEN; @@ -350,10 +353,6 @@ > parse_gre_header(struct dp_packet 
*packet, > return -EINVAL; > } > > - if (greh->protocol != htons(ETH_TYPE_TEB)) { > - return -EINVAL; > - } > - > hlen = ulen + gre_header_len(greh->flags); > if (hlen > dp_packet_size(packet)) { > return -EINVAL; > @@ -383,6 +382,12 @@ parse_gre_header(struct dp_packet *packet, > options++; > } > > + if (greh->protocol == htons(ETH_TYPE_TEB)) { > + packet->md.packet_ethertype = htons(0); > + } else { > + packet->md.packet_ethertype = greh->protocol; > + } > + > return hlen; > } > > @@ -408,6 +413,12 @@ netdev_gre_pop_header(struct dp_packet *packet) > > dp_packet_reset_packet(packet, hlen); > > + if (eth_type_mpls(packet->md.packet_ethertype)) { > + packet->l2_5_ofs = 0; > + } else if (packet->md.packet_ethertype) { > + packet->l3_ofs = 0; > + } > + > return packet; > err: > dp_packet_delete(packet); > @@ -446,7 +457,12 @@ netdev_gre_build_header(const struct netdev > *netdev, > > greh = netdev_tnl_ip_build_header(data, params, IPPROTO_GRE); > > - greh->protocol = htons(ETH_TYPE_TEB); > + if (tnl_cfg->is_layer3) { > + greh->protocol = params->flow->dl_type; > + } else { > + greh->protocol = htons(ETH_TYPE_TEB); > + } > + > greh->flags = 0; > > options = (ovs_16aligned_be32 *) (greh + 1); diff --git > a/lib/netdev-vport.c b/lib/netdev-vport.c index 02a246a..9fcdc64 > 100644 > --- a/lib/netdev-vport.c > +++ b/lib/netdev-vport.c > @@ -96,9 +96,13 @@ netdev_vport_is_patch(const struct netdev *netdev) > bool > netdev_vport_is_layer3(const struct netdev *dev) > { > - const char *type = netdev_get_type(dev); > + if (is_vport_class(netdev_get_class(dev))) { > + struct netdev_vport *vport = netdev_vport_cast(dev); > + > + return vport->tnl_cfg.is_layer3; > + } > > - return (!strcmp("lisp", type)); > + return false; > } > > static bool > @@ -402,13 +406,14 @@ set_tunnel_config(struct netdev *dev_, const struct > smap *args) > struct netdev_vport *dev = netdev_vport_cast(dev_); > const char *name = netdev_get_name(dev_); > const char *type = netdev_get_type(dev_); > - bool 
needs_dst_port, has_csum; > + bool needs_dst_port, has_csum, optional_layer3; > uint16_t dst_proto = 0, src_proto = 0; > struct netdev_tunnel_config tnl_cfg; > struct smap_node *node; > > has_csum = strstr(type, "gre") || strstr(type, "geneve") || > strstr(type, "stt") || strstr(type, "vxlan"); > + optional_layer3 = !strcmp(type, "gre"); > memset(&tnl_cfg, 0, sizeof tnl_cfg); > > /* Add a default destination port for tunnel ports if none > specified. */ @@ -422,6 +427,7 @@ set_tunnel_config(struct netdev > *dev_, const struct smap *args) > > if (!strcmp(type, "lisp")) { > tnl_cfg.dst_port = htons(LISP_DST_PORT); > + tnl_cfg.is_layer3 = true; > } > > if (!strcmp(type, "stt")) { > @@ -505,6 +511,10 @@ set_tunnel_config(struct netdev *dev_, const struct smap > *args) > } > > free(str); > + } else if (!strcmp(node->key, "layer3") && optional_layer3) { > + if (!strcmp(node->value, "true")) { > + tnl_cfg.is_layer3 = true; > + } > } else { > VLOG_WARN("%s: unknown %s argument '%s'", name, type, > node->key); > } > @@ -552,6 +562,7 @@ static int > get_tunnel_config(const struct netdev *dev, struct smap *args) > { > struct netdev_vport *netdev = netdev_vport_cast(dev); > + const char *type = netdev_get_type(dev); > struct netdev_tunnel_config tnl_cfg; > > ovs_mutex_lock(&netdev->mutex); > @@ -605,7 +616,6 @@ get_tunnel_config(const struct netdev *dev, struct > smap *args) > > if (tnl_cfg.dst_port) { > uint16_t dst_port = ntohs(tnl_cfg.dst_port); > - const char *type = netdev_get_type(dev); > > if ((!strcmp("geneve", type) && dst_port != GENEVE_DST_PORT) || > (!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) > || @@ -619,6 +629,10 @@ get_tunnel_config(const struct netdev *dev, struct > smap *args) > smap_add(args, "csum", "true"); > } > > + if (tnl_cfg.is_layer3 && !strcmp("gre", type)) { > + smap_add(args, "layer3", "true"); > + } > + > if (!tnl_cfg.dont_fragment) { > smap_add(args, "df_default", "false"); > } > diff --git a/lib/netdev.h b/lib/netdev.h index 
a667fe3..b3ef596 100644 > --- a/lib/netdev.h > +++ b/lib/netdev.h > @@ -98,6 +98,7 @@ struct netdev_tunnel_config { > > bool csum; > bool dont_fragment; > + bool is_layer3; > }; > > void netdev_run(void); > diff --git a/lib/nx-match.c b/lib/nx-match.c index da2919f..a6b7e2f > 100644 > --- a/lib/nx-match.c > +++ b/lib/nx-match.c > @@ -930,7 +930,7 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const > struct match *match, > int match_len; > int i; > > - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); > + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 38); > > /* Metadata. */ > if (match->wc.masks.dp_hash) { > diff --git a/lib/odp-execute.c b/lib/odp-execute.c index > d7aec06..b239336 100644 > --- a/lib/odp-execute.c > +++ b/lib/odp-execute.c > @@ -375,6 +375,7 @@ odp_execute_set_action(struct dp_packet *packet, const > struct nlattr *a) > case OVS_KEY_ATTR_CT_ZONE: > case OVS_KEY_ATTR_CT_MARK: > case OVS_KEY_ATTR_CT_LABELS: > + case OVS_KEY_ATTR_NEXT_BASE_LAYER: > case __OVS_KEY_ATTR_MAX: > default: > OVS_NOT_REACHED(); > @@ -474,6 +475,7 @@ odp_execute_masked_set_action(struct dp_packet *packet, > case OVS_KEY_ATTR_ICMP: > case OVS_KEY_ATTR_ICMPV6: > case OVS_KEY_ATTR_TCP_FLAGS: > + case OVS_KEY_ATTR_NEXT_BASE_LAYER: > case __OVS_KEY_ATTR_MAX: > default: > OVS_NOT_REACHED(); > diff --git a/lib/odp-util.c b/lib/odp-util.c index 6725294..6c4df51 > 100644 > --- a/lib/odp-util.c > +++ b/lib/odp-util.c > @@ -166,6 +166,7 @@ ovs_key_attr_to_string(enum ovs_key_attr attr, char > *namebuf, size_t bufsize) > case OVS_KEY_ATTR_MPLS: return "mpls"; > case OVS_KEY_ATTR_DP_HASH: return "dp_hash"; > case OVS_KEY_ATTR_RECIRC_ID: return "recirc_id"; > + case OVS_KEY_ATTR_NEXT_BASE_LAYER: return "next_base_layer"; > > case __OVS_KEY_ATTR_MAX: > default: > @@ -1862,6 +1863,7 @@ static const struct attr_len_tbl > ovs_flow_key_attr_lens[OVS_KEY_ATTR_MAX + 1] = > [OVS_KEY_ATTR_CT_ZONE] = { .len = 2 }, > [OVS_KEY_ATTR_CT_MARK] = { .len = 4 }, > [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct > 
ovs_key_ct_labels) }, > + [OVS_KEY_ATTR_NEXT_BASE_LAYER] = { .len = 1 }, > }; > > /* Returns the correct length of the payload for a flow key > attribute of the @@ -2986,6 +2988,13 @@ format_odp_key_attr(const struct > nlattr *a, const struct nlattr *ma, > ds_chomp(ds, ','); > break; > } > + > + case OVS_KEY_ATTR_NEXT_BASE_LAYER: { > + const uint8_t *mask = ma ? nl_attr_get(ma) : NULL; > + format_u8u(ds, "type", nl_attr_get_u8(a), mask, verbose); > + break; > + } > + > case OVS_KEY_ATTR_UNSPEC: > case __OVS_KEY_ATTR_MAX: > default: > @@ -4465,6 +4474,11 @@ odp_flow_key_from_flow__(const struct > odp_flow_key_parms *parms, > sctp_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_SCTP, > sizeof *sctp_key); > get_tp_key(data, sctp_key); > + } else if (flow->nw_proto == IPPROTO_GRE) { > + if (parms->support.next_base_layer) { > + nl_msg_put_u8(buf, OVS_KEY_ATTR_NEXT_BASE_LAYER, > + data->next_base_layer); > + } > } else if (flow->dl_type == htons(ETH_TYPE_IP) > && flow->nw_proto == IPPROTO_ICMP) { > struct ovs_key_icmp *icmp_key; @@ -5029,6 +5043,14 @@ > parse_l2_5_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], > put_tp_key(sctp_key, flow); > expected_bit = OVS_KEY_ATTR_SCTP; > } > + } else if (src_flow->nw_proto == IPPROTO_GRE > + && (src_flow->dl_type == htons(ETH_TYPE_IP) || > + src_flow->dl_type == htons(ETH_TYPE_IPV6)) > + && !(src_flow->nw_frag & FLOW_NW_FRAG_LATER)) { > + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_NEXT_BASE_LAYER)) { > + flow->next_base_layer = > nl_attr_get_u8(attrs[OVS_KEY_ATTR_NEXT_BASE_LAYER]); > + expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_NEXT_BASE_LAYER; > + } > } else if (src_flow->nw_proto == IPPROTO_ICMP > && src_flow->dl_type == htons(ETH_TYPE_IP) > && !(src_flow->nw_frag & FLOW_NW_FRAG_LATER)) { diff > --git a/lib/odp-util.h b/lib/odp-util.h index f391e2a..41348cc 100644 > --- a/lib/odp-util.h > +++ b/lib/odp-util.h > @@ -142,7 +142,7 @@ void odp_portno_names_destroy(struct hmap *portno_names); > * add another field 
and forget to adjust this value. > */ > #define ODPUTIL_FLOW_KEY_BYTES 640 > -BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); > +BUILD_ASSERT_DECL(FLOW_WC_SEQ == 38); > > /* A buffer with sufficient size and alignment to hold an nlattr-formatted > flow > * key. An array of "struct nlattr" might not, in theory, be > sufficiently @@ -185,6 +185,8 @@ struct odp_support { > * 'ct_state'. The above 'ct_state' member must be true for this > * to make sense */ > bool ct_state_nat; > + > + bool next_base_layer; > }; > > struct odp_flow_key_parms { > diff --git a/lib/ofp-util.c b/lib/ofp-util.c index d5d4b7d..273f18c > 100644 > --- a/lib/ofp-util.c > +++ b/lib/ofp-util.c > @@ -101,7 +101,7 @@ ofputil_netmask_to_wcbits(ovs_be32 netmask) > void > ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *wc) > { > - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); > + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 38); > > /* Initialize most of wc. */ > flow_wildcards_init_catchall(wc); diff --git a/lib/tnl-ports.c > b/lib/tnl-ports.c index ffa1389..8f82ac9 100644 > --- a/lib/tnl-ports.c > +++ b/lib/tnl-ports.c > @@ -27,6 +27,7 @@ > #include "hash.h" > #include "openvswitch/list.h" > #include "netdev.h" > +#include "netdev-vport.h" > #include "openvswitch/ofpbuf.h" > #include "ovs-thread.h" > #include "odp-util.h" > @@ -53,6 +54,7 @@ struct tnl_port { > odp_port_t port; > ovs_be16 tp_port; > uint8_t nw_proto; > + bool is_layer3; > char dev_name[IFNAMSIZ]; > struct ovs_list node; > }; > @@ -83,7 +85,8 @@ tnl_port_free(struct tnl_port_in *p) > > static void > tnl_port_init_flow(struct flow *flow, struct eth_addr mac, > - struct in6_addr *addr, uint8_t nw_proto, ovs_be16 tp_port) > + struct in6_addr *addr, uint8_t nw_proto, ovs_be16 tp_port, > + bool is_layer3) > { > memset(flow, 0, sizeof *flow); > > @@ -98,18 +101,20 @@ tnl_port_init_flow(struct flow *flow, struct > eth_addr mac, > > flow->nw_proto = nw_proto; > flow->tp_dst = tp_port; > + flow->next_base_layer = is_layer3 ? 
LAYER_3 : LAYER_2; > } > > static void > map_insert(odp_port_t port, struct eth_addr mac, struct in6_addr *addr, > - uint8_t nw_proto, ovs_be16 tp_port, const char dev_name[]) > + uint8_t nw_proto, ovs_be16 tp_port, const char dev_name[], > + bool is_layer3) > { > const struct cls_rule *cr; > struct tnl_port_in *p; > struct match match; > > memset(&match, 0, sizeof match); > - tnl_port_init_flow(&match.flow, mac, addr, nw_proto, tp_port); > + tnl_port_init_flow(&match.flow, mac, addr, nw_proto, tp_port, > + is_layer3); > > do { > cr = classifier_lookup(&cls, OVS_VERSION_MAX, &match.flow, > NULL); @@ -130,6 +135,11 @@ map_insert(odp_port_t port, struct eth_addr mac, > struct in6_addr *addr, > * doesn't make sense to match on UDP port numbers. */ > if (tp_port) { > match.wc.masks.tp_dst = OVS_BE16_MAX; > + } else { > + /* Match base layer for GRE tunnels as it may > + * be used to differentiate them. > + */ > + match.wc.masks.next_base_layer = UINT8_MAX; > } > if (IN6_IS_ADDR_V4MAPPED(addr)) { > match.wc.masks.nw_dst = OVS_BE32_MAX; @@ -149,14 +159,15 > @@ map_insert(odp_port_t port, struct eth_addr mac, struct in6_addr > *addr, > > static void > map_insert_ipdev__(struct ip_device *ip_dev, char dev_name[], > - odp_port_t port, uint8_t nw_proto, ovs_be16 tp_port) > + odp_port_t port, uint8_t nw_proto, ovs_be16 tp_port, > + bool is_layer3) > { > if (ip_dev->n_addr) { > int i; > > for (i = 0; i < ip_dev->n_addr; i++) { > map_insert(port, ip_dev->mac, &ip_dev->addr[i], > - nw_proto, tp_port, dev_name); > + nw_proto, tp_port, dev_name, is_layer3); > } > } > } > @@ -181,7 +192,7 @@ tnl_type_to_nw_proto(const char type[]) > > void > tnl_port_map_insert(odp_port_t port, ovs_be16 tp_port, > - const char dev_name[], const char type[]) > + const char dev_name[], const char type[], bool > + is_layer3) > { > struct tnl_port *p; > struct ip_device *ip_dev; > @@ -194,7 +205,8 @@ tnl_port_map_insert(odp_port_t port, ovs_be16 > tp_port, > > ovs_mutex_lock(&mutex); > LIST_FOR_EACH(p, 
node, &port_list) { > - if (tp_port == p->tp_port && p->nw_proto == nw_proto) { > + if (tp_port == p->tp_port && p->nw_proto == nw_proto && > + p->is_layer3 == is_layer3) { > goto out; > } > } > @@ -203,11 +215,13 @@ tnl_port_map_insert(odp_port_t port, ovs_be16 tp_port, > p->port = port; > p->tp_port = tp_port; > p->nw_proto = nw_proto; > + p->is_layer3 = is_layer3; > ovs_strlcpy(p->dev_name, dev_name, sizeof p->dev_name); > ovs_list_insert(&port_list, &p->node); > > LIST_FOR_EACH(ip_dev, node, &addr_list) { > - map_insert_ipdev__(ip_dev, p->dev_name, p->port, p->nw_proto, > p->tp_port); > + map_insert_ipdev__(ip_dev, p->dev_name, p->port, p->nw_proto, > + p->tp_port, p->is_layer3); > } > > out: > @@ -228,12 +242,12 @@ tnl_port_unref(const struct cls_rule *cr) > > static void > map_delete(struct eth_addr mac, struct in6_addr *addr, > - ovs_be16 tp_port, uint8_t nw_proto) > + ovs_be16 tp_port, uint8_t nw_proto, bool is_layer3) > { > const struct cls_rule *cr; > struct flow flow; > > - tnl_port_init_flow(&flow, mac, addr, nw_proto, tp_port); > + tnl_port_init_flow(&flow, mac, addr, nw_proto, tp_port, > + is_layer3); > > cr = classifier_lookup(&cls, OVS_VERSION_MAX, &flow, NULL); > tnl_port_unref(cr); > @@ -242,11 +256,14 @@ map_delete(struct eth_addr mac, struct in6_addr *addr, > static void > ipdev_map_delete(struct ip_device *ip_dev, ovs_be16 tp_port, uint8_t > nw_proto) > { > + bool is_layer3 = netdev_vport_is_layer3(ip_dev->dev); > + > if (ip_dev->n_addr) { > int i; > > for (i = 0; i < ip_dev->n_addr; i++) { > - map_delete(ip_dev->mac, &ip_dev->addr[i], tp_port, nw_proto); > + map_delete(ip_dev->mac, &ip_dev->addr[i], tp_port, nw_proto, > + is_layer3); > } > } > } > @@ -352,7 +369,12 @@ tnl_port_show(struct unixctl_conn *conn, int argc > OVS_UNUSED, > } > > LIST_FOR_EACH(p, node, &port_list) { > - ds_put_format(&ds, "%s (%"PRIu32")\n", p->dev_name, p->port); > + /* A layer3 and non-layer3 tunnel port may share the same ODP port. 
> + * To allow differentiation and avoid displaying otherwise > + * duplicated ouput append " (layer3)" when showing layer-3 > + * tunnel ports. */ > + ds_put_format(&ds, "%s (%"PRIu32")%s\n", p->dev_name, p->port, > + p->is_layer3 ? " (layer3)" : ""); > } > > out: > @@ -367,7 +389,8 @@ map_insert_ipdev(struct ip_device *ip_dev) > struct tnl_port *p; > > LIST_FOR_EACH(p, node, &port_list) { > - map_insert_ipdev__(ip_dev, p->dev_name, p->port, p->nw_proto, > p->tp_port); > + map_insert_ipdev__(ip_dev, p->dev_name, p->port, p->nw_proto, > + p->tp_port, p->is_layer3); > } > } > > diff --git a/lib/tnl-ports.h b/lib/tnl-ports.h index 58b048a..fb57673 > 100644 > --- a/lib/tnl-ports.h > +++ b/lib/tnl-ports.h > @@ -27,7 +27,8 @@ > odp_port_t tnl_port_map_lookup(struct flow *flow, struct > flow_wildcards *wc); > > void tnl_port_map_insert(odp_port_t port, ovs_be16 udp_port, > - const char dev_name[], const char type[]); > + const char dev_name[], const char type[], > + bool is_layer3); > > void tnl_port_map_delete(ovs_be16 udp_port, const char type[]); > void tnl_port_map_insert_ipdev(const char dev[]); diff --git > a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h index > f622278..8ccd68b 100644 > --- a/ofproto/ofproto-dpif-rid.h > +++ b/ofproto/ofproto-dpif-rid.h > @@ -99,7 +99,7 @@ struct rule; > /* Metadata for restoring pipeline context after recirculation. Helpers > * are inlined below to keep them together with the definition for easier > * updates. */ > -BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); > +BUILD_ASSERT_DECL(FLOW_WC_SEQ == 38); > > struct frozen_metadata { > /* Metadata in struct flow. 
*/ > diff --git a/ofproto/ofproto-dpif-sflow.c > b/ofproto/ofproto-dpif-sflow.c index f970a57..8f51161 100644 > --- a/ofproto/ofproto-dpif-sflow.c > +++ b/ofproto/ofproto-dpif-sflow.c > @@ -1023,6 +1023,7 @@ sflow_read_set_action(const struct nlattr *attr, > case OVS_KEY_ATTR_CT_MARK: > case OVS_KEY_ATTR_CT_LABELS: > case OVS_KEY_ATTR_UNSPEC: > + case OVS_KEY_ATTR_NEXT_BASE_LAYER: > case __OVS_KEY_ATTR_MAX: > default: > break; > diff --git a/ofproto/ofproto-dpif-xlate.c > b/ofproto/ofproto-dpif-xlate.c index 1e0bcea..f8a573b 100644 > --- a/ofproto/ofproto-dpif-xlate.c > +++ b/ofproto/ofproto-dpif-xlate.c > @@ -2923,7 +2923,7 @@ compose_output_action__(struct xlate_ctx *ctx, > ofp_port_t ofp_port, > > /* If 'struct flow' gets additional metadata, we'll need to zero it out > * before traversing a patch port. */ > - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); > + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 38); > memset(&flow_tnl, 0, sizeof flow_tnl); > > if (!xport) { > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index > a70a491..50c1bb0 100644 > --- a/ofproto/ofproto-dpif.c > +++ b/ofproto/ofproto-dpif.c > @@ -1325,6 +1325,8 @@ check_support(struct dpif_backer *backer) > backer->support.odp.ct_label = check_ct_label(backer); > > backer->support.odp.ct_state_nat = check_ct_state_nat(backer); > + > + backer->support.odp.next_base_layer = > + backer->support.tnl_push_pop; > } > > static int > diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c index > ce727f4..0dc9fe3 100644 > --- a/ofproto/tunnel.c > +++ b/ofproto/tunnel.c > @@ -26,6 +26,7 @@ > #include "hash.h" > #include "openvswitch/hmap.h" > #include "netdev.h" > +#include "netdev-vport.h" > #include "odp-util.h" > #include "openvswitch/ofpbuf.h" > #include "packets.h" > @@ -192,7 +193,8 @@ tnl_port_add__(const struct ofport_dpif *ofport, const > struct netdev *netdev, > const char *type; > > type = netdev_get_type(netdev); > - tnl_port_map_insert(odp_port, cfg->dst_port, name, type); > + 
tnl_port_map_insert(odp_port, cfg->dst_port, name, type, > + cfg->is_layer3); > > } > return true; > diff --git a/tests/tunnel-push-pop-ipv6.at > b/tests/tunnel-push-pop-ipv6.at index 3f3d5ee..0e4cb91 100644 > --- a/tests/tunnel-push-pop-ipv6.at > +++ b/tests/tunnel-push-pop-ipv6.at > @@ -12,6 +12,8 @@ AT_CHECK([ovs-vsctl add-port int-br t2 -- set Interface t2 > type=vxlan \ > options:remote_ip=2001:cafe::93 options:out_key=flow > options:csum=true ofport_request=4\ > -- add-port int-br t4 -- set Interface t4 type=geneve \ > options:remote_ip=flow options:key=123 > ofport_request=5\ > + -- add-port int-br t5 -- set Interface t5 type=gre \ > + options:remote_ip=2001:cafe::92 > + options:key=455 options:layer3=true ofport_request=6\ > ], [0]) > > AT_CHECK([ovs-appctl dpif/show], [0], [dnl @@ -25,6 +27,7 @@ > dummy@ovs-dummy: hit:0 missed:0 > t2 2/4789: (vxlan: key=123, remote_ip=2001:cafe::92) > t3 4/4789: (vxlan: csum=true, out_key=flow, > remote_ip=2001:cafe::93) > t4 5/6081: (geneve: key=123, remote_ip=flow) > + t5 6/3: (gre: key=455, layer3=true, remote_ip=2001:cafe::92) > ]) > > dnl First setup dummy interface IP address, then add the route @@ > -65,6 +68,7 @@ AT_CHECK([ovs-appctl tnl/ports/show |sort], [0], [dnl > Listening ports: > genev_sys_6081 (6081) > gre_sys (3) > +gre_sys (3) (layer3) > vxlan_sys_4789 (4789) > ]) > > @@ -130,12 +134,12 @@ AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port > 3'], [0], [dnl > port 3: rx pkts=1, bytes=98, drop=?, errs=?, frame=?, over=?, crc=? 
> ]) > > -dnl Check GRE only accepts encapsulated Ethernet frames > -AT_CHECK([ovs-appctl netdev-dummy/receive p0 > 'aa55aa550000001b213cab6486dd60000000006a2f402001cafe00000000000000000 > 00000922001cafe00000000000000000000008820000800000001c8fe71d883724fbeb > 6f4e1494a080045000054ba200000400184861e0000011e00000200004227e75400030 > af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f2021222 > 32425262728292a2b2c2d2e2f3031323334353637']) > +dnl Check decapsulation of L3GRE packet AT_CHECK([ovs-appctl > +netdev-dummy/receive p0 > +'aa55aa550000001b213cab6486dd60000000005a2f402001cafe0000000000000000 > +000000922001cafe00000000000000000000008820000800000001c745000054ba200 > +000400184861e0000011e00000200004227e75400030af3195500000000f265010000 > +000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2 > +f3031323334353637']) > ovs-appctl time/warp 1000 > > -AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 3'], [0], [dnl > - port 3: rx pkts=1, bytes=98, drop=?, errs=?, frame=?, over=?, crc=? > +AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 6'], [0], [dnl > + port 6: rx pkts=1, bytes=84, drop=?, errs=?, frame=?, over=?, crc=? 
> ]) > > dnl Check decapsulation of Geneve packet with options diff --git > a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at index > 0e596f2..f1f956d 100644 > --- a/tests/tunnel-push-pop.at > +++ b/tests/tunnel-push-pop.at > @@ -12,6 +12,8 @@ AT_CHECK([ovs-vsctl add-port int-br t2 -- set Interface t2 > type=vxlan \ > options:remote_ip=1.1.2.93 options:out_key=flow > options:csum=true ofport_request=4\ > -- add-port int-br t4 -- set Interface t4 type=geneve \ > options:remote_ip=flow options:key=123 > ofport_request=5\ > + -- add-port int-br t5 -- set Interface t5 type=gre \ > + options:remote_ip=1.1.2.92 options:key=455 > + options:layer3=true ofport_request=6\ > ], [0]) > > AT_CHECK([ovs-appctl dpif/show], [0], [dnl @@ -25,6 +27,7 @@ > dummy@ovs-dummy: hit:0 missed:0 > t2 2/4789: (vxlan: key=123, remote_ip=1.1.2.92) > t3 4/4789: (vxlan: csum=true, out_key=flow, remote_ip=1.1.2.93) > t4 5/6081: (geneve: key=123, remote_ip=flow) > + t5 6/3: (gre: key=455, layer3=true, remote_ip=1.1.2.92) > ]) > > dnl First setup dummy interface IP address, then add the route @@ > -70,6 +73,7 @@ AT_CHECK([ovs-appctl tnl/ports/show |sort], [0], [dnl > Listening ports: > genev_sys_6081 (6081) > gre_sys (3) > +gre_sys (3) (layer3) > vxlan_sys_4789 (4789) > ]) > > @@ -108,8 +112,14 @@ AT_CHECK([tail -1 stdout], [0], > dnl Check GRE tunnel push > AT_CHECK([ovs-ofctl add-flow int-br action=3]) > AT_CHECK([ovs-appctl ofproto/trace ovs-dummy > 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type( > 0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no) > '], [0], [stdout]) > +AT_CHECK([tail -1 stdout], [0], [Datapath actions: > +tnl_push(tnl_port(3),header(size=42,type=3,eth(dst=f8:bc:12:44:34:b6, > +src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92, > +proto=47,tos=0,ttl=64,frag=0x4000),gre((flags=0x2000,proto=0x6558),ke > +y=0x1c8)),out_port(100)) > +]) > + > +dnl Check L3GRE tunnel push > +AT_CHECK([ovs-ofctl add-flow int-br 
action=6]) AT_CHECK([ovs-appctl > +ofproto/trace ovs-dummy > +'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type > +(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=n > +o)'], [0], [stdout]) > AT_CHECK([tail -1 stdout], [0], > - [Datapath actions: > tnl_push(tnl_port(3),header(size=42,type=3,eth(dst=f8:bc:12:44:34:b6,s > rc=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,pr > oto=47,tos=0,ttl=64,frag=0x4000),gre((flags=0x2000,proto=0x6558),key=0 > x1c8)),out_port(100)) > + [Datapath actions: > + pop_eth,tnl_push(tnl_port(3),header(size=42,type=3,eth(dst=f8:bc:12: > + 44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst > + =1.1.2.92,proto=47,tos=0,ttl=64,frag=0x4000),gre((flags=0x2000,proto > + =0x800),key=0x1c7)),out_port(100)) > ]) > > dnl Check Geneve tunnel push > @@ -135,12 +145,20 @@ AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port > 3'], [0], [dnl > port 3: rx pkts=1, bytes=98, drop=?, errs=?, frame=?, over=?, crc=? 
> ]) > > -dnl Check GRE only accepts encapsulated Ethernet frames > -AT_CHECK([ovs-appctl netdev-dummy/receive p0 > 'aa55aa550000001b213cab6408004500007e79464000402fba550101025c010102582 > 0000800000001c8fe71d883724fbeb6f4e1494a080045000054ba200000400184861e0 > 000011e00000200004227e75400030af3195500000000f265010000000000101112131 > 415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363 > 7']) > +dnl Check decapsulation of L3GRE packet AT_CHECK([ovs-appctl > +netdev-dummy/receive p0 > +'aa55aa550000001b213cab6408004500007079464000402fba630101025c01010258 > +20000800000001c745000054ba200000400184861e0000011e00000200004227e7540 > +0030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f20 > +2122232425262728292a2b2c2d2e2f3031323334353637']) > ovs-appctl time/warp 1000 > > -AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 3'], [0], [dnl > +AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 6'], [0], [dnl > + port 6: rx pkts=1, bytes=84, drop=?, errs=?, frame=?, over=?, crc=? > +]) > + > +dnl Check GREL3 only accepts non-fragmented packets? > +AT_CHECK([ovs-appctl netdev-dummy/receive p0 > +'aa55aa550000001b213cab6408004500007e79464000402fba550101025c01010258 > +20000800000001c7fe71d883724fbeb6f4e1494a080045000054ba200000400184861 > +e0000011e00000200004227e75400030af3195500000000f265010000000000101112 > +131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f30313233343 > +53637']) > + > +AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port [[36]]' | sort], > +[0], [dnl > port 3: rx pkts=1, bytes=98, drop=?, errs=?, frame=?, over=?, crc=? > + port 6: rx pkts=1, bytes=84, drop=?, errs=?, frame=?, over=?, crc=? 
> ]) > > dnl Check decapsulation of Geneve packet with options diff --git > a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index b4af5a5..931d39c > 100644 > --- a/vswitchd/vswitch.xml > +++ b/vswitchd/vswitch.xml > @@ -2246,6 +2246,19 @@ > > </group> > > + <group title="Tunnel Options: gre only"> > + <p> > + <code>gre</code> interfaces support these options. > + </p> > + > + <column name="options" key="layer3" type='{"type": "boolean"}'> > + <p> > + Optional. Packets are sent and received without an Ethernet > + header present. > + </p> > + </column> > + </group> > + > <group title="Tunnel Options: gre, geneve, and vxlan"> > <p> > <code>gre</code>, <code>geneve</code>, and _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev