Re: [PATCH net-next v3 2/2] mpls: flow-based multipath selection
Roopa Prabhuwrites: > From: Robert Shearman > > Change the selection of a multipath route to use a flow-based > hash. This more suitable for traffic sensitive to reordering within a > flow (e.g. TCP, L2VPN) and whilst still allowing a good distribution > of traffic given enough flows. > > Selection of the path for a multipath route is done using a hash of: > 1. Label stack up to MAX_MP_SELECT_LABELS labels or up to and >including entropy label, whichever is first. > 2. 3-tuple of (L3 src, L3 dst, proto) from IPv4/IPv6 header in MPLS >payload, if present. > > Naturally, a 5-tuple hash using L4 information in addition would be > possible and be better in some scenarios, but there is a tradeoff > between looking deeper into the packet to achieve good distribution, > and packet forwarding performance, and I have erred on the side of the > latter as the default. Not a fault with this patch, but this patches use of entropy labels does highlight that we don't handle creating entropy labels on ingress nor dealing with entropy labels on egress. > Signed-off-by: Robert Shearman > --- > net/mpls/af_mpls.c | 88 > ++ > 1 file changed, 83 insertions(+), 5 deletions(-) > > diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c > index 4d819df..15dd2eb 100644 > --- a/net/mpls/af_mpls.c > +++ b/net/mpls/af_mpls.c > @@ -22,6 +22,11 @@ > #include > #include "internal.h" > > +/* Maximum number of labels to look ahead at when selecting a path of > + * a multipath route > + */ > +#define MAX_MP_SELECT_LABELS 4 This number seems a little small. Especially given that an entopy label and the entropy label indicator consume two of these. Although I suspect 4 is enough for most cases in practice. > + > static int zero = 0; > static int label_limit = (1 << 20) - 1; > > @@ -77,10 +82,78 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned > int mtu) > } > EXPORT_SYMBOL_GPL(mpls_pkt_too_big); > > -static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt) > +static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, > + struct sk_buff *skb, bool bos) > { > - /* assume single nexthop for now */ > - return >rt_nh[0]; > + struct mpls_entry_decoded dec; > + struct mpls_shim_hdr *hdr; > + bool eli_seen = false; > + int label_index; > + int nh_index = 0; > + u32 hash = 0; > + > + /* No need to look further into packet if there's only > + * one path > + */ > + if (rt->rt_nhn == 1) > + goto out; > + > + for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; > + label_index++) { > + if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) > + break; > + > + /* Read and decode the current label */ > + hdr = mpls_hdr(skb) + label_index; > + dec = mpls_entry_decode(hdr); > + > + /* RFC6790 - reserved labels MUST NOT be used as keys > + * for the load-balancing function > + */ > + if (dec.label == MPLS_LABEL_ENTROPY) { > + eli_seen = true; > + } else if (dec.label >= MPLS_LABEL_FIRST_UNRESERVED) { We should probably test this first and mark this branch as likely. > + hash = jhash_1word(dec.label, hash); > + > + /* The entropy label follows the entropy label > + * indicator, so this means that the entropy > + * label was just added to the hash - no need to > + * go any deeper either in the label stack or in the > + * payload > + */ > + if (eli_seen) > + break; > + } > + > + bos = dec.bos; > + if (bos && pskb_may_pull(skb, sizeof(*hdr) * label_index + > + sizeof(struct iphdr))) { > + const struct iphdr *v4hdr; > + > + v4hdr = (const struct iphdr *)(mpls_hdr(skb) + > +label_index); > + if (v4hdr->version == 4) { > + hash = jhash_3words(ntohl(v4hdr->saddr), > + ntohl(v4hdr->daddr), > + v4hdr->protocol, hash); > + } else if (v4hdr->version == 6 && > + pskb_may_pull(skb, sizeof(*hdr) * label_index + > + sizeof(struct ipv6hdr))) { > + const struct ipv6hdr *v6hdr; > + > + v6hdr = (const struct ipv6hdr *)(mpls_hdr(skb) + > +
Re: [PATCH net-next v3 2/2] mpls: flow-based multipath selection
On 10/11/15, 12:43 PM, Eric W. Biederman wrote: > Roopa Prabhuwrites: > >> From: Robert Shearman >> >> Change the selection of a multipath route to use a flow-based >> hash. This more suitable for traffic sensitive to reordering within a >> flow (e.g. TCP, L2VPN) and whilst still allowing a good distribution >> of traffic given enough flows. >> >> Selection of the path for a multipath route is done using a hash of: >> 1. Label stack up to MAX_MP_SELECT_LABELS labels or up to and >>including entropy label, whichever is first. >> 2. 3-tuple of (L3 src, L3 dst, proto) from IPv4/IPv6 header in MPLS >>payload, if present. >> >> Naturally, a 5-tuple hash using L4 information in addition would be >> possible and be better in some scenarios, but there is a tradeoff >> between looking deeper into the packet to achieve good distribution, >> and packet forwarding performance, and I have erred on the side of the >> latter as the default. > Not a fault with this patch, but this patches use of entropy labels > does highlight that we don't handle creating entropy labels on ingress > nor dealing with entropy labels on egress. > >> Signed-off-by: Robert Shearman >> --- >> net/mpls/af_mpls.c | 88 >> ++ >> 1 file changed, 83 insertions(+), 5 deletions(-) >> >> diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c >> index 4d819df..15dd2eb 100644 >> --- a/net/mpls/af_mpls.c >> +++ b/net/mpls/af_mpls.c >> @@ -22,6 +22,11 @@ >> #include >> #include "internal.h" >> >> +/* Maximum number of labels to look ahead at when selecting a path of >> + * a multipath route >> + */ >> +#define MAX_MP_SELECT_LABELS 4 > This number seems a little small. Especially given that an entopy label > and the entropy label indicator consume two of these. Although I > suspect 4 is enough for most cases in practice. yes, we have seen 4 to be enough in practice as well. We can increase it in future if need be. > >> + >> static int zero = 0; >> static int label_limit = (1 << 20) - 1; >> >> @@ -77,10 +82,78 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, >> unsigned int mtu) >> } >> EXPORT_SYMBOL_GPL(mpls_pkt_too_big); >> >> -static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt) >> +static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, >> + struct sk_buff *skb, bool bos) >> { >> -/* assume single nexthop for now */ >> -return >rt_nh[0]; >> +struct mpls_entry_decoded dec; >> +struct mpls_shim_hdr *hdr; >> +bool eli_seen = false; >> +int label_index; >> +int nh_index = 0; >> +u32 hash = 0; >> + >> +/* No need to look further into packet if there's only >> + * one path >> + */ >> +if (rt->rt_nhn == 1) >> +goto out; >> + >> +for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; >> + label_index++) { >> +if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) >> +break; >> + >> +/* Read and decode the current label */ >> +hdr = mpls_hdr(skb) + label_index; >> +dec = mpls_entry_decode(hdr); >> + >> +/* RFC6790 - reserved labels MUST NOT be used as keys >> + * for the load-balancing function >> + */ >> +if (dec.label == MPLS_LABEL_ENTROPY) { >> +eli_seen = true; >> +} else if (dec.label >= MPLS_LABEL_FIRST_UNRESERVED) { > We should probably test this first and mark this branch as likely. ok > >> +hash = jhash_1word(dec.label, hash); >> + >> +/* The entropy label follows the entropy label >> + * indicator, so this means that the entropy >> + * label was just added to the hash - no need to >> + * go any deeper either in the label stack or in the >> + * payload >> + */ >> +if (eli_seen) >> +break; >> +} >> + >> +bos = dec.bos; >> +if (bos && pskb_may_pull(skb, sizeof(*hdr) * label_index + >> + sizeof(struct iphdr))) { >> +const struct iphdr *v4hdr; >> + >> +v4hdr = (const struct iphdr *)(mpls_hdr(skb) + >> + label_index); >> +if (v4hdr->version == 4) { >> +hash = jhash_3words(ntohl(v4hdr->saddr), >> +ntohl(v4hdr->daddr), >> +v4hdr->protocol, hash); >> +} else if (v4hdr->version == 6 && >> +pskb_may_pull(skb, sizeof(*hdr) * label_index + >> +