On Apr 19, 2013, at 10:41 , ext Simon Horman wrote: > diff --git a/datapath/actions.c b/datapath/actions.c > index 0dac658..2c923be 100644 > --- a/datapath/actions.c > +++ b/datapath/actions.c > @@ -38,6 +38,7 @@ > #include "vport.h" > > static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, > + unsigned *mpls_stack_depth, > const struct nlattr *attr, int len, bool > keep_skb); > > static int make_writable(struct sk_buff *skb, int write_len) > @@ -48,6 +49,89 @@ static int make_writable(struct sk_buff *skb, int > write_len) > return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); > } > > +static void set_ethertype(struct sk_buff *skb, const __be16 ethertype) > +{ > + struct ethhdr *hdr = (struct ethhdr *)skb_mac_header(skb); > + if (hdr->h_proto == ethertype) > + return; > + hdr->h_proto = ethertype;
Will this work properly if the skb has VLAN headers? I recall there was an earlier version that used the l2_size (now mac_len) to locate the actual "h_proto" to update? > + if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) { > + __be16 diff[] = { ~hdr->h_proto, ethertype }; > + skb->csum = ~csum_partial((char *)diff, sizeof(diff), > + ~skb->csum); > + } > +} > + > +static int push_mpls(struct sk_buff *skb, const struct ovs_action_push_mpls > *mpls) > +{ > + __be32 *new_mpls_lse; > + int err; > + > + err = make_writable(skb, skb->mac_len + MPLS_HLEN); > + if (unlikely(err)) > + return err; > + > + skb_push(skb, MPLS_HLEN); > + memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), > + skb->mac_len); > + skb_reset_mac_header(skb); > + skb_set_network_header(skb, skb->mac_len); > + > + new_mpls_lse = (__be32 *)skb_network_header(skb); > + *new_mpls_lse = mpls->mpls_lse; > + > + if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) > + skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse, > + MPLS_HLEN, 0)); > + > + set_ethertype(skb, mpls->mpls_ethertype); > + return 0; > +} > + > +static int pop_mpls(struct sk_buff *skb, const __be16 *ethertype) > +{ > + int err; > + > + err = make_writable(skb, skb->mac_len + MPLS_HLEN); > + if (unlikely(err)) > + return err; > + > + if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) > + skb->csum = csum_sub(skb->csum, > + csum_partial(skb_network_header(skb), > + MPLS_HLEN, 0)); > + > + memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), > + skb->mac_len); > + > + skb_pull(skb, MPLS_HLEN); > + skb_reset_mac_header(skb); > + skb_set_network_header(skb, skb->mac_len); > + > + set_ethertype(skb, *ethertype); > + return 0; > +} > + > +static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse) > +{ > + __be32 *stack = (__be32 *)skb_network_header(skb); > + int err; > + > + err = make_writable(skb, skb->mac_len + MPLS_HLEN); > + if (unlikely(err)) > + return err; > + > + if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) { > + __be32 diff[] = { ~(*stack), *mpls_lse }; > + skb->csum = ~csum_partial((char *)diff, sizeof(diff), > + ~skb->csum); > + } > + > + *stack = *mpls_lse; > + > + return 0; > +} > + > /* remove VLAN header from packet and update csum accordingly. */ > static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) > { > @@ -115,6 +199,9 @@ static int push_vlan(struct sk_buff *skb, const struct > ovs_action_push_vlan *vla > if (!__vlan_put_tag(skb, current_tag)) > return -ENOMEM; > > + /* update mac_len for MPLS functions */ > + skb_reset_mac_len(skb); > + > if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) > skb->csum = csum_add(skb->csum, csum_partial(skb->data > + (2 * ETH_ALEN), VLAN_HLEN, 0)); > @@ -352,13 +439,26 @@ static int set_tcp(struct sk_buff *skb, const struct > ovs_key_tcp *tcp_port_key) > return 0; > } > > -static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port) > +static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port, > + unsigned mpls_stack_depth) > { > struct vport *vport; > > if (unlikely(!skb)) > return -ENOMEM; > > + /* The mpls_stack_depth is only non zero if a non-MPLS packet is > + * turned into an MPLS packet via an MPLS push action. In this case > + * the skb may be GSO so update skb->mac_len and skb's > + * network_header to correspond to the bottom of the MPLS label > + * stack rather than the end of the original L2 data which is now > + * the top of the MPLS label stack. */ It is not clear to me that "bottom of the MPLS label stack" necessarily refers to the start of L3 header, "Bottom of stack" having a special meaning with MPLS. It might be clearer to state that "during action execution network_header, and mac_len, correspondingly, have tracked the end of the L2 frame (including any VLAN headers), but proper skb output processing (e.g., GSO) requires the network_header (and mac_len) to track the start of the L3 header instead. These differ in the presence of MPLS headers." > + if (mpls_stack_depth) { > + skb->mac_len += MPLS_HLEN * mpls_stack_depth; > + skb_set_network_header(skb, skb->mac_len); > + skb_set_encapsulation_features(skb); > + } > + > vport = ovs_vport_rcu(dp, out_port); > if (unlikely(!vport)) { > kfree_skb(skb); > @@ -398,7 +498,7 @@ static int output_userspace(struct datapath *dp, struct > sk_buff *skb, > } > > static int sample(struct datapath *dp, struct sk_buff *skb, > - const struct nlattr *attr) > + unsigned *mpls_stack_depth, const struct nlattr *attr) > { > const struct nlattr *acts_list = NULL; > const struct nlattr *a; > @@ -418,8 +518,9 @@ static int sample(struct datapath *dp, struct sk_buff > *skb, > } > } > > - return do_execute_actions(dp, skb, nla_data(acts_list), > - nla_len(acts_list), true); > + return do_execute_actions(dp, skb, mpls_stack_depth, > + nla_data(acts_list), nla_len(acts_list), > + true); > } > > static int execute_set_action(struct sk_buff *skb, > @@ -459,13 +560,23 @@ static int execute_set_action(struct sk_buff *skb, > case OVS_KEY_ATTR_UDP: > err = set_udp(skb, nla_data(nested_attr)); > break; > + > + case OVS_KEY_ATTR_MPLS: > + err = set_mpls(skb, nla_data(nested_attr)); > + break; > } > > return err; > } > > -/* Execute a list of actions against 'skb'. */ > +/* Execute a list of actions against 'skb'. > + * > + * The stack depth is only tracked in the case of a non-MPLS packet > + * that becomes MPLS via an MPLS push action. The stack depth > + * is passed to do_output() in order to allow it to prepare the > + * skb for possible GSO segmentation. */ > static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, > + unsigned *mpls_stack_depth, > const struct nlattr *attr, int len, bool keep_skb) > { > /* Every output action needs a separate clone of 'skb', but the common > @@ -481,7 +592,8 @@ static int do_execute_actions(struct datapath *dp, struct > sk_buff *skb, > int err = 0; > > if (prev_port != -1) { > - do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port); > + do_output(dp, skb_clone(skb, GFP_ATOMIC), > + prev_port, *mpls_stack_depth); > prev_port = -1; > } > > @@ -494,6 +606,18 @@ static int do_execute_actions(struct datapath *dp, > struct sk_buff *skb, > output_userspace(dp, skb, a); > break; > > + case OVS_ACTION_ATTR_PUSH_MPLS: > + err = push_mpls(skb, nla_data(a)); > + if (!eth_p_mpls(skb->protocol)) > + (*mpls_stack_depth)++; > + break; > + > + case OVS_ACTION_ATTR_POP_MPLS: > + err = pop_mpls(skb, nla_data(a)); > + if (!eth_p_mpls(skb->protocol)) > + (*mpls_stack_depth)--; > + break; > + In both cases the stack is changed whenever err == 0, but it is not immediately clear to me whether the '!eth_p_mpls(skb->protocol)' is true if and only if the label stack has changed. Jarno _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev