On 14 Jun 2022, at 13:57, Emma Finn wrote: > This commit adds the AVX512 implementation of the > push_vlan action. > > Signed-off-by: Emma Finn <emma.f...@intel.com> > --- > lib/odp-execute-avx512.c | 37 +++++++++++++++++++++++++++++++++++++ > lib/odp-execute-private.c | 1 + > lib/odp-execute.c | 22 +++++++++++++--------- > 3 files changed, 51 insertions(+), 9 deletions(-) > > diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c > index f9e2b1727..bb178cbac 100644 > --- a/lib/odp-execute-avx512.c > +++ b/lib/odp-execute-avx512.c > @@ -109,6 +109,41 @@ action_avx512_pop_vlan(struct dp_packet_batch *batch, > } > } > > +/* This function will load the entire eth_header into a 128-bit wide > register. > + * Then use an 8-byte shuffle to shift the data left to make room for > + * the vlan header. Insert the new vlan header and then store back to the > + * original packet. */ > +static void > +action_avx512_push_vlan(struct dp_packet_batch *batch, const struct nlattr > *a) > +{ > + struct dp_packet *packet; > + const struct ovs_action_push_vlan *vlan = nl_attr_get(a); > + ovs_be16 tpid, tci; > + > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + tpid = vlan->vlan_tpid; > + tci = vlan->vlan_tci; > + > + avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN); > + > + /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */ > + char *pkt_data = (char *) dp_packet_data(packet); > + const uint16_t tci_proc = tci & htons(~VLAN_CFI); > + const uint32_t tpid_tci = (tci_proc << 16) | tpid; > + > + static const uint8_t vlan_push_shuffle_mask[16] = { > + 4, 5, 6, 7, 8, 9, 10, 11, > + 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF > + }; > + > + __m128i v_ether = _mm_loadu_si128((void *) pkt_data); > + __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask); > + __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index); > + __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3); > + _mm_storeu_si128((void *) pkt_data, v_vlan_hdr); > + } > +} > + > /* Probe functions to check ISA requirements. */ > static bool > avx512_isa_probe(void) > @@ -140,6 +175,8 @@ action_avx512_init(struct odp_execute_action_impl *self) > /* Set function pointers for actions that can be applied directly, these > * are identified by OVS_ACTION_ATTR_*. */ > self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan; > + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan; > + > return 0; > } > > diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c > index de2e4dfc4..751a68fe3 100644 > --- a/lib/odp-execute-private.c > +++ b/lib/odp-execute-private.c > @@ -209,6 +209,7 @@ action_autoval_init(struct odp_execute_action_impl *self) > /* Set function pointers for actions that can be applied directly, these > * are identified by OVS_ACTION_ATTR_*. */ > self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic; > + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic;
Based on my previous comments I think this should no longer be needed. > > return 0; > } > diff --git a/lib/odp-execute.c b/lib/odp-execute.c > index a49b331ef..59f6bdc64 100644 > --- a/lib/odp-execute.c > +++ b/lib/odp-execute.c > @@ -845,6 +845,17 @@ action_pop_vlan(struct dp_packet_batch *batch, > } > } > > +static void > +action_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a) > +{ > + struct dp_packet *packet; > + const struct ovs_action_push_vlan *vlan = nl_attr_get(a); > + > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); > + } > +} > + > /* Implementation of the scalar actions impl init function. Build up the > * array of func ptrs here. > */ > @@ -854,6 +865,7 @@ odp_action_scalar_init(struct odp_execute_action_impl > *self) > /* Set function pointers for actions that can be applied directly, these > * are identified by OVS_ACTION_ATTR_*. */ > self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan; > + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan; > > return 0; > } > @@ -995,15 +1007,6 @@ odp_execute_actions(void *dp, struct dp_packet_batch > *batch, bool steal, > break; > } > > - case OVS_ACTION_ATTR_PUSH_VLAN: { > - const struct ovs_action_push_vlan *vlan = nl_attr_get(a); > - > - DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > - eth_push_vlanog og eth_push_vlan(packet, vlan->vlan_tpid, > vlan->vlan_tci); > - } > - break; > - } > - > case OVS_ACTION_ATTR_PUSH_MPLS: { > const struct ovs_action_push_mpls *mpls = nl_attr_get(a); > > @@ -1156,6 +1159,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch > *batch, bool steal, > case __OVS_ACTION_ATTR_MAX: > /* The following actions are handled by the scalar implementation. */ > case OVS_ACTION_ATTR_POP_VLAN: > + case OVS_ACTION_ATTR_PUSH_VLAN: > OVS_NOT_REACHED(); > } > > -- > 2.32.0 Other than the one remark above I have nothing to add other than some more explicit comments on the AVX section. See diff below: diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c index 7b0e00ffc..54313cd1f 100644 --- a/lib/odp-execute-avx512.c +++ b/lib/odp-execute-avx512.c @@ -154,10 +154,8 @@ action_avx512_pop_vlan(struct dp_packet_batch *batch, } } -/* This function will load the entire eth_header into a 128-bit wide register. - * Then use an 8-byte shuffle to shift the data left to make room for - * the vlan header. Insert the new vlan header and then store back to the - * original packet. */ +/* This function performance the same operation on each packet in the batch as + * the scalar eth_push_vlan() function. */ static void action_avx512_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a) { @@ -169,22 +167,42 @@ action_avx512_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a) tpid = vlan->vlan_tpid; tci = vlan->vlan_tci; + /* As we are about to insert the VLAN_HEADER we now need to adjust all + * the offsets. */ avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN); - /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */ char *pkt_data = (char *) dp_packet_data(packet); + + /* Build up the VLAN TCI/TPID in a single uint32_t. */ const uint16_t tci_proc = tci & htons(~VLAN_CFI); const uint32_t tpid_tci = (tci_proc << 16) | tpid; + /* This shuffle mask is used below, and each position tells where to + * move the bytes to. So here, the fourth byte in v_ether is moved to + * byte location 0 in v_shift. The fifth is moved to 1, etc., etc. + * The 0xFF is special it tells to fill that position with 0. + */ static const uint8_t vlan_push_shuffle_mask[16] = { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF }; + /* Load the first 128-bits of the packet into the v_ether register. + * Note that this includes the 4 unused bytes (VLAN_HEADER_LEN). */ __m128i v_ether = _mm_loadu_si128((void *) pkt_data); + + /* Load the shuffle mask in v_index. */ __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask); + + /* Move(shuffle) the veth_dst and veth_src data to create room for + * the vlan header. */ __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index); + + /* Copy(insert) the 32-bit VLAN header, tpid_tci, at the 3rd 32-bit + * word offset, i.e., ofssetof(vlan_eth_header, veth_type) */ __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3); + + /* Write back the modified ethernet header. */ _mm_storeu_si128((void *) pkt_data, v_vlan_hdr); } } _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev