On 14 Jun 2022, at 13:57, Emma Finn wrote:
> This commit adds the AVX512 implementation of the
> push_vlan action.
>
> Signed-off-by: Emma Finn <[email protected]>
> ---
> lib/odp-execute-avx512.c | 37 +++++++++++++++++++++++++++++++++++++
> lib/odp-execute-private.c | 1 +
> lib/odp-execute.c | 22 +++++++++++++---------
> 3 files changed, 51 insertions(+), 9 deletions(-)
>
> diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
> index f9e2b1727..bb178cbac 100644
> --- a/lib/odp-execute-avx512.c
> +++ b/lib/odp-execute-avx512.c
> @@ -109,6 +109,41 @@ action_avx512_pop_vlan(struct dp_packet_batch *batch,
> }
> }
>
> +/* This function will load the entire eth_header into a 128-bit wide
> register.
> + * Then use an 8-byte shuffle to shift the data left to make room for
> + * the vlan header. Insert the new vlan header and then store back to the
> + * original packet. */
> +static void
> +action_avx512_push_vlan(struct dp_packet_batch *batch, const struct nlattr
> *a)
> +{
> + struct dp_packet *packet;
> + const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
> + ovs_be16 tpid, tci;
> +
> + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> + tpid = vlan->vlan_tpid;
> + tci = vlan->vlan_tci;
> +
> + avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN);
> +
> + /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */
> + char *pkt_data = (char *) dp_packet_data(packet);
> + const uint16_t tci_proc = tci & htons(~VLAN_CFI);
> + const uint32_t tpid_tci = (tci_proc << 16) | tpid;
> +
> + static const uint8_t vlan_push_shuffle_mask[16] = {
> + 4, 5, 6, 7, 8, 9, 10, 11,
> + 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF
> + };
> +
> + __m128i v_ether = _mm_loadu_si128((void *) pkt_data);
> + __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask);
> + __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index);
> + __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3);
> + _mm_storeu_si128((void *) pkt_data, v_vlan_hdr);
> + }
> +}
> +
> /* Probe functions to check ISA requirements. */
> static bool
> avx512_isa_probe(void)
> @@ -140,6 +175,8 @@ action_avx512_init(struct odp_execute_action_impl *self)
> /* Set function pointers for actions that can be applied directly, these
> * are identified by OVS_ACTION_ATTR_*. */
> self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan;
> + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan;
> +
> return 0;
> }
>
> diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
> index de2e4dfc4..751a68fe3 100644
> --- a/lib/odp-execute-private.c
> +++ b/lib/odp-execute-private.c
> @@ -209,6 +209,7 @@ action_autoval_init(struct odp_execute_action_impl *self)
> /* Set function pointers for actions that can be applied directly, these
> * are identified by OVS_ACTION_ATTR_*. */
> self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic;
> + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic;
Based on my previous comments I think this should no longer be needed.
>
> return 0;
> }
> diff --git a/lib/odp-execute.c b/lib/odp-execute.c
> index a49b331ef..59f6bdc64 100644
> --- a/lib/odp-execute.c
> +++ b/lib/odp-execute.c
> @@ -845,6 +845,17 @@ action_pop_vlan(struct dp_packet_batch *batch,
> }
> }
>
> +static void
> +action_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a)
> +{
> + struct dp_packet *packet;
> + const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
> +
> + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> + eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
> + }
> +}
> +
> /* Implementation of the scalar actions impl init function. Build up the
> * array of func ptrs here.
> */
> @@ -854,6 +865,7 @@ odp_action_scalar_init(struct odp_execute_action_impl
> *self)
> /* Set function pointers for actions that can be applied directly, these
> * are identified by OVS_ACTION_ATTR_*. */
> self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan;
> + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan;
>
> return 0;
> }
> @@ -995,15 +1007,6 @@ odp_execute_actions(void *dp, struct dp_packet_batch
> *batch, bool steal,
> break;
> }
>
> - case OVS_ACTION_ATTR_PUSH_VLAN: {
> - const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
> -
> - DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> - eth_push_vlanog og eth_push_vlan(packet, vlan->vlan_tpid,
> vlan->vlan_tci);
> - }
> - break;
> - }
> -
> case OVS_ACTION_ATTR_PUSH_MPLS: {
> const struct ovs_action_push_mpls *mpls = nl_attr_get(a);
>
> @@ -1156,6 +1159,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch
> *batch, bool steal,
> case __OVS_ACTION_ATTR_MAX:
> /* The following actions are handled by the scalar implementation. */
> case OVS_ACTION_ATTR_POP_VLAN:
> + case OVS_ACTION_ATTR_PUSH_VLAN:
> OVS_NOT_REACHED();
> }
>
> --
> 2.32.0
Other than the one remark above I have nothing to add other than some more
explicit comments on the AVX section. See diff below:
diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
index 7b0e00ffc..54313cd1f 100644
--- a/lib/odp-execute-avx512.c
+++ b/lib/odp-execute-avx512.c
@@ -154,10 +154,8 @@ action_avx512_pop_vlan(struct dp_packet_batch *batch,
}
}
-/* This function will load the entire eth_header into a 128-bit wide register.
- * Then use an 8-byte shuffle to shift the data left to make room for
- * the vlan header. Insert the new vlan header and then store back to the
- * original packet. */
+/* This function performance the same operation on each packet in the batch as
+ * the scalar eth_push_vlan() function. */
static void
action_avx512_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a)
{
@@ -169,22 +167,42 @@ action_avx512_push_vlan(struct dp_packet_batch *batch,
const struct nlattr *a)
tpid = vlan->vlan_tpid;
tci = vlan->vlan_tci;
+ /* As we are about to insert the VLAN_HEADER we now need to adjust all
+ * the offsets. */
avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN);
- /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */
char *pkt_data = (char *) dp_packet_data(packet);
+
+ /* Build up the VLAN TCI/TPID in a single uint32_t. */
const uint16_t tci_proc = tci & htons(~VLAN_CFI);
const uint32_t tpid_tci = (tci_proc << 16) | tpid;
+ /* This shuffle mask is used below, and each position tells where to
+ * move the bytes to. So here, the fourth byte in v_ether is moved to
+ * byte location 0 in v_shift. The fifth is moved to 1, etc., etc.
+ * The 0xFF is special it tells to fill that position with 0.
+ */
static const uint8_t vlan_push_shuffle_mask[16] = {
4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF
};
+ /* Load the first 128-bits of the packet into the v_ether register.
+ * Note that this includes the 4 unused bytes (VLAN_HEADER_LEN). */
__m128i v_ether = _mm_loadu_si128((void *) pkt_data);
+
+ /* Load the shuffle mask in v_index. */
__m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask);
+
+ /* Move(shuffle) the veth_dst and veth_src data to create room for
+ * the vlan header. */
__m128i v_shift = _mm_shuffle_epi8(v_ether, v_index);
+
+ /* Copy(insert) the 32-bit VLAN header, tpid_tci, at the 3rd 32-bit
+ * word offset, i.e., ofssetof(vlan_eth_header, veth_type) */
__m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3);
+
+ /* Write back the modified ethernet header. */
_mm_storeu_si128((void *) pkt_data, v_vlan_hdr);
}
}
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev