Re: [ovs-dev] [v4] odp-execute: Add ISA implementation of set_masked IPv6 action

Eelco Chaudron Fri, 25 Nov 2022 04:26:51 -0800

On 24 Nov 2022, at 10:30, Emma Finn wrote:

> This commit adds support for the AVX512 implementation of the
> ipv6_set_addrs action as well as an AVX512 implementation of
> updating the L4 checksums.
>
> Signed-off-by: Emma Finn <emma.f...@intel.com>

Thanks Emma for the v4, I have one question and a couple of style issues. To 
speed things up I just provide the diff for the style issues.

I was not able to do any actual testing, as my system did not have the 
avx512vbmi extension :(

Cheers,

Eelco

> ---

Style issues diff:

diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
index 82ff7e647..f798d6708 100644
--- a/lib/odp-execute-avx512.c
+++ b/lib/odp-execute-avx512.c
@@ -20,9 +20,9 @@

 #include <config.h>
 #include <errno.h>
-#include <sys/types.h>
 #include <netinet/in.h>
 #include <netinet/ip6.h>
+#include <sys/types.h>

 #include "csum.h"
 #include "dp-packet.h"
@@ -547,8 +547,8 @@ avx512_ipv6_sum_header(__m512i ip6_header)
      * horizontal add. */
     __m256i v_swap32a = _mm256_setr_epi32(0x0, 0x4, 0xF, 0xF,
                                           0xF, 0xF, 0xF, 0xF);
-    v_delta = _mm256_permutexvar_epi32(v_swap32a, v_delta);

+    v_delta = _mm256_permutexvar_epi32(v_swap32a, v_delta);
     v_delta = _mm256_hadd_epi32(v_delta, v_zeros);
     v_delta = _mm256_hadd_epi16(v_delta, v_zeros);

@@ -562,7 +562,7 @@ avx512_ipv6_addr_csum_delta(__m512i old_header, __m512i 
new_header)
 {
     uint16_t old_delta = avx512_ipv6_sum_header(old_header);
     uint16_t new_delta = avx512_ipv6_sum_header(new_header);
-    uint32_t csum_delta = (uint16_t)~old_delta + new_delta;
+    uint32_t csum_delta = ((uint16_t) ~old_delta) + new_delta;

     return  ~csum_finish(csum_delta);
 }
@@ -606,15 +606,15 @@ action_avx512_ipv6_set_addrs(struct dp_packet_batch 
*batch,
     __m512i v_key_shuf = _mm512_permutexvar_epi8(v_shuffle, v_key);
     __m512i v_mask_shuf = _mm512_permutexvar_epi8(v_shuffle, v_mask);

-
     /* Set the v_zero register to all zero's. */
     const __m128i v_zeros = _mm_setzero_si128();
+
     /* Set the v_all_ones register to all one's. */
     const __m128i v_all_ones = _mm_cmpeq_epi16(v_zeros, v_zeros);

-    /* Load ip6 src and dst respectively into 128-bit wide registers. */
+    /* Load ip6 src and dst masks respectively into 128-bit wide registers. */
     __m128i v_src = _mm_loadu_si128((void *) mask);
-    __m128i v_dst = _mm_maskz_loadu_epi64(0xC,(void *) mask);
+    __m128i v_dst = _mm_maskz_loadu_epi64(0xC, (void *) mask);

     /* Perform a bitwise OR between src and dst registers. */
     __m128i v_or = _mm_or_si128(v_src, v_dst);

> v4:
>   - Reworked and moved check for checksum outside loop.
>   - Code cleanup based on review from Eelco.
> v3:
>   - Added a runtime check for AVX512 vbmi.
> v2:
>   - Added check for availbility of s6_addr32 field of struct in6_addr.
>   - Fixed network headers for freebsd builds.
>

<SNIP>

> +    /* Load ip6 src and dst respectively into 128-bit wide registers. */
> +    __m128i v_src = _mm_loadu_si128((void *) mask);
> +    __m128i v_dst = _mm_maskz_loadu_epi64(0xC,(void *) mask);

Guess it might be me, but I do not understand how _mm_maskz_loadu_epi64() will 
load the dst from the mask.
Looking at the intrinsics guide it will only read the first two 64-bit values, 
but mask points to src?

Should we not just do the following here?

+    __m128i v_src = _mm_loadu_si128((void *) &mask->ipv6_src);
+    __m128i v_dst = _mm_loadu_si128((void *) &mask->ipv6_dst);

> +
> +    /* Perform a bitwise OR between src and dst registers. */
> +    __m128i v_or = _mm_or_si128(v_src, v_dst);
> +
> +    /* Will return true if any bit has been set in v_or, else it will return
> +     * false. */
> +    bool do_checksum = !_mm_test_all_zeros(v_or, v_all_ones);
> +
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +        struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(packet);
> +
> +        /* Load the 40 bytes of the IPv6 header. */
> +        __m512i v_packet = _mm512_maskz_loadu_epi64(0x1F, (void *) nh);
> +
> +        /* AND the v_pkt_mask to the packet data (v_packet). */
> +        __m512i v_pkt_masked = _mm512_andnot_si512(v_mask_shuf, v_packet);
> +
> +        /* OR the new addresses (v_key_shuf) with the masked packet addresses
> +         * (v_pkt_masked). */
> +        __m512i v_new_hdr = _mm512_or_si512(v_key_shuf, v_pkt_masked);
> +
> +        /* If ip6_src or ip6_dst has been modified, L4 checksum needs to
> +         * be updated. */
> +        if (do_checksum) {
> +            uint8_t proto = nh->ip6_nxt;
> +            uint16_t delta_checksum = avx512_ipv6_addr_csum_delta(v_packet,
> +                                                                  v_new_hdr);
> +
> +            if (proto == IPPROTO_UDP) {
> +                struct udp_header *uh = dp_packet_l4(packet);
> +
> +                if (uh->udp_csum) {
> +                    uint16_t old_udp_checksum = ~uh->udp_csum;
> +                    uint32_t udp_checksum = old_udp_checksum + 
> delta_checksum;
> +
> +                    udp_checksum = csum_finish(udp_checksum);
> +
> +                    if (!udp_checksum) {
> +                        udp_checksum = htons(0xffff);
> +                    }
> +
> +                    uh->udp_csum = udp_checksum;
> +                }
> +            } else if (proto == IPPROTO_TCP) {
> +                struct tcp_header *th = dp_packet_l4(packet);
> +                uint16_t old_tcp_checksum = ~th->tcp_csum;
> +                uint32_t tcp_checksum = old_tcp_checksum + delta_checksum;
> +
> +                tcp_checksum = csum_finish(tcp_checksum);
> +                th->tcp_csum = tcp_checksum;
> +            } else if (proto == IPPROTO_ICMPV6) {
> +                struct icmp6_header *icmp = dp_packet_l4(packet);
> +                uint16_t old_icmp6_checksum = ~icmp->icmp6_cksum;
> +                uint32_t icmp6_checksum = old_icmp6_checksum + 
> delta_checksum;
> +
> +                icmp6_checksum = csum_finish(icmp6_checksum);
> +                icmp->icmp6_cksum = icmp6_checksum;
> +            }
> +        }
> +        /* Write back the modified IPv6 addresses. */
> +         _mm512_mask_storeu_epi64((void *) nh, 0x1F, v_new_hdr);
> +    }
> +}
> +#endif /* HAVE_AVX512VBMI */
> +
>  static void
>  action_avx512_set_masked(struct dp_packet_batch *batch, const struct nlattr 
> *a)
>  {
> @@ -514,6 +711,13 @@ action_avx512_init(struct odp_execute_action_impl *self 
> OVS_UNUSED)
>      impl_set_masked_funcs[OVS_KEY_ATTR_ETHERNET] = 
> action_avx512_eth_set_addrs;
>      impl_set_masked_funcs[OVS_KEY_ATTR_IPV4] = action_avx512_ipv4_set_addrs;
>
> +#if HAVE_AVX512VBMI
> +    if (action_avx512vbmi_isa_probe()) {
> +        impl_set_masked_funcs[OVS_KEY_ATTR_IPV6] =
> +                              action_avx512_ipv6_set_addrs;
> +    }
> +#endif
> +
>      return 0;
>  }
>
> diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
> index f80ae5a23..8b86b1e4f 100644
> --- a/lib/odp-execute-private.c
> +++ b/lib/odp-execute-private.c
> @@ -60,6 +60,23 @@ action_avx512_isa_probe(void)
>
>  #endif
>
> +#if ACTION_IMPL_AVX512_CHECK && HAVE_AVX512VBMI
> +bool
> +action_avx512vbmi_isa_probe(void)
> +{
> +    if (cpu_has_isa(OVS_CPU_ISA_X86_AVX512VBMI)) {
> +        return true;
> +    }
> +    return false;
> +}
> +#else
> +bool
> +action_avx512vbmi_isa_probe(void)
> +{
> +    return false;
> +}
> +#endif
> +
>  static struct odp_execute_action_impl action_impls[] = {
>      [ACTION_IMPL_AUTOVALIDATOR] = {
>          .available = false,
> diff --git a/lib/odp-execute-private.h b/lib/odp-execute-private.h
> index 940180c99..643f41c2a 100644
> --- a/lib/odp-execute-private.h
> +++ b/lib/odp-execute-private.h
> @@ -78,6 +78,7 @@ BUILD_ASSERT_DECL(ACTION_IMPL_AUTOVALIDATOR == 1);
>  #define ACTION_IMPL_BEGIN (ACTION_IMPL_AUTOVALIDATOR + 1)
>
>  bool action_avx512_isa_probe(void);
> +bool action_avx512vbmi_isa_probe(void);
>
>  /* Odp execute init handles setting up the state of the actions functions at
>   * initialization time. It cannot return errors, as it must always succeed in
> -- 
> 2.25.1

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [v4] odp-execute: Add ISA implementation of set_masked IPv6 action

Reply via email to