Re: [PATCH] lib/raid6: use vdupq_n_u8 to avoid endianness warnings

Ard Biesheuvel Mon, 25 Feb 2019 23:19:53 -0800

On Tue, 26 Feb 2019 at 05:03, <ndesaulni...@google.com> wrote:
>
> Clang warns: vector initializers are not compatible with NEON intrinsics
> in big endian mode [-Wnonportable-vector-initialization]
>
> While this is usually the case, it's not an issue for this case since
> we're initializing the uint8x16_t (16x uint8_t's) with the same value.
>
> Instead, use vdupq_n_u8 which both compilers lower into a single movi
> instruction: https://godbolt.org/z/vBrgzt
>
> This avoids the static storage for a constant value.
>
> Link: https://github.com/ClangBuiltLinux/linux/issues/214
> Suggested-by: Nathan Chancellor <natechancel...@gmail.com>
> Signed-off-by: Nick Desaulniers <ndesaulni...@google.com>


Much better, thanks.

Did you double check that the intrinsic exists on 32-bit ARM as well?
I assume it does, but please make sure if you haven't yet.

If so,

Reviewed-by: Ard Biesheuvel <ard.biesheu...@linaro.org>

> ---
>  lib/raid6/neon.uc            | 5 ++---
>  lib/raid6/recov_neon_inner.c | 7 ++-----
>  2 files changed, 4 insertions(+), 8 deletions(-)
>
> diff --git a/lib/raid6/neon.uc b/lib/raid6/neon.uc
> index d5242f544551..b7c68030da4f 100644
> --- a/lib/raid6/neon.uc
> +++ b/lib/raid6/neon.uc
> @@ -28,7 +28,6 @@
>
>  typedef uint8x16_t unative_t;
>
> -#define NBYTES(x) ((unative_t){x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
>  #define NSIZE  sizeof(unative_t)
>
>  /*
> @@ -61,7 +60,7 @@ void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
>         int d, z, z0;
>
>         register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
> -       const unative_t x1d = NBYTES(0x1d);
> +       const unative_t x1d = vdupq_n_u8(0x1d);
>
>         z0 = disks - 3;         /* Highest data disk */
>         p = dptr[z0+1];         /* XOR parity */
> @@ -92,7 +91,7 @@ void raid6_neon$#_xor_syndrome_real(int disks, int start, int stop,
>         int d, z, z0;
>
>         register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
> -       const unative_t x1d = NBYTES(0x1d);
> +       const unative_t x1d = vdupq_n_u8(0x1d);
>
>         z0 = stop;              /* P/Q right side optimization */
>         p = dptr[disks-2];      /* XOR parity */
> diff --git a/lib/raid6/recov_neon_inner.c b/lib/raid6/recov_neon_inner.c
> index 8cd20c9f834a..7d00c31a6547 100644
> --- a/lib/raid6/recov_neon_inner.c
> +++ b/lib/raid6/recov_neon_inner.c
> @@ -10,11 +10,6 @@
>
>  #include <arm_neon.h>
>
> -static const uint8x16_t x0f = {
> -       0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
> -       0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
> -};
> -
>  #ifdef CONFIG_ARM
>  /*
>   * AArch32 does not provide this intrinsic natively because it does not
> @@ -41,6 +36,7 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp,
>         uint8x16_t pm1 = vld1q_u8(pbmul + 16);
>         uint8x16_t qm0 = vld1q_u8(qmul);
>         uint8x16_t qm1 = vld1q_u8(qmul + 16);
> +       uint8x16_t x0f = vdupq_n_u8(0x0f);
>
>         /*
>          * while ( bytes-- ) {
> @@ -87,6 +83,7 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq,
>  {
>         uint8x16_t qm0 = vld1q_u8(qmul);
>         uint8x16_t qm1 = vld1q_u8(qmul + 16);
> +       uint8x16_t x0f = vdupq_n_u8(0x0f);
>
>         /*
>          * while (bytes--) {
> --
> 2.21.0.rc2.261.ga7da99ff1b-goog
>

Re: [PATCH] lib/raid6: use vdupq_n_u8 to avoid endianness warnings

Reply via email to