On 07/12 16:00:04, Github ODP bot wrote:
> From: Dmitry Eremin-Solenikov <dmitry.ereminsoleni...@linaro.org>
> 
> Clang 3.8 is stricter than GCC wrt register allocation vs 128-bit
> variables. Sometimes it can not understand using 128-bit var in place of
> 64-bit register resulting in the following errors:
> 
> /odp_ipfragreass_atomics_arm.h:18:51: error: value size does not match
> register
>       size specified by the constraint and modifier
>       [-Werror,-Wasm-operand-widths]
>                 __asm__ volatile("ldaxp %0, %H0, [%1]" : "=&r" (old)
>                                                                 ^
> ./odp_ipfragreass_atomics_arm.h:18:27: note: use constraint modifier "w"
>                 __asm__ volatile("ldaxp %0, %H0, [%1]" : "=&r" (old)
> 
> Explicitly pass low and high parts of 128-bit variable in separate
> assembly parameters.
> 
> Signed-off-by: Dmitry Eremin-Solenikov <dmitry.ereminsoleni...@linaro.org>
> ---
> /** Email created from pull request 73 (lumag:cross-2)
>  ** https://github.com/Linaro/odp/pull/73
>  ** Patch: https://github.com/Linaro/odp/pull/73.patch
>  ** Base sha: 7fc6d27e937b57b31360b07028388c811f8300dc
>  ** Merge commit sha: 3dd283d7e61ba21beed2607ef200584754f22d76
>  **/
>  example/ipfragreass/odp_ipfragreass_atomics_arm.h | 19 +++++++++++++------
>  1 file changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/example/ipfragreass/odp_ipfragreass_atomics_arm.h 
> b/example/ipfragreass/odp_ipfragreass_atomics_arm.h
> index 99c37a77..94ccd00e 100644
> --- a/example/ipfragreass/odp_ipfragreass_atomics_arm.h
> +++ b/example/ipfragreass/odp_ipfragreass_atomics_arm.h
> @@ -13,26 +13,33 @@
>  static inline __int128 lld(__int128 *var, int mo)
>  {
>       __int128 old;
> +     uint64_t lo, hi;
>  
>       if (mo == __ATOMIC_ACQUIRE)
> -             __asm__ volatile("ldaxp %0, %H0, [%1]" : "=&r" (old)
> +             __asm__ volatile("ldaxp %0, %1, [%2]" : "=r" (lo), "=r" (hi)

The LDAXP inst performs two loads into two 64-bit registers. The order of these
loads are not specified, so either of the output operands can be written to
before the instruction finishes. So, the early clobber modifier '&' should be
used not just on the first output operand (lo) but also the last (hi).

Please see platform/linux-generic/arch/arm/odp_llsc.h for reference.

>                                : "r" (var) : "memory");
>       else /* mo == __ATOMIC_RELAXED */
> -             __asm__ volatile("ldxp %0, %H0, [%1]" : "=&r" (old)
> +             __asm__ volatile("ldxp %0, %1, [%2]" : "=r" (lo), "=r" (hi)
>                                : "r" (var) : );
> +     old = hi;
> +     old <<= 64;
> +     old |= lo;
> +
>       return old;
> +
>  }
>  
>  static inline uint32_t scd(__int128 *var, __int128 neu, int mo)
>  {
>       uint32_t ret;
> +     uint64_t lo = neu, hi = neu >> 64;
>  
>       if (mo == __ATOMIC_RELEASE)
> -             __asm__ volatile("stlxp %w0, %1, %H1, [%2]" : "=&r" (ret)
> -                              : "r" (neu), "r" (var) : "memory");
> +             __asm__ volatile("stlxp %w0, %1, %2, [%3]" : "=&r" (ret)
> +                              : "r" (lo), "r" (hi), "r" (var) : "memory");
>       else /* mo == __ATOMIC_RELAXED */
> -             __asm__ volatile("stxp %w0, %1, %H1, [%2]" : "=&r" (ret)
> -                              : "r" (neu), "r" (var) : );
> +             __asm__ volatile("stxp %w0, %1, %2, [%3]" : "=&r" (ret)
> +                              : "r" (lo), "r" (hi), "r" (var) : "memory");
>       return ret;
>  }
>  
> 

Reply via email to