CC'ing the ODP mailing list on this issue.

Maxim.

On 13 March 2018 at 03:33, Bill Fischofer <bill.fischo...@linaro.org> wrote:

> Additional details.
>
> __atomic_load_n() is a GCC intrinsic, however __lockfree_load_16() is
> defined in platform/linux-generic/arch/aarch64/odp_atomic.h:
>
> static inline __int128 __lockfree_load_16(__int128 *var, int mo)
> {
>         __int128 old = *var; /* Possibly torn read */
>
>         /* Do CAS to ensure atomicity
>          * Either CAS succeeds (writing back the same value)
>          * Or CAS fails and returns the old value (atomic read)
>          */
>         (void)__lockfree_compare_exchange_16(var, &old, old, false, mo,
> mo);
>         return old;
> }
>
> As is __lockfree_compare_exchange_16():
>
> static inline bool
> __lockfree_compare_exchange_16(register __int128 *var, __int128 *exp,
>        register __int128 neu, bool weak, int mo_success,
>        int mo_failure)
> {
> (void)weak; /* Always do strong CAS or we can't perform atomic read */
> /* Ignore memory ordering for failure, memory order for
> * success must be stronger or equal. */
> (void)mo_failure;
> register __int128 old;
> register __int128 expected;
> int ll_mo = LL_MO(mo_success);
> int sc_mo = SC_MO(mo_success);
>
> expected = *exp;
> __asm__ volatile("" ::: "memory");
> do {
> /* Atomicity of LLD is not guaranteed */
> old = lld(var, ll_mo);
> /* Must write back neu or old to verify atomicity of LLD */
> } while (odp_unlikely(scd(var, old == expected ? neu : old, sc_mo)));
> *exp = old; /* Always update, atomically read value */
> return old == expected;
> }
>
> In turn lld() and scd() are defined in platform/linux-generic/arch/
> aarch64/odp_llsc.h:
>
> static inline __int128 lld(__int128 *var, int mm)
> {
> union i128 old;
>
> if (mm == __ATOMIC_ACQUIRE)
> __asm__ volatile("ldaxp %0, %1, [%2]"
> : "=&r" (old.i64[0]), "=&r" (old.i64[1])
> : "r" (var)
> : "memory");
> else if (mm == __ATOMIC_RELAXED)
> __asm__ volatile("ldxp %0, %1, [%2]"
> : "=&r" (old.i64[0]), "=&r" (old.i64[1])
> : "r" (var)
> : );
> else
> ODP_ABORT();
> return old.i128;
> }
>
> /* Return 0 on success, 1 on failure */
> static inline uint32_t scd(__int128 *var, __int128 neu, int mm)
> {
> uint32_t ret;
>
> if (mm == __ATOMIC_RELEASE)
> __asm__ volatile("stlxp %w0, %1, %2, [%3]"
> : "=&r" (ret)
> : "r" (((union i128)neu).i64[0]),
>    "r" (((union i128)neu).i64[1]),
>    "r" (var)
> : "memory");
> else if (mm == __ATOMIC_RELAXED)
> __asm__ volatile("stxp %w0, %1, %2, [%3]"
> : "=&r" (ret)
> : "r" (((union i128)neu).i64[0]),
>    "r" (((union i128)neu).i64[1]),
>    "r" (var)
> : );
> else
> ODP_ABORT();
> return ret;
> }
>
> So these boil down to a sequence of __asm__() instructions. If these are
> hanging it suggests a compiler issue. Does this occur with a newer GCC
> level?
>
> On Mon, Mar 12, 2018 at 5:21 PM, Maxim Uvarov <maxim.uva...@linaro.org>
> wrote:
>
>> gcc -v
>> Using built-in specs.
>> COLLECT_GCC=gcc
>> COLLECT_LTO_WRAPPER=/usr/lib/gcc/aarch64-linux-gnu/4.8/lto-wrapper
>> Target: aarch64-linux-gnu
>> Configured with: ../src/configure -v --with-pkgversion='Ubuntu/Linaro
>> 4.8.5-4ubuntu2' --with-bugurl=file:///usr/share/doc/gcc-4.8/README.Bugs
>> --enable-languages=c,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr
>> --program-suffix=-4.8 --enable-shared --enable-linker-build-id
>> --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix
>> --with-gxx-include-dir=/usr/include/c++/4.8 --libdir=/usr/lib
>> --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug
>> --enable-libstdcxx-time=yes --enable-gnu-unique-object --disable-libmudflap
>> --disable-libsanitizer --disable-libquadmath --enable-plugin
>> --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk
>> --enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-4.8-arm64/jre
>> --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-4.8-arm64
>> --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-4.8-arm64
>> --with-arch-directory=arm64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar
>> --enable-multiarch --disable-werror --enable-checking=release
>> --build=aarch64-linux-gnu --host=aarch64-linux-gnu
>> --target=aarch64-linux-gnu
>> Thread model: posix
>> gcc version 4.8.5 (Ubuntu/Linaro 4.8.5-4ubuntu2)
>>
>> On 13 March 2018 at 00:20, Maxim Uvarov <maxim.uva...@linaro.org> wrote:
>>
>>> This fixes the problem, but it's too late today to do a clean patch
>>> (debugging is fun when gdb does not work under docker).
>>> So it might be something thunder-x specific.
>>>
>>>
>>> --- a/platform/linux-generic/include/odp_bitset.h
>>> +++ b/platform/linux-generic/include/odp_bitset.h
>>> @@ -27,7 +27,7 @@
>>>  /* Find a suitable data type that supports lock-free atomic operations
>>> */
>>>  #if defined(__aarch64__) && defined(__SIZEOF_INT128__) && \
>>>         __SIZEOF_INT128__ == 16
>>> -#define LOCKFREE16
>>> +// #define LOCKFREE16
>>>  typedef __int128 bitset_t;
>>>  #define ATOM_BITSET_SIZE (CHAR_BIT * __SIZEOF_INT128__)
>>>
>>>
>>> On 13 March 2018 at 00:14, Maxim Uvarov <maxim.uva...@linaro.org> wrote:
>>>
>>>> platform/linux-generic/odp_schedule_scalable.c
>>>>
>>>> static odp_schedule_group_t schedule_group_create(const char *name,
>>>>                                                   const odp_thrmask_t
>>>> *mask)
>>>> {
>>>>
>>>> ......
>>>>
>>>>         printf("%s()%d\n", __func__, __LINE__); <-- prints
>>>>         /* Validate inputs */
>>>>         if (mask == NULL)
>>>>                 ODP_ABORT("mask is NULL\n");
>>>>
>>>>         printf("%s()%d\n", __func__, __LINE__); <- prints
>>>>         odp_spinlock_lock(&sched_grp_lock);
>>>>
>>>>         printf("%s()%d\n", __func__, __LINE__);
>>>>         /* Allocate a scheduler group */
>>>>         free = atom_bitset_load(&sg_free, __ATOMIC_RELAXED);
>>>>         printf("%s()%d\n", __func__, __LINE__); <- not printed, hung
>>>> forever before this
>>>>
>>>> Maxim.
>>>>
>>>> On 13 March 2018 at 00:08, Bill Fischofer <bill.fischo...@linaro.org>
>>>> wrote:
>>>>
>>>>> That's interesting since it was developed by Arm and presumably tested
>>>>> by them on Arm systems.
>>>>>
>>>>> On Mon, Mar 12, 2018 at 4:58 PM, Maxim Uvarov <maxim.uva...@linaro.org>
>>>>> wrote:
>>>>> > I see that odp_init_global() fails on thunder-x with scalable
>>>>> scheduler.
>>>>> >
>>>>> > On 12 March 2018 at 23:57, Bill Fischofer <bill.fischo...@linaro.org>
>>>>> wrote:
>>>>> >>
>>>>> >> Sure. Dmitry says it's a clang related failure. Is that what you're
>>>>> >> seeing? If it's related to a specific level of clang we may be able
>>>>> to
>>>>> >> simply document it as such.
>>>>> >>
>>>>> >> On Mon, Mar 12, 2018 at 4:25 PM, Maxim Uvarov <
>>>>> maxim.uva...@linaro.org>
>>>>> >> wrote:
>>>>> >> > Bill,
>>>>> >> >
>>>>> >> > I reproduced the failure on thunder-x, so I would like to look at
>>>>> it for one
>>>>> >> > more day before doing rc2.
>>>>> >> >
>>>>> >> > Maxim.
>>>>> >
>>>>> >
>>>>>
>>>>
>>>>
>>>
>>
>

Reply via email to