Ack. I am out of office (OoO) from 5/1 to 5/4; after that I'll start looking at the patches in the GCC 13 queue, including this one :)
On Fri, Apr 29, 2022 at 6:12 AM Philipp Tomsich <philipp.toms...@vrull.eu> wrote: > > Kito, > > Did you have a chance to take a look at this one? > > I assume this will have to wait until we reopen for 13... > OK for 13? Also: OK for a backport (once a branch for that exists)? > > Philipp. > > > On Sun, 24 Apr 2022 at 01:44, Philipp Tomsich <philipp.toms...@vrull.eu> > wrote: > > > > The Zbb support has introduced ctz and clz to the backend, but some > > transformations in GCC need to know what the value of c[lt]z at zero > > is. This affects how the optab is generated and may suppress use of > > CLZ/CTZ in tree passes. > > > > Among other things, this is needed for the transformation of > > table-based ctz-implementations, such as in deepsjeng, to work > > (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90838). > > > > Prior to this change, the test case from PR90838 would compile to > > on RISC-V targets with Zbb: > > myctz: > > lui a4,%hi(.LC0) > > ld a4,%lo(.LC0)(a4) > > neg a5,a0 > > and a5,a5,a0 > > mul a5,a5,a4 > > lui a4,%hi(.LANCHOR0) > > addi a4,a4,%lo(.LANCHOR0) > > srli a5,a5,58 > > sh2add a5,a5,a4 > > lw a0,0(a5) > > ret > > > > After this change, we get: > > myctz: > > ctz a0,a0 > > andi a0,a0,63 > > ret > > > > Testing this with deepsjeng_r (from SPEC 2017) against QEMU, this > > shows a clear reduction in dynamic instruction count: > > - before 1961888067076 > > - after 1907928279874 (2.75% reduction) > > > > gcc/ChangeLog: > > > > * config/riscv/riscv.h (CLZ_DEFINED_VALUE_AT_ZERO): Implement. > > (CTZ_DEFINED_VALUE_AT_ZERO): Same. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.dg/pr90838.c: Add additional flags (dg-additional-options) > > when compiling for riscv64. > > * gcc.target/riscv/zbb-ctz.c: New test. 
> > > > Signed-off-by: Philipp Tomsich <philipp.toms...@vrull.eu> > > Signed-off-by: Manolis Tsamis <manolis.tsa...@vrull.eu> > > Co-developed-by: Manolis Tsamis <manolis.tsa...@vrull.eu> > > > > --- > > gcc/config/riscv/riscv.h | 5 ++ > > gcc/testsuite/gcc.dg/pr90838.c | 2 + > > gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c | 65 ++++++++++++++++++++ > > gcc/testsuite/gcc.target/riscv/zbb-ctz.c | 66 +++++++++++++++++++++ > > 4 files changed, 138 insertions(+) > > create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c > > create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz.c > > > > diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h > > index 4210e252255..95f72e2fd3f 100644 > > --- a/gcc/config/riscv/riscv.h > > +++ b/gcc/config/riscv/riscv.h > > @@ -1019,4 +1019,9 @@ extern void riscv_remove_unneeded_save_restore_calls > > (void); > > > > #define HARD_REGNO_RENAME_OK(FROM, TO) riscv_hard_regno_rename_ok (FROM, > > TO) > > > > +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ > > + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) > > +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ > > + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) > > + > > #endif /* ! 
GCC_RISCV_H */ > > diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c > > index 41c5dab9a5c..162bd6f51d0 100644 > > --- a/gcc/testsuite/gcc.dg/pr90838.c > > +++ b/gcc/testsuite/gcc.dg/pr90838.c > > @@ -1,5 +1,6 @@ > > /* { dg-do compile } */ > > /* { dg-options "-O2 -fdump-tree-forwprop2-details" } */ > > +/* { dg-additional-options "-march=rv64gc_zbb" { target riscv64*-*-* } } */ > > > > int ctz1 (unsigned x) > > { > > @@ -57,3 +58,4 @@ int ctz4 (unsigned long x) > > } > > > > /* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target > > aarch64*-*-* } } } */ > > +/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target > > riscv64*-*-* } } } */ > > diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c > > b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c > > new file mode 100644 > > index 00000000000..b903517197a > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c > > @@ -0,0 +1,65 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-march=rv32gc_zbb -mabi=ilp32" } */ > > +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ > > + > > +int ctz1 (unsigned x) > > +{ > > + static const char table[32] = > > + { > > + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, > > + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 > > + }; > > + > > + return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; > > +} > > + > > +int ctz2 (unsigned x) > > +{ > > +#define u 0 > > + static short table[64] = > > + { > > + 32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14, > > + 10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15, > > + 31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26, > > + 30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u > > + }; > > + > > + x = (x & -x) * 0x0450FBAF; > > + return table[x >> 26]; > > +} > > + > > +int ctz3 (unsigned x) > > +{ > > + static int table[32] = > > + { > > + 0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26, > > + 31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27 > > 
+ }; > > + > > + if (x == 0) return 32; > > + x = (x & -x) * 0x04D7651F; > > + return table[x >> 27]; > > +} > > + > > +static const unsigned long long magic = 0x03f08c5392f756cdULL; > > + > > +static const char table[64] = { > > + 0, 1, 12, 2, 13, 22, 17, 3, > > + 14, 33, 23, 36, 18, 58, 28, 4, > > + 62, 15, 34, 26, 24, 48, 50, 37, > > + 19, 55, 59, 52, 29, 44, 39, 5, > > + 63, 11, 21, 16, 32, 35, 57, 27, > > + 61, 25, 47, 49, 54, 51, 43, 38, > > + 10, 20, 31, 56, 60, 46, 53, 42, > > + 9, 30, 45, 41, 8, 40, 7, 6, > > +}; > > + > > +int ctz4 (unsigned long x) > > +{ > > + unsigned long lsb = x & -x; > > + return table[(lsb * magic) >> 58]; > > +} > > + > > +/* { dg-final { scan-assembler-times "ctz\t" 3 } } */ > > +/* { dg-final { scan-assembler-times "andi\t" 1 } } */ > > +/* { dg-final { scan-assembler-times "mul\t" 1 } } */ > > diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz.c > > b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c > > new file mode 100644 > > index 00000000000..f9fbcb38dee > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c > > @@ -0,0 +1,66 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */ > > +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ > > + > > +int ctz1 (unsigned x) > > +{ > > + static const char table[32] = > > + { > > + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, > > + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 > > + }; > > + > > + return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; > > +} > > + > > +int ctz2 (unsigned x) > > +{ > > +#define u 0 > > + static short table[64] = > > + { > > + 32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14, > > + 10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15, > > + 31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26, > > + 30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u > > + }; > > + > > + x = (x & -x) * 0x0450FBAF; > > + return table[x >> 26]; > > +} > > + > > +int ctz3 (unsigned x) > > +{ > > + static int table[32] 
= > > + { > > + 0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26, > > + 31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27 > > + }; > > + > > + if (x == 0) return 32; > > + x = (x & -x) * 0x04D7651F; > > + return table[x >> 27]; > > +} > > + > > +static const unsigned long long magic = 0x03f08c5392f756cdULL; > > + > > +static const char table[64] = { > > + 0, 1, 12, 2, 13, 22, 17, 3, > > + 14, 33, 23, 36, 18, 58, 28, 4, > > + 62, 15, 34, 26, 24, 48, 50, 37, > > + 19, 55, 59, 52, 29, 44, 39, 5, > > + 63, 11, 21, 16, 32, 35, 57, 27, > > + 61, 25, 47, 49, 54, 51, 43, 38, > > + 10, 20, 31, 56, 60, 46, 53, 42, > > + 9, 30, 45, 41, 8, 40, 7, 6, > > +}; > > + > > +int ctz4 (unsigned long x) > > +{ > > + unsigned long lsb = x & -x; > > + return table[(lsb * magic) >> 58]; > > +} > > + > > +/* { dg-final { scan-assembler-times "ctzw\t" 3 } } */ > > +/* { dg-final { scan-assembler-times "ctz\t" 1 } } */ > > +/* { dg-final { scan-assembler-times "andi\t" 2 } } */ > > +/* { dg-final { scan-assembler-not "mul" } } */ > > -- > > 2.34.1 > >