https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49263

--- Comment #32 from Alexander Klepikov <klepikov.alex+bugs at gmail dot com> 
---
I'm not sure whether I should write here or open a new discussion, but these
topics are very closely related. I've been writing a patch to completely
eliminate the generation of the dynamic shift instructions 'shad' and 'shld',
at least for the SH4 CPU. And then I got a surprising result: in all the
examples I gave earlier, the library call was converted to 'tst' instructions!

Here is the patch itself (I also will attach a file):

--- ../gcc-12.3.0.orig/gcc/config/sh/sh.cc      2023-05-08 15:14:39.681161695 +0300
+++ ./gcc/config/sh/sh.cc       2023-05-23 12:23:25.964375731 +0300
@@ -3061,7 +3061,7 @@
   else
     insn_count = ashl_lshr_seq[shift_amount_i].insn_count;

-  return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
+  return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST) && ! disable_dynshift;
 }

 /* Assuming we have a value that has been sign-extended by at least one bit,
@@ -3812,8 +3812,10 @@
   rtx wrk;
   char func[18];
   int value;
+  int long_shift  = disable_dynshift ? 30 : 19;
+  int short_shift = disable_dynshift ? 15 : 5;

-  if (TARGET_DYNSHIFT)
+  if (TARGET_DYNSHIFT && ! disable_dynshift)
     {
       if (!CONST_INT_P (operands[2]))
        {
@@ -3851,7 +3853,7 @@
       emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
       return true;
     }
-  else if (value >= 16 && value <= 19)
+  else if (value >= 16 && value <= long_shift)
     {
       wrk = gen_reg_rtx (SImode);
       emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
@@ -3862,7 +3864,7 @@
       return true;
     }
   /* Expand a short sequence inline, longer call a magic routine.  */
-  else if (value <= 5)
+  else if (value <= short_shift)
     {
       wrk = gen_reg_rtx (SImode);
       emit_move_insn (wrk, operands[1]);
diff -ur ../gcc-12.3.0.orig/gcc/config/sh/sh.opt ./gcc/config/sh/sh.opt
--- ../gcc-12.3.0.orig/gcc/config/sh/sh.opt     2023-05-08 15:14:39.689161810 +0300
+++ ./gcc/config/sh/sh.opt      2023-05-23 10:45:36.814371159 +0300
@@ -301,3 +301,7 @@
 mlra
 Target Var(sh_lra_flag) Init(0) Save
 Use LRA instead of reload (transitional).
+
+mdisable-dynshift
+Target Var(disable_dynshift) Init(0)
+Disable dynamic shift 'shad' and 'shld' instructions

And here are my tests:
$ cat f.c
#define ADDR 0xFFFF0000
#define P ((unsigned char *)ADDR)
#define FLAG 0x40
#define S 7

/* Return 1 if the FLAG (0x40) bit is set in v, 0 otherwise.  This variant
   takes a plain `char`; f_ below is the same test on `unsigned char`.  */
unsigned char f(char v){
    return (v & FLAG) == FLAG;
}

/* Return 1 if the FLAG (0x40) bit is set in v, 0 otherwise.  Same test as
   f above, but with an `unsigned char` parameter.  */
unsigned char f_(unsigned char v){
    return (v & FLAG) == FLAG;
}

/* Read one byte through P (an `unsigned char *` to ADDR, 0xFFFF0000) and
   return 1 if its FLAG (0x40) bit is set, 0 otherwise.  Note that P is not
   volatile-qualified.  */
unsigned char f1(void){
    return (*P & FLAG) == FLAG;
}

/* Test case: right shift of a signed int by the constant S (7).  */
int f_signed_rshift(int v){
    return v >> S;
}

/* Test case: left shift of a signed int by the constant S (7).  */
int f_signed_lshift(int v){
    return v << S;
}

/* Test case: right shift of an unsigned int by the constant S (7).  */
unsigned int f_unsigned_rshift(unsigned int v){
    return v >> S;
}

/* Test case: left shift of an unsigned int by the constant S (7).  */
unsigned int f_unsigned_lshift(unsigned int v){
    return v << S;
}

$ /usr/local/sh-toolchain/bin/sh-elf-gcc -c -mrenesas -m2e -mb -O -fno-toplevel-reorder -mdisable-dynshift -S f.c
$ cat f.s
        .file   "f.c"
        .text
        .text
        .align 1
        .global _f
        .type   _f, @function
_f:
        mov     r4,r0
        tst     #64,r0
        mov     #-1,r0
        rts
        negc    r0,r0
        .size   _f, .-_f
        .align 1
        .global _f_
        .type   _f_, @function
_f_:
        mov     r4,r0
        tst     #64,r0
        mov     #-1,r0
        rts
        negc    r0,r0
        .size   _f_, .-_f_
        .align 1
        .global _f1
        .type   _f1, @function
_f1:
        mov.l   .L4,r1
        mov.b   @r1,r0
        tst     #64,r0
        mov     #-1,r0
        rts
        negc    r0,r0
.L5:
        .align 2
.L4:
        .long   -65536
        .size   _f1, .-_f1
        .align 1
        .global _f_signed_rshift
        .type   _f_signed_rshift, @function
_f_signed_rshift:
        mov     r4,r0
        shar    r0
        shar    r0
        shar    r0
        shar    r0
        shar    r0
        shar    r0
        rts
        shar    r0
        .size   _f_signed_rshift, .-_f_signed_rshift
        .align 1
        .global _f_signed_lshift
        .type   _f_signed_lshift, @function
_f_signed_lshift:
        mov     r4,r0
        shll2   r0
        shll2   r0
        add     r0,r0
        rts
        shll2   r0
        .size   _f_signed_lshift, .-_f_signed_lshift
        .align 1
        .global _f_unsigned_rshift
        .type   _f_unsigned_rshift, @function
_f_unsigned_rshift:
        mov     r4,r0
        shlr2   r0
        shlr2   r0
        shlr    r0
        rts
        shlr2   r0
        .size   _f_unsigned_rshift, .-_f_unsigned_rshift
        .align 1
        .global _f_unsigned_lshift
        .type   _f_unsigned_lshift, @function
_f_unsigned_lshift:
        mov     r4,r0
        shll2   r0
        shll2   r0
        add     r0,r0
        rts
        shll2   r0
        .size   _f_unsigned_lshift, .-_f_unsigned_lshift
        .ident  "GCC: (GNU) 12.3.0"

I also compiled my project with the '-m2e' and new '-mdisable-dynshift' options
and tested it in SH-2E mode on the Renesas emulator that comes with
High-performance Embedded Workshop, and all unit tests ran as expected.

If this patch is useful, let's include it in GCC.

Reply via email to