Re: [PATCH] RISC-V: Optimise adding a (larger than simm12) constant

2022-11-21 Thread Philipp Tomsich
On Mon, 21 Nov 2022 at 04:11, Kito Cheng  wrote:
>
> > @@ -464,6 +464,60 @@
> >[(set_attr "type" "arith")
> > (set_attr "mode" "DI")])
> >
> > +(define_expand "add3"
> > +  [(set (match_operand:GPR   0 "register_operand"  "=r,r")
> > +   (plus:GPR (match_operand:GPR 1 "register_operand"  " r,r")
> > + (match_operand:GPR 2 "addi_operand"  " r,I")))]
>
> Is it possible to just define a predicate that accepts
> register_operand and CONST_INT_P,
> and then handle all cases in add3 pattern?


Great suggestion.

>
> My point is put all check in one place:
>
> e.g.
> check TARGET_ZBA && const_arith_shifted123_operand (operands[2],
> mode) in add3
> rather than check TARGET_ZBA in addi_operand and use sh[123]add in
> add3 without check.
>
> and that also means we need to sync addi_operand and add3
> once we have an extension XX that could improve addi codegen.
>
>
> > +  ""
> > +{
> > +  if (arith_operand (operands[2], mode))
> > +emit_insn (gen_riscv_add3 (operands[0], operands[1], 
> > operands[2]));
> > +  else if (const_arith_2simm12_operand (operands[2], mode))
>
> const_arith_2simm12_operand only used once, could you inline the condition 
> here?


If we handle all cases in a single pattern, we'll punt this to riscv.cc anyway.
So let's see how the code looks once we have a single predicate and do
the inlining there...

>
> > +{
> > +  /* Split into two immediates that add up to the desired value:
> > +   * e.g., break up "a + 2445" into:
> > +   * addi  a0,a0,2047
> > +   *addi   a0,a0,398
> > +   */
> > +
> > +  HOST_WIDE_INT val = INTVAL (operands[2]);
> > +  HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
> > +
> > +  if (val >= 0)
> > +saturated = ~saturated;
> > +
> > +  val -= saturated;
> > +
> > +  rtx tmp = gen_reg_rtx (mode);
> > +  emit_insn (gen_riscv_add3 (tmp, operands[1], GEN_INT 
> > (saturated)));
> > +  emit_insn (gen_riscv_add3 (operands[0], tmp, GEN_INT (val)));
> > +}
> > +  else if (mode == word_mode
> > +  && const_arith_shifted123_operand (operands[2], mode))
>
> Same for const_arith_shifted123_operand.
>
> > +{
> > +  /* Use a sh[123]add and an immediate shifted down by 1, 2, or 3. */
> > +
> > +  HOST_WIDE_INT val = INTVAL (operands[2]);
> > +  int shamt = ctz_hwi (val);
> > +
> > +  if (shamt > 3)
> > +   shamt = 3;
> > +
> > +  rtx tmp = gen_reg_rtx (mode);
> > +  emit_insn (gen_rtx_SET (tmp, GEN_INT (val >> shamt)));
> > +
> > +  /* We don't use gen_riscv_shNadd here, as it will only exist for
> > +.  Instead we build up its canonical form directly.  */
> > +  rtx shifted_imm = gen_rtx_ASHIFT (mode, tmp, GEN_INT (shamt));
> > +  rtx shNadd = gen_rtx_PLUS (mode, shifted_imm, operands[1]);
> > +  emit_insn (gen_rtx_SET (operands[0], shNadd));
> > +}
> > +  else
> > +FAIL;
>
> Seems add3 FAIL will cause problems, we need either add something like:
>
>   operands[2] = force_reg (mode, operands[2]);
>   emit_insn (gen_rtx_SET (operands[0],
>  gen_rtx_PLUS (mode,
>operands[1], operands[2])));
>
> Or just gcc_unreachable () if we keep using addi_operand to guard this 
> pattern.


This is a case for "gcc_unreachable ();".
The change will be in v2.

Thanks,
Philipp.


Re: [PATCH] RISC-V: Optimise adding a (larger than simm12) constant

2022-11-20 Thread Kito Cheng via Gcc-patches
> @@ -464,6 +464,60 @@
>[(set_attr "type" "arith")
> (set_attr "mode" "DI")])
>
> +(define_expand "add3"
> +  [(set (match_operand:GPR   0 "register_operand"  "=r,r")
> +   (plus:GPR (match_operand:GPR 1 "register_operand"  " r,r")
> + (match_operand:GPR 2 "addi_operand"  " r,I")))]

Is it possible to just define a predicate that accepts
register_operand and CONST_INT_P,
and then handle all cases in add3 pattern?

My point is put all check in one place:

e.g.
check TARGET_ZBA && const_arith_shifted123_operand (operands[2],
mode) in add3
rather than check TARGET_ZBA in addi_operand and use sh[123]add in
add3 without check.

and that also means we need to sync addi_operand and add3
once we have an extension XX that could improve addi codegen.


> +  ""
> +{
> +  if (arith_operand (operands[2], mode))
> +emit_insn (gen_riscv_add3 (operands[0], operands[1], operands[2]));
> +  else if (const_arith_2simm12_operand (operands[2], mode))

const_arith_2simm12_operand only used once, could you inline the condition here?

> +{
> +  /* Split into two immediates that add up to the desired value:
> +   * e.g., break up "a + 2445" into:
> +   * addi  a0,a0,2047
> +   *addi   a0,a0,398
> +   */
> +
> +  HOST_WIDE_INT val = INTVAL (operands[2]);
> +  HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
> +
> +  if (val >= 0)
> +saturated = ~saturated;
> +
> +  val -= saturated;
> +
> +  rtx tmp = gen_reg_rtx (mode);
> +  emit_insn (gen_riscv_add3 (tmp, operands[1], GEN_INT 
> (saturated)));
> +  emit_insn (gen_riscv_add3 (operands[0], tmp, GEN_INT (val)));
> +}
> +  else if (mode == word_mode
> +  && const_arith_shifted123_operand (operands[2], mode))

Same for const_arith_shifted123_operand.

> +{
> +  /* Use a sh[123]add and an immediate shifted down by 1, 2, or 3. */
> +
> +  HOST_WIDE_INT val = INTVAL (operands[2]);
> +  int shamt = ctz_hwi (val);
> +
> +  if (shamt > 3)
> +   shamt = 3;
> +
> +  rtx tmp = gen_reg_rtx (mode);
> +  emit_insn (gen_rtx_SET (tmp, GEN_INT (val >> shamt)));
> +
> +  /* We don't use gen_riscv_shNadd here, as it will only exist for
> + <X:mode>.  Instead we build up its canonical form directly.  */
> +  rtx shifted_imm = gen_rtx_ASHIFT (mode, tmp, GEN_INT (shamt));
> +  rtx shNadd = gen_rtx_PLUS (mode, shifted_imm, operands[1]);
> +  emit_insn (gen_rtx_SET (operands[0], shNadd));
> +}
> +  else
> +FAIL;

Seems add3 FAIL will cause problems, we need either add something like:

  operands[2] = force_reg (mode, operands[2]);
  emit_insn (gen_rtx_SET (operands[0],
 gen_rtx_PLUS (mode,
   operands[1], operands[2])));

Or just gcc_unreachable () if we keep using addi_operand to guard this pattern.


Re: [PATCH] RISC-V: Optimise adding a (larger than simm12) constant

2022-11-18 Thread Jeff Law



On 11/18/22 14:26, Philipp Tomsich wrote:

On Fri, 18 Nov 2022 at 22:13, Jeff Law  wrote:


On 11/9/22 16:07, Philipp Tomsich wrote:

Handling the register-const_int addition has very quickly escalated to
creating a full sign-extended 32bit constant and performing a
register-register for RISC-V in GCC so far, resulting in sequences like
(for the case of "a + 2048"):
   li      a5,4096
   addi    a5,a5,-2048
   add     a0,a0,a5

By adding an expansion for add3, we can emit optimised RTL that
matches the capabilities of RISC-V better by adding support for the
following, previously unoptimised cases:
- addi + addi
   addi    a0,a0,2047
   addi    a0,a0,1
- li + sh[123]add (if Zba is enabled)
   li  a5,960
   sh3add  a0,a5,a0

With this commit, we also fix up riscv_adjust_libcall_cfi_prologue()
and riscv_adjust_libcall_cfi_epilogue() to not use gen_add3_insn, as
the expander will otherwise wrap the resulting set-expression in an
insn (causing an ICE at dwarf2-time) when invoked with -msave-restore.

This closes the gap to LLVM, which has already been emitting these
optimised sequences.

Note that this benefits perlbench (in SPEC CPU 2017), which needs
to add the constant 3840.

gcc/ChangeLog:

   * config/riscv/bitmanip.md (*shNadd): Rename.
   (riscv_shNadd): Expose as gen_riscv_shNadd{di/si}.
   * config/riscv/predicates.md (const_arith_shifted123_operand):
   New predicate (for constants that are a simm12, shifted by
   1, 2 or 3).
   (const_arith_2simm12_operand): New predicate (that can be
   expressed by adding 2 simm12 together).
   (addi_operand): New predicate (an immediate operand suitable
   for the new add3 expansion).
   * config/riscv/riscv.cc (riscv_adjust_libcall_cfi_prologue):
   Don't use gen_add3_insn, where a RTX instead of an INSN is
   required (otherwise this will break as soon as we have a
   define_expand for add3).
   (riscv_adjust_libcall_cfi_epilogue): Same.
   * config/riscv/riscv.md (addsi3): Rename.
   (riscv_addsi3): New name for addsi3.
   (adddi3): Rename.
   (riscv_adddi3): New name for adddi3.
   (add3): New expander that handles the basic and fancy
   (such as li+sh[123]add, addi+addi, ...) cases for adding
   register-register and register-const_int.

gcc/testsuite/ChangeLog:

   * gcc.target/riscv/addi.c: New test.
   * gcc.target/riscv/zba-shNadd-06.c: New test.

Signed-off-by: Philipp Tomsich 
---

   gcc/config/riscv/bitmanip.md  |  2 +-
   gcc/config/riscv/predicates.md| 28 +
   gcc/config/riscv/riscv.cc | 10 ++--
   gcc/config/riscv/riscv.md | 58 ++-
   gcc/testsuite/gcc.target/riscv/addi.c | 39 +
   .../gcc.target/riscv/zba-shNadd-06.c  | 11 
   6 files changed, 141 insertions(+), 7 deletions(-)
   create mode 100644 gcc/testsuite/gcc.target/riscv/addi.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c



diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 171a0cdced6..289ff7470c6 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -464,6 +464,60 @@
 [(set_attr "type" "arith")
  (set_attr "mode" "DI")])

+(define_expand "add3"
+  [(set (match_operand:GPR   0 "register_operand"  "=r,r")
+ (plus:GPR (match_operand:GPR 1 "register_operand"  " r,r")
+   (match_operand:GPR 2 "addi_operand"  " r,I")))]
+  ""
+{
+  if (arith_operand (operands[2], mode))
+emit_insn (gen_riscv_add3 (operands[0], operands[1], operands[2]));
+  else if (const_arith_2simm12_operand (operands[2], mode))
+{
+  /* Split into two immediates that add up to the desired value:
+   * e.g., break up "a + 2445" into:
+   *   addi   a0,a0,2047
+   *   addi   a0,a0,398
+   */

Nit.  GNU comment style please.



+
+  HOST_WIDE_INT val = INTVAL (operands[2]);
+  HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+  if (val >= 0)
+  saturated = ~saturated;
+
+  val -= saturated;
+
+  rtx tmp = gen_reg_rtx (mode);

Can't add3 be generated by LRA?  If so, don't you have to guard
against going into this path as we shouldn't be creating new pseudos at
that point (I know LRA can create some internally, but I don't think it
handles new ones showing up due to target expanders).


Similarly for the shifted_123 case immediately following.


If we do indeed have an issue here, I'm not sure how best to resolve.
If the output operand does not overlap with the inputs, then we're
golden and can just re-use it to form the constant.  If not,  then it's
a bit tougher.  I'm not keen to add a test of no_new_pseudos to the
operand predicate, but I don't see a better option yet.

 From a cursory glance, LRA does not try to go through gen_add3_insn,
but rather forms PLUS rtx.  This 

Re: [PATCH] RISC-V: Optimise adding a (larger than simm12) constant

2022-11-18 Thread Philipp Tomsich
On Fri, 18 Nov 2022 at 22:13, Jeff Law  wrote:
>
>
> On 11/9/22 16:07, Philipp Tomsich wrote:
> > Handling the register-const_int addition has very quickly escalated to
> > creating a full sign-extended 32bit constant and performing a
> > register-register for RISC-V in GCC so far, resulting in sequences like
> > (for the case of "a + 2048"):
> >   li      a5,4096
> >   addi    a5,a5,-2048
> >   add     a0,a0,a5
> >
> > By adding an expansion for add3, we can emit optimised RTL that
> > matches the capabilities of RISC-V better by adding support for the
> > following, previously unoptimised cases:
> >- addi + addi
> >   addi    a0,a0,2047
> >   addi    a0,a0,1
> >- li + sh[123]add (if Zba is enabled)
> >   li  a5,960
> >   sh3add  a0,a5,a0
> >
> > With this commit, we also fix up riscv_adjust_libcall_cfi_prologue()
> > and riscv_adjust_libcall_cfi_epilogue() to not use gen_add3_insn, as
> > the expander will otherwise wrap the resulting set-expression in an
> > insn (causing an ICE at dwarf2-time) when invoked with -msave-restore.
> >
> > This closes the gap to LLVM, which has already been emitting these
> > optimised sequences.
> >
> > Note that this benefits perlbench (in SPEC CPU 2017), which needs
> > to add the constant 3840.
> >
> > gcc/ChangeLog:
> >
> >   * config/riscv/bitmanip.md (*shNadd): Rename.
> >   (riscv_shNadd): Expose as gen_riscv_shNadd{di/si}.
> >   * config/riscv/predicates.md (const_arith_shifted123_operand):
> >   New predicate (for constants that are a simm12, shifted by
> >   1, 2 or 3).
> >   (const_arith_2simm12_operand): New predicate (that can be
> >   expressed by adding 2 simm12 together).
> >   (addi_operand): New predicate (an immediate operand suitable
> >   for the new add3 expansion).
> >   * config/riscv/riscv.cc (riscv_adjust_libcall_cfi_prologue):
> >   Don't use gen_add3_insn, where a RTX instead of an INSN is
> >   required (otherwise this will break as soon as we have a
> >   define_expand for add3).
> >   (riscv_adjust_libcall_cfi_epilogue): Same.
> >   * config/riscv/riscv.md (addsi3): Rename.
> >   (riscv_addsi3): New name for addsi3.
> >   (adddi3): Rename.
> >   (riscv_adddi3): New name for adddi3.
> >   (add3): New expander that handles the basic and fancy
> >   (such as li+sh[123]add, addi+addi, ...) cases for adding
> >   register-register and register-const_int.
> >
> > gcc/testsuite/ChangeLog:
> >
> >   * gcc.target/riscv/addi.c: New test.
> >   * gcc.target/riscv/zba-shNadd-06.c: New test.
> >
> > Signed-off-by: Philipp Tomsich 
> > ---
> >
> >   gcc/config/riscv/bitmanip.md  |  2 +-
> >   gcc/config/riscv/predicates.md| 28 +
> >   gcc/config/riscv/riscv.cc | 10 ++--
> >   gcc/config/riscv/riscv.md | 58 ++-
> >   gcc/testsuite/gcc.target/riscv/addi.c | 39 +
> >   .../gcc.target/riscv/zba-shNadd-06.c  | 11 
> >   6 files changed, 141 insertions(+), 7 deletions(-)
> >   create mode 100644 gcc/testsuite/gcc.target/riscv/addi.c
> >   create mode 100644 gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c
> >
> >
> >
> > diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> > index 171a0cdced6..289ff7470c6 100644
> > --- a/gcc/config/riscv/riscv.md
> > +++ b/gcc/config/riscv/riscv.md
> > @@ -464,6 +464,60 @@
> > [(set_attr "type" "arith")
> >  (set_attr "mode" "DI")])
> >
> > +(define_expand "add3"
> > +  [(set (match_operand:GPR   0 "register_operand"  "=r,r")
> > + (plus:GPR (match_operand:GPR 1 "register_operand"  " r,r")
> > +   (match_operand:GPR 2 "addi_operand"  " r,I")))]
> > +  ""
> > +{
> > +  if (arith_operand (operands[2], mode))
> > +emit_insn (gen_riscv_add3 (operands[0], operands[1], 
> > operands[2]));
> > +  else if (const_arith_2simm12_operand (operands[2], mode))
> > +{
> > +  /* Split into two immediates that add up to the desired value:
> > +   * e.g., break up "a + 2445" into:
> > +   *   addi   a0,a0,2047
> > +   *   addi   a0,a0,398
> > +   */
>
> Nit.  GNU comment style please.
>
>
> > +
> > +  HOST_WIDE_INT val = INTVAL (operands[2]);
> > +  HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
> > +
> > +  if (val >= 0)
> > +  saturated = ~saturated;
> > +
> > +  val -= saturated;
> > +
> > +  rtx tmp = gen_reg_rtx (mode);
>
> Can't add3 be generated by LRA?  If so, don't you have to guard
> against going into this path as we shouldn't be creating new pseudos at
> that point (I know LRA can create some internally, but I don't think it
> handles new ones showing up due to target expanders).
>
>
> Similarly for the shifted_123 case immediately following.
>
>
> If we do indeed have an issue here, I'm not sure how best to resolve.
> If the output 

Re: [PATCH] RISC-V: Optimise adding a (larger than simm12) constant

2022-11-18 Thread Jeff Law via Gcc-patches



On 11/9/22 16:07, Philipp Tomsich wrote:

Handling the register-const_int addition has very quickly escalated to
creating a full sign-extended 32bit constant and performing a
register-register for RISC-V in GCC so far, resulting in sequences like
(for the case of "a + 2048"):
li      a5,4096
addi    a5,a5,-2048
add     a0,a0,a5

By adding an expansion for add3, we can emit optimised RTL that
matches the capabilities of RISC-V better by adding support for the
following, previously unoptimised cases:
   - addi + addi
addi    a0,a0,2047
addi    a0,a0,1
   - li + sh[123]add (if Zba is enabled)
li  a5,960
sh3add  a0,a5,a0

With this commit, we also fix up riscv_adjust_libcall_cfi_prologue()
and riscv_adjust_libcall_cfi_epilogue() to not use gen_add3_insn, as
the expander will otherwise wrap the resulting set-expression in an
insn (causing an ICE at dwarf2-time) when invoked with -msave-restore.

This closes the gap to LLVM, which has already been emitting these
optimised sequences.

Note that this benefits perlbench (in SPEC CPU 2017), which needs
to add the constant 3840.

gcc/ChangeLog:

* config/riscv/bitmanip.md (*shNadd): Rename.
(riscv_shNadd): Expose as gen_riscv_shNadd{di/si}.
* config/riscv/predicates.md (const_arith_shifted123_operand):
New predicate (for constants that are a simm12, shifted by
1, 2 or 3).
(const_arith_2simm12_operand): New predicate (that can be
expressed by adding 2 simm12 together).
(addi_operand): New predicate (an immediate operand suitable
for the new add3 expansion).
* config/riscv/riscv.cc (riscv_adjust_libcall_cfi_prologue):
Don't use gen_add3_insn, where a RTX instead of an INSN is
required (otherwise this will break as soon as we have a
define_expand for add3).
(riscv_adjust_libcall_cfi_epilogue): Same.
* config/riscv/riscv.md (addsi3): Rename.
(riscv_addsi3): New name for addsi3.
(adddi3): Rename.
(riscv_adddi3): New name for adddi3.
(add3): New expander that handles the basic and fancy
(such as li+sh[123]add, addi+addi, ...) cases for adding
register-register and register-const_int.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/addi.c: New test.
* gcc.target/riscv/zba-shNadd-06.c: New test.

Signed-off-by: Philipp Tomsich 
---

  gcc/config/riscv/bitmanip.md  |  2 +-
  gcc/config/riscv/predicates.md| 28 +
  gcc/config/riscv/riscv.cc | 10 ++--
  gcc/config/riscv/riscv.md | 58 ++-
  gcc/testsuite/gcc.target/riscv/addi.c | 39 +
  .../gcc.target/riscv/zba-shNadd-06.c  | 11 
  6 files changed, 141 insertions(+), 7 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/riscv/addi.c
  create mode 100644 gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c



diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 171a0cdced6..289ff7470c6 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -464,6 +464,60 @@
[(set_attr "type" "arith")
 (set_attr "mode" "DI")])
  
+(define_expand "add3"

+  [(set (match_operand:GPR   0 "register_operand"  "=r,r")
+   (plus:GPR (match_operand:GPR 1 "register_operand"  " r,r")
+ (match_operand:GPR 2 "addi_operand"  " r,I")))]
+  ""
+{
+  if (arith_operand (operands[2], mode))
+emit_insn (gen_riscv_add3 (operands[0], operands[1], operands[2]));
+  else if (const_arith_2simm12_operand (operands[2], mode))
+{
+  /* Split into two immediates that add up to the desired value:
+   * e.g., break up "a + 2445" into:
+   * addi  a0,a0,2047
+   *addi   a0,a0,398
+   */


Nit.  GNU comment style please.



+
+  HOST_WIDE_INT val = INTVAL (operands[2]);
+  HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+  if (val >= 0)
+saturated = ~saturated;
+
+  val -= saturated;
+
+  rtx tmp = gen_reg_rtx (mode);


Can't add3 be generated by LRA?  If so, don't you have to guard 
against going into this path as we shouldn't be creating new pseudos at 
that point (I know LRA can create some internally, but I don't think it 
handles new ones showing up due to target expanders).



Similarly for the shifted_123 case immediately following.


If we do indeed have an issue here, I'm not sure how best to resolve.  
If the output operand does not overlap with the inputs, then we're 
golden and can just re-use it to form the constant.  If not,  then it's 
a bit tougher.  I'm not keen to add a test of no_new_pseudos to the 
operand predicate, but I don't see a better option yet.



jeff




[PATCH] RISC-V: Optimise adding a (larger than simm12) constant

2022-11-09 Thread Philipp Tomsich
Handling the register-const_int addition has very quickly escalated to
creating a full sign-extended 32bit constant and performing a
register-register for RISC-V in GCC so far, resulting in sequences like
(for the case of "a + 2048"):
li      a5,4096
addi    a5,a5,-2048
add     a0,a0,a5

By adding an expansion for add3, we can emit optimised RTL that
matches the capabilities of RISC-V better by adding support for the
following, previously unoptimised cases:
  - addi + addi
addi    a0,a0,2047
addi    a0,a0,1
  - li + sh[123]add (if Zba is enabled)
li  a5,960
sh3add  a0,a5,a0

With this commit, we also fix up riscv_adjust_libcall_cfi_prologue()
and riscv_adjust_libcall_cfi_epilogue() to not use gen_add3_insn, as
the expander will otherwise wrap the resulting set-expression in an
insn (causing an ICE at dwarf2-time) when invoked with -msave-restore.

This closes the gap to LLVM, which has already been emitting these
optimised sequences.

Note that this benefits perlbench (in SPEC CPU 2017), which needs
to add the constant 3840.

gcc/ChangeLog:

* config/riscv/bitmanip.md (*shNadd): Rename.
(riscv_shNadd): Expose as gen_riscv_shNadd{di/si}.
* config/riscv/predicates.md (const_arith_shifted123_operand):
New predicate (for constants that are a simm12, shifted by
1, 2 or 3).
(const_arith_2simm12_operand): New predicate (that can be
expressed by adding 2 simm12 together).
(addi_operand): New predicate (an immediate operand suitable
for the new add3 expansion).
* config/riscv/riscv.cc (riscv_adjust_libcall_cfi_prologue):
Don't use gen_add3_insn, where a RTX instead of an INSN is
required (otherwise this will break as soon as we have a
define_expand for add3).
(riscv_adjust_libcall_cfi_epilogue): Same.
* config/riscv/riscv.md (addsi3): Rename.
(riscv_addsi3): New name for addsi3.
(adddi3): Rename.
(riscv_adddi3): New name for adddi3.
(add3): New expander that handles the basic and fancy
(such as li+sh[123]add, addi+addi, ...) cases for adding
register-register and register-const_int.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/addi.c: New test.
* gcc.target/riscv/zba-shNadd-06.c: New test.

Signed-off-by: Philipp Tomsich 
---

 gcc/config/riscv/bitmanip.md  |  2 +-
 gcc/config/riscv/predicates.md| 28 +
 gcc/config/riscv/riscv.cc | 10 ++--
 gcc/config/riscv/riscv.md | 58 ++-
 gcc/testsuite/gcc.target/riscv/addi.c | 39 +
 .../gcc.target/riscv/zba-shNadd-06.c  | 11 
 6 files changed, 141 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/addi.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index bb23ceb86d9..78fdf02c2ec 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -29,7 +29,7 @@
   [(set_attr "type" "bitmanip,load")
(set_attr "mode" "DI")])
 
-(define_insn "*shNadd"
+(define_insn "riscv_shNadd"
   [(set (match_operand:X 0 "register_operand" "=r")
(plus:X (ashift:X (match_operand:X 1 "register_operand" "r")
  (match_operand:QI 2 "imm123_operand" "Ds3"))
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 6772228e5b6..c56bfa99339 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -308,3 +308,31 @@
 (match_test "INTVAL (op) > 0")))
(ior (match_test "SMALL_OPERAND (UINTVAL (op) & ~(HOST_WIDE_INT_1U << 
floor_log2 (UINTVAL (op")
(match_test "popcount_hwi (UINTVAL (op)) == 2"
+
+;; A CONST_INT that can be shifted down by 1, 2 or 3 bits (i.e., has
+;; these bits clear) and will then form a SMALL_OPERAND.
+(define_predicate "const_arith_shifted123_operand"
+  (and (match_code "const_int")
+   (not (match_test "SMALL_OPERAND (INTVAL (op))")))
+{
+  HOST_WIDE_INT val = INTVAL (op);
+  int trailing = ctz_hwi (val);
+
+  /* Clamp to 3, as we have sh[123]add instructions only. */
+  if (trailing > 3)
+ trailing = 3;
+
+  return trailing > 0 && SMALL_OPERAND (val >> trailing);
+})
+
+;; A CONST_INT that can be formed by adding two SMALL_OPERANDs together
+(define_predicate "const_arith_2simm12_operand"
+  (and (match_code "const_int")
+   (ior (match_test "SMALL_OPERAND(INTVAL (op) - ~(HOST_WIDE_INT_M1U << 
(IMM_BITS - 1)))")
+   (match_test "SMALL_OPERAND(INTVAL (op) -  (HOST_WIDE_INT_M1U << 
(IMM_BITS - 1)))"
+
+(define_predicate "addi_operand"
+  (ior (match_operand 0 "arith_operand")
+   (match_operand 0 "const_arith_2simm12_operand")
+   (and (match_operand 0 "const_arith_shifted123_operand")
+   (match_test