On Tue, 24 Jun 2025, Richard Biener wrote:

> On Tue, 24 Jun 2025, Richard Sandiford wrote:
> 
> > Tamar Christina <tamar.christ...@arm.com> writes:
> > > store_bit_field_1 has an optimization where if a target is not a memory 
> > > operand
> > > and the entire value is being set from something larger we can just wrap a
> > > subreg around the source and emit a move.
> > >
> > > For vector constructors this is however problematic because the subreg 
> > > means
> > > that the expansion of the constructor won't happen through vec_init 
> > > anymore.
> > >
> > > Complicated constructors which aren't natively supported by targets then 
> > > ICE as
> > > they wouldn't have been expanded so recog fails.
> > >
> > > This patch blocks the optimization on non-constant vector constructors. 
> > > Or non-uniform
> > > non-constant vectors. I allowed constant vectors because if I read the 
> > > code right
> > > simplify-rtx should be able to perform the simplification of pulling out 
> > > the element
> > > or merging the constant values.  There are several testcases in 
> > > aarch64-sve-pcs.exp
> > > that test this as well. I allowed uniform non-constant vectors because 
> > > they
> > > would be folded into a vec_select later on.
> > >
> > > Note that codegen is quite horrible, for what should only be an lsr.  But 
> > > I'll
> > > address that separately so that this patch is backportable.
> > >
> > > Bootstrapped Regtested on aarch64-none-linux-gnu,
> > > arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
> > > -m32, -m64 and no issues.
> > >
> > > Ok for master? and GCC 15, 14, 13?
> > 
> > I was discussing this Alex off-list last week, and the fix we talked
> > about there was:
> > 
> > diff --git a/gcc/explow.cc b/gcc/explow.cc
> > index 7799a98053b..8b138f54f75 100644
> > --- a/gcc/explow.cc
> > +++ b/gcc/explow.cc
> > @@ -753,7 +753,7 @@ force_subreg (machine_mode outermode, rtx op,
> >           machine_mode innermode, poly_uint64 byte)
> >  {
> >    rtx x = simplify_gen_subreg (outermode, op, innermode, byte);
> > -  if (x)
> > +  if (x && (!SUBREG_P (x) || REG_P (SUBREG_REG (x))))
> >      return x;
> >  
> >    auto *start = get_last_insn ();
> > 
> > The justification is that force_subreg is somewhat like a "subreg
> > version of force_operand", and so should try to avoid returning
> > subregs that force_operand would have replaced.  The force_operand
> > code I mean is:
> 
> Yeah, in particular CONSTANT_P isn't sth documented as valid as
> subreg operands, only registers (and memory) are.  But isn't this
> then a bug in simplify_gen_subreg itself, that it creates a SUBREG
> of a non-REG/MEM?

Aka

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index cbe61b49bf6..4fa947f84cd 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -8425,7 +8425,8 @@ simplify_context::simplify_gen_subreg (machine_mode 
outermode, rtx op,
          || GET_CODE (op) == CONST_VECTOR))
     return NULL_RTX;
 
-  if (validate_subreg (outermode, innermode, op, byte))
+  if ((REG_P (op) || MEM_P (op))
+      && validate_subreg (outermode, innermode, op, byte))
     return gen_rtx_SUBREG (outermode, op, byte);
 
   return NULL_RTX;

?

> Richard.
> 
> >   /* Check for subreg applied to an expression produced by loop optimizer.  
> > */
> >   if (code == SUBREG
> >       && !REG_P (SUBREG_REG (value))
> >       && !MEM_P (SUBREG_REG (value)))
> >     {
> >       value
> >     = simplify_gen_subreg (GET_MODE (value),
> >                            force_reg (GET_MODE (SUBREG_REG (value)),
> >                                       force_operand (SUBREG_REG (value),
> >                                                      NULL_RTX)),
> >                            GET_MODE (SUBREG_REG (value)),
> >                            SUBREG_BYTE (value));
> >       code = GET_CODE (value);
> >     }
> > 
> > Thanks,
> > Richard
> > 
> > > Thanks,
> > > Tamar
> > >
> > >
> > > gcc/ChangeLog:
> > >
> > >   PR target/120718
> > >   * expmed.cc (store_bit_field_1): Only push subreg over uniform vector
> > >   constructors.
> > >   (foldable_value_with_subreg): New.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >   PR target/120718
> > >   * gcc.target/aarch64/sve/pr120718.c: New test.
> > >
> > > ---
> > >
> > > diff --git a/gcc/expmed.cc b/gcc/expmed.cc
> > > index 
> > > be427dca5d9afeed2013954472dde3a5430169e0..a468aa5c0c3f20bd62a7afc1d245d64e87be5396
> > >  100644
> > > --- a/gcc/expmed.cc
> > > +++ b/gcc/expmed.cc
> > > @@ -740,6 +740,28 @@ store_bit_field_using_insv (const extraction_insn 
> > > *insv, rtx op0,
> > >    return false;
> > >  }
> > >  
> > > +/* For non-constant vectors wrapping a subreg around the RTX will not 
> > > make
> > > +   the expression expand properly through vec_init.  For constant vectors
> > > +   we can because simplification can just extract the element out by
> > > +   by merging the values.  This can be done by simplify-rtx and so the
> > > +   subreg will be eliminated.  However poly constants require vec_init as
> > > +   they are a runtime value.  So only allow the subreg for simple integer
> > > +   or floating point constants.  */
> > > +
> > > +static bool
> > > +foldable_value_with_subreg (rtx value)
> > > +{
> > > +  if (GET_CODE (value) != CONST_VECTOR || const_vec_duplicate_p (value))
> > > +    return true;
> > > +
> > > +  for (unsigned i = 0; i < const_vector_encoded_nelts (value); i++)
> > > +    if (!CONST_INT_P (const_vector_elt (value, i))
> > > + && !CONST_DOUBLE_P (const_vector_elt (value, i)))
> > > +      return false;
> > > +
> > > +  return true;
> > > +}
> > > +
> > >  /* A subroutine of store_bit_field, with the same arguments.  Return true
> > >     if the operation could be implemented.
> > >  
> > > @@ -795,7 +817,8 @@ store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, 
> > > poly_uint64 bitnum,
> > >    /* If the target is a register, overwriting the entire object, or 
> > > storing
> > >       a full-word or multi-word field can be done with just a SUBREG.  */
> > >    if (!MEM_P (op0)
> > > -      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
> > > +      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
> > > +      && foldable_value_with_subreg (value))
> > >      {
> > >        /* Use the subreg machinery either to narrow OP0 to the required
> > >    words or to cope with mode punning between equal-sized modes.
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr120718.c 
> > > b/gcc/testsuite/gcc.target/aarch64/sve/pr120718.c
> > > new file mode 100644
> > > index 
> > > 0000000000000000000000000000000000000000..cb21d94792f0679a48cc20c3dcdf78c89c05a5c6
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr120718.c
> > > @@ -0,0 +1,13 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-additional-options "-O3" } */
> > > +
> > > +#include <arm_sve.h>
> > > +typedef int __attribute__((vector_size(8))) v2si;
> > > +typedef struct { int x; int y; } A;
> > > +void bar(A a);
> > > +void foo()
> > > +{
> > > +    A a;
> > > +    *(v2si *)&a = (v2si){0, (int)svcntd_pat(SV_ALL)};
> > > +    bar(a);
> > > +}
> > 
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to