Hi,

This patch fixes an issue with accessing a parameter of struct type
that is passed through integer registers when one of its
floating-point members is accessed.  For example:

typedef struct DF {double a[4]; long l; } DF;
double foo_df (DF arg){return arg.a[3];}

On ppc64le, with trunk gcc, "std 6,-24(1) ; lfd 1,-24(1)" is
generated, while the single instruction "mtvsrd 1, 6" would be
enough for this case.

This patch updates the behavior when loading floating-point members
of a parameter: if the floating-point member was stored via an integer
register, it is first loaded in integer mode and then converted to
the floating-point mode.

Compared with the previous patch:
https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609396.html
this version supports a non-zero stack offset for the parameter
argument.

I also tried to enhance CSE/DSE for this issue.  But because of their
limitations (e.g. CSE does not like new pseudos, DSE is not good
across blocks), some cases (such as the one in this patch) cannot be
handled there.

Bootstrap and regtest pass on ppc64{,le}.
Is this OK for trunk?  Thanks for any comments!


BR,
Jeff (Jiufu)


        PR target/108073

gcc/ChangeLog:

        * expr.cc (extract_subreg_from_loading_word): New function.
        (expand_expr_real_1): Call extract_subreg_from_loading_word.

gcc/testsuite/ChangeLog:

        * g++.target/powerpc/pr102024.C: Updated.
        * gcc.target/powerpc/pr108073.c: New test.

---
 gcc/expr.cc                                 | 85 +++++++++++++++++++++
 gcc/testsuite/g++.target/powerpc/pr102024.C |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr108073.c | 29 +++++++
 3 files changed, 115 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108073.c

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 3917fc24c8c..57bc29c5678 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -10711,6 +10711,77 @@ stmt_is_replaceable_p (gimple *stmt)
   return false;
 }
 
+/* Return the content of the memory slot SOURCE as MODE, or NULL_RTX if
+   SOURCE cannot be handled here.
+   SOURCE is based on BASE.  BASE is a memory block that is stored via words.
+   BYTEPOS is checked against the offset of SOURCE relative to BASE: the
+   extraction is only attempted when that offset is known to be >= BYTEPOS.
+
+   To get the content from SOURCE:
+   first load the word from memory that covers the SOURCE slot;
+   next shift the word, if needed, so that the slot lands in its low part;
+   then take that low part as an integer and convert to MODE as necessary.  */
+
+static rtx
+extract_subreg_from_loading_word (machine_mode mode, rtx source, rtx base,
+                                 poly_uint64 bytepos)
+{
+  rtx src_base = XEXP (source, 0);
+  poly_uint64 offset = MEM_OFFSET (source);
+
+  /* Strip a constant displacement from the address of SOURCE.  Only a
+     CONST_INT may be read with INTVAL; CONSTANT_P would also accept
+     SYMBOL_REF, CONST, LABEL_REF, etc., for which INTVAL is invalid.  */
+  if (GET_CODE (src_base) == PLUS && CONST_INT_P (XEXP (src_base, 1)))
+    {
+      offset += INTVAL (XEXP (src_base, 1));
+      src_base = XEXP (src_base, 0);
+    }
+
+  /* Likewise for the address of BASE, so that OFFSET becomes relative to
+     the start of BASE.  */
+  base = XEXP (base, 0);
+  if (GET_CODE (base) == PLUS && CONST_INT_P (XEXP (base, 1)))
+    {
+      poly_uint64 offset_on_base = INTVAL (XEXP (base, 1));
+      base = XEXP (base, 0);
+      offset -= offset_on_base;
+    }
+
+  /* Both addresses must use the same base expression, and the slot must
+     not start before BYTEPOS.  */
+  if (!rtx_equal_p (base, src_base) || !known_ge (offset, bytepos))
+    return NULL_RTX;
+
+  /* Subreg(DI,n) -> DF/SF/SI/HI/QI.
+     Only handle a slot that is no wider than a word, whose size divides
+     the word size, that has a corresponding integer mode, and that is
+     aligned to its own size inside the word.  START receives the index of
+     the containing word (not used further) and BYTE_OFF the byte offset
+     of the slot within that word.  */
+  poly_uint64 word_size = GET_MODE_SIZE (word_mode);
+  poly_uint64 mode_size = GET_MODE_SIZE (mode);
+  poly_uint64 byte_off;
+  unsigned int start;
+  machine_mode int_mode;
+  if (known_ge (word_size, mode_size) && multiple_p (word_size, mode_size)
+      && int_mode_for_mode (mode).exists (&int_mode)
+      && can_div_trunc_p (offset, word_size, &start, &byte_off)
+      && multiple_p (byte_off, mode_size))
+    {
+      /* Build a word_mode MEM that starts BYTE_OFF bytes before SOURCE,
+        i.e. at the beginning of the word that contains the slot, and
+        load it into a fresh pseudo.  */
+      rtx word_mem = copy_rtx (source);
+      PUT_MODE (word_mem, word_mode);
+      word_mem = adjust_address (word_mem, word_mode, -byte_off);
+
+      rtx word_reg = gen_reg_rtx (word_mode);
+      emit_move_insn (word_reg, word_mem);
+
+      /* If the slot is not already at the lowpart position of the word,
+        shift right by the byte distance between the two.  */
+      poly_uint64 low_off = subreg_lowpart_offset (int_mode, word_mode);
+      if (!known_eq (byte_off, low_off))
+       {
+         poly_uint64 shift_bytes = known_gt (byte_off, low_off)
+                                     ? byte_off - low_off
+                                     : low_off - byte_off;
+         word_reg = expand_shift (RSHIFT_EXPR, word_mode, word_reg,
+                                  shift_bytes * BITS_PER_UNIT, word_reg, 0);
+       }
+
+      rtx int_subreg = gen_lowpart (int_mode, word_reg);
+      if (mode == int_mode)
+       return int_subreg;
+
+      /* MODE differs from INT_MODE: copy the integer lowpart into its own
+        pseudo and view that pseudo in MODE.  */
+      rtx int_mode_reg = gen_reg_rtx (int_mode);
+      emit_move_insn (int_mode_reg, int_subreg);
+      return gen_lowpart (mode, int_mode_reg);
+    }
+
+  return NULL_RTX;
+}
+
+
 rtx
 expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
                    enum expand_modifier modifier, rtx *alt_rtl,
@@ -11892,6 +11963,20 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode 
tmode,
            && modifier != EXPAND_WRITE)
          op0 = flip_storage_order (mode1, op0);
 
+       /* Accessing a sub-field of a struct parameter which is passed via
+          integer registers.  */
+       if (mode == mode1 && TREE_CODE (tem) == PARM_DECL
+           && DECL_INCOMING_RTL (tem) && REG_P (DECL_INCOMING_RTL (tem))
+           && GET_MODE (DECL_INCOMING_RTL (tem)) == BLKmode && MEM_P (op0)
+           && MEM_OFFSET_KNOWN_P (op0))
+         {
+           rtx subreg
+             = extract_subreg_from_loading_word (mode, op0, DECL_RTL (tem),
+                                                 bytepos);
+           if (subreg)
+             op0 = subreg;
+         }
+
        if (mode == mode1 || mode1 == BLKmode || mode1 == tmode
            || modifier == EXPAND_CONST_ADDRESS
            || modifier == EXPAND_INITIALIZER)
diff --git a/gcc/testsuite/g++.target/powerpc/pr102024.C 
b/gcc/testsuite/g++.target/powerpc/pr102024.C
index 769585052b5..c8995cae707 100644
--- a/gcc/testsuite/g++.target/powerpc/pr102024.C
+++ b/gcc/testsuite/g++.target/powerpc/pr102024.C
@@ -5,7 +5,7 @@
 // Test that a zero-width bit field in an otherwise homogeneous aggregate
 // generates a psabi warning and passes arguments in GPRs.
 
-// { dg-final { scan-assembler-times {\mstd\M} 4 } }
+// { dg-final { scan-assembler-times {\mmtvsrd\M} 4 } }
 
 struct a_thing
 {
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108073.c 
b/gcc/testsuite/gcc.target/powerpc/pr108073.c
new file mode 100644
index 00000000000..7dd1a4a326a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr108073.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -save-temps" } */
+
+typedef struct DF {double a[4]; short s1; short s2; short s3; short s4; } DF; /* four doubles followed by four shorts */
+typedef struct SF {float a[4]; int i1; int i2; } SF; /* four floats followed by two ints */
+
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 3 {target { has_arch_ppc64 
&& has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-not {\mlwz\M} {target { has_arch_ppc64 && 
has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-not {\mlhz\M} {target { has_arch_ppc64 && 
has_arch_pwr8 } } } } */
+short  __attribute__ ((noipa)) foo_hi (DF a, int flag){if (flag == 2)return 
a.s2+a.s3;return 0;} /* flag == 2: sum of the short members s2 and s3; otherwise 0 */
+int  __attribute__ ((noipa)) foo_si (SF a, int flag){if (flag == 2)return 
a.i2+a.i1;return 0;} /* flag == 2: sum of the int members i2 and i1; otherwise 0 */
+double __attribute__ ((noipa)) foo_df (DF arg, int flag){if (flag == 2)return 
arg.a[3];else return 0.0;} /* flag == 2: last element of the double array; otherwise 0.0 */
+float  __attribute__ ((noipa)) foo_sf (SF arg, int flag){if (flag == 2)return 
arg.a[2]; return 0;} /* flag == 2: third element of the float array; otherwise 0 */
+float  __attribute__ ((noipa)) foo_sf1 (SF arg, int flag){if (flag == 2)return 
arg.a[1];return 0;} /* flag == 2: second element of the float array; otherwise 0 */
+
+DF gdf = {{1.0,2.0,3.0,4.0}, 1, 2, 3, 4}; /* a[] = 1.0..4.0, s1..s4 = 1..4 */
+SF gsf = {{1.0f,2.0f,3.0f,4.0f}, 1, 2}; /* a[] = 1.0f..4.0f, i1 = 1, i2 = 2 */
+
+int main() /* run-time check of the helpers against gdf/gsf; aborts on any mismatch */
+{
+  if (!(foo_hi (gdf, 2) == 5 && foo_si (gsf, 2) == 3 && foo_df (gdf, 2) == 4.0 /* flag == 2: member values expected */
+       && foo_sf (gsf, 2) == 3.0 && foo_sf1 (gsf, 2) == 2.0))
+    __builtin_abort ();
+  if (!(foo_hi (gdf, 1) == 0 && foo_si (gsf, 1) == 0 && foo_df (gdf, 1) == 0 /* any other flag: zero expected */
+       && foo_sf (gsf, 1) == 0 && foo_sf1 (gsf, 1) == 0))
+    __builtin_abort ();
+  return 0;
+}
+
-- 
2.31.1

Reply via email to