My attachment seems to have been stripped, so I'm sending it again inline and
hoping the tabs don't get stripped.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 89aaf8c018e3340dd2d53fc2a6538d3d1220b103..47428dd3e9c4fa8fc1f3d876e774defb6bc64640 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5330,6 +5330,29 @@ (define_expand "mov<mode>"
if (GET_CODE (operands[0]) != REG)
operands[1] = force_reg (<MODE>mode, operands[1]);
}
+
+ /* If we have a paradoxical subreg trying to write to <MODE> and the
+ registers don't overlap then we need to break it apart. What it's trying
+ to do is give two kinds of information at the same time. It's trying to
+ convey liveness information by saying that the entire register will be
+ written to eventually, but it also only wants to write a single part of the
+ register. Hence the paradoxical subreg.
+
+ However reload doesn't understand this concept and it will ultimately ICE.
+ Instead of allowing this we will split the two concerns. The liveness
+ information will be conveyed using a clobber and then we break apart the
+ paradoxical subreg into just a normal write of the part that it wanted to
+ write originally. */
+
+ if (paradoxical_subreg_p (operands[1]))
+ {
+ if (!reg_overlap_mentioned_p (operands[0], operands[1]))
+ emit_clobber (operands[0]);
+ poly_uint64 offset = SUBREG_BYTE (operands[1]);
+ operands[1] = SUBREG_REG (operands[1]);
+ operands[0] = simplify_gen_subreg (GET_MODE (operands[1]), operands[0],
+ <MODE>mode, offset);
+ }
})
diff --git a/gcc/testsuite/g++.target/aarch64/pr94052.C b/gcc/testsuite/g++.target/aarch64/pr94052.C
new file mode 100644
index 0000000000000000000000000000000000000000..d36c9bdc1588533db35eb3cbd2502034edd25452
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/pr94052.C
@@ -0,0 +1,174 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=gnu++11 -w" } */
+
+namespace c {
+typedef int d;
+template <typename e> struct f { typedef e g; };
+template <bool, typename> struct h;
+template <typename e> e aa(typename f<e>::g i) { return i; }
+template <typename, typename> struct j {};
+template <d, typename> struct k;
+template <class l, class m> struct k<1, j<l, m>> { typedef m g; };
+template <d n, class l, class m> typename k<n, j<l, m>>::g ab(j<l, m>);
+} // namespace c
+typedef long d;
+typedef char o;
+typedef int p;
+typedef char q;
+typedef int r;
+namespace {
+struct s;
+constexpr d t = 6;
+template <typename> class ad {
+public:
+ static constexpr d u = t;
+ d v();
+ d x();
+ d y();
+};
+class z : ad<int> {};
+struct ae {
+ p af;
+};
+class ag {
+public:
+ ae ah();
+};
+} // namespace
+typedef __Int32x4_t ai;
+typedef struct {
+ ai aj[2];
+} ak;
+typedef int al;
+void am(p *a, ai b) { __builtin_aarch64_st1v4si(a, b); }
+namespace an {
+class ao {
+public:
+ bool operator==(ao);
+ d v();
+ d x();
+};
+class ap : public ad<r> {};
+class aq {
+public:
+ c::j<int, int> ar();
+ int as();
+ int at();
+};
+class au {
+public:
+ virtual d av(d);
+ virtual ap aw();
+ virtual ag ax();
+};
+class ay {};
+class az {
+ virtual void ba(const ay &, const s &);
+};
+using bb = az;
+class bc;
+class bd : bb {
+ void ba(const ay &, const s &);
+ bc *be;
+ bc *bf;
+ bc *bg;
+ aq bh;
+ int bi;
+ int bj;
+ ao bk;
+};
+namespace bl {
+namespace bm {
+namespace bn {
+class bo;
+}
+} // namespace bm
+} // namespace bl
+namespace bn {
+template <typename ac = c::h<0, bl::bm ::bn::bo>>
+ai bp(ac *, ac *, ac *, al, al, al, d, p);
+template <typename ac = c::h<0, bl::bm ::bn::bo>>
+ak bq(ac *br, ac *bs, ac *bt, al bu, al bv, al bw, d bx, int, int by) {
+ ak{bp(br, bs, bt, bu, bv, bw, bx, by), bp(br, bs, bt, bu, bv, bw, bx, by)};
+}
+template <typename ac = c::h<0, bl::bm ::bn::bo>>
+ak bz(ac *, ac *, ac *, al, al, al &, int, p);
+template <int> void ca(p *, const ak &);
+template <> void ca<1>(p *buffer, const ak &cb) {
+ am(buffer, cb.aj[0]);
+ am(buffer + 4, cb.aj[1]);
+}
+int cc(int, int);
+} // namespace bn
+class bc {
+public:
+ virtual au *cd();
+};
+class ce {
+public:
+ q *cf();
+};
+template <d> struct cg {
+ template <typename ch> static void ci(ay, z cj, ch ck) { ck(cj); }
+};
+template <typename ch> void cl(ay w, ch ck) {
+ z cj;
+ cg<z::u>::ci(w, cj, c::aa<ch>(ck));
+}
+namespace {
+template <typename T1, typename cm, int cn> class co {
+public:
+ static void convolve(ay, int cs, bc *cp, bc *cq, bc *cr, aq cw, int, ao ct) {
+ int by = cp->cd()->ax().ah().af;
+ int cu = cq->cd()->ax().ah().af;
+ cp->cd()->aw().v();
+ int cv = cp->cd()->aw().x();
+ cp->cd()->aw().y();
+ cp->cd()->aw();
+ int da = cr->cd()->aw().x();
+ int cx = cq->cd()->aw().x();
+ cq->cd()->aw().y();
+ int cy = cr->cd()->av(0);
+ int cz = cr->cd()->av(1);
+ bn::cc(cs, cn);
+ int de = c::ab<1>(cw.ar());
+ cw.as();
+ cw.at();
+ ay db;
+ ce dc;
+ ce dd;
+ ce w;
+ q *di = w.cf();
+ cl(db, [&](z) {
+ int df;
+ dc;
+ di;
+ cx;
+ auto dg(cu);
+ auto dh(cu);
+ auto dl(cu);
+ for (; cz; df += de) {
+ auto br = reinterpret_cast<T1 *>(cv);
+ auto bs = reinterpret_cast<T1 *>(cv);
+ auto bt = reinterpret_cast<T1 *>(df * ct.x());
+ auto dj = reinterpret_cast<cm *>(dd.cf() + da);
+ for (int dk; dk < cy; dk += cs, dj += cs)
+ if (ct == ao()) {
+ auto vres = bn::bz(br, bs, bt, dg, dh, dl, cn, by);
+ bn::ca<cn>(dj, vres);
+ } else
+ bn::bq(br, bs, bt, dg, dh, dl, ct.v(), cn, by);
+ }
+ });
+ }
+};
+template <typename T1, typename cm>
+void bz(ay dm, int cs, bc *cp, bc *cq, bc *cr, aq cw, int dn, ao ct) {
+ co<T1, cm, 1>::convolve(dm, cs, cp, cq, cr, cw, dn, ct);
+ co<T1, cm, 2>::convolve(dm, cs, cp, cq, cr, cw, dn, ct);
+}
+} // namespace
+void bd::ba(const ay &dm, const s &) {
+ bz<o, p>(dm, bi, be, bg, bf, bh, bj, bk);
+}
+} // namespace an
> -----Original Message-----
> From: Tamar Christina <[email protected]>
> Sent: Monday, March 9, 2020 13:53
> To: [email protected]
> Cc: nd <[email protected]>; Richard Earnshaw <[email protected]>;
> Marcus Shawcroft <[email protected]>; Kyrylo Tkachov
> <[email protected]>; Richard Sandiford
> <[email protected]>
> Subject: [PATCH][GCC][AArch64]: Break apart paradoxical subregs for
> VSTRUCT writes (PR target/94052)
>
> Hi All,
>
> This works around an ICE in reload where from expand we get the following
> RTL generated for VSTRUCT mode writes:
>
> (insn 446 354 445 2 (set (reg:CI 383)
> (subreg:CI (reg:V4SI 291) 0)) "small.i":146:22 3408 {*aarch64_movci}
> (nil))
>
> This sequence is trying to say two things:
>
> 1) liveness: It's trying to say that eventually the whole CI reg will be
> written to. It does this by generating the paradoxical subreg.
> 2) write data: It's trying to in the same instruction also write the V4SI mode
> component at offset 0 in the CI reg.
>
> Reload is unable to understand this concept, so it attempts to handle this
> instruction by breaking it apart: it first writes the data and then tries to
> reload the paradoxical part. This brings it back to the same instruction
> again, and eventually we ICE because we reach the limit on the number of
> reloads.
>
> This patch fixes it in the backend: when we see such a paradoxical
> construction we break it apart, issuing a clobber to convey the liveness
> information and then emitting a normal subreg write for the component that
> the paradoxical subreg was trying to write to.
>
> Concretely we generate this:
>
> (insn 42 41 43 (clobber (reg/v:CI 122 [ diD.5226 ])) "small.i":121:23 -1
> (nil))
>
> (insn 43 42 44 (set (subreg:V4SI (reg/v:CI 122 [ diD.5226 ]) 0)
> (reg:V4SI 136)) "small.i":121:23 -1
> (nil))
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master and back-port to GCC 9 and GCC 8 after some stew?
>
> I will look into whether we can avoid generating these at all, but I'm not
> sure this is possible, since the mid-end would need both the mode and the
> class to know that a pseudo will be assigned to multiple hardregs.
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 2020-03-09 Tamar Christina <[email protected]>
>
> PR target/94052
> * config/aarch64/aarch64-simd.md (mov<mode>): Remove paradoxical
> subregs of VSTRUCT modes.
>
> gcc/testsuite/ChangeLog:
>
> 2020-03-09 Tamar Christina <[email protected]>
>
> PR target/94052
> * g++.target/aarch64/pr94052.C: New test.
>
> --