From: Kong Lingling <[email protected]>
In inline asm, we do not know whether an insn can use EGPR, so disable EGPR
usage by default by mapping the common reg/mem constraints to non-EGPR
constraints. Use the option -mapx-inline-asm-use-gpr32 to enable EGPR usage
for inline asm.
gcc/ChangeLog:
* config/i386/i386.cc (INCLUDE_STRING): Add include for
ix86_md_asm_adjust.
(ix86_md_asm_adjust): When APX EGPR enabled without specifying the
target option, map reg/mem constraints to non-EGPR constraints.
* config/i386/i386.opt: Add option mapx-inline-asm-use-gpr32.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-inline-gpr-norex2.c: New test.
---
gcc/config/i386/i386.cc | 44 +++++++
gcc/config/i386/i386.opt | 5 +
.../gcc.target/i386/apx-inline-gpr-norex2.c | 107 ++++++++++++++++++
3 files changed, 156 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/apx-inline-gpr-norex2.c
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index d26d9ab0d9d..9460ebbfda4 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -17,6 +17,7 @@ You should have received a copy of the GNU General Public
License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define INCLUDE_STRING
#define IN_TARGET_CODE 1
#include "config.h"
@@ -23077,6 +23078,49 @@ ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &
/*inputs*/,
bool saw_asm_flag = false;
start_sequence ();
+ /* TODO: For now only the general r/m constraints are mapped to non-EGPR
+ constraints; all usable constraints should eventually be mapped. */
+ if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
+ {
+ /* Map the first "r" in each constraint that is not part of "Br" or "Yr"
+ to "h", which disallows r16-r31; any later "r" is left unchanged. */
+ for (unsigned i = 0; i < constraints.length (); i++)
+ {
+ std::string *s = new std::string (constraints[i]); /* Not freed in this block; constraints[i] may point into it. */
+ size_t pos = s->find ('r');
+ while (pos != std::string::npos)
+ {
+ if (pos > 0
+ && (s->at (pos - 1) == 'Y' || s->at (pos - 1) == 'B'))
+ pos = s->find ('r', pos + 1); /* Part of "Yr"/"Br": skip to the next "r". */
+ else
+ {
+ s->replace (pos, 1, "h");
+ constraints[i] = (const char*) s->c_str ();
+ break;
+ }
+ }
+ }
+ /* Also map the first "m", "memory" or "Bm" in each constraint, which may
+ use GPR32, to "Bt", "Bt" or "BT" respectively. */
+ for (unsigned i = 0; i < constraints.length (); i++)
+ {
+ std::string *s = new std::string (constraints[i]);
+ size_t pos = s->find ("m");
+ size_t pos2 = s->find ("memory");
+ if (pos != std::string::npos)
+ {
+ if (pos > 0 && (s->at (pos - 1) == 'B'))
+ s->replace (pos - 1, 2, "BT");
+ else if (pos2 != std::string::npos)
+ s->replace (pos, 6, "Bt"); /* NOTE(review): replaces at pos, not pos2; assumes the first "m" starts "memory" -- confirm. */
+ else
+ s->replace (pos, 1, "Bt");
+ constraints[i] = (const char*) s->c_str ();
+ }
+ }
+ }
+
for (unsigned i = 0, n = outputs.length (); i < n; ++i)
{
const char *con = constraints[i];
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 1ee4d90186e..5c8d3a207e3 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1335,3 +1335,8 @@ Enum(apx_features) String(ndd) Value(apx_ndd) Set(4)
EnumValue
Enum(apx_features) String(all) Value(apx_all) Set(1)
+
+mapx-inline-asm-use-gpr32
+Target Var(ix86_apx_inline_asm_use_gpr32) Init(0)
+Enable GPR32 in inline asm when APX_EGPR is enabled; do not
+map reg or mem constraints in inline asm to GPR16.
diff --git a/gcc/testsuite/gcc.target/i386/apx-inline-gpr-norex2.c
b/gcc/testsuite/gcc.target/i386/apx-inline-gpr-norex2.c
new file mode 100644
index 00000000000..21534450045
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-inline-gpr-norex2.c
@@ -0,0 +1,107 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mapxf -m64 -march=skylake-avx512 -DDTYPE32" } */
+
+typedef unsigned int u32;
+typedef unsigned long long u64;
+/* The dg-options for this test pass -DDTYPE32, selecting the u32 variant. */
+#ifdef DTYPE32
+typedef u32 DTYPE;
+#define byteswap byteswapu32
+#endif
+/* R(x,n): rotate x right by n bits, assuming x is 32 bits wide. */
+#define R(x,n) ( (x >> n) | (x << (32 - n)))
+/* S0/S1: XOR of three right-rotations of x by fixed amounts. */
+#define S0(x) (R(x, 2) ^ R(x,13) ^ R(x,22))
+#define S1(x) (R(x, 6) ^ R(x,11) ^ R(x,25))
+/* TT: one mixing round; sets tmp1/tmp2 (declared by the user of the macro) and updates h and d. */
+#define TT(a,b,c,d,e,f,g,h,x,K) \
+{ \
+ tmp1 = h + S1(e) + (g ^ (e & (f ^ g))) + K + x; \
+ tmp2 = S0(a) + ((a & b) | (c & (a | b))); \
+ h = tmp1 + tmp2; \
+ d += tmp1; \
+}
+
+static inline u32 byteswapu32(u32 x) /* Returns x with its four bytes in reverse order. */
+{
+ x = (x & 0x0000FFFF) << 16 | (x & 0xFFFF0000) >> 16; /* Swap the two 16-bit halves. */
+ x = (x & 0x00FF00FF) << 8 | (x & 0xFF00FF00) >> 8; /* Swap the two bytes within each half. */
+ return x;
+}
+
+void foo (DTYPE in[16], DTYPE out[8], const DTYPE C[16]) /* Mixes in[] and C[] into out[], then passes values to test asm statements. */
+{
+ DTYPE tmp1 = 0, tmp2 = 0, a, b, c, d, e, f, g, h;
+ DTYPE w0, w1, w2, w3, w4, w5, w6, w7,
+ w8, w9, w10, w11, w12, w13, w14, w15;
+ w0 = byteswap(in[0]); /* Byte-swap each of the 16 input words. */
+ w1 = byteswap(in[1]);
+ w2 = byteswap(in[2]);
+ w3 = byteswap(in[3]);
+ w4 = byteswap(in[4]);
+ w5 = byteswap(in[5]);
+ w6 = byteswap(in[6]);
+ w7 = byteswap(in[7]);
+ w8 = byteswap(in[8]);
+ w9 = byteswap(in[9]);
+ w10 = byteswap(in[10]);
+ w11 = byteswap(in[11]);
+ w12 = byteswap(in[12]);
+ w13 = byteswap(in[13]);
+ w14 = byteswap(in[14]);
+ w15 = byteswap(in[15]);
+ a = out[0]; /* Load a..h from out[0..7]. */
+ b = out[1];
+ c = out[2];
+ d = out[3];
+ e = out[4];
+ f = out[5];
+ g = out[6];
+ h = out[7];
+ /* Sixteen TT rounds; the roles of a..h rotate by one position each round. */
+ TT(a, b, c, d, e, f, g, h, w0, C[0]);
+ TT(h, a, b, c, d, e, f, g, w1, C[1]);
+ TT(g, h, a, b, c, d, e, f, w2, C[2]);
+ TT(f, g, h, a, b, c, d, e, w3, C[3]);
+ TT(e, f, g, h, a, b, c, d, w4, C[4]);
+ TT(d, e, f, g, h, a, b, c, w5, C[5]);
+ TT(c, d, e, f, g, h, a, b, w6, C[6]);
+ TT(b, c, d, e, f, g, h, a, w7, C[7]);
+ TT(a, b, c, d, e, f, g, h, w8, C[8]);
+ TT(h, a, b, c, d, e, f, g, w9, C[9]);
+ TT(g, h, a, b, c, d, e, f, w10, C[10]);
+ TT(f, g, h, a, b, c, d, e, w11, C[11]);
+ TT(e, f, g, h, a, b, c, d, w12, C[12]);
+ TT(d, e, f, g, h, a, b, c, w13, C[13]);
+ TT(c, d, e, f, g, h, a, b, w14, C[14]);
+ TT(b, c, d, e, f, g, h, a, w15, C[15]);
+ /* Add the resulting a..h back into out[0..7]. */
+ out[0] += a;
+ out[1] += b;
+ out[2] += c;
+ out[3] += d;
+ out[4] += e;
+ out[5] += f;
+ out[6] += g;
+ out[7] += h;
+ /* Asm statements using the "Yr", "Brr", "rBr", "r", "m" and "memory" operand constraints. */
+ __asm__ __volatile__ ("test_asm_xmm %0, %%rax" : : "Yr" (out[7]) : "rax");
+ __asm__ __volatile__ ("test_asm_Brr %0, %%rax" : : "Brr" (w14) : "rbx");
+ __asm__ __volatile__ ("test_asm_rBr %0, %%rax" : : "rBr" (w13) : "rbx");
+ __asm__ __volatile__ ("test_asm_r %0, %%rax" : : "r" (w15) : "rbx");
+ __asm__ __volatile__ ("test_asm_m %0, %%rax" : : "m" (out[0]) : "rbx");
+ __asm__ __volatile__ ("test_asm_mem %0, %%rax" : : "memory" (out[1]) : "rbx");
+}
+
+/* { dg-final { scan-assembler-not "knot" } } */
+/* { dg-final { scan-assembler-not "kxor" } } */
+/* { dg-final { scan-assembler-not "kor" } } */
+/* { dg-final { scan-assembler-not "kandn" } } */
+/* { dg-final { scan-assembler-times "test_asm_xmm %xmm5, %rax" 1 } } */
+/* { dg-final { scan-assembler-times "test_asm_Brr %r15d, %rax" 1 } } */
+/* { dg-final { scan-assembler-times "test_asm_rBr %r14d, %rax" 1 } } */
+/* { dg-final { scan-assembler-times "test_asm_r %r13d, %rax" 1 } } */
+/* { dg-final { scan-assembler-not "test_asm_rBr %r31d, %rax" } } */
+/* { dg-final { scan-assembler-not "test_asm_r %r30d, %rax" } } */
+/* { dg-final { scan-assembler-not "test_asm_m \\(%r29d\\), %rax" } } */
+/* { dg-final { scan-assembler-not "test_asm_mem \\(%r28d\\), %rax" } } */
--
2.31.1