From: Kong Lingling <lingling.k...@intel.com>

In inline asm, we do not know whether the insn can use EGPR, so disable
EGPR usage by default by mapping the common reg/mem constraints to non-EGPR
constraints. Use the flag -mapx-inline-asm-use-gpr32 to enable EGPR usage
for inline asm.

gcc/ChangeLog:

        * config/i386/i386.cc (INCLUDE_STRING): Define, for the std::string
        use in ix86_md_asm_adjust.
        (ix86_md_asm_adjust): When APX EGPR is enabled and
        -mapx-inline-asm-use-gpr32 is not specified, map reg/mem constraints
        to non-EGPR constraints.
        * config/i386/i386.opt: Add option mapx-inline-asm-use-gpr32.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/apx-inline-gpr-norex2.c: New test.
---
 gcc/config/i386/i386.cc                       |  44 +++++++
 gcc/config/i386/i386.opt                      |   5 +
 .../gcc.target/i386/apx-inline-gpr-norex2.c   | 107 ++++++++++++++++++
 3 files changed, 156 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-inline-gpr-norex2.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index d26d9ab0d9d..9460ebbfda4 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -17,6 +17,7 @@ You should have received a copy of the GNU General Public 
License
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
+#define INCLUDE_STRING
 #define IN_TARGET_CODE 1
 
 #include "config.h"
@@ -23077,6 +23078,49 @@ ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & 
/*inputs*/,
   bool saw_asm_flag = false;
 
   start_sequence ();
+  /* TODO: Here we just mapped the general r/m constraints to non-EGPR
+   constraints, will eventually map all the usable constraints in the future. 
*/
+  if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
+    {
+      /* Map "r" constraint in inline asm to "h" that disallows r16-r31
+        and replace only r, exclude Br and Yr.  */
+      for (unsigned i = 0; i < constraints.length (); i++)
+       {
+         std::string *s = new std::string (constraints[i]);
+         size_t pos = s->find ('r');
+         while (pos != std::string::npos)
+           {
+             if (pos > 0
+                 && (s->at (pos - 1) == 'Y' || s->at (pos - 1) == 'B'))
+               pos = s->find ('r', pos + 1);
+             else
+               {
+                 s->replace (pos, 1, "h");
+                 constraints[i] = (const char*) s->c_str ();
+                 break;
+               }
+           }
+       }
+      /* Also map "m/memory/Bm" constraint that may use GPR32, replace them 
with
+        "Bt/Bt/BT".  */
+      for (unsigned i = 0; i < constraints.length (); i++)
+       {
+         std::string *s = new std::string (constraints[i]);
+         size_t pos = s->find ("m");
+         size_t pos2 = s->find ("memory");
+         if (pos != std::string::npos)
+           {
+             if (pos > 0 && (s->at (pos - 1) == 'B'))
+                 s->replace (pos - 1, 2, "BT");
+             else if (pos2 != std::string::npos)
+                 s->replace (pos, 6, "Bt");
+             else
+                 s->replace (pos, 1, "Bt");
+             constraints[i] = (const char*) s->c_str ();
+           }
+       }
+     }
+
   for (unsigned i = 0, n = outputs.length (); i < n; ++i)
     {
       const char *con = constraints[i];
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 1ee4d90186e..5c8d3a207e3 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1335,3 +1335,8 @@ Enum(apx_features) String(ndd) Value(apx_ndd) Set(4)
 
 EnumValue
 Enum(apx_features) String(all) Value(apx_all) Set(1)
+
+mapx-inline-asm-use-gpr32
+Target Var(ix86_apx_inline_asm_use_gpr32) Init(0)
+Enable GPR32 in inline asm when APX_EGPR enabled, do not
+hook reg or mem constraint in inline asm to GPR16.
diff --git a/gcc/testsuite/gcc.target/i386/apx-inline-gpr-norex2.c 
b/gcc/testsuite/gcc.target/i386/apx-inline-gpr-norex2.c
new file mode 100644
index 00000000000..21534450045
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-inline-gpr-norex2.c
@@ -0,0 +1,107 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mapxf -m64 -march=skylake-avx512 -DDTYPE32" } */
+
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+#ifdef DTYPE32
+typedef u32 DTYPE;
+#define byteswap byteswapu32
+#endif
+
+#define R(x,n) ( (x >> n) | (x << (32 - n)))
+
+#define S0(x) (R(x, 2) ^ R(x,13) ^ R(x,22))
+#define S1(x) (R(x, 6) ^ R(x,11) ^ R(x,25))
+
+#define TT(a,b,c,d,e,f,g,h,x,K)                 \
+{                                                        \
+    tmp1 = h + S1(e) + (g ^ (e & (f ^ g))) + K + x;                \
+    tmp2 = S0(a) + ((a & b) | (c & (a | b)));                           \
+    h  = tmp1 + tmp2;                                    \
+    d += tmp1;                                           \
+}
+
+static inline u32 byteswapu32(u32 x) /* Reverse the byte order of the 32-bit value X.  */
+{
+  x = (x & 0x0000FFFF) << 16 | (x & 0xFFFF0000) >> 16; /* Swap the two 16-bit halves.  */
+  x = (x & 0x00FF00FF) << 8 | (x & 0xFF00FF00) >> 8;  /* Swap the two bytes within each half.  */
+  return x;
+}
+
+void foo (DTYPE in[16], DTYPE out[8], const DTYPE C[16]) /* Test body: 16 TT rounds over IN/OUT/C, then inline asm with several operand constraints.  */
+{
+    DTYPE tmp1 = 0, tmp2 = 0, a, b, c, d, e, f, g, h; /* tmp1/tmp2 are the scratch values written by the TT macro.  */
+    DTYPE w0, w1, w2, w3, w4, w5, w6, w7,
+       w8, w9, w10, w11, w12, w13, w14, w15;
+    w0  = byteswap(in[0]); /* Load the 16 input words byte-swapped; byteswap is byteswapu32 under DTYPE32.  */
+    w1  = byteswap(in[1]);
+    w2  = byteswap(in[2]);
+    w3  = byteswap(in[3]);
+    w4  = byteswap(in[4]);
+    w5  = byteswap(in[5]);
+    w6  = byteswap(in[6]);
+    w7  = byteswap(in[7]);
+    w8  = byteswap(in[8]);
+    w9  = byteswap(in[9]);
+    w10 = byteswap(in[10]);
+    w11 = byteswap(in[11]);
+    w12 = byteswap(in[12]);
+    w13 = byteswap(in[13]);
+    w14 = byteswap(in[14]);
+    w15 = byteswap(in[15]);
+    a = out[0]; /* The working variables a..h start from the current OUT values.  */
+    b = out[1];
+    c = out[2];
+    d = out[3];
+    e = out[4];
+    f = out[5];
+    g = out[6];
+    h = out[7];
+    
+    TT(a, b, c, d, e, f, g, h,  w0, C[0]); /* 16 rounds; the roles of a..h rotate by one position each round.  */
+    TT(h, a, b, c, d, e, f, g,  w1, C[1]);
+    TT(g, h, a, b, c, d, e, f,  w2, C[2]);
+    TT(f, g, h, a, b, c, d, e,  w3, C[3]);
+    TT(e, f, g, h, a, b, c, d,  w4, C[4]);
+    TT(d, e, f, g, h, a, b, c,  w5, C[5]);
+    TT(c, d, e, f, g, h, a, b,  w6, C[6]);
+    TT(b, c, d, e, f, g, h, a,  w7, C[7]);
+    TT(a, b, c, d, e, f, g, h,  w8, C[8]);
+    TT(h, a, b, c, d, e, f, g,  w9, C[9]);
+    TT(g, h, a, b, c, d, e, f, w10, C[10]);
+    TT(f, g, h, a, b, c, d, e, w11, C[11]);
+    TT(e, f, g, h, a, b, c, d, w12, C[12]);
+    TT(d, e, f, g, h, a, b, c, w13, C[13]);
+    TT(c, d, e, f, g, h, a, b, w14, C[14]);
+    TT(b, c, d, e, f, g, h, a, w15, C[15]);
+
+    out[0] += a; /* Accumulate the round results back into OUT.  */
+    out[1] += b;
+    out[2] += c;
+    out[3] += d;
+    out[4] += e;
+    out[5] += f;
+    out[6] += g;
+    out[7] += h;
+
+    __asm__ __volatile__ ("test_asm_xmm %0, %%rax" : : "Yr" (out[7]) : "rax"); /* Each asm uses a distinct mnemonic so the dg-final scans can match its operand; this one uses "Yr".  */
+    __asm__ __volatile__ ("test_asm_Brr %0, %%rax" : : "Brr" (w14) : "rbx"); /* "Br" followed by a plain "r" alternative.  */
+    __asm__ __volatile__ ("test_asm_rBr %0, %%rax" : : "rBr" (w13) : "rbx"); /* Plain "r" followed by "Br".  */
+    __asm__ __volatile__ ("test_asm_r %0, %%rax" : : "r" (w15) : "rbx"); /* Plain "r" operand.  */
+    __asm__ __volatile__ ("test_asm_m %0, %%rax" : : "m" (out[0]) : "rbx"); /* Plain "m" operand.  */
+    __asm__ __volatile__ ("test_asm_mem %0, %%rax" : : "memory" (out[1]) : 
"rbx"); /* NOTE(review): "memory" is used here as an operand constraint string, not as a clobber -- confirm this is intended.  */
+}
+
+/* { dg-final { scan-assembler-not "knot" } } */
+/* { dg-final { scan-assembler-not "kxor" } } */
+/* { dg-final { scan-assembler-not "kor" } } */
+/* { dg-final { scan-assembler-not "kandn" } } */
+/* { dg-final { scan-assembler-times "test_asm_xmm %xmm5, %rax" 1 } } */
+/* { dg-final { scan-assembler-times "test_asm_Brr %r15d, %rax" 1 } } */
+/* { dg-final { scan-assembler-times "test_asm_rBr %r14d, %rax" 1 } } */
+/* { dg-final { scan-assembler-times "test_asm_r %r13d, %rax" 1 } } */
+/* { dg-final { scan-assembler-not "test_asm_rBr %r31d, %rax" } } */
+/* { dg-final { scan-assembler-not "test_asm_r %r30d, %rax" } } */
+/* { dg-final { scan-assembler-not "test_asm_m \\(%r29d\\), %rax" } } */
+/* { dg-final { scan-assembler-not "test_asm_mem \\(%r28d\\), %rax" } } */
-- 
2.31.1

Reply via email to