For left shift, the TARGET_DOUBLE_WITH_ADD optimization turns a shl by 1
into an add. Since the NDD form of add cannot take two memory source
operands, its src operand would have to be a register, so for now we just
keep using the NDD form of the shift instead of add.

The TARGET_SHIFT1 optimization tries to omit the constant 1 in order to use
a shorter opcode, but for NDD the assembler automatically uses the shorter
encoding whether or not $1 is present, so NDD is not involved in it.

The doubleword insns for left shift call ix86_expand_ashl, which assumes
that all shift-related patterns have the same operand[0] and operand[1].
We will support these patterns in a standalone patch.

gcc/ChangeLog:

        * config/i386/i386.md (*ashl<mode>3_1): Extend with new
        alternatives to support NDD, limit the new alternative to
        generate sal only, and adjust output template for NDD.
        (*ashlsi3_1_zext): Likewise.
        (*ashlhi3_1): Likewise.
        (*ashlqi3_1): Likewise.
        (*ashl<mode>3_cmp): Likewise.
        (*ashlsi3_cmp_zext): Likewise, and use nonimmediate_operand for
        operands[1] to accept memory input for NDD alternative.
        (*ashl<mode>3_cconly): Likewise.
        (*ashl<dwi>3_doubleword_highpart): Adjust codegen for NDD.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/apx-ndd.c: Add tests for sal.
---
 gcc/config/i386/i386.md                 | 172 ++++++++++++++++--------
 gcc/testsuite/gcc.target/i386/apx-ndd.c |  22 +++
 2 files changed, 136 insertions(+), 58 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ad4c958a1e8..c67896cf97c 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14472,10 +14472,19 @@ (define_insn_and_split 
"*ashl<dwi>3_doubleword_highpart"
 {
   split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[3]);
   int bits = INTVAL (operands[2]) - (<MODE_SIZE> * BITS_PER_UNIT);
-  if (!rtx_equal_p (operands[3], operands[1]))
-    emit_move_insn (operands[3], operands[1]);
-  if (bits > 0)
-    emit_insn (gen_ashl<mode>3 (operands[3], operands[3], GEN_INT (bits)));
+  bool op_equal_p = rtx_equal_p (operands[3], operands[1]);
+  if (bits == 0)
+    {
+      if (!op_equal_p)
+       emit_move_insn (operands[3], operands[1]);
+    }
+  else
+    {
+      if (!op_equal_p && !TARGET_APX_NDD)
+       emit_move_insn (operands[3], operands[1]);
+      rtx op_tmp = TARGET_APX_NDD ? operands[1] : operands[3];
+      emit_insn (gen_ashl<mode>3 (operands[3], op_tmp, GEN_INT (bits)));
+    }
   ix86_expand_clear (operands[0]);
   DONE;
 })
@@ -14782,12 +14791,14 @@ (define_insn "*bmi2_ashl<mode>3_1"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*ashl<mode>3_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k")
-       (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k")
-                     (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r")
+       (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" 
"0,l,rm,k,rm")
+                     (match_operand:QI 2 "nonmemory_operand" 
"c<S>,M,r,<KS>,c<S>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands,
+                           TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
@@ -14802,18 +14813,25 @@ (define_insn "*ashl<mode>3_1"
 
     default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         /* For NDD form instructions related to TARGET_SHIFT1, the $1
+            immediate do not need to be omitted as assembler will map it
+            to use shorter encoding. */
+         && !use_ndd)
        return "sal{<imodesuffix>}\t%0";
       else
-       return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+       return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                      : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "isa" "*,*,bmi2,<kmov_isa>")
+  [(set_attr "isa" "*,*,bmi2,<kmov_isa>,apx_ndd")
    (set (attr "type")
      (cond [(eq_attr "alternative" "1")
              (const_string "lea")
            (eq_attr "alternative" "2")
              (const_string "ishiftx")
+           (eq_attr "alternative" "4")
+             (const_string "ishift")
             (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
@@ -14855,13 +14873,15 @@ (define_insn "*bmi2_ashlsi3_1_zext"
    (set_attr "mode" "SI")])
 
 (define_insn "*ashlsi3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (zero_extend:DI
-         (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm")
-                    (match_operand:QI 2 "nonmemory_operand" "cI,M,r"))))
+         (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm,rm")
+                    (match_operand:QI 2 "nonmemory_operand" "cI,M,r,cI"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+  "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands,
+                                           TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
@@ -14874,18 +14894,22 @@ (define_insn "*ashlsi3_1_zext"
 
     default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        return "sal{l}\t%k0";
       else
-       return "sal{l}\t{%2, %k0|%k0, %2}";
+       return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}"
+                      : "sal{l}\t{%2, %k0|%k0, %2}";
     }
 }
-  [(set_attr "isa" "*,*,bmi2")
+  [(set_attr "isa" "*,*,bmi2,apx_ndd")
    (set (attr "type")
      (cond [(eq_attr "alternative" "1")
              (const_string "lea")
            (eq_attr "alternative" "2")
              (const_string "ishiftx")
+           (eq_attr "alternative" "3")
+             (const_string "ishift")
             (and (match_test "TARGET_DOUBLE_WITH_ADD")
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
@@ -14915,12 +14939,14 @@ (define_split
   "operands[2] = gen_lowpart (SImode, operands[2]);")
 
 (define_insn "*ashlhi3_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k")
-       (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k")
-                  (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww")))
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r")
+       (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm")
+                  (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+  "ix86_binary_operator_ok (ASHIFT, HImode, operands,
+                           TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
@@ -14933,18 +14959,22 @@ (define_insn "*ashlhi3_1"
 
     default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        return "sal{w}\t%0";
       else
-       return "sal{w}\t{%2, %0|%0, %2}";
+       return use_ndd ? "sal{w}\t{%2, %1, %0|%0, %1, %2}"
+                      : "sal{w}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "isa" "*,*,avx512f")
+  [(set_attr "isa" "*,*,avx512f,apx_ndd")
    (set (attr "type")
      (cond [(eq_attr "alternative" "1")
              (const_string "lea")
            (eq_attr "alternative" "2")
              (const_string "msklog")
+           (eq_attr "alternative" "3")
+             (const_string "ishift")
             (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
@@ -14960,15 +14990,17 @@ (define_insn "*ashlhi3_1"
                           (match_test "optimize_function_for_size_p 
(cfun)")))))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "HI,SI,HI")])
+   (set_attr "mode" "HI,SI,HI,HI")])
 
 (define_insn "*ashlqi3_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k")
-       (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k")
-                  (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb")))
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r")
+       (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm")
+                  (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+  "ix86_binary_operator_ok (ASHIFT, QImode, operands,
+                           TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
@@ -14984,7 +15016,8 @@ (define_insn "*ashlqi3_1"
 
     default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        {
          if (get_attr_mode (insn) == MODE_SI)
            return "sal{l}\t%k0";
@@ -14996,16 +15029,19 @@ (define_insn "*ashlqi3_1"
          if (get_attr_mode (insn) == MODE_SI)
            return "sal{l}\t{%2, %k0|%k0, %2}";
          else
-           return "sal{b}\t{%2, %0|%0, %2}";
+           return use_ndd ? "sal{b}\t{%2, %1, %0|%0, %1, %2}"
+                          : "sal{b}\t{%2, %0|%0, %2}";
        }
     }
 }
-  [(set_attr "isa" "*,*,*,avx512dq")
+  [(set_attr "isa" "*,*,*,avx512dq,apx_ndd")
    (set (attr "type")
      (cond [(eq_attr "alternative" "2")
              (const_string "lea")
            (eq_attr "alternative" "3")
              (const_string "msklog")
+           (eq_attr "alternative" "4")
+             (const_string "ishift")
             (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
@@ -15021,10 +15057,10 @@ (define_insn "*ashlqi3_1"
                           (match_test "optimize_function_for_size_p 
(cfun)")))))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "QI,SI,SI,QI")
+   (set_attr "mode" "QI,SI,SI,QI,QI")
    ;; Potential partial reg stall on alternative 1.
    (set (attr "preferred_for_speed")
-     (cond [(eq_attr "alternative" "1")
+     (cond [(eq_attr "alternative" "1,4")
              (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
           (symbol_ref "true")))])
 
@@ -15119,10 +15155,10 @@ (define_split
 (define_insn "*ashl<mode>3_cmp"
   [(set (reg FLAGS_REG)
        (compare
-         (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
-                     (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+         (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
+                     (match_operand:QI 2 "<shift_immediate_operand>" 
"<S>,<S>"))
          (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
        (ashift:SWI (match_dup 1) (match_dup 2)))]
   "(optimize_function_for_size_p (cfun)
     || !TARGET_PARTIAL_FLAG_REG_STALL
@@ -15130,8 +15166,10 @@ (define_insn "*ashl<mode>3_cmp"
        && (TARGET_SHIFT1
            || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
    && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
+   && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands,
+                              TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
@@ -15140,14 +15178,19 @@ (define_insn "*ashl<mode>3_cmp"
 
     default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        return "sal{<imodesuffix>}\t%0";
       else
-       return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+       return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                      : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
-     (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+  [(set_attr "isa" "*,apx_ndd")
+   (set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+             (const_string "ishift")
+           (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
@@ -15167,10 +15210,10 @@ (define_insn "*ashl<mode>3_cmp"
 (define_insn "*ashlsi3_cmp_zext"
   [(set (reg FLAGS_REG)
        (compare
-         (ashift:SI (match_operand:SI 1 "register_operand" "0")
+         (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
                     (match_operand:QI 2 "const_1_to_31_operand"))
          (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
+   (set (match_operand:DI 0 "register_operand" "=r,r")
        (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT
    && (optimize_function_for_size_p (cfun)
@@ -15179,8 +15222,10 @@ (define_insn "*ashlsi3_cmp_zext"
           && (TARGET_SHIFT1
               || TARGET_DOUBLE_WITH_ADD)))
    && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+   && ix86_binary_operator_ok (ASHIFT, SImode, operands,
+                              TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
@@ -15189,14 +15234,19 @@ (define_insn "*ashlsi3_cmp_zext"
 
     default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        return "sal{l}\t%k0";
       else
-       return "sal{l}\t{%2, %k0|%k0, %2}";
+       return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}"
+                      : "sal{l}\t{%2, %k0|%k0, %2}";
     }
 }
-  [(set (attr "type")
-     (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
+  [(set_attr "isa" "*,apx_ndd")
+   (set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+             (const_string "ishift")
+           (and (match_test "TARGET_DOUBLE_WITH_ADD")
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
@@ -15215,10 +15265,10 @@ (define_insn "*ashlsi3_cmp_zext"
 (define_insn "*ashl<mode>3_cconly"
   [(set (reg FLAGS_REG)
        (compare
-         (ashift:SWI (match_operand:SWI 1 "register_operand" "0")
-                     (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+         (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
+                     (match_operand:QI 2 "<shift_immediate_operand>" 
"<S>,<S>"))
          (const_int 0)))
-   (clobber (match_scratch:SWI 0 "=<r>"))]
+   (clobber (match_scratch:SWI 0 "=<r>,r"))]
   "(optimize_function_for_size_p (cfun)
     || !TARGET_PARTIAL_FLAG_REG_STALL
     || (operands[2] == const1_rtx
@@ -15226,22 +15276,28 @@ (define_insn "*ashl<mode>3_cconly"
            || TARGET_DOUBLE_WITH_ADD)))
    && ix86_match_ccmode (insn, CCGOCmode)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
       gcc_assert (operands[2] == const1_rtx);
       return "add{<imodesuffix>}\t%0, %0";
 
-    default:
+  default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        return "sal{<imodesuffix>}\t%0";
       else
-       return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+       return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                      : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
-     (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+  [(set_attr "isa" "*,apx_ndd")
+   (set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+             (const_string "ishift")
+           (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c 
b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index d97648c876d..9951fb00a4c 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -29,6 +29,16 @@ foo2_##OP_NAME##_##TYPE (TYPE *a, TYPE b) \
   return c;                              \
 }
 
+#define FOO3(TYPE, OP_NAME, OP, IMM)  \
+TYPE                                 \
+__attribute__ ((noipa))                      \
+foo3_##OP_NAME##_##TYPE (TYPE a)      \
+{                                    \
+  TYPE b = a OP IMM;                 \
+  return b;                          \
+}                      
+
+
 #define F(TYPE, OP_NAME, OP)   \
 TYPE                            \
 __attribute__ ((noipa))         \
@@ -112,6 +122,16 @@ FOO (int, xor, ^)
 FOO1 (int, xor, ^)
 FOO (long, xor, ^)
 FOO1 (long, xor, ^)
+
+FOO (char, shl, <<)
+FOO3 (char, shl, <<, 7)
+FOO (short, shl, <<)
+FOO3 (short, shl, <<, 7)
+FOO (int, shl, <<)
+FOO3 (int, shl, <<, 7)
+FOO (long, shl, <<)
+FOO3 (long, shl, <<, 7)
+
 /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), 
%(?:|r|e)a(?:x|l)" 4 } } */
 /* { dg-final { scan-assembler-times 
"lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
 /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), 
\\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
@@ -134,3 +154,5 @@ FOO1 (long, xor, ^)
 /* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), 
%(?:|r|e)ax" 3 } } */
 /* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)di, 
%(?:|r|e)si, %(?:|r|e)ax" 2 } } */
 /* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, 
%(?:|r|e)di, %(?:|r|e)ax" 2 } } */
+/* { dg-final { scan-assembler-times "sal(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), 
%(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "sal(?:l|w|q)\[^\n\r]*7, %(?:|r|e)di, 
%(?:|r|e)ax" 4 } } */
-- 
2.31.1

Reply via email to