This patch adds support for signed and unsigned highpart multiplications
in HImode, SImode and DImode to the nvptx backend.  Without the
middle-end patch that I've just posted, the middle-end is (easily) able
to make use of only the four narrower (HImode and SImode) instructions,
but with that patch, all six of these instructions are generated in the
provided test cases.

This patch has been tested on nvptx-none hosted on x86_64-pc-linux-gnu
with "make" and "make -k check": there are no new failures with the
above middle-end patch applied, and just the two failures to find
mul.hi.?64 against current mainline.  I'd considered submitting this
patch either without support for the 64-bit variants, or without tests
for them, but it seemed more reasonable to make both enhancements at
the same time.

Ok for mainline (once the previous patch has been approved/pushed)?


2020-08-04  Roger Sayle  <ro...@nextmovesoftware.com>

gcc/ChangeLog
        * config/nvptx/nvptx.md (smulhi3_highpart, smulsi3_highpart,
        smuldi3_highpart, umulhi3_highpart, umulsi3_highpart,
        umuldi3_highpart): New instructions.

gcc/testsuite/ChangeLog
        * gcc.target/nvptx/mul-hi.c: New test.
        * gcc.target/nvptx/umul-hi.c: New test.


Thanks in advance,
Roger
--
Roger Sayle
NextMove Software
Cambridge, UK

diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index c23edcf..0459549 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -568,6 +568,78 @@
   ""
   "%.\\tmul.wide.u32\\t%0, %1, %2;")
 
+(define_insn "smulhi3_highpart"
+  [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
+       (truncate:HI
+        (lshiftrt:SI
+         (mult:SI (sign_extend:SI
+                   (match_operand:HI 1 "nvptx_register_operand" "R"))
+                  (sign_extend:SI
+                   (match_operand:HI 2 "nvptx_register_operand" "R")))
+         (const_int 16))))]
+  ""
+  "%.\\tmul.hi.s16\\t%0, %1, %2;")
+
+(define_insn "smulsi3_highpart"
+  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+       (truncate:SI
+        (lshiftrt:DI
+         (mult:DI (sign_extend:DI
+                   (match_operand:SI 1 "nvptx_register_operand" "R"))
+                  (sign_extend:DI
+                   (match_operand:SI 2 "nvptx_register_operand" "R")))
+         (const_int 32))))]
+  ""
+  "%.\\tmul.hi.s32\\t%0, %1, %2;")
+
+(define_insn "smuldi3_highpart"
+  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
+       (truncate:DI
+        (lshiftrt:TI
+         (mult:TI (sign_extend:TI
+                   (match_operand:DI 1 "nvptx_register_operand" "R"))
+                  (sign_extend:TI
+                   (match_operand:DI 2 "nvptx_register_operand" "R")))
+         (const_int 64))))]
+  ""
+  "%.\\tmul.hi.s64\\t%0, %1, %2;")
+
+(define_insn "umulhi3_highpart"
+  [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
+       (truncate:HI
+        (lshiftrt:SI
+         (mult:SI (zero_extend:SI
+                   (match_operand:HI 1 "nvptx_register_operand" "R"))
+                  (zero_extend:SI
+                   (match_operand:HI 2 "nvptx_register_operand" "R")))
+         (const_int 16))))]
+  ""
+  "%.\\tmul.hi.u16\\t%0, %1, %2;")
+
+(define_insn "umulsi3_highpart"
+  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+       (truncate:SI
+        (lshiftrt:DI
+         (mult:DI (zero_extend:DI
+                   (match_operand:SI 1 "nvptx_register_operand" "R"))
+                  (zero_extend:DI
+                   (match_operand:SI 2 "nvptx_register_operand" "R")))
+         (const_int 32))))]
+  ""
+  "%.\\tmul.hi.u32\\t%0, %1, %2;")
+
+(define_insn "umuldi3_highpart"
+  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
+       (truncate:DI
+        (lshiftrt:TI
+         (mult:TI (zero_extend:TI
+                   (match_operand:DI 1 "nvptx_register_operand" "R"))
+                  (zero_extend:TI
+                   (match_operand:DI 2 "nvptx_register_operand" "R")))
+         (const_int 64))))]
+  ""
+  "%.\\tmul.hi.u64\\t%0, %1, %2;")
+
 ;; Shifts
 
 (define_insn "ashl<mode>3"
diff --git a/gcc/testsuite/gcc.target/nvptx/mul-hi.c b/gcc/testsuite/gcc.target/nvptx/mul-hi.c
new file mode 100644
index 0000000..2cc35af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/mul-hi.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wno-long-long" } */
+
+typedef int __attribute ((mode(TI))) ti_t;
+
+short smulhi3_highpart(short x, short y)
+{
+  return ((int)x * (int)y) >> 16;
+}
+
+int smulsi3_highpart(int x, int y)
+{
+  return ((long)x * (long)y) >> 32;
+}
+
+long smuldi3_highpart(long x, long y)
+{
+  return ((ti_t)x * (ti_t)y) >> 64;
+}
+
+/* { dg-final { scan-assembler-times "mul.hi.s16" 1 } } */
+/* { dg-final { scan-assembler-times "mul.hi.s32" 1 } } */
+/* { dg-final { scan-assembler-times "mul.hi.s64" 1 } } */
diff --git a/gcc/testsuite/gcc.target/nvptx/umul-hi.c b/gcc/testsuite/gcc.target/nvptx/umul-hi.c
new file mode 100644
index 0000000..148d1ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/umul-hi.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wno-long-long" } */
+
+typedef unsigned int __attribute ((mode(TI))) uti_t;
+
+unsigned short umulhi3_highpart(unsigned short x, unsigned short y)
+{
+  return ((unsigned int)x * (unsigned int)y) >> 16;
+}
+
+unsigned int umulsi3_highpart(unsigned int x, unsigned int y)
+{
+  return ((unsigned long)x * (unsigned long)y) >> 32;
+}
+
+unsigned long umuldi3_highpart(unsigned long x, unsigned long y)
+{
+  return ((uti_t)x * (uti_t)y) >> 64;
+}
+
+/* { dg-final { scan-assembler-times "mul.hi.u16" 1 } } */
+/* { dg-final { scan-assembler-times "mul.hi.u32" 1 } } */
+/* { dg-final { scan-assembler-times "mul.hi.u64" 1 } } */

Reply via email to