For MMX 16-bit, 32-bit and 64-bit constant vector loads from constant
vector pool:

(insn 6 2 7 2 (set (reg:V1SI 5 di)
        (mem/u/c:V1SI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0  S4 A32])) "pr1
21062-2.c":10:3 2036 {*movv1si_internal}
     (expr_list:REG_EQUAL (const_vector:V1SI [
                (const_int -1 [0xffffffffffffffff])
            ])
        (nil)))

we can convert it to

(insn 12 2 7 2 (set (reg:SI 5 di)
        (const_int -1 [0xffffffffffffffff])) "pr121062-2.c":10:3 100 {*movsi_int
ernal}
     (nil))

Co-Developed-by: H.J. Lu <hjl.to...@gmail.com>

gcc/

PR target/121062
* config/i386/i386.cc (ix86_convert_const_vector_to_integer):
Handle E_V1SImode and E_V1DImode.
* config/i386/mmx.md (V_16_32_64): Add V1SI, V2BF and V1DI.
(mmxinsnmode): Add V1DI and V1SI.
Add V_16_32_64 splitter for constant vector loads from constant
vector pool.
(V_16_32_64:*mov<mode>_imm): Replace lowpart_subreg with
adjust_address.

gcc/testsuite/

PR target/121062
* gcc.target/i386/pr121062-1.c: New test.
* gcc.target/i386/pr121062-2.c: Likewise.
* gcc.target/i386/pr121062-3a.c: Likewise.
* gcc.target/i386/pr121062-3b.c: Likewise.
* gcc.target/i386/pr121062-3c.c: Likewise.
* gcc.target/i386/pr121062-4.c: Likewise.
* gcc.target/i386/pr121062-5.c: Likewise.
* gcc.target/i386/pr121062-6.c: Likewise.
* gcc.target/i386/pr121062-7.c: Likewise.

OK for master?

Thanks.


-- 
H.J.
From 835ba39742c73863d4f4b8de85d153fc32fae736 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubiz...@gmail.com>
Date: Tue, 15 Jul 2025 05:05:10 +0800
Subject: [PATCH] x86: Convert MMX integer loads from constant vector pool

For MMX 16-bit, 32-bit and 64-bit constant vector loads from constant
vector pool:

(insn 6 2 7 2 (set (reg:V1SI 5 di)
        (mem/u/c:V1SI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0  S4 A32])) "pr121062-2.c":10:3 2036 {*movv1si_internal}
     (expr_list:REG_EQUAL (const_vector:V1SI [
                (const_int -1 [0xffffffffffffffff])
            ])
        (nil)))

we can convert it to

(insn 12 2 7 2 (set (reg:SI 5 di)
        (const_int -1 [0xffffffffffffffff])) "pr121062-2.c":10:3 100 {*movsi_internal}
     (nil))

Co-Developed-by: H.J. Lu <hjl.to...@gmail.com>

gcc/

	PR target/121062
	* config/i386/i386.cc (ix86_convert_const_vector_to_integer):
	Handle E_V1SImode and E_V1DImode.
	* config/i386/mmx.md (V_16_32_64): Add V1SI, V2BF and V1DI.
	(mmxinsnmode): Add V1DI and V1SI.
	Add V_16_32_64 splitter for constant vector loads from constant
	vector pool.
	(V_16_32_64:*mov<mode>_imm): Replace lowpart_subreg with
	adjust_address.

gcc/testsuite/

	PR target/121062
	* gcc.target/i386/pr121062-1.c: New test.
	* gcc.target/i386/pr121062-2.c: Likewise.
	* gcc.target/i386/pr121062-3a.c: Likewise.
	* gcc.target/i386/pr121062-3b.c: Likewise.
	* gcc.target/i386/pr121062-3c.c: Likewise.
	* gcc.target/i386/pr121062-4.c: Likewise.
	* gcc.target/i386/pr121062-5.c: Likewise.
	* gcc.target/i386/pr121062-6.c: Likewise.
	* gcc.target/i386/pr121062-7.c: Likewise.
---
 gcc/config/i386/i386.cc                     |  4 +++
 gcc/config/i386/mmx.md                      | 34 +++++++++++++++++----
 gcc/testsuite/gcc.target/i386/pr121062-1.c  | 34 +++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr121062-2.c  | 14 +++++++++
 gcc/testsuite/gcc.target/i386/pr121062-3a.c | 23 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr121062-3b.c |  6 ++++
 gcc/testsuite/gcc.target/i386/pr121062-3c.c |  6 ++++
 gcc/testsuite/gcc.target/i386/pr121062-4.c  | 14 +++++++++
 gcc/testsuite/gcc.target/i386/pr121062-5.c  | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pr121062-6.c  | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pr121062-7.c  | 13 ++++++++
 11 files changed, 168 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121062-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121062-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121062-3a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121062-3b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121062-3c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121062-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121062-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121062-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121062-7.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 313522b88e3..37db8a1d118 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -16704,6 +16704,10 @@ ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
 	  val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
 	}
       break;
+    case E_V1SImode:
+    case E_V1DImode:
+      op = CONST_VECTOR_ELT (op, 0);
+      return INTVAL (op);
     case E_V2HFmode:
     case E_V2BFmode:
     case E_V4HFmode:
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 29a8cb599a7..22d420ddf69 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -81,12 +81,13 @@ (define_mode_iterator VI_16_32 [V4QI V2QI V2HI])
 ;; 4-byte and 2-byte QImode vector modes
 (define_mode_iterator VI1_16_32 [V4QI V2QI])
 
-;; All 2-byte, 4-byte and 8-byte vector modes with more than 1 element
+;; All 2-byte, 4-byte and 8-byte vector modes.
 (define_mode_iterator V_16_32_64
-   [V2QI V4QI V2HI V2HF
+   [V2QI V4QI V2HI V1SI V2HF V2BF
     (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT")
     (V4HF "TARGET_64BIT") (V4BF "TARGET_64BIT")
-    (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
+    (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")
+    (V1DI "TARGET_64BIT")])
 
 ;; V2S* modes
 (define_mode_iterator V2FI [V2SF V2SI])
@@ -107,6 +108,7 @@ (define_mode_attr mmxinsnmode
   [(V8QI "DI") (V4QI "SI") (V2QI "HI")
    (V4HI "DI") (V2HI "SI")
    (V2SI "DI")
+   (V1DI "DI") (V1SI "SI")
    (V4HF "DI") (V2HF "SI")
    (V4BF "DI") (V2BF "SI")
    (V2SF "DI")])
@@ -407,6 +409,25 @@ (define_insn "*mov<mode>_internal"
 	   ]
 	   (symbol_ref "true")))])
 
+(define_split
+  [(set (match_operand:V_16_32_64 0 "general_reg_operand")
+	(match_operand:V_16_32_64 1 "memory_operand"))]
+  "reload_completed
+   && SYMBOL_REF_P (XEXP (operands[1], 0))
+   && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  rtx op1 = avoid_constant_pool_reference (operands[1]);
+
+  if (!CONST_VECTOR_P (op1))
+    FAIL;
+
+  HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (op1, <MODE>mode);
+
+  operands[0] = lowpart_subreg (<mmxinsnmode>mode, operands[0], <MODE>mode);
+  operands[1] = GEN_INT (val);
+})
+
 ;; 16-bit, 32-bit and 64-bit constant vector stores.  After reload,
 ;; convert them to immediate integer stores.
 (define_insn_and_split "*mov<mode>_imm"
@@ -417,10 +438,11 @@ (define_insn_and_split "*mov<mode>_imm"
   "&& reload_completed"
   [(set (match_dup 0) (match_dup 1))]
 {
-  HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[1],
-							    <MODE>mode);
+  rtx op1 = operands[1];
+  HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (op1, <MODE>mode);
+
+  operands[0] = adjust_address (operands[0], <mmxinsnmode>mode, 0);
   operands[1] = GEN_INT (val);
-  operands[0] = lowpart_subreg (<mmxinsnmode>mode, operands[0], <MODE>mode);
 })
 
 ;; For TARGET_64BIT we always round up to 8 bytes.
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-1.c b/gcc/testsuite/gcc.target/i386/pr121062-1.c
new file mode 100644
index 00000000000..799f8562c9f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3" } */
+
+extern union {
+  int i;
+  float f;
+} int_as_float_u;
+
+extern int render_result_from_bake_w;
+extern int render_result_from_bake_h_seed_pass;
+extern float *render_result_from_bake_h_primitive;
+extern float *render_result_from_bake_h_seed;
+
+float
+int_as_float(int i)
+{
+  int_as_float_u.i = i;
+  return int_as_float_u.f;
+}
+
+void
+render_result_from_bake_h(int tx)
+{
+  while (render_result_from_bake_w) {
+    for (; tx < render_result_from_bake_w; tx++)
+      render_result_from_bake_h_primitive[1] =
+          render_result_from_bake_h_primitive[2] = int_as_float(-1);
+    if (render_result_from_bake_h_seed_pass) {
+      *render_result_from_bake_h_seed = 0;
+    }
+  }
+}
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$-1, %r\[a-z0-9\]+" 2 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-2.c b/gcc/testsuite/gcc.target/i386/pr121062-2.c
new file mode 100644
index 00000000000..723d68a4003
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-Og -fno-dce -mtune=generic" } */
+
+typedef int __attribute__((__vector_size__ (4))) S;
+extern void bar (S);
+
+void
+foo ()
+{
+  bar ((S){-1});
+}
+
+/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$-1, \\(%esp\\)" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$-1, %edi" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-3a.c b/gcc/testsuite/gcc.target/i386/pr121062-3a.c
new file mode 100644
index 00000000000..effd4ff5367
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-3a.c
@@ -0,0 +1,23 @@
+/* { dg-do compile { target fpic } } */
+/* { dg-options "-O2 -march=x86-64 -fpic" } */
+
+typedef struct {
+  struct {
+    unsigned short lo4;
+    unsigned short lo3;
+    unsigned short lo2;
+    unsigned short lo1;
+  } i;
+} BID_BINARY80LDOUBLE;
+extern BID_BINARY80LDOUBLE __bid64_to_binary80_x_out;
+void
+__bid64_to_binary80 (void)
+{
+  __bid64_to_binary80_x_out.i.lo4
+    = __bid64_to_binary80_x_out.i.lo3
+    = __bid64_to_binary80_x_out.i.lo2
+    = __bid64_to_binary80_x_out.i.lo1 = 65535;
+}
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+%xmm\[0-9\]+, " 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$-1, \\(%(e|r)\[a-z0-9\]+\\)" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-3b.c b/gcc/testsuite/gcc.target/i386/pr121062-3b.c
new file mode 100644
index 00000000000..eb89b5da091
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-3b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { fpic && lp64 } } } */
+/* { dg-options "-O2 -march=x86-64 -fno-pic -mcmodel=large" } */
+
+#include "pr121062-3a.c"
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$-1, \\(%r\[a-z0-9\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-3c.c b/gcc/testsuite/gcc.target/i386/pr121062-3c.c
new file mode 100644
index 00000000000..4c07029c4f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-3c.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { fpic && lp64 } } } */
+/* { dg-options "-O2 -march=x86-64 -fpic -mcmodel=large" } */
+
+#include "pr121062-3a.c"
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$-1, \\(%r\[a-z0-9\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-4.c b/gcc/testsuite/gcc.target/i386/pr121062-4.c
new file mode 100644
index 00000000000..77a0c2e90bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-4.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+
+typedef long long int __attribute__((__vector_size__ (8))) S;
+
+void
+foo (S *c)
+{
+  *c = (S){0x12345678badbeefULL};
+}
+
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+%xmm\[0-9\]+, " 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movabsq\[ \\t\]+\\\$81985529250168559, %r\[a-z0-9\]+" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-5.c b/gcc/testsuite/gcc.target/i386/pr121062-5.c
new file mode 100644
index 00000000000..22c09a6bfec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-5.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+
+typedef int __attribute__((__vector_size__ (4))) S;
+
+void
+foo (S *c)
+{
+  *c = (S){0x12345678};
+}
+
+
+/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$305419896, \\(%(e|r)\[a-z0-9\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-6.c b/gcc/testsuite/gcc.target/i386/pr121062-6.c
new file mode 100644
index 00000000000..780b496b504
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-6.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-Og -fno-dce -mtune=generic" } */
+
+typedef int __attribute__((__vector_size__ (8))) S;
+
+void
+foo (S *c)
+{
+  *c = (S){0x12345678,0xbadbeefULL};
+}
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+%xmm\[0-9\]+, " 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movabsq\[ \\t\]+\\\$841538639400031864, %r\[a-z0-9\]+" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-7.c b/gcc/testsuite/gcc.target/i386/pr121062-7.c
new file mode 100644
index 00000000000..f1834f8e173
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-7.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+
+typedef __bf16 __attribute__((__vector_size__ (4))) S;
+
+void
+foo (S *c)
+{
+  *c = (S){-0.1, 2.1};
+}
+
+
+/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$1074183629, \\(%(e|r)\[a-z0-9\]+\\)" 1 } } */
-- 
2.50.1

Reply via email to