https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110583

            Bug ID: 110583
           Summary: [x86] missed optimizations in vector concatenation
                    patterns
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: mkretz at gcc dot gnu.org
  Target Milestone: ---
            Target: x86_64-*-*, i?86-*-*

Test case (https://godbolt.org/z/f8GdzfjbW):
Compile with e.g. `-O2 -std=gnu++20 -march=skylake`

// GNU vector-extension types used by the test functions below. The byte size
// passed to vector_size is written as N * sizeof(T), so each alias `TN` is a
// vector of N elements of type T (double1 is a single-element vector).
using short4 [[gnu::vector_size(4 * sizeof(short))]] = short;
using short8 [[gnu::vector_size(8 * sizeof(short))]] = short;
using int4 [[gnu::vector_size(4 * sizeof(int))]] = int;
using int8 [[gnu::vector_size(8 * sizeof(int))]] = int;
using float4 [[gnu::vector_size(4 * sizeof(float))]] = float;
using float8 [[gnu::vector_size(8 * sizeof(float))]] = float;
using double1 [[gnu::vector_size(1 * sizeof(double))]] = double;
using double4 [[gnu::vector_size(4 * sizeof(double))]] = double;

// ------------------------------------------------
// Expected code generation for the functions in this section:
// vpunpcklqdq xmm0, xmm0, xmm1
// ret
//
// Concatenates two 4 x short vectors into one 8 x short vector by listing
// every element in a braced initializer: a[0..3] first, then b[0..3].
short8 vpunpcklqdq_1(short4 a, short4 b)
{ return short8{a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]}; }

// Same concatenation as vpunpcklqdq_1, written as a single shuffle: indices
// 0-3 select the elements of a and indices 4-7 select the elements of b.
short8 vpunpcklqdq_2(short4 a, short4 b)
{ return __builtin_shufflevector(a, b, 0, 1, 2, 3, 4, 5, 6, 7); }

// ------------------------------------------------
// Expected code generation for the functions in this section:
// vinserti128 ymm0, ymm0, xmm1, 1
// ret
//
// Concatenates two 4 x int vectors into one 8 x int vector by listing every
// element in a braced initializer: a[0..3] first, then b[0..3].
int8 vinserti128_1(int4 a, int4 b)
{ return int8{a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]}; }

// Same concatenation as vinserti128_1, written as a single shuffle: indices
// 0-3 select the elements of a and indices 4-7 select the elements of b.
int8 vinserti128_2(int4 a, int4 b)
{ return __builtin_shufflevector(a, b, 0, 1, 2, 3, 4, 5, 6, 7); }

// ------------------------------------------------
// Expected code generation for the functions in this section:
// vinsertf128 ymm0, ymm0, xmm1, 1
// ret
//
// Concatenates two 4 x float vectors into one 8 x float vector by listing
// every element in a braced initializer: a[0..3] first, then b[0..3].
// Per the report, this is the one variant that already produces the expected
// instructions; it is included as the reference for vinsertf128_bad.
float8 vinsertf128_good(float4 a, float4 b)
{ return float8{a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]}; }

// Same concatenation as vinsertf128_good, written as a single shuffle: indices
// 0-3 select the elements of a and indices 4-7 select the elements of b.
// Per the report, this form should be equivalent to vinsertf128_good but does
// not currently produce the expected instructions.
float8 vinsertf128_bad(float4 a, float4 b)
{ return __builtin_shufflevector(a, b, 0, 1, 2, 3, 4, 5, 6, 7); }

// ------------------------------------------------
// Expected code generation:
// vbroadcastsd    ymm1, xmm1
// vblendps        ymm0, ymm0, ymm1, 192
// ret
//
// Builds a 4 x double vector that keeps a[0], a[1] and a[2] and takes the
// scalar b as its last element.
double4 broadcast_blend_0(double4 a, double b)
{ return double4{a[0], a[1], a[2], b}; }

// ------------------------------------------------
// Expected code generation for the functions in this section:
// vbroadcastsd    ymm1, QWORD PTR [rsp+8]
// vblendps        ymm0, ymm0, ymm1, 192
// ret
//
// Like broadcast_blend_0, but the replacement value arrives as a
// single-element vector: keeps a[0], a[1] and a[2] and takes b[0] as the last
// element, written as a braced initializer.
double4 broadcast_blend_1(double4 a, double1 b)
{ return double4{a[0], a[1], a[2], b[0]}; }

// Same result as broadcast_blend_1, written as a single shuffle: indices 0-2
// select a[0..2]; index 4 is the first index past a's four elements and so
// selects b[0].
double4 broadcast_blend_2(double4 a, double1 b)
{ return __builtin_shufflevector(a, b, 0, 1, 2, 4); }


Each of these functions should compile to the asm shown in the comment above it.
Only vinsertf128_good currently does; I included it because vinsertf128_bad should
be equivalent to it (so it is the latter whose code generation has to change ;) ).

Reply via email to