https://gcc.gnu.org/bugzilla/show_bug.cgi?id=125941

            Bug ID: 125941
           Summary: AArch64: Inefficient code generation for non-constant
                    svbool_t initializer
           Product: gcc
           Version: 17.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: Chris.Bazley at arm dot com
  Target Milestone: ---

The store_constructor function handles variable-length vector types
inefficiently using a fallback path. One source of such types is ongoing work
to enable predicated SLP vectorisation of basic blocks; another is an existing
test.

The cause is twofold:
1. The number of subparts in a variable-length vector type is not divisible by 
the number of subparts of the element type.
2. convert_optab_handler cannot retrieve an insn code for vec_init that
converts from E_QImode to E_VNx16BImode.

Addressing either of these causes alone is insufficient. If both can be fixed,
then store_constructor should assert that a suitable insn code is available for
any uses of vec_init.

Reproducer:

make check-gcc RUNTESTFLAGS="aarch64-sve-acle.exp=cops_bool.c"

Variable values:

mode = E_VNx16BImode
eltmode = E_QImode
icode = CODE_FOR_nothing

Backtrace:

The source code for this test is macro-heavy. It includes the following:

  svbool_t __attribute__ ((noipa)) \
  func_init4 () { \
    svbool_t temp = VECT_CSTN; \
    return temp; \
  } \

and:

#define VECT_CSTN { -1, t (), 0, -1, 0, f (), 0, 0, 0, -1, 0, -1, 0, -1, 0, -1 } /* { dg-warning "overflow in conversion from" "" { target c } }  */
    /* { dg-warning "narrowing conversion of" "" { target c++ } .-1 }  */ 

which compiles to this GIMPLE:

__attribute__((noipa, noinline, noclone, no_icf))
svbool_t func_init4 ()
{
  svbool_t temp;
  int _1;
  <signed-boolean:1> _2;
  int _3;
  <signed-boolean:1> _4;

  <bb 2> [local count: 1073741824]:
  _1 = t ();
  _2 = _1 != 0;
  _3 = f ();
  _4 = _3 != 0;
  temp_8 = {-1, _2, 0, -1, 0, _4, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1};
  return temp_8;

}

which is lowered to very inefficient-looking AArch64 code:

func_init4:
.LFB24:
    .cfi_startproc
    addvl    sp, sp, #-1
    .cfi_escape 0xf,0x8,0x8f,0,0x92,0x2e,0,0x38,0x1e,0x22
    sub    sp, sp, #32
    .cfi_escape 0xf,0xa,0x8f,0,0x92,0x2e,0,0x38,0x1e,0x23,0x20,0x22
    stp    x29, x30, [sp]
    .cfi_escape 0x10,0x1d,0x2,0x8f,0
    .cfi_escape 0x10,0x1e,0x2,0x8f,0x8
    mov    x29, sp
    str    x19, [sp, 16]
    addvl    sp, sp, #-18
    .cfi_escape 0xf,0xb,0x8f,0,0x92,0x2e,0,0x8,0x98,0x1e,0x23,0x20,0x22
    .cfi_escape 0x10,0x13,0xb,0x8f,0,0x92,0x2e,0,0x8,0x90,0x1e,0x23,0x10,0x22
    str    z8, [sp, #2, mul vl]
    str    z9, [sp, #3, mul vl]
    str    z10, [sp, #4, mul vl]
    str    z11, [sp, #5, mul vl]
    str    p15, [sp, #11, mul vl]
    str    p5, [sp, #1, mul vl]
    str    p6, [sp, #2, mul vl]
    str    p7, [sp, #3, mul vl]
    str    p8, [sp, #4, mul vl]
    str    p9, [sp, #5, mul vl]
    str    p10, [sp, #6, mul vl]
    str    p11, [sp, #7, mul vl]
    str    p12, [sp, #8, mul vl]
    str    p13, [sp, #9, mul vl]
    str    p14, [sp, #10, mul vl]
    str    z12, [sp, #6, mul vl]
    str    z13, [sp, #7, mul vl]
    str    z14, [sp, #8, mul vl]
    str    z15, [sp, #9, mul vl]
    str    z16, [sp, #10, mul vl]
    str    z17, [sp, #11, mul vl]
    str    z18, [sp, #12, mul vl]
    str    z19, [sp, #13, mul vl]
    str    z20, [sp, #14, mul vl]
    str    z21, [sp, #15, mul vl]
    str    p4, [sp]
    str    z22, [sp, #16, mul vl]
    str    z23, [sp, #17, mul vl]
    .cfi_escape 0x10,0x48,0x8,0x8f,0,0x92,0x2e,0,0x40,0x1e,0x22
    .cfi_escape 0x10,0x49,0x8,0x8f,0,0x92,0x2e,0,0x48,0x1e,0x22
    .cfi_escape 0x10,0x4a,0x9,0x8f,0,0x92,0x2e,0,0x8,0x20,0x1e,0x22
    .cfi_escape 0x10,0x4b,0x9,0x8f,0,0x92,0x2e,0,0x8,0x28,0x1e,0x22
    .cfi_escape 0x10,0x4c,0x9,0x8f,0,0x92,0x2e,0,0x8,0x30,0x1e,0x22
    .cfi_escape 0x10,0x4d,0x9,0x8f,0,0x92,0x2e,0,0x8,0x38,0x1e,0x22
    .cfi_escape 0x10,0x4e,0x9,0x8f,0,0x92,0x2e,0,0x8,0x40,0x1e,0x22
    .cfi_escape 0x10,0x4f,0x9,0x8f,0,0x92,0x2e,0,0x8,0x48,0x1e,0x22
    bl    t
    mov    w19, w0

    bl    f
    mov    w2, 151

    cntd    x1
    pfalse    p15.b
    mov    w3, 151
    cmp    w19, 0
    mul    x1, x1, x2
    cntd    x2
    mul    x2, x2, x3
    add    x1, x1, 32
    add    x1, sp, x1
    str    p15, [x1]

    add    x2, sp, x2
    mov    w1, 1
    strh    w1, [x2, 32]

    cntd    x1
    rdvl    x2, #20
    mul    x1, x1, x3
    rdvl    x3, #20
    add    x1, x1, 32
    add    x1, sp, x1
    ldr    p15, [x1]
    addpl    x1, x2, #-10
    rdvl    x2, #20
    add    x1, x1, 32
    add    x1, sp, x1
    str    p15, [x1]

    addpl    x1, x2, #-10
    csetm    w2, ne
    add    x1, x1, 32
    cmp    w0, 0
    ldrh    w1, [sp, x1]
    bfi    w1, w2, 1, 1
    addpl    x2, x3, #-10
    add    x2, x2, 32
    add    x2, sp, x2
    strh    w1, [x2]

    addpl    x1, x3, #-10
    add    x1, x1, 32
    mov    w2, 149
    add    x1, sp, x1
    ldr    p15, [x1]
    mov    w3, 149
    cntd    x1
    mul    x1, x1, x2
    add    x1, x1, 32
    add    x1, sp, x1
    str    p15, [x1]

etc.

Reply via email to