https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122189

            Bug ID: 122189
           Summary: [MVE] carry-in incorrectly taken into account by vadcq
           Product: gcc
           Version: 13.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: clyon at gcc dot gnu.org
  Target Milestone: ---
            Target: arm

#include <arm_mve.h>
#include <inttypes.h>
#include <stdio.h>

/* Print the four 32-bit lanes of VAL on one line, prefixed by NAME, in the
   form "NAME: l0, l1, l2, l3".  Marked noinline so the call stays a real
   call in the generated code.

   The lanes are uint32_t, so they are printed with PRIu32 rather than %u:
   uint32_t is not guaranteed to be 'unsigned int' on every Arm target.  */
__attribute((noinline)) void print_uint32x4_t(const char *name,
                                              uint32x4_t val) {
  printf("%s: %" PRIu32 ", %" PRIu32 ", %" PRIu32 ", %" PRIu32 "\n", name,
         vgetq_lane_u32(val, 0), vgetq_lane_u32(val, 1),
         vgetq_lane_u32(val, 2), vgetq_lane_u32(val, 3));
}

/* Reproducer: perform the same vadcq_u32 addition twice, changing only the
   carry variable passed by pointer in between, then print the second
   result.  According to the report, the expected output is
   "v108: 3, 2, 2, 2" but "v108: 2, 2, 2, 2" is printed at -O2.  */
void __attribute__ ((noinline)) test_2(void) {
  /* All four lanes set to 1.  */
  uint32x4_t v12 = vdupq_n_u32(1);
  /* Carry variable shared by both additions; starts at 0.  */
  unsigned v17 = 0;
  /* First addition, with v17 == 0.  The result v18 is deliberately unused;
     its presence is what lets the compiler wrongly reuse it below.  */
  uint32x4_t v18 = vadcq_u32(v12, v12, &v17);
  /* Change the carry before the second, otherwise identical, addition.  */
  v17 = 1;
  /* Second addition, with v17 == 1: this result must differ from v18.  */
  uint32x4_t v108 = vadcq_u32(v12, v12, &v17);
  print_uint32x4_t("v108", v108);
}

/* Program entry point: run the reproducer once and exit successfully.  */
int main() {
  test_2();
  return 0;
}

Compiled with -O2 -march=armv8.1-m.main+mve.fp -mfloat-abi=hard
prints
v108: 2, 2, 2, 2
instead of the expected
v108: 3, 2, 2, 2

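The compiler generates the following code for test_2 (note that q0, which
holds the result of the first vadc, is never overwritten and is what gets
passed to print_uint32x4_t, while the second vadc writes its result to q3):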
test_2:
        @ args = 0, pretend = 0, frame = 8
        @ frame_needed = 0, uses_anonymous_args = 0
        push    {lr}
        sub     sp, sp, #12
        vmov.i32        q3, #0x1  @ v4si
        vmrs    r3, FPSCR_nzcvqc
        bic     r3, r3, #536870912
        vmsr    FPSCR_nzcvqc, r3
        vadc.i32        q0, q3, q3
        vmrs    r3, FPSCR_nzcvqc
        vmrs    r3, FPSCR_nzcvqc
        bic     r3, r3, #536870912
        orr     r3, r3, #536870912
        vmsr    FPSCR_nzcvqc, r3
        vadc.i32        q3, q3, q3
        vmrs    r3, FPSCR_nzcvqc
        ubfx    r3, r3, #29, #1
        str     r3, [sp, #4]
        ldr     r0, .L7
        bl      print_uint32x4_t
        add     sp, sp, #12
        @ sp needed
        ldr     pc, [sp], #4
.L8:
        .align  2
.L7:
        .word   .LC1
        .size   test_2, .-test_2

The problem is that the vadcq pattern does not mention vfpcc as input in the
"add" part. As a result, the compiler sees 2 additions with the same inputs and
concludes both give the same results, thus using the 1st result as input of the
print routine.

In addition, the "set carry" part of the pattern should use a different unspec
code.
  • [Bug target/122189] New: [MVE] ... clyon at gcc dot gnu.org via Gcc-bugs

Reply via email to