On 6/28/23 03:47, Juzhe-Zhong wrote:
This bug blocks the following patches.

GCC doesn't know that RVV uses a compact mask model.
Consider the following case:

/* Number of elements in the mask and output arrays.  */
#define N 16

/* Reproducer: store i into out[i] only where the byte mask is nonzero,
   then verify every element of out.  */
int
main ()
{
   /* Alternating mask: odd indices are selected, even indices are not.  */
   int8_t mask[N] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
   int8_t out[N] = {0};
   /* Conditional store: out[i] is written only when mask[i] is nonzero.  */
   for (int8_t i = 0; i < N; ++i)
     if (mask[i])
       out[i] = i;
   /* Check: selected elements must hold their index, the others must
      still hold the initial 0.  */
   for (int8_t i = 0; i < N; ++i)
     {
       if (mask[i])
        assert (out[i] == i);
       else
        assert (out[i] == 0);
     }
}

Before this patch, the pre-calculated mask in the constant memory pool is:
.LC1:
         .byte   68 ====> 0b01000100

This is incorrect; this case fails at execution time.

After this patch:
.LC1:
        .byte   10 ====> 0b1010
So I don't get anything like this in my testing. What are the precise arguments you're using to build the testcase?

I'm compiling the test using a trunk compiler with

 -O3 --param riscv-autovec-preference=fixed-vlmax -march=rv64gcv

I get the attached code both before and after your patch. Clearly I'm doing something different/wrong. So my request is for the precise command line you're using and the before/after resulting assembly code.

Jeff
        # Compiler-generated preamble for j.c (RISC-V, GNU as syntax).
        .file   "j.c"
        .option nopic
        # Target ISA string recorded in the object's attributes.  The value
        # must sit on the same line as the directive; the mail-wrapped form
        # (string on its own line) does not assemble.
        .attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zicsr2p0_zifencei2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
        .attribute unaligned_access, 0
        .attribute stack_align, 16
        .text
        # NUL-terminated string constants used by the assertion-failure paths
        # of main.  Section flags "aMS" with entry size 1: allocatable,
        # mergeable, string data.  Each string is aligned with .align 3.
        .section        .rodata.str1.8,"aMS",@progbits,1
        .align  3
        # Source file name; loaded into a1 before each call to __assert_fail.
.LC1:
        .string "j.c"
        .align  3
        # Assertion text loaded into a0 on the .L3 failure path.
.LC2:
        .string "out[i] == i"
        .align  3
        # Assertion text loaded into a0 on the .L2 failure path.
.LC3:
        .string "out[i] == 0"
        # Code for main, emitted into .text.startup.
        .section        .text.startup,"ax",@progbits
        .align  1
        .globl  main
        .type   main, @function
# main: vectorized conditional store followed by a fully unrolled scalar check.
#
# Stack frame (48 bytes), as used below:
#   sp+0  .. sp+15   copy of the 16-byte mask array (from .LANCHOR0)
#   sp+16 .. sp+31   out[0..15], zero-initialized
#   sp+40            saved ra
#
# Note: the predicate in v0 is computed at run time with vmsne.vi from the
# byte array; no pre-computed mask constant is loaded in this listing.
main:
.LFB0:
        .cfi_startproc
        # a5 = &.LANCHOR0 (the mask initializer in .rodata); a4/a5 = its
        # two doublewords.
        lui     a5,%hi(.LANCHOR0)
        addi    a5,a5,%lo(.LANCHOR0)
        ld      a4,0(a5)
        ld      a5,8(a5)
        addi    sp,sp,-48
        .cfi_def_cfa_offset 48
        # vl = 16 elements, SEW = 8 bits, LMUL = 1, tail/mask agnostic.
        vsetivli        zero,16,e8,m1,ta,ma
        # Zero out[0..7], copy the mask to sp+0..15, save ra.
        sd      zero,16(sp)
        sd      a4,0(sp)
        sd      a5,8(sp)
        sd      ra,40(sp)
        .cfi_offset 1, -8
        # a5 = &out[0]; zero out[8..15].
        addi    a5,sp,16
        sd      zero,24(sp)
        # v1[i] = i (the values to store).
        vid.v   v1
        # Whole-register load of the mask bytes from the stack copy.
        vl1re8.v        v0,0(sp)
        # v0 mask bit i = (mask byte i != 0).
        vmsne.vi        v0,v0,0
        # Sets vl to VLMAX for e8/m1 and writes it to a4 (a4 is overwritten
        # below before being read).
        # NOTE(review): the store then runs with vl = VLMAX, not 16; the two
        # agree only when VLEN is 128 bits, presumably what fixed-vlmax with
        # zvl128b assumes -- confirm.
        vsetvli a4,zero,e8,m1,ta,ma
        # Masked store: out[i] = i for the elements enabled in v0.
        vse8.v  v1,0(a5),v0.t
        # Unrolled check of out[0..15]: each even index must read 0
        # (else .L2), each odd index must read its own index (else .L3).
        lbu     a5,16(sp)
        bne     a5,zero,.L2
        lbu     a4,17(sp)
        li      a5,1
        bne     a4,a5,.L3
        lbu     a5,18(sp)
        bne     a5,zero,.L2
        lbu     a4,19(sp)
        li      a5,3
        bne     a4,a5,.L3
        lbu     a5,20(sp)
        bne     a5,zero,.L2
        lbu     a4,21(sp)
        li      a5,5
        bne     a4,a5,.L3
        lbu     a5,22(sp)
        bne     a5,zero,.L2
        lbu     a4,23(sp)
        li      a5,7
        bne     a4,a5,.L3
        lbu     a5,24(sp)
        bne     a5,zero,.L2
        lbu     a4,25(sp)
        li      a5,9
        bne     a4,a5,.L3
        lbu     a5,26(sp)
        bne     a5,zero,.L2
        lbu     a4,27(sp)
        li      a5,11
        bne     a4,a5,.L3
        lbu     a5,28(sp)
        bne     a5,zero,.L2
        lbu     a4,29(sp)
        li      a5,13
        bne     a4,a5,.L3
        lbu     a5,30(sp)
        bne     a5,zero,.L2
        lbu     a4,31(sp)
        li      a5,15
        bne     a4,a5,.L3
        # All checks passed: restore ra, return 0, release the frame.
        ld      ra,40(sp)
        .cfi_remember_state
        .cfi_restore 1
        li      a0,0
        addi    sp,sp,48
        .cfi_def_cfa_offset 0
        jr      ra
        # Failure path for "out[i] == 0": a0 = .LC3, a1 = .LC1 ("j.c"),
        # a2 = 18, a3 = __PRETTY_FUNCTION__.0.
        # Presumably glibc's __assert_fail (expr, file, line, function),
        # which does not return -- confirm.
.L2:
        .cfi_restore_state
        lui     a3,%hi(__PRETTY_FUNCTION__.0)
        lui     a1,%hi(.LC1)
        lui     a0,%hi(.LC3)
        addi    a3,a3,%lo(__PRETTY_FUNCTION__.0)
        li      a2,18
        addi    a1,a1,%lo(.LC1)
        addi    a0,a0,%lo(.LC3)
        call    __assert_fail
        # Failure path for "out[i] == i": same argument setup with
        # a0 = .LC2 and a2 = 16.
.L3:
        lui     a3,%hi(__PRETTY_FUNCTION__.0)
        lui     a1,%hi(.LC1)
        lui     a0,%hi(.LC2)
        addi    a3,a3,%lo(__PRETTY_FUNCTION__.0)
        li      a2,16
        addi    a1,a1,%lo(.LC1)
        addi    a0,a0,%lo(.LC2)
        call    __assert_fail
        .cfi_endproc
.LFE0:
        .size   main, .-main
        # Initializer for the 16-byte mask array, stored one byte per element.
        # .LANCHOR0 is set to the current location, i.e. the start of .LC0;
        # main loads two doublewords from it.
        .section        .rodata
        .align  3
        .set    .LANCHOR0,. + 0
.LC0:
        # .string appends a NUL, .ascii does not, so the bytes are:
        #   0, then (1, 0) seven times, then 1  ->  0,1,0,1,...,0,1
        .string ""
        .string "\001"
        .string "\001"
        .string "\001"
        .string "\001"
        .string "\001"
        .string "\001"
        .string "\001"
        .ascii  "\001"
        # Function-name string ("main" plus NUL = 5 bytes) that main loads
        # into a3 before calling __assert_fail.
        .section        .srodata,"a"
        .align  3
        .type   __PRETTY_FUNCTION__.0, @object
        .size   __PRETTY_FUNCTION__.0, 5
__PRETTY_FUNCTION__.0:
        .string "main"
        # Compiler identification and GNU-stack note section.
        .ident  "GCC: (GNU) 14.0.0 20230628 (experimental)"
        .section        .note.GNU-stack,"",@progbits

Reply via email to