On 6/28/23 03:47, Juzhe-Zhong wrote:
This bug blocks the following patches.
GCC doesn't know that RVV uses a compact mask model.
Consider the following case:
/* Reproducer: store i into out[i] only where mask[i] is nonzero, then
   verify every element of out.
   NOTE(review): int8_t and assert are used but no #include lines are shown
   in this snippet; presumably the full test includes the headers that
   declare them -- confirm against the actual test file.  */
#define N 16
int
main ()
{
/* Alternating mask: even indices are 0, odd indices are 1.  */
int8_t mask[N] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
/* Destination starts out fully zeroed.  */
int8_t out[N] = {0};
/* Conditional store: only elements whose mask byte is nonzero are written.  */
for (int8_t i = 0; i < N; ++i)
if (mask[i])
out[i] = i;
/* Check: written elements hold their own index, skipped ones are still 0.  */
for (int8_t i = 0; i < N; ++i)
{
if (mask[i])
assert (out[i] == i);
else
assert (out[i] == 0);
}
}
Before this patch, the pre-calculated mask in the constant memory pool is:
.LC1:
.byte 68 ====> 0b01000100
This is incorrect; such a case fails at execution.
After this patch:
.LC1:
.byte 10 ====> 0b1010
So I don't get anything like this in my testing. What are the precise
arguments you're using to build the testcase?
I'm compiling the test using a trunk compiler with
-O3 --param riscv-autovec-preference=fixed-vlmax -march=rv64gcv
I get the attached code both before and after your patch. Clearly I'm
doing something different/wrong. So my request is for the precise
command line you're using and the before/after resulting assembly code.
Jeff
# File-level directives for the listing generated from j.c (RISC-V, GNU as
# syntax).  The arch attribute string must sit on the same line as the
# ".attribute arch," directive: a directive cannot continue onto the next
# line, so the mail-wrapped form did not assemble.
.file "j.c"
.option nopic
.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zicsr2p0_zifencei2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
.attribute unaligned_access, 0
.attribute stack_align, 16
.text
# NUL-terminated string constants referenced by the two assertion-failure
# paths in main (.L2 and .L3).  They live in a mergeable string section
# (flags "aMS", entry size 1), each aligned to 8 bytes (.align 3).
.section .rodata.str1.8,"aMS",@progbits,1
.align 3
# Source file name passed in a1 to __assert_fail.
.LC1:
.string "j.c"
.align 3
# Assertion text used by the .L3 failure path.
.LC2:
.string "out[i] == i"
.align 3
# Assertion text used by the .L2 failure path.
.LC3:
.string "out[i] == 0"
# main: compiled form of the reproducer.
#
# Stack frame (48 bytes):
#   sp+0  .. sp+15   copy of the 16-byte mask initializer (from .LANCHOR0)
#   sp+16 .. sp+31   out[0..15]
#   sp+40            saved ra
#
# The conditional-store loop is done with one masked vector store; the
# verification loop is fully unrolled into scalar byte loads and branches.
# Returns 0 in a0 when every check passes; otherwise calls __assert_fail.
.section .text.startup,"ax",@progbits
.align 1
.globl main
.type main, @function
main:
.LFB0:
.cfi_startproc
# a5 = address of .LANCHOR0, then load its 16 bytes as two doublewords.
lui a5,%hi(.LANCHOR0)
addi a5,a5,%lo(.LANCHOR0)
ld a4,0(a5)
ld a5,8(a5)
# Allocate the 48-byte frame.
addi sp,sp,-48
.cfi_def_cfa_offset 48
# vl = 16 elements, SEW = 8 bits, LMUL = 1, tail/mask agnostic.
vsetivli zero,16,e8,m1,ta,ma
# Zero out[0..7].
sd zero,16(sp)
# Spill the mask bytes to sp+0 .. sp+15.
sd a4,0(sp)
sd a5,8(sp)
# Save the return address; it sits at CFA-8 (sp+40 in a 48-byte frame).
sd ra,40(sp)
.cfi_offset 1, -8
# a5 = &out[0].
addi a5,sp,16
# Zero out[8..15].
sd zero,24(sp)
# v1[i] = i  (element index vector).
vid.v v1
# Load the mask bytes from the stack into v0 (whole-register load).
vl1re8.v v0,0(sp)
# Turn the bytes into a mask: bit i of v0 is set when mask[i] != 0.
vmsne.vi v0,v0,0
# Switch vl to VLMAX for e8/m1 (rs1 = zero with a non-zero rd); a4 receives it.
# NOTE(review): the masked store below is only equivalent to the 16-element
# source loop if VLMAX is 16 here; presumably that is what the fixed-vlmax
# build with zvl128b assumes -- confirm.
vsetvli a4,zero,e8,m1,ta,ma
# Masked store: out[i] = i for every i whose mask bit is set.
vse8.v v1,0(a5),v0.t
# Unrolled verification.  Even-indexed bytes must be 0 (else branch to .L2);
# odd-indexed bytes must equal their index (else branch to .L3).
lbu a5,16(sp)
bne a5,zero,.L2
lbu a4,17(sp)
li a5,1
bne a4,a5,.L3
lbu a5,18(sp)
bne a5,zero,.L2
lbu a4,19(sp)
li a5,3
bne a4,a5,.L3
lbu a5,20(sp)
bne a5,zero,.L2
lbu a4,21(sp)
li a5,5
bne a4,a5,.L3
lbu a5,22(sp)
bne a5,zero,.L2
lbu a4,23(sp)
li a5,7
bne a4,a5,.L3
lbu a5,24(sp)
bne a5,zero,.L2
lbu a4,25(sp)
li a5,9
bne a4,a5,.L3
lbu a5,26(sp)
bne a5,zero,.L2
lbu a4,27(sp)
li a5,11
bne a4,a5,.L3
lbu a5,28(sp)
bne a5,zero,.L2
lbu a4,29(sp)
li a5,13
bne a4,a5,.L3
lbu a5,30(sp)
bne a5,zero,.L2
lbu a4,31(sp)
li a5,15
bne a4,a5,.L3
# All checks passed: restore ra, return 0 and release the frame.
ld ra,40(sp)
.cfi_remember_state
.cfi_restore 1
li a0,0
addi sp,sp,48
.cfi_def_cfa_offset 0
jr ra
# Failure path for the "out[i] == 0" check:
#   a0 = .LC3 ("out[i] == 0"), a1 = .LC1 ("j.c"), a2 = 18,
#   a3 = __PRETTY_FUNCTION__.0 ("main").
# Presumably these are (assertion, file, line, function) -- confirm against
# the __assert_fail prototype.  No return sequence follows the call, so the
# callee is evidently treated as not returning.
.L2:
.cfi_restore_state
lui a3,%hi(__PRETTY_FUNCTION__.0)
lui a1,%hi(.LC1)
lui a0,%hi(.LC3)
addi a3,a3,%lo(__PRETTY_FUNCTION__.0)
li a2,18
addi a1,a1,%lo(.LC1)
addi a0,a0,%lo(.LC3)
call __assert_fail
# Failure path for the "out[i] == i" check: same argument registers as .L2,
# but with a0 = .LC2 ("out[i] == i") and a2 = 16.
.L3:
lui a3,%hi(__PRETTY_FUNCTION__.0)
lui a1,%hi(.LC1)
lui a0,%hi(.LC2)
addi a3,a3,%lo(__PRETTY_FUNCTION__.0)
li a2,16
addi a1,a1,%lo(.LC1)
addi a0,a0,%lo(.LC2)
call __assert_fail
.cfi_endproc
.LFE0:
.size main, .-main
# Initializer bytes that main copies to its stack frame through .LANCHOR0.
# .LANCHOR0 is set to the current location, so it names the same address
# as .LC0.
#
# Byte layout: ".string" appends a terminating NUL, so `.string ""` emits
# one 0 byte and each `.string "\001"` emits the pair 1,0.  The final
# `.ascii "\001"` emits a single 1 with no terminator.  Together that is
# sixteen bytes: 0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1.
.section .rodata
.align 3
.set .LANCHOR0,. + 0
.LC0:
.string ""
.string "\001"
.string "\001"
.string "\001"
.string "\001"
.string "\001"
.string "\001"
.string "\001"
.ascii "\001"
# Function-name string loaded into a3 on both assertion-failure paths in
# main.  Size is 5 bytes: the four characters of "main" plus the NUL that
# ".string" appends.
.section .srodata,"a"
.align 3
.type __PRETTY_FUNCTION__.0, @object
.size __PRETTY_FUNCTION__.0, 5
__PRETTY_FUNCTION__.0:
.string "main"
# Compiler identification string recorded in the object file.
.ident "GCC: (GNU) 14.0.0 20230628 (experimental)"
# Empty .note.GNU-stack section (no flags).
.section .note.GNU-stack,"",@progbits