From: Ju-Zhe Zhong <juzhe.zh...@rivai.ai> This patch fixes the RVV mask mode size. The mask mode size was adjusted to a whole RVV register size (LMUL = 1), which not only makes each mask type (for example vbool32_t) tied to vint8m1_t but also increases memory consumption.
I noticed this issue during development of the VSETVL PASS. Since it is not part of the VSETVL support, I separate it into a single fix patch now. gcc/ChangeLog: * config/riscv/riscv-modes.def (ADJUST_BYTESIZE): Reduce RVV mask mode size. * config/riscv/riscv.cc (riscv_v_adjust_bytesize): New function. (riscv_modes_tieable_p): Don't tie mask modes which would create issues. * config/riscv/riscv.h (riscv_v_adjust_bytesize): New function. --- gcc/config/riscv/riscv-modes.def | 14 ++++---- gcc/config/riscv/riscv.cc | 61 ++++++++++++++++++++++++++++++++ gcc/config/riscv/riscv.h | 1 + 3 files changed, 69 insertions(+), 7 deletions(-) diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def index 556b5c55253..339b41b32eb 100644 --- a/gcc/config/riscv/riscv-modes.def +++ b/gcc/config/riscv/riscv-modes.def @@ -64,13 +64,13 @@ ADJUST_ALIGNMENT (VNx16BI, 1); ADJUST_ALIGNMENT (VNx32BI, 1); ADJUST_ALIGNMENT (VNx64BI, 1); -ADJUST_BYTESIZE (VNx1BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk); -ADJUST_BYTESIZE (VNx2BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk); -ADJUST_BYTESIZE (VNx4BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk); -ADJUST_BYTESIZE (VNx8BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk); -ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk); -ADJUST_BYTESIZE (VNx32BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk); -ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 8)); +ADJUST_BYTESIZE (VNx1BI, riscv_v_adjust_bytesize (VNx1BImode, 1)); +ADJUST_BYTESIZE (VNx2BI, riscv_v_adjust_bytesize (VNx2BImode, 1)); +ADJUST_BYTESIZE (VNx4BI, riscv_v_adjust_bytesize (VNx4BImode, 1)); +ADJUST_BYTESIZE (VNx8BI, riscv_v_adjust_bytesize (VNx8BImode, 1)); +ADJUST_BYTESIZE (VNx16BI, riscv_v_adjust_bytesize (VNx16BImode, 2)); +ADJUST_BYTESIZE (VNx32BI, riscv_v_adjust_bytesize (VNx32BImode, 4)); +ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_bytesize (VNx64BImode, 8)); /* | Mode | MIN_VLEN=32 | MIN_VLEN=32 
| MIN_VLEN=64 | MIN_VLEN=64 | diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 1198a08b13e..2d380aa42cb 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -979,6 +979,46 @@ riscv_v_adjust_nunits (machine_mode mode, int scale) return scale; } +/* Called from ADJUST_BYTESIZE in riscv-modes.def. Return the correct + BYTESIZE for the corresponding MODE_VECTOR_BOOL machine_mode. */ + +poly_int64 +riscv_v_adjust_bytesize (machine_mode mode, int scale) +{ + /* According to the RVV ISA, each BOOL element occupies 1 bit. + However, GCC assumes each BOOL element occupies at least + 1 byte. ??? TODO: Maybe we can adjust it and support + 1-bit BOOL in the future ??? + + One solution is to adjust all MODE_VECTOR_BOOL with + the same size which is LMUL = 1. However, for VNx1BImode, + which only occupies a small fraction of the bytes of a single + LMUL = 1 size, this is wasting memory and increasing + memory access traffic. + + Ideally, an RVV mask datatype like 'vbool64_t' for example, + which is VNx1BI when TARGET_MIN_VLEN > 32, should have the + BYTESIZE of 1/8 of vint8mf8_t (VNx1QImode) according to the RVV + ISA. However, GCC cannot support a 1-bit bool value, so we can + only adjust the BYTESIZE to the smallest size, which is the + BYTESIZE of vint8mf8_t (VNx1QImode). + + Based on this circumstance, we can model MODE_VECTOR_BOOL + with as small a bytesize as possible so that we could reduce + memory traffic and memory consumption. */ + + /* Only adjust the BYTESIZE of RVV mask modes. */ + gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL); + if (riscv_v_ext_vector_mode_p (mode)) + { + if (known_lt (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR)) + return GET_MODE_SIZE (mode); + else + return BYTES_PER_RISCV_VECTOR; + } + return scale; +} + /* Return true if X is a valid address for machine mode MODE. If it is, fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in effect. 
*/ @@ -5735,6 +5775,27 @@ riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode) static bool riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2) { + if (riscv_v_ext_vector_mode_p (mode1) && riscv_v_ext_vector_mode_p (mode2)) + { + /* Based on riscv_v_adjust_bytesize, RVV mask modes are not + accurately modeled. For example, we model VNx1BI as the + BYTESIZE of VNx1QImode even though VNx1BI should be + 1/8 of the VNx1QImode BYTESIZE. We shouldn't allow them to be + tieable with each other since that produces incorrect codegen. + + For example: + if (cond == 0) { + vint8mf8_t v = *(vint8mf8_t*)in; + } else { + vbool64_t v = *(vbool64_t*)in; + } + GCC would tie them together, which is incorrect, since they + have the same BYTESIZE. */ + if (GET_MODE_CLASS (mode1) == MODE_VECTOR_BOOL + || GET_MODE_CLASS (mode2) == MODE_VECTOR_BOOL) + return mode1 == mode2; + return known_eq (GET_MODE_SIZE (mode1), GET_MODE_SIZE (mode2)); + } return (mode1 == mode2 || !(GET_MODE_CLASS (mode1) == MODE_FLOAT && GET_MODE_CLASS (mode2) == MODE_FLOAT)); diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index defb475f948..b9cb6b9859c 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -1034,6 +1034,7 @@ extern unsigned riscv_stack_boundary; extern unsigned riscv_bytes_per_vector_chunk; extern poly_uint16 riscv_vector_chunks; extern poly_int64 riscv_v_adjust_nunits (enum machine_mode, int); +extern poly_int64 riscv_v_adjust_bytesize (enum machine_mode, int); /* The number of bits and bytes in a RVV vector. */ #define BITS_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk * 8)) #define BYTES_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk)) -- 2.36.3