https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115725
--- Comment #8 from JuzheZhong <juzhe.zhong at rivai dot ai> ---
I think we should include operands[0] as the "merge/maskoff" operand, which we need to depend on, and use the TU (tail-undisturbed) policy for the vec_set pattern.

Take ARM for example:

(define_expand "vec_set<mode>"
  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
                                               GEN_INT (elem), operands[0]));
    DONE;
  }
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
        (vec_merge:VALL_F16
            (vec_duplicate:VALL_F16
                (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand" "w,?r,Utv"))
            (match_operand:VALL_F16 3 "register_operand" "0,0,0")
            (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    switch (which_alternative)
      {
      case 0:
        return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
      case 1:
        return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
      case 2:
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)

Note that the expander passes operands[0] a second time as operand 3, the vec_merge input that is tied to the output by the "0" constraint, so the insn explicitly depends on the previous contents of the destination register.

Once we specify the "TU" policy for "vmv.s.x", the "avl_prop" pass won't propagate "avl=1" from "vmv.s.x" to "vle16.v". So I think the correct ASM, after we make vec_set use "TU", should be:

vec_set_vnx8hi_0:
        vsetivli zero,8,e16,mf4,ta,ma
        vle16.v v1,0(a1)
        vsetivli zero,1,e16,mf4,tu,ma
        vmv.s.x v1,a2
        vsetivli zero,8,e16,mf4,ta,ma
        vse16.v v1,0(a0)
        ret

Robin, could you check whether the assembly looks like the above after this "TU" fix? If so, I think you can send a patch to fix it and backport it to GCC-14.