In GCC 10, we do not correctly handle disassembling a vector pair in little-endian mode. The solution is to reuse the existing accumulator-disassembly code, which is endian-friendly, so that a vector pair is also copied vector by vector.
Trunk does not have this bug, as the use of opaque modes for the MMA types "fixed" this issue there. This passed bootstrap and regtesting on powerpc64le-linux with no regressions. Ok for the GCC 10 release branch? Peter gcc/ 2021-03-16 Peter Bergner <berg...@linux.ibm.com> * config/rs6000/rs6000-call.c (rs6000_gimple_fold_mma_builtin): Handle disassembling a vector pair vector by vector in little-endian mode. diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index 538a57adceb..a112593878a 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -10850,10 +10850,12 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi) tree src = make_ssa_name (TREE_TYPE (src_type)); gimplify_assign (src, build_simple_mem_ref (src_ptr), &new_seq); - /* If we are not disassembling an accumulator or our destination is - another accumulator, then just copy the entire thing as is. */ - if (fncode != MMA_BUILTIN_DISASSEMBLE_ACC - || TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node) + /* If we are disassembling an accumulator/pair and our destination is + another accumulator/pair, then just copy the entire thing as is. */ + if ((fncode == MMA_BUILTIN_DISASSEMBLE_ACC + && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node) + || (fncode == VSX_BUILTIN_DISASSEMBLE_PAIR + && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_pair_type_node)) { tree dst = build_simple_mem_ref (build1 (VIEW_CONVERT_EXPR, src_type, dst_ptr)); @@ -10865,21 +10867,25 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi) /* We're disassembling an accumulator into a different type, so we need to emit a xxmfacc instruction now, since we cannot do it later. 
*/ - new_decl = rs6000_builtin_decls[MMA_BUILTIN_XXMFACC_INTERNAL]; - new_call = gimple_build_call (new_decl, 1, src); - src = make_ssa_name (vector_quad_type_node); - gimple_call_set_lhs (new_call, src); - gimple_seq_add_stmt (&new_seq, new_call); + if (fncode == MMA_BUILTIN_DISASSEMBLE_ACC) + { + new_decl = rs6000_builtin_decls[MMA_BUILTIN_XXMFACC_INTERNAL]; + new_call = gimple_build_call (new_decl, 1, src); + src = make_ssa_name (vector_quad_type_node); + gimple_call_set_lhs (new_call, src); + gimple_seq_add_stmt (&new_seq, new_call); + } - /* Copy the accumulator vector by vector. */ + /* Copy the accumulator/pair vector by vector. */ + unsigned nvecs = (fncode == MMA_BUILTIN_DISASSEMBLE_ACC) ? 4 : 2; tree dst_type = build_pointer_type_for_mode (unsigned_V16QI_type_node, ptr_mode, true); tree dst_base = build1 (VIEW_CONVERT_EXPR, dst_type, dst_ptr); - tree array_type = build_array_type_nelts (unsigned_V16QI_type_node, 4); + tree array_type = build_array_type_nelts (unsigned_V16QI_type_node, nvecs); tree src_array = build1 (VIEW_CONVERT_EXPR, array_type, src); - for (unsigned i = 0; i < 4; i++) + for (unsigned i = 0; i < nvecs; i++) { - unsigned index = WORDS_BIG_ENDIAN ? i : 3 - i; + unsigned index = WORDS_BIG_ENDIAN ? i : nvecs - 1 - i; tree ref = build4 (ARRAY_REF, unsigned_V16QI_type_node, src_array, build_int_cst (size_type_node, i), NULL_TREE, NULL_TREE);