This patch reworks the VEC_PERM_EXPR folding so that more of it works for variable-length vectors. E.g. it means that we can now recognise variable-length permutes that reduce to a single vector, or cases in which a variable-length permute only needs one input. There should be no functional change for fixed-length vectors.
2017-12-09 Richard Sandiford <richard.sandif...@linaro.org> gcc/ * selftest.h (selftest::vec_perm_indices_c_tests): Declare. * selftest-run-tests.c (selftest::run_tests): Call it. * vector-builder.h (vector_builder::operator ==): New function. (vector_builder::operator !=): Likewise. * vec-perm-indices.h (vec_perm_indices::series_p): Declare. (vec_perm_indices::all_from_input_p): New function. * vec-perm-indices.c (vec_perm_indices::series_p): Likewise. (test_vec_perm_12, selftest::vec_perm_indices_c_tests): Likewise. * fold-const.c (fold_ternary_loc): Use tree_to_vec_perm_builder instead of reading the VECTOR_CST directly. Detect whether both vector inputs are the same before constructing the vec_perm_indices, and update the number of inputs argument accordingly. Use the utility functions added above. Only construct sel2 if we need to. Index: gcc/selftest.h =================================================================== *** gcc/selftest.h 2017-12-09 23:06:55.002855594 +0000 --- gcc/selftest.h 2017-12-09 23:21:51.517599734 +0000 *************** extern void vec_c_tests (); *** 201,206 **** --- 201,207 ---- extern void wide_int_cc_tests (); extern void predict_c_tests (); extern void simplify_rtx_c_tests (); + extern void vec_perm_indices_c_tests (); extern int num_passes; Index: gcc/selftest-run-tests.c =================================================================== *** gcc/selftest-run-tests.c 2017-12-09 23:06:55.002855594 +0000 --- gcc/selftest-run-tests.c 2017-12-09 23:21:51.517599734 +0000 *************** selftest::run_tests () *** 73,78 **** --- 73,79 ---- /* Mid-level data structures. */ input_c_tests (); + vec_perm_indices_c_tests (); tree_c_tests (); gimple_c_tests (); rtl_tests_c_tests (); Index: gcc/vector-builder.h =================================================================== *** gcc/vector-builder.h 2017-12-09 23:06:55.002855594 +0000 --- gcc/vector-builder.h 2017-12-09 23:21:51.518600090 +0000 *************** #define GCC_VECTOR_BUILDER_H *** 97,102 **** --- 97,105 ---- bool encoded_full_vector_p () const; T elt (unsigned int) const; + bool operator == (const Derived &) const; + bool operator != (const Derived &x) const { return !operator == (x); } + void finalize (); protected: *************** vector_builder<T, Derived>::new_vector ( *** 168,173 **** --- 171,196 ---- this->truncate (0); } + /* Return true if this vector and OTHER have the same elements and + are encoded in the same way. */ + + template<typename T, typename Derived> + bool + vector_builder<T, Derived>::operator == (const Derived &other) const + { + if (m_full_nelts != other.m_full_nelts + || m_npatterns != other.m_npatterns + || m_nelts_per_pattern != other.m_nelts_per_pattern) + return false; + + unsigned int nelts = encoded_nelts (); + for (unsigned int i = 0; i < nelts; ++i) + if (!derived ()->equal_p ((*this)[i], other[i])) + return false; + + return true; + } + /* Return the value of vector element I, which might or might not be encoded explicitly. */ Index: gcc/vec-perm-indices.h =================================================================== *** gcc/vec-perm-indices.h 2017-12-09 23:20:13.233112018 +0000 --- gcc/vec-perm-indices.h 2017-12-09 23:21:51.517599734 +0000 *************** typedef int_vector_builder<HOST_WIDE_INT *** 62,68 **** --- 62,70 ---- element_type clamp (element_type) const; element_type operator[] (unsigned int i) const; + bool series_p (unsigned int, unsigned int, element_type, element_type) const; bool all_in_range_p (element_type, element_type) const; + bool all_from_input_p (unsigned int) const; private: vec_perm_indices (const vec_perm_indices &); *************** vec_perm_indices::operator[] (unsigned i *** 119,122 **** --- 121,133 ---- return clamp (m_encoding.elt (i)); } + /* Return true if the permutation vector only selects elements from + input I. */ + + inline bool + vec_perm_indices::all_from_input_p (unsigned int i) const + { + return all_in_range_p (i * m_nelts_per_input, m_nelts_per_input); + } + #endif Index: gcc/vec-perm-indices.c =================================================================== *** gcc/vec-perm-indices.c 2017-12-09 23:20:13.233112018 +0000 --- gcc/vec-perm-indices.c 2017-12-09 23:21:51.517599734 +0000 *************** Software Foundation; either version 3, o *** 28,33 **** --- 28,34 ---- #include "rtl.h" #include "memmodel.h" #include "emit-rtl.h" + #include "selftest.h" /* Switch to a new permutation vector that selects between NINPUTS vector inputs that have NELTS_PER_INPUT elements each. Take the elements of the *************** vec_perm_indices::rotate_inputs (int del *** 85,90 **** --- 86,139 ---- m_encoding[i] = clamp (m_encoding[i] + element_delta); } + /* Return true if index OUT_BASE + I * OUT_STEP selects input + element IN_BASE + I * IN_STEP. */ + + bool + vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step, + element_type in_base, element_type in_step) const + { + /* Check the base value. */ + if (clamp (m_encoding.elt (out_base)) != clamp (in_base)) + return false; + + unsigned int full_nelts = m_encoding.full_nelts (); + unsigned int npatterns = m_encoding.npatterns (); + + /* Calculate which multiple of OUT_STEP elements we need to get + back to the same pattern. */ + unsigned int cycle_length = least_common_multiple (out_step, npatterns); + + /* Check the steps. */ + in_step = clamp (in_step); + out_base += out_step; + unsigned int limit = 0; + for (;;) + { + /* Succeed if we've checked all the elements in the vector. */ + if (out_base >= full_nelts) + return true; + + if (out_base >= npatterns) + { + /* We've got to the end of the "foreground" values. Check + 2 elements from each pattern in the "background" values. */ + if (limit == 0) + limit = out_base + cycle_length * 2; + else if (out_base >= limit) + return true; + } + + element_type v0 = m_encoding.elt (out_base - out_step); + element_type v1 = m_encoding.elt (out_base); + if (clamp (v1 - v0) != in_step) + return false; + + out_base += out_step; + } + return true; + } + /* Return true if all elements of the permutation vector are in the range [START, START + SIZE). */ *************** vec_perm_indices_to_rtx (machine_mode mo *** 180,182 **** --- 229,280 ---- RTVEC_ELT (v, i) = gen_int_mode (indices[i], GET_MODE_INNER (mode)); return gen_rtx_CONST_VECTOR (mode, v); } + + #if CHECKING_P + + namespace selftest { + + /* Test a 12-element vector. */ + + static void + test_vec_perm_12 (void) + { + vec_perm_builder builder (12, 12, 1); + for (unsigned int i = 0; i < 4; ++i) + { + builder.quick_push (i * 5); + builder.quick_push (3 + i); + builder.quick_push (2 + 3 * i); + } + vec_perm_indices indices (builder, 1, 12); + ASSERT_TRUE (indices.series_p (0, 3, 0, 5)); + ASSERT_FALSE (indices.series_p (0, 3, 3, 5)); + ASSERT_FALSE (indices.series_p (0, 3, 0, 8)); + ASSERT_TRUE (indices.series_p (1, 3, 3, 1)); + ASSERT_TRUE (indices.series_p (2, 3, 2, 3)); + + ASSERT_TRUE (indices.series_p (0, 4, 0, 4)); + ASSERT_FALSE (indices.series_p (1, 4, 3, 4)); + + ASSERT_TRUE (indices.series_p (0, 6, 0, 10)); + ASSERT_FALSE (indices.series_p (0, 6, 0, 100)); + + ASSERT_FALSE (indices.series_p (1, 10, 3, 7)); + ASSERT_TRUE (indices.series_p (1, 10, 3, 8)); + + ASSERT_TRUE (indices.series_p (0, 12, 0, 10)); + ASSERT_TRUE (indices.series_p (0, 12, 0, 11)); + ASSERT_TRUE (indices.series_p (0, 12, 0, 100)); + } + + /* Run selftests for this file. */ + + void + vec_perm_indices_c_tests () + { + test_vec_perm_12 (); + } + + } // namespace selftest + + #endif Index: gcc/fold-const.c =================================================================== *** gcc/fold-const.c 2017-12-09 23:18:12.040041251 +0000 --- gcc/fold-const.c 2017-12-09 23:21:51.517599734 +0000 *************** fold_ternary_loc (location_t loc, enum t *** 11547,11645 **** case VEC_PERM_EXPR: if (TREE_CODE (arg2) == VECTOR_CST) { ! unsigned int nelts = VECTOR_CST_NELTS (arg2), i, mask, mask2; ! bool need_mask_canon = false; ! bool need_mask_canon2 = false; ! bool all_in_vec0 = true; ! bool all_in_vec1 = true; ! bool maybe_identity = true; ! bool single_arg = (op0 == op1); ! bool changed = false; ! ! mask2 = 2 * nelts - 1; ! mask = single_arg ? (nelts - 1) : mask2; ! gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type)); ! vec_perm_builder sel (nelts, nelts, 1); ! vec_perm_builder sel2 (nelts, nelts, 1); ! for (i = 0; i < nelts; i++) ! { ! tree val = VECTOR_CST_ELT (arg2, i); ! if (TREE_CODE (val) != INTEGER_CST) ! return NULL_TREE; ! ! /* Make sure that the perm value is in an acceptable ! range. */ ! wi::tree_to_wide_ref t = wi::to_wide (val); ! need_mask_canon |= wi::gtu_p (t, mask); ! need_mask_canon2 |= wi::gtu_p (t, mask2); ! unsigned int elt = t.to_uhwi () & mask; ! unsigned int elt2 = t.to_uhwi () & mask2; ! ! if (elt < nelts) ! all_in_vec1 = false; ! else ! all_in_vec0 = false; ! ! if ((elt & (nelts - 1)) != i) ! maybe_identity = false; ! ! sel.quick_push (elt); ! sel2.quick_push (elt2); ! } ! if (maybe_identity) ! { ! if (all_in_vec0) ! return op0; ! if (all_in_vec1) ! return op1; ! } ! if (all_in_vec0) ! op1 = op0; ! else if (all_in_vec1) ! { ! op0 = op1; ! for (i = 0; i < nelts; i++) ! sel[i] -= nelts; ! need_mask_canon = true; } - vec_perm_indices indices (sel, 2, nelts); if ((TREE_CODE (op0) == VECTOR_CST || TREE_CODE (op0) == CONSTRUCTOR) && (TREE_CODE (op1) == VECTOR_CST || TREE_CODE (op1) == CONSTRUCTOR)) { ! tree t = fold_vec_perm (type, op0, op1, indices); if (t != NULL_TREE) return t; } ! if (op0 == op1 && !single_arg) ! changed = true; ! /* Some targets are deficient and fail to expand a single ! argument permutation while still allowing an equivalent ! 2-argument version. */ ! if (need_mask_canon && arg2 == op2 ! && !can_vec_perm_const_p (TYPE_MODE (type), indices, false) ! && can_vec_perm_const_p (TYPE_MODE (type), ! vec_perm_indices (sel2, 2, nelts), ! false)) { ! need_mask_canon = need_mask_canon2; ! sel.truncate (0); ! sel.splice (sel2); ! } ! ! if (need_mask_canon && arg2 == op2) ! { ! tree eltype = TREE_TYPE (TREE_TYPE (arg2)); ! tree_vector_builder tsel (TREE_TYPE (arg2), nelts, 1); ! for (i = 0; i < nelts; i++) ! tsel.quick_push (build_int_cst (eltype, sel[i])); ! op2 = tsel.build (); changed = true; } --- 11547,11611 ---- case VEC_PERM_EXPR: if (TREE_CODE (arg2) == VECTOR_CST) { ! /* Build a vector of integers from the tree mask. */ ! vec_perm_builder builder; ! if (!tree_to_vec_perm_builder (&builder, arg2)) ! return NULL_TREE; ! /* Create a vec_perm_indices for the integer vector. */ ! unsigned int nelts = TYPE_VECTOR_SUBPARTS (type); ! bool single_arg = (op0 == op1); ! vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts); ! /* Check for cases that fold to OP0 or OP1 in their original ! element order. */ ! if (sel.series_p (0, 1, 0, 1)) ! return op0; ! if (sel.series_p (0, 1, nelts, 1)) ! return op1; ! ! if (!single_arg) ! { ! if (sel.all_from_input_p (0)) ! op1 = op0; ! else if (sel.all_from_input_p (1)) ! { ! op0 = op1; ! sel.rotate_inputs (1); ! } } if ((TREE_CODE (op0) == VECTOR_CST || TREE_CODE (op0) == CONSTRUCTOR) && (TREE_CODE (op1) == VECTOR_CST || TREE_CODE (op1) == CONSTRUCTOR)) { ! tree t = fold_vec_perm (type, op0, op1, sel); if (t != NULL_TREE) return t; } ! bool changed = (op0 == op1 && !single_arg); ! /* Generate a canonical form of the selector. */ ! if (arg2 == op2 && sel.encoding () != builder) { ! /* Some targets are deficient and fail to expand a single ! argument permutation while still allowing an equivalent ! 2-argument version. */ ! if (sel.ninputs () == 2 ! || can_vec_perm_const_p (TYPE_MODE (type), sel, false)) ! op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); ! else ! { ! vec_perm_indices sel2 (builder, 2, nelts); ! if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false)) ! op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel2); ! else ! /* Not directly supported with either encoding, ! so use the preferred form. */ ! op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); ! } changed = true; }