Sorry for the slow response. "Kewen.Lin" <li...@linux.ibm.com> writes: > diff --git a/gcc/vec-perm-indices.c b/gcc/vec-perm-indices.c > index ede590dc5c9..57dd11d723c 100644 > --- a/gcc/vec-perm-indices.c > +++ b/gcc/vec-perm-indices.c > @@ -101,6 +101,70 @@ vec_perm_indices::new_expanded_vector (const > vec_perm_indices &orig, > m_encoding.finalize (); > } > > +/* Check whether we can switch to a new permutation vector that > + selects the same input elements as ORIG, but with each element > + built up from FACTOR pieces. Return true if yes, otherwise > + return false. Every FACTOR permutation indexes should be > + continuous separately and the first one of each batch should > + be able to exactly modulo FACTOR. For example, if ORIG is > + { 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation > + is { 1, 2, 0, 3 }. */ > + > +bool > +vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig, > + unsigned int factor) > +{ > + gcc_assert (factor > 0); > + > + if (maybe_lt (orig.m_nelts_per_input, factor)) > + return false; > + > + poly_uint64 nelts; > + /* Invalid if vector units number isn't multiple of factor. */ > + if (!multiple_p (orig.m_nelts_per_input, factor, &nelts)) > + return false; > + > + /* Only handle the case that npatterns is multiple of factor. > + FIXME: Try to see whether we can reshape it by factor npatterns. */ > + if (orig.m_encoding.npatterns () % factor != 0) > + return false; > + > + unsigned int encoded_nelts = orig.m_encoding.encoded_nelts (); > + auto_vec<element_type> encodings (encoded_nelts);
auto_vec<element_type, 32> would avoid memory allocations in the same cases that m_encoding can. “encoding” might be better than “encodings” since there's only really one encoding here. > + /* Separate all encoded elements into batches by size factor, > + then ensure the first element of each batch is multiple of > + factor and all elements in each batch is consecutive from > + the first one. */ > + for (unsigned int i = 0; i < encoded_nelts; i += factor) > + { > + element_type first = orig.m_encoding[i]; > + element_type new_index; > + if (!multiple_p (first, factor, &new_index)) > + return false; > + for (unsigned int j = 1; j < factor; ++j) > + { > + if (maybe_ne (first + j, orig.m_encoding[i + j])) > + return false; > + } Formatting nit: unnecessary braces around if. > + encodings.quick_push (new_index); > + } > + > + m_ninputs = orig.m_ninputs; > + m_nelts_per_input = nelts; > + poly_uint64 full_nelts = exact_div (orig.m_encoding.full_nelts (), factor); > + unsigned int npatterns = orig.m_encoding.npatterns () / factor; > + > + m_encoding.new_vector (full_nelts, npatterns, > + orig.m_encoding.nelts_per_pattern ()); > + > + for (unsigned int i = 0; i < encodings.length (); i++) > + m_encoding.quick_push (encodings[i]); I think this can be: m_encoding.splice (encodings); OK with those changes, thanks. Thanks also for doing it in a variable-length-friendly way. Richard > + > + m_encoding.finalize (); > + > + return true; > +} > + > /* Rotate the inputs of the permutation right by DELTA inputs. This changes > the values of the permutation vector but it doesn't change the way that > the elements are encoded. 
*/ > diff --git a/gcc/vec-perm-indices.h b/gcc/vec-perm-indices.h > index bc70ecd8a1d..98d27f0ec42 100644 > --- a/gcc/vec-perm-indices.h > +++ b/gcc/vec-perm-indices.h > @@ -57,6 +57,7 @@ public: > > void new_vector (const vec_perm_builder &, unsigned int, poly_uint64); > void new_expanded_vector (const vec_perm_indices &, unsigned int); > + bool new_shrunk_vector (const vec_perm_indices &, unsigned int); > void rotate_inputs (int delta); > > /* Return the underlying vector encoding. */