This patch splits the load permutation computation out of SLP tree building.
It also removes the broken support for swapping mismatched operands.
If that swapping code ever triggered, we would ICE (internal compiler error)
later, because the following code:

        case vect_internal_def:
!           if (different_types)
!             {
!             oprnd0_info = (*oprnds_info)[0];
!             oprnd1_info = (*oprnds_info)[0];
!               if (i == 0)
!                 oprnd1_info->def_stmts.quick_push (def_stmt);
!               else
!                 oprnd0_info->def_stmts.quick_push (def_stmt);
!             }

pushes to the same operand vector twice: both oprnd0_info and oprnd1_info
are set to (*oprnds_info)[0].  The cases this code tries to handle should
all have been canonicalized by reassoc earlier anyway.

I am going to reinstate more complete support for handling
commutated operations in the next patch.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2013-04-10  Richard Biener  <rguent...@suse.de>

        * tree-vect-slp.c (vect_get_and_check_slp_defs): Remove
        broken code swapping operands.
        (vect_build_slp_tree): Do not compute load permutations here.
        (vect_analyze_slp_instance): Compute load permutations here,
        after building the SLP tree.

Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c (revision 197635)
--- gcc/tree-vect-slp.c (working copy)
*************** vect_get_and_check_slp_defs (loop_vec_in
*** 204,218 ****
  {
    tree oprnd;
    unsigned int i, number_of_oprnds;
!   tree def, def_op0 = NULL_TREE;
    gimple def_stmt;
    enum vect_def_type dt = vect_uninitialized_def;
-   enum vect_def_type dt_op0 = vect_uninitialized_def;
    struct loop *loop = NULL;
-   enum tree_code rhs_code;
-   bool different_types = false;
    bool pattern = false;
!   slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
    int op_idx = 1;
    tree compare_rhs = NULL_TREE;
  
--- 204,215 ----
  {
    tree oprnd;
    unsigned int i, number_of_oprnds;
!   tree def;
    gimple def_stmt;
    enum vect_def_type dt = vect_uninitialized_def;
    struct loop *loop = NULL;
    bool pattern = false;
!   slp_oprnd_info oprnd_info;
    int op_idx = 1;
    tree compare_rhs = NULL_TREE;
  
*************** vect_get_and_check_slp_defs (loop_vec_in
*** 334,345 ****
                oprnd_info->first_def_type = NULL_TREE;
                oprnd_info->first_const_oprnd = oprnd;
              }
- 
-         if (i == 0)
-           {
-             def_op0 = def;
-             dt_op0 = dt;
-           }
        }
        else
        {
--- 331,336 ----
*************** vect_get_and_check_slp_defs (loop_vec_in
*** 357,413 ****
                                           TREE_TYPE (def))))
               || (!def
                   && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
!                                          TREE_TYPE (oprnd)))
!              || different_types)
            {
!             if (number_of_oprnds != 2)
!               {
!                 if (dump_enabled_p ())
!                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
!                                    "Build SLP failed: different types ");
! 
!                 return false;
!                 }
! 
!             /* Try to swap operands in case of binary operation.  */
!               if (i == 0)
!                 different_types = true;
!               else
!               {
!                 oprnd0_info = (*oprnds_info)[0];
!                 if (is_gimple_assign (stmt)
!                     && (rhs_code = gimple_assign_rhs_code (stmt))
!                     && TREE_CODE_CLASS (rhs_code) == tcc_binary
!                     && commutative_tree_code (rhs_code)
!                     && oprnd0_info->first_dt == dt
!                     && oprnd_info->first_dt == dt_op0
!                     && def_op0 && def
!                     && !(oprnd0_info->first_def_type
!                          && !types_compatible_p (oprnd0_info->first_def_type,
!                                                  TREE_TYPE (def)))
!                       && !(oprnd_info->first_def_type
!                            && !types_compatible_p (oprnd_info->first_def_type,
!                                                    TREE_TYPE (def_op0))))
!                     {
!                       if (dump_enabled_p ())
!                       {
!                         dump_printf_loc (MSG_NOTE, vect_location,
!                                          "Swapping operands of ");
!                         dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
!                       }
! 
!                     swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
!                                         gimple_assign_rhs2_ptr (stmt));
!                   }
!                   else
!                     {
!                     if (dump_enabled_p ())
!                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
!                                        "Build SLP failed: different types ");
  
!                     return false;
!                   }
!               }
            }
        }
  
--- 348,360 ----
                                           TREE_TYPE (def))))
               || (!def
                   && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
!                                          TREE_TYPE (oprnd))))
            {
!             if (dump_enabled_p ())
!               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
!                                "Build SLP failed: different types ");
  
!             return false;
            }
        }
  
*************** vect_get_and_check_slp_defs (loop_vec_in
*** 420,437 ****
          break;
  
        case vect_internal_def:
!           if (different_types)
!             {
!             oprnd0_info = (*oprnds_info)[0];
!             oprnd1_info = (*oprnds_info)[0];
!               if (i == 0)
!                 oprnd1_info->def_stmts.quick_push (def_stmt);
!               else
!                 oprnd0_info->def_stmts.quick_push (def_stmt);
!             }
!         else
!           oprnd_info->def_stmts.quick_push (def_stmt);
! 
          break;
  
        default:
--- 367,373 ----
          break;
  
        case vect_internal_def:
!         oprnd_info->def_stmts.quick_push (def_stmt);
          break;
  
        default:
*************** static bool
*** 460,468 ****
  vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       slp_tree *node, unsigned int group_size,
                       unsigned int *max_nunits,
-                      vec<int> *load_permutation,
                       vec<slp_tree> *loads,
!                      unsigned int vectorization_factor, bool *loads_permuted)
  {
    unsigned int i;
    vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (*node);
--- 396,403 ----
  vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       slp_tree *node, unsigned int group_size,
                       unsigned int *max_nunits,
                       vec<slp_tree> *loads,
!                      unsigned int vectorization_factor)
  {
    unsigned int i;
    vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (*node);
*************** vect_build_slp_tree (loop_vec_info loop_
*** 478,485 ****
    enum machine_mode vec_mode;
    struct data_reference *first_dr;
    HOST_WIDE_INT dummy;
-   bool permutation = false;
-   unsigned int load_place;
    gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
    vec<slp_oprnd_info> oprnds_info;
    unsigned int nops;
--- 413,418 ----
*************** vect_build_slp_tree (loop_vec_info loop_
*** 839,854 ****
                      }
                  }
  
-               /* Store the place of this load in the interleaving chain.  In
-                  case that permutation is needed we later decide if a specific
-                  permutation is supported.  */
-               load_place = vect_get_place_in_interleaving_chain (stmt,
-                                                                  first_load);
-               if (load_place != i)
-                 permutation = true;
- 
-               load_permutation->safe_push (load_place);
- 
                /* We stop the tree when we reach a group of loads.  */
                stop_recursion = true;
               continue;
--- 772,777 ----
*************** vect_build_slp_tree (loop_vec_info loop_
*** 925,941 ****
    if (stop_recursion)
      {
        loads->safe_push (*node);
-       if (permutation)
-       *loads_permuted = true;
-       else
-         {
-           /* We don't check here complex numbers chains, so we set
-              LOADS_PERMUTED for further check in
-              vect_supported_load_permutation_p.  */
-           if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
-             *loads_permuted = true;
-         }
- 
        vect_free_oprnd_info (oprnds_info);
        return true;
      }
--- 848,853 ----
*************** vect_build_slp_tree (loop_vec_info loop_
*** 951,958 ****
        child = vect_create_new_slp_node (oprnd_info->def_stmts);
        if (!child
            || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
!                                  max_nunits, load_permutation, loads,
!                                  vectorization_factor, loads_permuted))
          {
          if (child)
            oprnd_info->def_stmts = vNULL;
--- 863,870 ----
        child = vect_create_new_slp_node (oprnd_info->def_stmts);
        if (!child
            || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
!                                  max_nunits, loads,
!                                  vectorization_factor))
          {
          if (child)
            oprnd_info->def_stmts = vNULL;
*************** vect_analyze_slp_instance (loop_vec_info
*** 1604,1613 ****
    unsigned int vectorization_factor = 0;
    int i;
    unsigned int max_nunits = 0;
-   vec<int> load_permutation;
    vec<slp_tree> loads;
    struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
-   bool loads_permuted = false;
    vec<gimple> scalar_stmts;
  
    if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
--- 1516,1523 ----
*************** vect_analyze_slp_instance (loop_vec_info
*** 1689,1701 ****
  
    node = vect_create_new_slp_node (scalar_stmts);
  
-   load_permutation.create (group_size * group_size);
    loads.create (group_size);
  
    /* Build the tree for the SLP instance.  */
    if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
!                          &max_nunits, &load_permutation, &loads,
!                          vectorization_factor, &loads_permuted))
      {
        /* Calculate the unrolling factor based on the smallest type.  */
        if (max_nunits > nunits)
--- 1599,1610 ----
  
    node = vect_create_new_slp_node (scalar_stmts);
  
    loads.create (group_size);
  
    /* Build the tree for the SLP instance.  */
    if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
!                          &max_nunits, &loads,
!                          vectorization_factor))
      {
        /* Calculate the unrolling factor based on the smallest type.  */
        if (max_nunits > nunits)
*************** vect_analyze_slp_instance (loop_vec_info
*** 1709,1715 ****
                             "Build SLP failed: unrolling required in basic"
                             " block SLP");
          vect_free_slp_tree (node);
-         load_permutation.release ();
          loads.release ();
            return false;
          }
--- 1618,1623 ----
*************** vect_analyze_slp_instance (loop_vec_info
*** 1722,1731 ****
        SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
        SLP_INSTANCE_LOADS (new_instance) = loads;
        SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
!       SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
  
        if (loads_permuted)
          {
            if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                    load_permutation))
              {
--- 1630,1666 ----
        SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
        SLP_INSTANCE_LOADS (new_instance) = loads;
        SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
!       SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = vNULL;
! 
!       /* Compute the load permutation.  */
!       slp_tree load_node;
!       bool loads_permuted = false;
!       vec<int> load_permutation;
!       load_permutation.create (group_size * group_size);
!       FOR_EACH_VEC_ELT (loads, i, load_node)
!       {
!         int j;
!         gimple load;
!         FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
!           {
!             int load_place;
!             load_place = vect_get_place_in_interleaving_chain
!               (load, GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)));
!             if (load_place != j
!                 /* ???  We allow loads from different groups to
!                    get to here for a special case handled in
!                    the permutation code.  Make sure we get to that.  */
!                 || (GROUP_FIRST_ELEMENT
!                       (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]))
!                     != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load))))
!               loads_permuted = true;
!             load_permutation.safe_push (load_place);
!           }
!       }
  
        if (loads_permuted)
          {
+         SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
            if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                    load_permutation))
              {
*************** vect_analyze_slp_instance (loop_vec_info
*** 1745,1751 ****
               = vect_find_first_load_in_slp_instance (new_instance);
          }
        else
!         SLP_INSTANCE_LOAD_PERMUTATION (new_instance).release ();
  
        /* Compute the costs of this SLP instance.  */
        vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
--- 1680,1686 ----
               = vect_find_first_load_in_slp_instance (new_instance);
          }
        else
!         load_permutation.release ();
  
        /* Compute the costs of this SLP instance.  */
        vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
*************** vect_analyze_slp_instance (loop_vec_info
*** 1765,1771 ****
    /* Failed to SLP.  */
    /* Free the allocated memory.  */
    vect_free_slp_tree (node);
-   load_permutation.release ();
    loads.release ();
  
    return false;
--- 1700,1705 ----

Reply via email to