On Mon, Dec 12, 2011 at 9:54 PM, Ira Rosen <i...@il.ibm.com> wrote:
>
>
> gcc-patches-ow...@gcc.gnu.org wrote on 13/12/2011 04:05:57 AM:
>
>> On Core2, unaligned vector load/store using movdqu is a very slow
>> operation.  Experiments show it is six times slower than movdqa
>> (aligned), and this is irrespective of whether the resulting data
>> happens to be aligned or not.  For Core i7, there is no performance
>> difference between the two, and on AMDs, movdqu is only about 10%
>> slower.
>>
>> This patch does not vectorize loops that need to generate the slow
>> unaligned memory load/stores on Core2.
>>
>>
>>    Do not vectorize loops on Core2 that need to use unaligned
>>    vector load/stores.
>>    * tree-vect-stmts.c (is_slow_vect_unaligned_load_store): New function.
>>    (vect_analyze_stmt): Check if the vectorizable load/store is slow.
>>    * target.def (TARGET_SLOW_UNALIGNED_VECTOR_MEMOP): New target hook.
>>    * doc/tm.texi.in: Document new target hook
>>    TARGET_SLOW_UNALIGNED_VECTOR_MEMOP.
>>    * doc/tm.texi: Regenerate.
>>    * config/i386/i386.c (ix86_slow_unaligned_vector_memop): New function.
>>    (TARGET_SLOW_UNALIGNED_VECTOR_MEMOP): New macro.
>>
>
>
>> @@ -5065,27 +5112,43 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vect
>>     if (!bb_vinfo
>>         && (STMT_VINFO_RELEVANT_P (stmt_info)
>>             || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
>> +    {
>>        ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
>>              || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
>>              || vectorizable_conversion (stmt, NULL, NULL, NULL)
>>              || vectorizable_shift (stmt, NULL, NULL, NULL)
>>              || vectorizable_operation (stmt, NULL, NULL, NULL)
>>              || vectorizable_assignment (stmt, NULL, NULL, NULL)
>> -            || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
>>              || vectorizable_call (stmt, NULL, NULL)
>> -            || vectorizable_store (stmt, NULL, NULL, NULL)
>> -            || vectorizable_reduction (stmt, NULL, NULL, NULL)
>> +            || vectorizable_reduction (stmt, NULL, NULL, NULL)
>>              || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
>> +
>> +      if (!ok)
>> +        {
>> +          ok = (vectorizable_load (stmt, NULL, NULL, NULL, NULL)
>> +                || vectorizable_store (stmt, NULL, NULL, NULL));
>> +
>> +          if (ok && is_slow_vect_unaligned_load_store (stmt))
>> +            ok = false;
>
> Why not call is_slow_vect_unaligned_load_store from
> vectorizable_load/store?

Yes, I should have done that; somehow I missed it.
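I'll move the check into vectorizable_load and vectorizable_store in the
next version. Roughly, I am thinking of something like the following
(just a sketch against the current helper name and the dump style used
elsewhere in tree-vect-stmts.c, not the final patch):

  /* Inside vectorizable_load (and likewise vectorizable_store), once the
     data reference and its misalignment are known, during the analysis
     phase (vec_stmt == NULL).  */
  if (!vec_stmt && is_slow_vect_unaligned_load_store (stmt))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: unaligned vector access "
                            "is slow on this target.");
      return false;
    }

That way the call chain in vect_analyze_stmt can stay as it was, without
pulling vectorizable_load/store out of it.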

>
> Ira
>
>
>> +        }
>> +    }
>>      else
>>        {
>>          if (bb_vinfo)
>> -          ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
>> -                || vectorizable_type_demotion (stmt, NULL, NULL, node)
>> -               || vectorizable_shift (stmt, NULL, NULL, node)
>> -                || vectorizable_operation (stmt, NULL, NULL, node)
>> -                || vectorizable_assignment (stmt, NULL, NULL, node)
>> -                || vectorizable_load (stmt, NULL, NULL, node, NULL)
>> -                || vectorizable_store (stmt, NULL, NULL, node));
>> +          {
>> +            ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
>> +                  || vectorizable_type_demotion (stmt, NULL, NULL, node)
>> +                  || vectorizable_shift (stmt, NULL, NULL, node)
>> +                  || vectorizable_operation (stmt, NULL, NULL, node)
>> +                  || vectorizable_assignment (stmt, NULL, NULL, node));
>> +            if (!ok)
>> +              {
>> +                ok = (vectorizable_load (stmt, NULL, NULL, node, NULL)
>> +                      || vectorizable_store (stmt, NULL, NULL, node));
>> +                if (ok && is_slow_vect_unaligned_load_store (stmt))
>> +                  ok = false;
>> +              }
>> +          }
>>        }
>
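For reference, the i386 side of the hook (not quoted above) essentially
just keys off the Core 2 tuning plus the usual hook registration.
Something along these lines; this is only a sketch, the exact Core 2
enumerator is a placeholder and the final patch may test a tuning flag
instead:

  /* Return true if unaligned vector loads/stores are slow enough on the
     current tuning that the vectorizer should avoid generating them.  */

  static bool
  ix86_slow_unaligned_vector_memop (void)
  {
    /* movdqu is roughly six times slower than movdqa on Core 2;
       PROCESSOR_CORE2 stands in for whichever Core 2 enumerator(s)
       the tree currently uses.  */
    return ix86_tune == PROCESSOR_CORE2;
  }

  #undef TARGET_SLOW_UNALIGNED_VECTOR_MEMOP
  #define TARGET_SLOW_UNALIGNED_VECTOR_MEMOP ix86_slow_unaligned_vector_memop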
