Ping

On 03/11/23 1:14 pm, Surya Kumari Jangala wrote:
> Hi Segher,
> I have incorporated changes in the code as per the review comments provided 
> by you 
> for version 2 of the patch. Please review.
> 
> Regards,
> Surya
> 
> 
> rs6000/p8swap: Fix incorrect lane extraction by vec_extract() [PR106770]
> 
> In the routine rs6000_analyze_swaps(), special handling of swappable
> instructions is done even if the webs that contain the swappable instructions
> are not optimized, i.e., the webs do not contain any permuting load/store
> instructions along with the associated register swap instructions. Doing 
> special
> handling in such webs will result in the extracted lane being adjusted
> unnecessarily for vec_extract.
> 
> Another issue is that existing code treats non-permuting loads/stores as 
> special
> swappables. Non-permuting loads/stores (that have not yet been split into a
> permuting load/store and a swap) are handled by converting them into a 
> permuting
> load/store (which effectively removes the swap). As a result, if special
> swappables are handled only in webs containing permuting loads/stores, then
> non-optimal code is generated for non-permuting loads/stores.
> 
> Hence, in this patch, all webs containing either permuting loads/ stores or
> non-permuting loads/stores are marked as requiring special handling of
> swappables. Swaps associated with permuting loads/stores are marked for 
> removal,
> and non-permuting loads/stores are converted to permuting loads/stores. Then 
> the
> special swappables in the webs are fixed up.
> 
> This patch also ensures that swappable instructions are not modified in the
> following webs as it is incorrect to do so:
>  - webs containing permuting load/store instructions and associated swap
>    instructions that are transformed by converting the permuting memory
>    instructions into non-permuting instructions and removing the swap
>    instructions.
>  - webs where swap(load(vector constant)) instructions are replaced with
>    load(swapped vector constant).
> 
> 2023-09-10  Surya Kumari Jangala  <jskum...@linux.ibm.com>
> 
> gcc/
>       PR rtl-optimization/PR106770
>       * config/rs6000/rs6000-p8swap.cc (non_permuting_mem_insn): New function.
>       (handle_non_permuting_mem_insn): New function.
>       (rs6000_analyze_swaps): Handle swappable instructions only in certain
>       webs.
>       (web_requires_special_handling): New instance variable.
>       (handle_special_swappables): Remove handling of non-permuting load/store
>       instructions.
> 
> gcc/testsuite/
>       PR rtl-optimization/PR106770
>       * gcc.target/powerpc/pr106770.c: New test.
> ---
> 
> diff --git a/gcc/config/rs6000/rs6000-p8swap.cc 
> b/gcc/config/rs6000/rs6000-p8swap.cc
> index 0388b9bd736..02ea299bc3d 100644
> --- a/gcc/config/rs6000/rs6000-p8swap.cc
> +++ b/gcc/config/rs6000/rs6000-p8swap.cc
> @@ -179,6 +179,13 @@ class swap_web_entry : public web_entry_base
>    unsigned int special_handling : 4;
>    /* Set if the web represented by this entry cannot be optimized.  */
>    unsigned int web_not_optimizable : 1;
> +  /* Set if the swappable insns in the web represented by this entry
> +     have to be fixed. Swappable insns have to be fixed in:
> +       - webs containing permuting loads/stores and the swap insns
> +      in such webs have been marked for removal
> +       - webs where non-permuting loads/stores have been converted
> +      to permuting loads/stores  */
> +  unsigned int web_requires_special_handling : 1;
>    /* Set if this insn should be deleted.  */
>    unsigned int will_delete : 1;
>  };
> @@ -1468,14 +1475,6 @@ handle_special_swappables (swap_web_entry *insn_entry, 
> unsigned i)
>        if (dump_file)
>       fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
>        break;
> -    case SH_NOSWAP_LD:
> -      /* Convert a non-permuting load to a permuting one.  */
> -      permute_load (insn);
> -      break;
> -    case SH_NOSWAP_ST:
> -      /* Convert a non-permuting store to a permuting one.  */
> -      permute_store (insn);
> -      break;
>      case SH_EXTRACT:
>        /* Change the lane on an extract operation.  */
>        adjust_extract (insn);
> @@ -2401,6 +2400,25 @@ recombine_lvx_stvx_patterns (function *fun)
>    free (to_delete);
>  }
>  
> +/* Return true if insn is a non-permuting load/store.  */
> +static bool
> +non_permuting_mem_insn (swap_web_entry *insn_entry, unsigned int i)
> +{
> +  return insn_entry[i].special_handling == SH_NOSWAP_LD
> +      || insn_entry[i].special_handling == SH_NOSWAP_ST;
> +}
> +
> +/* Convert a non-permuting load/store insn to a permuting one.  */
> +static void
> +convert_mem_insn (swap_web_entry *insn_entry, unsigned int i)
> +{
> +  rtx_insn *insn = insn_entry[i].insn;
> +  if (insn_entry[i].special_handling == SH_NOSWAP_LD)
> +    permute_load (insn);
> +  if (insn_entry[i].special_handling == SH_NOSWAP_ST)
> +    permute_store (insn);
> +}
> +
>  /* Main entry point for this pass.  */
>  unsigned int
>  rs6000_analyze_swaps (function *fun)
> @@ -2624,25 +2642,55 @@ rs6000_analyze_swaps (function *fun)
>        dump_swap_insn_table (insn_entry);
>      }
>  
> -  /* For each load and store in an optimizable web (which implies
> -     the loads and stores are permuting), find the associated
> -     register swaps and mark them for removal.  Due to various
> -     optimizations we may mark the same swap more than once.  Also
> -     perform special handling for swappable insns that require it.  */
> +  /* There are two kinds of optimizations that can be performed on an
> +     optimizable web:
> +     1. Remove the register swaps associated with permuting load/store
> +     in an optimizable web
> +     2. Convert the vanilla loads/stores (that have not yet been split
> +     into a permuting load/store and a swap) into a permuting
> +     load/store (which effectively removes the swap)
> +     In both the cases, swappable instructions in the webs need
> +     special handling to fix them up.  */
>    for (i = 0; i < e; ++i)
> +    /* For each permuting load/store in an optimizable web, find
> +       the associated register swaps and mark them for removal.
> +       Due to various optimizations we may mark the same swap more
> +       than once.  */
>      if ((insn_entry[i].is_load || insn_entry[i].is_store)
>       && insn_entry[i].is_swap)
>        {
>       swap_web_entry* root_entry
>         = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
>       if (!root_entry->web_not_optimizable)
> -       mark_swaps_for_removal (insn_entry, i);
> +       {
> +         mark_swaps_for_removal (insn_entry, i);
> +         root_entry->web_requires_special_handling = true;
> +       }
>        }
> -    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
> +    /* Convert the non-permuting loads/stores into a permuting
> +       load/store.  */
> +    else if (insn_entry[i].is_swappable
> +          && non_permuting_mem_insn (insn_entry, i))
>        {
>       swap_web_entry* root_entry
>         = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
>       if (!root_entry->web_not_optimizable)
> +       {
> +         convert_mem_insn (insn_entry, i);
> +         root_entry->web_requires_special_handling = true;
> +       }
> +      }
> +
> +  /* Now that the webs which require special handling have been
> +     identified, modify the instructions that are sensitive to
> +     element order.  */
> +  for (i = 0; i < e; ++i)
> +    if (insn_entry[i].is_swappable && insn_entry[i].special_handling
> +     && !non_permuting_mem_insn (insn_entry, i))
> +      {
> +     swap_web_entry* root_entry
> +       = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
> +     if (root_entry->web_requires_special_handling)
>         handle_special_swappables (insn_entry, i);
>        }
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106770.c 
> b/gcc/testsuite/gcc.target/powerpc/pr106770.c
> new file mode 100644
> index 00000000000..5b300b94a41
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr106770.c
> @@ -0,0 +1,20 @@
> +/* { dg-require-effective-target powerpc_p8vector_ok } */
> +/* { dg-options "-mdejagnu-cpu=power8 -O2 " } */
> +/* The 2 xxpermdi instructions are generated by the two
> +   calls to vec_promote() */
> +/* { dg-final { scan-assembler-times {xxpermdi} 2 } } */
> +
> +/* Test case to resolve PR106770  */
> +
> +#include <altivec.h>
> +
> +int cmp2(double a, double b)
> +{
> +    vector double va = vec_promote(a, 1);
> +    vector double vb = vec_promote(b, 1);
> +    vector long long vlt = (vector long long)vec_cmplt(va, vb);
> +    vector long long vgt = (vector long long)vec_cmplt(vb, va);
> +    vector signed long long vr = vec_sub(vlt, vgt);
> +
> +    return vec_extract(vr, 1);
> +}

Reply via email to