On Fri, 10 Jan 2025, Alexandre Oliva wrote:
>
> A narrowing conversion and a shift both drop bits from the loaded
> value, but we need to take into account which one comes first to get
> the right number of bits and mask.
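>
> For illustration only (not part of the patch; variable names are made
> up), the two orderings keep different bits of the loaded short, e.g. for
> 0xff78, which is what the new test's 0x78 - 0x80 - 0x80 amounts to as an
> unsigned short:
>
>   unsigned short b = 0xff78;
>   unsigned char narrow_then_shift = ((unsigned char) b) >> 4;  /* bits 4..7 of b: 0x07 */
>   unsigned char shift_then_narrow = (unsigned char) (b >> 4);  /* bits 4..11 of b: 0xf7 */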
>
> Fold the expressions built when applying masks to the parts, when
> comparing the parts, and when combining the results, on the off chance
> that either mask happens to be zero.
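>
> For illustration only (not part of the patch; the helper name is
> hypothetical): with a zero mask, the masked operands and their compare
> fold to constants, and folding the combined result then reduces the
> whole condition to the other compare:
>
>   static int
>   combined (unsigned x, unsigned y)
>   {
>     _Bool l = (x & 0) == (y & 0);  /* folds to 1 */
>     _Bool r = (x & 0xf) == 8;
>     return l & r;                  /* reduces to r alone */
>   }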
>
> Regstrapped on x86_64-linux-gnu. Ok to install?
OK.
Richard.
>
> for gcc/ChangeLog
>
> PR tree-optimization/118206
> * gimple-fold.cc (decode_field_reference): Account for upper
> bits dropped by narrowing conversions whether before or after
> a right shift.
> (fold_truth_andor_for_ifcombine): Fold masks, compares, and
> combined results.
>
> for gcc/testsuite/ChangeLog
>
> PR tree-optimization/118206
> * gcc.dg/field-merge-18.c: New.
> ---
>  gcc/gimple-fold.cc                    | 39 ++++++++++++++++++++++++----
>  gcc/testsuite/gcc.dg/field-merge-18.c | 46 +++++++++++++++++++++++++++++++++
>  2 files changed, 79 insertions(+), 6 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/field-merge-18.c
>
> diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
> index c8a726e0ae3f3..d95f04213ee40 100644
> --- a/gcc/gimple-fold.cc
> +++ b/gcc/gimple-fold.cc
> @@ -7547,6 +7547,7 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
> int shiftrt = 0;
> tree res_ops[2];
> machine_mode mode;
> + bool convert_before_shift = false;
>
> *load = NULL;
> *psignbit = false;
> @@ -7651,6 +7652,12 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
> if (*load)
> loc[3] = gimple_location (*load);
> exp = res_ops[0];
> + /* This looks backwards, but we're going back the def chain, so if we
> + find the conversion here, after finding a shift, that's because the
> + convert appears before the shift, and we should thus adjust the bit
> + pos and size because of the shift after adjusting it due to type
> + conversion. */
> + convert_before_shift = true;
> }
>
> /* Identify the load, if there is one. */
> @@ -7693,6 +7700,15 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
> *pvolatilep = volatilep;
>
> /* Adjust shifts... */
> + if (convert_before_shift
> + && outer_type && *pbitsize > TYPE_PRECISION (outer_type))
> + {
> + HOST_WIDE_INT excess = *pbitsize - TYPE_PRECISION (outer_type);
> + if (*preversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
> + *pbitpos += excess;
> + *pbitsize -= excess;
> + }
> +
> if (shiftrt)
> {
> if (!*preversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
> @@ -7701,7 +7717,8 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
> }
>
> /* ... and bit position. */
> - if (outer_type && *pbitsize > TYPE_PRECISION (outer_type))
> + if (!convert_before_shift
> + && outer_type && *pbitsize > TYPE_PRECISION (outer_type))
> {
> HOST_WIDE_INT excess = *pbitsize - TYPE_PRECISION (outer_type);
> if (*preversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
> @@ -8377,6 +8394,8 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
> if (get_best_mode (end_bit - first_bit, first_bit, 0, ll_end_region,
> ll_align, BITS_PER_WORD, volatilep, &lnmode))
> l_split_load = false;
> + /* ??? If ll and rl share the same load, reuse that?
> + See PR 118206 -> gcc.dg/field-merge-18.c */
> else
> {
> /* Consider the possibility of recombining loads if any of the
> @@ -8757,11 +8776,11 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
> /* Apply masks. */
> for (int j = 0; j < 2; j++)
> if (mask[j] != wi::mask (0, true, mask[j].get_precision ()))
> - op[j] = build2_loc (locs[j][2], BIT_AND_EXPR, type,
> - op[j], wide_int_to_tree (type, mask[j]));
> + op[j] = fold_build2_loc (locs[j][2], BIT_AND_EXPR, type,
> + op[j], wide_int_to_tree (type, mask[j]));
>
> - cmp[i] = build2_loc (i ? rloc : lloc, wanted_code, truth_type,
> - op[0], op[1]);
> + cmp[i] = fold_build2_loc (i ? rloc : lloc, wanted_code, truth_type,
> + op[0], op[1]);
> }
>
> /* Reorder the compares if needed. */
> @@ -8773,7 +8792,15 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
> if (parts == 1)
> result = cmp[0];
> else if (!separatep || !maybe_separate)
> - result = build2_loc (rloc, orig_code, truth_type, cmp[0], cmp[1]);
> + {
> + /* Only fold if any of the cmp is known, otherwise we may lose the
> + sequence point, and that may prevent further optimizations. */
> + if (TREE_CODE (cmp[0]) == INTEGER_CST
> + || TREE_CODE (cmp[1]) == INTEGER_CST)
> + result = fold_build2_loc (rloc, orig_code, truth_type, cmp[0], cmp[1]);
> + else
> + result = build2_loc (rloc, orig_code, truth_type, cmp[0], cmp[1]);
> + }
> else
> {
> result = cmp[0];
> diff --git a/gcc/testsuite/gcc.dg/field-merge-18.c b/gcc/testsuite/gcc.dg/field-merge-18.c
> new file mode 100644
> index 0000000000000..e8413873d2418
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/field-merge-18.c
> @@ -0,0 +1,46 @@
> +/* { dg-do run } */
> +/* { dg-options "-O1" } */
> +
> +/* PR tree-optimization/118206 */
> +/* Check that shifts, whether before or after narrowing conversions, mask out
> + the bits that are to be discarded. */
> +
> +/* This only uses bits from the least significant byte in the short. */
> +__attribute__((noipa)) int
> +foo (const void *x)
> +{
> + unsigned short b;
> + __builtin_memcpy (&b, x, sizeof (short));
> + if ((b & 15) != 8)
> + return 1;
> + if ((((unsigned char) b) >> 4) > 7)
> + return 1;
> + return 0;
> +}
> +
> +__attribute__((noipa)) int
> +bar (const void *x)
> +{
> + unsigned short b;
> + __builtin_memcpy (&b, x, sizeof (short));
> + if ((b & 15) != 8)
> + return 1;
> + if ((unsigned char)(b >> 4) > 7)
> + return 1;
> + return 0;
> +}
> +
> +int
> +main ()
> +{
> + unsigned short a = 0x78 - 0x80 - 0x80;
> + if (foo (&a) != 0 || bar (&a) != (a > 0xff))
> + __builtin_abort ();
> + unsigned short b = 0x88;
> + if (foo (&b) != 1 || bar (&b) != 1)
> + __builtin_abort ();
> + unsigned short c = 8;
> + if (foo (&c) != 0 || bar (&c) != 0)
> + __builtin_abort ();
> + return 0;
> +}
>
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)