On Tue, Aug 24, 2021 at 5:40 PM Hongtao Liu <crazy...@gmail.com> wrote:
>
> On Tue, Aug 17, 2021 at 9:52 AM Hongtao Liu <crazy...@gmail.com> wrote:
> >
> > On Mon, Aug 9, 2021 at 4:34 PM Hongtao Liu <crazy...@gmail.com> wrote:
> > >
> > > On Fri, Aug 6, 2021 at 7:27 PM Richard Biener via Gcc-patches
> > > <gcc-patches@gcc.gnu.org> wrote:
> > > >
> > > > On Fri, Aug 6, 2021 at 11:05 AM Richard Sandiford
> > > > <richard.sandif...@arm.com> wrote:
> > > > >
> > > > > Richard Biener via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> > > > > > On Fri, Aug 6, 2021 at 5:32 AM liuhongt <hongtao....@intel.com> 
> > > > > > wrote:
> > > > > >>
> > > > > >> Hi:
> > > > > >> ---
> > > > > >> OK, I think sth is amiss here upthread.  insv/extv do look like 
> > > > > >> they
> > > > > >> are designed
> > > > > >> to work on integer modes (but docs do not say anything about this 
> > > > > >> here).
> > > > > >> In fact the caller of extract_bit_field_using_extv is named
> > > > > >> extract_integral_bit_field.  Of course nothing seems to check what 
> > > > > >> kind of
> > > > > >> modes we're dealing with, but we're for example happily doing
> > > > > >> expand_shift in 'mode'.  In the extract_integral_bit_field call 
> > > > > >> 'mode' is
> > > > > >> some integer mode and op0 is HFmode?  From the above I get it's
> > > > > >> the other way around?  In that case we should wrap the
> > > > > >> call to extract_integral_bit_field, extracting in an integer mode 
> > > > > >> with the
> > > > > >> same size as 'mode' and then converting the result as (subreg:HF 
> > > > > >> (reg:HI ...)).
> > > > > >> ---
> > > > > >>   This is a separate patch as a follow up of upper comments.
> > > > > >>
> > > > > >> gcc/ChangeLog:
> > > > > >>
> > > > > >>         * expmed.c (extract_bit_field_1): Wrap the call to
> > > > > >>         extract_integral_bit_field, extracting in an integer mode 
> > > > > >> with
> > > > > >>         the same size as 'tmode' and then converting the result
> > > > > >>         as (subreg:tmode (reg:imode)).
> > > > > >>
> > > > > >> gcc/testsuite/ChangeLog:
> > > > > >>         * gcc.target/i386/float16-5.c: New test.
> > > > > >> ---
> > > > > >>  gcc/expmed.c                              | 19 +++++++++++++++++++
> > > > > >>  gcc/testsuite/gcc.target/i386/float16-5.c | 12 ++++++++++++
> > > > > >>  2 files changed, 31 insertions(+)
> > > > > >>  create mode 100644 gcc/testsuite/gcc.target/i386/float16-5.c
> > > > > >>
> > > > > >> diff --git a/gcc/expmed.c b/gcc/expmed.c
> > > > > >> index 3143f38e057..72790693ef0 100644
> > > > > >> --- a/gcc/expmed.c
> > > > > >> +++ b/gcc/expmed.c
> > > > > >> @@ -1850,6 +1850,25 @@ extract_bit_field_1 (rtx str_rtx, 
> > > > > >> poly_uint64 bitsize, poly_uint64 bitnum,
> > > > > >>        op0_mode = opt_scalar_int_mode ();
> > > > > >>      }
> > > > > >>
> > > > > >> +  /* Make sure we are playing with integral modes.  Pun with 
> > > > > >> subregs
> > > > > >> +     if we aren't. When tmode is HFmode, op0 is SImode, there 
> > > > > >> will be ICE
> > > > > >> +     in extract_integral_bit_field.  */
> > > > > >> +  if (int_mode_for_mode (tmode).exists (&imode)
> > > > > >
> > > > > > check !INTEGRAL_MODE_P (tmode) before, that should be slightly
> > > > > > cheaper.  Then imode should always be != tmode.  Maybe
> > > > > > even GET_MDOE_CLASS (tmode) != MODE_INT since I'm not sure
> > > > > > how it behaves for composite modes.
> > > > > >
> > > > > > Of course the least surprises would happen when we restrict this
> > > > > > to FLOAT_MODE_P (tmode).
> > > > > >
> > > > > > Richard - any preferences?
> > > > >
> > > > > If the bug is that extract_integral_bit_field is being called with
> > > > > a non-integral mode parameter, then it looks odd that we can still
> > > > > fall through to it without an integral mode (when exists is false).
> > > > >
> > > > > If calling extract_integral_bit_field without an integral mode is
> > > > > a bug then I think we should have:
> > > > >
> > > > >   int_mode_for_mode (mode).require ()
> > > > >
> > > > > whenever mode is not already SCALAR_INT_MODE_P/is_a<scalar_int_mode>.
> > > > > Ideally we'd make the mode parameter scalar_int_mode too.
> > > > >
> > > > > extract_integral_bit_field currently has:
> > > > >
> > > > >   /* Find a correspondingly-sized integer field, so we can apply
> > > > >      shifts and masks to it.  */
> > > > >   scalar_int_mode int_mode;
> > > > >   if (!int_mode_for_mode (tmode).exists (&int_mode))
> > > > >     /* If this fails, we should probably push op0 out to memory and 
> > > > > then
> > > > >        do a load.  */
> > > > >     int_mode = int_mode_for_mode (mode).require ();
> > > > >
> > > > > which would seem to be redundant after this change.
> > > >
> > > > I'm not sure what exactly the bug is, but extract_integral_bit_field 
> > > > ends
> > > > up creating a lowpart subreg that's not allowed and that ICEs (and I
> > > > can't see a way to check beforehand).  So it seems to me at least
> > > > part of that function doesn't expect non-integral extraction modes.
> > > >
> > > > But who knows - the code is older than I am (OK, not, but older than
> > > > my involvment in GCC ;))
> > > >
> > > How about attached patch w/ below changelog
> > >
> > > gcc/ChangeLog:
> > >
> > >         * expmed.c (extract_bit_field_1): Make sure we're playing with
> > >         integral modes before call extract_integral_bit_field.
> > >         (extract_integral_bit_field): Add a parameter of type
> > >         scalar_int_mode which corresponds to of tmode.
> > >         And call extract_and_convert_fixed_bit_field instead of
> > >         extract_fixed_bit_field and convert_extracted_bit_field.
> > >         (extract_and_convert_fixed_bit_field): New function, it's a
> > >         combination of extract_fixed_bit_field and
> > >         convert_extracted_bit_field.
> > >
> > > gcc/testsuite/ChangeLog:
> > >         * gcc.target/i386/float16-5.c: New test.
> > >
> > I'd like to ping this patch, or maybe we can use the patch before with
> > richi's comments.
> > >
>
> Rebased and ping^2, there are plenty of avx512fp16 patches blocked by
> this patch, i'd like someone to help review this patch.
>
Please ignore the former attached patch, should be the patch attached here.
> > > > Richard.
> > > >
> > > > > >> +      && imode != tmode
> > > > > >> +      && imode != GET_MODE (op0))
> > > > > >> +    {
> > > > > >> +      rtx ret = extract_integral_bit_field (op0, op0_mode,
> > > > > >> +                                           bitsize.to_constant (),
> > > > > >> +                                           bitnum.to_constant (), 
> > > > > >> unsignedp,
> > > > > >> +                                           NULL, imode, imode,
> > > > > >> +                                           reverse, fallback_p);
> > > > > >> +      gcc_assert (ret);
> > > > > >> +
> > > > > >> +      if (!REG_P (ret))
> > > > > >> +       ret = force_reg (imode, ret);
> > > > > >> +      return gen_lowpart_SUBREG (tmode, ret);
> > > > > >> +    }
> > > > > >> +
> > > > > >>    /* It's possible we'll need to handle other cases here for
> > > > > >>       polynomial bitnum and bitsize.  */
> > > > >
> > > > > Minor nit, but since the code is using to_constant, it should go after
> > > > > this comment rather than before it.
> > > > >
> > > > > Thanks,
> > > > > Richard
> > > > >
> > > > > >>
> > > > > >> diff --git a/gcc/testsuite/gcc.target/i386/float16-5.c 
> > > > > >> b/gcc/testsuite/gcc.target/i386/float16-5.c
> > > > > >> new file mode 100644
> > > > > >> index 00000000000..ebc0af1490b
> > > > > >> --- /dev/null
> > > > > >> +++ b/gcc/testsuite/gcc.target/i386/float16-5.c
> > > > > >> @@ -0,0 +1,12 @@
> > > > > >> +/* { dg-do compile } */
> > > > > >> +/* { dg-options "-msse2 -O2" } */
> > > > > >> +_Float16
> > > > > >> +foo (int a)
> > > > > >> +{
> > > > > >> +  union {
> > > > > >> +    int a;
> > > > > >> +    _Float16 b;
> > > > > >> +  }c;
> > > > > >> +  c.a = a;
> > > > > >> +  return c.b;
> > > > > >> +}
> > > > > >> --
> > > > > >> 2.27.0
> > > > > >>
> > >
> > >
> > >
> > > --
> > > BR,
> > > Hongtao
> >
> >
> >
> > --
> > BR,
> > Hongtao
>
>
>
> --
> BR,
> Hongtao



-- 
BR,
Hongtao
From 9c77ac15e69b567156a82debe45e3ced10df1110 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao....@intel.com>
Date: Fri, 6 Aug 2021 10:18:43 +0800
Subject: [PATCH] Make sure we're playing with integral modes before call
 extract_integral_bit_field.

gcc/ChangeLog:

	* expmed.c (extract_bit_field_1): Make sure we're playing with
	integral modes before call extract_integral_bit_field.
	(extract_integral_bit_field): Add a parameter of type
	scalar_int_mode which corresponds to of tmode.
	And call extract_and_convert_fixed_bit_field instead of
	extract_fixed_bit_field and convert_extracted_bit_field.
	(extract_and_convert_fixed_bit_field): New function, it's a
	combination of extract_fixed_bit_field and
	convert_extracted_bit_field.

gcc/testsuite/ChangeLog:
	* gcc.target/i386/float16-5.c: New test.
---
 gcc/expmed.c                              | 103 ++++++++++++++++------
 gcc/testsuite/gcc.target/i386/float16-5.c |  12 +++
 2 files changed, 90 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/float16-5.c

diff --git a/gcc/expmed.c b/gcc/expmed.c
index 3143f38e057..f083d6e86d0 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -71,7 +71,14 @@ static void store_split_bit_field (rtx, opt_scalar_int_mode,
 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
 				       unsigned HOST_WIDE_INT,
 				       unsigned HOST_WIDE_INT, int, rtx,
-				       machine_mode, machine_mode, bool, bool);
+				       machine_mode, machine_mode,
+				       scalar_int_mode, bool, bool);
+static rtx extract_and_convert_fixed_bit_field (scalar_int_mode,
+						machine_mode, machine_mode,
+						rtx, opt_scalar_int_mode,
+						unsigned HOST_WIDE_INT,
+						unsigned HOST_WIDE_INT, rtx,
+						int, bool);
 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
 				    unsigned HOST_WIDE_INT,
 				    unsigned HOST_WIDE_INT, rtx, int, bool);
@@ -1632,6 +1639,7 @@ extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
 {
   rtx op0 = str_rtx;
   machine_mode mode1;
+  scalar_int_mode int_tmode;
 
   if (tmode == VOIDmode)
     tmode = mode;
@@ -1853,10 +1861,46 @@ extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
   /* It's possible we'll need to handle other cases here for
      polynomial bitnum and bitsize.  */
 
+  /* Make sure we are playing with integral modes.  Pun with subregs
+     if we aren't. When tmode is HFmode, op0 is SImode, there will be ICE
+     in extract_integral_bit_field.  */
+  opt_scalar_int_mode target_imode = int_mode_for_mode (tmode);
+  if (!target_imode.exists (&int_tmode) || int_tmode != tmode)
+    {
+      if (target_imode.exists (&int_tmode))
+	{
+	  rtx ret = extract_integral_bit_field (op0, op0_mode,
+						bitsize.to_constant (),
+						bitnum.to_constant (),
+						unsignedp, NULL, int_tmode,
+						int_tmode, int_tmode,
+						reverse, fallback_p);
+	  gcc_assert (ret);
+
+	  if (!REG_P (ret))
+	    ret = force_reg (int_tmode, ret);
+	  return gen_lowpart_SUBREG (tmode, ret);
+	}
+      else
+	{
+	  if (!fallback_p)
+	    return NULL;
+
+	  int_tmode = int_mode_for_mode (mode).require ();
+	  return extract_and_convert_fixed_bit_field (int_tmode, tmode, mode,
+						      op0, op0_mode,
+						      bitsize.to_constant (),
+						      bitnum.to_constant (),
+						      target, unsignedp,
+						      reverse);
+	}
+    }
+
   /* From here on we need to be looking at a fixed-size insertion.  */
   return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
 				     bitnum.to_constant (), unsignedp,
-				     target, mode, tmode, reverse, fallback_p);
+				     target, mode, tmode,
+				     int_tmode, reverse, fallback_p);
 }
 
 /* Subroutine of extract_bit_field_1, with the same arguments, except
@@ -1869,6 +1913,7 @@ extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
 			    unsigned HOST_WIDE_INT bitsize,
 			    unsigned HOST_WIDE_INT bitnum, int unsignedp,
 			    rtx target, machine_mode mode, machine_mode tmode,
+			    scalar_int_mode int_tmode,
 			    bool reverse, bool fallback_p)
 {
   /* Handle fields bigger than a word.  */
@@ -2035,29 +2080,10 @@ extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
   if (!fallback_p)
     return NULL;
 
-  /* Find a correspondingly-sized integer field, so we can apply
-     shifts and masks to it.  */
-  scalar_int_mode int_mode;
-  if (!int_mode_for_mode (tmode).exists (&int_mode))
-    /* If this fails, we should probably push op0 out to memory and then
-       do a load.  */
-    int_mode = int_mode_for_mode (mode).require ();
-
-  target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
-				    bitnum, target, unsignedp, reverse);
-
-  /* Complex values must be reversed piecewise, so we need to undo the global
-     reversal, convert to the complex mode and reverse again.  */
-  if (reverse && COMPLEX_MODE_P (tmode))
-    {
-      target = flip_storage_order (int_mode, target);
-      target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
-      target = flip_storage_order (tmode, target);
-    }
-  else
-    target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
-
-  return target;
+  return extract_and_convert_fixed_bit_field (int_tmode, tmode, mode,
+					      op0, op0_mode, bitsize,
+					      bitnum, target, unsignedp,
+					      reverse);
 }
 
 /* Generate code to extract a byte-field from STR_RTX
@@ -2129,6 +2155,33 @@ extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
 			      target, mode, tmode, reverse, true, alt_rtl);
 }
+
+/* Combination of extract_fixed_bit_field and convert_extracted_bit_field.  */
+static rtx
+extract_and_convert_fixed_bit_field (scalar_int_mode int_tmode,
+				     machine_mode tmode, machine_mode mode,
+				     rtx op0, opt_scalar_int_mode op0_mode,
+				     unsigned HOST_WIDE_INT bitsize,
+				     unsigned HOST_WIDE_INT bitnum,
+				     rtx target, int unsignedp, bool reverse)
+{
+  target = extract_fixed_bit_field (int_tmode, op0, op0_mode, bitsize,
+				    bitnum, target, unsignedp, reverse);
+
+  /* Complex values must be reversed piecewise, so we need to undo the global
+     reversal, convert to the complex mode and reverse again.  */
+  if (reverse && COMPLEX_MODE_P (tmode))
+    {
+      target = flip_storage_order (int_tmode, target);
+      target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
+      target = flip_storage_order (tmode, target);
+    }
+  else
+    target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
+
+  return target;
+}
+
 
 /* Use shifts and boolean operations to extract a field of BITSIZE bits
    from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
diff --git a/gcc/testsuite/gcc.target/i386/float16-5.c b/gcc/testsuite/gcc.target/i386/float16-5.c
new file mode 100644
index 00000000000..ebc0af1490b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/float16-5.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-msse2 -O2" } */
+_Float16
+foo (int a)
+{
+  union {
+    int a;
+    _Float16 b;
+  }c;
+  c.a = a;
+  return c.b;
+}
-- 
2.27.0

Reply via email to