Re: [PATCH] IBM Z: Fix usage of "f" constraint with long doubles

Andreas Krebbel via Gcc-patches Tue, 26 Jan 2021 23:58:33 -0800

On 1/18/21 10:54 PM, Ilya Leoshkevich wrote:
...

> +static rtx_insn *
> +s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
> +                 vec<machine_mode> &input_modes,
> +                 vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
> +                 HARD_REG_SET & /*clobbered_regs*/)
> +{
> +  if (!TARGET_VXE)
> +    /* Long doubles are stored in FPR pairs - nothing to do.  */
> +    return NULL;
> +
> +  rtx_insn *after_md_seq = NULL, *after_md_end = NULL;
> +
> +  unsigned ninputs = inputs.length ();
> +  unsigned noutputs = outputs.length ();
> +  for (unsigned i = 0; i < noutputs; i++)
> +    {
> +      if (GET_MODE (outputs[i]) != TFmode)
> +     /* Not a long double - nothing to do.  */
> +     continue;
> +      const char *constraint = constraints[i];
> +      bool allows_mem, allows_reg, is_inout;
> +      bool ok = parse_output_constraint (&constraint, i, ninputs, noutputs,
> +                                      &allows_mem, &allows_reg, &is_inout);
> +      gcc_assert (ok);
> +      if (strcmp (constraint, "=f") != 0)
> +     /* Long double with a constraint other than "=f" - nothing to do.  */
> +     continue;


What about other constraint modifiers like & and %? Don't we need to handle
matching constraints as well here?

> +      gcc_assert (allows_reg);
> +      gcc_assert (!allows_mem);
> +      gcc_assert (!is_inout);
> +      /* Copy output value from a FPR pair into a vector register.  */
> +      rtx fprx2 = gen_reg_rtx (FPRX2mode);
> +      push_to_sequence2 (after_md_seq, after_md_end);
> +      emit_insn (gen_fprx2_to_tf (outputs[i], fprx2));
> +      after_md_seq = get_insns ();
> +      after_md_end = get_last_insn ();
> +      end_sequence ();
> +      outputs[i] = fprx2;
> +    }
> +
> +  for (unsigned i = 0; i < ninputs; i++)
> +    {
> +      if (GET_MODE (inputs[i]) != TFmode)
> +     /* Not a long double - nothing to do.  */
> +     continue;
> +      const char *constraint = constraints[noutputs + i];
> +      bool allows_mem, allows_reg;
> +      bool ok = parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
> +                                     constraints.address (), &allows_mem,
> +                                     &allows_reg);
> +      gcc_assert (ok);
> +      if (strcmp (constraint, "f") != 0 && strcmp (constraint, "=f") != 0)
> +     /* Long double with a constraint other than "f" (or "=f" for inout
> +        operands) - nothing to do.  */
> +     continue;
> +      gcc_assert (allows_reg);
> +      gcc_assert (!allows_mem);
> +      /* Copy input value from a vector register into a FPR pair.  */
> +      rtx fprx2 = gen_reg_rtx (FPRX2mode);
> +      emit_insn (gen_tf_to_fprx2 (fprx2, inputs[i]));
> +      inputs[i] = fprx2;
> +      input_modes[i] = FPRX2mode;
> +    }
> +
> +  return after_md_seq;
> +}
> +
>  /* Initialize GCC target structure.  */
>  
>  #undef  TARGET_ASM_ALIGNED_HI_OP
> @@ -16995,6 +17065,9 @@ s390_shift_truncation_mask (machine_mode mode)
>  #undef TARGET_MAX_ANCHOR_OFFSET
>  #define TARGET_MAX_ANCHOR_OFFSET 0xfff
>  
> +#undef TARGET_MD_ASM_ADJUST
> +#define TARGET_MD_ASM_ADJUST s390_md_asm_adjust
> +
>  struct gcc_target targetm = TARGET_INITIALIZER;
>  
>  #include "gt-s390.h"
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index 0e3c31f5d4f..1332a65a1d1 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -616,12 +616,23 @@ (define_insn "*vec_tf_to_v1tf_vr"
>     vlvgp\t%v0,%1,%N1"
>    [(set_attr "op_type" "VRR,VRX,VRX,VRI,VRR")])
>  
> -(define_insn "*fprx2_to_tf"
> -  [(set (match_operand:TF               0 "nonimmediate_operand" "=v")
> -     (subreg:TF (match_operand:FPRX2 1 "general_operand"       "f") 0))]
> +(define_insn_and_split "fprx2_to_tf"
> +  [(set (match_operand:TF               0 "nonimmediate_operand" "=v,R")
> +     (subreg:TF (match_operand:FPRX2 1 "general_operand"       "f,f") 0))]
>    "TARGET_VXE"
> -  "vmrhg\t%v0,%1,%N1"
> -  [(set_attr "op_type" "VRR")])
> +  "@
> +   vmrhg\t%v0,%1,%N1
> +   #"
> +  "!(MEM_P (operands[0]) && MEM_VOLATILE_P (operands[0]))"
> +  [(set (match_dup 2) (match_dup 3))
> +   (set (match_dup 4) (match_dup 5))]
> +{
> +  operands[2] = simplify_gen_subreg (DFmode, operands[0], TFmode, 0);
> +  operands[3] = simplify_gen_subreg (DFmode, operands[1], FPRX2mode, 0);
> +  operands[4] = simplify_gen_subreg (DFmode, operands[0], TFmode, 8);
> +  operands[5] = simplify_gen_subreg (DFmode, operands[1], FPRX2mode, 8);
> +}
> +  [(set_attr "op_type" "VRR,*")])

Splitting an address like this might cause the displacement to overflow in the
second part. This would require an additional register to make the address
valid again, which in turn would be a problem after reload. You can use the
'AR' constraint for the memory alternative. That way reload will make sure the
address is offsettable.

Andreas


>  
>  (define_insn "*vec_ti_to_v1ti"
>    [(set (match_operand:V1TI                   0 "nonimmediate_operand" "=v,v,R,  v,  v,v")
> @@ -753,6 +764,21 @@ (define_insn "*tf_to_fprx2_1"
>    "vpdi\t%V0,%v1,%V0,5"
>    [(set_attr "op_type" "VRR")])
>  
> +(define_insn_and_split "tf_to_fprx2"
> +  [(set (match_operand:FPRX2            0 "nonimmediate_operand" "=f,f")
> +     (subreg:FPRX2 (match_operand:TF 1 "general_operand"       "v,R") 0))]
> +  "TARGET_VXE"
> +  "#"
> +  "!(MEM_P (operands[1]) && MEM_VOLATILE_P (operands[1]))"
> +  [(set (match_dup 2) (match_dup 3))
> +   (set (match_dup 4) (match_dup 5))]
> +{
> +  operands[2] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 0);
> +  operands[3] = simplify_gen_subreg (DFmode, operands[1], TFmode, 0);
> +  operands[4] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 8);
> +  operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, 8);
> +})
> +
>  ; vec_perm_const for V2DI using vpdi?
>  
>  ;;
> diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-asm-abi.c b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-abi.c
> new file mode 100644
> index 00000000000..9cd50b62b48
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-abi.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=z14 -mzarch --save-temps" } */
> +/* { dg-do run { target { s390_z14_hw } } } */
> +#include <assert.h>
> +#include <stdint.h>
> +
> +__attribute__ ((noipa)) static long double
> +xsqrt (long double x)
> +{
> +  long double res;
> +  asm("sqxbr\t%0,%1" : "=f"(res) : "f"(x));
> +  return res;
> +}
> +
> +/* Check that the generated code is very small and straightforward.  In
> +   particular, there must be no unnecessary copying and no stack frame.  */
> +/* { dg-final { scan-assembler {\n\tld\t.*\n\tld\t.*\n(#.*\n)*\tsqxbr\t.*\n\tstd\t.*\n\tstd\t.*\n\tbr\t%r14\n} } } */
> +
> +int
> +main (void)
> +{
> +  long double res, x = 0x1.0000000000001p+0L,
> +                exp = 1.00000000000000011102230246251564788e+0L;
> +  res = xsqrt (x);
> +  assert (res == exp);
> +}
> diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-asm-in-out.c b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-in-out.c
> new file mode 100644
> index 00000000000..5380311b435
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-in-out.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=z14 -mzarch" } */
> +/* { dg-do run { target { s390_z14_hw } } } */
> +#include <assert.h>
> +#include <stdint.h>
> +
> +int
> +main (void)
> +{
> +  long double res, x = 0x1.0000000000001p+0L,
> +                exp = 1.00000000000000011102230246251564788e+0L;
> +  asm("sqxbr %0,%1" : "=f"(res) : "f"(x));
> +  assert (res == exp);
> +}
> diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-asm-inout.c b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-inout.c
> new file mode 100644
> index 00000000000..6dcd2dc8ac1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-inout.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=z14 -mzarch" } */
> +/* { dg-do run { target { s390_z14_hw } } } */
> +#include <assert.h>
> +#include <stdint.h>
> +
> +int
> +main (void)
> +{
> +  long double res = 0x1.0000000000001p+0L,
> +           exp = 1.00000000000000011102230246251564788e+0L;
> +  asm("sqxbr %0,%0" : "+f"(res));
> +  assert (res == exp);
> +}
> diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-volatile-from-i64.c b/gcc/testsuite/gcc.target/s390/vector/long-double-volatile-from-i64.c
> new file mode 100644
> index 00000000000..f4489841c28
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-volatile-from-i64.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=z14 -mzarch --save-temps" } */
> +/* { dg-do run { target { s390_z14_hw } } } */
> +#include <assert.h>
> +#include <stdint.h>
> +
> +__attribute__ ((noipa)) static long double
> +long_double_volatile_from_i64 (int64_t x)
> +{
> +  static volatile long double y;
> +  y = x;
> +  return y;
> +}
> +
> +/* { dg-final { scan-assembler-times {\n\tcxgbr\t} 1 } } */
> +
> +int
> +main (void)
> +{
> +  assert (long_double_volatile_from_i64 (42) == 42.L);
> +  assert (long_double_volatile_from_i64 (-42) == -42.L);
> +}
>

Re: [PATCH] IBM Z: Fix usage of "f" constraint with long doubles

Reply via email to