Hi Robert,

Next batch of comments for this patch. I've covered mips-msa.md up to the copy
patterns and one supporting function from mips.c.

>+++ b/gcc/config/mips/mips-msa.md

>+;; The attribute gives half modes for vector modes.
>+(define_mode_attr VHMODE
>+  [(V8HI "V16QI")
>+   (V4SI "V8HI")
>+   (V2DI "V4SI")
>+   (V2DF "V4SF")])
>+
>+;; The attribute gives double modes for vector modes.
>+(define_mode_attr VDMODE
>+  [(V4SI "V2DI")
>+   (V8HI "V4SI")
>+   (V16QI "V8HI")])

Presumably there is a reason why this is not a mirror of VHMODE. I.e. it does
not have floating point modes?

>+;; The attribute gives half modes with same number of elements for vector 
>modes.
>+(define_mode_attr TRUNCMODE
>+  [(V8HI "V8QI")
>+   (V4SI "V4HI")
>+   (V2DI "V2SI")])
>+
>+;; This attribute gives the mode of the result for "copy_s_b, copy_u_b" etc.
>+(define_mode_attr RES
>+  [(V2DF "DF")
>+   (V4SF "SF")
>+   (V2DI "DI")
>+   (V4SI "SI")
>+   (V8HI "SI")
>+   (V16QI "SI")])

Perhaps prefix these with a 'V' to clarify they are vector mode attributes.

>+;; This attribute gives define_insn suffix for MSA instructions with need

with => that need

>+;; distinction between integer and floating point.
>+(define_mode_attr msafmt_f
>+  [(V2DF "d_f")
>+   (V4SF "w_f")
>+   (V2DI "d")
>+   (V4SI "w")
>+   (V8HI "h")
>+   (V16QI "b")])
>+
>+;; To represent bitmask needed for vec_merge using "const_<bitmask>_operand".

Commenting style is different here. Everything else starts with: This attribute 
...

>+(define_mode_attr bitmask
>+  [(V2DF "exp_2")
>+   (V4SF "exp_4")
>+   (V2DI "exp_2")
>+   (V4SI "exp_4")
>+   (V8HI "exp_8")
>+   (V16QI "exp_16")])
>+
>+;; This attribute used to form an immediate operand constraint using

used to => is used to

>+;; "const_<bitimm>_operand".
>+(define_mode_attr bitimm
>+  [(V16QI "uimm3")
>+   (V8HI  "uimm4")
>+   (V4SI  "uimm5")
>+   (V2DI  "uimm6")
>+  ])
>+

>+(define_expand "fixuns_trunc<FMSA:mode><mode_i>2"
>+  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
>+      (unsigned_fix:<VIMODE> (match_operand:FMSA 1 "register_operand" "f")))]
>+  "ISA_HAS_MSA"
>+{
>+  emit_insn (gen_msa_ftrunc_u_<msafmt> (operands[0], operands[1]));
>+  DONE;
>+})

The msa_ftrunc_u_* define_insns should just be renamed to use the standard
pattern names and, more importantly, standard RTL not UNSPEC.

>+
>+(define_expand "fix_trunc<FMSA:mode><mode_i>2"
>+  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
>+      (fix:<VIMODE> (match_operand:FMSA 1 "register_operand" "f")))]
>+  "ISA_HAS_MSA"
>+{
>+  emit_insn (gen_msa_ftrunc_s_<msafmt> (operands[0], operands[1]));
>+  DONE;
>+})

Likewise.

>+
>+(define_expand "vec_pack_trunc_v2df"
>+  [(set (match_operand:V4SF 0 "register_operand")
>+      (vec_concat:V4SF
>+        (float_truncate:V2SF (match_operand:V2DF 1 "register_operand"))
>+        (float_truncate:V2SF (match_operand:V2DF 2 "register_operand"))))]
>+  "ISA_HAS_MSA"
>+  "")

Rename msa_fexdo_w to vec_pack_trunc_v2df.

I see that fexdo has a 'halfword' variant which creates a half-float. What
else can operate on half-float and should this really be recorded as an
HFmode?

>+(define_expand "vec_unpacks_hi_v4sf"
>+  [(set (match_operand:V2DF 0 "register_operand" "=f")
>+      (float_extend:V2DF
>+        (vec_select:V2SF
>+          (match_operand:V4SF 1 "register_operand" "f")
>+          (parallel [(const_int 0) (const_int 1)])

If we swap the (parallel) for a match_operand 2...

>+        )))]
>+  "ISA_HAS_MSA"
>+{
>+  if (BYTES_BIG_ENDIAN)
>+    emit_insn (gen_msa_fexupr_d (operands[0], operands[1]));
>+  else
>+    emit_insn (gen_msa_fexupl_d (operands[0], operands[1]));

Then these two could change to set up operands[2] with either
a parallel of 0/1 or 2/3 and then...

You could change the fexupr_d and fexupl_d insn patterns to use normal RTL
that select the appropriate elements (either 0/1 and 2/3).

>+  DONE;

Which means the RTL in the pattern would be used to expand this and
you would remove the DONE. As it stands the pattern on this expand
is simply never used.

>+})
>+
>+(define_expand "vec_unpacks_lo_v4sf"
>+  [(set (match_operand:V2DF 0 "register_operand" "=f")
>+      (float_extend:V2DF
>+        (vec_select:V2SF
>+          (match_operand:V4SF 1 "register_operand" "f")
>+          (parallel [(const_int 0) (const_int 1)])
>+        )))]
>+  "ISA_HAS_MSA"
>+{
>+  if (BYTES_BIG_ENDIAN)
>+    emit_insn (gen_msa_fexupl_d (operands[0], operands[1]));
>+  else
>+    emit_insn (gen_msa_fexupr_d (operands[0], operands[1]));
>+  DONE;
>+})

Likewise but inverted.

>+
>+(define_expand "vec_unpacks_hi_<mode>"
>+  [(set (match_operand:<VDMODE> 0 "register_operand")
>+      (match_operand:IMSA_WHB 1 "register_operand"))]

Not much point in having the (set) in here as it would be illegal anyway
if it were actually expanded. Just list the two operands. (same throughout)

>+  "ISA_HAS_MSA"
>+{
>+  mips_expand_vec_unpack (operands, false/*unsigned_p*/, true/*high_p*/);
>+  DONE;
>+})
>+
>+(define_expand "vec_unpacks_lo_<mode>"
>+  [(set (match_operand:<VDMODE> 0 "register_operand")
>+      (match_operand:IMSA_WHB 1 "register_operand"))]
>+  "ISA_HAS_MSA"
>+{
>+  mips_expand_vec_unpack (operands, false/*unsigned_p*/, false/*high_p*/);
>+  DONE;
>+})
>+
>+(define_expand "vec_unpacku_hi_<mode>"
>+  [(set (match_operand:<VDMODE> 0 "register_operand")
>+      (match_operand:IMSA_WHB 1 "register_operand"))]
>+  "ISA_HAS_MSA"
>+{
>+  mips_expand_vec_unpack (operands, true/*unsigned_p*/, true/*high_p*/);
>+  DONE;
>+})
>+
>+(define_expand "vec_unpacku_lo_<mode>"
>+  [(set (match_operand:<VDMODE> 0 "register_operand")
>+      (match_operand:IMSA_WHB 1 "register_operand"))]
>+  "ISA_HAS_MSA"
>+{
>+  mips_expand_vec_unpack (operands, true/*unsigned_p*/, false/*high_p*/);
>+  DONE;
>+})
>+

>+(define_expand "vec_set<mode>"
>+  [(match_operand:IMSA 0 "register_operand")
>+   (match_operand:<UNITMODE> 1 "reg_or_0_operand")
>+   (match_operand 2 "const_<indeximm>_operand")]
>+  "ISA_HAS_MSA"
>+{
>+  emit_insn (gen_msa_insert_<msafmt>_insn (operands[0], operands[1],
>+                                         operands[0],
>+                                         GEN_INT(1 << INTVAL (operands[2]))));
>+  DONE;
>+})
>+
>+(define_expand "vec_set<mode>"
>+  [(match_operand:FMSA 0 "register_operand")
>+   (match_operand:<UNITMODE> 1 "register_operand")
>+   (match_operand 2 "const_<indeximm>_operand")]
>+  "ISA_HAS_MSA"
>+{
>+  emit_insn (gen_msa_insve_<msafmt_f>_s (operands[0], operands[0],
>+                                       GEN_INT(1 << INTVAL (operands[2])),
>+                                       operands[1]));
>+  DONE;
>+})
>+
>+(define_expand "vcondu<MSA:mode><IMSA:mode>"
>+  [(set (match_operand:MSA 0 "register_operand")
>+      (if_then_else:MSA
>+        (match_operator 3 ""
>+          [(match_operand:IMSA 4 "register_operand")
>+           (match_operand:IMSA 5 "register_operand")])
>+        (match_operand:MSA 1 "reg_or_m1_operand")
>+        (match_operand:MSA 2 "reg_or_0_operand")))]
>+  "ISA_HAS_MSA
>+   && (GET_MODE_NUNITS (<MSA:MODE>mode) == GET_MODE_NUNITS (<IMSA:MODE>mode))"
>+{
>+  mips_expand_vec_cond_expr (<MSA:MODE>mode, <MSA:VIMODE>mode, operands);
>+  DONE;
>+})
>+
>+(define_expand "vcond<MSA:mode><MSA_2:mode>"
>+  [(set (match_operand:MSA 0 "register_operand")
>+      (if_then_else:MSA
>+        (match_operator 3 ""
>+          [(match_operand:MSA_2 4 "register_operand")
>+           (match_operand:MSA_2 5 "register_operand")])
>+        (match_operand:MSA 1 "reg_or_m1_operand")
>+        (match_operand:MSA 2 "reg_or_0_operand")))]
>+  "ISA_HAS_MSA
>+   && (GET_MODE_NUNITS (<MSA:MODE>mode) == GET_MODE_NUNITS (<MSA:MODE>mode))"

Bug: This compares MSA to MSA. The second should be MSA_2.

>+{
>+  mips_expand_vec_cond_expr (<MSA:MODE>mode, <MSA:VIMODE>mode, operands);
>+  DONE;
>+})
>+
>+;; Note used directly by builtins but via the following define_expand.
>+(define_insn "msa_insert_<msafmt>_insn"
>+  [(set (match_operand:IMSA 0 "register_operand" "=f")
>+      (vec_merge:IMSA
>+        (vec_duplicate:IMSA
>+          (match_operand:<UNITMODE> 1 "reg_or_0_operand" "dJ"))
>+        (match_operand:IMSA 2 "register_operand" "0")
>+        (match_operand 3 "const_<bitmask>_operand" "")))]
>+  "ISA_HAS_MSA"
>+  "insert.<msafmt>\t%w0[%y3],%z1"
>+  [(set_attr "type" "simd_insert")
>+   (set_attr "mode" "<MODE>")])

Rename to remove _insn, see below. V2DI mode should have # as its pattern
for 32-bit targets to ensure it gets split.

>+
>+;; Expand builtin for HImode and QImode which takes SImode.
>+(define_expand "msa_insert_<msafmt>"
>+  [(match_operand:IMSA 0 "register_operand")
>+   (match_operand:IMSA 1 "register_operand")
>+   (match_operand 2 "const_<indeximm>_operand")
>+   (match_operand:<RES> 3 "reg_or_0_operand")]
>+  "ISA_HAS_MSA"
>+{
>+  if ((GET_MODE_SIZE (<UNITMODE>mode) < GET_MODE_SIZE (<RES>mode))
>+      && (REG_P (operands[3]) || (GET_CODE (operands[3]) == SUBREG
>+                                && REG_P (SUBREG_REG (operands[3])))))
>+    operands[3] = lowpart_subreg (<UNITMODE>mode, operands[3], <RES>mode);
>+  emit_insn (gen_msa_insert_<msafmt>_insn (operands[0], operands[3],
>+                                         operands[1],
>+                                         GEN_INT(1 << INTVAL (operands[2]))));
>+  DONE;
>+})
>+

Let's do this during mips_expand_builtin_insn like ilvl and friends. Having
expanders simply to map the builtins to real instructions doesn't seem very
useful.

>+(define_expand "msa_insert_<msafmt_f>"
>+  [(match_operand:FMSA 0 "register_operand")
>+   (match_operand:FMSA 1 "register_operand")
>+   (match_operand 2 "const_<indeximm>_operand")
>+   (match_operand:<UNITMODE> 3 "reg_or_0_operand")]
>+  "ISA_HAS_MSA"
>+{
>+  emit_insn (gen_msa_insert_<msafmt_f>_insn (operands[0], operands[3],
>+                                           operands[1],
>+                                           GEN_INT(1 << INTVAL 
>(operands[2]))));
>+  DONE;
>+})

Likewise.

>+
>+(define_insn "msa_insert_<msafmt_f>_insn"
>+  [(set (match_operand:FMSA 0 "register_operand" "=f")
>+      (vec_merge:FMSA
>+        (vec_duplicate:FMSA
>+          (match_operand:<UNITMODE> 1 "register_operand" "d"))
>+        (match_operand:FMSA 2 "register_operand" "0")
>+        (match_operand 3 "const_<bitmask>_operand" "")))]
>+  "ISA_HAS_MSA"
>+  "insert.<msafmt>\t%w0[%y3],%z1"
>+  [(set_attr "type" "simd_insert")
>+   (set_attr "mode" "<MODE>")])

Rename to remove _insn, see above. V2DF mode should have # for 32-bit targets to
ensure it gets split.

>+
>+(define_split
>+  [(set (match_operand:MSA_D 0 "register_operand")
>+      (vec_merge:MSA_D
>+        (vec_duplicate:MSA_D
>+          (match_operand:<UNITMODE> 1 "<MSA_D:msa_d>_operand"))
>+        (match_operand:MSA_D 2 "register_operand")
>+        (match_operand 3 "const_<bitmask>_operand")))]
>+  "reload_completed && TARGET_MSA && !TARGET_64BIT"
>+  [(const_int 0)]
>+{
>+  mips_split_msa_insert_d (operands[0], operands[2], operands[3], 
>operands[1]);
>+  DONE;
>+})

...

>+(define_expand "msa_insve_<msafmt_f>"
>+  [(set (match_operand:MSA 0 "register_operand")
>+      (vec_merge:MSA
>+        (vec_duplicate:MSA
>+          (vec_select:<UNITMODE>
>+            (match_operand:MSA 3 "register_operand")
>+            (parallel [(const_int 0)])))
>+        (match_operand:MSA 1 "register_operand")
>+        (match_operand 2 "const_<indeximm>_operand")))]
>+  "ISA_HAS_MSA"
>+{
>+  operands[2] = GEN_INT (1 << INTVAL (operands[2]));
>+})

Like for insert patterns do this in mips_expand_builtin_insn and rename the
instruction below to remove _insn.

>+(define_insn "msa_insve_<msafmt_f>_insn"
>+  [(set (match_operand:MSA 0 "register_operand" "=f")
>+      (vec_merge:MSA
>+        (vec_duplicate:MSA
>+          (vec_select:<UNITMODE>
>+            (match_operand:MSA 3 "register_operand" "f")
>+            (parallel [(const_int 0)])))
>+        (match_operand:MSA 1 "register_operand" "0")
>+        (match_operand 2 "const_<bitmask>_operand" "")))]
>+  "ISA_HAS_MSA"
>+  "insve.<msafmt>\t%w0[%y2],%w3[0]"
>+  [(set_attr "type" "simd_insert")
>+   (set_attr "mode" "<MODE>")])
>+
>+;; Operand 3 is a scalar.
>+(define_insn "msa_insve_<msafmt>_f_s"

It would be more clear to have <msafmt_f> instead of <msafmt>_f. The 's' here
is for scalar, I believe. Perhaps spell it out and use 'scalar'.

>+  [(set (match_operand:FMSA 0 "register_operand" "=f")
>+      (vec_merge:FMSA
>+        (vec_duplicate:FMSA
>+          (match_operand:<UNITMODE> 3 "register_operand" "f"))
>+        (match_operand:FMSA 1 "register_operand" "0")
>+        (match_operand 2 "const_<bitmask>_operand" "")))]
>+  "ISA_HAS_MSA"
>+  "insve.<msafmt>\t%w0[%y2],%w3[0]"
>+  [(set_attr "type" "simd_insert")
>+   (set_attr "mode" "<MODE>")])

>+;; Note that copy_s.d and copy_s.d_f will be split later if !TARGET_64BIT.
>+(define_insn "msa_copy_s_<msafmt_f>"
>+  [(set (match_operand:<RES> 0 "register_operand" "=d")
>+      (sign_extend:<RES>
>+        (vec_select:<UNITMODE>
>+          (match_operand:MSA 1 "register_operand" "f")
>+          (parallel [(match_operand 2 "const_<indeximm>_operand" "")]))))]
>+  "ISA_HAS_MSA"
>+  "copy_s.<msafmt>\t%0,%w1[%2]"
>+  [(set_attr "type" "simd_copy")
>+   (set_attr "mode" "<MODE>")])

I think the splits should be explicit and therefore generate # for the
two patterns that will be split for !TARGET_64BIT. The sign_extend should
only be present for V8HI and V16QI modes. There could be value in adding
widening patterns to extend to DImode on 64-bit targets but it may not
trigger much.

>+(define_split
>+  [(set (match_operand:<UNITMODE> 0 "register_operand")
>+      (sign_extend:<UNITMODE>
>+        (vec_select:<UNITMODE>
>+          (match_operand:MSA_D 1 "register_operand")
>+          (parallel [(match_operand 2 "const_0_or_1_operand")]))))]
>+  "reload_completed && TARGET_MSA && !TARGET_64BIT"
>+  [(const_int 0)]
>+{
>+  mips_split_msa_copy_d (operands[0], operands[1], operands[2],
>+                       gen_msa_copy_s_w);
>+  DONE;
>+})
>+
>+;; Note that copy_u.d and copy_u.d_f will be split later if !TARGET_64BIT.
>+(define_insn "msa_copy_u_<msafmt_f>"
>+  [(set (match_operand:<RES> 0 "register_operand" "=d")
>+      (zero_extend:<RES>
>+        (vec_select:<UNITMODE>
>+          (match_operand:MSA 1 "register_operand" "f")
>+          (parallel [(match_operand 2 "const_<indeximm>_operand" "")]))))]
>+  "ISA_HAS_MSA"
>+  "copy_u.<msafmt>\t%0,%w1[%2]"
>+  [(set_attr "type" "simd_copy")
>+   (set_attr "mode" "<MODE>")])

Likewise on all accounts except that V2SI mode should not be included at all
here as we need to use the copy_s for V2SI->SImode on 64-bit targets to get
the correct canonical result.

>+(define_split
>+  [(set (match_operand:<UNITMODE> 0 "register_operand")
>+      (zero_extend:<UNITMODE>
>+        (vec_select:<UNITMODE>
>+          (match_operand:MSA_D 1 "register_operand")
>+          (parallel [(match_operand 2 "const_0_or_1_operand")]))))]
>+  "reload_completed && TARGET_MSA && !TARGET_64BIT"
>+  [(const_int 0)]
>+{
>+  mips_split_msa_copy_d (operands[0], operands[1], operands[2],
>+                       gen_msa_copy_u_w);

This should use copy_s so that when the 32-bit code is used on a 64-bit arch
then the data stays in canonical form. The copy_u should never be used on
a 32-bit target, I don't think it should even exist in the MSA32 spec
actually. I am discussing this with our architecture team and will update
the thread with the outcome.

>+  DONE;
>+})

= mips.c =

>+/* Expand VEC_COND_EXPR, where:
>+   MODE is mode of the result
>+   VIMODE equivalent integer mode
>+   OPERANDS operands of VEC_COND_EXPR.  */
>+
>+void
>+mips_expand_vec_cond_expr (machine_mode mode, machine_mode vimode,
>+                         rtx *operands)
>+{
>+  rtx cond = operands[3];
>+  rtx cmp_op0 = operands[4];
>+  rtx cmp_op1 = operands[5];
>+  rtx cmp_res = gen_reg_rtx (vimode);
>+
>+  mips_expand_msa_cmp (cmp_res, GET_CODE (cond), cmp_op0, cmp_op1);
>+
>+  /* We handle the following cases:
>+     1) r = a CMP b ? -1 : 0
>+     2) r = a CMP b ? -1 : v
>+     3) r = a CMP b ?  v : 0
>+     4) r = a CMP b ? v1 : v2  */
>+
>+  /* Case (1) above.  We only move the results.  */
>+  if (operands[1] == CONSTM1_RTX (vimode)
>+      && operands[2] == CONST0_RTX (vimode))
>+    emit_move_insn (operands[0], cmp_res);
>+  else
>+    {
>+      rtx src1 = gen_reg_rtx (vimode);
>+      rtx src2 = gen_reg_rtx (vimode);
>+      rtx mask = gen_reg_rtx (vimode);
>+      rtx bsel;
>+
>+      /* Move the vector result to use it as a mask.  */
>+      emit_move_insn (mask, cmp_res);
>+
>+      if (register_operand (operands[1], mode))
>+      {
>+        rtx xop1 = operands[1];
>+        if (mode != vimode)
>+          {
>+            xop1 = gen_reg_rtx (vimode);
>+            emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0));
>+          }
>+        emit_move_insn (src1, xop1);
>+      }
>+      else
>+      /* Case (2) if the below doesn't move the mask to src2.  */
>+      emit_move_insn (src1, mask);

Please assert that operands[1] is constm1.

>+
>+      if (register_operand (operands[2], mode))
>+      {
>+        rtx xop2 = operands[2];
>+        if (mode != vimode)
>+          {
>+            xop2 = gen_reg_rtx (vimode);
>+            emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0));
>+          }
>+        emit_move_insn (src2, xop2);
>+      }
>+      else
>+      /* Case (3) if the above didn't move the mask to src1.  */
>+      emit_move_insn (src2, mask);

Please assert that operands[2] is const0.

>+
>+      /* We deal with case (4) if the mask wasn't moved to either src1 or 
>src2.
>+       In any case, we eventually do vector mask-based copy.  */
>+      bsel = gen_rtx_UNSPEC (vimode, gen_rtvec (3, mask, src2, src1),
>+                           UNSPEC_MSA_BSEL_V);
>+      /* The result is placed back to a register with the mask.  */
>+      emit_insn (gen_rtx_SET (mask, bsel));

I guess you expand like this instead of gen_ to avoid having to select the 
function
based on vimode.

>+      emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0));
>+    }
>+}
>+

Thanks,
Matthew

Reply via email to