On 12/11/2011 04:50 AM, Richard Sandiford wrote: > [Mingjie, please could you help with the Loongson question near the end?]
Actually, can you tell me how to test these ABI combinations? I keep trying to use mips-sim or mips64-sim and get linker errors complaining about ABI combinations. > Little-endian: > > The semantics of the RTL pattern are: > > { 0L, 0U } = { X[I3], X[I4 + 2] }, where X = { 1L, 1U, 2L, 2U } > > so: 0L = { 1L, 1U }[I3] (= <bop><bUL>) > 0U = { 2L, 2U }[I4] (= <aop><aUL>) > > <aop> = 2, <aUL> = I4 ? U : L > <bop> = 1, <bUL> = I3 ? U : L > > [LL] !I4 && !I3 [UL] I4 && !I3 > [LU] !I4 && I3 [UU] I4 && I3 > > Big-endian: > > The semantics of the RTL pattern are: > > { 0U, 0L } = { X[I3], X[I4 + 2] }, where X = { 1U, 1L, 2U, 2L } > > so: 0U = { 1U, 1L }[I3] (= <aop><aUL>) > 0L = { 2U, 2L }[I4] (= <bop><bUL>) > > <aop> = 1, <aUL> = I3 ? L : U > <bop> = 2, <bUL> = I4 ? L : U > > [UU] !I3 && !I4 [UL] !I3 && I4 > [LU] I3 && !I4 [LL] I3 && I4. */ > > which suggests that the PUL and PLU entries for big-endian should be > the other way around. Does that sound right, or have I misunderstood? Yes, that sounds right. > ...for little-endian, we need to pass the "U" and "L" components of the > mnemonic in the reverse order: the MIPS instruction specifies the upper > part first, whereas the rtl pattern specifies the lower part first. > And for little-endian, U refers to memory element 1 and L to memory > element 0. So I think this should be: ... Except that the actual output of the LE insn swaps the operands too. So I think these expanders should not *also* swap the operands. I've tidied these up a bit since then. >> +static bool >> +mips_expand_vpc_ps (struct expand_vec_perm_d *d) I've eliminated this function since then. >> + /* Convert the selector into the packed 8-bit form for pshufh. */ >> + for (i = mask = 0; i < 4; i++) >> + mask |= (d->perm[i] & 3) << (i * 2); > > I think this is endian-dependent. For little-endian, the bottom two bits > of the mask determine element 0; for big-endian, the top two bits of the > mask do. Recall that Loongson can only run in little-endian.
I added comments about that in the md file, but it would do no harm to add another here. > (There's a machine in the farm, but bootstrapping on it is rather slow.) Yeah, I started checking out the tree there yesterday and it never completed. > I think a lot of the endianness stuff in the patch is dependent on byte > endianness rather than word endianness. Since we only support two out > of the four combinations, it seems better not to worry which and simply > use TARGET_{BIG,LITTLE}_ENDIAN instead of {WORDS,BYTES}_{BIG,LITTLE}_ENDIAN. Sure. This is my current patch; it does not yet have the pul/plu insns swapped as suggested above. I did change the loongson.h interface as H-P suggested. r~
commit b7790c7a9e53d66d1f348c3f2adb5b8a9bf2d93c Author: Richard Henderson <r...@redhat.com> Date: Wed Dec 7 14:17:02 2011 -0800 mips: Implement vec_perm_const. diff --git a/gcc/config/mips/loongson.h b/gcc/config/mips/loongson.h index 6bfd4d7..dfd6505 100644 --- a/gcc/config/mips/loongson.h +++ b/gcc/config/mips/loongson.h @@ -447,15 +447,15 @@ psadbh (uint8x8_t s, uint8x8_t t) /* Shuffle halfwords. */ __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order) +pshufh_u (uint16x4_t s, uint8_t order) { - return __builtin_loongson_pshufh_u (dest, s, order); + return __builtin_loongson_pshufh_u (s, order); } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order) +pshufh_s (int16x4_t s, uint8_t order) { - return __builtin_loongson_pshufh_s (dest, s, order); + return __builtin_loongson_pshufh_s (s, order); } /* Shift left logical. */ diff --git a/gcc/config/mips/loongson.md b/gcc/config/mips/loongson.md index 225f4d1..7c7e29f 100644 --- a/gcc/config/mips/loongson.md +++ b/gcc/config/mips/loongson.md @@ -24,10 +24,7 @@ UNSPEC_LOONGSON_PCMPEQ UNSPEC_LOONGSON_PCMPGT UNSPEC_LOONGSON_PEXTR - UNSPEC_LOONGSON_PINSR_0 - UNSPEC_LOONGSON_PINSR_1 - UNSPEC_LOONGSON_PINSR_2 - UNSPEC_LOONGSON_PINSR_3 + UNSPEC_LOONGSON_PINSRH UNSPEC_LOONGSON_PMADD UNSPEC_LOONGSON_PMOVMSK UNSPEC_LOONGSON_PMULHU @@ -200,6 +197,51 @@ "pandn\t%0,%1,%2" [(set_attr "type" "fmul")]) +;; Logical AND. +(define_insn "*loongson_and" + [(set (match_operand:VWHB 0 "register_operand" "=f") + (and:VWHB (match_operand:VWHB 1 "register_operand" "f") + (match_operand:VWHB 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "and\t%0,%1,%2" + [(set_attr "type" "fmul")]) + +;; Logical OR. 
+(define_insn "*loongson_or" + [(set (match_operand:VWHB 0 "register_operand" "=f") + (ior:VWHB (match_operand:VWHB 1 "register_operand" "f") + (match_operand:VWHB 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "or\t%0,%1,%2" + [(set_attr "type" "fmul")]) + +;; Logical XOR. +(define_insn "*loongson_xor" + [(set (match_operand:VWHB 0 "register_operand" "=f") + (xor:VWHB (match_operand:VWHB 1 "register_operand" "f") + (match_operand:VWHB 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "xor\t%0,%1,%2" + [(set_attr "type" "fmul")]) + +;; Logical NOR. +(define_insn "*loongson_nor" + [(set (match_operand:VWHB 0 "register_operand" "=f") + (and:VWHB + (not:VWHB (match_operand:VWHB 1 "register_operand" "f")) + (not:VWHB (match_operand:VWHB 2 "register_operand" "f"))))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "nor\t%0,%1,%2" + [(set_attr "type" "fmul")]) + +;; Logical NOT. +(define_insn "*loongson_not" + [(set (match_operand:VWHB 0 "register_operand" "=f") + (not:VWHB (match_operand:VWHB 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "nor\t%0,%1,%1" + [(set_attr "type" "fmul")]) + ;; Average. (define_insn "loongson_pavg<V_suffix>" [(set (match_operand:VHB 0 "register_operand" "=f") @@ -231,52 +273,110 @@ [(set_attr "type" "fadd")]) ;; Extract halfword. 
-(define_insn "loongson_pextr<V_suffix>" - [(set (match_operand:VH 0 "register_operand" "=f") - (unspec:VH [(match_operand:VH 1 "register_operand" "f") - (match_operand:SI 2 "register_operand" "f")] +(define_insn "loongson_pextrh" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "f")] UNSPEC_LOONGSON_PEXTR))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" "pextr<V_suffix>\t%0,%1,%2" [(set_attr "type" "fmul")]) +(define_insn "vec_extractv4hi_1" + [(set (match_operand:SI 0 "register_operand" "=f") + (zero_extend:SI + (unspec:HI + [(match_operand:V4HI 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "f")] + UNSPEC_LOONGSON_PEXTR)))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pextr<V_suffix>\t%0,%1,%2" + [(set_attr "type" "fmul")]) + +(define_expand "vec_extractv4hi" + [(match_operand:HI 0 "register_operand" "") + (match_operand:V4HI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + rtx x = gen_reg_rtx (SImode); + emit_insn (gen_vec_extractv4hi_1 (x, operands[1], operands[2])); + emit_move_insn (operands[0], gen_lowpart (HImode, x)); + DONE; +}) + ;; Insert halfword. 
-(define_insn "loongson_pinsr<V_suffix>_0" - [(set (match_operand:VH 0 "register_operand" "=f") - (unspec:VH [(match_operand:VH 1 "register_operand" "f") - (match_operand:VH 2 "register_operand" "f")] - UNSPEC_LOONGSON_PINSR_0))] +(define_insn "loongson_pinsrh_0" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "f")) + (parallel [(const_int 4) (const_int 1) + (const_int 2) (const_int 3)])))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pinsrh_0\t%0,%1,%2" + [(set_attr "type" "fdiv")]) + +(define_insn "loongson_pinsrh_1" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 3)])))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pinsr<V_suffix>_0\t%0,%1,%2" + "pinsrh_1\t%0,%1,%2" [(set_attr "type" "fdiv")]) -(define_insn "loongson_pinsr<V_suffix>_1" - [(set (match_operand:VH 0 "register_operand" "=f") - (unspec:VH [(match_operand:VH 1 "register_operand" "f") - (match_operand:VH 2 "register_operand" "f")] - UNSPEC_LOONGSON_PINSR_1))] +(define_insn "loongson_pinsrh_2" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 1) + (const_int 4) (const_int 3)])))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pinsr<V_suffix>_1\t%0,%1,%2" + "pinsrh_2\t%0,%1,%2" [(set_attr "type" "fdiv")]) -(define_insn "loongson_pinsr<V_suffix>_2" - [(set (match_operand:VH 0 "register_operand" "=f") - (unspec:VH [(match_operand:VH 1 "register_operand" "f") - (match_operand:VH 2 "register_operand" "f")] - UNSPEC_LOONGSON_PINSR_2))] +(define_insn "loongson_pinsrh_3" + [(set 
(match_operand:V4HI 0 "register_operand" "=f") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 4)])))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pinsr<V_suffix>_2\t%0,%1,%2" + "pinsrh_3\t%0,%1,%2" [(set_attr "type" "fdiv")]) -(define_insn "loongson_pinsr<V_suffix>_3" - [(set (match_operand:VH 0 "register_operand" "=f") - (unspec:VH [(match_operand:VH 1 "register_operand" "f") - (match_operand:VH 2 "register_operand" "f")] - UNSPEC_LOONGSON_PINSR_3))] +(define_insn "*vec_setv4hi" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "f") + (match_operand:SI 3 "const_0_to_3_operand" "")] + UNSPEC_LOONGSON_PINSRH))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pinsr<V_suffix>_3\t%0,%1,%2" + "pinsrh_%3\t%0,%1,%2" [(set_attr "type" "fdiv")]) +(define_expand "vec_setv4hi" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "f") + (match_operand:HI 2 "register_operand" "f") + (match_operand:SI 3 "const_0_to_3_operand" "")] + UNSPEC_LOONGSON_PINSRH))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + rtx ext = gen_reg_rtx (SImode); + emit_move_insn (ext, gen_lowpart (SImode, operands[1])); + operands[1] = ext; +}) + ;; Multiply and add packed integers. (define_insn "loongson_pmadd<V_stretch_half_suffix>" [(set (match_operand:<V_stretch_half> 0 "register_operand" "=f") @@ -403,12 +503,11 @@ ;; Shuffle halfwords. 
(define_insn "loongson_pshufh" [(set (match_operand:VH 0 "register_operand" "=f") - (unspec:VH [(match_operand:VH 1 "register_operand" "0") - (match_operand:VH 2 "register_operand" "f") - (match_operand:SI 3 "register_operand" "f")] + (unspec:VH [(match_operand:VH 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "f")] UNSPEC_LOONGSON_PSHUFH))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pshufh\t%0,%2,%3" + "pshufh\t%0,%1,%2" [(set_attr "type" "fmul")]) ;; Shift left logical. @@ -478,26 +577,95 @@ "psubus<V_suffix>\t%0,%1,%2" [(set_attr "type" "fadd")]) -;; Unpack high data. -(define_insn "vec_interleave_high<mode>" - [(set (match_operand:VWHB 0 "register_operand" "=f") - (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f") - (match_operand:VWHB 2 "register_operand" "f")] - UNSPEC_LOONGSON_PUNPCKH))] +;; Unpack high data. Recall that Loongson only runs in little-endian. +(define_insn "loongson_punpckhbh" + [(set (match_operand:V8QI 0 "register_operand" "=f") + (vec_select:V8QI + (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "f") + (match_operand:V8QI 2 "register_operand" "f")) + (parallel [(const_int 4) (const_int 12) + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "punpckhbh\t%0,%1,%2" + [(set_attr "type" "fdiv")]) + +(define_insn "loongson_punpckhhw" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "f")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "punpckh<V_stretch_half_suffix>\t%0,%1,%2" + "punpckhhw\t%0,%1,%2" + [(set_attr "type" "fdiv")]) + +(define_insn "loongson_punpckhwd" + [(set (match_operand:V2SI 0 "register_operand" "=f") + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 
"register_operand" "f") + (match_operand:V2SI 2 "register_operand" "f")) + (parallel [(const_int 1) (const_int 3)])))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "punpckhwd\t%0,%1,%2" [(set_attr "type" "fdiv")]) ;; Unpack low data. -(define_insn "vec_interleave_low<mode>" - [(set (match_operand:VWHB 0 "register_operand" "=f") - (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f") - (match_operand:VWHB 2 "register_operand" "f")] - UNSPEC_LOONGSON_PUNPCKL))] +(define_insn "loongson_punpcklbh" + [(set (match_operand:V8QI 0 "register_operand" "=f") + (vec_select:V8QI + (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "f") + (match_operand:V8QI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11)])))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "punpcklbh\t%0,%1,%2" + [(set_attr "type" "fdiv")]) + +(define_insn "loongson_punpcklhw" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "punpckl<V_stretch_half_suffix>\t%0,%1,%2" + "punpcklhw\t%0,%1,%2" [(set_attr "type" "fdiv")]) +(define_insn "loongson_punpcklwd" + [(set (match_operand:V2SI 0 "register_operand" "=f") + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "register_operand" "f") + (match_operand:V2SI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 2)])))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "punpcklwd\t%0,%1,%2" + [(set_attr "type" "fdiv")]) + +(define_expand "vec_perm_const<mode>" + [(match_operand:VWHB 0 "register_operand" "") + (match_operand:VWHB 1 "register_operand" "") + (match_operand:VWHB 2 "register_operand" "") + (match_operand:VWHB 3 "" "")] + "TARGET_HARD_FLOAT 
&& TARGET_LOONGSON_VECTORS" +{ + if (mips_expand_vec_perm_const (operands)) + DONE; + else + FAIL; +}) + ;; Integer division and modulus. For integer multiplication, see mips.md. (define_insn "<u>div<mode>3" diff --git a/gcc/config/mips/mips-modes.def b/gcc/config/mips/mips-modes.def index b9c508b..85861a9 100644 --- a/gcc/config/mips/mips-modes.def +++ b/gcc/config/mips/mips-modes.def @@ -26,9 +26,15 @@ RESET_FLOAT_FORMAT (DF, mips_double_format); FLOAT_MODE (TF, 16, mips_quad_format); /* Vector modes. */ -VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ -VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ -VECTOR_MODES (INT, 4); /* V4QI V2HI */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ +VECTOR_MODES (INT, 4); /* V4QI V2HI */ + +/* Double-sized vector modes for vec_concat. */ +VECTOR_MODE (INT, QI, 16); +VECTOR_MODE (INT, HI, 8); +VECTOR_MODE (INT, SI, 4); +VECTOR_MODE (FLOAT, SF, 4); VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */ VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */ diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h index dbabdff..37c958d 100644 --- a/gcc/config/mips/mips-protos.h +++ b/gcc/config/mips/mips-protos.h @@ -328,6 +328,7 @@ extern void mips_expand_atomic_qihi (union mips_gen_fn_ptrs, rtx, rtx, rtx, rtx); extern void mips_expand_vector_init (rtx, rtx); +extern bool mips_expand_vec_perm_const (rtx op[4]); extern bool mips_eh_uses (unsigned int); extern bool mips_epilogue_uses (unsigned int); diff --git a/gcc/config/mips/mips-ps-3d.md b/gcc/config/mips/mips-ps-3d.md index 504f43c..1b07f12 100644 --- a/gcc/config/mips/mips-ps-3d.md +++ b/gcc/config/mips/mips-ps-3d.md @@ -89,61 +89,94 @@ DONE; }) -; pul.ps - Pair Upper Lower -(define_insn "mips_pul_ps" +(define_insn "*vec_perm_const_ps" [(set (match_operand:V2SF 0 "register_operand" "=f") - (vec_merge:V2SF - (match_operand:V2SF 1 "register_operand" "f") - (match_operand:V2SF 2 "register_operand" "f") - (const_int 2)))] + (vec_select:V2SF + 
(vec_concat:V4SF + (match_operand:V2SF 1 "register_operand" "f") + (match_operand:V2SF 2 "register_operand" "f")) + (parallel [(match_operand:SI 3 "const_0_or_1_operand" "") + (match_operand:SI 4 "const_2_or_3_operand" "")])))] "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" - "pul.ps\t%0,%1,%2" +{ + static const char * const mnemonics[2][4] = { + /* LE */ { "pll.ps\t%0,%2,%1", "pul.ps\t%0,%2,%1", + "plu.ps\t%0,%2,%1", "puu.ps\t%0,%2,%1" }, + /* BE */ { "puu.ps\t%0,%1,%2", "plu.ps\t%0,%1,%2", + "pul.ps\t%0,%1,%2", "pll.ps\t%0,%1,%2" }, + }; + + unsigned mask = INTVAL (operands[3]) * 2 + (INTVAL (operands[4]) - 2); + return mnemonics[BYTES_BIG_ENDIAN][mask]; +} [(set_attr "type" "fmove") (set_attr "mode" "SF")]) -; puu.ps - Pair upper upper -(define_insn "mips_puu_ps" +(define_expand "vec_perm_constv2sf" + [(match_operand:V2SF 0 "register_operand" "") + (match_operand:V2SF 1 "register_operand" "") + (match_operand:V2SF 2 "register_operand" "") + (match_operand:V2SI 3 "" "")] + "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" +{ + if (mips_expand_vec_perm_const (operands)) + DONE; + else + FAIL; +}) + +;; Expanders for builtins +(define_expand "mips_puu_ps" [(set (match_operand:V2SF 0 "register_operand" "=f") - (vec_merge:V2SF - (match_operand:V2SF 1 "register_operand" "f") - (vec_select:V2SF (match_operand:V2SF 2 "register_operand" "f") - (parallel [(const_int 1) - (const_int 0)])) - (const_int 2)))] - "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" - "puu.ps\t%0,%1,%2" - [(set_attr "type" "fmove") - (set_attr "mode" "SF")]) + (vec_select:V2SF + (vec_concat:V4SF + (match_operand:V2SF 1 "register_operand" "f") + (match_operand:V2SF 2 "register_operand" "f")) + (parallel [(match_dup 3) (match_dup 4)])))] + "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" +{ + operands[3] = (BYTES_BIG_ENDIAN ? const0_rtx : const1_rtx); + operands[4] = (BYTES_BIG_ENDIAN ? 
const2_rtx : GEN_INT (3)); +}) -; pll.ps - Pair Lower Lower -(define_insn "mips_pll_ps" +(define_expand "mips_pul_ps" [(set (match_operand:V2SF 0 "register_operand" "=f") - (vec_merge:V2SF - (vec_select:V2SF (match_operand:V2SF 1 "register_operand" "f") - (parallel [(const_int 1) - (const_int 0)])) - (match_operand:V2SF 2 "register_operand" "f") - (const_int 2)))] - "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" - "pll.ps\t%0,%1,%2" - [(set_attr "type" "fmove") - (set_attr "mode" "SF")]) + (vec_select:V2SF + (vec_concat:V4SF + (match_operand:V2SF 1 "register_operand" "f") + (match_operand:V2SF 2 "register_operand" "f")) + (parallel [(match_dup 3) (match_dup 4)])))] + "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" +{ + operands[3] = (BYTES_BIG_ENDIAN ? const1_rtx : const0_rtx); + operands[4] = (BYTES_BIG_ENDIAN ? const2_rtx : GEN_INT (3)); +}) -; plu.ps - Pair Lower Upper -(define_insn "mips_plu_ps" +(define_expand "mips_plu_ps" [(set (match_operand:V2SF 0 "register_operand" "=f") - (vec_merge:V2SF - (vec_select:V2SF (match_operand:V2SF 1 "register_operand" "f") - (parallel [(const_int 1) - (const_int 0)])) - (vec_select:V2SF (match_operand:V2SF 2 "register_operand" "f") - (parallel [(const_int 1) - (const_int 0)])) - (const_int 2)))] - "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" - "plu.ps\t%0,%1,%2" - [(set_attr "type" "fmove") - (set_attr "mode" "SF")]) + (vec_select:V2SF + (vec_concat:V4SF + (match_operand:V2SF 1 "register_operand" "f") + (match_operand:V2SF 2 "register_operand" "f")) + (parallel [(match_dup 3) (match_dup 4)])))] + "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" +{ + operands[3] = (BYTES_BIG_ENDIAN ? const0_rtx : const1_rtx); + operands[4] = (BYTES_BIG_ENDIAN ? 
GEN_INT (3) : const2_rtx); +}) + +(define_expand "mips_pll_ps" + [(set (match_operand:V2SF 0 "register_operand" "=f") + (vec_select:V2SF + (vec_concat:V4SF + (match_operand:V2SF 1 "register_operand" "f") + (match_operand:V2SF 2 "register_operand" "f")) + (parallel [(match_dup 3) (match_dup 4)])))] + "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" +{ + operands[3] = (BYTES_BIG_ENDIAN ? const1_rtx : const0_rtx); + operands[4] = (BYTES_BIG_ENDIAN ? GEN_INT (3) : const2_rtx); +}) ; vec_init (define_expand "vec_initv2sf" @@ -195,22 +228,21 @@ ;; no other way to get a vector mode bitfield store currently. (define_expand "vec_setv2sf" - [(match_operand:V2SF 0 "register_operand") - (match_operand:SF 1 "register_operand") - (match_operand 2 "const_0_or_1_operand")] + [(set (match_operand:V2SF 0 "register_operand" "") + (vec_select:V2SF + (vec_concat:V4SF + (match_operand:SF 1 "register_operand" "") + (match_dup 0)) + (parallel [(match_operand 2 "const_0_or_1_operand" "") + (match_dup 3)])))] "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" { - rtx temp; - /* We don't have an insert instruction, so we duplicate the float, and then use a PUL instruction. 
*/ - temp = gen_reg_rtx (V2SFmode); + rtx temp = gen_reg_rtx (V2SFmode); emit_insn (gen_mips_cvt_ps_s (temp, operands[1], operands[1])); - if (INTVAL (operands[2]) == !BYTES_BIG_ENDIAN) - emit_insn (gen_mips_pul_ps (operands[0], temp, operands[0])); - else - emit_insn (gen_mips_pul_ps (operands[0], operands[0], temp)); - DONE; + operands[1] = temp; + operands[3] = GEN_INT (1 - INTVAL (operands[2]) + 2); }) ; cvt.ps.s - Floating Point Convert Pair to Paired Single diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index d3fd709..2f2578a 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -12774,12 +12774,6 @@ AVAIL_NON_MIPS16 (cache, TARGET_CACHE_BUILTIN) #define CODE_FOR_loongson_psubsb CODE_FOR_sssubv8qi3 #define CODE_FOR_loongson_psubush CODE_FOR_ussubv4hi3 #define CODE_FOR_loongson_psubusb CODE_FOR_ussubv8qi3 -#define CODE_FOR_loongson_punpckhbh CODE_FOR_vec_interleave_highv8qi -#define CODE_FOR_loongson_punpckhhw CODE_FOR_vec_interleave_highv4hi -#define CODE_FOR_loongson_punpckhwd CODE_FOR_vec_interleave_highv2si -#define CODE_FOR_loongson_punpcklbh CODE_FOR_vec_interleave_lowv8qi -#define CODE_FOR_loongson_punpcklhw CODE_FOR_vec_interleave_lowv4hi -#define CODE_FOR_loongson_punpcklwd CODE_FOR_vec_interleave_lowv2si static const struct mips_builtin_description mips_builtins[] = { DIRECT_BUILTIN (pll_ps, MIPS_V2SF_FTYPE_V2SF_V2SF, paired_single), @@ -13021,8 +13015,8 @@ static const struct mips_builtin_description mips_builtins[] = { LOONGSON_BUILTIN (pasubub, MIPS_UV8QI_FTYPE_UV8QI_UV8QI), LOONGSON_BUILTIN (biadd, MIPS_UV4HI_FTYPE_UV8QI), LOONGSON_BUILTIN (psadbh, MIPS_UV4HI_FTYPE_UV8QI_UV8QI), - LOONGSON_BUILTIN_SUFFIX (pshufh, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI_UQI), - LOONGSON_BUILTIN_SUFFIX (pshufh, s, MIPS_V4HI_FTYPE_V4HI_V4HI_UQI), + LOONGSON_BUILTIN_SUFFIX (pshufh, u, MIPS_UV4HI_FTYPE_UV4HI_UQI), + LOONGSON_BUILTIN_SUFFIX (pshufh, s, MIPS_V4HI_FTYPE_V4HI_UQI), LOONGSON_BUILTIN_SUFFIX (psllh, u, MIPS_UV4HI_FTYPE_UV4HI_UQI), 
LOONGSON_BUILTIN_SUFFIX (psllh, s, MIPS_V4HI_FTYPE_V4HI_UQI), LOONGSON_BUILTIN_SUFFIX (psllw, u, MIPS_UV2SI_FTYPE_UV2SI_UQI), @@ -16326,6 +16320,219 @@ mips_shift_truncation_mask (enum machine_mode mode) } +/* Generate or test for an insn that supports a constant permutation. */ + +#define MAX_VECT_LEN 8 + +struct expand_vec_perm_d +{ + rtx target, op0, op1; + unsigned char perm[MAX_VECT_LEN]; + enum machine_mode vmode; + unsigned char nelt; + bool one_vector_p; + bool testing_p; +}; + +/* Construct (set target (vec_select op0 (parallel perm))) and + return true if that's a valid instruction in the active ISA. */ + +static bool +expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt) +{ + rtx rperm[MAX_VECT_LEN], x; + unsigned i; + + for (i = 0; i < nelt; ++i) + rperm[i] = GEN_INT (perm[i]); + + x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm)); + x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x); + x = gen_rtx_SET (VOIDmode, target, x); + + x = emit_insn (x); + if (recog_memoized (x) < 0) + { + remove_insn (x); + return false; + } + return true; +} + +/* Similar, but generate a vec_concat from op0 and op1 as well. */ + +static bool +expand_vselect_vconcat (rtx target, rtx op0, rtx op1, + const unsigned char *perm, unsigned nelt) +{ + enum machine_mode v2mode; + rtx x; + + v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0)); + x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); + return expand_vselect (target, x, perm, nelt); +} + +/* Recognize patterns for the Loongson PSHUFH instruction. */ + +static bool +mips_expand_vpc_loongson_pshufh (struct expand_vec_perm_d *d) +{ + unsigned i, mask; + + if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS)) + return false; + if (d->vmode != V4HImode) + return false; + if (!d->one_vector_p) + return false; + if (d->testing_p) + return true; + + /* Convert the selector into the packed 8-bit form for pshufh. 
*/ + for (i = mask = 0; i < 4; i++) + mask |= (d->perm[i] & 3) << (i * 2); + + emit_insn (gen_loongson_pshufh (d->target, d->op0, + force_reg (SImode, GEN_INT (mask)))); + return true; +} + +static bool +mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) +{ + unsigned int i, nelt = d->nelt; + unsigned char perm2[MAX_VECT_LEN]; + + if (d->one_vector_p) + { + /* Try interleave with alternating operands. */ + memcpy (perm2, d->perm, sizeof(perm2)); + for (i = 1; i < nelt; i += 2) + perm2[i] += nelt; + if (expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, nelt)) + return true; + } + else + { + if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt)) + return true; + + /* Try again with swapped operands. */ + for (i = 0; i < nelt; ++i) + perm2[i] = (d->perm[i] + nelt) & (2 * nelt - 1); + if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt)) + return true; + } + + if (mips_expand_vpc_loongson_pshufh (d)) + return true; + return false; +} + +/* Expand a vec_perm_const pattern. */ + +bool +mips_expand_vec_perm_const (rtx operands[4]) +{ + struct expand_vec_perm_d d; + int i, nelt, which; + rtx sel; + + d.target = operands[0]; + d.op0 = operands[1]; + d.op1 = operands[2]; + sel = operands[3]; + + d.vmode = GET_MODE (d.target); + gcc_assert (VECTOR_MODE_P (d.vmode)); + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.testing_p = false; + + for (i = which = 0; i < nelt; ++i) + { + rtx e = XVECEXP (sel, 0, i); + int ei = INTVAL (e) & (2 * nelt - 1); + which |= (ei < nelt ? 1 : 2); + d.perm[i] = ei; + } + + switch (which) + { + default: + gcc_unreachable(); + + case 3: + d.one_vector_p = false; + if (!rtx_equal_p (d.op0, d.op1)) + break; + + /* The backend (vec_select (vec_concat)) patterns are not duplicated + for single-operand. Try once with the original un-folded selector. */ + if (mips_expand_vec_perm_const_1 (&d)) + return true; + + /* Try again after folding the selector to a single operand. 
*/ + /* FALLTHRU */ + case 2: + for (i = 0; i < nelt; ++i) + d.perm[i] &= nelt - 1; + d.op0 = d.op1; + d.one_vector_p = true; + break; + + case 1: + d.op1 = d.op0; + d.one_vector_p = true; + break; + } + + return mips_expand_vec_perm_const_1 (&d); +} + +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */ + +static bool +mips_vectorize_vec_perm_const_ok (enum machine_mode vmode, + const unsigned char *sel) +{ + struct expand_vec_perm_d d; + unsigned int i, nelt, which; + bool ret; + + d.vmode = vmode; + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.testing_p = true; + memcpy (d.perm, sel, nelt); + + /* Categorize the set of elements in the selector. */ + for (i = which = 0; i < nelt; ++i) + { + unsigned char e = d.perm[i]; + gcc_assert (e < 2 * nelt); + which |= (e < nelt ? 1 : 2); + } + + /* For all elements from second vector, fold the elements to first. */ + if (which == 2) + for (i = 0; i < nelt; ++i) + d.perm[i] -= nelt; + + /* Check whether the mask can be applied to the vector type. */ + d.one_vector_p = (which != 3); + + d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); + d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); + if (!d.one_vector_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + + start_sequence (); + ret = mips_expand_vec_perm_const_1 (&d); + end_sequence (); + + return ret; +} + /* Initialize the GCC target structure. 
*/ #undef TARGET_ASM_ALIGNED_HI_OP #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" @@ -16544,6 +16751,9 @@ mips_shift_truncation_mask (enum machine_mode mode) #undef TARGET_SHIFT_TRUNCATION_MASK #define TARGET_SHIFT_TRUNCATION_MASK mips_shift_truncation_mask +#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK +#define TARGET_VECTORIZE_VEC_PERM_CONST_OK mips_vectorize_vec_perm_const_ok + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-mips.h" diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index ee40cfa..fe1e135 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -517,7 +517,7 @@ struct mips_cpu_info { \ /* Whether Loongson vector modes are enabled. */ \ if (TARGET_LOONGSON_VECTORS) \ - builtin_define ("__mips_loongson_vector_rev"); \ + builtin_define ("__mips_loongson_vector_rev=2"); \ \ /* Historical Octeon macro. */ \ if (TARGET_OCTEON) \ diff --git a/gcc/config/mips/predicates.md b/gcc/config/mips/predicates.md index 5e9398e..b611373 100644 --- a/gcc/config/mips/predicates.md +++ b/gcc/config/mips/predicates.md @@ -73,8 +73,15 @@ ;; This is used for indexing into vectors, and hence only accepts const_int. (define_predicate "const_0_or_1_operand" (and (match_code "const_int") - (ior (match_test "op == CONST0_RTX (GET_MODE (op))") - (match_test "op == CONST1_RTX (GET_MODE (op))")))) + (match_test "IN_RANGE (INTVAL (op), 0, 1)"))) + +(define_predicate "const_2_or_3_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 2, 3)"))) + +(define_predicate "const_0_to_3_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 3)"))) (define_predicate "qi_mask_operand" (and (match_code "const_int")