When optimizing for size, try to avoid using long immediate by employing alternative (short) instructions.
Ok to apply? Claudiu gcc/ xxxx-xx-xx Claudiu Zissulescu <claz...@synopsys.com> * config/arc/arc-protos.h (arc_check_ior_const): Declare. (arc_split_ior): Likewise. (arc_check_mov_const): Likewise. (arc_split_mov_const): Likewise. * config/arc/arc.c (arc_print_operand): Fix 'z' letter. (arc_rtx_costs): Replace check Crr with Cax constraint. (prepare_move_operands): Cleanup, remove unused code. (arc_split_ior): New function. (arc_check_ior_const): Likewise. (arc_split_mov_const): Likewise. (arc_check_mov_const): Likewise. * config/arc/arc.md (movsi_insn): Restructure it, and convert it in define_insn_and_split pattern. (iorsi3): Likewise. (mulsi3_v2): Add new matching variant. (andsi3_i): Cleanup pattern. (rotrsi3_cnt1): Update pattern. (rotrsi3_cnt8): New pattern. (ashlsi2_cnt8): Likewise. (ashlsi2_cnt16): Likewise. * config/arc/constraints.md (C0p): Update constraint. (Crr): Remove it. (C0x): New pattern. (Cax): New pattern. testsuite/ xxxx-xx-xx Claudiu Zissulescu <claz...@synopsys.com> * gcc.target/arc/and-cnst-size.c: New test. * gcc.target/arc/mov-cnst-size.c: Likewise. * gcc.target/arc/or-cnst-size.c: Likewise. * gcc.target/arc/store-merge-1.c: Update test. * gcc.target/arc/arc700-stld-hazard.c: Likewise. * gcc.target/arc/cmem-1.c: Likewise. * gcc.target/arc/cmem-2.c: Likewise. * gcc.target/arc/cmem-3.c: Likewise. * gcc.target/arc/cmem-4.c: Likewise. * gcc.target/arc/cmem-5.c: Likewise. * gcc.target/arc/cmem-6.c: Likewise. * gcc.target/arc/loop-4.c: Likewise. * gcc.target/arc/movh_cl-1.c: Likewise. * gcc.target/arc/sdata-3.c: Likewise. --- gcc/config/arc/arc-protos.h | 4 + gcc/config/arc/arc.c | 222 +++++++++++++++--- gcc/config/arc/arc.md | 215 ++++++++++------- gcc/config/arc/constraints.md | 22 +- gcc/testsuite/gcc.target/arc/and-cnst-size.c | 16 ++ .../gcc.target/arc/arc700-stld-hazard.c | 4 +- gcc/testsuite/gcc.target/arc/cmem-1.c | 6 +- gcc/testsuite/gcc.target/arc/cmem-2.c | 6 +- gcc/testsuite/gcc.target/arc/cmem-3.c | 6 +- gcc/testsuite/gcc.target/arc/cmem-4.c | 6 +- gcc/testsuite/gcc.target/arc/cmem-5.c | 6 +- gcc/testsuite/gcc.target/arc/cmem-6.c | 6 +- gcc/testsuite/gcc.target/arc/loop-4.c | 3 +- gcc/testsuite/gcc.target/arc/mov-cnst-size.c | 42 ++++ gcc/testsuite/gcc.target/arc/movh_cl-1.c | 2 +- gcc/testsuite/gcc.target/arc/or-cnst-size.c | 16 ++ gcc/testsuite/gcc.target/arc/sdata-3.c | 18 +- gcc/testsuite/gcc.target/arc/store-merge-1.c | 2 +- 18 files changed, 449 insertions(+), 153 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arc/and-cnst-size.c create mode 100644 gcc/testsuite/gcc.target/arc/mov-cnst-size.c create mode 100644 gcc/testsuite/gcc.target/arc/or-cnst-size.c diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h index ac0de6b2874..f501bc30ee7 100644 --- a/gcc/config/arc/arc-protos.h +++ b/gcc/config/arc/arc-protos.h @@ -48,6 +48,10 @@ extern bool arc_is_uncached_mem_p (rtx); extern bool gen_operands_ldd_std (rtx *operands, bool load, bool commute); extern bool arc_check_multi (rtx, bool); extern void arc_adjust_reg_alloc_order (void); +extern bool arc_check_ior_const (HOST_WIDE_INT ); +extern void arc_split_ior (rtx *); +extern bool arc_check_mov_const (HOST_WIDE_INT ); +extern bool arc_split_mov_const (rtx *); #endif /* RTX_CODE */ extern unsigned int arc_compute_frame_size (int); diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c index b49f2539408..f398c4a0086 100644 --- a/gcc/config/arc/arc.c +++ b/gcc/config/arc/arc.c @@ -4232,7 +4232,7 @@ arc_print_operand (FILE *file, rtx x, int code) case 'z': if (GET_CODE (x) == CONST_INT) - fprintf (file, "%d",exact_log2(INTVAL (x)) ); + fprintf (file, "%d",exact_log2 (INTVAL (x) & 0xffffffff)); else output_operand_lossage ("invalid operand to %%z code"); @@ -5597,9 +5597,6 @@ arc_rtx_costs (rtx x, machine_mode mode, int outer_code, if (satisfies_constraint_C0p (x)) /* bxor */ nolimm = fast = condexec = true; break; - case SET: - if (satisfies_constraint_Crr (x)) /* ror b,u6 */ - nolimm = true; default: break; } @@ -9088,31 +9085,6 @@ prepare_move_operands (rtx *operands, machine_mode mode) MEM_COPY_ATTRIBUTES (pat, operands[0]); operands[0] = pat; } - if (!cse_not_expected) - { - rtx pat = XEXP (operands[0], 0); - - pat = arc_legitimize_address_0 (pat, pat, mode); - if (pat) - { - pat = change_address (operands[0], mode, pat); - MEM_COPY_ATTRIBUTES (pat, operands[0]); - operands[0] = pat; - } - } - } - - if (MEM_P (operands[1]) && !cse_not_expected) - { - rtx pat = XEXP (operands[1], 0); - - pat = arc_legitimize_address_0 (pat, pat, mode); - if (pat) - { - pat = change_address (operands[1], mode, pat); - MEM_COPY_ATTRIBUTES (pat, operands[1]); - operands[1] = pat; - } } return false; @@ -11457,6 +11429,198 @@ arc_memory_move_cost (machine_mode mode, return (2 * GET_MODE_SIZE (mode)); } +/* Split an OR instruction into multiple BSET/OR instructions in a + tentative to avoid long immediate constants. The next strategies + are employed when destination is 'q' reg. + + 1. if there are up to three bits set in the mask, a succesion of + three bset instruction will be emitted: + OR rA, rB, mask -> + BSET(_S) rA,rB,mask1/BSET_S rA,rA,mask2/BSET_S rA,rA,mask3 + + 2. if the lower 6 bits of the mask is set and there is only one + bit set in the upper remaining bits then we will emit one bset and + one OR instruction: + OR rA, rB, mask -> OR rA,rB,mask1/BSET_S rA,mask2 + + 3. otherwise an OR with limm will be emmitted. */ + +void +arc_split_ior (rtx *operands) +{ + unsigned HOST_WIDE_INT mask, maskx; + rtx op1 = operands[1]; + + gcc_assert (CONST_INT_P (operands[2])); + mask = INTVAL (operands[2]) & 0xffffffff; + + if (__builtin_popcount (mask) > 3 || (mask & 0x3f)) + { + maskx = mask & 0x3f; + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_IOR (SImode, op1, GEN_INT (maskx)))); + op1 = operands[0]; + mask &= ~maskx; + } + + switch (__builtin_popcount (mask)) + { + case 3: + maskx = 1 << (__builtin_ffs (mask) - 1); + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_IOR (SImode, op1, GEN_INT (maskx)))); + mask &= ~maskx; + op1 = operands[0]; + /* FALLTHRU */ + case 2: + maskx = 1 << (__builtin_ffs (mask) - 1); + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_IOR (SImode, op1, GEN_INT (maskx)))); + mask &= ~maskx; + op1 = operands[0]; + /* FALLTHRU */ + case 1: + maskx = 1 << (__builtin_ffs (mask) - 1); + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_IOR (SImode, op1, GEN_INT (maskx)))); + break; + default: + break; + } +} + +/* Helper to check C0x constraint. */ + +bool +arc_check_ior_const (HOST_WIDE_INT ival) +{ + unsigned int mask = (unsigned int) (ival & 0xffffffff); + if (__builtin_popcount (mask) <= 3) + return true; + if (__builtin_popcount (mask & ~0x3f) <= 1) + return true; + return false; +} + +/* Split a mov with long immediate instruction into smaller, size + friendly instructions. */ + +bool +arc_split_mov_const (rtx *operands) +{ + unsigned HOST_WIDE_INT ival; + HOST_WIDE_INT shimm; + machine_mode mode = GET_MODE (operands[0]); + + /* Manage a constant. */ + gcc_assert (CONST_INT_P (operands[1])); + ival = INTVAL (operands[1]) & 0xffffffff; + + if (SIGNED_INT12 (ival)) + return false; + + /* 1. Check if we can just rotate limm by 8 but using ROR8. */ + if (TARGET_BARREL_SHIFTER && TARGET_V2 + && ((ival & ~0x3f000000) == 0)) + { + shimm = (ival >> 24) & 0x3f; + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_ROTATERT (mode, GEN_INT (shimm), + GEN_INT (8)))); + return true; + } + /* 2. Check if we can just shift by 8 to fit into the u6 of LSL8. */ + if (TARGET_BARREL_SHIFTER && TARGET_V2 + && ((ival & ~0x3f00) == 0)) + { + shimm = (ival >> 8) & 0x3f; + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_ASHIFT (mode, GEN_INT (shimm), + GEN_INT (8)))); + return true; + } + + /* 3. Check if we can just shift by 16 to fit into the u6 of LSL16. */ + if (TARGET_BARREL_SHIFTER && TARGET_V2 + && ((ival & ~0x3f0000) == 0)) + { + shimm = (ival >> 16) & 0x3f; + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_ASHIFT (mode, GEN_INT (shimm), + GEN_INT (16)))); + return true; + } + + /* 4. Check if we can do something like mov_s h,u8 / asl_s ra,h,#nb. */ + if (((ival >> (__builtin_ffs (ival) - 1)) & 0xffffff00) == 0 + && TARGET_BARREL_SHIFTER) + { + HOST_WIDE_INT shift = __builtin_ffs (ival); + shimm = (ival >> (shift - 1)) & 0xff; + emit_insn (gen_rtx_SET (operands[0], GEN_INT (shimm))); + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_ASHIFT (mode, operands[0], + GEN_INT (shift - 1)))); + return true; + } + + /* 5. Check if we can just rotate the limm, useful when no barrel + shifter is present. */ + if ((ival & ~0x8000001f) == 0) + { + shimm = (ival * 2 + 1) & 0x3f; + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_ROTATERT (mode, GEN_INT (shimm), + const1_rtx))); + return true; + } + + /* 6. Check if we can do something with bmask. */ + if (IS_POWEROF2_P (ival + 1)) + { + emit_insn (gen_rtx_SET (operands[0], constm1_rtx)); + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_AND (mode, operands[0], + GEN_INT (ival)))); + return true; + } + + return false; +} + +/* Helper to check Cax constraint. */ + +bool +arc_check_mov_const (HOST_WIDE_INT ival) +{ + ival = ival & 0xffffffff; + + if ((ival & ~0x8000001f) == 0) + return true; + + if (IS_POWEROF2_P (ival + 1)) + return true; + + /* The next rules requires a barrel shifter. */ + if (!TARGET_BARREL_SHIFTER) + return false; + + if (((ival >> (__builtin_ffs (ival) - 1)) & 0xffffff00) == 0) + return true; + + if ((ival & ~0x3f00) == 0) + return true; + + if ((ival & ~0x3f0000) == 0) + return true; + + if ((ival & ~0x3f000000) == 0) + return true; + + return false; +} + + #undef TARGET_USE_ANCHORS_FOR_SYMBOL_P #define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 4891faf681e..082b5bba6ec 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -750,66 +750,63 @@ core_3, archs4x, archs4xd, archs4xd_slow" ; In order to allow the ccfsm machinery to do its work, the leading compact ; alternatives say 'canuse' - there is another alternative that will match ; when the condition codes are used. -; Rcq won't match if the condition is actually used; to avoid a spurious match -; via q, q is inactivated as constraint there. -; Likewise, the length of an alternative that might be shifted to conditional +; The length of an alternative that might be shifted to conditional ; execution must reflect this, lest out-of-range branches are created. ; the iscompact attribute allows the epilogue expander to know for which ; insns it should lengthen the return insn. -; N.B. operand 1 of alternative 7 expands into pcl,symbol@gotpc . -(define_insn "*movsi_insn" ; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - [(set (match_operand:SI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h,wl, w, w, w, w, w,???w, ?w, w,Rcq#q, h, wl,Rcq, S, Us<,RcqRck,!*x, r,!*Rsd,!*Rcd,r,Ucm, Usd,m,???m, m,VUsc") - (match_operand:SI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1,cL, I,Crr,Clo,Chi,Cbi,?Rac,Cpc,Clb, ?Cal,Cal,?Cal,Uts,Rcq,RcqRck, Us>,Usd,Ucm, Usd, Ucd,m, w,!*Rzd,c,?Rac,Cm3, C32"))] +(define_insn_and_split "*movsi_insn" ; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 + [(set (match_operand:SI 0 "move_dest_operand" "=q, q,r,q, h,rl, r, r, r, r, ?r, r, q, h, rl, q, S, Us<,RcqRck,!*x, r,!*Rsd,!*Rcd,r,Ucm, Usd,m, m,VUsc") + (match_operand:SI 1 "move_src_operand" "rL,rP,q,P,hCm1,rL, I,Clo,Chi,Cbi,Cpc,Clb,Cax,Cal,Cal,Uts,Rcq,RcqRck, Us>,Usd,Ucm, Usd, Ucd,m, r,!*Rzd,r,Cm3, C32"))] "register_operand (operands[0], SImode) || register_operand (operands[1], SImode) || (CONSTANT_P (operands[1]) - /* Don't use a LIMM that we could load with a single insn - we loose - delay-slot filling opportunities. */ - && !satisfies_constraint_I (operands[1]) + && (!satisfies_constraint_I (operands[1]) || !optimize_size) && satisfies_constraint_Usc (operands[0])) || (satisfies_constraint_Cm3 (operands[1]) && memory_operand (operands[0], SImode))" "@ - mov%? %0,%1%& ;0 - mov%? %0,%1%& ;1 - mov%? %0,%1%& ;2 - mov%? %0,%1%& ;3 - mov%? %0,%1%& ;4 - mov%? %0,%1 ;5 - mov%? %0,%1 ;6 - ror %0,((%1*2+1) & 0x3f) ;7 - movl.cl %0,%1 ;8 - movh.cl %0,%L1>>16 ;9 - * return INTVAL (operands[1]) & 0xffffff ? \"movbi.cl %0,%1 >> %p1,%p1,8;10\" : \"movbi.cl %0,%L1 >> 24,24,8;10\"; - mov%? %0,%1 ;11 - add %0,%1 ;12 - add %0,pcl,%1@pcl ;13 - mov%? %0,%j1 ;14 - mov%? %0,%j1 ;15 - mov%? %0,%j1 ;16 - ld%? %0,%1 ;17 - st%? %1,%0%& ;18 - * return arc_short_long (insn, \"push%? %1%&\", \"st%U0 %1,%0%&\"); - * return arc_short_long (insn, \"pop%? %0%&\", \"ld%U1 %0,%1%&\"); - ld%? %0,%1%& ;21 - xld%U1 %0,%1 ;22 - ld%? %0,%1%& ;23 - ld%? %0,%1%& ;24 - ld%U1%V1 %0,%1 ;25 - xst%U0 %1,%0 ;26 - st%? %1,%0%& ;27 - st%U0%V0 %1,%0 ;28 - st%U0%V0 %1,%0 ;29 - st%U0%V0 %1,%0 ;30 - st%U0%V0 %1,%0 ;31" - ; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - [(set_attr "type" "move, move, move,move,move, move, move,two_cycle_core,shift,shift,shift, move,binary,binary, move, move, move,load,store,store,load,load, load,load,load, load,store,store,store,store,store,store") - (set_attr "iscompact" "maybe,maybe,maybe,true,true,false,false, false,false,false,false,false, false, false,maybe_limm,maybe_limm,false,true, true, true,true,true,false,true,true,false,false, true,false,false,false,false") - ; Use default length for iscompact to allow for COND_EXEC. But set length - ; of Crr to 4. - (set_attr "length" "*,*,*,*,*,4,4,4,4,4,4,4,8,8,*,*,*,*,*,*,*,*,4,*,4,*,*,*,*,*,*,8") - (set_attr "predicable" "yes,no,yes,no,no,yes,no,no,no,no,no,yes,no,no,yes,yes,yes,no,no,no,no,no,no,no,no,no,no,no,no,no,no,no") - (set_attr "cpu_facility" "av1,av1,av1,av2,av2,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,av2,av2,*,*,av2,*,*,av2,*")]) + mov%?\\t%0,%1 ;0 + mov%?\\t%0,%1 ;1 + mov%?\\t%0,%1 ;2 + mov%?\\t%0,%1 ;3 + mov%?\\t%0,%1 ;4 + mov%?\\t%0,%1 ;5 + mov%?\\t%0,%1 ;6 + movl.cl\\t %0,%1 ;7 + movh.cl\\t %0,%L1>>16 ;8 + * return INTVAL (operands[1]) & 0xffffff ? \"movbi.cl\\t%0,%1 >> %p1,%p1,8;9\" : \"movbi.cl\\t%0,%L1 >> 24,24,8;9\"; + add\\t%0,%1 ;10 + add\\t%0,pcl,%1@pcl ;11 + # + mov%?\\t%0,%j1 ;13 + mov%?\\t%0,%j1 ;14 + ld%?\\t%0,%1 ;15 + st%?\\t %1,%0 ;16 + * return arc_short_long (insn, \"push%?\\t%1%&\", \"st%U0\\t%1,%0%&\"); + * return arc_short_long (insn, \"pop%?\\t%0%&\", \"ld%U1\\t%0,%1%&\"); + ld%?\\t%0,%1 ;19 + xld%U1\\t%0,%1 ;20 + ld%?\\t%0,%1 ;21 + ld%?\\t%0,%1 ;22 + ld%U1%V1\\t%0,%1 ;23 + xst%U0\\t%1,%0 ;24 + st%?\\t%1,%0%& ;25 + st%U0%V0\\t%1,%0 ;26 + st%U0%V0\\t%1,%0 ;37 + st%U0%V0\\t%1,%0 ;28" + "reload_completed && satisfies_constraint_Cax (operands[1]) + && register_operand (operands[0], SImode)" + [(const_int 0)] + " + arc_split_mov_const (operands); + DONE; + " + ; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 + [(set_attr "type" "move, move, move,move,move, move, move,shift,shift,shift,binary,binary,multi,move, move,load,store,store,load,load, load,load,load, load,store,store,store,store,store") + (set_attr "iscompact" "maybe,maybe,maybe,true,true,false,false,false,false,false, false, false,false,true,false,true, true, true,true,true,false,true,true,false,false, true,false,false,false") + (set_attr "length" "*,*,*,*,*,4,4,4,4,4,8,8,*,6,*,*,*,*,*,*,4,*,4,*,*,*,*,*,8") + (set_attr "predicable" "yes,no,yes,no,no,yes,no,no,no,yes,no,no,no,yes,yes,no,no,no,no,no,no,no,no,no,no,no,no,no,no") + (set_attr "cpu_facility" "av1,av1,av1,av2,av2,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,av2,av2,*,*,av2,*,av2,*")]) ;; Sometimes generated by the epilogue code. We don't want to ;; recognize these addresses in general, because the limm is costly, @@ -2304,16 +2301,23 @@ core_3, archs4x, archs4xd, archs4xd_slow" ; ARCv2 has no penalties between mpy and mpyu. So, we use mpy because of its ; short variant. LP_COUNT constraints are still valid. (define_insn "mulsi3_v2" - [(set (match_operand:SI 0 "mpy_dest_reg_operand" "=Rcqq,Rcr, r,r,Rcr, r") - (mult:SI (match_operand:SI 1 "register_operand" "%0, 0, c,0, 0, c") - (match_operand:SI 2 "nonmemory_operand" " Rcqq, cL,cL,I,Cal,Cal")))] + [(set (match_operand:SI 0 "mpy_dest_reg_operand" "=q,q, r, r,r, r, r") + (mult:SI (match_operand:SI 1 "register_operand" "%0,q, 0, r,0, 0, c") + (match_operand:SI 2 "nonmemory_operand" "q,0,rL,rL,I,Cal,Cal")))] "TARGET_MULTI" - "mpy%? %0,%1,%2" - [(set_attr "length" "*,4,4,4,8,8") - (set_attr "iscompact" "maybe,false,false,false,false,false") + "@ + mpy%?\\t%0,%1,%2 + mpy%?\\t%0,%2,%1 + mpy%?\\t%0,%1,%2 + mpy%?\\t%0,%1,%2 + mpy%?\\t%0,%1,%2 + mpy%?\\t%0,%1,%2 + mpy%?\\t%0,%1,%2" + [(set_attr "length" "*,*,4,4,4,8,8") + (set_attr "iscompact" "maybe,maybe,false,false,false,false,false") (set_attr "type" "umulti") - (set_attr "predicable" "no,yes,no,no,yes,no") - (set_attr "cond" "nocond,canuse,nocond,canuse_limm,canuse,nocond")]) + (set_attr "predicable" "no,no,yes,no,no,yes,no") + (set_attr "cond" "nocond,nocond,canuse,nocond,canuse_limm,canuse,nocond")]) (define_expand "mulsidi3" [(set (match_operand:DI 0 "register_operand" "") @@ -3217,10 +3221,10 @@ core_3, archs4x, archs4xd, archs4xd_slow" operands[1] = force_reg (SImode, operands[1]); ") -(define_insn "andsi3_i" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcqq,Rcqq,Rcqq,Rcw,Rcw, Rcw,Rcw,Rcw,Rcw, w, w, w, w,Rrq,w,Rcw, w,W") - (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,Rcq, 0, 0,Rcqq, 0, c, 0, 0, 0, 0, c, c, c, c,Rrq,0, 0, c,o") - (match_operand:SI 2 "nonmemory_operand" "Rcqq, 0, C1p, Ccp, Cux, cL, 0,C2pC1p,Ccp,CnL, I,Lc,C2pC1p,Ccp,CnL,Cbf,I,Cal,Cal,Cux")))] +(define_insn "andsi3_i" ;0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 + [(set (match_operand:SI 0 "dest_reg_operand" "=q,q, q, q, q, r,r, r, r, r,r, r, r, r, r, q,r, r, r, W") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,q, 0, 0, q, 0,r, 0, 0, 0,0, r, r, r, r, q,0, 0, r, o") + (match_operand:SI 2 "nonmemory_operand" "q,0,C1p,Ccp,Cux,rL,0,C2pC1p,Ccp,CnL,I,rL,C2pC1p,Ccp,CnL,Cbf,I,Cal,Cal,Cux")))] "(register_operand (operands[1], SImode) && nonmemory_operand (operands[2], SImode)) || (memory_operand (operands[1], SImode) @@ -3318,27 +3322,35 @@ core_3, archs4x, archs4xd, archs4xd_slow" (set_attr "predicable" "no,yes,no,yes,no,no,no") (set_attr "cond" "canuse,canuse,canuse_limm,canuse,nocond,nocond,nocond")]) -(define_insn "iorsi3" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcqq,Rcw,Rcw,Rcw,Rcw,w, w,w,Rcw, w") - (ior:SI (match_operand:SI 1 "nonmemory_operand" "% 0,Rcq, 0, 0, c, 0, 0, c, c,0, 0, c") - (match_operand:SI 2 "nonmemory_operand" "Rcqq, 0, C0p, cL, 0,C0p, I,cL,C0p,I,Cal,Cal")))] +(define_insn_and_split "iorsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "=q,q, q, r,r, r,r, r, r,r, q, r, r") + (ior:SI (match_operand:SI 1 "register_operand" "%0,q, 0, 0,r, 0,0, r, r,0, r, 0, r") + (match_operand:SI 2 "nonmemory_operand" "q,0,C0p,rL,0,C0p,I,rL,C0p,I,C0x,Cal,Cal")))] "" - "* - switch (which_alternative) - { - case 0: case 3: case 6: case 7: case 9: case 10: case 11: - return \"or%? %0,%1,%2%&\"; - case 1: case 4: - return \"or%? %0,%2,%1%&\"; - case 2: case 5: case 8: - return \"bset%? %0,%1,%z2%&\"; - default: - gcc_unreachable (); - }" - [(set_attr "iscompact" "maybe,maybe,maybe,false,false,false,false,false,false,false,false,false") - (set_attr "length" "*,*,*,4,4,4,4,4,4,4,8,8") - (set_attr "predicable" "no,no,no,yes,yes,yes,no,no,no,no,yes,no") - (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,canuse,nocond")]) + "@ + or%?\\t%0,%1,%2 + or%?\\t%0,%2,%1 + bset%?\\t%0,%1,%z2 + or%?\\t%0,%1,%2 + or%?\\t%0,%2,%1 + bset%?\\t%0,%1,%z2 + or%?\\t%0,%1,%2 + or%?\\t%0,%1,%2 + bset%?\\t%0,%1,%z2 + or%?\\t%0,%1,%2 + # + or%?\\t%0,%1,%2 + or%?\\t%0,%1,%2" + "reload_completed && satisfies_constraint_C0x (operands[2])" + [(const_int 0)] + " + arc_split_ior (operands); + DONE; + " + [(set_attr "iscompact" "maybe,maybe,maybe,false,false,false,false,false,false,false,false,false,false") + (set_attr "length" "*,*,*,4,4,4,4,4,4,4,*,8,8") + (set_attr "predicable" "no,no,no,yes,yes,yes,no,no,no,no,no,yes,no") + (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,nocond,canuse,nocond")]) (define_insn "xorsi3" [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcw,Rcw,Rcw,Rcw, w, w,w, w, w") @@ -5973,11 +5985,21 @@ core_3, archs4x, archs4xd, archs4xd_slow" (match_dup 1)]) (define_insn "*rotrsi3_cnt1" - [(set (match_operand:SI 0 "dest_reg_operand" "=w") - (rotatert:SI (match_operand:SI 1 "register_operand" "c") + [(set (match_operand:SI 0 "dest_reg_operand" "=r") + (rotatert:SI (match_operand:SI 1 "nonmemory_operand" "rL") (const_int 1)))] "" - "ror %0,%1%&" + "ror\\t%0,%1" + [(set_attr "type" "shift") + (set_attr "predicable" "no") + (set_attr "length" "4")]) + +(define_insn "*rotrsi3_cnt8" + [(set (match_operand:SI 0 "register_operand" "=r") + (rotatert:SI (match_operand:SI 1 "nonmemory_operand" "rL") + (const_int 8)))] + "TARGET_BARREL_SHIFTER && TARGET_V2" + "ror8\\t%0,%1" [(set_attr "type" "shift") (set_attr "predicable" "no") (set_attr "length" "4")]) @@ -5990,8 +6012,31 @@ core_3, archs4x, archs4xd, archs4xd_slow" "asl%? %0,%1%&" [(set_attr "type" "shift") (set_attr "iscompact" "maybe,false") + (set_attr "length" "4") (set_attr "predicable" "no,no")]) +(define_insn "*ashlsi2_cnt8" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashift:SI (match_operand:SI 1 "nonmemory_operand" "rL") + (const_int 8)))] + "TARGET_BARREL_SHIFTER && TARGET_V2" + "lsl8\\t%0,%1" + [(set_attr "type" "shift") + (set_attr "iscompact" "false") + (set_attr "length" "4") + (set_attr "predicable" "no")]) + +(define_insn "*ashlsi2_cnt16" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashift:SI (match_operand:SI 1 "nonmemory_operand" "rL") + (const_int 16)))] + "TARGET_BARREL_SHIFTER && TARGET_V2" + "lsl16\\t%0,%1" + [(set_attr "type" "shift") + (set_attr "iscompact" "false") + (set_attr "length" "4") + (set_attr "predicable" "no")]) + (define_insn "*lshrsi3_cnt1" [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w") (lshiftrt:SI (match_operand:SI 1 "register_operand" "Rcqq,c") diff --git a/gcc/config/arc/constraints.md b/gcc/config/arc/constraints.md index 494e4792316..bbb0b96f7e3 100644 --- a/gcc/config/arc/constraints.md +++ b/gcc/config/arc/constraints.md @@ -201,7 +201,7 @@ "@internal power of two" (and (match_code "const_int") - (match_test "IS_POWEROF2_P (ival)"))) + (match_test "IS_POWEROF2_P (ival & 0xffffffff)"))) (define_constraint "C1p" "@internal @@ -275,12 +275,6 @@ (and (match_code "const_int") (match_test "ival == 1 || ival == 2 || ival == 4 || ival == 8"))) -(define_constraint "Crr" - "@internal - constant that can be loaded with ror b,u6" - (and (match_code "const_int") - (match_test "(ival & ~0x8000001f) == 0 && !arc_ccfsm_cond_exec_p ()"))) - (define_constraint "Cbi" "@internal constant that can be loaded with movbi.cl" @@ -290,6 +284,20 @@ || ((ival & 0xffffffffUL) >> exact_log2 (ival & -ival) <= 0xff)"))) +(define_constraint "C0x" + "@internal + special const_int pattern used to split ior insns" + (and (match_code "const_int") + (match_test "optimize_size") + (match_test "arc_check_ior_const (ival)"))) + +(define_constraint "Cax" + "@internal + special const_int pattern used to split mov insns" + (and (match_code "const_int") + (match_test "optimize_size") + (match_test "arc_check_mov_const (ival)"))) + ;; Floating-point constraints (define_constraint "G" diff --git a/gcc/testsuite/gcc.target/arc/and-cnst-size.c b/gcc/testsuite/gcc.target/arc/and-cnst-size.c new file mode 100644 index 00000000000..9437be992f4 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/and-cnst-size.c @@ -0,0 +1,16 @@ +/* Tests to check if and instructions are emitted efficiently. */ +/* { dg-require-effective-target codedensity } */ +/* { dg-options "-Os" } */ + +int check_bclr (int a) +{ + return a & (~0x40); +} + +int check_bmskn (int a) +{ + return a & (-128); +} + +/* { dg-final { scan-assembler "bclr_s" } } */ +/* { dg-final { scan-assembler "bmskn" } } */ diff --git a/gcc/testsuite/gcc.target/arc/arc700-stld-hazard.c b/gcc/testsuite/gcc.target/arc/arc700-stld-hazard.c index 3a3853ec552..49b5664b370 100644 --- a/gcc/testsuite/gcc.target/arc/arc700-stld-hazard.c +++ b/gcc/testsuite/gcc.target/arc/arc700-stld-hazard.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-skip-if "" { ! { clmcpu } } } */ -/* { dg-options "-mcpu=arc700 -mno-sdata" } */ +/* { dg-options "-mcpu=arc700 -mno-sdata -O2" } */ volatile int a; volatile int b; @@ -12,4 +12,4 @@ foo () b = a; } -/* { dg-final { scan-assembler "st r\[0-9\]+,\\\[@a\\\]\[^\n\]*\n\[ \t\]+nop_s\[^\n\]*\n\[ \t\]+nop_s\[^\n\]*\n\[ \t\]+ld r\[0-9\]+,\\\[@a\\\]" } } */ +/* { dg-final { scan-assembler "st\\s+r\[0-9\]+,\\\[@a\\\]\\.*\[^\n\]*\n\[ \t\]+nop_s\[^\n\]*\n\[ \t\]+nop_s\[^\n\]*\n\[ \t\]+ld\\s+r\[0-9\]+,\\\[@a\\\]" } } */ diff --git a/gcc/testsuite/gcc.target/arc/cmem-1.c b/gcc/testsuite/gcc.target/arc/cmem-1.c index 8ed5dcf2f01..f0e7a8d6bbd 100644 --- a/gcc/testsuite/gcc.target/arc/cmem-1.c +++ b/gcc/testsuite/gcc.target/arc/cmem-1.c @@ -6,6 +6,6 @@ #include "cmem-st.inc" -/* { dg-final { scan-assembler "xst " } } */ -/* { dg-final { scan-assembler "xstw " } } */ -/* { dg-final { scan-assembler "xstb " } } */ +/* { dg-final { scan-assembler "xst\\s" } } */ +/* { dg-final { scan-assembler "xstw\\s" } } */ +/* { dg-final { scan-assembler "xstb\\s" } } */ diff --git a/gcc/testsuite/gcc.target/arc/cmem-2.c b/gcc/testsuite/gcc.target/arc/cmem-2.c index 39bfb2885c7..a63df0eb4ef 100644 --- a/gcc/testsuite/gcc.target/arc/cmem-2.c +++ b/gcc/testsuite/gcc.target/arc/cmem-2.c @@ -6,6 +6,6 @@ #include "cmem-ld.inc" -/* { dg-final { scan-assembler "xld " } } */ -/* { dg-final { scan-assembler "xldw " } } */ -/* { dg-final { scan-assembler "xldb " } } */ +/* { dg-final { scan-assembler "xld\\s" } } */ +/* { dg-final { scan-assembler "xldw\\s" } } */ +/* { dg-final { scan-assembler "xldb\\s" } } */ diff --git a/gcc/testsuite/gcc.target/arc/cmem-3.c b/gcc/testsuite/gcc.target/arc/cmem-3.c index 109084f01fb..d3d1d5954fa 100644 --- a/gcc/testsuite/gcc.target/arc/cmem-3.c +++ b/gcc/testsuite/gcc.target/arc/cmem-3.c @@ -6,6 +6,6 @@ #include "cmem-st.inc" -/* { dg-final { scan-assembler "xst " } } */ -/* { dg-final { scan-assembler "xstw " } } */ -/* { dg-final { scan-assembler "xstb " } } */ +/* { dg-final { scan-assembler "xst\\s" } } */ +/* { dg-final { scan-assembler "xstw\\s" } } */ +/* { dg-final { scan-assembler "xstb\\s" } } */ diff --git a/gcc/testsuite/gcc.target/arc/cmem-4.c b/gcc/testsuite/gcc.target/arc/cmem-4.c index 4ac8a22f231..2160f1f072d 100644 --- a/gcc/testsuite/gcc.target/arc/cmem-4.c +++ b/gcc/testsuite/gcc.target/arc/cmem-4.c @@ -6,6 +6,6 @@ #include "cmem-ld.inc" -/* { dg-final { scan-assembler "xld " } } */ -/* { dg-final { scan-assembler "xldw " } } */ -/* { dg-final { scan-assembler "xldb " } } */ +/* { dg-final { scan-assembler "xld\\s" } } */ +/* { dg-final { scan-assembler "xldw\\s" } } */ +/* { dg-final { scan-assembler "xldb\\s" } } */ diff --git a/gcc/testsuite/gcc.target/arc/cmem-5.c b/gcc/testsuite/gcc.target/arc/cmem-5.c index 451218b9765..db4142f7ee4 100644 --- a/gcc/testsuite/gcc.target/arc/cmem-5.c +++ b/gcc/testsuite/gcc.target/arc/cmem-5.c @@ -6,6 +6,6 @@ #include "cmem-st.inc" -/* { dg-final { scan-assembler "xst " } } */ -/* { dg-final { scan-assembler "xstw " } } */ -/* { dg-final { scan-assembler "xstb " } } */ +/* { dg-final { scan-assembler "xst\\s" } } */ +/* { dg-final { scan-assembler "xstw\\s" } } */ +/* { dg-final { scan-assembler "xstb\\s" } } */ diff --git a/gcc/testsuite/gcc.target/arc/cmem-6.c b/gcc/testsuite/gcc.target/arc/cmem-6.c index 0ed06085514..ac22ad50653 100644 --- a/gcc/testsuite/gcc.target/arc/cmem-6.c +++ b/gcc/testsuite/gcc.target/arc/cmem-6.c @@ -6,6 +6,6 @@ #include "cmem-ld.inc" -/* { dg-final { scan-assembler "xld " } } */ -/* { dg-final { scan-assembler "xldw " } } */ -/* { dg-final { scan-assembler "xldb " } } */ +/* { dg-final { scan-assembler "xld\\s" } } */ +/* { dg-final { scan-assembler "xldw\\s" } } */ +/* { dg-final { scan-assembler "xldb\\s" } } */ diff --git a/gcc/testsuite/gcc.target/arc/loop-4.c b/gcc/testsuite/gcc.target/arc/loop-4.c index dbe5d3fe18f..06de4187948 100644 --- a/gcc/testsuite/gcc.target/arc/loop-4.c +++ b/gcc/testsuite/gcc.target/arc/loop-4.c @@ -3,12 +3,13 @@ /* { dg-options "-Os -fbranch-count-reg" } */ -void fn1(void *p1, int p2, int p3) +int fn1(void *p1, int p2, int p3) { char *d = p1; do *d++ = p2; while (--p3); + return *d; } /* { dg-final { scan-assembler "lp_count" } } */ diff --git a/gcc/testsuite/gcc.target/arc/mov-cnst-size.c b/gcc/testsuite/gcc.target/arc/mov-cnst-size.c new file mode 100644 index 00000000000..916dd8b7620 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/mov-cnst-size.c @@ -0,0 +1,42 @@ +/* Tests to check if mov instructions are emitted efficiently. */ +/* { dg-require-effective-target codedensity } */ +/* { dg-options "-Os" } */ + +int rule1 (void) +{ + return 0x3f000000; +} + +int rule2 (void) +{ + return 0x3f00; +} + +int rule3 (void) +{ + return 0x3f0000; +} + +int rule4 (void) +{ + return 0x22000; +} + +int rule5 (void) +{ + return 0x8000001f; +} + +int rule6 (void) +{ + return 0x3fffff; +} + +/* { dg-final { scan-assembler "ror8\\s+r0,63" } } */ +/* { dg-final { scan-assembler "lsl8\\s+r0,63" } } */ +/* { dg-final { scan-assembler "lsl16\\s+r0,63" } } */ +/* { dg-final { scan-assembler "ror\\s+r0,63" } } */ +/* { dg-final { scan-assembler "mov_s\\s+r0,17" } } */ +/* { dg-final { scan-assembler "asl_s\\s+r0,r0,13" } } */ +/* { dg-final { scan-assembler "mov_s\\s+r0,-1" } } */ +/* { dg-final { scan-assembler "bmsk_s\\s+r0,r0,21" } } */ diff --git a/gcc/testsuite/gcc.target/arc/movh_cl-1.c b/gcc/testsuite/gcc.target/arc/movh_cl-1.c index 9c0036c1a3a..7e838bb2a69 100644 --- a/gcc/testsuite/gcc.target/arc/movh_cl-1.c +++ b/gcc/testsuite/gcc.target/arc/movh_cl-1.c @@ -36,4 +36,4 @@ woof () func (xx.raw); } -/* { dg-final { scan-assembler "movh\.cl r\[0-9\]+,0xc0000000>>16" } } */ +/* { dg-final { scan-assembler "movh\.cl\\s+r\[0-9\]+,0xc0000000>>16" } } */ diff --git a/gcc/testsuite/gcc.target/arc/or-cnst-size.c b/gcc/testsuite/gcc.target/arc/or-cnst-size.c new file mode 100644 index 00000000000..c4a9f0fbaa9 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/or-cnst-size.c @@ -0,0 +1,16 @@ +/* Tests to check if or instructions are emitted efficiently. */ +/* { dg-require-effective-target codedensity } */ +/* { dg-options "-Os" } */ + +int check_bset1 (int a) +{ + return a | 0x80000000; +} + +int check_bset2(int a) +{ + return a | 0x2022; +} + +/* { dg-final { scan-assembler-times "bset_s" 2 } } */ +/* { dg-final { scan-assembler "or" } } */ diff --git a/gcc/testsuite/gcc.target/arc/sdata-3.c b/gcc/testsuite/gcc.target/arc/sdata-3.c index 895b41ab586..df66cbd70c5 100644 --- a/gcc/testsuite/gcc.target/arc/sdata-3.c +++ b/gcc/testsuite/gcc.target/arc/sdata-3.c @@ -24,13 +24,13 @@ TEST (b, int) TEST (c, short) TEST (d, char) -/* { dg-final { scan-assembler "ld r2,\\\[gp,@g_a@sda\\\]" } } */ -/* { dg-final { scan-assembler "ld.as r2,\\\[gp,@g_b@sda\\\]" } } */ -/* { dg-final { scan-assembler "ld\[hw\]\\\.as r2,\\\[gp,@g_c@sda\\\]" } } */ -/* { dg-final { scan-assembler "ldb r2,\\\[gp,@g_d@sda\\\]" } } */ +/* { dg-final { scan-assembler "ld\\s+r2,\\\[gp,@g_a@sda\\\]" } } */ +/* { dg-final { scan-assembler "ld.as\\s+r2,\\\[gp,@g_b@sda\\\]" } } */ +/* { dg-final { scan-assembler "ld\[hw\]\\\.as\\s+r2,\\\[gp,@g_c@sda\\\]" } } */ +/* { dg-final { scan-assembler "ldb\\s+r2,\\\[gp,@g_d@sda\\\]" } } */ -/* { dg-final { scan-assembler "st r0,\\\[gp,@g_a@sda\\\]" } } */ -/* { dg-final { scan-assembler "st_s r0,\\\[gp,@g_b@sda\\\]" { target { codedensity } } } } */ -/* { dg-final { scan-assembler "st\\\.as r0,\\\[gp,@g_b@sda\\\]" { target { ! { codedensity } } } } } */ -/* { dg-final { scan-assembler "st\[hw\]\\\.as r0,\\\[gp,@g_c@sda\\\]" } } */ -/* { dg-final { scan-assembler "stb r0,\\\[gp,@g_d@sda\\\]" } } */ +/* { dg-final { scan-assembler "st\\s+r0,\\\[gp,@g_a@sda\\\]" } } */ +/* { dg-final { scan-assembler "st_s\\s+r0,\\\[gp,@g_b@sda\\\]" { target { codedensity } } } } */ +/* { dg-final { scan-assembler "st\\\.as\\s+r0,\\\[gp,@g_b@sda\\\]" { target { ! { codedensity } } } } } */ +/* { dg-final { scan-assembler "st\[hw\]\\\.as\\s+r0,\\\[gp,@g_c@sda\\\]" } } */ +/* { dg-final { scan-assembler "stb\\s+r0,\\\[gp,@g_d@sda\\\]" } } */ diff --git a/gcc/testsuite/gcc.target/arc/store-merge-1.c b/gcc/testsuite/gcc.target/arc/store-merge-1.c index 525afe19c62..a133e8719b4 100644 --- a/gcc/testsuite/gcc.target/arc/store-merge-1.c +++ b/gcc/testsuite/gcc.target/arc/store-merge-1.c @@ -15,4 +15,4 @@ int sigemptyset2 (sigset_t *set) return 0; } -/* { dg-final { scan-assembler-times "st 0,\\\[r" 2 } } */ +/* { dg-final { scan-assembler-times "std\\s*0,\\\[r" 1 } } */ -- 2.21.0