Modify aarch64_expand_subvti into a form that handles all addition and subtraction variants: modulo (wrapping), signed overflow, and unsigned overflow.
Use expand_insn to put the operands into the proper form, and do not force values into register if not required. * config/aarch64/aarch64.c (aarch64_ti_split) New. (aarch64_addti_scratch_regs): Remove. (aarch64_subvti_scratch_regs): Remove. (aarch64_expand_subvti): Remove. (aarch64_expand_addsubti): New. * config/aarch64/aarch64-protos.h: Update to match. * config/aarch64/aarch64.md (addti3): Use aarch64_expand_addsubti. (addvti4, uaddvti4): Likewise. (subvti4, usubvti4): Likewise. (subti3): Likewise; accept immediates for operand 2. --- gcc/config/aarch64/aarch64-protos.h | 10 +-- gcc/config/aarch64/aarch64.c | 129 +++++++++------------------- gcc/config/aarch64/aarch64.md | 125 ++++++--------------------- 3 files changed, 67 insertions(+), 197 deletions(-) diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index d6d668ea920..787085b24d2 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -630,16 +630,8 @@ void aarch64_reset_previous_fndecl (void); bool aarch64_return_address_signing_enabled (void); bool aarch64_bti_enabled (void); void aarch64_save_restore_target_globals (tree); -void aarch64_addti_scratch_regs (rtx, rtx, rtx *, - rtx *, rtx *, - rtx *, rtx *, - rtx *); -void aarch64_subvti_scratch_regs (rtx, rtx, rtx *, - rtx *, rtx *, - rtx *, rtx *, rtx *); -void aarch64_expand_subvti (rtx, rtx, rtx, - rtx, rtx, rtx, rtx, bool); +void aarch64_expand_addsubti (rtx, rtx, rtx, int, int, int); /* Initialize builtins for SIMD intrinsics. */ void init_aarch64_simd_builtins (void); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 7a13a8e8ec4..6263897c9a0 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -20241,110 +20241,61 @@ aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode, aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); } -/* Generate DImode scratch registers for 128-bit (TImode) addition. 
+/* Generate DImode scratch registers for 128-bit (TImode) add/sub. + INPUT represents the TImode input operand + LO represents the low half (DImode) of the TImode operand + HI represents the high half (DImode) of the TImode operand. */ - OP1 represents the TImode destination operand 1 - OP2 represents the TImode destination operand 2 - LOW_DEST represents the low half (DImode) of TImode operand 0 - LOW_IN1 represents the low half (DImode) of TImode operand 1 - LOW_IN2 represents the low half (DImode) of TImode operand 2 - HIGH_DEST represents the high half (DImode) of TImode operand 0 - HIGH_IN1 represents the high half (DImode) of TImode operand 1 - HIGH_IN2 represents the high half (DImode) of TImode operand 2. */ - -void -aarch64_addti_scratch_regs (rtx op1, rtx op2, rtx *low_dest, - rtx *low_in1, rtx *low_in2, - rtx *high_dest, rtx *high_in1, - rtx *high_in2) +static void +aarch64_ti_split (rtx input, rtx *lo, rtx *hi) { - *low_dest = gen_reg_rtx (DImode); - *low_in1 = gen_lowpart (DImode, op1); - *low_in2 = simplify_gen_subreg (DImode, op2, TImode, - subreg_lowpart_offset (DImode, TImode)); - *high_dest = gen_reg_rtx (DImode); - *high_in1 = gen_highpart (DImode, op1); - *high_in2 = simplify_gen_subreg (DImode, op2, TImode, - subreg_highpart_offset (DImode, TImode)); + *lo = simplify_gen_subreg (DImode, input, TImode, + subreg_lowpart_offset (DImode, TImode)); + *hi = simplify_gen_subreg (DImode, input, TImode, + subreg_highpart_offset (DImode, TImode)); } -/* Generate DImode scratch registers for 128-bit (TImode) subtraction. - - This function differs from 'arch64_addti_scratch_regs' in that - OP1 can be an immediate constant (zero). We must call - subreg_highpart_offset with DImode and TImode arguments, otherwise - VOIDmode will be used for the const_int which generates an internal - error from subreg_size_highpart_offset which does not expect a size of zero. 
- - OP1 represents the TImode destination operand 1 - OP2 represents the TImode destination operand 2 - LOW_DEST represents the low half (DImode) of TImode operand 0 - LOW_IN1 represents the low half (DImode) of TImode operand 1 - LOW_IN2 represents the low half (DImode) of TImode operand 2 - HIGH_DEST represents the high half (DImode) of TImode operand 0 - HIGH_IN1 represents the high half (DImode) of TImode operand 1 - HIGH_IN2 represents the high half (DImode) of TImode operand 2. */ - - -void -aarch64_subvti_scratch_regs (rtx op1, rtx op2, rtx *low_dest, - rtx *low_in1, rtx *low_in2, - rtx *high_dest, rtx *high_in1, - rtx *high_in2) -{ - *low_dest = gen_reg_rtx (DImode); - *low_in1 = simplify_gen_subreg (DImode, op1, TImode, - subreg_lowpart_offset (DImode, TImode)); - - *low_in2 = simplify_gen_subreg (DImode, op2, TImode, - subreg_lowpart_offset (DImode, TImode)); - *high_dest = gen_reg_rtx (DImode); - - *high_in1 = simplify_gen_subreg (DImode, op1, TImode, - subreg_highpart_offset (DImode, TImode)); - *high_in2 = simplify_gen_subreg (DImode, op2, TImode, - subreg_highpart_offset (DImode, TImode)); -} - -/* Generate RTL for 128-bit (TImode) subtraction with overflow. - +/* Generate RTL for 128-bit (TImode) addition or subtraction. OP0 represents the TImode destination operand 0 - LOW_DEST represents the low half (DImode) of TImode operand 0 - LOW_IN1 represents the low half (DImode) of TImode operand 1 - LOW_IN2 represents the low half (DImode) of TImode operand 2 - HIGH_DEST represents the high half (DImode) of TImode operand 0 - HIGH_IN1 represents the high half (DImode) of TImode operand 1 - HIGH_IN2 represents the high half (DImode) of TImode operand 2 - UNSIGNED_P is true if the operation is being performed on unsigned - values. */ + OP1 and OP2 represent the TImode input operands. 
+ + Normal or Overflow behaviour is obtained via the INSN_CODE operands: + CODE_HI_LO0 is used when the low half of OP2 == 0, otherwise + CODE_LO is used on the low halves, + CODE_HI is used on the high halves. */ + void -aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1, - rtx low_in2, rtx high_dest, rtx high_in1, - rtx high_in2, bool unsigned_p) +aarch64_expand_addsubti (rtx op0, rtx op1, rtx op2, + int code_hi_lo0, int code_lo, int code_hi) { - if (low_in2 == const0_rtx) + rtx low_dest, low_op1, low_op2, high_dest, high_op1, high_op2; + struct expand_operand ops[3]; + + aarch64_ti_split (op1, &low_op1, &high_op1); + aarch64_ti_split (op2, &low_op2, &high_op2); + + if (low_op2 == const0_rtx) { - low_dest = low_in1; - high_in2 = force_reg (DImode, high_in2); - if (unsigned_p) - emit_insn (gen_subdi3_compare1 (high_dest, high_in1, high_in2)); - else - emit_insn (gen_subvdi_insn (high_dest, high_in1, high_in2)); + low_dest = low_op1; + code_hi = code_hi_lo0; } else { - emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2)); - - high_in2 = force_reg (DImode, high_in2); - if (unsigned_p) - emit_insn (gen_usubdi3_carryinC (high_dest, high_in1, high_in2)); - else - emit_insn (gen_subdi3_carryinV (high_dest, high_in1, high_in2)); + low_dest = gen_reg_rtx (DImode); + create_output_operand(&ops[0], low_dest, DImode); + create_input_operand(&ops[1], low_op1, DImode); + create_input_operand(&ops[2], low_op2, DImode); + expand_insn ((insn_code)code_lo, 3, ops); } + high_dest = gen_reg_rtx (DImode); + create_output_operand(&ops[0], high_dest, DImode); + create_input_operand(&ops[1], high_op1, DImode); + create_input_operand(&ops[2], high_op2, DImode); + expand_insn ((insn_code)code_hi, 3, ops); + emit_move_insn (gen_lowpart (DImode, op0), low_dest); emit_move_insn (gen_highpart (DImode, op0), high_dest); - } /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. 
*/ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index d6389cc8148..532c114a42e 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -2044,30 +2044,10 @@ (match_operand:TI 2 "aarch64_reg_or_imm")))] "" { - rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; - - aarch64_addti_scratch_regs (operands[1], operands[2], - &low_dest, &op1_low, &op2_low, - &high_dest, &op1_high, &op2_high); - - if (op2_low == const0_rtx) - { - low_dest = op1_low; - if (!aarch64_pluslong_operand (op2_high, DImode)) - op2_high = force_reg (DImode, op2_high); - emit_insn (gen_adddi3 (high_dest, op1_high, op2_high)); - } - else - { - emit_insn (gen_adddi3_compareC (low_dest, op1_low, - force_reg (DImode, op2_low))); - emit_insn (gen_adddi3_carryin (high_dest, op1_high, - force_reg (DImode, op2_high))); - } - - emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest); - emit_move_insn (gen_highpart (DImode, operands[0]), high_dest); - + aarch64_expand_addsubti (operands[0], operands[1], operands[2], + CODE_FOR_adddi3, + CODE_FOR_adddi3_compareC, + CODE_FOR_adddi3_carryin); DONE; }) @@ -2078,29 +2058,10 @@ (label_ref (match_operand 3 "" ""))] "" { - rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; - - aarch64_addti_scratch_regs (operands[1], operands[2], - &low_dest, &op1_low, &op2_low, - &high_dest, &op1_high, &op2_high); - - if (op2_low == const0_rtx) - { - low_dest = op1_low; - emit_insn (gen_adddi3_compareV (high_dest, op1_high, - force_reg (DImode, op2_high))); - } - else - { - emit_insn (gen_adddi3_compareC (low_dest, op1_low, - force_reg (DImode, op2_low))); - emit_insn (gen_adddi3_carryinV (high_dest, op1_high, - force_reg (DImode, op2_high))); - } - - emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest); - emit_move_insn (gen_highpart (DImode, operands[0]), high_dest); - + aarch64_expand_addsubti (operands[0], operands[1], operands[2], + CODE_FOR_adddi3_compareV, + CODE_FOR_adddi3_compareC, + 
CODE_FOR_adddi3_carryinV); aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); DONE; }) @@ -2112,32 +2073,13 @@ (label_ref (match_operand 3 "" ""))] "" { - rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; - - aarch64_addti_scratch_regs (operands[1], operands[2], - &low_dest, &op1_low, &op2_low, - &high_dest, &op1_high, &op2_high); - - if (op2_low == const0_rtx) - { - low_dest = op1_low; - emit_insn (gen_adddi3_compareC (high_dest, op1_high, - force_reg (DImode, op2_high))); - } - else - { - emit_insn (gen_adddi3_compareC (low_dest, op1_low, - force_reg (DImode, op2_low))); - emit_insn (gen_adddi3_carryinC (high_dest, op1_high, - force_reg (DImode, op2_high))); - } - - emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest); - emit_move_insn (gen_highpart (DImode, operands[0]), high_dest); - + aarch64_expand_addsubti (operands[0], operands[1], operands[2], + CODE_FOR_adddi3_compareC, + CODE_FOR_adddi3_compareC, + CODE_FOR_adddi3_carryinC); aarch64_gen_unlikely_cbranch (GEU, CC_ADCmode, operands[3]); DONE; - }) +}) (define_insn "add<mode>3_compare0" [(set (reg:CC_NZ CC_REGNUM) @@ -2980,20 +2922,13 @@ (define_expand "subti3" [(set (match_operand:TI 0 "register_operand") (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero") - (match_operand:TI 2 "register_operand")))] + (match_operand:TI 2 "aarch64_reg_or_imm")))] "" { - rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; - - aarch64_subvti_scratch_regs (operands[1], operands[2], - &low_dest, &op1_low, &op2_low, - &high_dest, &op1_high, &op2_high); - - emit_insn (gen_subdi3_compare1 (low_dest, op1_low, op2_low)); - emit_insn (gen_subdi3_carryin (high_dest, op1_high, op2_high)); - - emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest); - emit_move_insn (gen_highpart (DImode, operands[0]), high_dest); + aarch64_expand_addsubti (operands[0], operands[1], operands[2], + CODE_FOR_subdi3, + CODE_FOR_subdi3_compare1, + CODE_FOR_subdi3_carryin); DONE; }) @@ -3004,14 +2939,10 @@ 
(label_ref (match_operand 3 "" ""))] "" { - rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; - - aarch64_subvti_scratch_regs (operands[1], operands[2], - &low_dest, &op1_low, &op2_low, - &high_dest, &op1_high, &op2_high); - aarch64_expand_subvti (operands[0], low_dest, op1_low, op2_low, - high_dest, op1_high, op2_high, false); - + aarch64_expand_addsubti (operands[0], operands[1], operands[2], + CODE_FOR_subvdi_insn, + CODE_FOR_subdi3_compare1, + CODE_FOR_subdi3_carryinV); aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); DONE; }) @@ -3023,14 +2954,10 @@ (label_ref (match_operand 3 "" ""))] "" { - rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; - - aarch64_subvti_scratch_regs (operands[1], operands[2], - &low_dest, &op1_low, &op2_low, - &high_dest, &op1_high, &op2_high); - aarch64_expand_subvti (operands[0], low_dest, op1_low, op2_low, - high_dest, op1_high, op2_high, true); - + aarch64_expand_addsubti (operands[0], operands[1], operands[2], + CODE_FOR_subdi3_compare1, + CODE_FOR_subdi3_compare1, + CODE_FOR_usubdi3_carryinC); aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); DONE; }) -- 2.20.1