Aside from the hashing, and the splitting of insn generation from recipe creation, there's no algorithmic change.
Cc: Marcus Shawcroft <marcus.shawcr...@arm.com> Cc: Richard Earnshaw <richard.earns...@arm.com> --- * config/aarch64/aarch64.c: Include genimm-hash.h (aa_gi_code): New enum. (genimm_aa64): New class. (genimm_aa64::genimm_aa64): New. (genimm_aa64::set0, genimm_aa64::opN, genimm_aa64::insN): New. (genimm_aa64::exam_simple): New. (genimm_aa64::exam_plus): New. (genimm_aa64::generate): New. (genimm_aa64::exam_full): Extract from the body of the old aarch64_internal_mov_immediate. (aarch64_internal_mov_immediate): Rewrite using genimm_hash. --- gcc/config/aarch64/aarch64.c | 446 +++++++++++++++++++++++++------------------ 1 file changed, 256 insertions(+), 190 deletions(-) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 1394ed7..6b12a07 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -76,6 +76,7 @@ #include "sched-int.h" #include "cortex-a57-fma-steering.h" #include "target-globals.h" +#include "genimm-hash.h" /* This file should be included last. */ #include "target-def.h" @@ -1317,54 +1318,144 @@ aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset) return plus_constant (mode, reg, offset); } -static int -aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, - machine_mode mode) +namespace { + +/* In order to simplify the below, make sure none of the + given rtx codes are in {0,16,32,48}. 
*/ +STATIC_ASSERT (((int)PLUS & ~48) != 0); +STATIC_ASSERT (((int)IOR & ~48) != 0); +STATIC_ASSERT (((int)AND & ~48) != 0); + +enum aa_gi_code { - unsigned HOST_WIDE_INT mask; - int i; - bool first; - unsigned HOST_WIDE_INT val; - bool subtargets; - rtx subtarget; - int one_match, zero_match, first_not_ffff_match; - int num_insns = 0; + AA_GI_NIL = -2, + AA_GI_SET = -1, + + AA_GI_INS0 = 0, + AA_GI_INS1 = 16, + AA_GI_INS2 = 32, + AA_GI_INS3 = 48, + + AA_GI_PLUS = PLUS, + AA_GI_IOR = IOR, + AA_GI_AND = AND +}; + +struct genimm_aa64 : genimm_base<aa_gi_code, 4> +{ + static const int max_simple = 2; + + static rtx_code aa_gi_binop(aa_gi_code c) + { + return (c == AA_GI_PLUS || c == AA_GI_IOR || c == AA_GI_AND + ? (rtx_code)c : UNKNOWN); + } + + genimm_aa64 (HOST_WIDE_INT c); + + void set0 (HOST_WIDE_INT v); + void opN (aa_gi_code o, HOST_WIDE_INT v); + void insN (int b, unsigned HOST_WIDE_INT v); + + /* The search algorithm that we use for aarch64 is non-recursive. + Thus we do not require the iteration provided by genimm_hash. + Produce an empty loop and go straight to exam_full. 
*/ + bool exam_search (unsigned HOST_WIDE_INT, int) { return false; } + + bool exam_simple (HOST_WIDE_INT val, machine_mode mode, int); + bool exam_plus (unsigned HOST_WIDE_INT val, unsigned HOST_WIDE_INT base); + void exam_full (unsigned HOST_WIDE_INT val); + void generate (rtx dest, machine_mode mode) const; +}; - if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode)) +genimm_aa64::genimm_aa64 (HOST_WIDE_INT c) + : genimm_base (c) +{ +#ifdef ENABLE_CHECKING + code[0] = code[1] = code[2] = code[3] = AA_GI_NIL; + op[0] = op[1] = op[2] = op[3] = 0; +#endif +} + +void +genimm_aa64::set0 (HOST_WIDE_INT v) +{ + cost = 1; + code[0] = AA_GI_SET; + op[0] = v; +} + +void +genimm_aa64::opN (aa_gi_code c, HOST_WIDE_INT v) +{ + int n = cost++; + gcc_checking_assert (n > 0 && n < max_cost); + code[n] = c; + op[n] = v; +} + +void +genimm_aa64::insN (int b, unsigned HOST_WIDE_INT v) +{ + int n = cost++; + gcc_checking_assert (n > 0 && n < max_cost); + gcc_checking_assert ((b & ~48) == 0); + code[n] = (aa_gi_code)b; + op[n] = (v >> b) & 0xffff; +} + +/* Look for simple constants that aren't worth hashing. */ + +bool +genimm_aa64::exam_simple (HOST_WIDE_INT val, machine_mode mode, int) +{ + if (aarch64_move_imm (val, mode)) { - if (generate) - emit_insn (gen_rtx_SET (dest, imm)); - num_insns++; - return num_insns; + set0 (val); + return true; } - if (mode == SImode) { /* We know we can't do this in 1 insn, and we must be able to do it in two; so don't mess around looking for sequences that don't buy us anything. */ - if (generate) - { - emit_insn (gen_rtx_SET (dest, GEN_INT (INTVAL (imm) & 0xffff))); - emit_insn (gen_insv_immsi (dest, GEN_INT (16), - GEN_INT ((INTVAL (imm) >> 16) & 0xffff))); - } - num_insns += 2; - return num_insns; + set0 (val & 0xffff); + insN (16, val); + return true; } + return false; +} - /* Remaining cases are all for DImode. */ +/* A subroutine of genimm_aa64::exam_full. 
If VAL can be created from BASE + via the addition of a constant, construct the recipe as appropriate and + return true. Otherwise return false. */ - val = INTVAL (imm); - subtargets = optimize && can_create_pseudo_p (); +bool +genimm_aa64::exam_plus (unsigned HOST_WIDE_INT val, unsigned HOST_WIDE_INT base) +{ + HOST_WIDE_INT diff = val - base; + if (aarch64_uimm12_shift (diff < 0 ? -diff : diff)) + { + set0 (base); + opN (AA_GI_PLUS, diff); + return true; + } + return false; +} - one_match = 0; - zero_match = 0; - mask = 0xffff; - first_not_ffff_match = -1; +/* Examine the DImode quantity VAL, and store a recipe for its creation. */ - for (i = 0; i < 64; i += 16, mask <<= 16) +void +genimm_aa64::exam_full (unsigned HOST_WIDE_INT val) +{ + unsigned HOST_WIDE_INT mask; + int one_match = 0; + int zero_match = 0; + int first_not_ffff_match = -1; + + for (int i = 0; i < 64; i += 16) { + mask = HOST_WIDE_INT_UC (0xffff) << i; if ((val & mask) == mask) one_match++; else @@ -1379,211 +1470,186 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, if (one_match == 2) { /* Set one of the quarters and then insert back into result. */ - mask = 0xffffll << first_not_ffff_match; - if (generate) - { - emit_insn (gen_rtx_SET (dest, GEN_INT (val | mask))); - emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match), - GEN_INT ((val >> first_not_ffff_match) - & 0xffff))); - } - num_insns += 2; - return num_insns; + mask = HOST_WIDE_INT_UC (0xffff) << first_not_ffff_match; + set0 (val | mask); + insN (first_not_ffff_match, val); + return; } if (zero_match == 2) goto simple_sequence; - mask = 0x0ffff0000UL; - for (i = 16; i < 64; i += 16, mask <<= 16) + for (int i = 16; i < 64; i += 16, mask <<= 16) { - HOST_WIDE_INT comp = mask & ~(mask - 1); + unsigned HOST_WIDE_INT comp = HOST_WIDE_INT_1U << i; + mask = HOST_WIDE_INT_UC (0xffff) << i; - if (aarch64_uimm12_shift (val - (val & mask))) - { - if (generate) - { - subtarget = subtargets ? 
gen_reg_rtx (DImode) : dest; - emit_insn (gen_rtx_SET (subtarget, GEN_INT (val & mask))); - emit_insn (gen_adddi3 (dest, subtarget, - GEN_INT (val - (val & mask)))); - } - num_insns += 2; - return num_insns; - } - else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask)))) - { - if (generate) - { - subtarget = subtargets ? gen_reg_rtx (DImode) : dest; - emit_insn (gen_rtx_SET (subtarget, - GEN_INT ((val + comp) & mask))); - emit_insn (gen_adddi3 (dest, subtarget, - GEN_INT (val - ((val + comp) & mask)))); - } - num_insns += 2; - return num_insns; - } - else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask))) - { - if (generate) - { - subtarget = subtargets ? gen_reg_rtx (DImode) : dest; - emit_insn (gen_rtx_SET (subtarget, - GEN_INT ((val - comp) | ~mask))); - emit_insn (gen_adddi3 (dest, subtarget, - GEN_INT (val - ((val - comp) | ~mask)))); - } - num_insns += 2; - return num_insns; - } - else if (aarch64_uimm12_shift (-(val - (val | ~mask)))) - { - if (generate) - { - subtarget = subtargets ? gen_reg_rtx (DImode) : dest; - emit_insn (gen_rtx_SET (subtarget, GEN_INT (val | ~mask))); - emit_insn (gen_adddi3 (dest, subtarget, - GEN_INT (val - (val | ~mask)))); - } - num_insns += 2; - return num_insns; - } + if (exam_plus (val, val & mask)) + return; + if (exam_plus (val, (val + comp) & mask)) + return; + if (exam_plus (val, (val - comp) | ~mask)) + return; + if (exam_plus (val, val | ~mask)) + return; } - /* See if we can do it by arithmetically combining two - immediates. */ - for (i = 0; i < AARCH64_NUM_BITMASKS; i++) + /* See if we can do it by arithmetically combining two immediates. */ + for (int i = 0; i < AARCH64_NUM_BITMASKS; i++) { - int j; - mask = 0xffff; + unsigned HOST_WIDE_INT bmi = aarch64_bitmasks[i]; - if (aarch64_uimm12_shift (val - aarch64_bitmasks[i]) - || aarch64_uimm12_shift (-val + aarch64_bitmasks[i])) - { - if (generate) - { - subtarget = subtargets ? 
gen_reg_rtx (DImode) : dest; - emit_insn (gen_rtx_SET (subtarget, - GEN_INT (aarch64_bitmasks[i]))); - emit_insn (gen_adddi3 (dest, subtarget, - GEN_INT (val - aarch64_bitmasks[i]))); - } - num_insns += 2; - return num_insns; - } + if (exam_plus (val, bmi)) + return; - for (j = 0; j < 64; j += 16, mask <<= 16) + for (int j = 0; j < 64; j += 16) { - if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask)) + mask = HOST_WIDE_INT_UC (0xffff) << j; + if ((bmi & ~mask) == (val & ~mask)) { - if (generate) - { - emit_insn (gen_rtx_SET (dest, - GEN_INT (aarch64_bitmasks[i]))); - emit_insn (gen_insv_immdi (dest, GEN_INT (j), - GEN_INT ((val >> j) & 0xffff))); - } - num_insns += 2; - return num_insns; + set0 (bmi); + insN (j, val); + return; } } } /* See if we can do it by logically combining two immediates. */ - for (i = 0; i < AARCH64_NUM_BITMASKS; i++) + for (int i = 0; i < AARCH64_NUM_BITMASKS; i++) { - if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i]) + unsigned HOST_WIDE_INT bmi = aarch64_bitmasks[i]; + + if ((bmi & val) == bmi) { - int j; + for (int j = i + 1; j < AARCH64_NUM_BITMASKS; j++) + { + unsigned HOST_WIDE_INT bmj = aarch64_bitmasks[j]; - for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++) - if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j])) - { - if (generate) - { - subtarget = subtargets ? gen_reg_rtx (mode) : dest; - emit_insn (gen_rtx_SET (subtarget, - GEN_INT (aarch64_bitmasks[i]))); - emit_insn (gen_iordi3 (dest, subtarget, - GEN_INT (aarch64_bitmasks[j]))); - } - num_insns += 2; - return num_insns; - } + if (val == (bmi | bmj)) + { + set0 (bmi); + opN (AA_GI_IOR, bmj); + return; + } + } } - else if ((val & aarch64_bitmasks[i]) == val) + else if ((val & bmi) == val) { - int j; + for (int j = i + 1; j < AARCH64_NUM_BITMASKS; j++) + { + unsigned HOST_WIDE_INT bmj = aarch64_bitmasks[j]; - for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++) - if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i])) - { - if (generate) - { - subtarget = subtargets ? 
gen_reg_rtx (mode) : dest; - emit_insn (gen_rtx_SET (subtarget, - GEN_INT (aarch64_bitmasks[j]))); - emit_insn (gen_anddi3 (dest, subtarget, - GEN_INT (aarch64_bitmasks[i]))); - } - num_insns += 2; - return num_insns; - } + if (val == (bmi & bmj)) + { + set0 (bmi); + opN (AA_GI_AND, bmj); + return; + } + } } } if (one_match > zero_match) { /* Set either first three quarters or all but the third. */ - mask = 0xffffll << (16 - first_not_ffff_match); - if (generate) - emit_insn (gen_rtx_SET (dest, - GEN_INT (val | mask | 0xffffffff00000000ull))); - num_insns ++; + mask = HOST_WIDE_INT_UC (0xffff) << (16 - first_not_ffff_match); + set0 (val | mask | HOST_WIDE_INT_UC (0xffffffff00000000)); /* Now insert other two quarters. */ - for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1); - i < 64; i += 16, mask <<= 16) + for (int i = first_not_ffff_match + 16; i < 64; i += 16) { + mask = HOST_WIDE_INT_UC (0xffff) << i; if ((val & mask) != mask) - { - if (generate) - emit_insn (gen_insv_immdi (dest, GEN_INT (i), - GEN_INT ((val >> i) & 0xffff))); - num_insns ++; - } + insN (i, val); } - return num_insns; + return; } simple_sequence: - first = true; - mask = 0xffff; - for (i = 0; i < 64; i += 16, mask <<= 16) + cost = 0; + for (int i = 0; i < 64; i += 16) { + mask = HOST_WIDE_INT_UC (0xffff) << i; if ((val & mask) != 0) { - if (first) - { - if (generate) - emit_insn (gen_rtx_SET (dest, GEN_INT (val & mask))); - num_insns ++; - first = false; - } + if (cost == 0) + set0 (val & mask); else - { - if (generate) - emit_insn (gen_insv_immdi (dest, GEN_INT (i), - GEN_INT ((val >> i) & 0xffff))); - num_insns ++; - } + insN (i, val); } } +} + +/* Follow the recipe to construct a value in MODE + placing the result in DEST. 
*/ + +void +genimm_aa64::generate (rtx dest, machine_mode mode) const +{ + int n = cost; - return num_insns; + gcc_checking_assert (n >= 1 && n <= max_cost); + gcc_checking_assert (code[0] == AA_GI_SET); + + /* If possible, put the original SET into its own pseudo, so that + it might be CSE'd. We can't do this if we use INSV, and we only + ever use arithmetic with N == 2. */ + if (n == 2 && optimize && can_create_pseudo_p ()) + { + rtx_code rc = aa_gi_binop (code[1]); + if (rc != UNKNOWN) + { + rtx sub = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (sub, GEN_INT (op[0]))); + sub = gen_rtx_fmt_ee (rc, mode, sub, GEN_INT (op[1])); + emit_insn (gen_rtx_SET (dest, sub)); + return; + } + } + + emit_insn (gen_rtx_SET (dest, GEN_INT (op[0]))); + + for (int i = 1; i < n; ++i) + { + rtx x = GEN_INT (op[i]); + switch (code[i]) + { + case AA_GI_PLUS: + case AA_GI_IOR: + case AA_GI_AND: + x = gen_rtx_fmt_ee (aa_gi_binop (code[i]), mode, dest, x); + x = gen_rtx_SET (dest, x); + break; + case AA_GI_INS0: + case AA_GI_INS1: + case AA_GI_INS2: + case AA_GI_INS3: + if (mode == SImode) + x = gen_insv_immsi (dest, GEN_INT ((int)code[i]), x); + else + x = gen_insv_immdi (dest, GEN_INT ((int)code[i]), x); + break; + default: + gcc_unreachable (); + } + emit_insn (x); + } } +} // anon namespace + +/* Examine IMM in MODE and return the number of insns required to construct it. + If GENERATE is true, emit instructions to compute IMM into DEST. */ + +static inline int +aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, + machine_mode mode) +{ + genimm_aa64 data = genimm_hash<genimm_aa64>::hash (INTVAL (imm), mode); + if (generate) + data.generate (dest, mode); + return data.cost; +} void aarch64_expand_mov_immediate (rtx dest, rtx imm) -- 2.4.3