On Wed, Dec 21, 2005 at 03:07:21PM -0500, DJ Delorie wrote:
>
> > It was because I had decided to expose the registers as %al, %ah,
> > ... %bl, %bh, ... instead of the customary %[e]ax and friends.
>
> I originally did this for the m32c port (which has hi/low pairs like
> the i386) but discovered that reload always allocates registers in
> UNITS_PER_WORD chunks, and move-by-pieces uses UNITS_PER_WORD chunks,
> so if you have 8 bit registers you end up with 8 bit moves all over
> the place. If you have 8 bit registers and 16 bit moves, reload
> counts wrong. I ended up switching to the word-sized register model
> that i386 currently uses, even though it meant worse code generation.
>
> I seem to recall ranting about it at the time, too. UNITS_PER_WORD
> must die! The m32c has four 8 bit registers, two 16 bit registers,
> and five 24 bit registers. They can be combined to form 8, 16, 24,
> 32, 48, and 64 bit registers. GCC has no way of expressing that.

Like Bernd, I'm playing with a 16-bit ix86 port where the eight 8-bit registers are not represented as four 16-bit registers. This means that reload has to deal with eight 8-bit registers and four 16-bit registers. With a few patches, reload is able to work with mixed register sizes. I have rewritten subreg_regno_offset() and subreg_offset_representable_p(). There are two known problems with the rewritten versions: 1) The i386 complex modes with holes are not supported, but I think that can be added. 2) The i386 backend causes subreg_regno_offset (9, DFmode, 4, SImode) calls which fail the first assertion. Register 9 is a floating point register. I have no idea what should be returned in this case. Index: gcc/rtlanal.c =================================================================== --- gcc/rtlanal.c (revision 109766) +++ gcc/rtlanal.c (working copy) @@ -3130,7 +3130,7 @@ return subreg_lsb_1 (GET_MODE (x), GET_MODE (SUBREG_REG (x)), SUBREG_BYTE (x)); } - +#if 1 /* This function returns the regno offset of a subreg expression. xregno - A regno of an inner hard subreg_reg (or what will become one). xmode - The mode of xregno. @@ -3141,6 +3141,42 @@ subreg_regno_offset (unsigned int xregno, enum machine_mode xmode, unsigned int offset, enum machine_mode ymode) { +/* (subreg:Ymode (reg:Xmode Xregno) offset) */ + unsigned int r, units; + + gcc_assert (xregno < FIRST_PSEUDO_REGISTER); + + if (GET_MODE_SIZE (xmode) >= GET_MODE_SIZE (ymode)) + { + for (r = xregno, units = 0; + units < offset; + units += GET_MODE_SIZE (reg_raw_mode[r]), r ++) + ; + gcc_assert (units == offset); + } + else if (offset == 0 && GET_MODE_SIZE (ymode) > reg_raw_mode[xregno] + ? WORDS_BIG_ENDIAN : BYTES_BIG_ENDIAN) + { + /* Big endian paradoxical subreg. 
*/ + for (r = xregno, units = 0; + units < GET_MODE_SIZE (ymode); + units += GET_MODE_SIZE (reg_raw_mode[r]), r --) + ; + gcc_assert (units == GET_MODE_SIZE (ymode)); + } + else + { + r = xregno; + gcc_assert (offset == 0); + } + return (r - xregno); +} + +#else /* 0 */ +unsigned int +subreg_regno_offset (unsigned int xregno, enum machine_mode xmode, + unsigned int offset, enum machine_mode ymode) +{ int nregs_xmode, nregs_ymode, nregs_xmode_unit_int; int mode_multiple, nregs_multiple; int y_offset; @@ -3194,6 +3230,7 @@ nregs_multiple = nregs_xmode / nregs_ymode; return (y_offset / (mode_multiple / nregs_multiple)) * nregs_ymode; } +#endif /* 0 */ /* This function returns true when the offset is representable via subreg_offset in the given regno. @@ -3202,10 +3239,42 @@ offset - The byte offset. ymode - The mode of a top level SUBREG (or what may become one). RETURN - Whether the offset is representable. */ + bool subreg_offset_representable_p (unsigned int xregno, enum machine_mode xmode, unsigned int offset, enum machine_mode ymode) { +/* (subreg:Ymode (reg:Xmode Xregno) offset) */ + unsigned int r, units; + + gcc_assert (xregno < FIRST_PSEUDO_REGISTER); + + if (GET_MODE_SIZE (xmode) >= GET_MODE_SIZE (ymode)) + { + for (r = xregno, units = 0; + units < offset; + units += GET_MODE_SIZE (reg_raw_mode[r]), r ++) + ; + return (units == offset); + } + else if (offset == 0 && GET_MODE_SIZE (ymode) > reg_raw_mode[xregno] + ? WORDS_BIG_ENDIAN : BYTES_BIG_ENDIAN) + { + /* Big endian paradoxical subreg. 
*/ + for (r = xregno, units = 0; + units < GET_MODE_SIZE (ymode); + units += GET_MODE_SIZE (reg_raw_mode[r]), r --) + ; + return (units == GET_MODE_SIZE (ymode)); + } + return (offset == 0); +} + +#if 0 +bool +subreg_offset_representable_p (unsigned int xregno, enum machine_mode xmode, + unsigned int offset, enum machine_mode ymode) +{ int nregs_xmode, nregs_ymode, nregs_xmode_unit, nregs_xmode_unit_int; int mode_multiple, nregs_multiple; int y_offset; @@ -3299,6 +3368,7 @@ return (!(y_offset % (mode_multiple / nregs_multiple))); } +#endif /* 0 */ /* Return the final regno that a subreg expression refers to. */ unsigned int

In reload, the way that push_reload() calls find_valid_class() assumes that HARD_REGNO_NREGS (regno, mode) does not change when regno changes, so I had to rewrite that. Note that I end up with two outer mode parameters; I don't know if their values ever differ. In addition, find_valid_class() would never return anything other than NO_REGS or ALL_REGS. I fixed that too.

Without the patch, reload would run into problems trying to load a shift count in %si into %cl. With the patch, it uses %cx as an intermediate register when reloading %cl from a 16-bit register, e.g. "mov %si,%cx" and "shr %cl,%dx", while reloads from an 8-bit register don't clobber %ch, e.g. "mov %al,%cl" and "shl %cl,%dx".

Index: gcc/reload.c =================================================================== --- gcc/reload.c (revision 109766) +++ gcc/reload.c (working copy) @@ -246,7 +246,7 @@ enum machine_mode, enum reload_type, enum insn_code *, secondary_reload_info *); static enum reg_class find_valid_class (enum machine_mode, enum machine_mode, - int, unsigned int); + int, unsigned int, enum machine_mode); static int reload_inner_reg_of_subreg (rtx, enum machine_mode, int); static void push_replacement (rtx *, int, enum machine_mode); static void dup_replacements (rtx *, rtx *); @@ -626,17 +626,20 @@ /* Find the largest class which has at least one register valid in mode INNER, and which for every such register, that register number - plus N is also valid in OUTER (if in range) and is cheap to move - into REGNO. Such a class must exist. */ + plus subreg_regno_offset (regnum, INNER, OFFSET, OUTER2) is also valid + in OUTER (if in range) and is cheap to move into REGNO. + Such a class must exist. */ static enum reg_class find_valid_class (enum machine_mode outer ATTRIBUTE_UNUSED, - enum machine_mode inner ATTRIBUTE_UNUSED, int n, - unsigned int dest_regno ATTRIBUTE_UNUSED) + enum machine_mode inner ATTRIBUTE_UNUSED, int offset, + unsigned int dest_regno ATTRIBUTE_UNUSED, + enum machine_mode outer2) { int best_cost = -1; int class; int regno; + int n; enum reg_class best_class = NO_REGS; enum reg_class dest_class ATTRIBUTE_UNUSED = REGNO_REG_CLASS (dest_regno); unsigned int best_size = 0; @@ -646,11 +649,16 @@ { int bad = 0; int good = 0; - for (regno = 0; regno < FIRST_PSEUDO_REGISTER - n && ! bad; regno++) - if (TEST_HARD_REG_BIT (reg_class_contents[class], regno)) + unsigned int nregs; + for (regno = 0, nregs = hard_regno_nregs[regno][inner]; + regno + nregs - 1 < FIRST_PSEUDO_REGISTER && ! 
bad; + regno++, nregs = hard_regno_nregs[regno][inner]) + if (TEST_HARD_REG_BIT (reg_class_contents[class], regno) + && TEST_HARD_REG_BIT (reg_class_contents[class], regno + nregs - 1)) { if (HARD_REGNO_MODE_OK (regno, inner)) { + n = subreg_regno_offset (regno, inner, offset, outer2); good = 1; if (! TEST_HARD_REG_BIT (reg_class_contents[class], regno + n) || ! HARD_REGNO_MODE_OK (regno + n, outer)) @@ -1055,11 +1066,8 @@ if (REG_P (SUBREG_REG (in))) in_class = find_valid_class (inmode, GET_MODE (SUBREG_REG (in)), - subreg_regno_offset (REGNO (SUBREG_REG (in)), - GET_MODE (SUBREG_REG (in)), - SUBREG_BYTE (in), - GET_MODE (in)), - REGNO (SUBREG_REG (in))); + SUBREG_BYTE (in), REGNO (SUBREG_REG (in)), + GET_MODE (in)); /* This relies on the fact that emit_reload_insns outputs the instructions for input reloads of type RELOAD_OTHER in the same @@ -1149,11 +1157,8 @@ push_reload (SUBREG_REG (out), SUBREG_REG (out), &SUBREG_REG (out), &SUBREG_REG (out), find_valid_class (outmode, GET_MODE (SUBREG_REG (out)), - subreg_regno_offset (REGNO (SUBREG_REG (out)), - GET_MODE (SUBREG_REG (out)), - SUBREG_BYTE (out), - GET_MODE (out)), - REGNO (SUBREG_REG (out))), + SUBREG_BYTE (out), + REGNO (SUBREG_REG (out)), GET_MODE (out)), VOIDmode, VOIDmode, 0, 0, opnum, RELOAD_OTHER); } Comments will be appreciated. It would also be very interesting to see how this works on the m32c, in particular. Best regards, Rask Ingemann Lambertsen