The lower-subreg patch that Richard Henderson posted, and that comes up again from time to time, may also help. It does require a bit of hacking in the MDs (mostly removing the DImode patterns for logical operations, since the middle-end is able to synthesize them on its own).

Thanks for the information.  I'll look at this.

Here is an updated patch; the code is also cleaned up a bit to comply better with the GCC coding standards.

The big TODO item there is that the pass has "a bald-faced assumption that [every] subreg [of a multi-word reg] is actually inside an operand, and is thus replacable. This might be false if the target plays games with subregs in the patterns. Perhaps a better approach is to mirror what regrename does wrt recognizing the insn, iterating over the operands, smashing the operands out and iterating over the resulting pattern." Note that regrename, as far as I understand, does *much* more than what this pass should do.

Paolo
Index: Makefile.in
===================================================================
--- Makefile.in (revision 108713)
+++ Makefile.in (working copy)
@@ -972,7 +978,7 @@ OBJS-common = \
  insn-extract.o insn-opinit.o insn-output.o insn-peep.o insn-recog.o      \
  integrate.o intl.o jump.o  langhooks.o lcm.o lists.o local-alloc.o       \
  loop.o mode-switching.o modulo-sched.o optabs.o options.o opts.o         \
- params.o postreload.o postreload-gcse.o predict.o                        \
+ params.o postreload.o postreload-gcse.o predict.o lower-subreg.o         \
  insn-preds.o pointer-set.o                                               \
  print-rtl.o print-tree.o profile.o value-prof.o var-tracking.o           \
  real.o recog.o reg-stack.o regclass.o regmove.o regrename.o              \
@@ -1722,6 +1767,8 @@ langhooks.o : langhooks.c $(CONFIG_H) $(
    $(TREE_H) toplev.h tree-inline.h $(RTL_H) insn-config.h $(INTEGRATE_H) \
    langhooks.h $(LANGHOOKS_DEF_H) $(FLAGS_H) $(GGC_H) $(DIAGNOSTIC_H) intl.h \
    $(TREE_GIMPLE_H)
+lower-subreg.o : lower-subreg.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
+   $(MACHMODE_H) $(RTL_H) function.h $(EXPR_H) $(OBSTACK_H) bitmap.h tree-pass.h $(TM_P_H) $(BASIC_BLOCK_H) $(TIMEVAR_H)
 tree.o : tree.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
    $(FLAGS_H) function.h $(PARAMS_H) \
    toplev.h $(GGC_H) $(HASHTAB_H) $(TARGET_H) output.h $(TM_P_H) langhooks.h \
Index: dwarf2out.c
===================================================================
--- dwarf2out.c (revision 108713)
+++ dwarf2out.c (working copy)
@@ -8892,6 +8892,31 @@ concat_loc_descriptor (rtx x0, rtx x1)
   return cc_loc_result;
 }
 
+/* Return a descriptor that describes the concatenation of N locations.  */
+
+static dw_loc_descr_ref
+concatn_loc_descriptor (rtx concatn)
+{
+  dw_loc_descr_ref cc_loc_result = NULL;
+  unsigned int i, n = XVECLEN (concatn, 0);
+
+  for (i = 0; i < n; ++i)
+    {
+      dw_loc_descr_ref ref;
+      rtx x = XVECEXP (concatn, 0, i);
+
+      ref = loc_descriptor (x);
+      if (ref == NULL)
+       return NULL;
+
+      add_loc_descr (&cc_loc_result, ref);
+      ref = new_loc_descr (DW_OP_piece, GET_MODE_SIZE (GET_MODE (x)), 0);
+      add_loc_descr (&cc_loc_result, ref);
+    }
+
+  return cc_loc_result;
+}
+
 /* Output a proper Dwarf location descriptor for a variable or parameter
    which is either allocated in a register or in a memory location.  For a
    register, we just generate an OP_REG and the register number.  For a
@@ -8929,6 +8954,10 @@ loc_descriptor (rtx rtl)
       loc_result = concat_loc_descriptor (XEXP (rtl, 0), XEXP (rtl, 1));
       break;
 
+    case CONCATN:
+      loc_result = concatn_loc_descriptor (rtl);
+      break;
+
     case VAR_LOCATION:
       /* Single part.  */
       if (GET_CODE (XEXP (rtl, 1)) != PARALLEL)
Index: emit-rtl.c
===================================================================
--- emit-rtl.c  (revision 108713)
+++ emit-rtl.c  (working copy)
@@ -846,13 +846,12 @@ gen_reg_rtx (enum machine_mode mode)
   return val;
 }
 
-/* Generate a register with same attributes as REG, but offsetted by OFFSET.
+/* Update NEW with same attributes as REG, but offsetted by OFFSET.
    Do the big endian correction if needed.  */
 
-rtx
-gen_rtx_REG_offset (rtx reg, enum machine_mode mode, unsigned int regno, int offset)
+static void
+update_reg_offset (rtx new, rtx reg, int offset)
 {
-  rtx new = gen_rtx_REG (mode, regno);
   tree decl;
   HOST_WIDE_INT var_size;
 
@@ -894,7 +893,7 @@ gen_rtx_REG_offset (rtx reg, enum machin
   if ((BYTES_BIG_ENDIAN || WORDS_BIG_ENDIAN)
       && decl != NULL
       && offset > 0
-      && GET_MODE_SIZE (GET_MODE (reg)) > GET_MODE_SIZE (mode)
+      && GET_MODE_SIZE (GET_MODE (reg)) > GET_MODE_SIZE (GET_MODE (new))
       && ((var_size = int_size_in_bytes (TREE_TYPE (decl))) > 0
          && var_size < GET_MODE_SIZE (GET_MODE (reg))))
     {
@@ -938,6 +937,27 @@ gen_rtx_REG_offset (rtx reg, enum machin
 
   REG_ATTRS (new) = get_reg_attrs (REG_EXPR (reg),
                                   REG_OFFSET (reg) + offset);
+}
+
+/* Generate a register with same attributes as REG, but offsetted by OFFSET. */
+
+rtx
+gen_rtx_REG_offset (rtx reg, enum machine_mode mode,
+                   unsigned int regno, int offset)
+{
+  rtx new = gen_rtx_REG (mode, regno);
+  update_reg_offset (new, reg, offset);
+  return new;
+}
+
+/* Generate a new pseudo register with same attributes as REG, but
+   offsetted by OFFSET.  */
+
+rtx
+gen_reg_rtx_offset (rtx reg, enum machine_mode mode, int offset)
+{
+  rtx new = gen_reg_rtx (mode);
+  update_reg_offset (new, reg, offset);
   return new;
 }
 
@@ -1187,8 +1207,9 @@ gen_lowpart_common (enum machine_mode mo
        return gen_rtx_fmt_e (GET_CODE (x), mode, XEXP (x, 0));
     }
   else if (GET_CODE (x) == SUBREG || REG_P (x)
-          || GET_CODE (x) == CONCAT || GET_CODE (x) == CONST_VECTOR
-          || GET_CODE (x) == CONST_DOUBLE || GET_CODE (x) == CONST_INT)
+          || GET_CODE (x) == CONCAT || GET_CODE (x) == CONCATN
+          || GET_CODE (x) == CONST_VECTOR || GET_CODE (x) == CONST_DOUBLE
+          || GET_CODE (x) == CONST_INT)
     return simplify_gen_subreg (mode, x, innermode, offset);
 
   /* Otherwise, we can't do this.  */
Index: loop.c
===================================================================
--- loop.c      (revision 108713)
+++ loop.c      (working copy)
@@ -10662,7 +10662,8 @@ loop_regs_scan (const struct loop *loop,
   /* Don't try to move insns which set CC registers if we should not
      create CCmode register copies.  */
   for (i = regs->num - 1; i >= FIRST_PSEUDO_REGISTER; i--)
-    if (GET_MODE_CLASS (GET_MODE (regno_reg_rtx[i])) == MODE_CC)
+    if (regno_reg_rtx[i]
+       && GET_MODE_CLASS (GET_MODE (regno_reg_rtx[i])) == MODE_CC)
       regs->array[i].may_not_optimize = 1;
 #endif
 
Index: lower-subreg.c
===================================================================
--- lower-subreg.c      (revision 0)
+++ lower-subreg.c      (revision 0)
@@ -0,0 +1,681 @@
+/* Decompose multiword subregs.
+   Copyright (C) 2006 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING.  If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.  */
+
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "machmode.h"
+#include "tm.h"
+#include "rtl.h"
+#include "function.h"
+#include "expr.h"
+#include "obstack.h"
+#include "bitmap.h"
+#include "tree-pass.h"
+#include "tm_p.h"
+#include "basic-block.h"
+#include "timevar.h"
+
+
+#ifdef STACK_GROWS_DOWNWARD
+# undef STACK_GROWS_DOWNWARD
+# define STACK_GROWS_DOWNWARD 1
+#else
+# define STACK_GROWS_DOWNWARD 0
+#endif
+
+
+DEF_VEC_P(bitmap);
+DEF_VEC_ALLOC_P(bitmap,heap);
+
+/* Bit N set if regno N is used in a context in which we can decompose it.  */
+static bitmap decomposable_context;
+
+/* Bit N set if regno N is used in a context in which it cannot
+   be decomposed.  */
+static bitmap non_decomposable_context;
+
+/* Bit N in element M set if there exists a copy from reg M to reg N.  */
+static VEC(bitmap,heap) *reg_copy_graph;
+
+
+/* If INSN is a single set between two objects, return the SET.  Such
+   insns can always be decomposed.  */
+
+static rtx
+simple_move (rtx insn)
+{
+  rtx x, set = single_set (insn);
+
+  if (!set)
+    return NULL;
+
+  x = SET_DEST (set);
+  if (!OBJECT_P (x) && GET_CODE (x) != SUBREG)
+    return NULL;
+  if (MEM_P (x) && MEM_VOLATILE_P (x))
+    return NULL;
+  /* (set cc0 reg) is a comparison instruction, not a move.  */
+  if (CC0_P (x))
+    return NULL;
+
+  x = SET_SRC (set);
+  if (!OBJECT_P (x) && GET_CODE (x) != SUBREG)
+    return NULL;
+  if (MEM_P (x) && MEM_VOLATILE_P (x))
+    return NULL;
+
+  return set;
+}
+
+/* Given a SET rtx, if it is a copy between pseudos mark it as such into
+   reg_copy_graph.  */
+
+static void
+mark_pseudo_copy (rtx set)
+{
+  rtx dst = SET_DEST (set);
+  rtx src = SET_SRC (set);
+  unsigned int rd, rs;
+  bitmap b;
+
+  if (!REG_P (dst) || !REG_P (src))
+    return;
+
+  rd = REGNO (dst);
+  rs = REGNO (src);
+  if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
+    return;
+
+  if (GET_MODE_SIZE (GET_MODE (dst)) < UNITS_PER_WORD)
+    return;
+
+  b = VEC_index (bitmap, reg_copy_graph, rs);
+  if (b == NULL)
+    {
+      b = BITMAP_ALLOC (NULL);
+      VEC_replace (bitmap, reg_copy_graph, rs, b);
+    }
+  bitmap_set_bit (b, rd);
+}
+
+/* Using reg_copy_graph, set decomposable_context for pseudos copied
+   from pseudos that are also decomposable.  */
+
+static void
+propagate_pseudo_copies (void)
+{
+  bitmap queue, propagate;
+
+  queue = BITMAP_ALLOC (NULL);
+  propagate = BITMAP_ALLOC (NULL);
+
+  bitmap_copy (queue, decomposable_context);
+  do
+    {
+      bitmap_iterator iter;
+      unsigned int i;
+
+      bitmap_clear (propagate);
+      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
+       {
+         bitmap b = VEC_index (bitmap, reg_copy_graph, i);
+         if (b)
+           bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
+       }
+
+      bitmap_and_compl (queue, propagate, decomposable_context);
+      bitmap_ior_into (decomposable_context, propagate);
+    }
+  while (!bitmap_empty_p (queue));
+
+  BITMAP_FREE (queue);
+  BITMAP_FREE (propagate);
+}
+
+/* Called via for_each_rtx.  Examine the given expression and set bits as
+   appropriate in decomposable_context and non_decomposable_context.  SM
+   is the result of simple_move for the complete insn.  */
+
+static int
+find_decomposable_subregs (rtx *px, void *sm)
+{
+  rtx x = *px, inner;
+  unsigned int inner_size, outer_size;
+  unsigned int inner_words, outer_words;
+  unsigned int regno;
+
+  switch (GET_CODE (x))
+    {
+    case SUBREG:
+      /* Ensure we're not looking at something other than a subreg of a
+        pseudo register.  One might hope these tests never fail, since
+        that would indicate someone not using simplify_gen_subreg or some
+        related interface, but that no doubt happens all too often.  */
+      inner = SUBREG_REG (x);
+      if (!REG_P (inner))
+       break;
+
+      regno = REGNO (inner);
+      if (HARD_REGISTER_NUM_P (regno))
+       return -1;
+
+      /* Compute the number of words covered by the subreg and the reg.  */
+      outer_size = GET_MODE_SIZE (GET_MODE (x));
+      inner_size = GET_MODE_SIZE (GET_MODE (inner));
+      outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+      inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+      /* If we've got a single-word subreg of a multi-word reg, then this
+        should be a candidate for decomposition.  Return -1 so that we
+        don't iterate over the inner register and decide it is used in a
+        context we can't decompose.
+
+        This doesn't allow e.g. DImode subregs of TImode values on
+        32-bit targets.  We'd need to record the way in which the pseudo
+        is used, and only decompose if all uses were with the same number
+        of pieces.  Hopefully this doesn't happen with any frequency.  */
+
+      /* ??? This is a bald-faced assumption that the subreg is actually
+        inside an operand, and is thus replaceable.  This might be false
+        if the target plays games with subregs in the patterns.  Perhaps
+        a better approach is to mirror what regrename does wrt recognizing
+        the insn, iterating over the operands, smashing the operands out
+        and iterating over the resulting pattern.  */
+      if (outer_words == 1 && inner_words > 1)
+       {
+         bitmap_set_bit (decomposable_context, regno);
+         return -1;
+       }
+      break;
+
+    case REG:
+      /* Since we see outer subregs and avoid iterating over inner registers
+        when we can handle the decomposition, that means that anywhere else
+        we come across the register must be a place we can't decompose it.
+        Avoid setting the bit for single-word pseudos to keep down the size
+        of the bitmap.
+
+        Simple moves will be handled in propagate_pseudo_copies, so do not
+        set the bit.  */
+      regno = REGNO (x);
+      if (!HARD_REGISTER_NUM_P (regno) && !sm
+         && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
+       bitmap_set_bit (non_decomposable_context, regno);
+      break;
+
+    default:
+      break;
+    }
+
+  return 0;
+}
+
+/* Decompose pseudo REGNO into word-sized components.  We smash the REG
+   node in place.  This ensures that (1) something goes wrong quickly if
+   we fail to find a place in which we ought to be performing some
+   replacement, and (2) the debug information inside the symbol table is
+   automatically kept up to date.  */
+
+static void
+decompose_register (unsigned int regno)
+{
+  unsigned int words, i;
+  rtvec v;
+  rtx reg;
+
+  reg = regno_reg_rtx[regno];
+  regno_reg_rtx[regno] = NULL;
+
+  words = GET_MODE_SIZE (GET_MODE (reg));
+  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+  /* TODO: We may want to create a CONCAT instead of the CONCATN for
+     two-word registers.  */
+  if (dump_file)
+    fprintf (dump_file, "; Splitting reg %u ->", REGNO (reg));
+
+  v = rtvec_alloc (words);
+  for (i = 0; i < words; ++i)
+    RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
+  /* Smash REG only after its attributes have been copied to the parts.  */
+  PUT_CODE (reg, CONCATN);
+  XVEC (reg, 0) = v;
+
+  if (dump_file)
+    {
+      for (i = 0; i < words; ++i)
+       fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
+      fputc ('\n', dump_file);
+    }
+}
+
+/* Return true if X is an rtx which we produced out of a multi-word
+   pseudo in decompose_register.  */
+static inline bool
+decomposed_reg_p (rtx x)
+{
+  return GET_CODE (x) == CONCAT || GET_CODE (x) == CONCATN;
+}
+
+/* Return true if X is a subreg of an rtx which we produced out of a multi-word
+   pseudo in decompose_register.  */
+static inline bool
+decomposed_subreg_p (rtx x)
+{
+  return GET_CODE (x) == SUBREG && decomposed_reg_p (SUBREG_REG (x));
+}
+
+/* Replace all uses of a decomposed multi-word pseudo with the appropriate
+   REG.  Return 1 if a REG is found for such a pseudo, that is not wrapped
+   within a SUBREG.  */
+
+static int
+replace_subreg_use (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+  rtx x = *px;
+
+  if (x == NULL)
+    return 0;
+
+  /* If this is a (subreg (concat)) pattern, then it must be something that
+     we created via decompose_register.  */
+  if (decomposed_subreg_p (x))
+    {
+      /* This must be resolvable.  */
+      *px = simplify_subreg (GET_MODE (x), SUBREG_REG (x),
+                            GET_MODE (SUBREG_REG (x)), SUBREG_BYTE (x));
+      gcc_assert (*px != NULL);
+      return -1;
+    }
+
+  if (decomposed_reg_p (x))
+    return 1;
+
+  return 0;
+}
+
+/* Move a REG_LIBCALL note from OLD_START to NEW_START.  Update the
+   corresponding REG_RETVAL note to mark the new start of the libcall.  */
+
+static void
+move_libcall_note (rtx old_start, rtx new_start)
+{
+  rtx note0, note1, end;
+
+  note0 = find_reg_note (old_start, REG_LIBCALL, NULL);
+  if (note0 == NULL)
+    return;
+
+  remove_note (old_start, note0);
+  end = XEXP (note0, 0);
+  note1 = find_reg_note (end, REG_RETVAL, NULL);
+
+  XEXP (note0, 1) = REG_NOTES (new_start);
+  REG_NOTES (new_start) = note0;
+  XEXP (note1, 0) = new_start;
+}
+
+/* Remove the REG_RETVAL note attached to INSN1 (if present), and any linked
+   REG_LIBCALL and REG_NO_CONFLICT notes.  */
+
+static void
+remove_libcall_notes (rtx insn1)
+{
+  rtx note, note0, insn0, note1, insn;
+
+  note1 = find_reg_note (insn1, REG_RETVAL, NULL);
+  if (note1 == NULL)
+    return;
+
+  insn0 = XEXP (note1, 0);
+  note0 = find_reg_note (insn0, REG_LIBCALL, NULL);
+
+  remove_note (insn0, note0);
+  remove_note (insn1, note1);
+
+  for (insn = insn0; insn != insn1; insn = NEXT_INSN (insn))
+    while ((note = find_reg_note (insn, REG_NO_CONFLICT, NULL)))
+      remove_note (insn, note);
+}
+
+/* Remove mentions of decomposed multi-word pseudos from INSN.  This
+   includes REG_EQUAL/REG_EQUIV notes, REG_NO_CONFLICT notes, and
+   notes that compose a libcall block ending at INSN.  */
+
+static void
+remove_reg_notes_for_decomposed_regs (rtx insn)
+{
+  rtx *pnote, note;
+
+  note = find_reg_equal_equiv_note (insn);
+  if (note && for_each_rtx (&XEXP (note, 0), replace_subreg_use, NULL))
+    {
+      remove_note (insn, note);
+      remove_libcall_notes (insn);
+    }
+
+  pnote = &REG_NOTES (insn);
+  while ((note = *pnote))
+    {
+      bool delete = false;
+
+      switch (REG_NOTE_KIND (note))
+       {
+       case REG_NO_CONFLICT:
+         if (decomposed_reg_p (XEXP (note, 0)))
+           delete = true;
+         break;
+
+       default:
+         break;
+       }
+
+      if (delete)
+       *pnote = XEXP (note, 1);
+      else
+       pnote = &XEXP (note, 1);
+    }
+}
+
+/* Return whether X is a non-decomposable pseudo, or a hard register
+   which we cannot access in word_mode.  */
+
+static bool
+cannot_decompose_p (rtx x)
+{
+  if (REG_P (x))
+    {
+      unsigned int regno = REGNO (x);
+      if (HARD_REGISTER_NUM_P (regno))
+       return !validate_subreg (word_mode, GET_MODE (x), x, UNITS_PER_WORD);
+      else
+       return bitmap_bit_p (non_decomposable_context, regno);
+    }
+  return false;
+}
+
+/* Try to replace SET (a simple_move appearing in INSN) with a sequence of
+   word-wide moves.  Return INSN if this could not be done, return the first
+   move if this succeeded.  */
+
+static rtx
+replace_subregs_in_simple_move (rtx set, rtx insn)
+{
+  rtx dst, src, tmp;
+  bool must_decompose_dest, must_decompose_src, delete;
+  unsigned int i, words;
+  enum machine_mode orig_mode;
+
+  dst = SET_DEST (set);
+  src = SET_SRC (set);
+  orig_mode = GET_MODE (dst);
+
+  if (GET_CODE (dst) == SUBREG)
+    must_decompose_dest = decomposed_subreg_p (dst)
+                         && GET_MODE_SIZE (orig_mode) > UNITS_PER_WORD;
+  else
+    must_decompose_dest = decomposed_reg_p (dst);
+
+  if (GET_CODE (src) == SUBREG)
+    must_decompose_src = decomposed_subreg_p (src)
+                        && GET_MODE_SIZE (orig_mode) > UNITS_PER_WORD;
+  else
+    must_decompose_src = decomposed_reg_p (src);
+
+  if (!must_decompose_dest && !must_decompose_src)
+    return insn;
+
+  start_sequence ();
+
+  delete = true;
+
+  if (must_decompose_src && GET_CODE (src) == SUBREG)
+    {
+      /* Moving from a multi-word SUBREG.  First decompose a copy into
+         a pseudo of the right size, then copy that into DEST.  */
+      tmp = SUBREG_REG (src);
+      orig_mode = GET_MODE (tmp);
+      dst = gen_reg_rtx (orig_mode);
+      SUBREG_REG (src) = dst;
+      src = tmp;
+      delete = false;
+    }
+  else if (!must_decompose_src && cannot_decompose_p (src))
+    {
+      /* Moving from a source register that we cannot decompose.  First copy
+         into a same sized pseudo, and decompose the copy from that pseudo.  */
+      tmp = gen_reg_rtx (orig_mode);
+      emit_move_insn (tmp, src);
+      src = tmp;
+    }
+
+  if (must_decompose_dest && GET_CODE (dst) == SUBREG)
+    {
+      /* ??? Moving into a multi-word SUBREG.  */
+      tmp = SUBREG_REG (dst);
+      orig_mode = GET_MODE (tmp);
+      SUBREG_REG (dst) = gen_reg_rtx (orig_mode);
+      emit_move_insn (dst, src);
+      src = SUBREG_REG (dst);
+      dst = tmp;
+    }
+  else if (!must_decompose_dest && cannot_decompose_p (dst))
+    {
+      /* Moving into a destination register that we cannot decompose.
+         First decompose a copy into a same sized pseudo, then copy that.  */
+      dst = gen_reg_rtx (orig_mode);
+      SET_SRC (set) = dst;
+      delete = false;
+    }
+
+  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+  /* Either synthesize many pushes, or many copies.  */
+  if (push_operand (dst, orig_mode))
+    {
+      unsigned int j, jinc;
+
+      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
+      gcc_assert (GET_CODE (XEXP (dst, 0)) != PRE_MODIFY);
+      gcc_assert (GET_CODE (XEXP (dst, 0)) != POST_MODIFY);
+
+      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
+       j = 0, jinc = 1;
+      else
+       j = words - 1, jinc = -1;
+
+      for (i = 0; i < words; i++, j += jinc)
+       {
+         tmp = copy_rtx (XEXP (dst, 0));
+         tmp = adjust_automodify_address_nv (dst, word_mode, tmp,
+                                             j * UNITS_PER_WORD);
+         emit_move_insn (tmp, simplify_subreg (word_mode, src, orig_mode,
+                                               j * UNITS_PER_WORD));
+       }
+    }
+  else
+    {
+      gcc_assert (!MEM_P (dst)
+                 || GET_RTX_CLASS (GET_CODE (XEXP (dst, 0))) != RTX_AUTOINC);
+      gcc_assert (!MEM_P (src)
+                 || GET_RTX_CLASS (GET_CODE (XEXP (src, 0))) != RTX_AUTOINC);
+
+      if (REG_P (dst) && !HARD_REGISTER_NUM_P (REGNO (dst)))
+       emit_insn (gen_rtx_CLOBBER (VOIDmode, dst));
+
+      for (i = 0; i < words; ++i)
+       emit_move_insn (simplify_gen_subreg (word_mode, dst, orig_mode,
+                                            UNITS_PER_WORD * i),
+                       simplify_gen_subreg (word_mode, src, orig_mode,
+                                            UNITS_PER_WORD * i));
+    }
+
+  tmp = get_insns ();
+  end_sequence ();
+
+  emit_insn_before (tmp, insn);
+  if (delete)
+    {
+      move_libcall_note (insn, tmp);
+      remove_libcall_notes (insn);
+      delete_insn (insn);
+    }
+
+  return tmp;
+}
+
+/* If the clobbered item is a pseudo that was decomposed, replace the
+   clobber with one clobber for each part we created out of the pseudo.  */
+static void
+replace_subregs_in_clobber (rtx pat, rtx insn)
+{
+  rtx reg = XEXP (pat, 0);
+  unsigned int words, i;
+  enum machine_mode orig_mode;
+
+  if (!decomposed_reg_p (reg))
+    return;
+
+  orig_mode = GET_MODE (reg);
+  words = GET_MODE_SIZE (orig_mode);
+  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+  XEXP (pat, 0) = simplify_subreg (word_mode, reg, orig_mode, 0);
+  for (i = words - 1; i > 0; --i)
+    {
+      pat = simplify_subreg (word_mode, reg, orig_mode, i * UNITS_PER_WORD);
+      pat = gen_rtx_CLOBBER (VOIDmode, pat);
+      emit_insn_after (pat, insn);
+    }
+}
+
+/* If the used item is a decomposed pseudo or a subreg of one, delete the
+   USE.  It presumably belonged to a libcall block, whose notes we delete.  */
+static void
+replace_subregs_in_use (rtx pat, rtx insn)
+{
+  if (decomposed_reg_p (XEXP (pat, 0)) || decomposed_subreg_p (XEXP (pat, 0)))
+    delete_insn (insn);
+}
+
+/* Find every multi-word register that we only refer to a word at a time,
+   and decompose it into multiple word-sized pseudos.  */
+
+static void
+decompose_multiword_subregs (void)
+{
+  rtx insn, set;
+  basic_block bb;
+
+  decomposable_context = BITMAP_ALLOC (NULL);
+  non_decomposable_context = BITMAP_ALLOC (NULL);
+
+  {
+    unsigned int max = max_reg_num ();
+    reg_copy_graph = VEC_alloc (bitmap, heap, max);
+    VEC_safe_grow (bitmap, heap, reg_copy_graph, max);
+    memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max);
+  }
+
+  FOR_EACH_BB (bb)
+    FOR_BB_INSNS (bb, insn)
+      if (INSN_P (insn)
+          && GET_CODE (PATTERN (insn)) != CLOBBER
+          && GET_CODE (PATTERN (insn)) != USE)
+      {
+       set = simple_move (insn);
+       if (set)
+         mark_pseudo_copy (set);
+        for_each_rtx (&PATTERN (insn), find_decomposable_subregs, set);
+      }
+
+  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
+  if (!bitmap_empty_p (decomposable_context))
+    {
+      bitmap_iterator iter;
+      unsigned int regno;
+
+      propagate_pseudo_copies ();
+
+      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
+       decompose_register (regno);
+
+      for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
+       {
+         rtx pat;
+
+         if (!INSN_P (insn))
+           continue;
+
+         pat = PATTERN (insn);
+         if (GET_CODE (pat) == CLOBBER)
+           replace_subregs_in_clobber (pat, insn);
+         else if (GET_CODE (pat) == USE)
+           replace_subregs_in_use (pat, insn);
+         else
+           {
+             set = simple_move (insn);
+             if (set)
+               insn = replace_subregs_in_simple_move (set, insn);
+             for_each_rtx (&PATTERN (insn), replace_subreg_use, NULL);
+             remove_reg_notes_for_decomposed_regs (insn);
+           }
+       }
+    }
+
+  BITMAP_FREE (decomposable_context);
+  BITMAP_FREE (non_decomposable_context);
+
+  {
+    unsigned int i;
+    bitmap b;
+    for (i = 0; VEC_iterate (bitmap, reg_copy_graph, i, b); ++i)
+      if (b)
+       BITMAP_FREE (b);
+  }
+  VEC_free (bitmap, heap, reg_copy_graph);
+}
+
+static bool
+gate_lower_subreg (void)
+{
+  return optimize > 0;
+}
+
+
+struct tree_opt_pass pass_subreg =
+{
+  "subreg",                             /* name */
+  gate_lower_subreg,                    /* gate */
+  decompose_multiword_subregs,          /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_LOWER_SUBREG,                      /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_dump_func | TODO_ggc_collect,    /* todo_flags_finish */
+  'u'                                   /* letter */
+};
+
Index: passes.c
===================================================================
--- passes.c    (revision 108713)
+++ passes.c    (working copy)
@@ -644,6 +694,7 @@ init_optimization_passes (void)
   NEXT_PASS (pass_unshare_all_rtl);
   NEXT_PASS (pass_instantiate_virtual_regs);
   NEXT_PASS (pass_jump2);
+  NEXT_PASS (pass_subreg);
   NEXT_PASS (pass_cse);
   NEXT_PASS (pass_gcse);
   NEXT_PASS (pass_loop_optimize);
Index: rtl.def
===================================================================
--- rtl.def     (revision 108713)
+++ rtl.def     (working copy)
@@ -377,10 +377,10 @@ DEF_RTL_EXPR(SUBREG, "subreg", "ei", RTX
 
 DEF_RTL_EXPR(STRICT_LOW_PART, "strict_low_part", "e", RTX_EXTRA)
 
-/* (CONCAT a b) represents the virtual concatenation of a and b
-   to make a value that has as many bits as a and b put together.
-   This is used for complex values.  Normally it appears only
-   in DECL_RTLs and during RTL generation, but not in the insn chain.  */
+/* (CONCAT a b) represents the virtual concatenation of a and b to make a
+   value that has as many bits as a and b put together.  This is used for,
+   among other things, complex values.  Normally it appears only in DECL_RTLs
+   and during RTL generation, but not in the insn chain.  */
 DEF_RTL_EXPR(CONCAT, "concat", "ee", RTX_OBJ)
 
 /* A memory location; operand is the address.  The second operand is the
@@ -407,6 +407,13 @@ DEF_RTL_EXPR(SYMBOL_REF, "symbol_ref", "
    pretend to be looking at the entire value and comparing it.  */
 DEF_RTL_EXPR(CC0, "cc0", "", RTX_OBJ)
 
+/* (CONCATN [a1 a2 ... an]) represents the virtual concatenation of all
+   the ai to make a value.  This is an extension of CONCAT to larger
+   numbers of components.  This is used for decomposing large values
+   into register sized components.  Like CONCAT, it should not appear
+   in the insn chain.  */
+DEF_RTL_EXPR (CONCATN, "concatn", "E", RTX_OBJ)
+
 /* ----------------------------------------------------------------------
    Expressions for operators in an rtl pattern
    ---------------------------------------------------------------------- */
Index: rtl.h
===================================================================
--- rtl.h       (revision 108713)
+++ rtl.h       (working copy)
@@ -1372,6 +1372,7 @@ extern int rtx_equal_p (rtx, rtx);
 extern rtvec gen_rtvec_v (int, rtx *);
 extern rtx gen_reg_rtx (enum machine_mode);
 extern rtx gen_rtx_REG_offset (rtx, enum machine_mode, unsigned int, int);
+extern rtx gen_reg_rtx_offset (rtx, enum machine_mode, int);
 extern rtx gen_label_rtx (void);
 extern rtx gen_lowpart_common (enum machine_mode, rtx);
 
Index: simplify-rtx.c
===================================================================
--- simplify-rtx.c      (revision 108713)
+++ simplify-rtx.c      (working copy)
@@ -3958,15 +3958,24 @@ simplify_subreg (enum machine_mode outer
       && GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (GET_MODE (op)))
     return adjust_address_nv (op, outermode, byte);
 
-  /* Handle complex values represented as CONCAT
-     of real and imaginary part.  */
-  if (GET_CODE (op) == CONCAT)
+  /* Handle values represented as CONCAT.  */
+  if (GET_CODE (op) == CONCAT || GET_CODE (op) == CONCATN)
     {
       unsigned int inner_size, final_offset;
       rtx part, res;
 
-      inner_size = GET_MODE_UNIT_SIZE (innermode);
-      part = byte < inner_size ? XEXP (op, 0) : XEXP (op, 1);
+      if (GET_CODE (op) == CONCAT)
+       {
+          inner_size = GET_MODE_SIZE (innermode) / 2;
+         part = byte < inner_size ? XEXP (op, 0) : XEXP (op, 1);
+       }
+      else
+       {
+         /* ??? We've got room; perhaps we should store the inner size
+            of the CONCATN in one of the subsequent unused fields.  */
+         inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
+         part = XVECEXP (op, 0, byte / inner_size);
+       }
       final_offset = byte % inner_size;
       if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
        return NULL_RTX;
Index: timevar.def
===================================================================
--- timevar.def (revision 108713)
+++ timevar.def (working copy)
@@ -124,6 +124,7 @@ DEFTIMEVAR (TV_OVERLOAD              , "
 DEFTIMEVAR (TV_TEMPLATE_INSTANTIATION, "template instantiation")
 DEFTIMEVAR (TV_EXPAND               , "expand")
 DEFTIMEVAR (TV_VARCONST              , "varconst")
+DEFTIMEVAR (TV_LOWER_SUBREG          , "lower subreg")
 DEFTIMEVAR (TV_JUMP                  , "jump")
 DEFTIMEVAR (TV_CSE                   , "CSE")
 DEFTIMEVAR (TV_LOOP                  , "loop analysis")

Reply via email to