I allowed me to CC Vladimir; maybe he can propose how the backend can describe an efficient, constraint-based solution. The problem is about expanders producing insns with non-fixed hard-regs as in/out operands or clobbers. This includes move insn from non-generic address spaces which require dedicated hard regs. Issue is about correctness and efficiency of generated code.

Am 10/24/2014 um 08:29 PM schrieb Jakub Jelinek:
On Fri, Oct 24, 2014 at 08:19:57PM +0200, Georg-Johann Lay wrote:
Yes, that's the straight forward approach which works so far.  Bit tedious,
but well...

In one case expmed generated different code, though: divmodhi instead of
mulhi_highpart for HI division by const_int.  Cheating with costs did not
help.  However for now I am mostly after correct, ICE-less code.

What I am concerned about is:

1) May it happen that a value lives in a hard-reg across the expander? The
expander has no means to detect that situation and interfere, e.g.

hard-reg = source_value // middle-end
expand-code             // back-end
sink_value = hard-reg   // middle-end

where "expand-code" is one defind_expand / define_insn that clobbers /
changes (parts of) hard-reg but does *not* get hard-reg as operand. This is
wrong code obviously.

It can happen, but if it happens, that would mean user code bug, like using
register asm with an register that is unsuitable for use as global or local
register var on the target, or it could be backend bug (expansion of some
pattern clobbering register that has other fixed uses).
You shouldn't ICE on it, but what happens is undefined.

Before RA, the use of hard regs should be limited (pretty much just fixed
regs where really necessary, global and local register variables (user needs
to use with care), function arguments and return values (short lived around
the call patterns).

        Jakub

FYI, the problem with using hard regs returned, now as PR65657 with move insns which use hard regs.

There is one simple and obvious solution : Don't use hard regs, but instead introduce respective constraints and let the register allocator do the job of allocating the hard regs.

I tried that and it works in principle (for non-moves), but the code generated by the register allocator is *bloated* beyond all recognition.

The use of hard regs in the avr BE is motivated by avoiding standard ABI calls for support functions. Many of these libgcc functions are written in assembly and have a much smaller footprint than ordinary ABI functions. The move insns mentioned above perform loading from a non-standard address-sppace which is too complicated to be expanded inline -- and even if expanded inline, the code will need specific hard registers in specific operands because the instruction set is dictating that.

Bottom line is: Using that simple and obvious hard-regs-by-constraint approach would lead to code that is not acceptable w.r.t its performance.

One only solution that might work without bloating the code as mad might be to perform the register selection by hand in a dedicated, pre-reload, target-specific pass as outlined in

https://gcc.gnu.org/ml/gcc-patches/2015-04/msg00823.html


Johann


Index: gcc-4_9-branch/gcc/config/avr/avr.md
===================================================================
--- gcc-4_9-branch/gcc/config/avr/avr.md	(revision 221321)
+++ gcc-4_9-branch/gcc/config/avr/avr.md	(working copy)
@@ -165,6 +165,13 @@ (define_attr "isa"
    standard"
   (const_string "standard"))
 
+
+(define_attr "fixregs"
+  "xload_A, xloadQI_A,
+   no"
+  (const_string "no"))
+
+
 (define_attr "enabled" ""
   (cond [(eq_attr "isa" "standard")
          (const_int 1)
@@ -494,9 +501,9 @@ (define_insn "load_<mode>_libgcc"
 ;; "xload8qi_A"
 ;; "xload8qq_A" "xload8uqq_A"
 (define_insn_and_split "xload8<mode>_A"
-  [(set (match_operand:ALL1 0 "register_operand" "=r")
-        (match_operand:ALL1 1 "memory_operand"    "m"))
-   (clobber (reg:HI REG_Z))]
+  [(set (match_operand:ALL1 0 "register_operand"  "=r")
+        (match_operand:ALL1 1 "memory_operand"     "m"))
+   (clobber (match_operand:HI 2 "scratch_operand" "=z"))] ;; HI 30
   "can_create_pseudo_p()
    && !avr_xload_libgcc_p (<MODE>mode)
    && avr_mem_memx_p (operands[1])
@@ -505,6 +512,8 @@ (define_insn_and_split "xload8<mode>_A"
   "&& 1"
   [(clobber (const_int 0))]
   {
+    gcc_assert (SCRATCH != GET_CODE (operands[2]));
+
     /* ; Split away the high part of the address.  GCC's register allocator
        ; in not able to allocate segment registers and reload the resulting
        ; expressions.  Notice that no address register can hold a PSImode.  */
@@ -520,7 +529,9 @@ (define_insn_and_split "xload8<mode>_A"
     set_mem_addr_space (SET_SRC (single_set (insn)),
                                  MEM_ADDR_SPACE (operands[1]));
     DONE;
-  })
+  }
+  [(set_attr "fixregs" "xloadQI_A")])
+
 
 ;; "xloadqi_A" "xloadqq_A" "xloaduqq_A"
 ;; "xloadhi_A" "xloadhq_A" "xloaduhq_A" "xloadha_A" "xloaduha_A"
@@ -530,9 +541,9 @@ (define_insn_and_split "xload8<mode>_A"
 (define_insn_and_split "xload<mode>_A"
   [(set (match_operand:MOVMODE 0 "register_operand" "=r")
         (match_operand:MOVMODE 1 "memory_operand"    "m"))
-   (clobber (reg:MOVMODE 22))
-   (clobber (reg:QI 21))
-   (clobber (reg:HI REG_Z))]
+   (clobber (match_operand:MOVMODE 2 "scratch_operand" "=r"))   ;; MOVMODE 22
+   (clobber (match_operand:QI 3      "scratch_operand" "=r"))   ;; QI 21
+   (clobber (match_operand:HI 4      "scratch_operand" "=z"))]  ;; HI 30 (Z)
   "can_create_pseudo_p()
    && avr_mem_memx_p (operands[1])
    && REG_P (XEXP (operands[1], 0))"
@@ -540,6 +551,10 @@ (define_insn_and_split "xload<mode>_A"
   "&& 1"
   [(clobber (const_int 0))]
   {
+    gcc_assert (SCRATCH != GET_CODE (operands[2]));
+    gcc_assert (SCRATCH != GET_CODE (operands[3]));
+    gcc_assert (SCRATCH != GET_CODE (operands[4]));
+
     rtx addr = XEXP (operands[1], 0);
     rtx reg_z = gen_rtx_REG (HImode, REG_Z);
     rtx addr_hi8 = simplify_gen_subreg (QImode, addr, PSImode, 2);
@@ -558,7 +573,9 @@ (define_insn_and_split "xload<mode>_A"
     emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, 22));
 
     DONE;
-  })
+  }
+  [(set_attr "fixregs" "xload_A")])
+
 
 ;; Move value from address space memx to a register
 ;; These insns must be prior to respective generic move insn.
@@ -638,10 +655,10 @@ (define_expand "mov<mode>"
         if (!avr_xload_libgcc_p (<MODE>mode))
           /* ; No <mode> here because gen_xload8<mode>_A only iterates over ALL1.
              ; insn-emit does not depend on the mode, it's all about operands.  */
-          emit_insn (gen_xload8qi_A (dest, src));
+          emit_insn (gen_xload8qi_A (dest, src, gen_rtx_SCRATCH (HImode)));
         else
-          emit_insn (gen_xload<mode>_A (dest, src));
-
+          emit_insn (gen_xload<mode>_A (dest, src, gen_rtx_SCRATCH (<MODE>mode),
+                                        gen_rtx_SCRATCH (QImode), gen_rtx_SCRATCH (HImode)));
         DONE;
       }
 
Index: gcc-4_9-branch/gcc/config/avr/avr.c
===================================================================
--- gcc-4_9-branch/gcc/config/avr/avr.c	(revision 221321)
+++ gcc-4_9-branch/gcc/config/avr/avr.c	(working copy)
@@ -287,6 +287,96 @@ avr_to_int_mode (rtx x)
 }
 
 
+static void
+avr_rest_of_handle_expand_xload (void)
+{
+  basic_block bb;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      rtx insn, next;
+
+      FOR_BB_INSNS_SAFE (bb, insn, next)
+        {
+          enum attr_fixregs fixregs;
+
+          if (-1 == recog_memoized (insn)
+              || FIXREGS_NO == (fixregs = get_attr_fixregs (insn)))
+            {
+              continue;
+            }
+            
+          avr_edump ("%?: Must fix insn:\n  %r\n\n", insn);
+          if (dump_file)
+            avr_fdump (dump_file, ";; Fixing insn:\n  %r\n\n", insn);
+
+          extract_insn (insn);
+
+          switch (fixregs)
+            {
+            default:
+              gcc_unreachable();
+
+            case FIXREGS_XLOADQI_A:
+              if (dump_file)
+                avr_fdump (dump_file, "$2 = %r\n\n", recog_data.operand[2]);
+              // FIXME: Todo
+              break;
+
+            case FIXREGS_XLOAD_A:
+              if (dump_file)
+                {
+                  avr_fdump (dump_file, "$2 = %r\n", recog_data.operand[2]);
+                  avr_fdump (dump_file, "$3 = %r\n", recog_data.operand[3]);
+                  avr_fdump (dump_file, "$4 = %r\n\n", recog_data.operand[4]);
+                  // FIXME: Todo
+                }
+              break;
+            }
+        } // insn
+    }
+}
+
+
+static const pass_data avr_pass_data_expand_xload =
+{
+  RTL_PASS,       // type
+  "",             // name (will be patched)
+  OPTGROUP_NONE,  // optinfo_flags
+  false,          // has_gate
+  true,           // has_execute
+  TV_NONE,        // tv_id
+  0,              // properties_required
+  0,              // properties_provided
+  0,              // properties_destroyed
+  0,              // todo_flags_start
+  // todo_flags_finish
+  TODO_df_finish | TODO_verify_rtl_sharing | TODO_verify_flow
+};
+
+
+class avr_pass_expand_xload : public rtl_opt_pass
+{
+public:
+  avr_pass_expand_xload (gcc::context *ctxt, const char *name)
+    : rtl_opt_pass (avr_pass_data_expand_xload, ctxt)
+  {
+    this->name = name;
+  }
+
+  unsigned int execute (void)
+  {
+    df_lr_add_problem ();
+    df_live_add_problem ();
+    df_analyze ();
+
+    avr_rest_of_handle_expand_xload();
+
+    return 0;
+  }
+}; // avr_pass_recompute_notes
+
+
 static const pass_data avr_pass_data_recompute_notes =
 {
   RTL_PASS,       // type
@@ -326,6 +416,11 @@ public:
 static void
 avr_register_passes (void)
 {
+  /* This avr-specific fixed PR63633 for the case of mov insns.  */
+
+  register_pass (new avr_pass_expand_xload (g, "avr-expand-xload"),
+                 PASS_POS_INSERT_BEFORE, "split1", 1);
+
   /* This avr-specific pass (re)computes insn notes, in particular REG_DEAD
      notes which are used by `avr.c::reg_unused_after' and branch offset
      computations.  These notes must be correct, i.e. there must be no

Reply via email to