On 08/04/2015 04:50 AM, Nathan Sidwell wrote:
> +/* Worker reduction address expander. */
> +static rtx
> +nvptx_expand_work_red_addr (tree exp, rtx target,
> + machine_mode ARG_UNUSED (mode),
> + int ignore)
> {
> - return nvptx_expand_lock_unlock (desc, exp, false);
> + if (ignore)
> + return target;
> +
> + rtx loop_id = expand_expr (CALL_EXPR_ARG (exp, 0),
> + NULL_RTX, mode, EXPAND_NORMAL);
> + rtx red_id = expand_expr (CALL_EXPR_ARG (exp, 1),
> + NULL_RTX, mode, EXPAND_NORMAL);
> + gcc_assert (GET_CODE (loop_id) == CONST_INT
> + && GET_CODE (red_id) == CONST_INT);
> + gcc_assert (REG_P (target));
> +
> + unsigned lid = (unsigned)UINTVAL (loop_id);
> + unsigned rid = (unsigned)UINTVAL (red_id);
> +
> + unsigned ix;
> +
> + for (ix = 0; ix != loop_reds.length (); ix++)
> + if (loop_reds[ix].id == lid)
> + goto found_lid;
> + /* Allocate a new loop. */
> + loop_reds.safe_push (loop_red (lid));
> + found_lid:
> + loop_red &loop = loop_reds[ix];
> + for (ix = 0; ix != loop.vars.length (); ix++)
> + if (loop.vars[ix].first == rid)
> + goto found_rid;
> +
> + /* Allocate a new var. */
> + {
> + tree type = TREE_TYPE (TREE_TYPE (exp));
> + enum machine_mode mode = TYPE_MODE (type);
> + unsigned align = GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT;
> + unsigned off = loop.hwm;
> +
> + if (align > worker_red_align)
> + worker_red_align = align;
> + off = (off + align - 1) & ~(align -1);
> + loop.hwm = off + GET_MODE_SIZE (mode);
> + loop.vars.safe_push (var_red_t (rid, off));
> + }
> + found_rid:
> +
> + /* Return offset into worker reduction array. */
> + unsigned offset = loop.vars[ix].second;
> +
> + rtx addr = gen_reg_rtx (Pmode);
> + emit_move_insn (addr,
> + gen_rtx_PLUS (Pmode, worker_red_sym, GEN_INT (offset)));
> + emit_insn (gen_rtx_SET (target,
> + gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
> + UNSPEC_FROM_SHARED)));
> + return target;
> }
Something is wrong over here. I'm seeing this ICE:
wred.c: In function ‘main._omp_fn.0’:
wred.c:9:9: error: unrecognizable insn:
#pragma acc parallel loop vector_length (32) num_workers (32) worker
reduction (+:red) copy (red)
^
(insn 28 27 29 2 (set (reg:DI 59)
(plus:DI (symbol_ref:DI ("__worker_red"))
(const_int 0 [0]))) wred.c:9 -1
(nil))
The attached patch fixes it by assigning worker_red_sym to a scratch
register. Is this OK gomp-4_0-branch?
Cesar
2015-08-06 Cesar Philippidis <[email protected]>
gcc/
* config/nvptx/nvptx.c (nvptx_expand_work_red_addr): Use a
scratch register for worker_red_sym.
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index e343e53..389e370 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -3415,10 +3415,12 @@ nvptx_expand_work_red_addr (tree exp, rtx target,
/* Return offset into worker reduction array. */
unsigned offset = loop.vars[ix].second;
-
+
+ rtx base = gen_reg_rtx (Pmode);
rtx addr = gen_reg_rtx (Pmode);
+ emit_insn (gen_rtx_SET (base, worker_red_sym));
emit_move_insn (addr,
- gen_rtx_PLUS (Pmode, worker_red_sym, GEN_INT (offset)));
+ gen_rtx_PLUS (Pmode, base, GEN_INT (offset)));
emit_insn (gen_rtx_SET (target,
gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
UNSPEC_FROM_SHARED)));