On 08/04/2015 04:50 AM, Nathan Sidwell wrote: > +/* Worker reduction address expander. */ > +static rtx > +nvptx_expand_work_red_addr (tree exp, rtx target, > + machine_mode ARG_UNUSED (mode), > + int ignore) > { > - return nvptx_expand_lock_unlock (desc, exp, false); > + if (ignore) > + return target; > + > + rtx loop_id = expand_expr (CALL_EXPR_ARG (exp, 0), > + NULL_RTX, mode, EXPAND_NORMAL); > + rtx red_id = expand_expr (CALL_EXPR_ARG (exp, 1), > + NULL_RTX, mode, EXPAND_NORMAL); > + gcc_assert (GET_CODE (loop_id) == CONST_INT > + && GET_CODE (red_id) == CONST_INT); > + gcc_assert (REG_P (target)); > + > + unsigned lid = (unsigned)UINTVAL (loop_id); > + unsigned rid = (unsigned)UINTVAL (red_id); > + > + unsigned ix; > + > + for (ix = 0; ix != loop_reds.length (); ix++) > + if (loop_reds[ix].id == lid) > + goto found_lid; > + /* Allocate a new loop. */ > + loop_reds.safe_push (loop_red (lid)); > + found_lid: > + loop_red &loop = loop_reds[ix]; > + for (ix = 0; ix != loop.vars.length (); ix++) > + if (loop.vars[ix].first == rid) > + goto found_rid; > + > + /* Allocate a new var. */ > + { > + tree type = TREE_TYPE (TREE_TYPE (exp)); > + enum machine_mode mode = TYPE_MODE (type); > + unsigned align = GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT; > + unsigned off = loop.hwm; > + > + if (align > worker_red_align) > + worker_red_align = align; > + off = (off + align - 1) & ~(align -1); > + loop.hwm = off + GET_MODE_SIZE (mode); > + loop.vars.safe_push (var_red_t (rid, off)); > + } > + found_rid: > + > + /* Return offset into worker reduction array. */ > + unsigned offset = loop.vars[ix].second; > + > + rtx addr = gen_reg_rtx (Pmode); > + emit_move_insn (addr, > + gen_rtx_PLUS (Pmode, worker_red_sym, GEN_INT (offset))); > + emit_insn (gen_rtx_SET (target, > + gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), > + UNSPEC_FROM_SHARED))); > + return target; > }
Something is wrong over here. I'm seeing this ICE: wred.c: In function ‘main._omp_fn.0’: wred.c:9:9: error: unrecognizable insn: #pragma acc parallel loop vector_length (32) num_workers (32) worker reduction (+:red) copy (red) ^ (insn 28 27 29 2 (set (reg:DI 59) (plus:DI (symbol_ref:DI ("__worker_red")) (const_int 0 [0]))) wred.c:9 -1 (nil)) The attached patch fixes it by assigning worker_red_sym to a scratch register. Is this OK gomp-4_0-branch? Cesar
2015-08-06 Cesar Philippidis <ce...@codesourcery.com> gcc/ * config/nvptx/nvptx.c (nvptx_expand_work_red_addr): Use a scratch register for worker_red_sym. diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index e343e53..389e370 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -3415,10 +3415,12 @@ nvptx_expand_work_red_addr (tree exp, rtx target, /* Return offset into worker reduction array. */ unsigned offset = loop.vars[ix].second; - + + rtx base = gen_reg_rtx (Pmode); rtx addr = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (base, worker_red_sym)); emit_move_insn (addr, - gen_rtx_PLUS (Pmode, worker_red_sym, GEN_INT (offset))); + gen_rtx_PLUS (Pmode, base, GEN_INT (offset))); emit_insn (gen_rtx_SET (target, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_FROM_SHARED)));