This is one of those bugs where I don't know why we didn't notice it sooner.
The patch ensures that unused lanes in masked vector loads are
zero-initialized, as per the internals manual.
This fixes an execution failure in testcase gfortran.dg/assumed_rank_1.f90.
While investigating the bug, I was confused by the meaning of the
"gather<mode>_exec" define_expand, which doesn't quite fit the pattern
of the other "_exec" instructions. It's only used in one place, so I've
inlined it to avoid future confusion. Inlining it also reduces the
likelihood of accidentally bypassing the zero-initialization later.
I also needed a convenient way to create 0.0 vector constants without
uglifying the machine description code, so I extended gcn_vec_constant
to handle floating-point vector modes as well as integer ones.
Andrew
Zero-initialise masked load destinations
Fixes an execution failure in testcase gfortran.dg/assumed_rank_1.f90.
2020-01-30 Andrew Stubbs <a...@codesourcery.com>
gcc/
* config/gcn/gcn-valu.md (gather<mode>_exec): Move contents ...
(mask_gather_load<mode>): ... here, and zero-initialize the
destination.
(maskload<mode>di): Zero-initialize the destination.
* config/gcn/gcn.c (gcn_vec_constant): Add support for floating-point
vector modes.
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 331c768cb88..4aad835b2ef 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -701,34 +701,6 @@
DONE;
})
-(define_expand "gather<mode>_exec"
- [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
- (match_operand:DI 1 "register_operand")
- (match_operand:V64SI 2 "register_operand")
- (match_operand 3 "immediate_operand")
- (match_operand:SI 4 "gcn_alu_operand")
- (match_operand:DI 5 "gcn_exec_reg_operand")]
- ""
- {
- rtx undefmode = gcn_gen_undef (<MODE>mode);
-
- rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
- operands[2], operands[4],
- INTVAL (operands[3]), operands[5]);
-
- if (GET_MODE (addr) == V64DImode)
- emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
- const0_rtx, const0_rtx,
- const0_rtx, undefmode,
- operands[5]));
- else
- emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
- addr, const0_rtx,
- const0_rtx, const0_rtx,
- undefmode, operands[5]));
- DONE;
- })
-
; Allow any address expression
(define_expand "gather<mode>_expr<exec>"
[(set (match_operand:VEC_ALLREG_MODE 0 "register_operand")
@@ -2801,9 +2773,12 @@
(<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (V64DImode));
rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
- rtx undef = gcn_gen_undef (<MODE>mode);
- emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v, undef,
- exec));
+
+ /* Masked lanes are required to hold zero. */
+ emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
+
+ emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v,
+ operands[0], exec));
DONE;
})
@@ -2843,8 +2818,23 @@
operands[2] = tmp;
}
- emit_insn (gen_gather<mode>_exec (operands[0], operands[1], operands[2],
- operands[3], operands[4], exec));
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
+ operands[2], operands[4],
+ INTVAL (operands[3]), exec);
+
+ /* Masked lanes are required to hold zero. */
+ emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
+
+ if (GET_MODE (addr) == V64DImode)
+ emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
+ const0_rtx, const0_rtx,
+ const0_rtx, operands[0],
+ exec));
+ else
+ emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
+ addr, const0_rtx,
+ const0_rtx, const0_rtx,
+ operands[0], exec));
DONE;
})
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index a39e9f3fbd6..16c3aa2567e 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -992,9 +992,19 @@ gcn_vec_constant (machine_mode mode, int a)
return CONST2_RTX (mode);*/
int units = GET_MODE_NUNITS (mode);
- rtx tem = gen_int_mode (a, GET_MODE_INNER (mode));
- rtvec v = rtvec_alloc (units);
+ machine_mode innermode = GET_MODE_INNER (mode);
+
+ rtx tem;
+ if (FLOAT_MODE_P (innermode))
+ {
+ REAL_VALUE_TYPE rv;
+ real_from_integer (&rv, NULL, a, SIGNED);
+ tem = const_double_from_real_value (rv, innermode);
+ }
+ else
+ tem = gen_int_mode (a, innermode);
+ rtvec v = rtvec_alloc (units);
for (int i = 0; i < units; ++i)
RTVEC_ELT (v, i) = tem;