This patch merges my most recent sequence of ptx backend changes to the gomp4
branch.
nathan
2016-01-05 Nathan Sidwell <nat...@acm.org>
Merge from mainline:
2015-12-30 Nathan Sidwell <nat...@acm.org>
* config/nvptx/nvptx.c (nvptx_assemble_undefined_decl): Check
it's not a constant pool object.
2015-12-28 Nathan Sidwell <nat...@acm.org>
* config/nvptx/nvptx.c (nvptx_output_call_insn): Expect hard regs.
* config/nvptx/nvptx.md (nvptx_reg_or_mem_operand): Rename to ...
(nvptx_nonimmediate_operand): ... here. Update all uses.
(call_insn_operand): Use REG_P.
(call_operation): Allow hard regs.
2015-12-23 Nathan Sidwell <nat...@acm.org>
* config/nvptx/nvptx-protos.h
(nvptx_maybe_convert_symbolic_operand): Delete prototype.
* config/nvptx/nvptx.c (nvptx_maybe_convert_symbolic_operand): Delete.
(nvptx_output_mov_insn): Record fnsym here.
(nvptx_wpropagate): Don't create UNSPEC_TO_GENERIC unspec.
* config/nvptx/nvptx.md (UNSPEC_TO_GENERIC): Delete.
(symbolic_operand): Delete predicate.
(nvptx_nonimmediate_operand): Delete predicate.
(mov<mode>): Hard regs are perfectly ok here.
(convaddr_<mode>): Delete.
2015-12-18 Nathan Sidwell <nat...@acm.org>
* config/nvptx/nvptx.c (nvptx_maybe_convert_symbolic_operand):
Remove UNSPEC_TO_GENERIC generation.
(nvptx_output_mov_insn): Generate cvta for symbolic src.
* config/nvptx/nvptx.md (nvptx_register_operand): Allow hard reg.
(nvptx_reg_or_mem_operand): Likewise.
(nvptx_nonmemory_operand): Likewise.
(nvptx_general_operand): Delete.
(*mov<mode>_insn): Use nonimmediate_operand, permit hardregs.
(oacc_fork, oacc_join): Use general_operand.
2015-12-18 Nathan Sidwell <nat...@acm.org>
* config/nvptx/nvptx.c (nvptx_option_override): Emit sorry for
stabs debug.
(nvptx_assemble_undefined_decl): Use nvptx_assemble_decl_end.
2015-12-18 Nathan Sidwell <nat...@acm.org>
* config/nvptx/nvptx.c (worker_bcast_name, worker_red_name): Delete.
(nvptx_option_override): Adjust worker symbol creation.
(nvptx_gen_wcast): Wrap worker address in UNSPEC_TO_GENERIC.
(write_worker_buffer): New.
(nvptx_file_end): Call write_worker_buffer.
* config/nvptx/nvptx.md (UNSPEC_SHARED_DATA): Delete.
(worker_load<mode>, worker_store<mode>): Delete.
2015-12-17 Nathan Sidwell <nat...@acm.org>
* config/nvptx/nvptx.h (NVPTX_RETURN_REGNUM, FRAME_POINTER_REGNUM,
ARG_POINTER_REGNUM, STATIC_CHAIN_REGNUM): Renumber.
(REGISTER_NAMES): Update and rename.
(FIXED_REGISTERS, CALL_USED_REGISTERS): Update.
(enum reg_class, REG_CLASS_NAMES, REG_CLASS_CONTENTS): Reformat.
2015-12-16 Nathan Sidwell <nat...@acm.org>
* config/nvptx/nvptx.h (OUTGOING_STATIC_CHAIN_REGNUM): Remove.
(REGISTER_NAMES): Adjust.
* config/nvptx/nvptx.c (nvptx_pass_by_reference): Avoid long line.
(nvptx_static_chain): Delete.
(write_arg_mode): Don't emit initializer if argno < 0.
(write_arg_type): Fix whitespace.
(init_frame): Initialize reg to zero if frame is zero-sized.
(nvptx_declare_function_name): Use write_arg_type to emit chain
decl.
(nvptx_output_call_insn): Adjust static chain emission.
(nvptx_goacc_reduction): Make static.
(TARGET_STATIC_CHAIN): Don't override.
2015-12-16 Nathan Sidwell <nat...@acm.org>
* config/nvptx/nvptx-protos.h (nvptx_hard_regno_mode_ok): Delete.
* config/nvptx/nvptx.h (struct machine_function):
Reimplement. Adjust all users.
* config/nvptx/nvptx.c (nvptx_declare_function_name): Move stack
and frame array generation earlier.
(nvptx_call_args): Reimplement.
(nvptx_expand_call): Adjust.
(nvptx_hard_regno_mode_ok): Delete.
(nvptx_reorg): Revert scan of hard regs.
Index: config/nvptx/nvptx.h
===================================================================
--- config/nvptx/nvptx.h (revision 232059)
+++ config/nvptx/nvptx.h (working copy)
@@ -1,5 +1,5 @@
/* Target Definitions for NVPTX.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
Contributed by Bernd Schmidt <ber...@codesourcery.com>
This file is part of GCC.
@@ -29,8 +29,6 @@
#define STARTFILE_SPEC "%{mmainkernel:crt0.o}"
-#define ASM_SPEC "%{misa=*:-m %*}"
-
#define TARGET_CPU_CPP_BUILTINS() \
do \
{ \
@@ -84,21 +82,17 @@
#define PTRDIFF_TYPE (TARGET_ABI64 ? "long int" : "int")
#define POINTER_SIZE (TARGET_ABI64 ? 64 : 32)
-
#define Pmode (TARGET_ABI64 ? DImode : SImode)
#define TARGET_SM35 (ptx_isa_option >= PTX_ISA_SM35)
/* Registers. Since ptx is a virtual target, we just define a few
- hard registers for special purposes and leave pseudos unallocated. */
-
-#define FIRST_PSEUDO_REGISTER 16
-/* We have to have some available hard registers, to keep gcc setup
+ hard registers for special purposes and leave pseudos unallocated.
+ We have to have some available hard registers, to keep gcc setup
happy. */
-#define FIXED_REGISTERS \
- { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 }
-#define CALL_USED_REGISTERS \
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
+#define FIRST_PSEUDO_REGISTER 16
+#define FIXED_REGISTERS { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+#define CALL_USED_REGISTERS { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
#define HARD_REGNO_NREGS(REG, MODE) \
((void)(REG), (void)(MODE), 1)
@@ -108,32 +102,13 @@
((void)(REG), (void)(MODE), true)
/* Register Classes. */
-
-enum reg_class
- {
- NO_REGS,
- ALL_REGS,
- LIM_REG_CLASSES
- };
-
+enum reg_class { NO_REGS, ALL_REGS, LIM_REG_CLASSES };
+#define REG_CLASS_NAMES { "NO_REGS", "ALL_REGS" }
+#define REG_CLASS_CONTENTS { { 0x0000 }, { 0xFFFF } }
#define N_REG_CLASSES (int) LIM_REG_CLASSES
-#define REG_CLASS_NAMES { \
- "NO_REGS", \
- "ALL_REGS" }
-
-#define REG_CLASS_CONTENTS \
-{ \
- /* NO_REGS. */ \
- { 0x0000 }, \
- /* ALL_REGS. */ \
- { 0xFFFF }, \
-}
-
#define GENERAL_REGS ALL_REGS
-
#define REGNO_REG_CLASS(R) ((void)(R), ALL_REGS)
-
#define BASE_REG_CLASS ALL_REGS
#define INDEX_REG_CLASS NO_REGS
@@ -159,18 +134,16 @@ enum reg_class
#define FRAME_GROWS_DOWNWARD 0
#define STACK_GROWS_DOWNWARD 1
+#define NVPTX_RETURN_REGNUM 0
#define STACK_POINTER_REGNUM 1
-#define NVPTX_RETURN_REGNUM 4
-#define FRAME_POINTER_REGNUM 15
-#define ARG_POINTER_REGNUM 14
-
-#define STATIC_CHAIN_REGNUM 12
-#define OUTGOING_STATIC_CHAIN_REGNUM 10
+#define FRAME_POINTER_REGNUM 2
+#define ARG_POINTER_REGNUM 3
+#define STATIC_CHAIN_REGNUM 4
#define REGISTER_NAMES \
{ \
- "%hr0", "%outargs", "%hfp", "%hr3", "%retval", "%hr5", "%hr6", "%hr7", \
- "%hr8", "%hr9", "%chain_out", "%hr11", "%chain_in", "%hr13", "%argp", "%frame" \
+ "%value", "%stack", "%frame", "%args", "%chain", "%hr5", "%hr6", "%hr7", \
+ "%hr8", "%hr9", "%hr10", "%hr11", "%hr12", "%hr13", "%hr14", "%hr15" \
}
#define FIRST_PARM_OFFSET(FNDECL) ((void)(FNDECL), 0)
@@ -228,14 +201,15 @@ struct nvptx_args {
#if defined HOST_WIDE_INT
struct GTY(()) machine_function
{
- rtx_expr_list *call_args;
- rtx start_call;
- tree funtype;
- bool has_call_with_varargs;
- bool has_call_with_sc;
- HOST_WIDE_INT outgoing_stdarg_size;
- int ret_reg_mode; /* machine_mode not defined yet. */
- rtx axis_predicate[2];
+ rtx_expr_list *call_args; /* Arg list for the current call. */
+ bool doing_call; /* Within a CALL_ARGS ... CALL_ARGS_END sequence. */
+ bool is_varadic; /* This call is variadic. */
+ bool has_varadic; /* Current function has a variadic call. */
+ bool has_chain; /* Current function has outgoing static chain. */
+ int num_args; /* Number of args of current call. */
+ int return_mode; /* Return mode of current fn.
+ (machine_mode not defined yet.) */
+ rtx axis_predicate[2]; /* Neutering predicates. */
};
#endif
Index: config/nvptx/nvptx-protos.h
===================================================================
--- config/nvptx/nvptx-protos.h (revision 232059)
+++ config/nvptx/nvptx-protos.h (working copy)
@@ -1,5 +1,5 @@
/* Prototypes for exported functions defined in nvptx.c.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
Contributed by Bernd Schmidt <ber...@codesourcery.com>
This file is part of GCC.
@@ -41,7 +41,5 @@ extern const char *nvptx_ptx_type_from_m
extern const char *nvptx_output_mov_insn (rtx, rtx);
extern const char *nvptx_output_call_insn (rtx_insn *, rtx, rtx);
extern const char *nvptx_output_return (void);
-extern bool nvptx_hard_regno_mode_ok (int, machine_mode);
-extern rtx nvptx_maybe_convert_symbolic_operand (rtx);
#endif
#endif
Index: config/nvptx/nvptx.md
===================================================================
--- config/nvptx/nvptx.md (revision 232059)
+++ config/nvptx/nvptx.md (working copy)
@@ -1,5 +1,5 @@
;; Machine description for NVPTX.
-;; Copyright (C) 2014-2015 Free Software Foundation, Inc.
+;; Copyright (C) 2014-2016 Free Software Foundation, Inc.
;; Contributed by Bernd Schmidt <ber...@codesourcery.com>
;;
;; This file is part of GCC.
@@ -20,7 +20,6 @@
(define_c_enum "unspec" [
UNSPEC_ARG_REG
- UNSPEC_TO_GENERIC
UNSPEC_COPYSIGN
UNSPEC_LOG2
@@ -39,8 +38,6 @@
UNSPEC_DIM_SIZE
- UNSPEC_SHARED_DATA
-
UNSPEC_BIT_CONV
UNSPEC_SHUFFLE
@@ -52,7 +49,6 @@
UNSPECV_CAS
UNSPECV_XCHG
UNSPECV_BARSYNC
- UNSPECV_MEMBAR
UNSPECV_DIM_POS
UNSPECV_FORK
@@ -64,56 +60,27 @@
(define_attr "subregs_ok" "false,true"
(const_string "false"))
+;; The nvptx operand predicates, in general, reject subregs and accept
+;; only literal constants. This differs from the generic predicates,
+;; which permit subregs and symbolic constants (as appropriate).
(define_predicate "nvptx_register_operand"
(match_code "reg")
{
- if (REG_P (op))
- return !HARD_REGISTER_P (op);
return register_operand (op, mode);
})
-(define_predicate "nvptx_reg_or_mem_operand"
+(define_predicate "nvptx_nonimmediate_operand"
(match_code "mem,reg")
{
- if (REG_P (op))
- return !HARD_REGISTER_P (op);
- return memory_operand (op, mode) || register_operand (op, mode);
+ return (REG_P (op) ? register_operand (op, mode)
+ : memory_operand (op, mode));
})
-;; Allow symbolic constants.
-(define_predicate "symbolic_operand"
- (match_code "symbol_ref,const"))
-
-;; Registers or constants for normal instructions. Does not allow symbolic
-;; constants.
(define_predicate "nvptx_nonmemory_operand"
(match_code "reg,const_int,const_double")
{
- if (REG_P (op))
- return !HARD_REGISTER_P (op);
- return nonmemory_operand (op, mode);
-})
-
-;; A source operand for a move instruction. This is the only predicate we use
-;; that accepts symbolic constants.
-(define_predicate "nvptx_general_operand"
- (match_code "reg,subreg,mem,const,symbol_ref,label_ref,const_int,const_double")
-{
- if (REG_P (op))
- return !HARD_REGISTER_P (op);
- return general_operand (op, mode);
-})
-
-;; A destination operand for a move instruction. This is the only destination
-;; predicate that accepts the return register since it requires special handling.
-(define_predicate "nvptx_nonimmediate_operand"
- (match_code "reg,subreg,mem")
-{
- if (REG_P (op))
- return (op != frame_pointer_rtx
- && op != arg_pointer_rtx
- && op != stack_pointer_rtx);
- return nonimmediate_operand (op, mode);
+ return (REG_P (op) ? register_operand (op, mode)
+ : immediate_operand (op, mode));
})
(define_predicate "const0_operand"
@@ -137,7 +104,7 @@
(define_predicate "call_insn_operand"
(match_code "symbol_ref,reg")
{
- return GET_CODE (op) != SYMBOL_REF || SYMBOL_REF_FUNCTION_P (op);
+ return REG_P (op) || SYMBOL_REF_FUNCTION_P (op);
})
;; Return true if OP is a call with parallel USEs of the argument
@@ -151,11 +118,7 @@
{
rtx elt = XVECEXP (op, 0, i);
- if (GET_CODE (elt) != USE
- || GET_CODE (XEXP (elt, 0)) != REG
- || XEXP (elt, 0) == frame_pointer_rtx
- || XEXP (elt, 0) == arg_pointer_rtx
- || XEXP (elt, 0) == stack_pointer_rtx)
+ if (GET_CODE (elt) != USE || !REG_P (XEXP (elt, 0)))
return false;
}
return true;
@@ -213,10 +176,9 @@
%.\\tsetp.eq.u32\\t%0, 1, 1;")
(define_insn "*mov<mode>_insn"
- [(set (match_operand:QHSDIM 0 "nvptx_nonimmediate_operand" "=R,R,m")
+ [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
(match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
- "!MEM_P (operands[0])
- || (REG_P (operands[1]) && REGNO (operands[1]) > LAST_VIRTUAL_REGISTER)"
+ "!MEM_P (operands[0]) || REG_P (operands[1])"
{
if (which_alternative == 1)
return "%.\\tld%A1%u1\\t%0, %1;";
@@ -228,7 +190,7 @@
[(set_attr "subregs_ok" "true")])
(define_insn "*mov<mode>_insn"
- [(set (match_operand:SDFM 0 "nvptx_nonimmediate_operand" "=R,R,m")
+ [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
(match_operand:SDFM 1 "general_operand" "RF,m,R"))]
"!MEM_P (operands[0]) || REG_P (operands[1])"
{
@@ -256,17 +218,11 @@
"%.\\tmov%t0\\t%0, %%ar%1;")
(define_expand "mov<mode>"
- [(set (match_operand:QHSDISDFM 0 "nvptx_nonimmediate_operand" "")
+ [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
(match_operand:QHSDISDFM 1 "general_operand" ""))]
""
{
- operands[1] = nvptx_maybe_convert_symbolic_operand (operands[1]);
-
- /* Hard registers are often actually symbolic operands on this target.
- Don't allow them when storing to memory. */
- if (MEM_P (operands[0])
- && (!REG_P (operands[1])
- || REGNO (operands[1]) <= LAST_VIRTUAL_REGISTER))
+ if (MEM_P (operands[0]) && !REG_P (operands[1]))
{
rtx tmp = gen_reg_rtx (<MODE>mode);
emit_move_insn (tmp, operands[1]);
@@ -277,7 +233,7 @@
(define_insn "zero_extendqihi2"
[(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
- (zero_extend:HI (match_operand:QI 1 "nvptx_reg_or_mem_operand" "R,m")))]
+ (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
""
"@
%.\\tcvt.u16.u%T1\\t%0, %1;
@@ -286,7 +242,7 @@
(define_insn "zero_extend<mode>si2"
[(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
- (zero_extend:SI (match_operand:QHIM 1 "nvptx_reg_or_mem_operand" "R,m")))]
+ (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
""
"@
%.\\tcvt.u32.u%T1\\t%0, %1;
@@ -295,7 +251,7 @@
(define_insn "zero_extend<mode>di2"
[(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
- (zero_extend:DI (match_operand:QHSIM 1 "nvptx_reg_or_mem_operand" "R,m")))]
+ (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
""
"@
%.\\tcvt.u64.u%T1\\t%0, %1;
@@ -304,7 +260,7 @@
(define_insn "extend<mode>si2"
[(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
- (sign_extend:SI (match_operand:QHIM 1 "nvptx_reg_or_mem_operand" "R,m")))]
+ (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
""
"@
%.\\tcvt.s32.s%T1\\t%0, %1;
@@ -313,7 +269,7 @@
(define_insn "extend<mode>di2"
[(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
- (sign_extend:DI (match_operand:QHSIM 1 "nvptx_reg_or_mem_operand" "R,m")))]
+ (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
""
"@
%.\\tcvt.s64.s%T1\\t%0, %1;
@@ -321,7 +277,7 @@
[(set_attr "subregs_ok" "true")])
(define_insn "trunchiqi2"
- [(set (match_operand:QI 0 "nvptx_reg_or_mem_operand" "=R,m")
+ [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
(truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
""
"@
@@ -330,7 +286,7 @@
[(set_attr "subregs_ok" "true")])
(define_insn "truncsi<mode>2"
- [(set (match_operand:QHIM 0 "nvptx_reg_or_mem_operand" "=R,m")
+ [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
(truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
""
"@
@@ -339,7 +295,7 @@
[(set_attr "subregs_ok" "true")])
(define_insn "truncdi<mode>2"
- [(set (match_operand:QHSIM 0 "nvptx_reg_or_mem_operand" "=R,m")
+ [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
(truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
""
"@
@@ -347,14 +303,6 @@
%.\\tst%A0.u%T0\\t%0, %1;"
[(set_attr "subregs_ok" "true")])
-;; Pointer address space conversion
-(define_insn "convaddr_<mode>"
- [(set (match_operand:P 0 "nvptx_register_operand" "=R")
- (unspec:P [(match_operand:P 1 "symbolic_operand" "s")]
- UNSPEC_TO_GENERIC))]
- ""
- "%.\\tcvta%D1%t0\\t%0, %1;")
-
;; Integer arithmetic
(define_insn "add<mode>3"
@@ -1140,7 +1088,7 @@
(define_expand "oacc_fork"
[(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
- (match_operand:SI 1 "nvptx_general_operand" ""))
+ (match_operand:SI 1 "general_operand" ""))
(unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
UNSPECV_FORKED)]
""
@@ -1153,7 +1101,7 @@
(define_expand "oacc_join"
[(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
- (match_operand:SI 1 "nvptx_general_operand" ""))
+ (match_operand:SI 1 "general_operand" ""))
(unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
UNSPECV_JOIN)]
""
@@ -1194,20 +1142,6 @@
""
"%.\\tmov.b64\\t%0, {%1,%2};")
-(define_insn "worker_load<mode>"
- [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
- (unspec:SDISDFM [(match_operand:SDISDFM 1 "memory_operand" "m")]
- UNSPEC_SHARED_DATA))]
- ""
- "%.\\tld.shared%u0\\t%0, %1;")
-
-(define_insn "worker_store<mode>"
- [(set (unspec:SDISDFM [(match_operand:SDISDFM 1 "memory_operand" "=m")]
- UNSPEC_SHARED_DATA)
- (match_operand:SDISDFM 0 "nvptx_register_operand" "R"))]
- ""
- "%.\\tst.shared%u1\\t%1, %0;")
-
;; Atomic insns.
(define_expand "atomic_compare_and_swap<mode>"
@@ -1281,6 +1215,7 @@
(define_code_iterator any_logic [and ior xor])
(define_code_attr logic [(and "and") (ior "or") (xor "xor")])
+;; Currently disabled until we add better subtarget support - requires sm_32.
(define_insn "atomic_fetch_<logic><mode>"
[(set (match_operand:SDIM 1 "memory_operand" "+m")
(unspec_volatile:SDIM
@@ -1290,10 +1225,9 @@
UNSPECV_LOCK))
(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
(match_dup 1))]
- "<MODE>mode == SImode || TARGET_SM35"
+ "0"
"%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;")
-;; ??? Mark as not predicable later?
(define_insn "nvptx_barsync"
[(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")]
UNSPECV_BARSYNC)]
Index: config/nvptx/nvptx.c
===================================================================
--- config/nvptx/nvptx.c (revision 232059)
+++ config/nvptx/nvptx.c (working copy)
@@ -1,5 +1,5 @@
/* Target code for NVPTX.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
Contributed by Bernd Schmidt <ber...@codesourcery.com>
This file is part of GCC.
@@ -128,14 +128,12 @@ static GTY((cache)) hash_table<tree_hash
shared across TUs (taking the largest size). */
static unsigned worker_bcast_size;
static unsigned worker_bcast_align;
-#define worker_bcast_name "__worker_bcast"
static GTY(()) rtx worker_bcast_sym;
/* Buffer needed for worker reductions. This has to be distinct from
the worker broadcast array, as both may be live concurrently. */
static unsigned worker_red_size;
static unsigned worker_red_align;
-#define worker_red_name "__worker_red"
static GTY(()) rtx worker_red_sym;
/* Global lock variable, needed for 128bit worker & gang reductions. */
@@ -147,7 +145,7 @@ static struct machine_function *
nvptx_init_machine_status (void)
{
struct machine_function *p = ggc_cleared_alloc<machine_function> ();
- p->ret_reg_mode = VOIDmode;
+ p->return_mode = VOIDmode;
return p;
}
@@ -161,6 +159,13 @@ nvptx_option_override (void)
flag_toplevel_reorder = 1;
/* Assumes that it will see only hard registers. */
flag_var_tracking = 0;
+
+ if (write_symbols == DBX_DEBUG)
+ /* The stabs testcases want to know stabs isn't supported. */
+ sorry ("stabs debug format not supported");
+
+ /* Actually we don't have any debug format, but don't be
+ unnecessarily noisy. */
write_symbols = NO_DEBUG;
debug_info_level = DINFO_LEVEL_NONE;
@@ -172,11 +177,11 @@ nvptx_option_override (void)
declared_libfuncs_htab
= hash_table<declared_libfunc_hasher>::create_ggc (17);
- worker_bcast_sym = gen_rtx_SYMBOL_REF (Pmode, worker_bcast_name);
+ worker_bcast_sym = gen_rtx_SYMBOL_REF (Pmode, "__worker_bcast");
SET_SYMBOL_DATA_AREA (worker_bcast_sym, DATA_AREA_SHARED);
worker_bcast_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
- worker_red_sym = gen_rtx_SYMBOL_REF (Pmode, worker_red_name);
+ worker_red_sym = gen_rtx_SYMBOL_REF (Pmode, "__worker_red");
SET_SYMBOL_DATA_AREA (worker_red_sym, DATA_AREA_SHARED);
worker_red_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
}
@@ -487,7 +492,7 @@ nvptx_strict_argument_naming (cumulative
static rtx
nvptx_libcall_value (machine_mode mode, const_rtx)
{
- if (cfun->machine->start_call == NULL_RTX)
+ if (!cfun->machine->doing_call)
/* Pretend to return in a hard reg for early uses before pseudos can be
generated. */
return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM);
@@ -506,7 +511,7 @@ nvptx_function_value (const_tree type, c
if (outgoing)
{
- cfun->machine->ret_reg_mode = mode;
+ cfun->machine->return_mode = mode;
return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM);
}
@@ -525,8 +530,9 @@ nvptx_function_value_regno_p (const unsi
reference in memory. */
static bool
-nvptx_pass_by_reference (cumulative_args_t ARG_UNUSED (cum), machine_mode mode,
- const_tree type, bool ARG_UNUSED (named))
+nvptx_pass_by_reference (cumulative_args_t ARG_UNUSED (cum),
+ machine_mode mode, const_tree type,
+ bool ARG_UNUSED (named))
{
return pass_in_memory (mode, type, false);
}
@@ -549,18 +555,6 @@ nvptx_promote_function_mode (const_tree
return promote_arg (mode, for_return || !type || TYPE_ARG_TYPES (funtype));
}
-/* Implement TARGET_STATIC_CHAIN. */
-
-static rtx
-nvptx_static_chain (const_tree fndecl, bool incoming_p)
-{
- if (!DECL_STATIC_CHAIN (fndecl))
- return NULL;
-
- return gen_rtx_REG (Pmode, (incoming_p ? STATIC_CHAIN_REGNUM
- : OUTGOING_STATIC_CHAIN_REGNUM));
-}
-
/* Helper for write_arg. Emit a single PTX argument of MODE, either
in a prototype, or as copy in a function prologue. ARGNO is the
index of this argument in the PTX function. FOR_REG is negative,
@@ -588,12 +582,15 @@ write_arg_mode (std::stringstream &s, in
else
s << "%ar" << argno;
s << ";\n";
- s << "\tld.param" << ptx_type << " ";
- if (for_reg)
- s << reg_names[for_reg];
- else
- s << "%ar" << argno;
- s << ", [%in_ar" << argno << "];\n";
+ if (argno >= 0)
+ {
+ s << "\tld.param" << ptx_type << " ";
+ if (for_reg)
+ s << reg_names[for_reg];
+ else
+ s << "%ar" << argno;
+ s << ", [%in_ar" << argno << "];\n";
+ }
}
return argno + 1;
}
@@ -625,7 +622,7 @@ write_arg_type (std::stringstream &s, in
{
/* Complex types are sent as two separate args. */
type = TREE_TYPE (type);
- mode = TYPE_MODE (type);
+ mode = TYPE_MODE (type);
prototyped = true;
}
@@ -678,14 +675,14 @@ write_return_type (std::stringstream &s,
optimization-level specific, so no caller can make use of
this data, but more importantly for us, we must ensure it
doesn't change the PTX prototype. */
- mode = (machine_mode) cfun->machine->ret_reg_mode;
+ mode = (machine_mode) cfun->machine->return_mode;
if (mode == VOIDmode)
return return_in_mem;
- /* Clear ret_reg_mode to inhibit copy of retval to non-existent
+ /* Clear return_mode to inhibit copy of retval to non-existent
retval parameter. */
- cfun->machine->ret_reg_mode = VOIDmode;
+ cfun->machine->return_mode = VOIDmode;
}
else
mode = promote_return (mode);
@@ -917,16 +914,20 @@ nvptx_maybe_record_fnsym (rtx sym)
}
/* Emit a local array to hold some part of a conventional stack frame
- and initialize REGNO to point to it. */
+ and initialize REGNO to point to it. If the size is zero, it'll
+ never be valid to dereference, so we can simply initialize to
+ zero. */
static void
init_frame (FILE *file, int regno, unsigned align, unsigned size)
{
- fprintf (file, "\t.reg.u%d %s;\n"
- "\t.local.align %d .b8 %s_ar[%u];\n"
- "\tcvta.local.u%d %s, %s_ar;\n",
- POINTER_SIZE, reg_names[regno],
- align, reg_names[regno], size ? size : 1,
+ if (size)
+ fprintf (file, "\t.local .align %d .b8 %s_ar[%u];\n",
+ align, reg_names[regno], size);
+ fprintf (file, "\t.reg.u%d %s;\n",
+ POINTER_SIZE, reg_names[regno]);
+ fprintf (file, (size ? "\tcvta.local.u%d %s, %s_ar;\n"
+ : "\tmov.u%d %s, 0;\n"),
POINTER_SIZE, reg_names[regno], reg_names[regno]);
}
@@ -981,17 +982,26 @@ nvptx_declare_function_name (FILE *file,
}
if (stdarg_p (fntype))
- argno = write_arg_type (s, ARG_POINTER_REGNUM, argno, ptr_type_node, true);
-
- if (DECL_STATIC_CHAIN (decl))
- argno = write_arg_type (s, STATIC_CHAIN_REGNUM, argno, ptr_type_node,
+ argno = write_arg_type (s, ARG_POINTER_REGNUM, argno, ptr_type_node,
true);
+ if (DECL_STATIC_CHAIN (decl) || cfun->machine->has_chain)
+ write_arg_type (s, STATIC_CHAIN_REGNUM,
+ DECL_STATIC_CHAIN (decl) ? argno : -1, ptr_type_node,
+ true);
+
fprintf (file, "%s", s.str().c_str());
- if (regno_reg_rtx[OUTGOING_STATIC_CHAIN_REGNUM] != const0_rtx)
- fprintf (file, "\t.reg.u%d %s;\n", GET_MODE_BITSIZE (Pmode),
- reg_names[OUTGOING_STATIC_CHAIN_REGNUM]);
+ /* Declare a local var for outgoing varargs. */
+ if (cfun->machine->has_varadic)
+ init_frame (file, STACK_POINTER_REGNUM,
+ UNITS_PER_WORD, crtl->outgoing_args_size);
+
+ /* Declare a local variable for the frame. */
+ HOST_WIDE_INT sz = get_frame_size ();
+ if (sz || cfun->machine->has_chain)
+ init_frame (file, FRAME_POINTER_REGNUM,
+ crtl->stack_alignment_needed / BITS_PER_UNIT, sz);
/* Declare the pseudos we have as ptx registers. */
int maxregs = max_reg_num ();
@@ -1010,17 +1020,6 @@ nvptx_declare_function_name (FILE *file,
}
}
- /* Declare a local var for outgoing varargs. */
- if (cfun->machine->has_call_with_varargs)
- init_frame (file, STACK_POINTER_REGNUM,
- UNITS_PER_WORD, crtl->outgoing_args_size);
-
- /* Declare a local variable for the frame. */
- HOST_WIDE_INT sz = get_frame_size ();
- if (sz || cfun->machine->has_call_with_sc)
- init_frame (file, FRAME_POINTER_REGNUM,
- crtl->stack_alignment_needed / BITS_PER_UNIT, sz);
-
/* Emit axis predicates. */
if (cfun->machine->axis_predicate[0])
nvptx_init_axis_predicate (file,
@@ -1036,7 +1035,7 @@ nvptx_declare_function_name (FILE *file,
const char *
nvptx_output_return (void)
{
- machine_mode mode = (machine_mode)cfun->machine->ret_reg_mode;
+ machine_mode mode = (machine_mode)cfun->machine->return_mode;
if (mode != VOIDmode)
fprintf (asm_out_file, "\tst.param%s\t[%s_out], %s;\n",
@@ -1076,20 +1075,28 @@ nvptx_get_drap_rtx (void)
argument to the next call. */
static void
-nvptx_call_args (rtx arg, tree funtype)
+nvptx_call_args (rtx arg, tree fntype)
{
- if (cfun->machine->start_call == NULL_RTX)
+ if (!cfun->machine->doing_call)
{
- cfun->machine->call_args = NULL;
- cfun->machine->funtype = funtype;
- cfun->machine->start_call = const0_rtx;
+ cfun->machine->doing_call = true;
+ cfun->machine->is_varadic = false;
+ cfun->machine->num_args = 0;
+
+ if (fntype && stdarg_p (fntype))
+ {
+ cfun->machine->is_varadic = true;
+ cfun->machine->has_varadic = true;
+ cfun->machine->num_args++;
+ }
}
- if (arg == pc_rtx)
- return;
- rtx_expr_list *args_so_far = cfun->machine->call_args;
- if (REG_P (arg))
- cfun->machine->call_args = alloc_EXPR_LIST (VOIDmode, arg, args_so_far);
+ if (REG_P (arg) && arg != pc_rtx)
+ {
+ cfun->machine->num_args++;
+ cfun->machine->call_args = alloc_EXPR_LIST (VOIDmode, arg,
+ cfun->machine->call_args);
+ }
}
/* Implement the corresponding END_CALL_ARGS hook. Clear and free the
@@ -1098,7 +1105,7 @@ nvptx_call_args (rtx arg, tree funtype)
static void
nvptx_end_call_args (void)
{
- cfun->machine->start_call = NULL_RTX;
+ cfun->machine->doing_call = false;
free_EXPR_LIST_list (&cfun->machine->call_args);
}
@@ -1111,16 +1118,10 @@ nvptx_end_call_args (void)
void
nvptx_expand_call (rtx retval, rtx address)
{
- int nargs = 0;
rtx callee = XEXP (address, 0);
- rtx pat, t;
- rtvec vec;
rtx varargs = NULL_RTX;
unsigned parallel = 0;
- for (t = cfun->machine->call_args; t; t = XEXP (t, 1))
- nargs++;
-
if (!call_insn_operand (callee, Pmode))
{
callee = force_reg (Pmode, callee);
@@ -1133,7 +1134,7 @@ nvptx_expand_call (rtx retval, rtx addre
if (decl != NULL_TREE)
{
if (DECL_STATIC_CHAIN (decl))
- cfun->machine->has_call_with_sc = true;
+ cfun->machine->has_chain = true;
tree attr = get_oacc_fn_attrib (decl);
if (attr)
@@ -1154,35 +1155,31 @@ nvptx_expand_call (rtx retval, rtx addre
}
}
- if (cfun->machine->funtype
- && stdarg_p (cfun->machine->funtype))
+ unsigned nargs = cfun->machine->num_args;
+ if (cfun->machine->is_varadic)
{
varargs = gen_reg_rtx (Pmode);
emit_move_insn (varargs, stack_pointer_rtx);
- cfun->machine->has_call_with_varargs = true;
}
- vec = rtvec_alloc (nargs + 1 + (varargs ? 1 : 0));
- pat = gen_rtx_PARALLEL (VOIDmode, vec);
+ rtvec vec = rtvec_alloc (nargs + 1);
+ rtx pat = gen_rtx_PARALLEL (VOIDmode, vec);
int vec_pos = 0;
-
+
+ rtx call = gen_rtx_CALL (VOIDmode, address, const0_rtx);
rtx tmp_retval = retval;
- t = gen_rtx_CALL (VOIDmode, address, const0_rtx);
- if (retval != NULL_RTX)
+ if (retval)
{
if (!nvptx_register_operand (retval, GET_MODE (retval)))
tmp_retval = gen_reg_rtx (GET_MODE (retval));
- t = gen_rtx_SET (tmp_retval, t);
+ call = gen_rtx_SET (tmp_retval, call);
}
- XVECEXP (pat, 0, vec_pos++) = t;
+ XVECEXP (pat, 0, vec_pos++) = call;
/* Construct the call insn, including a USE for each argument pseudo
register. These will be used when printing the insn. */
for (rtx arg = cfun->machine->call_args; arg; arg = XEXP (arg, 1))
- {
- rtx this_arg = XEXP (arg, 0);
- XVECEXP (pat, 0, vec_pos++) = gen_rtx_USE (VOIDmode, this_arg);
- }
+ XVECEXP (pat, 0, vec_pos++) = gen_rtx_USE (VOIDmode, XEXP (arg, 0));
if (varargs)
XVECEXP (pat, 0, vec_pos++) = gen_rtx_USE (VOIDmode, varargs);
@@ -1390,7 +1387,6 @@ nvptx_gen_wcast (rtx reg, propagate_mask
}
addr = gen_rtx_MEM (mode, addr);
- addr = gen_rtx_UNSPEC (mode, gen_rtvec (1, addr), UNSPEC_SHARED_DATA);
if (pm == PM_read)
res = gen_rtx_SET (addr, reg);
else if (pm == PM_write)
@@ -1417,39 +1413,6 @@ nvptx_gen_wcast (rtx reg, propagate_mask
}
return res;
}
-
-/* When loading an operand ORIG_OP, verify whether an address space
- conversion to generic is required, and if so, perform it. Check
- for SYMBOL_REFs and record them if needed. Return either the
- original operand, or the converted one. */
-
-rtx
-nvptx_maybe_convert_symbolic_operand (rtx op)
-{
- if (GET_MODE (op) != Pmode)
- return op;
-
- rtx sym = op;
- if (GET_CODE (sym) == CONST)
- sym = XEXP (sym, 0);
- if (GET_CODE (sym) == PLUS)
- sym = XEXP (sym, 0);
-
- if (GET_CODE (sym) != SYMBOL_REF)
- return op;
-
- nvptx_maybe_record_fnsym (sym);
-
- nvptx_data_area area = SYMBOL_DATA_AREA (sym);
- if (area == DATA_AREA_GENERIC)
- return op;
-
- rtx dest = gen_reg_rtx (Pmode);
- emit_insn (gen_rtx_SET (dest,
- gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op),
- UNSPEC_TO_GENERIC)));
- return dest;
-}
/* Returns true if X is a valid address for use in a memory reference. */
@@ -1477,18 +1440,6 @@ nvptx_legitimate_address_p (machine_mode
return false;
}
}
-
-/* Implement HARD_REGNO_MODE_OK. We barely use hard regs, but we want
- to ensure that the return register's mode isn't changed. */
-
-bool
-nvptx_hard_regno_mode_ok (int regno, machine_mode mode)
-{
- if (regno != NVPTX_RETURN_REGNUM
- || cfun == NULL || cfun->machine->ret_reg_mode == VOIDmode)
- return true;
- return mode == cfun->machine->ret_reg_mode;
-}
/* Machinery to output constant initializers. When beginning an
initializer, we decide on a fragment size (which is visible in ptx
@@ -1767,6 +1718,11 @@ nvptx_globalize_label (FILE *, const cha
static void
nvptx_assemble_undefined_decl (FILE *file, const char *name, const_tree decl)
{
+ /* The middle end can place constant pool decls into the varpool as
+ undefined. Until that is fixed, catch the problem here. */
+ if (DECL_IN_CONSTANT_POOL (decl))
+ return;
+
write_var_marker (file, false, TREE_PUBLIC (decl), name);
fprintf (file, "\t.extern ");
@@ -1774,7 +1730,7 @@ nvptx_assemble_undefined_decl (FILE *fil
nvptx_assemble_decl_begin (file, name, section_for_decl (decl),
TREE_TYPE (decl), size ? tree_to_shwi (size) : 0,
DECL_ALIGN (decl));
- fprintf (file, ";\n");
+ nvptx_assemble_decl_end ();
}
/* Output a pattern for a move instruction. */
@@ -1788,6 +1744,16 @@ nvptx_output_mov_insn (rtx dst, rtx src)
machine_mode src_inner = (GET_CODE (src) == SUBREG
? GET_MODE (XEXP (src, 0)) : dst_mode);
+ rtx sym = src;
+ if (GET_CODE (sym) == CONST)
+ sym = XEXP (XEXP (sym, 0), 0);
+ if (SYMBOL_REF_P (sym))
+ {
+ if (SYMBOL_DATA_AREA (sym) != DATA_AREA_GENERIC)
+ return "%.\tcvta%D1%t0\t%0, %1;";
+ nvptx_maybe_record_fnsym (sym);
+ }
+
if (src_inner == dst_inner)
return "%.\tmov%t0\t%0, %1;";
@@ -1847,14 +1813,14 @@ nvptx_output_call_insn (rtx_insn *insn,
{
rtx t = XEXP (XVECEXP (pat, 0, argno), 0);
machine_mode mode = GET_MODE (t);
+ const char *ptx_type = nvptx_ptx_type_from_mode (mode, false);
/* Mode splitting has already been done. */
- fprintf (asm_out_file, "\t\t.param%s %%out_arg%d%s;\n",
- nvptx_ptx_type_from_mode (mode, false), argno,
- mode == QImode || mode == HImode ? "[1]" : "");
- fprintf (asm_out_file, "\t\tst.param%s [%%out_arg%d], %%r%d;\n",
- nvptx_ptx_type_from_mode (mode, false), argno,
- REGNO (t));
+ fprintf (asm_out_file, "\t\t.param%s %%out_arg%d;\n"
+ "\t\tst.param%s [%%out_arg%d], ",
+ ptx_type, argno, ptx_type, argno);
+ output_reg (asm_out_file, REGNO (t), VOIDmode);
+ fprintf (asm_out_file, ";\n");
}
fprintf (asm_out_file, "\t\tcall ");
@@ -1878,8 +1844,7 @@ nvptx_output_call_insn (rtx_insn *insn,
}
if (decl && DECL_STATIC_CHAIN (decl))
{
- fprintf (asm_out_file, ", %s%s", open,
- reg_names [OUTGOING_STATIC_CHAIN_REGNUM]);
+ fprintf (asm_out_file, ", %s%s", open, reg_names [STATIC_CHAIN_REGNUM]);
open = "";
}
if (!open[0])
@@ -3379,7 +3344,7 @@ nvptx_wpropagate (bool pre_p, basic_bloc
/* Stuff was emitted, initialize the base pointer now. */
rtx init = gen_rtx_SET (data.base, worker_bcast_sym);
emit_insn_after (init, insn);
-
+
if (worker_bcast_size < data.offset)
worker_bcast_size = data.offset;
}
@@ -3773,7 +3738,7 @@ nvptx_reorg (void)
/* Mark unused regs as unused. */
int max_regs = max_reg_num ();
- for (int i = 0; i < max_regs; i++)
+ for (int i = LAST_VIRTUAL_REGISTER + 1; i < max_regs; i++)
if (REG_N_SETS (i) == 0 && REG_N_REFS (i) == 0)
regno_reg_rtx[i] = const0_rtx;
@@ -3946,6 +3911,18 @@ nvptx_file_start (void)
fputs ("// END PREAMBLE\n", asm_out_file);
}
+/* Emit a declaration for a worker-level buffer in .shared memory. */
+
+static void
+write_worker_buffer (FILE *file, rtx sym, unsigned align, unsigned size)
+{
+ const char *name = XSTR (sym, 0);
+
+ write_var_marker (file, true, false, name);
+ fprintf (file, ".shared .align %d .u8 %s[%d];\n",
+ align, name, size);
+}
+
/* Write out the function declarations we've collected and declare storage
for the broadcast buffer. */
@@ -3959,30 +3936,12 @@ nvptx_file_end (void)
fputs (func_decls.str().c_str(), asm_out_file);
if (worker_bcast_size)
- {
- /* Define the broadcast buffer. */
-
- worker_bcast_size = (worker_bcast_size + worker_bcast_align - 1)
- & ~(worker_bcast_align - 1);
-
- write_var_marker (asm_out_file, true, false, worker_bcast_name);
- fprintf (asm_out_file, ".shared .align %d .u8 %s[%d];\n",
- worker_bcast_align,
- worker_bcast_name, worker_bcast_size);
- }
+ write_worker_buffer (asm_out_file, worker_bcast_sym,
+ worker_bcast_align, worker_bcast_size);
if (worker_red_size)
- {
- /* Define the reduction buffer. */
-
- worker_red_size = ((worker_red_size + worker_red_align - 1)
- & ~(worker_red_align - 1));
-
- write_var_marker (asm_out_file, true, false, worker_red_name);
- fprintf (asm_out_file, ".shared .align %d .u8 %s[%d];\n",
- worker_red_align,
- worker_red_name, worker_red_size);
- }
+ write_worker_buffer (asm_out_file, worker_red_sym,
+ worker_red_align, worker_red_size);
}
/* Expander for the shuffle builtins. */
@@ -4796,7 +4755,7 @@ nvptx_goacc_reduction_teardown (gcall *c
/* NVPTX reduction expander. */
-void
+static void
nvptx_goacc_reduction (gcall *call)
{
unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
@@ -4862,9 +4821,6 @@ nvptx_goacc_reduction (gcall *call)
#define TARGET_OMIT_STRUCT_RETURN_REG true
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING nvptx_strict_argument_naming
-#undef TARGET_STATIC_CHAIN
-#define TARGET_STATIC_CHAIN nvptx_static_chain
-
#undef TARGET_CALL_ARGS
#define TARGET_CALL_ARGS nvptx_call_args
#undef TARGET_END_CALL_ARGS