Re: [patch] nvptx libgcc atomic routines

2018-10-05 Thread Tom de Vries
On 9/26/18 8:33 PM, Cesar Philippidis wrote:
> This patch adds nvptx support for the atomic FETCH_AND_OP functions. I
> recall that this used to be important for OpenACC reductions back in the
> GCC 5.0 days before Nathan split reductions into four phases. Nowadays,
> atomic reductions use a spin lock that's implemented directly by the
> nvptx BE. Therefore, I'm not sure if the nvptx port still needs support
> for atomic fetch_and_*.
> 
> Tom and Thomas, do either of you have any thoughts on this? Should I
> commit it to trunk?

I'd say no. I can think of only one possible use for this, which is to
be able to use -fno-inline-atomics to work around problems in atomics in
ptx, and I think that that's not sufficiently valuable to start
maintaining these routines in trunk.

Thanks,
- Tom

> I bootstrapped and regtested it for x86_64 Linux
> with nvptx offloading.


[patch] nvptx libgcc atomic routines

2018-09-26 Thread Cesar Philippidis
This patch adds nvptx support for the atomic FETCH_AND_OP functions. I
recall that this used to be important for OpenACC reductions back in the
GCC 5.0 days before Nathan split reductions into four phases. Nowadays,
atomic reductions use a spin lock that's implemented directly by the
nvptx BE. Therefore, I'm not sure if the nvptx port still needs support
for atomic fetch_and_*.

Tom and Thomas, do either of you have any thoughts on this? Should I
commit it to trunk? I bootstrapped and regtested it for x86_64 Linux
with nvptx offloading.

Thanks,
Cesar
nvptx libgcc atomic routines

2018-XX-YY  Cesar Philippidis  <cesar@codesourcery.com>

	libgcc/
	* config/nvptx/atomic.c: New file.
	* config/nvptx/t-nvptx (LIB2ADD): Include it.

(cherry picked from gomp-4_0-branch r223177)
---
 libgcc/config/nvptx/atomic.c | 279 +++
 libgcc/config/nvptx/t-nvptx  |   3 +-
 2 files changed, 281 insertions(+), 1 deletion(-)
 create mode 100644 libgcc/config/nvptx/atomic.c

diff --git a/libgcc/config/nvptx/atomic.c b/libgcc/config/nvptx/atomic.c
new file mode 100644
index 0000000..ab6cf23ef9d
--- /dev/null
+++ b/libgcc/config/nvptx/atomic.c
@@ -0,0 +1,279 @@
+/* Atomic operations for PTX.
+   Copyright (C) 2015-2018 Free Software Foundation, Inc.
+   Contributed by Mentor Graphics.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* Kernel helper for compare-and-exchange.  */
+static int
+nvidia_cas (int oldval, int newval, int *ptr)
+{
+  int ret;
+
+  asm volatile ("atom.cas.b32 %0, [%1], %2, %3;" : "=r"(ret) : "r"(ptr),
+		"r"(oldval), "r"(newval));
+
+  return ret;
+}
+
+#define __kernel_cmpxchg (nvidia_cas)
+
+/* Kernel helper for memory barrier.  */
+static void
+__threadfence_block (void)
+{
+  asm volatile ("membar.cta;");
+}
+
+#define __kernel_dmb (__threadfence_block)
+
+#define HIDDEN
+
+/* Warning: this assumes that all nvptx targets are little endian.  */
+
+#define INVERT_MASK_1 0
+#define INVERT_MASK_2 0
+
+#define MASK_1 0xffu
+#define MASK_2 0xffffu
+
+#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP)\
+  int HIDDEN\
+  __sync_fetch_and_##OP##_4 (int *ptr, int val)\
+  {	\
+int failure, tmp;			\
+	\
+do {\
+  tmp = *ptr;			\
+  failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr);	\
+} while (failure != 0);		\
+	\
+return tmp;\
+  }
+
+FETCH_AND_OP_WORD (add,   , +)
+FETCH_AND_OP_WORD (sub,   , -)
+FETCH_AND_OP_WORD (or,, |)
+FETCH_AND_OP_WORD (and,   , &)
+FETCH_AND_OP_WORD (xor,   , ^)
+FETCH_AND_OP_WORD (nand, ~, &)
+
+#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH
+#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH
+
+/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for
+   subword-sized quantities.  */
+
+#define SUBWORD_SYNC_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH, RETURN)	\
+  TYPE HIDDEN\
+  NAME##_##RETURN (OP, WIDTH) (TYPE *ptr, TYPE val)			\
+  {	\
+int *wordptr = (int *) ((unsigned long) ptr & ~3);			\
+unsigned int mask, shift, oldval, newval;\
+int failure;			\
+	\
+shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;	\
+mask = MASK_##WIDTH << shift;	\
+	\
+do {\
+  oldval = *wordptr;		\
+  newval = ((PFX_OP (((oldval & mask) >> shift)			\
+			 INF_OP (unsigned int) val)) << shift) & mask;	\
+  newval |= oldval & ~mask;		\
+  failure = __kernel_cmpxchg (oldval, newval, wordptr);		\
+} while (failure != 0);		\
+	\
+return (RETURN & mask) >> shift;	\
+  }
+
+SUBWORD_SYNC_OP (add,   , +, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (sub,   , -, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (or,, |, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (and,   , &, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (xor,   , ^, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, oldval)
+
+SUBWORD_SYNC_OP (add,   , +, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (sub,   , -, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (or,, |, unsigned char, 1, oldval)