[PATCH 3/3] openmp: Add support for iterators in to/from clauses (C/C++)

2024-05-24 Thread Kwok Cheung Yeung
This patch extends the previous patch to cover to/from clauses in 
'target update'.

From 99addc124535307b50fbdeb66c4f90bb0cbeb041 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Mon, 15 Apr 2024 13:50:22 +0100
Subject: [PATCH 3/3] openmp: Add support for iterators in to/from clauses
 (C/C++)

This adds support for iterators in 'to' and 'from' clauses in the
'target update' OpenMP directive.

2024-05-24  Kwok Cheung Yeung  

gcc/c/
* c-parser.cc (c_parser_omp_clause_from_to): Parse 'iterator' modifier.

gcc/cp/
* parser.cc (cp_parser_omp_clause_from_to): Parse 'iterator' modifier.

gcc/
* gimplify.cc (gimplify_omp_map_iterators): Gimplify iterators in
to/from clauses.
(gimplify_scan_omp_clauses): Call gimplify_omp_map_iterators once to
handle clauses with iterators, then skip subsequent iterator clauses.
* omp-low.cc (scan_sharing_clauses): Skip firstprivate handling for
to/from clauses with iterators.
(lower_omp_target): Handle kinds for to/from clauses with iterators.
* tree-pretty-print.cc (dump_omp_clause): Call dump_omp_map_iterators
for to/from clauses with iterators.

gcc/testsuite/
* c-c++-common/gomp/target-update-iterator-1.c: New.
* c-c++-common/gomp/target-update-iterator-2.c: New.
* c-c++-common/gomp/target-update-iterator-3.c: New.

libgomp/
* target.c (gomp_update): Call gomp_merge_iterator_maps.  Free
allocated variables.
* testsuite/libgomp.c-c++-common/target-update-iterators-1.c: New.
* testsuite/libgomp.c-c++-common/target-update-iterators-2.c: New.
* testsuite/libgomp.c-c++-common/target-update-iterators-3.c: New.
---
 gcc/c/c-parser.cc | 105 ++--
 gcc/cp/parser.cc  | 116 --
 gcc/gimplify.cc   |  17 ++-
 gcc/omp-low.cc|  24 +++-
 .../gomp/target-update-iterator-1.c   |  20 +++
 .../gomp/target-update-iterator-2.c   |  17 +++
 .../gomp/target-update-iterator-3.c   |  17 +++
 gcc/tree-pretty-print.cc  |  20 ++-
 libgomp/target.c  |  12 ++
 .../target-update-iterators-1.c   |  65 ++
 .../target-update-iterators-2.c   |  57 +
 .../target-update-iterators-3.c   |  66 ++
 12 files changed, 509 insertions(+), 27 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/gomp/target-update-iterator-1.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/target-update-iterator-2.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/target-update-iterator-3.c
 create mode 100644 
libgomp/testsuite/libgomp.c-c++-common/target-update-iterators-1.c
 create mode 100644 
libgomp/testsuite/libgomp.c-c++-common/target-update-iterators-2.c
 create mode 100644 
libgomp/testsuite/libgomp.c-c++-common/target-update-iterators-3.c

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 2281148561c..6353b15d64f 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -19185,8 +19185,11 @@ c_parser_omp_clause_device_type (c_parser *parser, 
tree list)
to ( variable-list )
 
OpenMP 5.1:
-   from ( [present :] variable-list )
-   to ( [present :] variable-list ) */
+   from ( [motion-modifier[,] [motion-modifier[,]...]:] variable-list )
+   to ( [motion-modifier[,] [motion-modifier[,]...]:] variable-list )
+
+   motion-modifier:
+ present | iterator (iterators-definition)  */
 
 static tree
 c_parser_omp_clause_from_to (c_parser *parser, enum omp_clause_code kind,
@@ -19197,15 +19200,88 @@ c_parser_omp_clause_from_to (c_parser *parser, enum 
omp_clause_code kind,
   if (!parens.require_open (parser))
 return list;
 
+  int pos = 1, colon_pos = 0;
+  int iterator_length = 0;
+  while (c_parser_peek_nth_token_raw (parser, pos)->type == CPP_NAME)
+{
+  if (c_parser_peek_nth_token_raw (parser, pos + 1)->type
+ == CPP_OPEN_PAREN)
+   {
+ unsigned int n = pos + 2;
+ if (c_parser_check_balanced_raw_token_sequence (parser, &n)
+&& (c_parser_peek_nth_token_raw (parser, n)->type
+== CPP_CLOSE_PAREN))
+   {
+ iterator_length = n - pos + 1;
+ pos = n;
+   }
+   }
+  if (c_parser_peek_nth_token_raw (parser, pos + 1)->type == CPP_COMMA)
+   pos += 2;
+  else
+   pos++;
+  if (c_parser_peek_nth_token_raw (parser, pos)->type == CPP_COLON)
+   {
+ colon_pos = pos;
+ break;
+   }
+}
+
   bool present = false;
-  c_token *token = c_parser_peek_token (parser);
+  tree iterators = NULL_TREE;
 
-  if (token->type == CPP_NAME
-  && strcmp (IDENTIFIER_POINTER (token->value), "present") == 0
-  && c_parser_peek_2nd_token (parser)-&g

[PATCH 2/3] openmp: Add support for iterators in map clauses (C/C++)

2024-05-24 Thread Kwok Cheung Yeung
This patch modifies the C and C++ parsers to accept an iterator as a map 
type modifier, encoded in the same way as the depend and affinity 
clauses. When finishing the clauses, clauses with iterators are treated 
separately from ones without to avoid clashes (e.g. iterating over x[i] 
will likely generate clauses to map x).


During gimplification, gimplify_omp_map_iterators is called during 
scanning if a map clause encountered has any iterators. This scans all 
the remaining clauses in one go, as iterators may be shared between 
clauses. Later clauses with iterators are simply skipped over.


For each map clause with an iterator, gimplify_omp_map_iterators 
generates a loop (or multiple loops, if the iterator is 
multidimensional) to iterate over the iterator expression, storing the 
result in a new array (constant-sized for now, we could dynamically 
allocate the array for non-constant iteration bounds). The data array 
stores the total number of iterations in the first element, then the 
address generated by the iterator expression and the OMP_CLAUSE_SIZE 
(since the iteration variables may occur within the size tree) for each 
iteration. The clause is then rewritten to point to the new array. The 
original clause decl is no longer directly relevant, but is kept around 
for informational purposes and to help with clause sorting. The original 
OMP_CLAUSE_SIZE is set to NULL_TREE.


When OMP lowering clauses with iterators, the data array holding the 
expanded iterator info is allocated to a field in the omp_data, and the 
size is set to SIZE_MAX to mark the entry as coming from an expanded 
iterator.


Libgomp has a new function gomp_merge_iterator_maps which identifies 
data coming from an iterator, and effectively creates new maps 
on-the-fly from the iterator info array, inserting them into the list of 
mappings at the point where iterator data occurred.

From b2e8ff46929d5a2781781486ec942b344056d78b Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Tue, 12 Mar 2024 22:51:06 +0000
Subject: [PATCH 2/3] openmp: Add support for iterators in map clauses (C/C++)

This adds preliminary support for iterators in map clauses within OpenMP
'target' constructs (which includes constructs such as 'target enter data').

Iterators with non-constant loop bounds are not currently supported.

2024-05-24  Kwok Cheung Yeung  

gcc/c/
* c-parser.cc (c_parser_omp_clause_map): Parse 'iterator' modifier.
* c-typeck.cc (c_finish_omp_clauses): Call recursively on iterator
clauses.

gcc/cp/
* parser.cc (cp_parser_omp_clause_map): Parse 'iterator' modifier.
* semantics.cc (finish_omp_clauses): Call recursively on iterator
clauses.

gcc/
* gimplify.cc (find_var_decl): New.
(check_iterator_var_usage): New.
(gimplify_omp_map_iterators): New.
(omp_group_iterator): New.
(omp_get_attachment): Replace OMP_CLAUSE_DECL with
OMP_ITERATOR_CLAUSE_DECL.
(omp_group_last): Keep decls with and without iterators in separate
groups.
(omp_index_mapping_groups_1): Replace OMP_CLAUSE_DECL with
OMP_ITERATOR_CLAUSE_DECL.
(omp_tsort_mapping_groups_1): Likewise.
(omp_resolve_clause_dependencies): Likewise.  Prevent removal of
mapping if groups do not use the same iterators.
(omp_build_struct_sibling_lists): Replace OMP_CLAUSE_DECL with
OMP_ITERATOR_CLAUSE_DECL.
(gimplify_scan_omp_clauses): Call gimplify_omp_map_iterators once to
handle clauses with iterators, then skip subsequent iterator clauses.
* omp-low.cc (scan_sharing_clauses): Add field for iterator clauses.
(lower_omp_target): Add map entries for iterator clauses.
* tree-pretty-print.cc (dump_omp_map_iterators): New.
(dump_omp_clause): Call dump_omp_map_iterators for iterators in map
clauses.
* tree.h (OMP_ITERATOR_CLAUSE_DECL): New.

gcc/testsuite/
* c-c++-common/gomp/map-6.c (foo): Amend expected error message.
* c-c++-common/gomp/target-iterator-1.c: New.
* c-c++-common/gomp/target-iterator-2.c: New.
* c-c++-common/gomp/target-iterator-3.c: New.

libgomp/
* target.c (gomp_merge_iterator_maps): New.
(gomp_map_vars_internal): Call gomp_merge_iterator_maps.  Free
allocated variables.
* testsuite/libgomp.c-c++-common/target-map-iterators-1.c: New.
* testsuite/libgomp.c-c++-common/target-map-iterators-2.c: New.
* testsuite/libgomp.c-c++-common/target-map-iterators-3.c: New.
---
 gcc/c/c-parser.cc |  60 -
 gcc/c/c-typeck.cc |  68 ++
 gcc/cp/parser.cc  |  64 -
 gcc/cp/semantics.cc   |  65 ++
 gcc/gimplify.cc   | 220 +-
 gcc/omp-low.cc|  52 -
 gcc

[PATCH 1/3] openmp: Refactor handling of iterators

2024-05-24 Thread Kwok Cheung Yeung
This patch factors out the code to calculate the number of iterations 
required and to generate the iteration loop into separate functions from 
gimplify_omp_depend for reuse later.


I have also replaced the 'TREE_CODE (*tp) == TREE_LIST && ...' checks 
used for detecting an iterator clause with a macro OMP_ITERATOR_DECL_P, 
as it needs to be done frequently.

From 0439fce03c2b5fb2802eaf65831e28f548ca074b Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Tue, 12 Mar 2024 20:51:38 +0000
Subject: [PATCH 1/3] openmp: Refactor handling of iterators

Move code to calculate the iteration size and to generate the iterator
expansion loop into separate functions.

Use OMP_ITERATOR_DECL_P to check for iterators in clause declarations.

2024-05-24  Kwok Cheung Yeung  

gcc/c-family/
* c-omp.cc (c_finish_omp_depobj): Use OMP_ITERATOR_DECL_P.

gcc/c/
* c-typeck.cc (handle_omp_array_sections): Use OMP_ITERATOR_DECL_P.
(c_finish_omp_clauses): Likewise.

gcc/cp/
* pt.cc (tsubst_omp_clause_decl): Use OMP_ITERATOR_DECL_P.
* semantics.cc (handle_omp_array_sections): Likewise.
(finish_omp_clauses): Likewise.

gcc/
* gimplify.cc (gimplify_omp_affinity): Use OMP_ITERATOR_DECL_P.
(compute_iterator_count): New.
(build_iterator_loop): New.
(gimplify_omp_depend): Use OMP_ITERATOR_DECL_P, compute_iterator_count
and build_iterator_loop.
* tree-inline.cc (copy_tree_body_r): Use OMP_ITERATOR_DECL_P.
* tree-pretty-print.cc (dump_omp_clause): Likewise.
* tree.h (OMP_ITERATOR_DECL_P): New macro.
---
 gcc/c-family/c-omp.cc|   4 +-
 gcc/c/c-typeck.cc|  13 +-
 gcc/cp/pt.cc |   4 +-
 gcc/cp/semantics.cc  |   8 +-
 gcc/gimplify.cc  | 326 +++
 gcc/tree-inline.cc   |   5 +-
 gcc/tree-pretty-print.cc |   8 +-
 gcc/tree.h   |   6 +
 8 files changed, 175 insertions(+), 199 deletions(-)

diff --git a/gcc/c-family/c-omp.cc b/gcc/c-family/c-omp.cc
index c0e02aa422f..b56e49da62c 100644
--- a/gcc/c-family/c-omp.cc
+++ b/gcc/c-family/c-omp.cc
@@ -744,9 +744,7 @@ c_finish_omp_depobj (location_t loc, tree depobj,
  kind = OMP_CLAUSE_DEPEND_KIND (clause);
  t = OMP_CLAUSE_DECL (clause);
  gcc_assert (t);
- if (TREE_CODE (t) == TREE_LIST
- && TREE_PURPOSE (t)
- && TREE_CODE (TREE_PURPOSE (t)) == TREE_VEC)
+ if (OMP_ITERATOR_DECL_P (t))
{
  error_at (OMP_CLAUSE_LOCATION (clause),
"%<iterator%> modifier may not be specified on "
diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 7ecca9f58c6..b0fe80cf224 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -14218,9 +14218,7 @@ handle_omp_array_sections (tree &c, enum 
c_omp_region_type ort)
   tree *tp = &OMP_CLAUSE_DECL (c);
   if ((OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
|| OMP_CLAUSE_CODE (c) == OMP_CLAUSE_AFFINITY)
-  && TREE_CODE (*tp) == TREE_LIST
-  && TREE_PURPOSE (*tp)
-  && TREE_CODE (TREE_PURPOSE (*tp)) == TREE_VEC)
+  && OMP_ITERATOR_DECL_P (*tp))
 tp = &TREE_VALUE (*tp);
   tree first = handle_omp_array_sections_1 (c, *tp, types,
maybe_zero_len, first_non_one,
@@ -15409,9 +15407,7 @@ c_finish_omp_clauses (tree clauses, enum 
c_omp_region_type ort)
case OMP_CLAUSE_DEPEND:
case OMP_CLAUSE_AFFINITY:
  t = OMP_CLAUSE_DECL (c);
- if (TREE_CODE (t) == TREE_LIST
- && TREE_PURPOSE (t)
- && TREE_CODE (TREE_PURPOSE (t)) == TREE_VEC)
+ if (OMP_ITERATOR_DECL_P (t))
{
  if (TREE_PURPOSE (t) != last_iterators)
last_iterators_remove
@@ -15511,10 +15507,7 @@ c_finish_omp_clauses (tree clauses, enum 
c_omp_region_type ort)
  break;
}
}
- if (TREE_CODE (OMP_CLAUSE_DECL (c)) == TREE_LIST
- && TREE_PURPOSE (OMP_CLAUSE_DECL (c))
- && (TREE_CODE (TREE_PURPOSE (OMP_CLAUSE_DECL (c)))
- == TREE_VEC))
+ if (OMP_ITERATOR_DECL_P (OMP_CLAUSE_DECL (c)))
TREE_VALUE (OMP_CLAUSE_DECL (c)) = t;
  else
OMP_CLAUSE_DECL (c) = t;
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index e77c48e463e..26db4f6e0cf 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -17520,9 +17520,7 @@ tsubst_omp_clause_decl (tree decl, tree args, 
tsubst_flags_t complain,
 return decl;
 
   /* Handle OpenMP iterators.  */
-  if (TREE_CODE (decl) == TREE_LIST
-  && TREE_PURPOSE (decl)
-  && TREE_CODE (TREE_PURPOSE (decl)) == TREE_VEC)
+  if (OMP_ITERATOR_DECL_P (decl))
 {
   tree ret;
   if (iterator_cache[0] == TREE_PURPOSE (decl))
diff --git a/gcc/

[PATCH 0/3] openmp: Add support for iterators in OpenMP mapping clauses (C/C++)

2024-05-24 Thread Kwok Cheung Yeung
This series of patches adds support for OpenMP iterators in the 'map' 
clause of the 'target' construct (and its derivatives such as 'target 
enter data'), and the 'to' and 'from' clauses of the 'target update' 
construct, currently for C and C++ only.


The approach in this patch differs from Tobias' WFC patch 
(https://gcc.gnu.org/pipermail/gcc-patches/2021-December/586237.html) in 
that it does not rely on generating a callback function - instead, 
during Gimplification it generates loop(s) to evaluate every iteration 
of the iterator expression, and the results (i.e. addresses, as the 
expression should be an lvalue) are placed into a new array. This array 
is then used as the 'hostaddrs' entry for that particular map. Libgomp 
detects this (the corresponding size entry is set to SIZE_MAX, which 
shouldn't normally occur) and inserts the contents of the array into the 
map information before continuing on as normal.


Caveats:

- In section 2.21.7.1 of the OpenMP 5.1 standard, it states that 'If an 
expression that is used to form a list item in a map clause contains an 
iterator identifier, the list item instances that would result from 
different values of the iterator must not have the same containing array 
and must not have base pointers that share original storage' - this is 
currently not enforced (it would prohibit something like map 
(iterator(i=0:10), to: x[i]) while x is an int[]). As the expression in 
the iterator is more-or-less unbound, it would be very difficult to 
determine this at compile time. At runtime in libgomp, I suppose we 
could check every iterator-derived mapping to ensure that they all 
access unique entries in mem_map?


- The clause finishing currently generates spurious firstprivate maps - 
the patch currently just ignores them when in iterator clauses, but is 
there a better way of doing this?


- Clause reordering does not work too well with iterators. I believe the 
current approach to reordering on trunk is a bit buggy in the first 
place, so I just added enough to get the clauses through the pass 
without ICEing.


The GCC gomp tests and all the libgomp tests have been run without 
regressions on an x86-64 host with NVPTX offloading. Testing on AMD GCN 
to follow.


Kwok


[COMMITTED] libgomp: Update documentation for indirect calls in target regions

2024-02-15 Thread Kwok Cheung Yeung

Hi,

I have committed this patch to the libgomp documentation to reflect that 
indirect calls in offloaded target regions are now supported in C, C++ 
and Fortran.


Kwok

From b3b3bd250f0a7c22b7d46d3522c8b94c6a35d22a Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Thu, 15 Feb 2024 21:22:26 +0000
Subject: [PATCH] libgomp: Update documentation for indirect calls in target
 regions

Support for indirect calls to procedures/functions in offloaded target
regions is now available for C, C++ and Fortran.

2024-02-15  Kwok Cheung Yeung  

libgomp/
* libgomp.texi (OpenMP 5.1): Mark indirect call support as fully
implemented.
---
 libgomp/libgomp.texi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 6ee923099b7..f57190f203c 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -313,7 +313,7 @@ The OpenMP 4.5 specification is fully supported.
 @item Iterators in @code{target update} motion clauses and @code{map}
   clauses @tab N @tab
 @item Indirect calls to the device version of a procedure or function in
-  @code{target} regions @tab P @tab Only C and C++
+  @code{target} regions @tab Y @tab
 @item @code{interop} directive @tab N @tab
 @item @code{omp_interop_t} object support in runtime routines @tab N @tab
 @item @code{nowait} clause in @code{taskwait} directive @tab Y @tab
@@ -362,7 +362,7 @@ to address of matching mapped list item per 5.1, Sect. 
2.21.7.2 @tab N @tab
 @item For Fortran, diagnose placing declarative before/between @code{USE},
   @code{IMPORT}, and @code{IMPLICIT} as invalid @tab N @tab
 @item Optional comma between directive and clause in the @code{#pragma} form 
@tab Y @tab
-@item @code{indirect} clause in @code{declare target} @tab P @tab Only C and 
C++
+@item @code{indirect} clause in @code{declare target} @tab Y @tab
 @item @code{device_type(nohost)}/@code{device_type(host)} for variables @tab N 
@tab
 @item @code{present} modifier to the @code{map}, @code{to} and @code{from}
   clauses @tab Y @tab
-- 
2.34.1



Re: [PATCH v2] openmp, fortran: Add Fortran support for indirect clause on the declare target directive

2024-02-06 Thread Kwok Cheung Yeung
Oops. I thought exactly the same thing yesterday, but forgot to add the 
changes to my commit! Here is the updated version.


Kwok

On 06/02/2024 9:03 am, Tobias Burnus wrote:
LGTM. I just wonder whether there should be a value test and not just a 
does-not-crash-when-called test for the latter testcase, i.e.




+++ b/libgomp/testsuite/libgomp.fortran/declare-target-indirect-3.f90
@@ -0,0 +1,25 @@
+! { dg-do run }
+
+! Check that indirect calls work on procedures passed in via a dummy argument
+
+module m
+contains
+  subroutine bar
+!$omp declare target enter(bar) indirect

e.g. "integer function bar()" ... " bar = 42"

+  end subroutine
+
+  subroutine foo(f)
+procedure(bar) :: f
+
+!$omp target
+  call f

And then: if (f() /= 42) stop 1

+!$omp end target
+  end subroutine
+end module


Thanks,

Tobias
From 83b734aa63aa63ea5bb438bb59ee09b00869e0fd Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Mon, 5 Feb 2024 20:31:49 +0000
Subject: [PATCH] openmp, fortran: Add Fortran support for indirect clause on
 the declare target directive

2024-02-05  Kwok Cheung Yeung  

gcc/fortran/
* dump-parse-tree.cc (show_attr): Handle omp_declare_target_indirect
attribute.
* f95-lang.cc (gfc_gnu_attributes): Add entry for 'omp declare
target indirect'.
* gfortran.h (symbol_attribute): Add omp_declare_target_indirect
field.
(struct gfc_omp_clauses): Add indirect field.
* openmp.cc (omp_mask2): Add OMP_CLAUSE_INDIRECT.
(gfc_match_omp_clauses): Match indirect clause.
(OMP_DECLARE_TARGET_CLAUSES): Add OMP_CLAUSE_INDIRECT.
(gfc_match_omp_declare_target): Check omp_device_type and apply
omp_declare_target_indirect attribute to symbol if indirect clause
active.  Show warning if there are only device_type and/or indirect
clauses on the directive.
* trans-decl.cc (add_attributes_to_decl): Add 'omp declare target
indirect' attribute if symbol has indirect attribute set.

gcc/testsuite/
* gfortran.dg/gomp/declare-target-4.f90 (f1): Update expected warning.
* gfortran.dg/gomp/declare-target-indirect-1.f90: New.
* gfortran.dg/gomp/declare-target-indirect-2.f90: New.

libgomp/
* testsuite/libgomp.fortran/declare-target-indirect-1.f90: New.
* testsuite/libgomp.fortran/declare-target-indirect-2.f90: New.
* testsuite/libgomp.fortran/declare-target-indirect-3.f90: New.
---
 gcc/fortran/dump-parse-tree.cc|  2 +
 gcc/fortran/f95-lang.cc   |  2 +
 gcc/fortran/gfortran.h|  3 +-
 gcc/fortran/openmp.cc | 50 ++-
 gcc/fortran/trans-decl.cc |  4 ++
 .../gfortran.dg/gomp/declare-target-4.f90 |  2 +-
 .../gomp/declare-target-indirect-1.f90| 62 +++
 .../gomp/declare-target-indirect-2.f90| 25 
 .../declare-target-indirect-1.f90 | 39 
 .../declare-target-indirect-2.f90 | 53 
 .../declare-target-indirect-3.f90 | 35 +++
 11 files changed, 272 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/declare-target-indirect-1.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/declare-target-indirect-2.f90
 create mode 100644 
libgomp/testsuite/libgomp.fortran/declare-target-indirect-1.f90
 create mode 100644 
libgomp/testsuite/libgomp.fortran/declare-target-indirect-2.f90
 create mode 100644 
libgomp/testsuite/libgomp.fortran/declare-target-indirect-3.f90

diff --git a/gcc/fortran/dump-parse-tree.cc b/gcc/fortran/dump-parse-tree.cc
index 1563b810b98..7b154eb3ca7 100644
--- a/gcc/fortran/dump-parse-tree.cc
+++ b/gcc/fortran/dump-parse-tree.cc
@@ -914,6 +914,8 @@ show_attr (symbol_attribute *attr, const char * module)
 fputs (" OMP-DECLARE-TARGET", dumpfile);
   if (attr->omp_declare_target_link)
 fputs (" OMP-DECLARE-TARGET-LINK", dumpfile);
+  if (attr->omp_declare_target_indirect)
+fputs (" OMP-DECLARE-TARGET-INDIRECT", dumpfile);
   if (attr->elemental)
 fputs (" ELEMENTAL", dumpfile);
   if (attr->pure)
diff --git a/gcc/fortran/f95-lang.cc b/gcc/fortran/f95-lang.cc
index 358cb17fce2..67fda27aa3e 100644
--- a/gcc/fortran/f95-lang.cc
+++ b/gcc/fortran/f95-lang.cc
@@ -96,6 +96,8 @@ static const attribute_spec gfc_gnu_attributes[] =
 gfc_handle_omp_declare_target_attribute, NULL },
   { "omp declare target link", 0, 0, true,  false, false, false,
 gfc_handle_omp_declare_target_attribute, NULL },
+  { "omp declare target indirect", 0, 0, true,  false, false, false,
+gfc_handle_omp_declare_target_attribute, NULL },
   { "oacc function", 0, -1, true,  false, false, false,
 gfc_handle_omp_declare_target_attribute, NULL },
 };
diff --git a/g

[PATCH v2] openmp, fortran: Add Fortran support for indirect clause on the declare target directive

2024-02-05 Thread Kwok Cheung Yeung

Hi

As previously discussed, this version of the patch adds code to emit a 
warning when a directive like this:


!$omp declare target indirect(.true.)

is encountered (i.e. a target directive containing at least one clause, 
but no to/enter clause, which appears to violate the OpenMP standard). A 
test is also added to gfortran.dg/gomp/declare-target-indirect-1.f90 to 
test for this.


I have also added a declare-target-indirect-3.f90 test to libgomp to 
check that procedures passed via a dummy argument work properly when 
used in an indirect call.


Okay for mainline?

Thanks

Kwok

From f6662a7bc76d400fecb5013ad6d6ab3b00b8a6e7 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Mon, 5 Feb 2024 20:31:49 +0000
Subject: [PATCH] openmp, fortran: Add Fortran support for indirect clause on
 the declare target directive

2024-02-05  Kwok Cheung Yeung  

gcc/fortran/
* dump-parse-tree.cc (show_attr): Handle omp_declare_target_indirect
attribute.
* f95-lang.cc (gfc_gnu_attributes): Add entry for 'omp declare
target indirect'.
* gfortran.h (symbol_attribute): Add omp_declare_target_indirect
field.
(struct gfc_omp_clauses): Add indirect field.
* openmp.cc (omp_mask2): Add OMP_CLAUSE_INDIRECT.
(gfc_match_omp_clauses): Match indirect clause.
(OMP_DECLARE_TARGET_CLAUSES): Add OMP_CLAUSE_INDIRECT.
(gfc_match_omp_declare_target): Check omp_device_type and apply
omp_declare_target_indirect attribute to symbol if indirect clause
active.  Show warning if there are only device_type and/or indirect
clauses on the directive.
* trans-decl.cc (add_attributes_to_decl): Add 'omp declare target
indirect' attribute if symbol has indirect attribute set.

gcc/testsuite/
* gfortran.dg/gomp/declare-target-4.f90 (f1): Update expected warning.
* gfortran.dg/gomp/declare-target-indirect-1.f90: New.
* gfortran.dg/gomp/declare-target-indirect-2.f90: New.

libgomp/
* testsuite/libgomp.fortran/declare-target-indirect-1.f90: New.
* testsuite/libgomp.fortran/declare-target-indirect-2.f90: New.
* testsuite/libgomp.fortran/declare-target-indirect-3.f90: New.
---
 gcc/fortran/dump-parse-tree.cc|  2 +
 gcc/fortran/f95-lang.cc   |  2 +
 gcc/fortran/gfortran.h|  3 +-
 gcc/fortran/openmp.cc | 50 ++-
 gcc/fortran/trans-decl.cc |  4 ++
 .../gfortran.dg/gomp/declare-target-4.f90 |  2 +-
 .../gomp/declare-target-indirect-1.f90| 62 +++
 .../gomp/declare-target-indirect-2.f90| 25 
 .../declare-target-indirect-1.f90 | 39 
 .../declare-target-indirect-2.f90 | 53 
 .../declare-target-indirect-3.f90 | 25 
 11 files changed, 262 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/declare-target-indirect-1.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/declare-target-indirect-2.f90
 create mode 100644 
libgomp/testsuite/libgomp.fortran/declare-target-indirect-1.f90
 create mode 100644 
libgomp/testsuite/libgomp.fortran/declare-target-indirect-2.f90
 create mode 100644 
libgomp/testsuite/libgomp.fortran/declare-target-indirect-3.f90

diff --git a/gcc/fortran/dump-parse-tree.cc b/gcc/fortran/dump-parse-tree.cc
index 1563b810b98..7b154eb3ca7 100644
--- a/gcc/fortran/dump-parse-tree.cc
+++ b/gcc/fortran/dump-parse-tree.cc
@@ -914,6 +914,8 @@ show_attr (symbol_attribute *attr, const char * module)
 fputs (" OMP-DECLARE-TARGET", dumpfile);
   if (attr->omp_declare_target_link)
 fputs (" OMP-DECLARE-TARGET-LINK", dumpfile);
+  if (attr->omp_declare_target_indirect)
+fputs (" OMP-DECLARE-TARGET-INDIRECT", dumpfile);
   if (attr->elemental)
 fputs (" ELEMENTAL", dumpfile);
   if (attr->pure)
diff --git a/gcc/fortran/f95-lang.cc b/gcc/fortran/f95-lang.cc
index 358cb17fce2..67fda27aa3e 100644
--- a/gcc/fortran/f95-lang.cc
+++ b/gcc/fortran/f95-lang.cc
@@ -96,6 +96,8 @@ static const attribute_spec gfc_gnu_attributes[] =
 gfc_handle_omp_declare_target_attribute, NULL },
   { "omp declare target link", 0, 0, true,  false, false, false,
 gfc_handle_omp_declare_target_attribute, NULL },
+  { "omp declare target indirect", 0, 0, true,  false, false, false,
+gfc_handle_omp_declare_target_attribute, NULL },
   { "oacc function", 0, -1, true,  false, false, false,
 gfc_handle_omp_declare_target_attribute, NULL },
 };
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index fd73e4ce431..fd843a3241d 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -999,6 +999,7 @@ typedef struct
   /* Mentioned in OMP DECLARE TARGET.  */
   unsigned omp_declare_target:1;
   unsigned omp_

[PATCH v2] openmp: Change to using a hashtab to lookup offload target addresses for indirect function calls

2024-01-29 Thread Kwok Cheung Yeung

Can you please also update the comments to talk about hashtab instead of splay?



Hello

This version has the comments updated and removes a stray 'volatile' in 
the #ifdefed out code.


Thanks

Kwok

From 5737298f4f5e5471667b05e207b22c9c91b94ca0 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Mon, 29 Jan 2024 17:40:04 +0000
Subject: [PATCH 1/2] openmp: Change to using a hashtab to lookup offload
 target addresses for indirect function calls

A splay-tree was previously used to lookup equivalent target addresses
for a given host address on offload targets. However, as splay-trees can
modify their structure on lookup, they are not suitable for concurrent
access from separate teams/threads without some form of locking.  This
patch changes the lookup data structure to a hashtab instead, which does
not have these issues.

The call to build_indirect_map to initialize the data structure is now
called from just the first thread of the first team to avoid redundant
calls to this function.

2024-01-29  Kwok Cheung Yeung  

libgomp/
* config/accel/target-indirect.c: Include string.h and hashtab.h.
Remove include of splay-tree.h.  Update comments.
(splay_tree_prefix, splay_tree_c): Delete.
(struct indirect_map_t): New.
(hash_entry_type, htab_alloc, htab_free, htab_hash, htab_eq): New.
(GOMP_INDIRECT_ADD_MAP): Remove volatile qualifier.
(USE_SPLAY_TREE_LOOKUP): Rename to...
(USE_HASHTAB_LOOKUP): ..this.
(indirect_map, indirect_array): Delete.
(indirect_htab): New.
(build_indirect_map): Remove locking.  Build indirect map using
hashtab.
(GOMP_target_map_indirect_ptr): Use indirect_htab to lookup target
address.
(GOMP_target_map_indirect_ptr): Remove volatile qualifier.
* config/gcn/team.c (gomp_gcn_enter_kernel): Call build_indirect_map
from first thread of first team only.
* config/nvptx/team.c (gomp_nvptx_main): Likewise.
* testsuite/libgomp.c-c++-common/declare-target-indirect-2.c (main):
Add missing break statements.
---
 libgomp/config/accel/target-indirect.c| 83 ++-
 libgomp/config/gcn/team.c |  7 +-
 libgomp/config/nvptx/team.c   |  9 +-
 .../declare-target-indirect-2.c   | 14 ++--
 4 files changed, 63 insertions(+), 50 deletions(-)

diff --git a/libgomp/config/accel/target-indirect.c 
b/libgomp/config/accel/target-indirect.c
index c60fd547cb6..cfef1ddbc49 100644
--- a/libgomp/config/accel/target-indirect.c
+++ b/libgomp/config/accel/target-indirect.c
@@ -25,60 +25,73 @@
<http://www.gnu.org/licenses/>.  */
 
 #include <assert.h>
+#include <string.h>
 #include "libgomp.h"
 
-#define splay_tree_prefix indirect
-#define splay_tree_c
-#include "splay-tree.h"
+struct indirect_map_t
+{
+  void *host_addr;
+  void *target_addr;
+};
+
+typedef struct indirect_map_t *hash_entry_type;
+
+static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
+static inline void htab_free (void *ptr) { free (ptr); }
+
+#include "hashtab.h"
+
+static inline hashval_t
+htab_hash (hash_entry_type element)
+{
+  return hash_pointer (element->host_addr);
+}
 
-volatile void **GOMP_INDIRECT_ADDR_MAP = NULL;
+static inline bool
+htab_eq (hash_entry_type x, hash_entry_type y)
+{
+  return x->host_addr == y->host_addr;
+}
 
-/* Use a splay tree to lookup the target address instead of using a
-   linear search.  */
-#define USE_SPLAY_TREE_LOOKUP
+void **GOMP_INDIRECT_ADDR_MAP = NULL;
 
-#ifdef USE_SPLAY_TREE_LOOKUP
+/* Use a hashtab to lookup the target address instead of using a linear
+   search.  */
+#define USE_HASHTAB_LOOKUP
 
-static struct indirect_splay_tree_s indirect_map;
-static indirect_splay_tree_node indirect_array = NULL;
+#ifdef USE_HASHTAB_LOOKUP
 
-/* Build the splay tree used for host->target address lookups.  */
+static htab_t indirect_htab = NULL;
+
+/* Build the hashtab used for host->target address lookups.  */
 
 void
 build_indirect_map (void)
 {
   size_t num_ind_funcs = 0;
-  volatile void **map_entry;
-  static int lock = 0; /* == gomp_mutex_t lock; gomp_mutex_init (&lock); */
+  void **map_entry;
 
   if (!GOMP_INDIRECT_ADDR_MAP)
 return;
 
-  gomp_mutex_lock (&lock);
-
-  if (!indirect_array)
+  if (!indirect_htab)
 {
   /* Count the number of entries in the NULL-terminated address map.  */
   for (map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry;
   map_entry += 2, num_ind_funcs++);
 
-  /* Build splay tree for address lookup.  */
-  indirect_array = gomp_malloc (num_ind_funcs * sizeof (*indirect_array));
-  indirect_splay_tree_node array = indirect_array;
+  /* Build hashtab for address lookup.  */
+  indirect_htab = htab_create (num_ind_funcs);
   map_entry = GOMP_INDIRECT_ADDR_MAP;
 
-  for (int i = 0; i < num_ind_funcs; i++, array++)
+  

[committed] MAINTAINERS: Update my email address

2024-01-29 Thread Kwok Cheung Yeung
I have committed this to update my work email address in MAINTAINERS 
(but forgot to change my git user.mail first - oops!).


Thanks

Kwok Yeung
From f3fdaa3eecd155dbdc78c1ec9a259dfa4e379ea4 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Mon, 29 Jan 2024 16:40:49 +0000
Subject: [PATCH] MAINTAINERS: Update my work email address

* MAINTAINERS: Update my work email address.
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 8b11ddbc069..9d92be1f301 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -730,7 +730,7 @@ Canqun Yang 

 Fei Yang   
 Jeffrey Yasskin
 Joey Ye
-Kwok Cheung Yeung  
+Kwok Cheung Yeung  
 Greta Yorsh
 David Yuste
 Adhemerval Zanella 
-- 
2.34.1



[PATCH] openmp, fortran: Add Fortran support for indirect clause on the declare target directive

2024-01-22 Thread Kwok Cheung Yeung

Hi

This patch adds support for the indirect clause on the OpenMP 'declare 
target' directive in Fortran. As with the C and C++ front-ends, this 
applies the 'omp declare target indirect' attribute on affected function 
declarations. The C test cases have also been translated to Fortran 
where appropriate.


Okay for mainline?

Thanks

Kwok
From 545bdb2c8ab9a43e79c7a3a2992bd9edc7d08a6f Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Thu, 11 Jan 2024 19:52:53 +0000
Subject: [PATCH 2/2] openmp, fortran: Add Fortran support for indirect clause
 on the declare target directive

2024-01-19  Kwok Cheung Yeung  

gcc/fortran/
* dump-parse-tree.cc (show_attr): Handle omp_declare_target_indirect
attribute.
* f95-lang.cc (gfc_gnu_attributes): Add entry for 'omp declare
target indirect'.
* gfortran.h (symbol_attribute): Add omp_declare_target_indirect
field.
(struct gfc_omp_clauses): Add indirect field.
* openmp.cc (omp_mask2): Add OMP_CLAUSE_INDIRECT.
(gfc_match_omp_clauses): Match indirect clause.
(OMP_DECLARE_TARGET_CLAUSES): Add OMP_CLAUSE_INDIRECT.
(gfc_match_omp_declare_target): Check omp_device_type and apply
omp_declare_target_indirect attribute to symbol if indirect clause
active.
* trans-decl.cc (add_attributes_to_decl): Add 'omp declare target
indirect' attribute if symbol has indirect attribute set.

gcc/testsuite/
* gfortran.dg/gomp/declare-target-indirect-1.f90: New.
* gfortran.dg/gomp/declare-target-indirect-2.f90: New.

libgomp/
* testsuite/libgomp.fortran/declare-target-indirect-1.f90: New.
* testsuite/libgomp.fortran/declare-target-indirect-2.f90: New.
---
 gcc/fortran/dump-parse-tree.cc|  2 +
 gcc/fortran/f95-lang.cc   |  2 +
 gcc/fortran/gfortran.h|  3 +-
 gcc/fortran/openmp.cc | 45 +-
 gcc/fortran/trans-decl.cc |  4 ++
 .../gomp/declare-target-indirect-1.f90| 58 +++
 .../gomp/declare-target-indirect-2.f90| 25 
 .../declare-target-indirect-1.f90 | 39 +
 .../declare-target-indirect-2.f90 | 53 +
 9 files changed, 229 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/declare-target-indirect-1.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/declare-target-indirect-2.f90
 create mode 100644 
libgomp/testsuite/libgomp.fortran/declare-target-indirect-1.f90
 create mode 100644 
libgomp/testsuite/libgomp.fortran/declare-target-indirect-2.f90

diff --git a/gcc/fortran/dump-parse-tree.cc b/gcc/fortran/dump-parse-tree.cc
index 1563b810b98..7b154eb3ca7 100644
--- a/gcc/fortran/dump-parse-tree.cc
+++ b/gcc/fortran/dump-parse-tree.cc
@@ -914,6 +914,8 @@ show_attr (symbol_attribute *attr, const char * module)
 fputs (" OMP-DECLARE-TARGET", dumpfile);
   if (attr->omp_declare_target_link)
 fputs (" OMP-DECLARE-TARGET-LINK", dumpfile);
+  if (attr->omp_declare_target_indirect)
+fputs (" OMP-DECLARE-TARGET-INDIRECT", dumpfile);
   if (attr->elemental)
 fputs (" ELEMENTAL", dumpfile);
   if (attr->pure)
diff --git a/gcc/fortran/f95-lang.cc b/gcc/fortran/f95-lang.cc
index 358cb17fce2..67fda27aa3e 100644
--- a/gcc/fortran/f95-lang.cc
+++ b/gcc/fortran/f95-lang.cc
@@ -96,6 +96,8 @@ static const attribute_spec gfc_gnu_attributes[] =
 gfc_handle_omp_declare_target_attribute, NULL },
   { "omp declare target link", 0, 0, true,  false, false, false,
 gfc_handle_omp_declare_target_attribute, NULL },
+  { "omp declare target indirect", 0, 0, true,  false, false, false,
+gfc_handle_omp_declare_target_attribute, NULL },
   { "oacc function", 0, -1, true,  false, false, false,
 gfc_handle_omp_declare_target_attribute, NULL },
 };
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index fd73e4ce431..fd843a3241d 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -999,6 +999,7 @@ typedef struct
   /* Mentioned in OMP DECLARE TARGET.  */
   unsigned omp_declare_target:1;
   unsigned omp_declare_target_link:1;
+  unsigned omp_declare_target_indirect:1;
   ENUM_BITFIELD (gfc_omp_device_type) omp_device_type:2;
   unsigned omp_allocate:1;
 
@@ -1584,7 +1585,7 @@ typedef struct gfc_omp_clauses
   unsigned grainsize_strict:1, num_tasks_strict:1, compare:1, weak:1;
   unsigned non_rectangular:1, order_concurrent:1;
   unsigned contains_teams_construct:1, target_first_st_is_teams:1;
-  unsigned contained_in_target_construct:1;
+  unsigned contained_in_target_construct:1, indirect:1;
   ENUM_BITFIELD (gfc_omp_sched_kind) sched_kind:3;
   ENUM_BITFIELD (gfc_omp_device_type) device_type:2;
   ENUM_BITFIELD (gfc_omp_memorder) memorder:3;
diff --git a/gcc/fortran/openmp.cc b/gcc/f

[PATCH] openmp: Change to using a hashtab to lookup offload target addresses for indirect function calls

2024-01-22 Thread Kwok Cheung Yeung

Hi

There was a bug in the declare-target-indirect-2.c libgomp testcase 
(testing indirect calls in offloaded target regions, spread over 
multiple teams/threads) that due to an errant fallthrough in a switch 
statement resulted in only one indirect function ever getting called:


switch (i % 3)
  {
case 0: fn_ptr[i] = &foo;  // Missing break
case 1: fn_ptr[i] = &bar;  // Missing break
case 2: fn_ptr[i] = &baz;
  }

However, when the missing break statements are added, the testcase fails 
with an invalid memory access. Upon investigation, this is due to the 
use of a splay-tree as the lookup structure for indirect addresses, as 
the splay-tree moves frequently accessed elements closer to the root 
node and so needs locking when used from multiple threads. However, this 
would end up partially serialising all the threads and kill performance. 
I have switched the lookup structure from a splay tree to a hashtab 
instead to avoid locking during lookup.


I have also tidied up the initialisation of the lookup table by calling 
it only from the first thread of the first team, instead of redundantly 
calling it from every thread and only having the first one reached do 
the initialisation. This removes the need for locking during initialisation.


Tested with offloading to NVPTX and GCN with a x86_64 host. Okay for master?

Thanks

Kwok
From 721ec33bec2fddc7ee37e227358e36fec923f8da Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Wed, 17 Jan 2024 16:53:40 +0000
Subject: [PATCH 1/2] openmp: Change to using a hashtab to lookup offload
 target addresses for indirect function calls

A splay-tree was previously used to lookup equivalent target addresses
for a given host address on offload targets. However, as splay-trees can
modify their structure on lookup, they are not suitable for concurrent
access from separate teams/threads without some form of locking.  This
patch changes the lookup data structure to a hashtab instead, which does
not have these issues.

The call to build_indirect_map to initialize the data structure is now
called from just the first thread of the first team to avoid redundant
calls to this function.

2024-01-19  Kwok Cheung Yeung  

libgomp/
* config/accel/target-indirect.c: Include string.h and hashtab.h.
Remove include of splay-tree.h.
(splay_tree_prefix, splay_tree_c): Delete.
(struct indirect_map_t): New.
(hash_entry_type, htab_alloc, htab_free, htab_hash, htab_eq): New.
(GOMP_INDIRECT_ADDR_MAP): Remove volatile qualifier.
(USE_SPLAY_TREE_LOOKUP): Rename to...
(USE_HASHTAB_LOOKUP): ..this.
(indirect_map, indirect_array): Delete.
(indirect_htab): New.
(build_indirect_map): Remove locking.  Build indirect map using
hashtab.
(GOMP_target_map_indirect_ptr): Use indirect_htab to lookup target
address.
* config/gcn/team.c (gomp_gcn_enter_kernel): Call build_indirect_map
from first thread of first team only.
* config/nvptx/team.c (gomp_nvptx_main): Likewise.
* testsuite/libgomp.c-c++-common/declare-target-indirect-2.c (main):
Add missing break statements.
---
 libgomp/config/accel/target-indirect.c| 75 +++
 libgomp/config/gcn/team.c |  7 +-
 libgomp/config/nvptx/team.c   |  9 ++-
 .../declare-target-indirect-2.c   | 14 ++--
 4 files changed, 59 insertions(+), 46 deletions(-)

diff --git a/libgomp/config/accel/target-indirect.c 
b/libgomp/config/accel/target-indirect.c
index c60fd547cb6..6dad85076d6 100644
--- a/libgomp/config/accel/target-indirect.c
+++ b/libgomp/config/accel/target-indirect.c
@@ -25,22 +25,43 @@
<http://www.gnu.org/licenses/>.  */
 
 #include <assert.h>
+#include <string.h>
 #include "libgomp.h"
 
-#define splay_tree_prefix indirect
-#define splay_tree_c
-#include "splay-tree.h"
+struct indirect_map_t
+{
+  void *host_addr;
+  void *target_addr;
+};
+
+typedef struct indirect_map_t *hash_entry_type;
+
+static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
+static inline void htab_free (void *ptr) { free (ptr); }
+
+#include "hashtab.h"
+
+static inline hashval_t
+htab_hash (hash_entry_type element)
+{
+  return hash_pointer (element->host_addr);
+}
 
-volatile void **GOMP_INDIRECT_ADDR_MAP = NULL;
+static inline bool
+htab_eq (hash_entry_type x, hash_entry_type y)
+{
+  return x->host_addr == y->host_addr;
+}
+
+void **GOMP_INDIRECT_ADDR_MAP = NULL;
 
 /* Use a splay tree to lookup the target address instead of using a
linear search.  */
-#define USE_SPLAY_TREE_LOOKUP
+#define USE_HASHTAB_LOOKUP
 
-#ifdef USE_SPLAY_TREE_LOOKUP
+#ifdef USE_HASHTAB_LOOKUP
 
-static struct indirect_splay_tree_s indirect_map;
-static indirect_splay_tree_node indirect_array = NULL;
+static htab_t indirect_htab = NULL;
 
 /* Build the splay tree used for host->target address lookups.  */
 
@@ -48,37 +69,29

[committed] Re: [PATCH] openmp: Add support for the 'indirect' clause in C/C++

2024-01-03 Thread Kwok Cheung Yeung

On 09/11/2023 12:24 pm, Thomas Schwinge wrote:

--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -350,6 +350,9 @@ enum omp_clause_code {
/* OpenMP clause: doacross ({source,sink}:vec).  */
OMP_CLAUSE_DOACROSS,

+  /* OpenMP clause: indirect [(constant-integer-expression)].  */
+  OMP_CLAUSE_INDIRECT,
+
/* Internal structure to hold OpenACC cache directive's variable-list.
   #pragma acc cache (variable-list).  */
OMP_CLAUSE__CACHE_,


In this position here, isn't 'OMP_CLAUSE_INDIRECT' applicable to the
'OMP_CLAUSE_RANGE_CHECK' in 'gcc/tree.h:OMP_CLAUSE_SIZE' and
'gcc/tree.h:OMP_CLAUSE_DECL':

 #define OMP_CLAUSE_SIZE(NODE)  
 \
   OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (OMP_CLAUSE_CHECK (NODE), 
 \
   OMP_CLAUSE_FROM,  \
   OMP_CLAUSE__CACHE_), 1)

 #define OMP_CLAUSE_DECL(NODE)   \
   OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (OMP_CLAUSE_CHECK (NODE), 
 \
   OMP_CLAUSE_PRIVATE,   \
   OMP_CLAUSE__SCANTEMP_), 0)

That's probably not intentional?  In that case, maybe simply move it at
the end of the clause list?  (..., and generally then match that ordering
in any 'switch'es, as applicable, and likewise position
'gcc/tree.h:OMP_CLAUSE_INDIRECT_EXPR' correspondingly.)


I have moved OMP_CLAUSE_INDIRECT to just before OMP_CLAUSE__SIMDUID_ so 
that it is outside the range checked by OMP_CLAUSE_SIZE and 
OMP_CLAUSE_DECL. I have also moved its handling in 
c(p)_parser_omp_clause_name so that the alphabetical ordering is 
preserved. Committed as trivial.



I would've assumed handling for 'OMP_CLAUSE_INDIRECT' to also be
necessary in the following places:

   - 'gcc/c-family/c-omp.cc:c_omp_split_clauses'
   - 'gcc/cp/pt.cc:tsubst_omp_clauses',
   - 'gcc/gimplify.cc:gimplify_scan_omp_clauses',
 'gcc/gimplify.cc:gimplify_adjust_omp_clauses'
   - 'gcc/omp-low.cc:scan_sharing_clauses' (twice)
   - 'gcc/tree-nested.cc:convert_nonlocal_omp_clauses',
 'gcc/tree-nested.cc:convert_local_omp_clauses'
   - 'gcc/tree-pretty-print.cc:dump_omp_clause'

Please verify, and add handling as well as test cases as necessary, or,
as applicable, put 'case OMP_CLAUSE_INDIRECT:' next to
'default: gcc_unreachable ();' etc., if indeed that clause is not
expected there.


As Tobias noted, OMP_CLAUSE_INDIRECT never makes it into the middle-end. 
It may be generated by c(p)_parser_omp_all_clauses, and if present an 
attribute is applied to the function declaration, but at no point is it 
directly incorporated into the tree structure. I'm not sure whether it 
is best to explicitly list such cases as gcc_unreachable (it might imply 
that it can reach the ME, but just not at that particular point?) or not 
though.


Kwok
From a56a693a74dd3bee71b1266b09dbd753694ace94 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Wed, 3 Jan 2024 14:34:39 +0000
Subject: [PATCH] openmp: Adjust position of OMP_CLAUSE_INDIRECT in OpenMP
 clauses

Move OMP_CLAUSE_INDIRECT so that it is outside of the range checked by
OMP_CLAUSE_SIZE and OMP_CLAUSE_DECL.

2024-01-03  Kwok Cheung Yeung  

gcc/c/
* c-parser.cc (c_parser_omp_clause_name): Move handling of indirect
clause to correspond to alphabetical order.

gcc/cp/
* parser.cc (cp_parser_omp_clause_name): Move handling of indirect
clause to correspond to alphabetical order.

gcc/
* tree-core.h (enum omp_clause_code): Move OMP_CLAUSE_INDIRECT to before
OMP_CLAUSE__SIMDUID_.
* tree.cc (omp_clause_num_ops): Update position of entry for
OMP_CLAUSE_INDIRECT to correspond with omp_clause_code.
(omp_clause_code_name): Likewise.
---
 gcc/c/c-parser.cc | 4 ++--
 gcc/cp/parser.cc  | 4 ++--
 gcc/tree-core.h   | 6 +++---
 gcc/tree.cc   | 4 ++--
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 64e436010d5..e7b74fb07f0 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -14899,10 +14899,10 @@ c_parser_omp_clause_name (c_parser *parser)
result = PRAGMA_OMP_CLAUSE_IN_REDUCTION;
  else if (!strcmp ("inbranch", p))
result = PRAGMA_OMP_CLAUSE_INBRANCH;
- else if (!strcmp ("indirect", p))
-   result = PRAGMA_OMP_CLAUSE_INDIRECT;
  else if (!strcmp ("independent", p))
result = PRAGMA_OACC_CLAUSE_INDEPENDENT;
+ else if (!strcmp ("indirect", p))
+   result = PRAGMA_OMP_CLAUSE_INDIRECT;
  else if (!strcmp ("is_device_ptr", p))
result = PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR;
  break;
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 1a6b53933a7..37536faf2cf 100644
--- a/gcc/cp/parse

[committed] Re: [PATCH] openmp: Add support for the 'indirect' clause in C/C++

2024-01-03 Thread Kwok Cheung Yeung

Hello

I have committed the following trivial patch to emit FUNC_MAP or 
IND_FUNC_MAP in separate branches of an if statement.


Kwok

On 09/11/2023 12:24 pm, Thomas Schwinge wrote:

Similar to how you have it here:


--- a/gcc/config/nvptx/mkoffload.cc
+++ b/gcc/config/nvptx/mkoffload.cc
@@ -51,6 +51,7 @@ struct id_map
  };

  static id_map *func_ids, **funcs_tail = &func_ids;
+static id_map *ind_func_ids, **ind_funcs_tail = &ind_func_ids;
  static id_map *var_ids, **vars_tail = &var_ids;

  /* Files to unlink.  */
@@ -302,6 +303,11 @@ process (FILE *in, FILE *out, uint32_t omp_requires)

| else if (startswith (input + i, "FUNC_MAP "))
|   {

 output_fn_ptr = true;
 record_id (input + i + 9, &funcs_tail);
   }
+   else if (startswith (input + i, "IND_FUNC_MAP "))
+ {
+   output_fn_ptr = true;
+   record_id (input + i + 13, &ind_funcs_tail);
+ }
 else
   abort ();
 /* Skip to next line. */


..., please also here:


--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -5919,7 +5919,11 @@ nvptx_record_offload_symbol (tree decl)
   /* OpenMP offloading does not set this attribute.  */
   tree dims = attr ? TREE_VALUE (attr) : NULL_TREE;

- fprintf (asm_out_file, "//:FUNC_MAP \"%s\"",
+ fprintf (asm_out_file, "//:");
+ if (lookup_attribute ("omp declare target indirect",
+   DECL_ATTRIBUTES (decl)))
+   fprintf (asm_out_file, "IND_");
+ fprintf (asm_out_file, "FUNC_MAP \"%s\"",
IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));


... maintain separate 'if' branches for 'FUNC_MAP' vs. 'IND_FUNC_MAP', so
that we're able to easily locate those with 'grep', for example.
From 6ae84729940acff598e1a7f49d7b381025082ceb Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Wed, 3 Jan 2024 14:27:39 +0000
Subject: [PATCH] nvptx: Restructure code generating function map labels

This restructures the code generating FUNC_MAP and IND_FUNC_MAP labels
in the assembly code for mkoffload to consume, hopefully making it a
bit clearer and easier to search for.

2024-01-03  Kwok Cheung Yeung  

gcc/
* config/nvptx/nvptx.cc (nvptx_record_offload_symbol): Restructure
printing of FUNC_MAP/IND_FUNC_MAP labels.
---
 gcc/config/nvptx/nvptx.cc | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index 724e403a0e9..9363d3ecc6a 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -5921,8 +5921,10 @@ nvptx_record_offload_symbol (tree decl)
fprintf (asm_out_file, "//:");
if (lookup_attribute ("omp declare target indirect",
  DECL_ATTRIBUTES (decl)))
- fprintf (asm_out_file, "IND_");
-   fprintf (asm_out_file, "FUNC_MAP \"%s\"",
+ fprintf (asm_out_file, "IND_FUNC_MAP");
+   else
+ fprintf (asm_out_file, "FUNC_MAP");
+   fprintf (asm_out_file, " \"%s\"",
 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
 
for (; dims; dims = TREE_CHAIN (dims))
-- 
2.34.1



Re: [PATCH] openmp: Add support for the 'indirect' clause in C/C++

2023-11-07 Thread Kwok Cheung Yeung
Yes, I believe that is the right fix. The version in 
libgomp/config/accel/ should then override the version in libgomp/ for 
accelerator targets.


I'll do a quick check that this works as expected and push it ASAP. 
Sorry for breaking the build for so many targets!


Kwok

On 07/11/2023 9:51 pm, Jakub Jelinek wrote:

On Tue, Nov 07, 2023 at 09:37:22PM +0000, Joseph Myers wrote:

This looks like targets that libgomp/configure.tgt does *not* have any
special handling for, and so never adds "linux" to config_path for.


Indeed, I don't really see anything linux specific about the
libgomp/config/linux/target-indirect.c
so wonder if the right fix isn't
git mv libgomp/{config/linux/,}target-indirect.c

Jakub



Re: [PATCH] openmp: Add support for the 'indirect' clause in C/C++

2023-11-03 Thread Kwok Cheung Yeung
quot;
"If an indirect clause is present and invoked-by-fptr evaluates to true 
then

the only permitted device_type clause is device_type(any)" [215:1-2]

In OpenMP 5.2 that's in "7.8.3 indirect Clause" itself.



I have added a check for this in the parser which emits an error if this 
happens, and some tests in declare-target-indirect-1.c.



OpenMP permits pointers to member functions. Can you
also add a test for those? I bet it simply works but we
should still test those.

(For vtables, see also comment below.)

class Foo {
public:
   int f(int x);
};

typedef int (Foo::*FooFptr)(int x);
...
int my_call(Foo &foo)
{
   FooFptr fn_ptr = &Foo::f;
...
   return std::invoke(fn_ptr, foo, 42);
}


This works, and I have added an execution test in 
libgomp.c++/declare-target-indirect-1.C.



--- a/gcc/omp-offload.cc +++ b/gcc/omp-offload.cc } + if
(omp_redirect_indirect_calls + && gimple_call_fndecl (stmt) ==
NULL_TREE) + { + gcall *orig_call = dyn_cast <gcall *> (stmt); + tree
call_fn = gimple_call_fn (stmt); + tree map_ptr_fn + =
builtin_decl_explicit (BUILT_IN_GOMP_TARGET_MAP_INDIRECT_PTR);


This line is too long. Maybe use a 'enum built_in_function' temporary?


Fixed.


--- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -304,7 +304,7
@@ The OpenMP 4.5 specification is fully supported. @item Iterators in
@code{target update} motion clauses and @code{map} clauses @tab N @tab
@item Indirect calls to the device version of a procedure or function
in - @code{target} regions @tab N @tab + @code{target} regions @tab P
@tab Only C and C++


I think we need a new entry to handle the virtual part. However, it looks
as if that's a new OpenMP 5.2 feature. Can you add an entry under
"Other new OpenMP 5.2 features"?

At least I cannot find any existing entry and I only see in OpenMP 5.2:

"Invoking a virtual member function of an object on a device other than
the device on which the object was constructed results in unspecified
behavior, unless the object is accessible and was constructed on the
host device." [OpenMP 5.2, 287:10-12] in "Restrictions to the target 
construct".


I have added a line in the OpenMP 5.2 section to state this.


--- a/libgomp/target.c +++ b/libgomp/target.c @@ -2256,11 +2256,14 @@
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned
version, void **host_funcs_end = ((void ***) host_table)[1]; void
**host_var_table = ((void ***) host_table)[2]; void **host_vars_end =
((void ***) host_table)[3]; + void **host_ind_func_table = ((void ***)
host_table)[4]; + void **host_ind_funcs_end = ((void ***) host_table)[5];


This code assumes that all calls have now 6 arguments. But that's not 
true for old
code. It seems as if you have to bump the version number and only access 
those values

when the version number is sufficiently large.


I have bumped up the GOMP_VERSION to 3, and reading the indirect 
functions section of the host table will be skipped if the GOMP_VERSION 
is not at least 3.


Also, in the device plugins, the indirect function count will not be 
read from the image header if the GOMP_VERSION is too low.


Okay for mainline, pending successful testing (still in progress)?

Thanks

Kwok


Thanks,

Tobias



-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 
80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: 
Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; 
Registergericht München, HRB 106955
From adcd938b1dee1cc5a9df6efee40d47a2aab254f8 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Fri, 3 Nov 2023 18:03:50 +0000
Subject: [PATCH] openmp: Add support for the 'indirect' clause in C/C++

This adds support for the 'indirect' clause in the 'declare target'
directive.  Functions declared as indirect may be called via function
pointers passed from the host in offloaded code.

Virtual calls to member functions via the object pointer in C++ are
currently not supported in target regions.

2023-11-03  Kwok Cheung Yeung  

gcc/c-family/
* c-attribs.cc (c_common_attribute_table): Add attribute for
indirect functions.
* c-pragma.h (enum pragma_omp_clause): Add entry for indirect clause.

gcc/c/
* c-decl.cc (c_decl_attributes): Add attribute for indirect
functions.
* c-lang.h (c_omp_declare_target_attr): Add indirect field.
* c-parser.cc (c_parser_omp_clause_name): Handle indirect clause.
(c_parser_omp_clause_indirect): New.
(c_parser_omp_all_clauses): Handle indirect clause.
(OMP_DECLARE_TARGET_CLAUSE_MASK): Add indirect clause to mask.
(c_parser_omp_declare_target): Handle indirect clause.  Emit error
message if device_type or indirect clauses used alone.  Emit error
if indirect clause used with device_type that is not 'any'.
(OMP_BEGIN_DECLARE_TARGET_CLAUSE_MASK): Add indirect clause to mask.
(c_parser_omp_begin): Handle indirect clause.

[PATCH] openmp: Add support for the 'indirect' clause in C/C++

2023-10-08 Thread Kwok Cheung Yeung

Hello

This patch adds support for the 'indirect' clause in the 'declare 
target' directive in C/C++ (Fortran to follow) and adds the necessary 
infrastructure to support indirect calls in target regions. This allows 
one to pass in pointers to functions that have been declared as indirect 
from the host to the target, then invoked via the passed-in pointer on 
the target device.


This is done by processing the functions declared as indirect in a 
similar way to regular kernels - they are added as a separate entry to 
the offload tables which are embedded into the target code by mkoffload. 
When the image is loaded, the host reads the target version of the 
offload table, then combines it with the host version to produce an 
address map. This map is then written to the device memory and a pointer 
is set to point to it.


The omp_device_lower pass now runs if any indirect functions are 
present. The pass searches for any indirect function calls, and runs a
new builtin BUILT_IN_GOMP_TARGET_MAP_INDIRECT_PTR to process the 
function pointer before making the indirect call.


The builtin (implemented by GOMP_target_map_indirect_ptr) searches 
through the address map, returning the target address if found, or the 
original address if not. I've added two search algorithms - a simple 
linear search through the map, and another which builds up a splay tree 
from the map and uses that to do the search. I've enabled the splay-tree 
version by default, but the linear search is useful for debugging 
purposes so I have kept it in.


The C++ support is currently limited to normal indirect calls - virtual 
calls on objects do not currently work. I believe the main issue is that 
the vtables are not currently copied across to the target. I have added 
some handling for OBJ_TYPE_REF to prevent the compiler from ICEing when 
it encounters a virtual call, but without the vtable this cannot work 
properly.


Tested on a x86_64 host with offloading to NVPTX and AMD GCN, and 
bootstrapped on a x86_64 host. Okay for mainline?


Thanks

Kwok
From 46129c254990a9fff4b6d8512f04ad8fa7d61f0e Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Sun, 8 Oct 2023 13:50:25 +0100
Subject: [PATCH] openmp: Add support for the 'indirect' clause in C/C++

This adds support for the 'indirect' clause in the 'declare target'
directive.  Functions declared as indirect may be called via function
pointers passed from the host in offloaded code.

Virtual calls to member functions via the object pointer in C++ are
currently not supported in target regions.

2023-10-08  Kwok Cheung Yeung  

gcc/c-family/
* c-attribs.cc (c_common_attribute_table): Add attribute for
indirect functions.
* c-pragma.h (enum pragma_omp_clause): Add entry for indirect clause.

gcc/c/
* c-decl.cc (c_decl_attributes): Add attribute for indirect
functions.
* c-lang.h (c_omp_declare_target_attr): Add indirect field.
* c-parser.cc (c_parser_omp_clause_name): Handle indirect clause.
(c_parser_omp_clause_indirect): New.
(c_parser_omp_all_clauses): Handle indirect clause.
(OMP_DECLARE_TARGET_CLAUSE_MASK): Add indirect clause to mask.
(c_parser_omp_declare_target): Handle indirect clause.
(OMP_BEGIN_DECLARE_TARGET_CLAUSE_MASK): Add indirect clause to mask.
(c_parser_omp_begin): Handle indirect clause.
* c-typeck.cc (c_finish_omp_clauses): Handle indirect clause.

gcc/cp/
* cp-tree.h (cp_omp_declare_target_attr): Add indirect field.
* decl2.cc (cplus_decl_attributes): Add attribute for indirect
functions.
* parser.cc (cp_parser_omp_clause_name): Handle indirect clause.
(cp_parser_omp_clause_indirect): New.
(cp_parser_omp_all_clauses): Handle indirect clause.
(handle_omp_declare_target_clause): Add extra parameter.  Add
indirect attribute for indirect functions.
(OMP_DECLARE_TARGET_CLAUSE_MASK): Add indirect clause to mask.
(cp_parser_omp_declare_target): Handle indirect clause.
(OMP_BEGIN_DECLARE_TARGET_CLAUSE_MASK): Add indirect clause to mask.
(cp_parser_omp_begin): Handle indirect clause.
* semantics.cc (finish_omp_clauses): Handle indirect clause.

gcc/
* lto-cgraph.cc (enum LTO_symtab_tags): Add tag for indirect
functions.
(output_offload_tables): Write indirect functions.
(input_offload_tables): Read indirect functions.
* lto-section-names.h (OFFLOAD_IND_FUNC_TABLE_SECTION_NAME): New.
* omp-builtins.def (BUILT_IN_GOMP_TARGET_MAP_INDIRECT_PTR): New.
* omp-offload.cc (offload_ind_funcs): New.
(omp_discover_implicit_declare_target): Add functions marked with
'omp declare target indirect' to indirect functions list.
(omp_finish_file): Add indirect functions to section for offload
indirect functions.
(execute_omp_device_lower): Redirect indirect calls on target

Re: [PATCH] amdgcn: Enable SIMD vectorization of math functions

2023-03-02 Thread Kwok Cheung Yeung

Hello

I've made the suggested changes. Should I hold off on committing this 
until GCC 13 has been branched off?


Kwok

On 01/03/2023 10:01 am, Andrew Stubbs wrote:

On 28/02/2023 23:01, Kwok Cheung Yeung wrote:

Hello

This patch implements the TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION 
target hook for the AMD GCN architecture, such that when vectorized, 
calls to builtin standard math functions such as asinf, exp, pow etc. 
are converted to calls to the recently added vectorized math functions 
for GCN in Newlib. The -fno-math-errno flag is required in addition to 
the usual vectorization optimization flags for this to occur, and some 
of the math functions (the larger double-precision ones) require a 
large stack size to function properly.


This patch requires the GCN vector math functions in Newlib to 
function - these were included in the recent 4.3.0.20230120 snapshot. 
As this was a minimum requirement starting from the patch 'amdgcn, 
libgomp: Manually allocated stacks', this should not be a problem.


I have added new testcases in the testsuite that compare the output of 
the vectorized math functions against the scalar, passing if they are 
sufficiently close. With the testcase for standalone GCN (without 
libgomp) in gcc.target/gcn/, there is a problem since gcn-run 
currently cannot set the stack size correctly in DejaGnu testing, so I 
have made it a compile test for now - it is still useful to check that 
calls to the correct functions are being made. The runtime correctness 
is still covered by the libgomp test.


Okay for trunk?


The main part of the patch is OK, with the small changes below.

Others have pointed out that "omp declare simd" exists, but you and I 
have been all through that verbally, long ago, and as Tobias says the 
offload compiler cannot rely on markup in the host compiler's header 
files to solve this problem.



@@ -7324,6 +7429,11 @@ gcn_dwarf_register_span (rtx rtl)
   gcn_simd_clone_compute_vecsize_and_simdlen
 #undef  TARGET_SIMD_CLONE_USABLE
 #define TARGET_SIMD_CLONE_USABLE gcn_simd_clone_usable
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
+  gcn_vectorize_builtin_vectorized_function
+#undef TARGET_LIBC_HAS_FUNCTION
+#define TARGET_LIBC_HAS_FUNCTION gcn_libc_has_function
 #undef  TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
 #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
   gcn_small_register_classes_for_mode_p


Please keep these in alphabetical order.

+/* Ideally this test should be run, but the math routines require a 
large
+   stack and gcn-run currently does not respect the stack-size 
parameter.  */

+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-math-errno 
-mstack-size=3000000 -fdump-tree-vect" } */


This isn't ideal. The dg-set-target-env-var directive (I think this is 
it?) can set GCN_STACK_SIZE, which gcn-run does honour, but I realise 
that doesn't work with remote test targets (like ours).


I suggest adding an additional test that sets the envvar and #includes 
the code from this one; one test to scan the dumps, one test to run it. 
Like this  (untested, syntax uncertain).


/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -fno-math-errno" } */
/* { dg-set-target-env-var "GCN_STACK_SIZE" "3000000" } */
#include "simd-math-1.c"

The run test will get skipped in our test environment (and anyone else 
using remote), but the libgomp test should make up for that.


Andrew
From 0b43ef3c2d6afd4aecfc03fd1d2df675626e017b Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Tue, 28 Feb 2023 14:15:47 +
Subject: [PATCH] amdgcn: Enable SIMD vectorization of math functions

Calls to vectorized versions of routines in the math library will now
be inserted when vectorizing code containing supported math functions.

2023-02-28  Kwok Cheung Yeung  
Paul-Antoine Arras  

gcc/
* builtins.cc (mathfn_built_in_explicit): New.
* config/gcn/gcn.cc: Include case-cfn-macros.h.
(mathfn_built_in_explicit): Add prototype.
(gcn_vectorize_builtin_vectorized_function): New.
(gcn_libc_has_function): New.
(TARGET_LIBC_HAS_FUNCTION): Define.
(TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Define.

gcc/testsuite/
* gcc.target/gcn/simd-math-1.c: New testcase.
* gcc.target/gcn/simd-math-2.c: New testcase.

libgomp/
* testsuite/libgomp.c/simd-math-1.c: New testcase.
---
 gcc/builtins.cc|   8 +
 gcc/config/gcn/gcn.cc  | 110 +++
 gcc/testsuite/gcc.target/gcn/simd-math-1.c | 206 +++
 gcc/testsuite/gcc.target/gcn/simd-math-2.c |   8 +
 libgomp/testsuite/libgomp.c/simd-math-1.c  | 217 +
 5 files changed, 549 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/gcn/simd-math-1.c
 create mode 100644 gcc/testsu

[PATCH] amdgcn: Enable SIMD vectorization of math functions

2023-02-28 Thread Kwok Cheung Yeung

Hello

This patch implements the TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION 
target hook for the AMD GCN architecture, such that when vectorized, 
calls to builtin standard math functions such as asinf, exp, pow etc. 
are converted to calls to the recently added vectorized math functions 
for GCN in Newlib. The -fno-math-errno flag is required in addition to 
the usual vectorization optimization flags for this to occur, and some 
of the math functions (the larger double-precision ones) require a large 
stack size to function properly.


This patch requires the GCN vector math functions in Newlib to function 
- these were included in the recent 4.3.0.20230120 snapshot. As this was 
a minimum requirement starting from the patch 'amdgcn, libgomp: Manually 
allocated stacks', this should not be a problem.


I have added new testcases in the testsuite that compare the output of 
the vectorized math functions against the scalar, passing if they are 
sufficiently close. With the testcase for standalone GCN (without 
libgomp) in gcc.target/gcn/, there is a problem since gcn-run currently 
cannot set the stack size correctly in DejaGnu testing, so I have made 
it a compile test for now - it is still useful to check that calls to 
the correct functions are being made. The runtime correctness is still 
covered by the libgomp test.


Okay for trunk?

Thanks

Kwok
From 69d13dc898ff7c70e80299a92dc895a89a9e679b Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Tue, 28 Feb 2023 14:15:47 +
Subject: [PATCH] amdgcn: Enable SIMD vectorization of math functions

Calls to vectorized versions of routines in the math library will now
be inserted when vectorizing code containing supported math functions.

2023-02-28  Kwok Cheung Yeung  
Paul-Antoine Arras  

gcc/
* builtins.cc (mathfn_built_in_explicit): New.
* config/gcn/gcn.cc: Include case-cfn-macros.h.
(mathfn_built_in_explicit): Add prototype.
(gcn_vectorize_builtin_vectorized_function): New.
(gcn_libc_has_function): New.
(TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Define.
(TARGET_LIBC_HAS_FUNCTION): Define.

gcc/testsuite/
* gcc.target/gcn/simd-math-1.c: New testcase.

libgomp/
* testsuite/libgomp.c/simd-math-1.c: New testcase.
---
 gcc/builtins.cc|   8 +
 gcc/config/gcn/gcn.cc  | 110 +++
 gcc/testsuite/gcc.target/gcn/simd-math-1.c | 210 
 libgomp/testsuite/libgomp.c/simd-math-1.c  | 217 +
 4 files changed, 545 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/gcn/simd-math-1.c
 create mode 100644 libgomp/testsuite/libgomp.c/simd-math-1.c

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 4d467c8c5c1..305c65c29be 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -2089,6 +2089,14 @@ mathfn_built_in (tree type, combined_fn fn)
   return mathfn_built_in_1 (type, fn, /*implicit=*/ 1);
 }
 
+/* Like mathfn_built_in_1, but always use the explicit array.  */
+
+tree
+mathfn_built_in_explicit (tree type, combined_fn fn)
+{
+  return mathfn_built_in_1 (type, fn, /*implicit=*/ 0);
+}
+
 /* Like mathfn_built_in_1, but take a built_in_function and
always use the implicit array.  */
 
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 23ab01e75d8..d99bb63d4c0 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -53,6 +53,7 @@
 #include "dwarf2.h"
 #include "gimple.h"
 #include "cgraph.h"
+#include "case-cfn-macros.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -5240,6 +5241,110 @@ gcn_simd_clone_usable (struct cgraph_node *ARG_UNUSED 
(node))
   return 0;
 }
 
+tree mathfn_built_in_explicit (tree, combined_fn);
+
+/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION.
+   Return the function declaration of the vectorized version of the builtin
+   in the math library if available.  */
+
+tree
+gcn_vectorize_builtin_vectorized_function (unsigned int fn, tree type_out,
+  tree type_in)
+{
+  if (TREE_CODE (type_out) != VECTOR_TYPE
+  || TREE_CODE (type_in) != VECTOR_TYPE)
+return NULL_TREE;
+
+  machine_mode out_mode = TYPE_MODE (TREE_TYPE (type_out));
+  int out_n = TYPE_VECTOR_SUBPARTS (type_out);
+  machine_mode in_mode = TYPE_MODE (TREE_TYPE (type_in));
+  int in_n = TYPE_VECTOR_SUBPARTS (type_in);
+  combined_fn cfn = combined_fn (fn);
+
+  /* Keep this consistent with the list of vectorized math routines.  */
+  int implicit_p;
+  switch (fn)
+{
+CASE_CFN_ACOS:
+CASE_CFN_ACOSH:
+CASE_CFN_ASIN:
+CASE_CFN_ASINH:
+CASE_CFN_ATAN:
+CASE_CFN_ATAN2:
+CASE_CFN_ATANH:
+CASE_CFN_COPYSIGN:
+CASE_CFN_COS:
+CASE_CFN_COSH:
+CASE_CFN_ERF:
+CASE_CFN_EXP:
+CASE_CFN_EXP2:
+CASE_CFN_FINITE:
+CASE_CFN_FMOD:
+CASE_CFN_GAMMA:
+CASE_CF

[PATCHv2] openmp: Add support for 'present' modifier

2023-02-17 Thread Kwok Cheung Yeung

Hello

This is a revised version of the patch for the 'present' modifier for 
OpenMP. Compared to the first version, three improvements have been made:


- A bug which caused bootstrapping with a '-m32' multilib on x86-64 to 
fail due to pointer size issues has been fixed.

- The Fortran parse tree dump now shows clauses with 'present' applied.
- The reordering of OpenMP clauses has been moved to 
gimplify_scan_omp_clauses, where the other clause reordering rules are 
applied.


Thanks

Kwok
From 24b6225578bb08bbd745d6ec653aab60802dd220 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Fri, 3 Feb 2023 13:04:21 +
Subject: [PATCH] openmp: Add support for the 'present' modifier

This implements support for the OpenMP 5.1 'present' modifier, which can be
used in map clauses in the 'target', 'target data', 'target enter data' and
'target exit data' constructs, and in the 'to' and 'from' clauses of the
'target update' construct.  It is also supported in defaultmap.

The modifier triggers a fatal runtime error if the data specified by the
clause is not already present on the target device.  It can also be combined
with 'always' in map clauses.

2023-02-01  Kwok Cheung Yeung  

gcc/c/
* c-parser.cc (c_parser_omp_variable_list): Set default motion
modifier.
(c_parser_omp_var_list_parens): Add new parameter with default.  Parse
'present' motion modifier and apply.
(c_parser_omp_clause_defaultmap): Parse 'present' in defaultmap.
(c_parser_omp_clause_map): Parse 'present' modifier in map clauses.
(c_parser_omp_clause_to): Allow use of 'present' in variable list.
(c_parser_omp_clause_from): Likewise.
(c_parser_omp_target_data): Allow map clauses with 'present'
modifiers.
(c_parser_omp_target_enter_data): Likewise.
(c_parser_omp_target_exit_data): Likewise.
(c_parser_omp_target): Likewise.

gcc/cp/
* parser.cc (cp_parser_omp_var_list_no_open): Add new parameter with
default.  Parse 'present' motion modifier and apply.
(cp_parser_omp_clause_defaultmap): Parse 'present' in defaultmap.
(cp_parser_omp_clause_map): Parse 'present' modifier in map clauses.
(cp_parser_omp_all_clauses): Allow use of 'present' in 'to' and 'from'
clauses.
(cp_parser_omp_target_data): Allow map clauses with 'present'
modifiers.
(cp_parser_omp_target_enter_data): Likewise.
(cp_parser_omp_target_exit_data): Likewise.
* semantics.cc (finish_omp_target): Accept map clauses with 'present'
modifiers.

gcc/fortran/
* dump-parse-tree.cc (show_omp_namelist): Display 'present' map
modifier.
(show_omp_clauses): Display 'present' motion modifier for 'to'
and 'from' clauses.
* gfortran.h (enum gfc_omp_map_op): Add entries with 'present'
modifiers.
(enum gfc_omp_motion_modifier): New.
(struct gfc_omp_namelist): Add motion_modifier field.
* openmp.cc (gfc_match_omp_variable_list): Add new parameter with
default.  Parse 'present' motion modifier and apply.
(gfc_match_omp_clauses): Parse 'present' in defaultmap, 'from'
clauses, 'map' clauses and 'to' clauses.
(resolve_omp_clauses): Allow 'present' modifiers on 'target',
'target data', 'target enter' and 'target exit' directives.
* trans-openmp.cc (gfc_trans_omp_clauses): Apply 'present' modifiers
to tree node for 'map', 'to' and 'from' clauses.  Apply 'present' for
defaultmap.

gcc/
* gimplify.cc (omp_notice_variable): Apply GOVD_MAP_ALLOC_ONLY flag
and defaultmap flags if the defaultmap has GOVD_MAP_FORCE_PRESENT flag
set.
(omp_get_attachment): Handle map clauses with 'present' modifier.
(omp_group_base): Likewise.
(gimplify_scan_omp_clauses): Reorder present maps to come first.
Set GOVD flags for present defaultmaps.
(gimplify_adjust_omp_clauses_1): Set map kind for present defaultmaps.
* omp-low.cc (scan_sharing_clauses): Handle 'always, present' map
clauses.
(lower_omp_target): Handle map clauses with 'present' modifier.
Handle 'to' and 'from' clauses with 'present'.
* tree-core.h (enum omp_clause_defaultmap_kind): Add
OMP_CLAUSE_DEFAULTMAP_PRESENT defaultmap kind.
(enum omp_clause_motion_modifier): New.
(struct tree_omp_clause): Add motion_modifier field.
* tree-pretty-print.cc (dump_omp_clause): Handle 'map', 'to' and
'from' clauses with 'present' modifier.  Handle present defaultmap.
* tree.h (OMP_CLAUSE_MOTION_MODIFIER): New.
(OMP_CLAUSE_SET_MOTION_MODIFIER): New.

gcc/testsuite/
* c-c++-common/gomp/defaultmap-4.c: New.
* c-c++-common/gomp/map-6.c: Update expected error messages.
* c-c++-common/gomp/map-8.c: New.
* c-c++-common/gomp

Re: [OG12][committed] openmp: Add support for the 'present' modifier

2023-02-14 Thread Kwok Cheung Yeung

Hi

I have also committed the following patch to devel/omp/gcc-12 to show 
the 'present' modifier in the Fortran parse tree dump.


e7279cc2eda openmp: Add support for 'present' modifier in the Fortran 
parse tree dump


Kwok
From e7279cc2eda2a0c50cff19ee4e02eea3d7808f68 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Tue, 14 Feb 2023 21:24:19 +
Subject: [PATCH] openmp: Add support for 'present' modifier in the Fortran
 parse tree dump

2023-02-14  Kwok Cheung Yeung  

gcc/fortran/
* dump-parse-tree.cc (show_omp_namelist): Display 'present' map
modifier.
(show_omp_clauses): Display 'present' motion modifier for 'to'
and 'from' clauses.
---
 gcc/fortran/ChangeLog.omp  |  7 +++
 gcc/fortran/dump-parse-tree.cc | 15 +++
 2 files changed, 22 insertions(+)

diff --git a/gcc/fortran/ChangeLog.omp b/gcc/fortran/ChangeLog.omp
index 44bc0ea1e2a..579d8ee7c97 100644
--- a/gcc/fortran/ChangeLog.omp
+++ b/gcc/fortran/ChangeLog.omp
@@ -1,3 +1,10 @@
+2023-02-14  Kwok Cheung Yeung  
+
+   * dump-parse-tree.cc (show_omp_namelist): Display 'present' map
+   modifier.
+   (show_omp_clauses): Display 'present' motion modifier for 'to'
+   and 'from' clauses.
+
 2023-02-09  Kwok Cheung Yeung  
 
* gfortran.h (enum gfc_omp_map_op): Add entries with 'present'
diff --git a/gcc/fortran/dump-parse-tree.cc b/gcc/fortran/dump-parse-tree.cc
index 4da4d813d1d..7dad3ac0307 100644
--- a/gcc/fortran/dump-parse-tree.cc
+++ b/gcc/fortran/dump-parse-tree.cc
@@ -1453,9 +1453,20 @@ show_omp_namelist (int list_type, gfc_omp_namelist *n)
  case OMP_MAP_TO: fputs ("to:", dumpfile); break;
  case OMP_MAP_FROM: fputs ("from:", dumpfile); break;
  case OMP_MAP_TOFROM: fputs ("tofrom:", dumpfile); break;
+ case OMP_MAP_PRESENT_ALLOC: fputs ("present,alloc:", dumpfile); break;
+ case OMP_MAP_PRESENT_TO: fputs ("present,to:", dumpfile); break;
+ case OMP_MAP_PRESENT_FROM: fputs ("present,from:", dumpfile); break;
+ case OMP_MAP_PRESENT_TOFROM:
+   fputs ("present,tofrom:", dumpfile); break;
  case OMP_MAP_ALWAYS_TO: fputs ("always,to:", dumpfile); break;
  case OMP_MAP_ALWAYS_FROM: fputs ("always,from:", dumpfile); break;
  case OMP_MAP_ALWAYS_TOFROM: fputs ("always,tofrom:", dumpfile); break;
+ case OMP_MAP_ALWAYS_PRESENT_TO:
+   fputs ("always,present,to:", dumpfile); break;
+ case OMP_MAP_ALWAYS_PRESENT_FROM:
+   fputs ("always,present,from:", dumpfile); break;
+ case OMP_MAP_ALWAYS_PRESENT_TOFROM:
+   fputs ("always,present,tofrom:", dumpfile); break;
  case OMP_MAP_DELETE: fputs ("delete:", dumpfile); break;
  case OMP_MAP_RELEASE: fputs ("release:", dumpfile); break;
  default: break;
@@ -1793,6 +1804,10 @@ show_omp_clauses (gfc_omp_clauses *omp_clauses)
  fputs ("inscan, ", dumpfile);
if (list_type == OMP_LIST_REDUCTION_TASK)
  fputs ("task, ", dumpfile);
+   if ((list_type == OMP_LIST_TO || list_type == OMP_LIST_FROM)
+   && omp_clauses->lists[list_type]->u.motion_modifier
+  == OMP_MOTION_PRESENT)
+ fputs ("present:", dumpfile);
show_omp_namelist (list_type, omp_clauses->lists[list_type]);
fputc (')', dumpfile);
   }
-- 
2.34.1



[OG12][committed] openmp: Add support for the 'present' modifier

2023-02-09 Thread Kwok Cheung Yeung

Hello

I've ported my patch for supporting the OpenMP 5.1 'present' modifier 
and committed it to the devel/omp/gcc-12 development branch:


229b705862c openmp: Add support for the 'present' modifier

Tested with offloading on amdgcn and nvptx.

Kwok


[PATCH] openmp: Add support for 'present' modifier

2023-02-03 Thread Kwok Cheung Yeung

Hello

This patch implements support for the OpenMP 5.1 'present' modifier in 
C, C++ and Fortran. 'present' can be used in the 'map' clause for the 
'target', 'target data', 'target enter data' and 'target exit data' 
constructs, and the 'to'/'from' clauses of 'target update'. It can be 
used in conjunction with other modifiers too (currently only 'always' on 
map clauses).


It can also be used in defaultmap, which applies 'present, alloc' to the 
default clauses.


It behaves similarly to the OpenACC 'present' clause, and causes a 
fatal runtime error when the referenced data is not already present in 
device memory. Similarly to the OpenACC error message, the error is 
expressed in terms of the equivalent OpenMP function !omp_target_is_present.


Regarding the representation of the map kind - the bit space is getting 
a bit crowded. I have made bit 7 (GOMP_MAP_FLAG_FORCE) into another 
special bit (GOMP_MAP_FLAG_SPECIAL_5), and redefined GOMP_MAP_FLAG_FORCE 
to be GOMP_MAP_FLAG_SPECIAL_5 with no other special flags set. The 
'present' modifier is represented by setting GOMP_MAP_FLAG_SPECIAL_5 | 
GOMP_MAP_FLAG_SPECIAL_0 - this does not interfere with 'always' 
(GOMP_MAP_FLAG_SPECIAL_2) or 'implicit' (GOMP_MAP_FLAG_SPECIAL_3 | 
GOMP_MAP_FLAG_SPECIAL_4) which is used by clauses generated by defaultmap.


During gimplification of defaultmap, the present defaultmap is 
represented by setting GOVD_MAP_FORCE_PRESENT (as that is presently only 
used in OpenACC and has a similar meaning). GOVD_MAP_ALLOC_ONLY will be 
added, and this is eventually lowered to a GOMP_MAP_PRESENT_ALLOC map 
kind for the default clauses.


Bootstrapped on x86-64, no regressions in GCC testsuite, libgomp tested 
with x86-64 (no offloading), AMD GCN and NVPTX offloading. This is too 
late for GCC 13 now, but will this be okay for GCC 14?


Thanks

Kwok
From ba9368f88514a27f374d84e53e36ce36fa9ac5bc Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Fri, 3 Feb 2023 13:04:21 +
Subject: [PATCH] openmp: Add support for the 'present' modifier

This implements support for the OpenMP 5.1 'present' modifier, which can be
used in map clauses in the 'target', 'target data', 'target enter data' and
'target exit data' constructs, and in the 'to' and 'from' clauses of the
'target update' construct.  It is also supported in defaultmap.

The modifier triggers a fatal runtime error if the data specified by the
clause is not already present on the target device.  It can also be combined
with 'always' in map clauses.

2023-02-01  Kwok Cheung Yeung  

gcc/c/
* c-parser.cc (c_parser_omp_variable_list): Set default motion
modifier.
(c_parser_omp_var_list_parens): Add new parameter with default.  Parse
'present' motion modifier and apply.
(c_parser_omp_clause_defaultmap): Parse 'present' in defaultmap.
(c_parser_omp_clause_map): Parse 'present' modifier in map clauses.
(c_parser_omp_clause_to): Allow use of 'present' in variable list.
(c_parser_omp_clause_from): Likewise.
(c_parser_omp_target_data): Allow map clauses with 'present'
modifiers.
(c_parser_omp_target_enter_data): Likewise.
(c_parser_omp_target_exit_data): Likewise.
(c_parser_omp_target): Likewise.

gcc/cp/
* parser.cc (cp_parser_omp_var_list_no_open): Add new parameter with
default.  Parse 'present' motion modifier and apply.
(cp_parser_omp_clause_defaultmap): Parse 'present' in defaultmap.
(cp_parser_omp_clause_map): Parse 'present' modifier in map clauses.
(cp_parser_omp_all_clauses): Allow use of 'present' in 'to' and 'from'
clauses.
(cp_parser_omp_target_data): Allow map clauses with 'present'
modifiers.
(cp_parser_omp_target_enter_data): Likewise.
(cp_parser_omp_target_exit_data): Likewise.
* semantics.cc (finish_omp_target): Accept map clauses with 'present'
modifiers.

gcc/fortran/
* gfortran.h (enum gfc_omp_map_op): Add entries with 'present'
modifiers.
(enum gfc_omp_motion_modifier): New.
(struct gfc_omp_namelist): Add motion_modifier field.
* openmp.cc (gfc_match_omp_variable_list): Add new parameter with
default.  Parse 'present' motion modifier and apply.
(gfc_match_omp_clauses): Parse 'present' in defaultmap, 'from'
clauses, 'map' clauses and 'to' clauses.
(resolve_omp_clauses): Allow 'present' modifiers on 'target',
'target data', 'target enter' and 'target exit' directives.
* trans-openmp.cc (gfc_trans_omp_clauses): Apply 'present' modifiers
to tree node for 'map', 'to' and 'from' clauses.  Apply 'present' for
defaultmap.

gcc/
* gimplify.cc (omp_notice_variable): Apply GOVD_MAP_ALLOC_ONLY flag
and defaultmap flags if the defaultmap has GOVD_MAP_FORCE_PRESENT flag
set.
(omp_get_attachment): Handle map

Re: [PATCH][OG12] amdgcn: Support AMD-specific 'isa' and 'arch' traits in OpenMP context selectors

2022-12-02 Thread Kwok Cheung Yeung
So this is the OG12-specific part (including metadirective and dynamic 
context selectors) of the previous patch.


Once https://gcc.gnu.org/r13-4446-ge41b243302e996 is backported, is it 
OK for OG12?


Looks good to me, thanks!

Kwok


Re: [PATCH][OG12] amdgcn: Support AMD-specific 'isa' and 'arch' traits in OpenMP context selectors

2022-11-30 Thread Kwok Cheung Yeung via Gcc-patches

Hello PA,


--- libgomp/config/gcn/selector.c
+++ libgomp/config/gcn/selector.c
@@ -36,7 +36,7 @@ GOMP_evaluate_current_device (const char *kind, const char 
*arch,
   if (kind && strcmp (kind, "gpu") != 0)
 return false;
 
-  if (arch && strcmp (arch, "gcn") != 0)

+  if (arch && (strcmp (arch, "gcn") != 0 || strcmp (arch, "amdgcn") != 0))
 return false;


The logic here looks wrong to me - surely it should return false if arch 
is not 'gcn' AND it is not 'amdgcn'?



@@ -48,8 +48,17 @@ GOMP_evaluate_current_device (const char *kind, const char 
*arch,
 #endif
 
 #ifdef __GCN5__

-  if (strcmp (isa, "gfx900") == 0 || strcmp (isa, "gfx906") != 0
-  || strcmp (isa, "gfx908") == 0)
+  if (strcmp (isa, "gfx900") == 0 || strcmp (isa, "gfx906") != 0)
+return true;
+#endif
+
+#ifdef __CDNA1__
+  if (strcmp (isa, "gfx908") == 0)
+return true;
+#endif
+
+#ifdef __CDNA2__
+  if (strcmp (isa, "gfx90a") == 0)
 return true;
 #endif


Okay for gfx908 and gfx90a, but is there any way of distinguishing 
between 'gfx900' and 'gfx906' ISAs? I don't think these are mutually 
compatible.


Thanks

Kwok


Re: [OG12] [committed] amdgcn: Enable SIMD vectorization of math library functions

2022-11-08 Thread Kwok Cheung Yeung

Hello

These additional patches were pushed onto the devel/omp/gcc-12 branch to 
fix various issues with the SIMD math library:


ecf1603b7ad amdgcn: Fix expansion of GCN_BUILTIN_LDEXPV builtin
6c40e3f5daa amdgcn: Various fixes for SIMD math library
8e6c5b18e10 amdgcn: Fixed intermittent failure in vectorized version of rint

Kwok


[COMMITTED] amdgcn: Fix expansion of GCN_BUILTIN_LDEXPV builtin

2022-11-08 Thread Kwok Cheung Yeung

Hello

This patch fixes a bug in the expansion of GCN_BUILTIN_LDEXPV. As this 
is a double-precision operation, the first argument should be expanded 
as a V64DF expression (instead of V64SF).


Committed to trunk as obvious.

Kwok
From cb0a2b1f28cf0c231bf38fcd02c40689739df7bb Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Tue, 8 Nov 2022 14:38:23 +
Subject: [PATCH] amdgcn: Fix expansion of GCN_BUILTIN_LDEXPV builtin

2022-11-08  Kwok Cheung Yeung  

gcc/
* config/gcn/gcn.cc (gcn_expand_builtin_1): Expand first argument
of GCN_BUILTIN_LDEXPV to V64DFmode.
---
 gcc/config/gcn/gcn.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 9c5e3419748..5e6f3b8b74b 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -4383,7 +4383,7 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx 
/*subtarget */ ,
  return target;
rtx arg1 = force_reg (V64DFmode,
  expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
-  V64SFmode,
+  V64DFmode,
   EXPAND_NORMAL));
rtx arg2 = force_reg (V64SImode,
  expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
-- 
2.25.1



[PATCH] amdgcn: Add builtins for vectorized native versions of abs, floorf and floor

2022-11-08 Thread Kwok Cheung Yeung

Hello

This patch adds three extra builtins for the vectorized forms of the 
abs, floorf and floor math functions, which are implemented by native 
GCN instructions. I have also added a test to check that they generate 
the expected assembler instructions.


Okay for trunk?

Thanks

Kwok
From 37f49b204d501327d0867b3e8a3f01b9445fb9bd Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Tue, 8 Nov 2022 11:59:58 +
Subject: [PATCH] amdgcn: Add builtins for vectorized native versions of abs,
 floorf and floor

2022-11-08  Kwok Cheung Yeung  

gcc/
* config/gcn/gcn-builtins.def (FABSV, FLOORVF, FLOORV): New builtins.
* config/gcn/gcn.cc (gcn_expand_builtin_1): Expand GCN_BUILTIN_FABSV,
GCN_BUILTIN_FLOORVF and GCN_BUILTIN_FLOORV.

gcc/testsuite/
* gcc.target/gcn/math-builtins-1.c: New test.
---
 gcc/config/gcn/gcn-builtins.def   | 15 +
 gcc/config/gcn/gcn.cc | 33 +++
 .../gcc.target/gcn/math-builtins-1.c  | 33 +++
 3 files changed, 81 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/gcn/math-builtins-1.c

diff --git a/gcc/config/gcn/gcn-builtins.def b/gcc/config/gcn/gcn-builtins.def
index 27691909925..c50777bd3b0 100644
--- a/gcc/config/gcn/gcn-builtins.def
+++ b/gcc/config/gcn/gcn-builtins.def
@@ -64,6 +64,21 @@ DEF_BUILTIN (FABSVF, 3 /*CODE_FOR_fabsvf */,
 _A2 (GCN_BTI_V64SF, GCN_BTI_V64SF),
 gcn_expand_builtin_1)
 
+DEF_BUILTIN (FABSV, 3 /*CODE_FOR_fabsv */,
+"fabsv", B_INSN,
+_A2 (GCN_BTI_V64DF, GCN_BTI_V64DF),
+gcn_expand_builtin_1)
+
+DEF_BUILTIN (FLOORVF, 3 /*CODE_FOR_floorvf */,
+"floorvf", B_INSN,
+_A2 (GCN_BTI_V64SF, GCN_BTI_V64SF),
+gcn_expand_builtin_1)
+
+DEF_BUILTIN (FLOORV, 3 /*CODE_FOR_floorv */,
+"floorv", B_INSN,
+_A2 (GCN_BTI_V64DF, GCN_BTI_V64DF),
+gcn_expand_builtin_1)
+
 DEF_BUILTIN (LDEXPVF, 3 /*CODE_FOR_ldexpvf */,
 "ldexpvf", B_INSN,
 _A3 (GCN_BTI_V64SF, GCN_BTI_V64SF, GCN_BTI_V64SI),
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 1996115a686..9c5e3419748 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -4329,6 +4329,39 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx 
/*subtarget */ ,
emit_insn (gen_absv64sf2 (target, arg));
return target;
   }
+case GCN_BUILTIN_FABSV:
+  {
+   if (ignore)
+ return target;
+   rtx arg = force_reg (V64DFmode,
+expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+ V64DFmode,
+ EXPAND_NORMAL));
+   emit_insn (gen_absv64df2 (target, arg));
+   return target;
+  }
+case GCN_BUILTIN_FLOORVF:
+  {
+   if (ignore)
+ return target;
+   rtx arg = force_reg (V64SFmode,
+expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+ V64SFmode,
+ EXPAND_NORMAL));
+   emit_insn (gen_floorv64sf2 (target, arg));
+   return target;
+  }
+case GCN_BUILTIN_FLOORV:
+  {
+   if (ignore)
+ return target;
+   rtx arg = force_reg (V64DFmode,
+expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+ V64DFmode,
+ EXPAND_NORMAL));
+   emit_insn (gen_floorv64df2 (target, arg));
+   return target;
+  }
 case GCN_BUILTIN_LDEXPVF:
   {
if (ignore)
diff --git a/gcc/testsuite/gcc.target/gcn/math-builtins-1.c 
b/gcc/testsuite/gcc.target/gcn/math-builtins-1.c
new file mode 100644
index 000..e1aadfb40d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/math-builtins-1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+typedef float v64sf __attribute__ ((vector_size (256)));
+typedef double v64df __attribute__ ((vector_size (512)));
+typedef int v64si __attribute__ ((vector_size (256)));
+typedef long v64di __attribute__ ((vector_size (512)));
+
+v64sf f (v64sf _x, v64si _y)
+{
+  v64sf x = _x;
+  v64si y = _y;
+  x = __builtin_gcn_fabsvf (x); /* { dg-final { scan-assembler 
"v_add_f32\\s+v\[0-9\]+, 0, |v\[0-9\]+|" } } */
+  x = __builtin_gcn_floorvf (x); /* { dg-final { scan-assembler 
"v_floor_f32\\s+v\[0-9\]+, v\[0-9\]+" } }*/
+  x = __builtin_gcn_frexpvf_mant (x); /* { dg-final { scan-assembler 
"v_frexp_mant_f32\\s+v\[0-9\]+, v\[0-9\]+" } }*/
+  y = __builtin_gcn_frexpvf_exp (x); /* { dg-final { scan-assembler 
"v_frexp_exp_i32_f32\\s+v\[0-9\]+, v\[0-9\]+" } }*/
+  x = __builtin_gcn_ldexpvf (x, y); /* { dg-final { scan-assembler 
"v_ldexp_f32\\s+v\[0-9\]+, v\[0-9\]+, v\[0-9\]+&quo

[PATCH] amdgcn: Fix instruction generation for exp2 and log2 operations

2022-11-03 Thread Kwok Cheung Yeung

Hello

This patch fixes a bug introduced in a previous patch adding support for 
generating native instructions for the exp2 and log2 patterns. The 
problem is that the name of the instruction implementing the exp2 
operation is v_exp (and not v_exp2), and similarly log2 is implemented 
by v_log, so we cannot use the RTL name of the operation when outputting 
the instruction.


I've added an extra iterator for the GCN operation name and used that 
when outputting instructions instead. I have also added an extra 
testcase for GCN that exercises this case.


Okay for trunk?

Thanks

Kwok
From c0e74e01743cd3a3e0dcb2a071396e3a5751ff4c Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Thu, 3 Nov 2022 17:19:11 +
Subject: [PATCH] amdgcn: Fix instruction generation for exp2 and log2
 operations

The GCN instructions for the exp2 and log2 operations are v_exp_* and v_log_*
respectively, which unfortunately do not line up with the RTL naming
convention.  To deal with this, a new set of int attributes is now used when
generating the assembly for these instructions.

2022-11-03  Kwok Cheung Yeung  

gcc/
* config/gcn/gcn-valu.md (math_unop_insn): New attribute.
(2, 2, 2,
2, *2_insn,
*2_insn): Use math_unop_insn to generate
assembler output.

gcc/testsuite/
* gcc.target/gcn/unsafe-math-1.c: New.
---
 gcc/config/gcn/gcn-valu.md   | 20 ++--
 gcc/testsuite/gcc.target/gcn/unsafe-math-1.c | 10 ++
 2 files changed, 24 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/gcn/unsafe-math-1.c

diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 3b619512e13..9f4353831bd 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -2549,13 +2549,21 @@
(UNSPEC_SIN "sin")
(UNSPEC_COS "cos")])
 
+(define_int_attr math_unop_insn
+  [(UNSPEC_FLOOR "floor")
+   (UNSPEC_CEIL "ceil")
+   (UNSPEC_EXP2 "exp")
+   (UNSPEC_LOG2 "log")
+   (UNSPEC_SIN "sin")
+   (UNSPEC_COS "cos")])
+
 (define_insn "2"
   [(set (match_operand:FP 0 "register_operand"  "=  v")
(unspec:FP
  [(match_operand:FP 1 "gcn_alu_operand" "vSvB")]
  MATH_UNOP_1OR2REG))]
   ""
-  "v_%i0\t%0, %1"
+  "v_%i0\t%0, %1"
   [(set_attr "type" "vop1")
(set_attr "length" "8")])
 
@@ -2565,7 +2573,7 @@
  [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")]
  MATH_UNOP_1OR2REG))]
   ""
-  "v_%i0\t%0, %1"
+  "v_%i0\t%0, %1"
   [(set_attr "type" "vop1")
(set_attr "length" "8")])
 
@@ -2575,7 +2583,7 @@
  [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
  MATH_UNOP_1REG))]
   "flag_unsafe_math_optimizations"
-  "v_%i0\t%0, %1"
+  "v_%i0\t%0, %1"
   [(set_attr "type" "vop1")
(set_attr "length" "8")])
 
@@ -2585,7 +2593,7 @@
  [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
  MATH_UNOP_1REG))]
   "flag_unsafe_math_optimizations"
-  "v_%i0\t%0, %1"
+  "v_%i0\t%0, %1"
   [(set_attr "type" "vop1")
(set_attr "length" "8")])
 
@@ -2595,7 +2603,7 @@
  [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
  MATH_UNOP_TRIG))]
   "flag_unsafe_math_optimizations"
-  "v_%i0\t%0, %1"
+  "v_%i0\t%0, %1"
   [(set_attr "type" "vop1")
(set_attr "length" "8")])
 
@@ -2605,7 +2613,7 @@
  [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
  MATH_UNOP_TRIG))]
   "flag_unsafe_math_optimizations"
-  "v_%i0\t%0, %1"
+  "v_%i0\t%0, %1"
   [(set_attr "type" "vop1")
(set_attr "length" "8")])
 
diff --git a/gcc/testsuite/gcc.target/gcn/unsafe-math-1.c 
b/gcc/testsuite/gcc.target/gcn/unsafe-math-1.c
new file mode 100644
index 000..2b54fa232e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/unsafe-math-1.c
@@ -0,0 +1,10 @@
+/* { dg-do link } */
+/* { dg-options "-O0 -ffast-math" } */
+
+int main (void)
+{
+  float x = 0.123456f;
+
+  float r1 = __builtin_exp2f (x);
+  float r2 = __builtin_log2f (x);
+}
-- 
2.25.1



[OG12] [committed] amdgcn: Enable SIMD vectorization of math library functions

2022-11-01 Thread Kwok Cheung Yeung

Hello

I have committed the following patches onto the devel/omp/gcc-12 
development branch:


863579c4e30 amdgcn: Enable SIMD vectorization of math functions
bd9a6106b95 amdgcn: Add SIMD versions of math routines to libgcc
d3a2a1cc424 amdgcn: Add builtins for vector floor/floorf
a3c04a367a9 amdgcn: Fix expansion of builtin for vector fabs operation

These patches implement a vectorized version of most of the C math 
library for AMD GCN. These routines will be used when math functions are 
used in auto-vectorized code.


Note that -fno-math-errno must be specified on the command line in most 
cases before the compiler will consider using these functions.


Vectors smaller than the native 64 element ones are also supported (by 
masking off the unused lanes), which can be useful for SLP vectorized code.


Kwok Yeung


[PATCH] amdgcn: Add support for additional natively supported floating-point operations

2022-09-08 Thread Kwok Cheung Yeung

Hello

This patch adds support for some additional floating-point operations, 
in scalar and vector modes, which are natively supported by the AMD GCN 
instruction set, but haven't been implemented in GCC yet. With the 
exception of frexp, these implement standard RTL names, and should be 
utilised automatically by GCC.


The instructions for the transcendental functions are documented to have 
limited numerical precision, so they are only used if 
unsafe_math_optimizations are enabled for now.


The sin and cos instructions for some reason are scaled by 2*PI radians 
(i.e. 1.0 == 2*PI radians/360 degrees), so their inputs need to be 
scaled by 1/(2*PI) first. I've implemented this as an expander to two 
instructions - one to do the pre-scaling, one to do the sin/cos. 
1/(2*PI) is a builtin constant for GCN, but the syntax to use it in the 
LLVM assembler was wrong - now fixed.


I have also added some extra GCN-specific builtins to access the vector 
versions of some of these operations (to implement vectorized versions 
of library math routines) and to access the frexp operations.


Okay for trunk?

Thanks

Kwok
From 5592c4512212ba74a7a690821650ddcba05df848 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Thu, 8 Sep 2022 17:37:26 +
Subject: [PATCH] amdgcn: Add support for additional natively supported
 floating-point operations

This adds support for the following natively supported floating-point
operations, in scalar and vectorized modes:

floor, ceil, exp2*, log2*, sin*, cos*, ldexp, frexp

* These operations are single-precision float only and are only active
if unsafe_math_optimizations are enabled (due to potential numerical
precision issues).

2022-09-08  Kwok Cheung Yeung  

gcc/
* config/gcn/gcn-builtins.def (FABSVF, LDEXPVF, LDEXPV, FREXPVF_EXP,
FREXPVF_MANT, FREXPV_EXP, FREXPV_MANT): Add new builtins.
* config/gcn/gcn-protos.h (gcn_dconst1over2pi): New prototype.
* config/gcn/gcn-valu.md (MATH_UNOP_1OR2REG, MATH_UNOP_1REG,
MATH_UNOP_TRIG): New iterators.
(math_unop): New attributes.
(2, 2,
2, 2,
2_insn, 2_insn,
ldexp3, ldexp3,
frexp_exp2, frexp_mant2,
frexp_exp2, frexp_mant2): New instructions.
(2, 2): New expanders.
* config/gcn/gcn.cc (init_ext_gcn_constants): Update definition of
dconst1over2pi.
(gcn_dconst1over2pi): New.
(gcn_builtin_type_index): Add entry for v64df type.
(v64df_type_node): New.
(gcn_init_builtin_types): Initialize v64df_type_node.
(gcn_expand_builtin_1): Expand new builtins to instructions.
(print_operand): Fix assembler output for 1/(2*PI) constant.
* config/gcn/gcn.md (unspec): Add new entries.
---
 gcc/config/gcn/gcn-builtins.def |  35 ++
 gcc/config/gcn/gcn-protos.h |   1 +
 gcc/config/gcn/gcn-valu.md  | 181 
 gcc/config/gcn/gcn.cc   | 114 +++-
 gcc/config/gcn/gcn.md   |   4 +-
 5 files changed, 332 insertions(+), 3 deletions(-)

diff --git a/gcc/config/gcn/gcn-builtins.def b/gcc/config/gcn/gcn-builtins.def
index 54e4ea4e953..27691909925 100644
--- a/gcc/config/gcn/gcn-builtins.def
+++ b/gcc/config/gcn/gcn-builtins.def
@@ -59,6 +59,41 @@ DEF_BUILTIN (SQRTF, 3 /*CODE_FOR_sqrtf */,
 _A2 (GCN_BTI_SF, GCN_BTI_SF),
 gcn_expand_builtin_1)
 
+DEF_BUILTIN (FABSVF, 3 /*CODE_FOR_fabsvf */,
+"fabsvf", B_INSN,
+_A2 (GCN_BTI_V64SF, GCN_BTI_V64SF),
+gcn_expand_builtin_1)
+
+DEF_BUILTIN (LDEXPVF, 3 /*CODE_FOR_ldexpvf */,
+"ldexpvf", B_INSN,
+_A3 (GCN_BTI_V64SF, GCN_BTI_V64SF, GCN_BTI_V64SI),
+gcn_expand_builtin_1)
+
+DEF_BUILTIN (LDEXPV, 3 /*CODE_FOR_ldexpv */,
+"ldexpv", B_INSN,
+_A3 (GCN_BTI_V64DF, GCN_BTI_V64DF, GCN_BTI_V64SI),
+gcn_expand_builtin_1)
+
+DEF_BUILTIN (FREXPVF_EXP, 3 /*CODE_FOR_frexpvf_exp */,
+"frexpvf_exp", B_INSN,
+_A2 (GCN_BTI_V64SI, GCN_BTI_V64SF),
+gcn_expand_builtin_1)
+
+DEF_BUILTIN (FREXPVF_MANT, 3 /*CODE_FOR_frexpvf_mant */,
+"frexpvf_mant", B_INSN,
+_A2 (GCN_BTI_V64SF, GCN_BTI_V64SF),
+gcn_expand_builtin_1)
+
+DEF_BUILTIN (FREXPV_EXP, 3 /*CODE_FOR_frexpv_exp */,
+"frexpv_exp", B_INSN,
+_A2 (GCN_BTI_V64SI, GCN_BTI_V64DF),
+gcn_expand_builtin_1)
+
+DEF_BUILTIN (FREXPV_MANT, 3 /*CODE_FOR_frexpv_mant */,
+"frexpv_mant", B_INSN,
+_A2 (GCN_BTI_V64DF, GCN_BTI_V64DF),
+gcn_expand_builtin_1)
+
 DEF_BUILTIN (CMP_SWAP, -1,
"cmp_swap", B_INSN,
_A4 (GCN_BTI_UINT, GCN_BTI_VOIDPTR, GCN_BTI_UINT, GCN_BTI_UINT),
diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h
index 38197b929fd..ca804609c09 100644
--- a/gcc/co

[og12] [committed] Port remaining OG11 patches

2022-07-01 Thread Kwok Cheung Yeung
(output_file_start): Emit xnack attributes.
(gcn_hsa_declare_function_name): Obey -mxnack setting.
* config/gcn/gcn.md (xnack): New attribute.
(enabled): Rework to include "xnack" attribute.
(*movbi): Add xnack compatible alternatives.
(*mov_insn): Likewise.
(*mov_insn): Likewise.
(*mov_insn): Likewise.
(*movti_insn): Likewise.
* config/gcn/gcn.opt (-mxnack): Add the "on/off/any" syntax.
(sram_ecc_type): Rename to ...
(hsaco_attr_type: ... this.)
* config/gcn/mkoffload.c (SET_XNACK_ANY): New macro.
(TEST_XNACK): Delete.
(TEST_XNACK_ANY): New macro.
(TEST_XNACK_ON): New macro.
(main): Support the new -mxnack=on/off/any syntax.
---
 gcc/ChangeLog.omp   |  31 ++
 gcc/config/gcn/gcn-hsa.h|   3 +-
 gcc/config/gcn/gcn-opts.h   |  10 ++--
 gcc/config/gcn/gcn-valu.md  |  29 -
 gcc/config/gcn/gcn.cc   |  34 ++-
 gcc/config/gcn/gcn.md   | 113 +++-
 gcc/config/gcn/gcn.opt  |  18 +++---
 gcc/config/gcn/mkoffload.cc |  19 --
 8 files changed, 171 insertions(+), 86 deletions(-)

diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index 24e22e19ae8..7af24604841 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,34 @@
+2022-06-10  Andrew Stubbs  
+
+   * config/gcn/gcn-hsa.h (XNACKOPT): New macro.
+   (ASM_SPEC): Use XNACKOPT.
+   * config/gcn/gcn-opts.h (enum sram_ecc_type): Rename to ...
+   (enum hsaco_attr_type): ... this, and generalize the names.
+   (TARGET_XNACK): New macro.
+   * config/gcn/gcn-valu.md (gather_insn_1offset):
+   Add xnack compatible alternatives.
+   (gather_insn_2offsets): Likewise.
+   * config/gcn/gcn.c (gcn_option_override): Permit -mxnack for devices
+   other than Fiji.
+   (gcn_expand_epilogue): Remove early-clobber problems.
+   (output_file_start): Emit xnack attributes.
+   (gcn_hsa_declare_function_name): Obey -mxnack setting.
+   * config/gcn/gcn.md (xnack): New attribute.
+   (enabled): Rework to include "xnack" attribute.
+   (*movbi): Add xnack compatible alternatives.
+   (*mov_insn): Likewise.
+   (*mov_insn): Likewise.
+   (*mov_insn): Likewise.
+   (*movti_insn): Likewise.
+   * config/gcn/gcn.opt (-mxnack): Add the "on/off/any" syntax.
+   (sram_ecc_type): Rename to ...
+   (hsaco_attr_type: ... this.)
+   * config/gcn/mkoffload.c (SET_XNACK_ANY): New macro.
+   (TEST_XNACK): Delete.
+   (TEST_XNACK_ANY): New macro.
+   (TEST_XNACK_ON): New macro.
+   (main): Support the new -mxnack=on/off/any syntax.
+
 2022-06-30  Kwok Cheung Yeung  
 
* dwarf2cfi.cc (get_cfa_from_loc_descr): Check op against DW_OP_bregx.
diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
index b3079cebb43..fd08947574f 100644
--- a/gcc/config/gcn/gcn-hsa.h
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -81,12 +81,13 @@ extern unsigned int gcn_local_sym_hash (const char *name);
 /* In HSACOv4 no attribute setting means the binary supports "any" hardware
configuration.  The name of the attribute also changed.  */
 #define SRAMOPT "msram-ecc=on:-mattr=+sramecc;msram-ecc=off:-mattr=-sramecc"
+#define XNACKOPT "mxnack=on:-mattr=+xnack;mxnack=off:-mattr=-xnack"
 
 /* Use LLVM assembler and linker options.  */
 #define ASM_SPEC  "-triple=amdgcn--amdhsa "  \
  "%:last_arg(%{march=*:-mcpu=%*}) " \
  "%{!march=*|march=fiji:--amdhsa-code-object-version=3} " \
- "%{" NO_XNACK "mxnack:-mattr=+xnack;:-mattr=-xnack} " \
+ "%{" NO_XNACK XNACKOPT "}" \
  "%{" NO_SRAM_ECC SRAMOPT "} " \
  "-filetype=obj"
 #define LINK_SPEC "--pie --export-dynamic"
diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index b62dfb45f59..07ddc79cda3 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -48,11 +48,13 @@ extern enum gcn_isa {
 #define TARGET_M0_LDS_LIMIT (TARGET_GCN3)
 #define TARGET_PACKED_WORK_ITEMS (TARGET_CDNA2_PLUS)
 
-enum sram_ecc_type
+#define TARGET_XNACK (flag_xnack != HSACO_ATTR_OFF)
+
+enum hsaco_attr_type
 {
-  SRAM_ECC_OFF,
-  SRAM_ECC_ON,
-  SRAM_ECC_ANY
+  HSACO_ATTR_OFF,
+  HSACO_ATTR_ON,
+  HSACO_ATTR_ANY
 };
 
 #endif
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index abe46201344..ec114db9dd1 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -741,13 +741,13 @@
 {})
 
 (define_insn "gather_insn_1offset"
-  [(set (match_operand:V_ALL 0 "register_operand" "=v")
+  [(set (match_operand:V_ALL 0 "register_operand" "=v,")

[og12] [committed] Fix bootstrap build of OG12

2022-06-30 Thread Kwok Cheung Yeung
The following patches have been committed to devel/omp/gcc-12 to fix a 
bootstrap build of the branch:


29ba2e4eeff Fix mis-merge of 'dwarf: Multi-register CFI address support'
82a3f9f22f7 Build fixes for OG12 on more recent GCC versions
e9ee746093b Fix string formatting issues
b8ecb83d528 Build fix for 'openmp: allow requires unified_shared_memory'

Kwok
From b8ecb83d52884153c2b9b9c44840f933dfaa4dc7 Mon Sep 17 00:00:00 2001
From: Tobias Burnus 
Date: Thu, 30 Jun 2022 08:30:48 +0200
Subject: [PATCH 1/5] Build fix for 'openmp: allow requires
 unified_shared_memory'

OG12 commit fa65fc45972d27f2fd79a44eaba1978348177ee9 added an
error diagnostic (moved around in later commits); this diagnostic
caused bootstrap fails as %<...%> were missing. This commit adds
them.

gcc/c/
* c-parser.cc (c_parser_omp_requires): Add missing %<...%> in error.

gcc/cp/
* parser.cc (cp_parser_omp_requires): Add missing %<...%> in error.
---
 gcc/c/c-parser.cc | 8 
 gcc/cp/parser.cc  | 8 
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 363b80ebfeb..5cabcb684e9 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -22872,8 +22872,8 @@ c_parser_omp_requires (c_parser *parser)
  if (flag_offload_memory != OFFLOAD_MEMORY_UNIFIED
  && flag_offload_memory != OFFLOAD_MEMORY_NONE)
error_at (cloc,
- "unified_address is incompatible with the "
- "selected -foffload-memory option");
+ "%<unified_address%> is incompatible with the "
+ "selected %<-foffload-memory%> option");
  flag_offload_memory = OFFLOAD_MEMORY_UNIFIED;
}
  else if (!strcmp (p, "unified_shared_memory"))
@@ -22883,8 +22883,8 @@ c_parser_omp_requires (c_parser *parser)
  if (flag_offload_memory != OFFLOAD_MEMORY_UNIFIED
  && flag_offload_memory != OFFLOAD_MEMORY_NONE)
error_at (cloc,
- "unified_shared_memory is incompatible with the "
- "selected -foffload-memory option");
+ "%<unified_shared_memory%> is incompatible with the "
+ "selected %<-foffload-memory%> option");
  flag_offload_memory = OFFLOAD_MEMORY_UNIFIED;
}
  else if (!strcmp (p, "dynamic_allocators"))
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 563bf4546eb..f8455e30ed8 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -47177,8 +47177,8 @@ cp_parser_omp_requires (cp_parser *parser, cp_token 
*pragma_tok)
  if (flag_offload_memory != OFFLOAD_MEMORY_UNIFIED
  && flag_offload_memory != OFFLOAD_MEMORY_NONE)
error_at (cloc,
- "unified_address is incompatible with the "
- "selected -foffload-memory option");
+ "%<unified_address%> is incompatible with the "
+ "selected %<-foffload-memory%> option");
  flag_offload_memory = OFFLOAD_MEMORY_UNIFIED;
}
  else if (!strcmp (p, "unified_shared_memory"))
@@ -47188,8 +47188,8 @@ cp_parser_omp_requires (cp_parser *parser, cp_token 
*pragma_tok)
  if (flag_offload_memory != OFFLOAD_MEMORY_UNIFIED
  && flag_offload_memory != OFFLOAD_MEMORY_NONE)
error_at (cloc,
- "unified_shared_memory is incompatible with the "
- "selected -foffload-memory option");
+ "%<unified_shared_memory%> is incompatible with the "
+ "selected %<-foffload-memory%> option");
  flag_offload_memory = OFFLOAD_MEMORY_UNIFIED;
}
  else if (!strcmp (p, "dynamic_allocators"))
-- 
2.25.1

From e9ee746093bd989c33685e3197c75b901aef2cc1 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Thu, 30 Jun 2022 15:31:41 +0100
Subject: [PATCH 3/5] Fix string formatting issues

Stricter format-string checking in more recent versions of GCC can cause
build failures.

2022-06-30  Kwok Cheung Yeung  

gcc/
* omp-data-optimize.cc (omp_data_optimize_add_candidate): Suppress
format checking.
(omp_data_optimize_can_be_private): Likewise.
(omp_data_optimize_can_be_private): Likewise.

(This should be a fixup to ab53d5a6a27dce2a92f28a62ceb6e184c8356f25: 'openacc:
Add data optimization pass')

2022-06-30  Kwok Cheung Yeung  

gcc/
* gimplify.cc (gimplify_scan_omp_clauses): Remove extra
'%<..%>' pair in format string.

(This should be a fixup to dbc770c4351c8824e8083f8aff6117a6b4ba3c0d: 'openmp

[committed] wwwdocs: Document devel/omp/gcc-12

2022-06-29 Thread Kwok Cheung Yeung

Hello

The devel/omp/gcc-12 branch has been pushed to the git repo as the 
development branch for new OpenMP, OpenACC and offloading functionality, 
based on the GCC 12 branch.


The git doc page has been modified to point to the new branch as the 
active OMP development branch, while the previous devel/omp/gcc-11 
branch now joins the list of inactive OMP branches.


Kwok
From 0695e5e969eba730e517a6adbdf38b8774f89437 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Wed, 29 Jun 2022 22:32:39 +0100
Subject: [PATCH] Document devel/omp/gcc-12 branch

Also moves the old devel/omp/gcc-11 branch to the inactive branches
section next to devel/omp/gcc-10.
---
 htdocs/git.html | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/htdocs/git.html b/htdocs/git.html
index 5202363c..52ad05db 100644
--- a/htdocs/git.html
+++ b/htdocs/git.html
@@ -280,15 +280,15 @@ in Git.
   Makarov mailto:vmaka...@redhat.com;>vmaka...@redhat.com.
   
 
-  https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;a=shortlog;h=refs/heads/devel/omp/gcc-11;>devel/omp/gcc-11
+  https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;a=shortlog;h=refs/heads/devel/omp/gcc-12;>devel/omp/gcc-12
   This branch is for collaborative development of
   https://gcc.gnu.org/wiki/OpenACC;>OpenACC and
   https://gcc.gnu.org/wiki/openmp;>OpenMP support and related
   functionality, such
   as https://gcc.gnu.org/wiki/Offloading;>offloading support (OMP:
   offloading and multi processing).
-  The branch is based on releases/gcc-11.
-  Please send patch emails with a short-hand [og11] tag in the
+  The branch is based on releases/gcc-12.
+  Please send patch emails with a short-hand [og12] tag in the
   subject line, and use ChangeLog.omp files.
 
   unified-autovect
@@ -895,13 +895,15 @@ merged.
 
   https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;a=shortlog;h=refs/heads/devel/omp/gcc-9;>devel/omp/gcc-9
   https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;a=shortlog;h=refs/heads/devel/omp/gcc-10;>devel/omp/gcc-10
+  https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;a=shortlog;h=refs/heads/devel/omp/gcc-11;>devel/omp/gcc-11
   These branches were used for collaborative development of
   https://gcc.gnu.org/wiki/OpenACC;>OpenACC and
   https://gcc.gnu.org/wiki/openmp;>OpenMP support and related
   functionality as the successors to openacc-gcc-9-branch after the move to
   Git.
-  The branches were based on releases/gcc-9 and releases/gcc-10 respectively.
-  Development has now moved to the devel/omp/gcc-11 branch.
+  The branches were based on releases/gcc-9, releases/gcc-10 and
+  releases/gcc-11 respectively.
+  Development has now moved to the devel/omp/gcc-12 branch.
 
   hammer-3_3-branch
   The goal of this branch was to have a stable compiler based on GCC 3.3
-- 
2.25.1



[PATCH][v2] openmp, fortran: Check that the type of an event handle in a detach clause is suitable [PR104131]

2022-03-02 Thread Kwok Cheung Yeung

Hello

I have updated the patch to catch array elements and structure 
components as additional checks, in addition to checking that the 
variable is a scalar.


The check has been moved to the end of resolve_omp_clauses as it is more 
appropriate there. This gets rid of the additional 'Unexpected !$OMP END 
TASK statement' error, since the type error is now caught after the 
matching phase.


Coarrays (with the testcases in pr104131-2.f90) can be dealt with in a 
separate patch. Is this part okay for trunk?


Thanks

Kwok

On 01/03/2022 3:37 pm, Mikael Morin wrote:

So, if I try to sum up what has been gathered in this thread:

  - pr104131.f90 is invalid, as x is not scalar.
    Checks are better done in resolve_omp_clauses after a call
    to gfc_resolve_expr.
    Checking expr->sym->attr.dimension seems to cover more cases than
    expr->rank > 0.

  - pr104131-2.f90 is valid and should be accepted.

  - Some other cases should be rejected, including x[1] (coindexed
    variable), x(1) (array element), x%comp (structure component).

Is that correct? Anything else?

Regarding the expr->rank vs expr->sym->attr.dimension controversy, my 
take is that it should stick to the error message.  Use expr->rank if 
the error is about scalar vs array, use expr->sym->attr.dimension if 
it’s about subobject-ness of an array variable.


Coming back to the PR, the ICE backtraces for pr104131.f90 and 
pr104131-2.f90 are different and should probably be treated separately.
I don’t know how difficult the bullet 2 above would be, but bullet 1 and 
3 seem quite doable.
From 3ed6eb1e38ad2a25c6eca18f9ff4d05d3f227db3 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Wed, 2 Mar 2022 17:09:45 +
Subject: [PATCH] openmp, fortran: Check that the type of an event handle in a
 detach clause is suitable [PR104131]

This rejects variables that are array types, array elements or derived type
members when used as the event handle inside a detach clause (in accordance
with the OpenMP specification).  This would previously lead to an ICE.

2022-03-02  Kwok Cheung Yeung  

gcc/fortran/

PR fortran/104131
* openmp.cc (gfc_match_omp_detach): Move check for type of event
handle to...
(resolve_omp_clauses) ...here.  Also check that the event handle is
not an array, or an array access or structure element access.

gcc/testsuite/

PR fortran/104131
* gfortran.dg/gomp/pr104131.f90: New.
* gfortran.dg/gomp/task-detach-1.f90: Update expected error message.
---
 gcc/fortran/openmp.cc | 34 +--
 gcc/testsuite/gfortran.dg/gomp/pr104131.f90   | 26 ++
 .../gfortran.dg/gomp/task-detach-1.f90|  4 +--
 3 files changed, 51 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/pr104131.f90

diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc
index 19142c4d8d0..16cd03a3d67 100644
--- a/gcc/fortran/openmp.cc
+++ b/gcc/fortran/openmp.cc
@@ -531,14 +531,6 @@ gfc_match_omp_detach (gfc_expr **expr)
   if (gfc_match_variable (expr, 0) != MATCH_YES)
 goto syntax_error;
 
-  if ((*expr)->ts.type != BT_INTEGER || (*expr)->ts.kind != gfc_c_intptr_kind)
-{
-  gfc_error ("%qs at %L should be of type "
-"integer(kind=omp_event_handle_kind)",
-(*expr)->symtree->n.sym->name, &(*expr)->where);
-  return MATCH_ERROR;
-}
-
   if (gfc_match_char (')') != MATCH_YES)
 goto syntax_error;
 
@@ -7581,9 +7573,29 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses 
*omp_clauses,
gfc_error ("%s must contain at least one MAP clause at %L",
   p, >loc);
 }
-  if (!openacc && omp_clauses->mergeable && omp_clauses->detach)
-gfc_error ("% clause at %L must not be used together with "
-  "% clause", _clauses->detach->where);
+
+  if (!openacc && omp_clauses->detach)
+{
+  if (!gfc_resolve_expr (omp_clauses->detach)
+ || omp_clauses->detach->ts.type != BT_INTEGER
+ || omp_clauses->detach->ts.kind != gfc_c_intptr_kind
+ || omp_clauses->detach->rank != 0)
+   gfc_error ("%qs at %L should be a scalar of type "
+  "integer(kind=omp_event_handle_kind)",
+  omp_clauses->detach->symtree->n.sym->name,
+  _clauses->detach->where);
+  else if (omp_clauses->detach->symtree->n.sym->attr.dimension > 0)
+   gfc_error ("The event handle at %L must not be an array element",
+  _clauses->detach->where);
+  else if (omp_clauses->detach->symtree->n.sym->ts.type == BT_DERIVED
+  || omp_clauses->detach->symtree->n.sym->ts.type == BT_CLASS)
+   gfc_error (&quo

Re: [PATCH] openmp, fortran: Check that event handles passed to detach clauses are not arrays [PR104131]

2022-02-28 Thread Kwok Cheung Yeung

On 28/02/2022 5:37 pm, Jakub Jelinek wrote:

On Mon, Feb 28, 2022 at 06:33:15PM +0100, Mikael Morin wrote:

It is true that the spots I saw in fortran/openmp.cc that test rank look
like:
  if (!gfc_resolve_expr (el->expr)
  || el->expr->ts.type != BT_INTEGER || el->expr->rank != 0)
etc., so probably !gfc_resolve_expr call is missing.


As long as the expression is expected to not be a (contained) function call,
I think it should work.

In the general case non-syntactic errors are preferably checked and reported
later at resolution stage, where contained functions are known.


Oh, I've missed that it is done during parsing and not during resolution.
That !gfc_resolve_expr call and the checking if it is BT_INTEGER etc.
should be certainly moved to resolve_omp_clauses.



Calling gfc_resolve_expr does not work to update the rank when called 
from gfc_match_omp_detach:


(gdb) p *e->ref
$3 = {type = REF_ARRAY, u = {ar = {type = AR_ELEMENT, dimen = 0, codimen 
= 1, in_allocate = false, team = 0x0, stat = 0x0, where = {nextc = 
0x2e532d8, lb = 0x2e53260}, as = 0x2e04110, c_where = {{nextc = 0x0, lb 
= 0x0} }, start = {0x0 }, end = {0x0 
}, stride = {0x0 }, dimen_type = 
{DIMEN_THIS_IMAGE, 0 }}, c = {component = 0x2, sym = 
0x1}, ss = {start = 0x2, end = 0x1, length = 0x0}, i = INQUIRY_KIND}, 
next = 0x0}


In gfc_expression_rank, e->ref is non-NULL, so e->rank is not set from 
the symtree. It then iterates through the ref elements - ref->type == 
REF_ARRAY and ref->u.ar.type == AR_ELEMENT, so e->rank remains at 0.


I'll move the check to resolve_omp_clauses and see if it works there.

Thanks

Kwok


Re: [PATCH] openmp, fortran: Check that event handles passed to detach clauses are not arrays [PR104131]

2022-02-28 Thread Kwok Cheung Yeung

On 28/02/2022 2:07 pm, Jakub Jelinek wrote:

On Mon, Feb 28, 2022 at 02:01:03PM +, Kwok Cheung Yeung wrote:

diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc
index 19142c4d8d0..50a1c476009 100644
--- a/gcc/fortran/openmp.cc
+++ b/gcc/fortran/openmp.cc
@@ -531,9 +531,10 @@ gfc_match_omp_detach (gfc_expr **expr)
if (gfc_match_variable (expr, 0) != MATCH_YES)
  goto syntax_error;
  
-  if ((*expr)->ts.type != BT_INTEGER || (*expr)->ts.kind != gfc_c_intptr_kind)

+  if ((*expr)->ts.type != BT_INTEGER || (*expr)->ts.kind != gfc_c_intptr_kind
+  || (*expr)->symtree->n.sym->as)


Don't we usually test instead || (*expr)->rank != 0 when testing for
scalars?

Jakub



If I run GCC in GDB on the pr104131.f90 testcase and inspect the expr, I 
get:


534   if ((*expr)->ts.type != BT_INTEGER || (*expr)->ts.kind != 
gfc_c_intptr_kind

(gdb) p **expr
$2 = {expr_type = EXPR_VARIABLE, ts = {type = BT_INTEGER, kind = 8, u = 
{derived = 0x0, cl = 0x0, pad = 0}, interface = 0x0, is_c_interop = 1, 
is_iso_c = 0, f90_type = BT_INTEGER, deferred = false, interop_kind = 
0x2e3fb80}, rank = 0, shape = 0x0, symtree = 0x2e3ffe0, ref = 0x2e3e600, 
where = { ...


So (*expr)->rank is 0 here even with an array. I'm not sure why - is 
rank updated later, or did we forget to call something on the event 
handle expression?


Testing against n->sym->as for an array check has been used elsewhere in 
openmp.cc, to prevent reductions against arrays in OpenACC in 
resolve_omp_clauses.


Kwok


[PATCH] openmp, fortran: Check that event handles passed to detach clauses are not arrays [PR104131]

2022-02-28 Thread Kwok Cheung Yeung

Hello

This patch addresses PR fortran/104131 on the GCC bug tracker, where an 
ICE would occur if an array or co-array was passed as the event handle 
in the detach clause of a task.


Since the event handle is supposed to be a scalar of type 
omp_event_handle_kind, we can simply reject the event handle during 
parsing if it is any type of array, thereby preventing the situation 
leading to an ICE in the first place.


Okay for trunk?

Thanks

Kwok
From 8ed3b8bd793298f94bdefbdff32f91eaea1a9d70 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Mon, 28 Feb 2022 12:34:22 +
Subject: [PATCH] openmp, fortran: Check that event handles passed to detach
 clauses are not arrays [PR104131]

2022-02-28  Kwok Cheung Yeung  

gcc/fortran/

PR fortran/104131
* openmp.cc (gfc_match_omp_detach): Check that the event handle is not
an array type.

gcc/testsuite/

PR fortran/104131
* gfortran.dg/gomp/pr104131.f90: New.
* gfortran.dg/gomp/pr104131-2.f90: New.
* gfortran.dg/gomp/task-detach-1.f90: Update expected error message.
---
 gcc/fortran/openmp.cc|  5 +++--
 gcc/testsuite/gfortran.dg/gomp/pr104131-2.f90| 10 ++
 gcc/testsuite/gfortran.dg/gomp/pr104131.f90  | 10 ++
 gcc/testsuite/gfortran.dg/gomp/task-detach-1.f90 |  2 +-
 4 files changed, 24 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/pr104131-2.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/pr104131.f90

diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc
index 19142c4d8d0..50a1c476009 100644
--- a/gcc/fortran/openmp.cc
+++ b/gcc/fortran/openmp.cc
@@ -531,9 +531,10 @@ gfc_match_omp_detach (gfc_expr **expr)
   if (gfc_match_variable (expr, 0) != MATCH_YES)
 goto syntax_error;
 
-  if ((*expr)->ts.type != BT_INTEGER || (*expr)->ts.kind != gfc_c_intptr_kind)
+  if ((*expr)->ts.type != BT_INTEGER || (*expr)->ts.kind != gfc_c_intptr_kind
+  || (*expr)->symtree->n.sym->as)
 {
-  gfc_error ("%qs at %L should be of type "
+  gfc_error ("%qs at %L should be a scalar of type "
 "integer(kind=omp_event_handle_kind)",
 (*expr)->symtree->n.sym->name, &(*expr)->where);
   return MATCH_ERROR;
diff --git a/gcc/testsuite/gfortran.dg/gomp/pr104131-2.f90 
b/gcc/testsuite/gfortran.dg/gomp/pr104131-2.f90
new file mode 100644
index 000..8d10367ba3b
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/gomp/pr104131-2.f90
@@ -0,0 +1,10 @@
+! { dg-do compile }
+! { dg-options "-fopenmp -fcoarray=single" }
+
+program p
+  use iso_c_binding, only: c_intptr_t
+  integer, parameter :: omp_event_handle_kind = c_intptr_t
+  integer (kind=omp_event_handle_kind) :: x[*]
+  !$omp task detach (x) ! { dg-error "'x' at \\\(1\\\) should be a scalar of 
type integer\\\(kind=omp_event_handle_kind\\\)" }
+  !$omp end task ! { dg-error "Unexpected !\\\$OMP END TASK statement at 
\\\(1\\\)" }
+end
diff --git a/gcc/testsuite/gfortran.dg/gomp/pr104131.f90 
b/gcc/testsuite/gfortran.dg/gomp/pr104131.f90
new file mode 100644
index 000..70a2dedfd7f
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/gomp/pr104131.f90
@@ -0,0 +1,10 @@
+! { dg-do compile }
+! { dg-options "-fopenmp" }
+
+program p
+  use iso_c_binding, only: c_intptr_t
+  integer, parameter :: omp_event_handle_kind = c_intptr_t
+  integer(omp_event_handle_kind) :: x(1)
+  !$omp task detach(x) ! { dg-error "'x' at \\\(1\\\) should be a scalar of 
type integer\\\(kind=omp_event_handle_kind\\\)" }
+  !$omp end task ! { dg-error "Unexpected !\\\$OMP END TASK statement at 
\\\(1\\\)" }
+end
diff --git a/gcc/testsuite/gfortran.dg/gomp/task-detach-1.f90 
b/gcc/testsuite/gfortran.dg/gomp/task-detach-1.f90
index 020be13a8b6..b73db07b7c3 100644
--- a/gcc/testsuite/gfortran.dg/gomp/task-detach-1.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/task-detach-1.f90
@@ -18,7 +18,7 @@ program task_detach_1
   !$omp task detach(x) mergeable ! { dg-error "'DETACH' clause at \\\(1\\\) 
must not be used together with 'MERGEABLE' clause" }
   !$omp end task
 
-  !$omp task detach(z) ! { dg-error "'z' at \\\(1\\\) should be of type 
integer\\\(kind=omp_event_handle_kind\\\)" }
+  !$omp task detach(z) ! { dg-error "'z' at \\\(1\\\) should be a scalar of 
type integer\\\(kind=omp_event_handle_kind\\\)" }
   !$omp end task ! { dg-error "Unexpected !\\\$OMP END TASK statement at 
\\\(1\\\)" }
   
   !$omp task detach (x) firstprivate (x) ! { dg-error "DETACH event handle 'x' 
in FIRSTPRIVATE clause at \\\(1\\\)" }
-- 
2.25.1



[og11][committed] openmp: Improve handling of nested OpenMP metadirectives in C and C++

2022-02-18 Thread Kwok Cheung Yeung

This patch has been committed to the devel/omp/gcc-11 development branch:

249df772b70f7b9f50f68030d4ea9c25624cc578  openmp: Improve handling of 
nested OpenMP metadirectives in C and C++


Kwok


[PATCH] openmp: Improve handling of nested OpenMP metadirectives in C and C++ (was: Re: [PATCH 1/7] openmp: Add C support for parsing metadirectives)

2022-02-18 Thread Kwok Cheung Yeung
This patch (to be applied on top of the metadirective patch series) 
addresses issues found in the C/C++ parsers when nested metadirectives 
are used.


analyze_metadirective_body when encountering code like:

#pragma omp metadirective when (set={...}: A)
  #pragma omp metadirective when (set={...}: B)

would stop just before ': B' because it naively assumes that the '}' 
marks the end of the body associated with the first metadirective, when 
it needs to include the whole of the second metadirective plus its 
associated body. This is fixed by checking that the nesting level of 
parentheses is zero as well before stopping the gathering of tokens.


The assert on the remaining tokens after parsing a clause can fail 
(resulting in an ICE) if there is a parse error in the directive or the 
body, since in that case not all tokens may be processed before parsing 
aborts early. The assert is therefore not enforced if any parse errors 
occur in the clause.


I have also moved the handling of the metadirective pragma from 
c_parser_omp_construct to c_parser_pragma (and their C++ equivalents), 
since c_parser_omp_construct has some checks that do not apply to 
metadirectives.


Kwok
From a9e4936b8476b97f11bb81b416ef3d28fa60cd37 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Fri, 18 Feb 2022 19:00:57 +
Subject: [PATCH] openmp: Improve handling of nested OpenMP metadirectives in C
 and C++

This patch fixes a misparsing issue when encountering code like:

  #pragma omp metadirective when (set={...}: A)
    #pragma omp metadirective when (set={...}: B)

When called for the first metadirective, analyze_metadirective_body would
stop just before the colon in the second metadirective because it naively
assumes that the '}' marks the end of a code block.

The assertion for clauses to end parsing at the same point is now disabled
if a parse error has occurred during the parsing of the clause, since some
tokens may not be consumed if a parse error cuts parsing short.

2022-02-18  Kwok Cheung Yeung  

gcc/c/
* c-parser.cc (c_parser_omp_construct): Move handling of
PRAGMA_OMP_METADIRECTIVE from here...
(c_parser_pragma): ...to here.
(analyze_metadirective_body): Check that the bracket nesting level
is also zero before stopping the adding of tokens on encountering a
close brace.
(c_parser_omp_metadirective): Modify function signature and update.
Do not assert on remaining tokens if there has been a parse error.

gcc/cp/
* parser.cc (cp_parser_omp_construct): Move handling of
PRAGMA_OMP_METADIRECTIVE from here...
(cp_parser_pragma): ...to here.
(analyze_metadirective_body): Check that the bracket
nesting level is also zero before stopping the adding of tokens on
encountering a close brace.
(cp_parser_omp_metadirective): Modify function signature and update.
Do not assert on remaining tokens if there has been a parse error.

gcc/testsuite/
* c-c++-common/gomp/metadirective-1.c (f): Add test for
improperly nested metadirectives.
---
 gcc/c/c-parser.cc | 47 +--
 gcc/cp/parser.cc  | 33 ++---
 .../c-c++-common/gomp/metadirective-1.c   | 13 +
 3 files changed, 51 insertions(+), 42 deletions(-)

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 58fcbb398ee..6a134e0fb50 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -1592,6 +1592,7 @@ static void c_parser_omp_taskwait (c_parser *);
 static void c_parser_omp_taskyield (c_parser *);
 static void c_parser_omp_cancel (c_parser *);
 static void c_parser_omp_nothing (c_parser *);
+static void c_parser_omp_metadirective (c_parser *, bool *);
 
 enum pragma_context { pragma_external, pragma_struct, pragma_param,
  pragma_stmt, pragma_compound };
@@ -1600,8 +1601,6 @@ static bool c_parser_omp_cancellation_point (c_parser *, 
enum pragma_context);
 static bool c_parser_omp_target (c_parser *, enum pragma_context, bool *);
 static void c_parser_omp_end_declare_target (c_parser *);
 static bool c_parser_omp_declare (c_parser *, enum pragma_context);
-static tree c_parser_omp_metadirective (location_t, c_parser *, char *,
-   omp_clause_mask, tree *, bool *);
 static void c_parser_omp_requires (c_parser *);
 static bool c_parser_omp_error (c_parser *, enum pragma_context);
 static bool c_parser_omp_ordered (c_parser *, enum pragma_context, bool *);
@@ -12551,6 +12550,10 @@ c_parser_pragma (c_parser *parser, enum pragma_context 
context, bool *if_p)
   c_parser_omp_nothing (parser);
   return false;
 
+case PRAGMA_OMP_METADIRECTIVE:
+  c_parser_omp_metadirective (parser, if_p);
+  return true;
+
 case PRAGMA_OMP_ERROR:
   return c_parser_omp_error (parser, context);
 
@@ -23020,7 +23023,7 @@ analyze_metadirective_body (c_parser *parser

[OG11][committed] Fortran metadirective fixes

2022-02-14 Thread Kwok Cheung Yeung

Hello

I have backported and committed the recent two fixes for Fortran 
metadirective handling onto the devel/omp/gcc-11 development branch:


38ed9d83b893df0bbd098c7b44dbbeb56ed7dd1c  openmp: Eliminate non-matching 
metadirective variants early in Fortran front-end 
(https://gcc.gnu.org/pipermail/gcc-patches/2022-February/590349.html)


8cb0121af50eacb63098a79ff8c6deae05883c6f  openmp: More Fortran front-end 
fixes for metadirectives 
(https://gcc.gnu.org/pipermail/gcc-patches/2022-February/590350.html)


Kwok


Re: [PATCH 6/7] openmp, fortran: Add Fortran support for parsing metadirectives

2022-02-14 Thread Kwok Cheung Yeung
This patch (again, to be applied on top of the current set of 
metadirective patches) fixes two minor issues with metadirectives in the 
Fortran front-end.


- 'sorry' is called if a declarative OpenMP directive is found in a 
metadirective clause.
- An ICE that occurs with an empty metadirective (i.e. just '!$omp 
metadirective' with nothing else) is fixed.


Thanks

Kwok
From 153b8dbd19cf90b1869be7f409d55d1ab5ba81d5 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Fri, 11 Feb 2022 15:42:50 +
Subject: [PATCH 2/2] openmp: More Fortran front-end fixes for metadirectives

This adds a check for declarative OpenMP directives in metadirective
variants (already present in the C/C++ front-ends), and fixes an
ICE when an empty metadirective (i.e. just '!$omp metadirective')
is presented.

2022-02-11  Kwok Cheung Yeung  

gcc/fortran/
* gfortran.h (is_omp_declarative_stmt): New.
* openmp.cc (match_omp_metadirective): Reject declarative OpenMP
directives with 'sorry'.
* parse.cc (parse_omp_metadirective_body): Check that state stack head
is non-null before dereferencing.
(is_omp_declarative_stmt): New.

gcc/testsuite/
* gfortran.dg/gomp/metadirective-2.f90 (main): Test empty
metadirective.
---
 gcc/fortran/gfortran.h   |  1 +
 gcc/fortran/openmp.cc|  3 +++
 gcc/fortran/parse.cc | 16 +++-
 .../gfortran.dg/gomp/metadirective-2.f90 |  5 -
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index bdb4b0f6aa5..37eb039b6d4 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -3852,6 +3852,7 @@ bool gfc_parse_file (void);
 void gfc_global_used (gfc_gsymbol *, locus *);
 gfc_namespace* gfc_build_block_ns (gfc_namespace *);
 gfc_statement match_omp_directive (void);
+bool is_omp_declarative_stmt (gfc_statement);
 
 /* dependency.cc */
 int gfc_dep_compare_functions (gfc_expr *, gfc_expr *, bool);
diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc
index 5e87e18ce0d..0071484817d 100644
--- a/gcc/fortran/openmp.cc
+++ b/gcc/fortran/openmp.cc
@@ -5151,6 +5151,9 @@ match_omp_metadirective (bool begin_p)
   gfc_statement directive = match_omp_directive ();
   gfc_matching_omp_context_selector = false;
 
+  if (is_omp_declarative_stmt (directive))
+   sorry ("declarative directive variants are not supported");
+
   if (gfc_error_flag_test ())
{
  gfc_current_locus = old_loc;
diff --git a/gcc/fortran/parse.cc b/gcc/fortran/parse.cc
index cd18315697e..cb8acb3c68f 100644
--- a/gcc/fortran/parse.cc
+++ b/gcc/fortran/parse.cc
@@ -5841,7 +5841,8 @@ parse_omp_metadirective_body (gfc_statement omp_st)
 
   gfc_in_metadirective_body = old_in_metadirective_body;
 
-  *clause->code = *gfc_state_stack->head;
+  if (gfc_state_stack->head)
+   *clause->code = *gfc_state_stack->head;
   pop_state ();
 
   gfc_commit_symbols ();
@@ -7081,3 +7082,16 @@ is_oacc (gfc_state_data *sd)
   return false;
 }
 }
+
+/* Return true if ST is a declarative OpenMP statement.  */
+bool
+is_omp_declarative_stmt (gfc_statement st)
+{
+  switch (st)
+{
+  case_omp_decl:
+   return true;
+  default:
+   return false;
+}
+}
diff --git a/gcc/testsuite/gfortran.dg/gomp/metadirective-2.f90 
b/gcc/testsuite/gfortran.dg/gomp/metadirective-2.f90
index 06c324589d0..cdd5e85068e 100644
--- a/gcc/testsuite/gfortran.dg/gomp/metadirective-2.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/metadirective-2.f90
@@ -43,7 +43,7 @@ program main
 end do
   !$omp end metadirective
   
-  ! Test labels in the body
+  ! Test labels in the body.
   !$omp begin metadirective &
   !$omp&   when (device={arch("nvptx")}: parallel do) &
   !$omp&   when (device={arch("gcn")}: parallel)
@@ -56,4 +56,7 @@ program main
 20continue
 end do
   !$omp end metadirective
+
+  ! Test empty metadirective.
+  !$omp metadirective
 end program
-- 
2.25.1



Re: [PATCH 6/7] openmp, fortran: Add Fortran support for parsing metadirectives

2022-02-14 Thread Kwok Cheung Yeung

> This patch implements metadirective parsing in the Fortran frontend.

This patch (to be applied on top of the current set of metadirective 
patches) implements a feature that was present in the C and C++ 
front-ends but not in Fortran - the early culling of metadirective 
variants that can be eliminated during parsing because their selectors 
are resolvable at parse-time and still do not match. This is more 
efficient, and allows code with nested metadirectives like this (which 
works on other compilers) to compile:


!$omp metadirective when (implementation={vendor("ibm")}: &
!$omp&  target teams distribute)
  !$omp metadirective when (implementation={vendor("gnu")}: parallel do)

This would currently fail because when parsing the body of the 'target 
teams distribute', the parser sees the metadirective when it is 
expecting a loop nest. If the vendor("ibm") is eliminated early though, 
it would just evaluate to '!$omp nothing' and the following 
metadirective would not be incorrect. This doesn't work for selectors 
such as 'arch' that would need to be deferred until later passes though.


As the selector matching code (omp_context_selector_matches in 
omp-general.cc) works on Generic trees, I have allowed for a limited 
translation from the GFortran AST form to tree form during parsing, 
skipping over things like expression translation that must be done later.


I have also fixed another FE issue with nested metadirectives, that 
occurs when you have something like:


program P
  !$omp metadirective
!$omp metadirective
  !$omp metadirective

end program P

When gfc_match_end is called after parsing the do statement, it needs to 
drop down multiple levels from the innermost metadirective state to that 
 of 'program P' in order to find the proper end type, and not just one 
level as it currently does.


Thanks

Kwok

From 5a7b109a014422a5b43e43669df1dc0d59e830cf Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Fri, 11 Feb 2022 11:20:18 +0000
Subject: [PATCH 1/2] openmp: Eliminate non-matching metadirective variants
 early in Fortran front-end

This patch checks during parsing if a metadirective selector is both
resolvable and non-matching - if so, it is removed from further
consideration.  This is both more efficient, and avoids spurious
syntax errors caused by considering combinations of selectors that
lead to invalid combinations of OpenMP directives, when that
combination would never arise in the first place.

This exposes another bug - when metadirectives that are not of the
begin-end variety are nested, we might have to drill up through
multiple layers of the state stack to reach the state for the
next statement.  This is now fixed.

2022-02-11  Kwok Cheung Yeung  

gcc/
* omp-general.cc (DELAY_METADIRECTIVES_AFTER_LTO): Check that cfun is
non-null before dereferencing.

gcc/fortran/
* decl.cc (gfc_match_end): Search for first previous state that is not
COMP_OMP_METADIRECTIVE.
* gfortran.h (gfc_skip_omp_metadirective_clause): Add prototype.
* openmp.cc (match_omp_metadirective): Skip clause if
result of gfc_skip_omp_metadirective_clause is true.
* trans-openmp.cc (gfc_trans_omp_set_selector): Add argument and
disable expression conversion if false.
(gfc_skip_omp_metadirective_clause): New.

gcc/testsuite/
* gfortran.dg/gomp/metadirective-8.f90: New.
---
 gcc/fortran/decl.cc   | 21 +-
 gcc/fortran/gfortran.h|  4 ++
 gcc/fortran/openmp.cc |  7 +++-
 gcc/fortran/trans-openmp.cc   | 38 ++-
 gcc/omp-general.cc|  5 ++-
 .../gfortran.dg/gomp/metadirective-8.f90  | 22 +++
 6 files changed, 81 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/metadirective-8.f90

diff --git a/gcc/fortran/decl.cc b/gcc/fortran/decl.cc
index e024e360c88..a77ac768175 100644
--- a/gcc/fortran/decl.cc
+++ b/gcc/fortran/decl.cc
@@ -8325,15 +8325,32 @@ gfc_match_end (gfc_statement *st)
 
 case COMP_CONTAINS:
 case COMP_DERIVED_CONTAINS:
-case COMP_OMP_METADIRECTIVE:
 case COMP_OMP_BEGIN_METADIRECTIVE:
   state = gfc_state_stack->previous->state;
   block_name = gfc_state_stack->previous->sym == NULL
-? NULL : gfc_state_stack->previous->sym->name;
+  ? NULL : gfc_state_stack->previous->sym->name;
   abreviated_modproc_decl = gfc_state_stack->previous->sym
&& gfc_state_stack->previous->sym->abr_modproc_decl;
   break;
 
+case COMP_OMP_METADIRECTIVE:
+  {
+   /* Metadirectives can be nested, so we need to drill down to the
+  first state that is not COMP_OMP_METADIRECTIVE.  */
+   gfc_state_data *state_data = gfc_state_stack;
+

Re: [OG11][committed] OpenMP metadirective support

2022-01-31 Thread Kwok Cheung Yeung

21766085775bd52c9db53629636c830fc9dc6fa0 openmp: Add support for 
'target_device' context selector set


I have committed an extra fix to remove an extra comma in an error 
message changed by this patch. The comma caused regressions in an existing 
test for 'declare variant' because the wording of the error message no 
longer matched what the test expected. This fix is already in the version 
of the patch posted at:


https://gcc.gnu.org/pipermail/gcc-patches/2022-January/589191.html

Kwok

From 23dd64c4715b2df2181aaf995e3040d54edba129 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Mon, 31 Jan 2022 05:44:21 -0800
Subject: [PATCH] openmp: Fix error message in Fortran front-end

An extra comma in an error message causes failures in the Fortran tests for
declare variant, because the message differs from that expected.

2022-01-31  Kwok Cheung Yeung  

gcc/fortran/
* openmp.c (gfc_match_omp_context_selector_specification): Remove
extra comma in error message.
---
 gcc/fortran/ChangeLog.omp | 5 +
 gcc/fortran/openmp.c  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/ChangeLog.omp b/gcc/fortran/ChangeLog.omp
index 81c4d9a129b..a78fab44352 100644
--- a/gcc/fortran/ChangeLog.omp
+++ b/gcc/fortran/ChangeLog.omp
@@ -1,3 +1,8 @@
+2022-01-31  Kwok Cheung Yeung  
+
+   * openmp.c (gfc_match_omp_context_selector_specification): Remove
+   extra comma in error message.
+
 2022-01-25  Kwok Cheung Yeung  
 
* openmp.c (omp_target_device_selectors): New.
diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
index e448868d246..94930fed059 100644
--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -4844,7 +4844,7 @@ gfc_match_omp_context_selector_specification 
(gfc_omp_set_selector **oss_head,
gfc_error ("expected 'construct', 'device', 'implementation', "
   "'target_device' or 'user' at %C");
  else
-   gfc_error ("expected 'construct', 'device', 'implementation', "
+   gfc_error ("expected 'construct', 'device', 'implementation' "
   "or 'user' at %C");
  return MATCH_ERROR;
}
-- 
2.30.0.335.ge636282



[OG11][committed] openmp: Add warning when functions containing metadirectives with 'construct={target}' called directly

2022-01-28 Thread Kwok Cheung Yeung

Hello

I have backported and committed the patch 'openmp: Add warning when 
functions containing metadirectives with 'construct={target}' called 
directly' to the devel/omp/gcc-11 development branch:


d6d82af7918 openmp: Add warning when functions containing metadirectives 
with 'construct={target}' called directly


Thanks

Kwok


[PATCH] openmp: Add warning when functions containing metadirectives with 'construct={target}' called directly

2022-01-28 Thread Kwok Cheung Yeung

Hello

Regarding this issue which we discussed previously - I have created a 
patch that adds a warning when this situation is detected.


When a metadirective in an explicitly marked target function is 
gimplified, it is checked to see if it contains a 'construct={target}' 
selector - if it does, then the containing function is marked with 'omp 
metadirective construct target'.


In the omp-low pass, when function calls are processed, the target 
function is checked to see if it contains the marker. If it does and the 
call is not made in a target context, a warning is emitted.


This will obviously not catch every possible occurrence (e.g. if the 
function containing the metadirective is called from another target 
function which is then called locally, or if the call is made via a 
function pointer), but it might still be useful? Okay for mainline (once 
the metadirective patches are done)?


Thanks

Kwok

On 26/07/2021 10:23 pm, Jakub Jelinek wrote:

On Mon, Jul 26, 2021 at 10:19:35PM +0100, Kwok Cheung Yeung wrote:

Yes, that is a target variant, but I'm pretty sure we've decided that
the target construct added for declare target is actually not a dynamic
property.  So basically mostly return to the 5.0 wording with clarifications
for Fortran.  See
https://github.com/OpenMP/spec/issues/2612#issuecomment-849742988
for details.
Making the target in construct dynamic would pretty much force all the
scoring to be dynamic as well.


In that comment, Deepak says:

So, we decided to keep the target trait static, requiring that the declare
target directive must be explicit and that the function version must be
different from the version of the function that may be called outside of a
target region (with the additional clarification that whether it differs or
not will be implementation defined).

"the function version must be different from the version of the function
that may be called outside of a target region": This is what we do not have
in GCC at the moment - the function versions called within and outside
target regions are the same on the host.

"whether it differs or not will be implementation defined": So whether a
function with 'declare target' and a metadirective involving a 'target'
construct behaves the same or not when called from both inside and outside
of a target region is implementation defined?

I will leave the treatment of target constructs in the selector as it is
then, with both calls going to the same function with the metadirective
resolving to the 'target' variant. I will try to address your other concerns
later.


I think you're right, it should differ in the host vs. target version iff
it is in explicit declare target block, my memory is weak, but let's implement
the 5.0 wording for now (and ignore the 5.1 wording later on) and only when
we'll be doing 5.2 change this (and change for both metadirective and
declare variant at that point).
Ok?

Jakub
From 741b037a8cd6b85d43a6273ab305ce07705dfa23 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Fri, 28 Jan 2022 13:56:33 +0000
Subject: [PATCH] openmp: Add warning when functions containing metadirectives
 with 'construct={target}' called directly

void f(void)
{
  #pragma omp metadirective \
when (construct={target}: A) \
default (B)
...
}
...
{
  #pragma omp target
f(); // Target call

  f(); // Local call
}

With the OpenMP 5.0/5.1 specifications, we would expect A to be selected in
the metadirective when the target call is made, but B when f is called
directly outside of a target context.  However, since GCC does not have
separate copies of f for local and target calls, and the construct selector
is static, it must be resolved one way or the other at compile-time (currently
in favour of selecting A), which may be unexpected behaviour.

This patch attempts to detect the above situation, and will emit a warning
if found.

2022-01-28  Kwok Cheung Yeung  

gcc/
* gimplify.cc (gimplify_omp_metadirective): Mark offloadable functions
containing metadirectives with 'construct={target}' in the selector.
* omp-general.cc (omp_has_target_constructor_p): New.
* omp-general.h (omp_has_target_constructor_p): New prototype.
* omp-low.cc (lower_omp_1): Emit warning if marked functions called
outside of a target context.

gcc/testsuite/
* c-c++-common/gomp/metadirective-4.c (main): Add expected warning.
* gfortran.dg/gomp/metadirective-4.f90 (test): Likewise.

libgomp/
* testsuite/libgomp.c-c++-common/metadirective-2.c (main): Add
expected warning.
* testsuite/libgomp.fortran/metadirective-2.f90 (test): Likewise.
---
 gcc/gimplify.cc   | 21 +++
 gcc/omp-general.cc| 21 +++
 gcc/omp-general.h |  1 +
 gcc/omp-low.cc| 18 
 .

Re: [PATCH] openmp: Add support for target_device selector set in metadirectives

2022-01-26 Thread Kwok Cheung Yeung

Hello

Just noticed a bug in the ISA checking in the nvptx plugin - the minor 
version should only be compared if the major version is equal, otherwise 
it would reject an isa of sm_35 if the card is capable of supporting 
sm_52, for example. This patch fixes the issue.


Thanks

Kwok

diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 7427677e69d..86a12c3fcfd 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -2047,7 +2047,8 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, 
void **args)
 /* TODO: Implement GOMP_OFFLOAD_async_run. */
 
 #define CHECK_ISA(major, minor) \
-  if (device->compute_major >= major && device->compute_minor >= minor \
+  if (((device->compute_major == major && device->compute_minor >= minor) \
+   || device->compute_major > major) \
   && strcmp (isa, "sm_"#major#minor) == 0) \
 return true
 


[OG11][committed] OpenMP metadirective support

2022-01-25 Thread Kwok Cheung Yeung

Hello

I have backported and committed my metadirective patches onto the 
current OpenMP development branch (devel/omp/gcc-11). These are:


f464df13a44b9814341659be631f051377a2ce25 openmp: Add C support for 
parsing metadirectives
a238b6934b62ce3e8342047e41840c804d83b59d openmp: Add middle-end support 
for metadirectives
7e672d2ba146ca55dfffc36b198fbb3f3200f8f2 openmp: Add support for 
resolving metadirectives during parsing and Gimplification
b6fd3d1a54736c87fcd29a4ed294b31346b3af75 openmp: Add support for 
streaming metadirectives and resolving them after LTO
360db2054413d21399473173a85870da6479ab8c openmp: Add C++ support for 
parsing metadirectives
ceb0beb7ba9357146994895070762f8a9d94ca7c openmp, fortran: Add Fortran 
support for parsing metadirectives
eb4bea483010d91fbeeae9c863e92da873fbeef9 openmp: Add testcases for 
metadirectives

b597c0835ede0067d1b009e0d7381515b44d8753 openmp: Metadirective fixes
21766085775bd52c9db53629636c830fc9dc6fa0 openmp: Add support for 
'target_device' context selector set


The following backport from mainline was also required:

94c179971913b4837ec76a9e02a9a8a5cbf8e024 Expose stable sort algorithm to 
gcc_sort_r and add vec::stablesort


Thanks

Kwok


[PATCH] openmp: Add support for target_device selector set in metadirectives

2022-01-24 Thread Kwok Cheung Yeung

Hello

This patch builds on top of the previous patches for metadirective 
support to add support for the target_device selector set introduced in 
OpenMP 5.1.


This selector set is similar to the existing device selector set, but 
can take an additional device_num selector, specifying the device number 
of the OpenMP device to be matched against. Since the device at a 
particular number depends on the hardware configuration, the check 
necessarily needs to be made at runtime.


This patch expands a target_device selector into a call to a new libgomp 
function GOMP_evaluate_target_device, which returns true if there is a 
match.


If device_num is the same as the current device, then it returns the 
result of calling GOMP_evaluate_current_device. This function is 
currently implemented for nvptx, amdgcn and x86 Linux - they behave 
similarly to the various implementations of 
TARGET_OMP_DEVICE_KIND_ARCH_ISA in gcc/config/*, but are part of the 
libgomp runtime rather than the GCC internals. Stub implementations have 
been added so that libgomp can still compile in other configurations.


If the current device is the host device (which should be the usual 
scenario with target_device), and device_num is an accelerator, then it 
will return the result of a new plugin function 
GOMP_OFFLOAD_evaluate_device, which is implemented for nvptx and amdgcn. 
I have added some extra code to the nvptx plugin to determine the 
supported SM level at runtime (which is matched against the 'isa' 
selector), while GCN uses the existing device_isa field in agent_info.


In the case where the current device is an accelerator and the 
device_num is not the current device number, GOMP_evaluate_target_device 
simply returns false, as multiple accelerators generally do not know 
anything about each other.


Bootstrapped on x86_64 Linux and tested with no offloading, and with 
offloading to nvptx and gcn. I have also checked that it builds when 
targeting a powerpc64le Linux host. Okay for inclusion in trunk (after 
the metadirective work is reviewed, presumably after GCC 12 is released)?


Thanks

Kwok

From 2d2f00947783e1ecdf54943d9c499015ce61d267 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Wed, 24 Nov 2021 02:51:28 -0800
Subject: [PATCH] openmp: Add support for 'target_device' context selector set

2022-01-18  Kwok Cheung Yeung  

gcc/
* builtin-types.def (BT_FN_BOOL_INT_CONST_PTR_CONST_PTR_CONST_PTR): New
type.
* omp-builtins.def (BUILT_IN_GOMP_EVALUATE_TARGET_DEVICE): New builtin.
* omp-general.cc (omp_context_selector_matches): Handle 'target_device'
selector set.
(omp_dynamic_cond): Generate expression tree for 'target_device'
selector set.
(omp_context_compute_score): Handle selectors in 'target_device' set.

gcc/c/
* c-parser.cc (omp_target_device_selectors): New.
(c_parser_omp_context_selector): Accept 'target_device' selector set.
Treat 'device_num' selector as expression.
(c_parser_omp_context_selector_specification): Handle 'target_device'
selector set.

gcc/cp/
* parser.cc (omp_target_device_selectors): New.
(cp_parser_omp_context_selector): Accept 'target_device' selector set.
Treat 'device_num' selector as expression.
(cp_parser_omp_context_selector_specification): Handle 'target_device'
selector set.

gcc/fortran/
* openmp.cc (omp_target_device_selectors): New.
(gfc_match_omp_context_selector): Accept 'target_device' selector set.
Treat 'device_num' selector as expression.
(gfc_match_omp_context_selector_specification): Handle 'target_device'
selector set.
* types.def (BT_FN_BOOL_INT_CONST_PTR_CONST_PTR_CONST_PTR): New type.

gcc/testsuite/
* c-c++-common/gomp/metadirective-7.c: New.
* gfortran.dg/gomp/metadirective-7.f90: New.

libgomp/
* Makefile.am (libgomp_la_SOURCES): Add selector.c.
* Makefile.in: Regenerate.
* config/gcn/selector.c: New.
* config/linux/selector.c: New.
* config/linux/x86/selector.c: New.
* config/nvptx/selector.c: New.
* libgomp-plugin.h (GOMP_OFFLOAD_evaluate_device): New.
* libgomp.h (struct gomp_device_descr): Add evaluate_device_func field.
* libgomp.map (GOMP_5.1): Add GOMP_evaluate_target_device.
* libgomp_g.h (GOMP_evaluate_current_device): New.
(GOMP_evaluate_target_device): New.
* oacc-host.c (host_evaluate_device): New.
(host_openacc_exec): Initialize evaluate_device_func field to
host_evaluate_device.
* plugin/plugin-gcn.c (GOMP_OFFLOAD_evaluate_device): New.
* plugin/plugin-nvptx.c (struct ptx_device): Add compute_major and
compute_minor fields.
(nvptx_open_device): Read compute capability information from device.
(CHECK_ISA): New macro

[PATCH] openmp: Metadirective patch fixes

2022-01-24 Thread Kwok Cheung Yeung

Hello

This patch fixes a couple of issues with the latest patch series for 
metadirectives.


Firstly, the changes to c_parser_skip_to_end_of_block_or_statement and 
its C++ equivalent cause a couple of tests (e.g. gcc.dg/attr-malloc.c) 
to regress.


This is because these tests cause the parser to skip code starting from 
within a pair of brackets - this causes the unsigned nesting_depth to 
wrap around to UINT_MAX when a ')' is encountered and so semicolons no 
longer stop the skipping, causing too much code to be skipped and 
resulting in the test regressions. This is fixed by tracking the bracket 
nesting level separately from the brace nesting level in a signed int, 
and by allowing the skipping to end when that value is negative.


Secondly, user condition selectors containing only compile-time 
constants should be treated as static rather than dynamic. In practice 
though it doesn't matter much, as GCC readily eliminates the resulting 
'if ()' statements via constant folding.


These fixes should be merged into the original metadirective patches.

Thanks

Kwok

From 77f419aef8a608440789b0ebb4a08f11d69f00e2 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Fri, 21 Jan 2022 18:23:57 +0000
Subject: [PATCH 8/9] openmp: Metadirective fixes

Fix regressions introduced by block/statement skipping.

If user condition selector is constant, do not return it as a dynamic
selector.

2022-01-21  Kwok Cheung Yeung  

gcc/c/
* c-parser.cc (c_parser_skip_to_end_of_block_or_statement): Track
bracket depth separately from nesting depth.

gcc/cp/
* parser.cc (cp_parser_skip_to_end_of_statement): Revert.
(cp_parser_skip_to_end_of_block_or_statement): Track bracket depth
separately from nesting depth.

gcc/
* omp-general.cc (omp_dynamic_cond): Do not return user condition if
constant.
---
 gcc/c/c-parser.cc  |  9 ++---
 gcc/cp/parser.cc   | 20 ++--
 gcc/omp-general.cc |  8 ++--
 3 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 47075973bfe..f3afc38eb65 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -1344,6 +1344,7 @@ static void
 c_parser_skip_to_end_of_block_or_statement (c_parser *parser)
 {
   unsigned nesting_depth = 0;
+  int bracket_depth = 0;
   bool save_error = parser->error;
 
   while (true)
@@ -1366,7 +1367,7 @@ c_parser_skip_to_end_of_block_or_statement (c_parser 
*parser)
case CPP_SEMICOLON:
  /* If the next token is a ';', we have reached the
 end of the statement.  */
- if (!nesting_depth)
+ if (!nesting_depth && bracket_depth <= 0)
{
  /* Consume the ';'.  */
  c_parser_consume_token (parser);
@@ -1394,11 +1395,13 @@ c_parser_skip_to_end_of_block_or_statement (c_parser 
*parser)
  /* Track parentheses in case the statement is a standalone 'for'
 statement - we want to skip over the semicolons separating the
 operands.  */
- nesting_depth++;
+ if (nesting_depth == 0)
+   ++bracket_depth;
  break;
 
case CPP_CLOSE_PAREN:
- nesting_depth--;
+ if (nesting_depth == 0)
+   --bracket_depth;
  break;
 
case CPP_PRAGMA:
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index de35f42d7c4..7cfaff9d65b 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -3931,17 +3931,6 @@ cp_parser_skip_to_end_of_statement (cp_parser* parser)
  ++nesting_depth;
  break;
 
-   case CPP_OPEN_PAREN:
- /* Track parentheses in case the statement is a standalone 'for'
-statement - we want to skip over the semicolons separating the
-operands.  */
- ++nesting_depth;
- break;
-
-   case CPP_CLOSE_PAREN:
- --nesting_depth;
- break;
-
case CPP_KEYWORD:
  if (token->keyword != RID__EXPORT
  && token->keyword != RID__MODULE
@@ -3991,6 +3980,7 @@ static void
 cp_parser_skip_to_end_of_block_or_statement (cp_parser* parser)
 {
   int nesting_depth = 0;
+  int bracket_depth = 0;
 
   /* Unwind generic function template scope if necessary.  */
   if (parser->fully_implicit_function_template_p)
@@ -4012,7 +4002,7 @@ cp_parser_skip_to_end_of_block_or_statement (cp_parser* 
parser)
 
case CPP_SEMICOLON:
  /* Stop if this is an unnested ';'. */
- if (!nesting_depth)
+ if (!nesting_depth && bracket_depth <= 0)
nesting_depth = -1;
  break;
 
@@ -4035,11 +4025,13 @@ cp_parser_skip_to_end_of_block_or_statement (cp_parser* 
parser)
  /* Track parentheses in case the statement is a standalone 'for'
 statement - we want to skip over the semicolons separating the
 operands.  */
- nesting_depth++;
+ if (nesting_depth == 0)
+   bracke

[PATCH 7/7] openmp: Add testcases for metadirectives

2021-12-10 Thread Kwok Cheung Yeung

This adds testcases for metadirectives.

Kwok

From d3f80b603298fb2f3501a28b888acfdbc02a64e7 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Tue, 7 Dec 2021 11:25:33 +0000
Subject: [PATCH 7/7] openmp: Add testcases for metadirectives

2021-12-10  Kwok Cheung Yeung  

gcc/testsuite/
* c-c++-common/gomp/metadirective-1.c: New.
* c-c++-common/gomp/metadirective-2.c: New.
* c-c++-common/gomp/metadirective-3.c: New.
* c-c++-common/gomp/metadirective-4.c: New.
* c-c++-common/gomp/metadirective-5.c: New.
* c-c++-common/gomp/metadirective-6.c: New.
* gcc.dg/gomp/metadirective-1.c: New.
* gfortran.dg/gomp/metadirective-1.f90: New.
* gfortran.dg/gomp/metadirective-2.f90: New.
* gfortran.dg/gomp/metadirective-3.f90: New.
* gfortran.dg/gomp/metadirective-4.f90: New.
* gfortran.dg/gomp/metadirective-5.f90: New.
* gfortran.dg/gomp/metadirective-6.f90: New.

libgomp/
* testsuite/libgomp.c-c++-common/metadirective-1.c: New.
* testsuite/libgomp.c-c++-common/metadirective-2.c: New.
* testsuite/libgomp.c-c++-common/metadirective-3.c: New.
* testsuite/libgomp.c-c++-common/metadirective-4.c: New.
* testsuite/libgomp.fortran/metadirective-1.f90: New.
* testsuite/libgomp.fortran/metadirective-2.f90: New.
* testsuite/libgomp.fortran/metadirective-3.f90: New.
* testsuite/libgomp.fortran/metadirective-4.f90: New.
---
 .../c-c++-common/gomp/metadirective-1.c   | 29 
 .../c-c++-common/gomp/metadirective-2.c   | 74 +++
 .../c-c++-common/gomp/metadirective-3.c   | 31 
 .../c-c++-common/gomp/metadirective-4.c   | 40 ++
 .../c-c++-common/gomp/metadirective-5.c   | 24 ++
 .../c-c++-common/gomp/metadirective-6.c   | 31 
 gcc/testsuite/gcc.dg/gomp/metadirective-1.c   | 15 
 .../gfortran.dg/gomp/metadirective-1.f90  | 41 ++
 .../gfortran.dg/gomp/metadirective-2.f90  | 59 +++
 .../gfortran.dg/gomp/metadirective-3.f90  | 34 +
 .../gfortran.dg/gomp/metadirective-4.f90  | 39 ++
 .../gfortran.dg/gomp/metadirective-5.f90  | 30 
 .../gfortran.dg/gomp/metadirective-6.f90  | 31 
 .../libgomp.c-c++-common/metadirective-1.c| 35 +
 .../libgomp.c-c++-common/metadirective-2.c| 41 ++
 .../libgomp.c-c++-common/metadirective-3.c| 34 +
 .../libgomp.c-c++-common/metadirective-4.c| 52 +
 .../libgomp.fortran/metadirective-1.f90   | 33 +
 .../libgomp.fortran/metadirective-2.f90   | 40 ++
 .../libgomp.fortran/metadirective-3.f90   | 29 
 .../libgomp.fortran/metadirective-4.f90   | 46 
 21 files changed, 788 insertions(+)
 create mode 100644 gcc/testsuite/c-c++-common/gomp/metadirective-1.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/metadirective-2.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/metadirective-3.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/metadirective-4.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/metadirective-5.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/metadirective-6.c
 create mode 100644 gcc/testsuite/gcc.dg/gomp/metadirective-1.c
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/metadirective-1.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/metadirective-2.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/metadirective-3.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/metadirective-4.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/metadirective-5.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/metadirective-6.f90
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/metadirective-2.c
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/metadirective-3.c
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/metadirective-4.c
 create mode 100644 libgomp/testsuite/libgomp.fortran/metadirective-1.f90
 create mode 100644 libgomp/testsuite/libgomp.fortran/metadirective-2.f90
 create mode 100644 libgomp/testsuite/libgomp.fortran/metadirective-3.f90
 create mode 100644 libgomp/testsuite/libgomp.fortran/metadirective-4.f90

diff --git a/gcc/testsuite/c-c++-common/gomp/metadirective-1.c 
b/gcc/testsuite/c-c++-common/gomp/metadirective-1.c
new file mode 100644
index 00000000000..72cf0abbbd7
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/metadirective-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+
+#define N 100
+
+void f (int a[], int b[], int c[])
+{
+  #pragma omp metadirective \
+  default (teams loop) \
+  default (parallel loop) /* { dg-error "there can only be one default 
clause in a metadirective before '\\(' token" } */
+for (i = 0; i < N; i++) c[i] = a[i] * b[i];
+
+  #pragma omp metadirective \
+

[PATCH 6/7] openmp, fortran: Add Fortran support for parsing metadirectives

2021-12-10 Thread Kwok Cheung Yeung

This patch implements metadirective parsing in the Fortran frontend.

The code previously used to process context selectors in 'declare 
variant' is refactored so that it can be reused in metadirectives. The 
big case lists in parse_executable are moved into macros, since 
parse_omp_metadirective_body needs to know how to act depending on the 
type of directive variant. The selection of end statements in 
parse_omp_do and parse_omp_structured_block are also delegated to 
gfc_omp_end_stmt.


Labels in directive variant bodies are handled by assigning a unique 
number to each statement body parsed in a metadirective, and adding this 
number as a field to gfc_st_label, such that labels with identical 
numbers but different region ids are considered different.


I have also reverted my previous changes to the TREE_STRING_LENGTH check 
in omp_check_context_selector and omp_context_name_list_prop. This is 
because in the accel compiler, lang_GNU_Fortran returns 0 even when the 
code is in Fortran, resulting in the selector failing to match. Instead, 
I opted to increment the TREE_STRING_LENGTH when it is created in 
gfc_trans_omp_set_selector - this should be safe as it is an internal 
implementation detail not visible to end users.


Kwok

From eed8a06fca397edd5fb451f08c8b1a6f7d67951a Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Mon, 6 Dec 2021 22:59:36 +
Subject: [PATCH 6/7] openmp, fortran: Add Fortran support for parsing
 metadirectives

This adds support for parsing OpenMP metadirectives in the Fortran front end.

2021-12-10  Kwok Cheung Yeung  

gcc/
* omp-general.c (omp_check_context_selector): Revert string length
check.
(omp_context_name_list_prop): Likewise.

gcc/fortran/
* decl.c (gfc_match_end): Handle COMP_OMP_METADIRECTIVE and
COMP_OMP_BEGIN_METADIRECTIVE.
* dump-parse-tree.c (show_omp_node): Handle EXEC_OMP_METADIRECTIVE.
(show_code_node): Handle EXEC_OMP_METADIRECTIVE.
* gfortran.h (enum gfc_statement): Add ST_OMP_METADIRECTIVE,
ST_OMP_BEGIN_METADIRECTIVE and ST_OMP_END_METADIRECTIVE.
(struct gfc_omp_metadirective_clause): New structure.
(gfc_get_omp_metadirective_clause): New macro.
(struct gfc_st_label): Add omp_region field.
(enum gfc_exec_op): Add EXEC_OMP_METADIRECTIVE.
(struct gfc_code): Add omp_metadirective_clauses field.
(gfc_free_omp_metadirective_clauses): New prototype.
(match_omp_directive): New prototype.
* io.c (format_asterisk): Initialize omp_region field.
* match.h (gfc_match_omp_begin_metadirective): New prototype.
(gfc_match_omp_metadirective): New prototype.
* openmp.c (gfc_match_omp_eos): Match ')' in context selectors.
(gfc_free_omp_metadirective_clauses): New.
(gfc_match_omp_clauses): Remove context_selector argument.  Rely on
gfc_match_omp_eos to match end of clauses.
(match_omp): Remove extra argument to gfc_match_omp_clauses.
(gfc_match_omp_context_selector): Remove extra argument to
gfc_match_omp_clauses.  Set gfc_matching_omp_context_selector
before call to gfc_match_omp_clauses and reset after.
(gfc_match_omp_context_selector_specification): Modify to take a
gfc_omp_set_selector** argument.
(gfc_match_omp_declare_variant): Pass set_selectors to
gfc_match_omp_context_selector_specification.
(match_omp_metadirective): New.
(gfc_match_omp_begin_metadirective): New.
(gfc_match_omp_metadirective): New.
(resolve_omp_metadirective): New.
(gfc_resolve_omp_directive): Handle EXEC_OMP_METADIRECTIVE.
* parse.c (gfc_matching_omp_context_selector): New variable.
(gfc_in_metadirective_body): New variable.
(gfc_omp_region_count): New variable.
(decode_omp_directive): Match 'begin metadirective',
'end metadirective' and 'metadirective'.
(match_omp_directive): New.
(case_omp_structured_block): New.
(case_omp_do): New.
(gfc_ascii_statement): Handle metadirective statements.
(gfc_omp_end_stmt): New.
(parse_omp_do): Delegate to gfc_omp_end_stmt.
(parse_omp_structured_block): Delegate to gfc_omp_end_stmt. Handle
ST_OMP_END_METADIRECTIVE.
(parse_omp_metadirective_body): New.
(parse_executable): Delegate to case_omp_structured_block and
case_omp_do.  Return after one statement if compiling regular
metadirective.  Handle metadirective statements.
(gfc_parse_file): Reset gfc_omp_region_count,
gfc_in_metadirective_body and gfc_matching_omp_context_selector.
* parse.h (enum gfc_compile_state): Add COMP_OMP_METADIRECTIVE and
COMP_OMP_BEGIN_METADIRECTIVE.
(gfc_omp_end_stmt): New prototype.
(gfc_matching_omp_context_selector): New declaration.
(gfc_in_metadirective_body): New declaration

[PATCH 5/7] openmp: Add C++ support for parsing metadirectives

2021-12-10 Thread Kwok Cheung Yeung
This patch adds metadirective parsing support to the C++ parser. This is 
basically just a straight port of the C code to the C++ front end.


Kwok

From e9bb138d4c3f560e48e408facce2361533685a98 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Mon, 6 Dec 2021 22:58:01 +
Subject: [PATCH 5/7] openmp: Add C++ support for parsing metadirectives

This adds support for parsing OpenMP metadirectives in the C++ front end.

2021-12-10  Kwok Cheung Yeung  

gcc/cp/
* parser.c (cp_parser_skip_to_end_of_statement): Handle parentheses.
(cp_parser_skip_to_end_of_block_or_statement): Likewise.
(cp_parser_omp_context_selector): Add extra argument.  Allow
non-constant expressions.
(cp_parser_omp_context_selector_specification): Add extra argument and
propagate to cp_parser_omp_context_selector.
(analyze_metadirective_body): New.
(cp_parser_omp_metadirective): New.
(cp_parser_omp_construct): Handle PRAGMA_OMP_METADIRECTIVE.
(cp_parser_pragma): Handle PRAGMA_OMP_METADIRECTIVE.
---
 gcc/cp/parser.c | 425 +++-
 1 file changed, 417 insertions(+), 8 deletions(-)

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 6f273bfe21f..afbfe148949 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -3907,6 +3907,17 @@ cp_parser_skip_to_end_of_statement (cp_parser* parser)
  ++nesting_depth;
  break;
 
+   case CPP_OPEN_PAREN:
+ /* Track parentheses in case the statement is a standalone 'for'
+statement - we want to skip over the semicolons separating the
+operands.  */
+ ++nesting_depth;
+ break;
+
+   case CPP_CLOSE_PAREN:
+ --nesting_depth;
+ break;
+
case CPP_KEYWORD:
  if (token->keyword != RID__EXPORT
  && token->keyword != RID__MODULE
@@ -3996,6 +4007,17 @@ cp_parser_skip_to_end_of_block_or_statement (cp_parser* 
parser)
  nesting_depth++;
  break;
 
+   case CPP_OPEN_PAREN:
+ /* Track parentheses in case the statement is a standalone 'for'
+statement - we want to skip over the semicolons separating the
+operands.  */
+ nesting_depth++;
+ break;
+
+   case CPP_CLOSE_PAREN:
+ nesting_depth--;
+ break;
+
case CPP_KEYWORD:
  if (token->keyword != RID__EXPORT
  && token->keyword != RID__MODULE
@@ -44972,7 +44994,8 @@ static const char *const omp_user_selectors[] = {
  score(score-expression)  */
 
 static tree
-cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p)
+cp_parser_omp_context_selector (cp_parser *parser, tree set, bool has_parms_p,
+   bool metadirective_p)
 {
   tree ret = NULL_TREE;
   do
@@ -45188,15 +45211,21 @@ cp_parser_omp_context_selector (cp_parser *parser, 
tree set, bool has_parms_p)
  while (1);
  break;
case CTX_PROPERTY_EXPR:
- t = cp_parser_constant_expression (parser);
+ /* Allow non-constant expressions in metadirectives.  */
+ t = metadirective_p
+ ? cp_parser_expression (parser)
+ : cp_parser_constant_expression (parser);
  if (t != error_mark_node)
{
  t = fold_non_dependent_expr (t);
- if (!value_dependent_expression_p (t)
- && (!INTEGRAL_TYPE_P (TREE_TYPE (t))
- || !tree_fits_shwi_p (t)))
+ if (metadirective_p && !INTEGRAL_TYPE_P (TREE_TYPE (t)))
error_at (token->location, "property must be "
- "constant integer expression");
+  "integer expression");
+ else if (!metadirective_p && !value_dependent_expression_p (t)
+   && (!INTEGRAL_TYPE_P (TREE_TYPE (t))
+   || !tree_fits_shwi_p (t)))
+   error_at (token->location, "property must be constant "
+  "integer expression");
  else
properties = tree_cons (NULL_TREE, t, properties);
}
@@ -45260,7 +45289,8 @@ cp_parser_omp_context_selector (cp_parser *parser, tree 
set, bool has_parms_p)
 
 static tree
 cp_parser_omp_context_selector_specification (cp_parser *parser,
- bool has_parms_p)
+ bool has_parms_p,
+ bool metadirective_p = false)
 {
   tree ret = NULL_TREE;
   do
@@ -45308,7 +45338,8 @@ cp_parser_omp_context_selector_specification (cp_parser 
*parser,
  

[PATCH 4/7] openmp: Add support for streaming metadirectives and resolving them after LTO

2021-12-10 Thread Kwok Cheung Yeung
This patch adds support for streaming the Gimple metadirective 
representation during LTO. An extra pass (also using 
omp_get_dynamic_candidates) is also added to resolve metadirectives 
after LTO, which is required for selectors that need to be resolved on 
the accel compiler.


Kwok

From 85826d05e029571fd003dd629aa04ce3e17d9c71 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Mon, 6 Dec 2021 22:56:07 +
Subject: [PATCH 4/7] openmp: Add support for streaming metadirectives and
 resolving them after LTO

This patch adds support for streaming metadirective Gimple statements during
LTO, and adds a metadirective expansion pass that runs after LTO.  This is
required for metadirectives with selectors that can only be resolved from
within the accel compiler.

2021-12-10  Kwok Cheung Yeung  

gcc/
* Makefile.in (OBJS): Add omp-expand-metadirective.o.
* gimple-streamer-in.c (input_gimple_stmt): Add case for
GIMPLE_OMP_METADIRECTIVE.  Handle metadirective labels.
* gimple-streamer-out.c (output_gimple_stmt): Likewise.
* omp-expand-metadirective.cc: New.
* passes.def: Add pass_omp_expand_metadirective.
* tree-pass.h (make_pass_omp_expand_metadirective): New prototype.
---
 gcc/Makefile.in |   1 +
 gcc/gimple-streamer-in.c|  10 ++
 gcc/gimple-streamer-out.c   |   6 +
 gcc/omp-expand-metadirective.cc | 191 
 gcc/passes.def  |   1 +
 gcc/tree-pass.h |   1 +
 6 files changed, 210 insertions(+)
 create mode 100644 gcc/omp-expand-metadirective.cc

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 2a0be9e66a6..34a17f36922 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1519,6 +1519,7 @@ OBJS = \
omp-oacc-kernels-decompose.o \
omp-oacc-neuter-broadcast.o \
omp-simd-clone.o \
+   omp-expand-metadirective.o \
opt-problem.o \
optabs.o \
optabs-libfuncs.o \
diff --git a/gcc/gimple-streamer-in.c b/gcc/gimple-streamer-in.c
index 1c979f438a5..b821aa3ca30 100644
--- a/gcc/gimple-streamer-in.c
+++ b/gcc/gimple-streamer-in.c
@@ -151,6 +151,7 @@ input_gimple_stmt (class lto_input_block *ib, class data_in 
*data_in,
 case GIMPLE_COND:
 case GIMPLE_GOTO:
 case GIMPLE_DEBUG:
+case GIMPLE_OMP_METADIRECTIVE:
   for (i = 0; i < num_ops; i++)
{
  tree *opp, op = stream_read_tree (ib, data_in);
@@ -188,6 +189,15 @@ input_gimple_stmt (class lto_input_block *ib, class 
data_in *data_in,
  else
gimple_call_set_fntype (call_stmt, stream_read_tree (ib, data_in));
}
+  if (gomp_metadirective *metadirective_stmt
+   = dyn_cast  (stmt))
+   {
+ gimple_alloc_omp_metadirective (metadirective_stmt);
+ for (i = 0; i < num_ops; i++)
+   gimple_omp_metadirective_set_label (metadirective_stmt, i,
+   stream_read_tree (ib,
+ data_in));
+   }
   break;
 
 case GIMPLE_NOP:
diff --git a/gcc/gimple-streamer-out.c b/gcc/gimple-streamer-out.c
index fcbf92300d4..c19dff74261 100644
--- a/gcc/gimple-streamer-out.c
+++ b/gcc/gimple-streamer-out.c
@@ -127,6 +127,7 @@ output_gimple_stmt (struct output_block *ob, struct 
function *fn, gimple *stmt)
 case GIMPLE_COND:
 case GIMPLE_GOTO:
 case GIMPLE_DEBUG:
+case GIMPLE_OMP_METADIRECTIVE:
   for (i = 0; i < gimple_num_ops (stmt); i++)
{
  tree op = gimple_op (stmt, i);
@@ -169,6 +170,11 @@ output_gimple_stmt (struct output_block *ob, struct 
function *fn, gimple *stmt)
  else
stream_write_tree (ob, gimple_call_fntype (stmt), true);
}
+  if (gimple_code (stmt) == GIMPLE_OMP_METADIRECTIVE)
+   for (i = 0; i < gimple_num_ops (stmt); i++)
+ stream_write_tree (ob, gimple_omp_metadirective_label (stmt, i),
+true);
+
   break;
 
 case GIMPLE_NOP:
diff --git a/gcc/omp-expand-metadirective.cc b/gcc/omp-expand-metadirective.cc
new file mode 100644
index 000..aaf048a699a
--- /dev/null
+++ b/gcc/omp-expand-metadirective.cc
@@ -0,0 +1,191 @@
+/* Expand an OpenMP metadirective.
+
+   Copyright (C) 2021 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>

[PATCH 3/7] openmp: Add support for resolving metadirectives during parsing and Gimplification

2021-12-10 Thread Kwok Cheung Yeung
This patch contains code to resolve metadirectives, either during 
parsing or Gimplification.


The dynamic candidate selection algorithm from the OpenMP 5.1 spec is 
implemented in omp_get_dynamic_candidates in omp-general.c, which 
returns a vector containing information on the top-scoring candidate 
variants. The vector always consists of entries with dynamic selectors 
first, followed by a single entry with an all-static selector (which can 
be the default clause if all the other clauses are dynamic). If all 
selectors are static (i.e. OpenMP 5.0), then omp_get_dynamic_candidates 
will return a vector of at most length 1.


If any part of the selectors in the candidate list cannot be resolved at 
the current stage of compilation, an empty list is returned. Note that 
it is possible to resolve metadirectives even with some selectors 
unresolvable as long as those selectors are not part of the candidate list.


omp_context_selector_matches should always return 1 for dynamic 
selectors (since we can generate code to evaluate the condition at any 
time). omp_dynamic_cond, when given a selector, should return just the 
part of it that must be evaluated at run-time.


Metadirectives are resolved in both tree and Gimple form by generating a 
sequence of if..then..else statements that evaluate the dynamic selector 
of each candidate returned from omp_get_dynamic_candidates in order, 
jumping to the directive body if true, to the next evaluation if not.


Kwok

From 65ee7342256db3c81cc6741ce2c96e36dd4a9ca6 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Mon, 6 Dec 2021 22:49:23 +
Subject: [PATCH 3/7] openmp: Add support for resolving metadirectives during
 parsing and Gimplification

This adds support for resolving metadirectives according to the OpenMP 5.1
specification.  The variants are sorted by score, then gathered into a list
of dynamic replacement candidates.  The metadirective is then expanded into
a sequence of 'if..else' statements to test the dynamic selector and execute
the variant if the selector is satisfied.

If any of the selectors in the list are unresolvable, GCC will give up on
resolving the metadirective and try again later.

2021-12-10  Kwok Cheung Yeung  

gcc/
* gimplify.c (expand_omp_metadirective): New.
* omp-general.c: Include tree-pretty-print.h.
(DELAY_METADIRECTIVES_AFTER_LTO): New macro.
(omp_context_selector_matches): Delay resolution of selectors.  Allow
non-constant expressions.
(omp_dynamic_cond): New.
(omp_dynamic_selector_p): New.
(sort_variant): New.
(omp_get_dynamic_candidates): New.
(omp_resolve_metadirective): New.
(omp_resolve_metadirective): New.
* omp-general.h (struct omp_metadirective_variant): New.
(omp_resolve_metadirective): New prototype.

gcc/c-family/
* c-omp.c (c_omp_expand_metadirective_r): New.
(c_omp_expand_metadirective): New.
---
 gcc/c-family/c-omp.c |  45 -
 gcc/gimplify.c   |  72 +-
 gcc/omp-general.c| 232 ++-
 gcc/omp-general.h|   7 ++
 4 files changed, 346 insertions(+), 10 deletions(-)

diff --git a/gcc/c-family/c-omp.c b/gcc/c-family/c-omp.c
index 9a7a6834f1b..fedaec566ee 100644
--- a/gcc/c-family/c-omp.c
+++ b/gcc/c-family/c-omp.c
@@ -3264,8 +3264,49 @@ c_omp_categorize_directive (const char *first, const 
char *second,
   return NULL;
 }
 
+static tree
+c_omp_expand_metadirective_r (vec 
,
+ hash_map _labels,
+ unsigned index)
+{
+  struct omp_metadirective_variant  = candidates[index];
+  tree if_block = push_stmt_list ();
+  if (candidate.directive != NULL_TREE)
+add_stmt (candidate.directive);
+  if (candidate.body != NULL_TREE)
+{
+  tree *label = body_labels.get (candidate.body);
+  if (label != NULL)
+   add_stmt (build1 (GOTO_EXPR, void_type_node, *label));
+  else
+   {
+ tree body_label = create_artificial_label (UNKNOWN_LOCATION);
+ add_stmt (build1 (LABEL_EXPR, void_type_node, body_label));
+ add_stmt (candidate.body);
+ body_labels.put (candidate.body, body_label);
+   }
+}
+  if_block = pop_stmt_list (if_block);
+
+  if (index == candidates.length () - 1)
+return if_block;
+
+  tree cond = candidate.selector;
+  gcc_assert (cond != NULL_TREE);
+
+  tree else_block = c_omp_expand_metadirective_r (candidates, body_labels,
+ index + 1);
+  tree ret = push_stmt_list ();
+  tree stmt = build3 (COND_EXPR, void_type_node, cond, if_block, else_block);
+  add_stmt (stmt);
+  ret = pop_stmt_list (ret);
+
+  return ret;
+}
+
 tree
-c_omp_expand_metadirective (vec &)
+c_omp_expand_metadirective (vec )
 {
-  return NULL_TREE;
+  hash_map body_labels;
+  return c_omp_expand_metadirective_r (candidates, body_labels, 0);
 }
diff --git a/gcc/gimplify.c b

[PATCH 2/7] openmp: Add middle-end support for metadirectives

2021-12-10 Thread Kwok Cheung Yeung
This patch contains the required support for metadirectives in the 
middle-end.


The tree metadirective representation is gimplified into the high Gimple 
representation, which is structured like this:


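#pragma omp metadirective
  when (<selector1>):
    <directive body 1>
    goto body_label|end_label
  when (<selector2>):
    <directive body 2>
    goto body_label|end_label
  default:
    <default directive body>
    goto body_label|end_label
body_label:
  <standalone body>
end_label: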

Each variant ends with an explicit goto to either the shared standalone 
body (if the variant uses it) or to the point after the body (if it does 
not).


When lowered to low Gimple, the directive bodies move outside of the 
metadirective statement, retaining only the labels to the bodies, so it 
looks like this instead:


#pragma omp metadirective
  when (<selector1>): goto body1_label
  when (<selector2>): goto body2_label
  default: goto default_label
body1_label:
  <directive body 1>
  goto body_label|end_label
body2_label:
  <directive body 2>
  goto body_label|end_label
default_label:
  <default directive body>
  goto body_label|end_label
body_label:
  <standalone body>
end_label:

When scanning the OpenMP regions in the ompexp pass, we create a 'clone' 
of the surrounding context when recursively scanning the directive 
variants. If the same outer context was used for all variants, then it 
would appear as if all the variants were inside the region at the same 
time (only one variant of the metadirective is ever active at a time), 
which can lead to spurious errors.


The rest of the code is the plumbing required to allow the Gimple 
metadirective statement to pass through the middle-end. GCC will emit an 
ICE if it makes it through to the back-end though, as the metadirective 
is supposed to be eliminated before it gets that far.


Kwok

From 1a2fcbb2191fd1dd694ea5730e54fab19d6465b4 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Mon, 6 Dec 2021 22:29:34 +
Subject: [PATCH 2/7] openmp: Add middle-end support for metadirectives

This adds a new Gimple statement type GIMPLE_OMP_METADIRECTIVE, which
represents the metadirective in Gimple. In high Gimple, the statement
contains the body of the directive variants, whereas in low Gimple, it
only contains labels to the bodies.

This patch adds support for converting metadirectives from tree to Gimple
form, and handling of the Gimple form (Gimple lowering, OpenMP lowering
and expansion, inlining, SSA handling etc).

Metadirectives should be resolved before they reach the back-end, otherwise
the compiler will crash as GCC does not know how to convert metadirective
Gimple statements to RTX.

2021-12-10  Kwok Cheung Yeung  

gcc/
* gimple-low.c (lower_omp_metadirective): New.
(lower_stmt): Handle GIMPLE_OMP_METADIRECTIVE.
* gimple-pretty-print.c (dump_gimple_omp_metadirective): New.
(pp_gimple_stmt_1): Handle GIMPLE_OMP_METADIRECTIVE.
* gimple-walk.c (walk_gimple_op): Handle GIMPLE_OMP_METADIRECTIVE.
(walk_gimple_stmt): Likewise.
* gimple.c (gimple_alloc_omp_metadirective): New.
(gimple_build_omp_metadirective): New.
(gimple_build_omp_metadirective_variant): New.
* gimple.def (GIMPLE_OMP_METADIRECTIVE): New.
(GIMPLE_OMP_METADIRECTIVE_VARIANT): New.
* gimple.h (gomp_metadirective_variant): New.
(gomp_metadirective): New.
(is_a_helper ::test): New.
(is_a_helper ::test): New.
(is_a_helper ::test): New.
(is_a_helper ::test): New.
(gimple_alloc_omp_metadirective): New prototype.
(gimple_build_omp_metadirective): New prototype.
(gimple_build_omp_metadirective_variant): New prototype.
(gimple_has_substatements): Add GIMPLE_OMP_METADIRECTIVE case.
(gimple_has_ops): Add GIMPLE_OMP_METADIRECTIVE.
(gimple_omp_metadirective_label): New.
(gimple_omp_metadirective_set_label): New.
(gimple_omp_metadirective_variants): New.
(gimple_omp_metadirective_set_variants): New.
(CASE_GIMPLE_OMP): Add GIMPLE_OMP_METADIRECTIVE.
* gimplify.c (is_gimple_stmt): Add OMP_METADIRECTIVE.
(expand_omp_metadirective): New.
(gimplify_omp_metadirective): New.
(gimplify_expr): Add case for OMP_METADIRECTIVE.
* gsstruct.def (GSS_OMP_METADIRECTIVE): New.
(GSS_OMP_METADIRECTIVE_VARIANT): New.
* omp-expand.c (build_omp_regions_1): Handle GIMPLE_OMP_METADIRECTIVE.
(omp_make_gimple_edges): Likewise.
* omp-low.c (struct omp_context): Add next_clone field.
(new_omp_context): Initialize next_clone field.
(clone_omp_context): New.
(delete_omp_context): Delete clone contexts.
(scan_omp_metadirective): New.
(scan_omp_1_stmt): Handle GIMPLE_OMP_METADIRECTIVE.
(lower_omp_metadirective): New.
(lower_omp_1): Handle GIMPLE_OMP_METADIRECTIVE.
* tree-cfg.c (cleanup_dead_labels): Handle GIMPLE_OMP_METADIRECTIVE.
(gimple_redirect_edge_and_branch): Likewise.
* tree-inline.c (remap_gimple_stmt): Handle GIMPLE_OMP_METADIR

[PATCH 1/7] openmp: Add C support for parsing metadirectives

2021-12-10 Thread Kwok Cheung Yeung

This patch adds support for parsing metadirectives in the C parser.

Metadirectives are represented by an OMP_METADIRECTIVE tree node. It has 
a single operand (accessed by OMP_METADIRECTIVE_CLAUSES) which contains 
a chain of TREE_LIST nodes, each one representing a clause from the 
metadirective. TREE_PURPOSE(clause) contains the selector of the clause, 
while TREE_VALUE(clause) contains another TREE_LIST - the TREE_PURPOSE 
contains the tree for the directive, while the TREE_VALUE contains the 
standalone body (if any).


If an OMP directive has an associated body, it will be part of the tree 
at TREE_PURPOSE(TREE_VALUE(clause)) - the standalone body at 
TREE_VALUE(TREE_VALUE(clause)) is only used for standalone directives 
that do not have an associated body (strictly speaking, it isn't a part 
of the directive variant at all). At present, all standalone bodies in a 
metadirective are shared, and will point to the same tree node.


Labels in the statement body are handled by first scanning the body for 
labels, then enclosing the statements in a lexical block with the found 
labels declared as local using __label__. This prevents labels in the 
body interfering with each other when the body is re-parsed.


I have removed support for the 'omp begin metadirective'..'omp end 
metadirective' form of the directive that was originally in the WIP 
patch. According to the spec, the only variant directives that can be 
used in this form must have an 'end <directive>' form (apart from the 
'nothing' directive), and in C/C++, the only directive that we support 
with an end form is 'declare target', which we currently forbid since it 
is declarative.


Kwok

From dc88559b0295104472a0cbf79de03b0549bd35f5 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Mon, 6 Dec 2021 19:15:23 +
Subject: [PATCH 1/7] openmp: Add C support for parsing metadirectives

This patch implements parsing for the OpenMP metadirective introduced in
OpenMP 5.0.  Metadirectives are parsed into an OMP_METADIRECTIVE node,
with the variant clauses forming a chain accessible via
OMP_METADIRECTIVE_CLAUSES.  Each clause contains the context selector
and tree for the variant.

User conditions in the selector are now permitted to be non-constant when
used in metadirectives as specified in OpenMP 5.1.

2021-12-10  Kwok Cheung Yeung  

gcc/
* omp-general.c (omp_context_selector_matches): Add extra argument.
(omp_resolve_metadirective): New stub function.
* omp-general.h (struct omp_metadirective_variant): New.
(omp_context_selector_matches): Add extra argument.
(omp_resolve_metadirective): New prototype.
* tree.def (OMP_METADIRECTIVE): New.
* tree.h (OMP_METADIRECTIVE_CLAUSES): New macro.

gcc/c/
* c-parser.c (c_parser_skip_to_end_of_block_or_statement): Handle
parentheses in statement.
(c_parser_omp_metadirective): New prototype.
(c_parser_omp_context_selector): Add extra argument.  Allow
non-constant expressions.
(c_parser_omp_context_selector_specification): Add extra argument and
propagate it to c_parser_omp_context_selector.
(analyze_metadirective_body): New.
(c_parser_omp_metadirective): New.
(c_parser_omp_construct): Handle PRAGMA_OMP_METADIRECTIVE.

gcc/c-family
* c-common.h (enum c_omp_directive_kind): Add C_OMP_DIR_META.
(c_omp_expand_metadirective): New prototype.
* c-gimplify.c (genericize_omp_metadirective_stmt): New.
(c_genericize_control_stmt): Handle OMP_METADIRECTIVE tree nodes.
* c-omp.c (omp_directives): Classify metadirectives as C_OMP_DIR_META.
(c_omp_expand_metadirective): New stub function.
* c-pragma.c (omp_pragmas): Add entry for metadirective.
* c-pragma.h (enum pragma_kind): Add PRAGMA_OMP_METADIRECTIVE.
---
 gcc/c-family/c-common.h   |   4 +-
 gcc/c-family/c-gimplify.c |  25 +++
 gcc/c-family/c-omp.c  |  14 +-
 gcc/c-family/c-pragma.c   |   1 +
 gcc/c-family/c-pragma.h   |   1 +
 gcc/c/c-parser.c  | 403 +-
 gcc/omp-general.c |  14 +-
 gcc/omp-general.h |   9 +-
 gcc/tree.def  |   5 +
 gcc/tree.h|   3 +
 10 files changed, 465 insertions(+), 14 deletions(-)

diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index c089fda12e4..ef37051791f 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1257,7 +1257,8 @@ enum c_omp_directive_kind {
   C_OMP_DIR_CONSTRUCT,
   C_OMP_DIR_DECLARATIVE,
   C_OMP_DIR_UTILITY,
-  C_OMP_DIR_INFORMATIONAL
+  C_OMP_DIR_INFORMATIONAL,
+  C_OMP_DIR_META
 };
 
 struct c_omp_directive {
@@ -1270,6 +1271,7 @@ struct c_omp_directive {
 extern const struct c_omp_directive *c_omp_categorize_directive (const char *,
 const char *,
 const char

[PATCH 0/7] openmp: OpenMP metadirectives support

2021-12-10 Thread Kwok Cheung Yeung

Hello

This is my current patchset for OpenMP metadirectives support. It aims 
to implement the specification from OpenMP 5.1, with dynamic selector 
support (though currently only the dynamic user selector set is 
supported), and supports the C, C++ and Fortran front ends.


The patch has been bootstrapped on a x86_64 Linux machine, and the 
testsuite run with no regressions (libgomp tested with both no 
offloading and with offloading to nvptx). Okay for inclusion in trunk?


Kwok


Re: [WIP, OpenMP] OpenMP metadirectives support

2021-12-10 Thread Kwok Cheung Yeung

Hello

It has been several months since I posted my WIP patch, and my current 
patch set (which I will post separately) has evolved considerably since 
then. I have added C++ and Fortran support, as well as dynamic selectors 
from the OpenMP 5.1 spec (currently only the 'user={condition()}' 
selector is implemented, target_device is TBD).


On 26/07/2021 3:29 pm, Jakub Jelinek wrote:

Note, there is a partial overlap with the attribute syntax changes, see below.
c-family/c-omp.c now has omp_directives table that should be updated for
changes like this and then c_omp_categorize_directive that returns some
information about the directives given a directive name (though, that name
can be one, two or three tokens long, consider e.g. target enter data
or cancellation point directives).


I have modified the C/C++ parser code to lookup the type of the 
directive using c_omp_categorize_directive.



For metadirective, I think very special case are declarative directives in
them, I'd tend to sorry for them at least for now, I'm pretty sure many
cases with them are just unimplementable and will need to be restricted in
the standard, others can be implemented with lots of effort.
Whether it is e.g. metadirective guarding declare target ... end declare
target pair that would only conditionally set declare target and instead of
a single bit to find out if something is declare target or not we'd until
resolved need to compute it for all possibilities, or e.g. conditional
declare reduction/declare mapper where the name lookup for reduction or map
directives would be dependent on metadirective resolution later on, etc.
I'm afraid a total nightmare nobody has really thought about details for it.


The parsers currently emit a sorry if a C_OMP_DIR_DECLARATIVE directive 
is encountered in a metadirective, though I am sure there are many 
remaining ways that one could break it!



As an optimisation, identical body trees could be merged together, but that
can come later.


I'm afraid it isn't just an optimization and we need to be as smart as
possible.  I'm not sure it is possible to parse everything many times,
consider e.g. labels in the blocks, nested function definitions, variable
definitions, etc.
While OpenMP requires that essentially the code must be valid if the
metadirective is replaced by any of those mentioned directives which rules
quite some weirdo corner cases, nothing prevents e.g. two or more
when directives to be standalone directives (which don't have any body and
so whatever comes after them should be left parsed for later as normal
statement sequence), one or more to be normal constructs that accept a
structured block and one or more to be e.g. looping constructs (simd, for,
distribute, taskloop or combined versions of those).
Even when issues with labels etc. are somehow solved (e.g. for structured
blocks we have the restriction that goto, break, continue, or switch into
a case/default label, etc. can't be used to enter or exit the structured
block which could mean some cases can be handled through renaming seen
labels in all but one bodies), most important is to sync on where parsing
should continue after the metadirective.
I think it would be nice if the metadirective parsing at least made quick
analysis on what kind of bodies the directives will want and can use the new
c-omp.c infrastructure or if needed extend it (e.g. separate the 
C_OMP_DIR_CONSTRUCT
category into C_OMP_DIR_CONSTRUCT and C_OMP_DIR_LOOPING_CONSTRUCT where
the latter would be used for those that expect some omp loop after it).
One option would be then to parse the body as the most restricted construct
(looping (and determine highest needed collapse and ordered), then construct,
then standalone) and be able to adjust what we parsed into what the
different constructs need, but another option is the separate parsing of
the code after the directive multiple times, but at least in the order of
most restricted to least restricted, remember where to stop and don't parse
it multiple times at least for directives that need the same thing.



After some experimentation, I'm not sure if it is possible in the 
general case to share bodies between variants. For one thing, it 
complicates the OMP region outlining and lowering, and becomes rather 
invasive to implement in the parser. Another is the possibility of 
having metadirectives nested within metadirective bodies. e.g. Something 
of the form:


#pragma omp metadirective \
when (cond1: dir1) \
when (cond2: dir2)
  {
#pragma omp metadirective \
  when (construct dir1: dirA)
  when (construct dir2: dirB)
(body)
  }

in which case the way the inner metadirective is resolved depends on the 
outer metadirective, leading to different bodies.


In my current patch set, I have implemented a limited form of statement 
body sharing when the body is not part of an OMP directive (e.g. an 'omp 
flush' followed by the body). Variable declarations and local functions 
in the 

Re: [PATCH] openmp, fortran: Add support for declare variant in Fortran

2021-10-18 Thread Kwok Cheung Yeung

On 14/10/2021 1:47 pm, Jakub Jelinek wrote:

What I still miss is tests for the (proc_name : variant_name) syntax
in places where proc_name : is optional, but is supplied and is valid, like
e.g. in interface, or in subroutine/function and where proc_name specifies
the name of the containing interface or subroutine/function.
I see that syntax tested in some places with dg-error on that line and
in spaces where it isn't optional (e.g. at module scope before contains).
But if you want, that can be added incrementally.


Do you mean something like these tests?

Thanks

Kwok
From 38733234024697d2144613c4a992e970f40afad8 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Mon, 18 Oct 2021 13:56:59 -0700
Subject: [PATCH] openmp: Add additional tests for declare variant in Fortran

Add tests to check that explicitly specifying the containing procedure as the
base name for declare variant works.

2021-10-18  Kwok Cheung Yeung  

gcc/testsuite/

* gfortran.dg/gomp/declare-variant-15.f90 (variant2, base2, test2):
Add tests.
* gfortran.dg/gomp/declare-variant-16.f90 (base2, variant2, test2):
Add tests.
---
 .../gfortran.dg/gomp/declare-variant-15.f90| 13 +
 .../gfortran.dg/gomp/declare-variant-16.f90| 14 +-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gfortran.dg/gomp/declare-variant-15.f90 
b/gcc/testsuite/gfortran.dg/gomp/declare-variant-15.f90
index b2ad96a8998..4a88e3e46c7 100644
--- a/gcc/testsuite/gfortran.dg/gomp/declare-variant-15.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/declare-variant-15.f90
@@ -14,6 +14,13 @@ contains
   subroutine base ()
   end subroutine
 
+  subroutine variant2 ()
+  end subroutine
+
+  subroutine base2 ()
+!$omp declare variant (base2: variant2) match (construct={parallel})
+  end subroutine
+
   subroutine test1 ()
 !$omp target
   !$omp parallel
@@ -21,4 +28,10 @@ contains
   !$omp end parallel
 !$omp end target
   end subroutine
+
+  subroutine test2 ()
+!$omp parallel
+   call base2 ()   ! { dg-final { scan-tree-dump-times "variant2 
\\\(\\\);" 1 "gimple" } }
+!$omp end parallel
+  end subroutine
 end module
diff --git a/gcc/testsuite/gfortran.dg/gomp/declare-variant-16.f90 
b/gcc/testsuite/gfortran.dg/gomp/declare-variant-16.f90
index fc97322e667..5e34d474da4 100644
--- a/gcc/testsuite/gfortran.dg/gomp/declare-variant-16.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/declare-variant-16.f90
@@ -10,15 +10,27 @@ module main
 subroutine base ()
   !$omp declare variant (variant) match (construct={parallel})
 end subroutine
+
+subroutine base2 ()
+  !$omp declare variant (base2: variant2) match (construct={target})
+end subroutine
   end interface
-
 contains
   subroutine variant ()
   end subroutine
 
+  subroutine variant2 ()
+  end subroutine
+
   subroutine test ()
 !$omp parallel
   call base ()  ! { dg-final { scan-tree-dump-times "variant \\\(\\\);" 1 
"gimple" } }
 !$omp end parallel
   end subroutine
+
+  subroutine test2 ()
+!$omp target
+  call base2 ()  ! { dg-final { scan-tree-dump-times "variant2 \\\(\\\);" 
1 "gimple" } }
+!$omp end target
+  end subroutine
 end module
-- 
2.30.0.335.ge636282



[COMMIT] openmp: Mark declare variant directive as supported in Fortran

2021-10-14 Thread Kwok Cheung Yeung

Hello

As declare variant is now supported in the Fortran FE, I have removed 
the 'Only C and C++' for the declare variant entry in the list of OpenMP 
5.0 features supported. Committed as obvious.


Thanks

Kwok
From 2c4666fb0686a8f5a55821f1527351dc71c018b4 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Thu, 14 Oct 2021 09:29:13 -0700
Subject: [PATCH] openmp: Mark declare variant directive in documentation as
 supported in Fortran

2021-10-14  Kwok Cheung Yeung  

libgomp/
* libgomp.texi (OpenMP 5.0): Update entry for declare variant
directive.
---
 libgomp/libgomp.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index bdd7e3ac442..af25e9df250 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -182,7 +182,7 @@ The OpenMP 4.5 specification is fully supported.
 @item Iterators @tab Y @tab
 @item @code{metadirective} directive @tab N @tab
 @item @code{declare variant} directive
-  @tab P @tab Only C and C++, simd traits not handled correctly
+  @tab P @tab simd traits not handled correctly
 @item @emph{target-offload-var} ICV and @code{OMP_TARGET_OFFLOAD}
   env variable @tab Y @tab
 @item Nested-parallel changes to @emph{max-active-levels-var} ICV @tab Y @tab
-- 
2.30.0.335.ge636282



Re: [WIP, OpenMP] OpenMP metadirectives support

2021-07-26 Thread Kwok Cheung Yeung

On 26/07/2021 10:23 pm, Jakub Jelinek wrote:

On Mon, Jul 26, 2021 at 10:19:35PM +0100, Kwok Cheung Yeung wrote:

In that comment, Deepak says:

So, we decided to keep the target trait static, requiring that the declare
target directive must be explicit and that the function version must be
different from the version of the function that may be called outside of a
target region (with the additional clarification that whether it differs or
not will be implementation defined).

"the function version must be different from the version of the function
that may be called outside of a target region": This is what we do not have
in GCC at the moment - the function versions called within and outside
target regions are the same on the host.

"whether it differs or not will be implementation defined": So whether a
function with 'declare target' and a metadirective involving a 'target'
construct behaves the same or not when called from both inside and outside
of a target region is implementation defined?

I will leave the treatment of target constructs in the selector as it is
then, with both calls going to the same function with the metadirective
resolving to the 'target' variant. I will try to address your other concerns
later.


I think you're right, it should differ in the host vs. target version iff
it is in explicit declare target block, my memory is weak, but let's implement
the 5.0 wording for now (and ignore the 5.1 wording later on) and only when
we'll be doing 5.2 change this (and change for both metadirective and
declare variant at that point).
Ok?



Okay, the rest of the metadirective spec is quite enough to be getting on with 
for now. :-)


Thanks

Kwok


Re: [WIP, OpenMP] OpenMP metadirectives support

2021-07-26 Thread Kwok Cheung Yeung

Hello

On 26/07/2021 8:56 pm, Jakub Jelinek wrote:

On Mon, Jul 26, 2021 at 08:28:16PM +0100, Kwok Cheung Yeung wrote:

In Section 1.2.2 of the OpenMP TR10 spec, 'target variant' is defined as:

A version of a device routine that can only be executed as part of a target 
region.


Yes, that is a target variant, but I'm pretty sure we've decided that
the target construct added for declare target is actually not a dynamic
property.  So basically mostly return to the 5.0 wording with clarifications
for Fortran.  See
https://github.com/OpenMP/spec/issues/2612#issuecomment-849742988
for details.
Making the target in construct dynamic would pretty much force all the
scoring to be dynamic as well.


In that comment, Deepak says:

So, we decided to keep the target trait static, requiring that the declare 
target directive must be explicit and that the function version must be 
different from the version of the function that may be called outside of a 
target region (with the additional clarification that whether it differs or not 
will be implementation defined).


"the function version must be different from the version of the function that 
may be called outside of a target region": This is what we do not have in GCC at 
the moment - the function versions called within and outside target regions are 
the same on the host.


"whether it differs or not will be implementation defined": So whether a 
function with 'declare target' and a metadirective involving a 'target' 
construct behaves the same or not when called from both inside and outside of a 
target region is implementation defined?


I will leave the treatment of target constructs in the selector as it is then, 
with both calls going to the same function with the metadirective resolving to 
the 'target' variant. I will try to address your other concerns later.


Thanks

Kwok


Re: [WIP, OpenMP] OpenMP metadirectives support

2021-07-26 Thread Kwok Cheung Yeung

Hello

Thanks for your reply.

On 26/07/2021 3:29 pm, Jakub Jelinek wrote:

On Fri, Jul 09, 2021 at 12:16:15PM +0100, Kwok Cheung Yeung wrote:

3) In the OpenMP examples (version 5.0.1), section 9.7, the example
metadirective.3.c does not work as expected.

#pragma omp declare target
void exp_pi_diff(double *d, double my_pi){
#pragma omp metadirective \
when( construct={target}: distribute parallel for ) \
default( parallel for simd)
...
int main()
{
...
#pragma omp target teams map(tofrom: d[0:N])
exp_pi_diff(d,my_pi);
...
exp_pi_diff(d,my_pi);


The spec says in this case that the target construct is added to the
construct set because of the function appearing in between omp declare target
and omp end declare target, so the above is something that resolves
statically to distribute parallel for.
It is true that in OpenMP 5.1 the earlier
For functions within a declare target block, the target trait is added to the 
beginning of the
set as c_1 for any versions of the function that are generated for target 
regions so the total size
of the set is increased by 1.
has been mistakenly replaced with:
For device routines, the target trait is added to the beginning of the set as 
c_1 for any versions of
the procedure that are generated for target regions so the total size of the 
set is increased by 1.
but that has been corrected in 5.2:
C/C++:
For functions that are declared in a code region that is delimited by a declare 
target directive and
its paired end directive, the target trait is added to the beginning of the set 
as c_1 for any target
variants that result from the directive so the total size of the set is 
increased by one.
Fortran:
If a declare target directive appears in the specification part of a procedure 
or in the
specification part of a procedure interface body, the target trait is added to 
the beginning of the
set as c_1 for any target variants that result from the directive so the total 
size of the set is
increased by one.

So, it is really a static decision that can be decided already during
parsing.


In Section 1.2.2 of the OpenMP TR10 spec, 'target variant' is defined as:

A version of a device routine that can only be executed as part of a target 
region.

So isn't this really saying the same thing as the previous versions of the spec? 
The target trait is added to the beginning of the construct set _for any target 
variants_ that result from the directive (implying that it shouldn't be added 
for non-target variants). In this example, the same function exp_pi_diff is 
being used in both a target and non-target context, so shouldn't the 
metadirective resolve differently in the two contexts, independently of the 
function being declared in a 'declare target' block? If not, there does not seem 
to be much point in that example (in section 9.7 of the OpenMP Examples v5.0.1).


From reading the spec, I infer that they expect the device and non-device 
versions of a function with 'declare target' to be separate, but that is not 
currently the case for GCC - on the host compiler, the same version of the 
function gets called in both target and non-target regions (though in the target 
region case, it gets called indirectly via a compiler-generated function with a 
name like main._omp_fn.0). The offload compiler gets its own streamed version, 
so there is no conflict there - by definition, its version must be in a target 
context.


Thanks,

Kwok


Re: [WIP, OpenMP] OpenMP metadirectives support

2021-07-26 Thread Kwok Cheung Yeung

Ping? Does anyone have any opinions on how this issue should be resolved?

On 09/07/2021 12:16 pm, Kwok Cheung Yeung wrote:
3) In the OpenMP examples (version 5.0.1), section 9.7, the example 
metadirective.3.c does not work as expected.


#pragma omp declare target
void exp_pi_diff(double *d, double my_pi){
    #pragma omp metadirective \
    when( construct={target}: distribute parallel for ) \
    default( parallel for simd)
...
int main()
{
    ...
    #pragma omp target teams map(tofrom: d[0:N])
    exp_pi_diff(d,my_pi);
    ...
    exp_pi_diff(d,my_pi);

In the first call to exp_pi_diff in an '#pragma omp target' construct, the 
metadirective is expected to expand to 'distribute parallel for', but in the 
second (without the '#pragma omp target'), it should expand to 'parallel for simd'.


During OMP expansion of the 'omp target', it creates a child function that calls 
exp_pi_diff:


__attribute__((omp target entrypoint))
void main._omp_fn.0 (const struct .omp_data_t.12 & restrict .omp_data_i)
{
   ...
    :
   __builtin_GOMP_teams (0, 0);
   exp_pi_diff (d.13, my_pi);

This is not a problem on the offload compiler (since by definition its copy of 
exp_pi_diff must be in a 'target'), but if the host device is used, the same 
version of exp_pi_diff is called in both target and non-target contexts.


What would be the best way to solve this? Offhand, I can think of two solutions:

(a) Recursively go through all functions that can be reached via a target region 
and create clones for each, redirecting all function calls in the clones to the 
new cloned versions. Resolve the metadirectives in the clones and originals 
separately.




Maybe this could be done at the same time as when marking functions implicitly 
'declare target'? It seems a lot of work for one special case though...


(b) Make the construct selector a dynamic selector when OpenMP 5.1 metadirective 
support is implemented. Keep track of the current construct list every time an 
OpenMP construct is entered or exited, and make the decision at runtime.




I think this would be easier to implement at runtime (assuming that the 
infrastructure for OpenMP 5.1 was already in place) since this is a host-side 
issue, but it probably goes against the intent of the specification, given that 
the 'construct' selector set appeared in the 5.0 specification before dynamic 
replacements became available.


Thanks

Kwok


[WIP, OpenMP] OpenMP metadirectives support

2021-07-09 Thread Kwok Cheung Yeung

Hello

This is a WIP implementation of metadirectives as defined in the OpenMP 5.0 
spec. I intend to add support for metadirectives as specified in OpenMP 5.1 
later (where the directive can be selected dynamically at runtime), but am 
concentrating on the static part for now. Parsing has only been implemented in 
the C frontend so far. I am especially interested in feedback regarding certain 
aspects of the implementation before I become too committed to the current design.


1) When parsing each directive variant, a vector of tokens is constructed and 
populated with the tokens for a regular equivalent pragma, along with the tokens 
for its clauses and the body. The parser routine for that pragma type is then 
called with these tokens, and the entire resulting parse tree is stored as a 
sub-tree of the metadirective tree structure.


This results in the body being parsed and stored once for each directive 
variant. I believe this is necessary because the body is parsed differently if 
there is a 'for' in the directive (using c_parser_omp_for_loop) compared to if 
there is not, plus clauses in the directive (e.g. tile, collapse) can change how 
the for loop is parsed.


As an optimisation, identical body trees could be merged together, but that can 
come later.


2) Selectors in the device set (i.e. kind, isa, arch) resolve differently 
depending on whether the program is running on a target or on the host. Since we 
don't keep multiple versions of a function for each target on the host compiler, 
resolving metadirectives with these selectors needs to be delayed until after 
LTO streaming, at which point the host or offload compiler can make the 
appropriate decision.


One negative of this is that the metadirective Gimple representation lasts 
beyond the OMP expand stage, when generally we would expect all OMP directives 
to have been expanded to something else.


3) In the OpenMP examples (version 5.0.1), section 9.7, the example 
metadirective.3.c does not work as expected.


#pragma omp declare target
void exp_pi_diff(double *d, double my_pi){
   #pragma omp metadirective \
   when( construct={target}: distribute parallel for ) \
   default( parallel for simd)
...
int main()
{
   ...
   #pragma omp target teams map(tofrom: d[0:N])
   exp_pi_diff(d,my_pi);
   ...
   exp_pi_diff(d,my_pi);

In the first call to exp_pi_diff in an '#pragma omp target' construct, the 
metadirective is expected to expand to 'distribute parallel for', but in the 
second (without the '#pragma omp target'), it should expand to 'parallel for simd'.


During OMP expansion of the 'omp target', it creates a child function that calls 
exp_pi_diff:


__attribute__((omp target entrypoint))
void main._omp_fn.0 (const struct .omp_data_t.12 & restrict .omp_data_i)
{
  ...
   :
  __builtin_GOMP_teams (0, 0);
  exp_pi_diff (d.13, my_pi);

This is not a problem on the offload compiler (since by definition its copy of 
exp_pi_diff must be in a 'target'), but if the host device is used, the same 
version of exp_pi_diff is called in both target and non-target contexts.


What would be the best way to solve this? Offhand, I can think of two solutions:

(a) Recursively go through all functions that can be reached via a target region 
and create clones for each, redirecting all function calls in the clones to the 
new cloned versions. Resolve the metadirectives in the clones and originals 
separately.


(b) Make the construct selector a dynamic selector when OpenMP 5.1 metadirective 
support is implemented. Keep track of the current construct list every time an 
OpenMP construct is entered or exited, and make the decision at runtime.



Thanks

Kwok
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 1164554e6d6..28e29fab93d 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1505,6 +1505,7 @@ OBJS = \
omp-general.o \
omp-low.o \
omp-oacc-kernels-decompose.o \
+omp-expand-metadirective.o \
omp-simd-clone.o \
opt-problem.o \
optabs.o \
diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c
index 4f8e8e0128c..01dc1e6d9c0 100644
--- a/gcc/c-family/c-pragma.c
+++ b/gcc/c-family/c-pragma.c
@@ -1312,12 +1312,14 @@ static const struct omp_pragma_def omp_pragmas[] = {
   { "allocate", PRAGMA_OMP_ALLOCATE },
   { "atomic", PRAGMA_OMP_ATOMIC },
   { "barrier", PRAGMA_OMP_BARRIER },
+  { "begin", PRAGMA_OMP_BEGIN },
   { "cancel", PRAGMA_OMP_CANCEL },
   { "cancellation", PRAGMA_OMP_CANCELLATION_POINT },
   { "critical", PRAGMA_OMP_CRITICAL },
   { "depobj", PRAGMA_OMP_DEPOBJ },
-  { "end", PRAGMA_OMP_END_DECLARE_TARGET },
+  { "end", PRAGMA_OMP_END },
   { "flush", PRAGMA_OMP_FLUSH },
+  { "metadirective", PRAGMA_OMP_METADIRECTIVE },
   { "requires", PRAGMA_OMP_REQUIRES },
   { "section", PRAGMA_OMP_SECTION },
   { "sections", PRAGMA_OMP_SECTIONS },
@@ -1387,6 +1389,41 @@ c_pp_lookup_pragma (unsigned int id, const char **space, 
const char **name)
   

[PATCH] openmp: Notify team barrier of pending tasks in omp_fulfill_event

2021-05-17 Thread Kwok Cheung Yeung

Hello

This patch fixes the issue where a call to omp_fulfill_event could fail to 
trigger the execution of tasks that were dependent on the task whose completion 
event is being fulfilled.


This mainly (or can only?) occurs when the thread is external to OpenMP, and all 
the barrier threads are sleeping when the omp_fulfill_event is called. 
omp_fulfill_event wakes the appropriate number of threads, but if 
BAR_TASK_PENDING is not set on bar->generation, the threads go back to sleep 
again rather than process new tasks.


I have added a new testcase using a pthread thread to call omp_fulfill_event on 
a suspended task after a short delay. I have not included a Fortran version as 
there doesn't appear to be a standard interface for threading on Fortran.


I have tested all the task-detach-* libgomp tests (which are the only tests that 
call omp_fulfill_event) with no offloading and offloading to Nvidia, with no 
fails. Okay to commit to master, releases/gcc-11 and devel/omp/gcc-11?


Thanks

Kwok
From 348c7cd00e358a8dc0b7563055f367fce2713fa5 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Fri, 14 May 2021 09:59:11 -0700
Subject: [PATCH] openmp: Notify team barrier of pending tasks in
 omp_fulfill_event

The team barrier should be notified of any new tasks that become runnable
as the result of a completing task, otherwise the barrier threads might
not resume processing available tasks, resulting in a hang.

2021-05-17  Kwok Cheung Yeung  

libgomp/
* task.c (omp_fulfill_event): Call gomp_team_barrier_set_task_pending
if new tasks generated.
* testsuite/libgomp.c-c++-common/task-detach-13.c: New.
---
 libgomp/task.c|  1 +
 .../libgomp.c-c++-common/task-detach-13.c | 60 +++
 2 files changed, 61 insertions(+)
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/task-detach-13.c

diff --git a/libgomp/task.c b/libgomp/task.c
index 1c73c759a8d..feb4796a3ac 100644
--- a/libgomp/task.c
+++ b/libgomp/task.c
@@ -2460,6 +2460,7 @@ omp_fulfill_event (omp_event_handle_t event)
   if (new_tasks > 0)
 {
   /* Wake up threads to run new tasks.  */
+  gomp_team_barrier_set_task_pending (&team->barrier);
   do_wake = team->nthreads - team->task_running_count;
   if (do_wake > new_tasks)
do_wake = new_tasks;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-detach-13.c 
b/libgomp/testsuite/libgomp.c-c++-common/task-detach-13.c
new file mode 100644
index 000..4306524526d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/task-detach-13.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-fopenmp" } */
+/* { dg-timeout 10 } */
+
+/* Test that omp_fulfill_event works when called from an external
+   non-OpenMP thread.  */
+
+#include 
+#include 
+#include 
+#include 
+
+int finished = 0;
+int event_pending = 0;
+omp_event_handle_t detach_event;
+
+void*
+fulfill_thread (void *)
+{
+  while (!__atomic_load_n (&finished, __ATOMIC_RELAXED))
+{
+  if (__atomic_load_n (&event_pending, __ATOMIC_ACQUIRE))
+   {
+ omp_fulfill_event (detach_event);
+ __atomic_store_n (&event_pending, 0, __ATOMIC_RELEASE);
+   }
+
+  sleep(1);
+}
+
+  return 0;
+}
+
+int
+main (void)
+{
+  pthread_t thr;
+  int dep;
+  pthread_create (&thr, NULL, fulfill_thread, 0);
+
+  #pragma omp parallel
+#pragma omp single
+  {
+   omp_event_handle_t ev;
+
+   #pragma omp task depend (out: dep) detach (ev)
+   {
+ detach_event = ev;
+ __atomic_store_n (&event_pending, 1, __ATOMIC_RELEASE);
+   }
+
+   #pragma omp task depend (in: dep)
+   {
+ __atomic_store_n (&finished, 1, __ATOMIC_RELAXED);
+   }
+  }
+
+
+  pthread_join (thr, 0);
+}
-- 
2.30.0.335.ge636282



[COMMIT] wwwdocs: Document devel/omp/gcc-11

2021-05-13 Thread Kwok Cheung Yeung

Hello

I have pushed the devel/omp/gcc-11 branch to the git repo as the development 
branch for new OpenMP, OpenACC and offloading functionality, based on the GCC 11 
branch.


I have committed this patch to update the git doc page to point to the new 
branch as the active OMP development branch, and have moved devel/omp/gcc-10 to 
the list of inactive branches.


Kwok
commit 8a006e10264a471a8f9ece2ce3720eff0910f77d
Author: Kwok Cheung Yeung 
Date:   Thu May 13 22:09:36 2021 +0100

Document devel/omp/gcc-11 branch

This also moves the old devel/omp/gcc-10 branch to the inactive branches
section next to devel/omp/gcc-9.

diff --git a/htdocs/git.html b/htdocs/git.html
index 8edde126..2bbfc334 100644
--- a/htdocs/git.html
+++ b/htdocs/git.html
@@ -280,15 +280,15 @@ in Git.
   Makarov mailto:vmaka...@redhat.com;>vmaka...@redhat.com.
   
 
-  https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;a=shortlog;h=refs/heads/devel/omp/gcc-10;>devel/omp/gcc-10
+  https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;a=shortlog;h=refs/heads/devel/omp/gcc-11;>devel/omp/gcc-11
   This branch is for collaborative development of
   https://gcc.gnu.org/wiki/OpenACC;>OpenACC and
   https://gcc.gnu.org/wiki/openmp;>OpenMP support and related
   functionality, such
   as https://gcc.gnu.org/wiki/Offloading;>offloading support (OMP:
   offloading and multi processing).
-  The branch is based on releases/gcc-10.
-  Please send patch emails with a short-hand [og10] tag in the
+  The branch is based on releases/gcc-11.
+  Please send patch emails with a short-hand [og11] tag in the
   subject line, and use ChangeLog.omp files.
 
   unified-autovect
@@ -949,13 +949,14 @@ merged.
   respectively.
 
   https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;a=shortlog;h=refs/heads/devel/omp/gcc-9;>devel/omp/gcc-9
-  This branch was used for collaborative development of
+  https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;a=shortlog;h=refs/heads/devel/omp/gcc-10;>devel/omp/gcc-10
+  These branches were used for collaborative development of
   https://gcc.gnu.org/wiki/OpenACC;>OpenACC and
   https://gcc.gnu.org/wiki/openmp;>OpenMP support and related
-  functionality as the successor to openacc-gcc-9-branch after the move to
+  functionality as the successors to openacc-gcc-9-branch after the move to
   Git.
-  The branch was based on releases/gcc-9.
-  Development has now moved to the devel/omp/gcc-10 branch.
+  The branches were based on releases/gcc-9 and releases/gcc-10 respectively.
+  Development has now moved to the devel/omp/gcc-11 branch.
 
   hammer-3_3-branch
   The goal of this branch was to have a stable compiler based on GCC 3.3


[commit] [OG10] amdgcn: Add gfx908 support

2021-03-25 Thread Kwok Cheung Yeung

Hello

I have backported commit 3535402e20118655b2ad4085a6e1d4f1b9c46e92 (amdgcn: Add 
gfx908 support) from mainline to the devel/omp/gcc-10 branch as commit 
bb55967ccde0b48f285150caf6443a327159b4a2. This adds support for the gfx908 GPU type.


Kwok


[PATCH] [og10] openmp: Scale precision of collapsed iteration variable

2021-03-01 Thread Kwok Cheung Yeung

Hello

When two or more nested loops are collapsed using the OpenMP collapse clause, a 
single iteration variable is used to represent the combined iteration space. In 
the usual case (i.e. statically scheduled, no ordered clause), the type of this 
variable is picked by taking the unsigned version of the largest of the iterator 
types in the loop nest:


 else if (i == 0
  || TYPE_PRECISION (iter_type)
 < TYPE_PRECISION (TREE_TYPE (loop->v)))
   iter_type
 = build_nonstandard_integer_type
 (TYPE_PRECISION (TREE_TYPE (loop->v)), 1);

If needed, the original indices of the collapsed loops are recalculated from the 
combined index. However, this can be problematic if the combined iteration space 
of the collapsed loops is larger than can be represented by the type of the 
largest individual loop iterator type. e.g.


for (int i = 0; i < 80000; i++)
  for (int j = 0; j < 80000; j++)

In this case, the combined iteration space is [0..6,400,000,000), which is 
larger than the [0..4,294,967,296) range of a 32-bit unsigned int.


This patch attempts to avoid this problem by setting the precision of the 
combined iteration variable to the sum of the precision of the collapsed 
iterators, rounded up to the nearest power of 2. This is capped at the size of a 
long long (i.e. 64 bits) to avoid an excessive performance hit. If any of the 
loops use a larger type (e.g. __int128), then that is used instead.


I believe OpenACC suffers from a similar problem, but it uses a different 
code-path and should be dealt with separately. The patch caused regressions in 
some OpenACC tests related to tiling (pr84955-1.c, pr84955.c, tile-1.c, 
pr84955.f90) due to the type of diff_type changing between when it was used to 
define the '.tile' variables in expand_oacc_collapse_init and when the '.tile' 
variables are used in expand_oacc_for. I fixed this by adding a cast to the 
current diff_type when '.tile' is multiplied.


Okay for OG10?

Thanks

Kwok
From df1332b7a1575920c8de17359b2dfcad5404a112 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Mon, 1 Mar 2021 14:15:30 -0800
Subject: [PATCH] openmp: Scale type precision of collapsed iterator variable

This sets the type precision of the collapsed iterator variable to the
sum of the precision of the collapsed loop variables, up to a maximum of
sizeof(long long) (i.e. 64-bits).

2021-03-01  Kwok Cheung Yeung  

gcc/
* omp-expand.c (expand_oacc_for): Convert .tile variable to
diff_type before multiplying.
* omp-general.c (omp_extract_for_data): Use accumulated precision
of all collapsed for-loops as precision of iteration variable, up
to the precision of a long long.

libgomp/
* testsuite/libgomp.c-c++-common/collapse-4.c: New.
* testsuite/libgomp.fortran/collapse5.f90: New.
---
 gcc/ChangeLog.omp  |  8 ++
 gcc/omp-expand.c   |  5 +++-
 gcc/omp-general.c  | 29 +-
 libgomp/ChangeLog.omp  |  5 
 .../testsuite/libgomp.c-c++-common/collapse-4.c| 19 ++
 libgomp/testsuite/libgomp.fortran/collapse5.f90| 14 +++
 6 files changed, 73 insertions(+), 7 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/collapse-4.c
 create mode 100644 libgomp/testsuite/libgomp.fortran/collapse5.f90

diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index a59c25b..374665d 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,11 @@
+2021-03-01  Kwok Cheung Yeung  
+
+   * omp-expand.c (expand_oacc_for): Convert .tile variable to
+   diff_type before multiplying.
+   * omp-general.c (omp_extract_for_data): Use accumulated precision
+   of all collapsed for-loops as precision of iteration variable, up
+   to the precision of a long long.
+
 2021-02-24  Julian Brown  
 
Backport from mainline
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index e4a2f3a..f8347c0 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -7328,7 +7328,10 @@ expand_oacc_for (struct omp_region *region, struct 
omp_for_data *fd)
   tile_size = create_tmp_var (diff_type, ".tile_size");
   expr = build_int_cst (diff_type, 1);
   for (int ix = 0; ix < fd->collapse; ix++)
-   expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
+   {
+ tree tile = fold_convert (diff_type, counts[ix].tile);
+ expr = fold_build2 (MULT_EXPR, diff_type, tile, expr);
+   }
   expr = force_gimple_operand_gsi (&gsi, expr, true,
   NULL_TREE, true, GSI_SAME_STMT);
   ass = gimple_build_assign (tile_size, expr);
diff --git a/gcc/omp-general.c b/gcc/omp-general.c
index 8e5b961..97f94e1 100644
--- a/gcc/omp-general.c
+++ b/gcc/omp-general.c
@@ -356,6

Re: [WIP] Re: [PATCH] openmp: Fix intermittent hanging of task-detach-6 libgomp tests [PR98738]

2021-02-25 Thread Kwok Cheung Yeung
troy is missing here (in the conditional if it was
only initialized.  Furthermore, I don't understand the && detach,
the earlier code assumes that if (flags & GOMP_TASK_FLAG_DETACH) != 0
then it can dereference *(void *)) detach, so the && detach seems
to be unnecessary.


I have added a call to gomp_sem_destroy, and removed the redundant check for 
detach.


@@ -484,15 +483,16 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) 
(void *, void *),
task->kind = GOMP_TASK_UNDEFERRED;
task->in_tied_task = parent->in_tied_task;
task->taskgroup = taskgroup;
+  task->deferred_p = true;
if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
{
- task->detach = true;
- gomp_sem_init (&task->completion_sem, 0);
- *(void **) detach = &task->completion_sem;


I think you can move task->deferred_p into the if stmt.


That can be done (since the code for detach is currently the only thing using 
it), but I think it would be better to have deferred_p always have the right 
value, regardless of whether or not it is used? Otherwise that might lead to 
some confusion if it is later used by something else.



+  if (!shackled_thread_p
+  && !do_wake
+  && gomp_team_barrier_waiting_for_tasks (&team->barrier)
+  && team->task_detach_count == 0)


&& team->task_detach_count == 0 is cheaper than the
   && gomp_team_barrier_waiting_for_tasks (&team->barrier)
so please swap those two.



Done.


+{
+  /* Ensure that at least one thread is woken up to signal that the
+barrier can finish.  */
+  do_wake = 1;
+}


Please drop the {}s around the single do_wake = 1; stmt.


Okay. I put the braces in because it looked a little odd with the comment.


Otherwise LGTM.



I will get this committed later if the regression tests finish with no 
surprises.

Thank you for your time in reviewing this patch!

Kwok
From 462c86549de28f28d5a71e4a7e83e2c17fd19c17 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Thu, 21 Jan 2021 05:38:47 -0800
Subject: [PATCH] openmp: Fix intermittent hanging of task-detach-6 libgomp
 tests [PR98738]

This adds support for the task detach clause to taskwait and taskgroup, and
simplifies the handling of the detach clause by moving most of the extra
handling required for detach tasks to omp_fulfill_event.

2021-02-25  Kwok Cheung Yeung  
Jakub Jelinek  

libgomp/

PR libgomp/98738
* libgomp.h (enum gomp_task_kind): Add GOMP_TASK_DETACHED.
(struct gomp_task): Replace detach and completion_sem fields with
union containing completion_sem and detach_team.  Add deferred_p
field.
(struct gomp_team): Remove task_detach_queue.
* task.c: Include assert.h.
(gomp_init_task): Initialize deferred_p and detach_team fields.
(task_fulfilled_p): Delete.
(GOMP_task): Use address of task as the event handle.  Remove
initialization of detach field.  Initialize deferred_p field.
Use automatic local for completion_sem.  Initialize detach_team field
for deferred tasks.
(gomp_barrier_handle_tasks): Remove handling of task_detach_queue.
Set kind of suspended detach task to GOMP_TASK_DETACHED and
decrement task_running_count.  Move finish_cancelled block out of
else branch.  Relocate call to gomp_team_barrier_done.
(GOMP_taskwait): Handle tasks with completion events that have not
been fulfilled.
(GOMP_taskgroup_end): Likewise.
(omp_fulfill_event): Use address of task as event handle.  Post to
completion_sem for undeferred tasks.  Clear detach_team if task
has not finished.  For finished tasks, handle post-execution tasks,
call gomp_team_barrier_wake if necessary, and free task.
* team.c (gomp_new_team): Remove initialization of task_detach_queue.
(free_team): Remove free of task_detach_queue.
* testsuite/libgomp.c-c++-common/task-detach-1.c: Fix formatting.
* testsuite/libgomp.c-c++-common/task-detach-2.c: Fix formatting.
* testsuite/libgomp.c-c++-common/task-detach-3.c: Fix formatting.
* testsuite/libgomp.c-c++-common/task-detach-4.c: Fix formatting.
* testsuite/libgomp.c-c++-common/task-detach-5.c: Fix formatting.
Change data-sharing of detach events on enclosing parallel to private.
* testsuite/libgomp.c-c++-common/task-detach-6.c: Likewise.  Remove
taskwait directive.
* testsuite/libgomp.c-c++-common/task-detach-7.c: New.
* testsuite/libgomp.c-c++-common/task-detach-8.c: New.
* testsuite/libgomp.c-c++-common/task-detach-9.c: New.
* testsuite/libgomp.c-c++-common/task-detach-10.c: New.
* testsuite/libgomp.c-c++-common/task-detach-11.c: New.
* testsuite/libgomp.fortran/task-detach-1.f90: Fix formatting.
* testsuite/libgomp

Re: [WIP] Re: [PATCH] openmp: Fix intermittent hanging of task-detach-6 libgomp tests [PR98738]

2021-02-24 Thread Kwok Cheung Yeung
r the call for unshackeled threads, before otherwise.


And then there is the case where all tasks finish on a barrier but some
haven't been fulfilled yet.
In that case, when the last thread calls

...

So, I think for the team != gomp_thread ()->ts.team
&& !do_wake
&& gomp_team_barrier_waiting_for_tasks (&team->barrier)
&& team->task_detach_count == 0
case, we need to wake up 1 thread anyway and arrange for it to do:
   gomp_team_barrier_done (&team->barrier, state);
   gomp_mutex_unlock (&team->task_lock);
   gomp_team_barrier_wake (&team->barrier, 0);
Possibly in
   if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
add
   else if (team->task_count == 0
   && gomp_team_barrier_waiting_for_tasks (&team->barrier))
{
  gomp_team_barrier_done (&team->barrier, state);
  gomp_mutex_unlock (&team->task_lock);
  gomp_team_barrier_wake (&team->barrier, 0);
  if (to_free)
{
  gomp_finish_task (to_free);
  free (to_free);
}
  return;
}
but the:
   if (--team->task_count == 0
   && gomp_team_barrier_waiting_for_tasks (&team->barrier))
 {
   gomp_team_barrier_done (&team->barrier, state);
   gomp_mutex_unlock (&team->task_lock);
   gomp_team_barrier_wake (&team->barrier, 0);
   gomp_mutex_lock (&team->task_lock);
 }
in that case would then be incorrect, we don't want to do that twice.
So, either that second if would need to do the to_free handling
and return instead of taking the lock again and looping, or
perhaps we can just do
  --team->task_count;
there instead and let the above added else if handle that?

I have applied your patch to move the gomp_team_barrier_done, and in 
omp_fulfill_event, I ensure that a single thread is woken up so that 
gomp_barrier_handle_tasks can signal for the barrier to finish.


I'm having some trouble coming up with a testcase to test this scenario though. 
I tried having a testcase like this to have threads in separate teams:


  #pragma omp teams num_teams (2) shared (event, started)
#pragma omp parallel num_threads (1)
  if (omp_get_team_num () == 0)
{
  #pragma omp task detach (event)
started = 1;
}
  else
// Wait for started to become 1
omp_fulfill_event (event);

but it does not work because GOMP_teams_reg launches the enclosed block 
sequentially:


  for (gomp_team_num = 0; gomp_team_num < num_teams; gomp_team_num++)
fn (data);

and when the first team launches, it blocks waiting for the detach event in 
GOMP_parallel_end->gomp_team_end->gomp_team_barrier_wait_end, and never gets 
around to launching the second team. If I omit the 'omp parallel' (to try to get 
an undeferred task), GCC refuses to compile (only 'distribute', 'parallel' or 
'loop' regions are allowed to be strictly nested inside 'teams' region). And you 
can't nest 'omp teams' inside an 'omp parallel' either. Is there any way of 
doing this within OpenMP or do we have to resort to creating threads outside of 
OpenMP?


Thanks

Kwok
From 0fa4deb89f3778ccacd64b01de377ba2b7879db1 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Thu, 21 Jan 2021 05:38:47 -0800
Subject: [PATCH] openmp: Fix intermittent hanging of task-detach-6 libgomp
 tests [PR98738]

This adds support for the task detach clause to taskwait and taskgroup, and
simplifies the handling of the detach clause by moving most of the extra
handling required for detach tasks to omp_fulfill_event.

2021-02-24  Kwok Cheung Yeung  
Jakub Jelinek  

libgomp/

PR libgomp/98738
* libgomp.h (enum gomp_task_kind): Add GOMP_TASK_DETACHED.
(struct gomp_task): Replace detach and completion_sem fields with
union containing completion_sem and detach_team.  Add deferred_p
field.
(struct gomp_team): Remove task_detach_queue.
* task.c: Include assert.h.
(gomp_init_task): Initialize deferred_p and detach_team fields.
(task_fulfilled_p): Delete.
(GOMP_task): Use address of task as the event handle.  Remove
initialization of detach field.  Initialize deferred_p field.
Use automatic local for completion_sem.  Initialize detach_team field
for deferred tasks.
(gomp_barrier_handle_tasks): Remove handling of task_detach_queue.
Set kind of suspended detach task to GOMP_TASK_DETACHED and
decrement task_running_count.  Move finish_cancelled block out of
else branch.  Relocate call to gomp_team_barrier_done.
(GOMP_taskwait): Handle tasks with completion events that have not
been fulfilled.
(GOMP_taskgroup_end): Likewise.
(omp_fulfill_event): Use address of task as event handle.  Post to
completion_sem for undeferred tasks.  Clea

Re: [WIP] Re: [PATCH] openmp: Fix intermittent hanging of task-detach-6 libgomp tests [PR98738]

2021-02-23 Thread Kwok Cheung Yeung

On 19/02/2021 7:12 pm, Kwok Cheung Yeung wrote:
I have included the current state of my patch. All task-detach-* tests pass when 
executed without offloading or with offloading to GCN, but with offloading to 
Nvidia, task-detach-6.* hangs consistently but everything else passes (probably 
because of the missing gomp_team_barrier_done?).




It looks like the hang has nothing to do with the detach patch - this hangs 
consistently for me when offloaded to NVPTX:


#include 

int main (void)
{
#pragma omp target
  #pragma omp parallel
#pragma omp task
  ;
}

This doesn't hang when offloaded to GCN or the host device, or if num_threads(1) 
is specified on the omp parallel.


Kwok


[WIP] Re: [PATCH] openmp: Fix intermittent hanging of task-detach-6 libgomp tests [PR98738]

2021-02-19 Thread Kwok Cheung Yeung
s when 
executed without offloading or with offloading to GCN, but with offloading to 
Nvidia, task-detach-6.* hangs consistently but everything else passes (probably 
because of the missing gomp_team_barrier_done?).


Kwok
From 31a5c736910036364fd1f0f3cf7ac28437864a27 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Thu, 21 Jan 2021 05:38:47 -0800
Subject: [PATCH] openmp: Fix intermittent hanging of task-detach-6 libgomp
 tests [PR98738]

This adds support for the task detach clause to taskwait and taskgroup, and
simplifies the handling of the detach clause by moving most of the extra
handling required for detach tasks to omp_fulfill_event.

2021-02-19  Kwok Cheung Yeung  

libgomp/

PR libgomp/98738
* libgomp.h (enum gomp_task_kind): Add GOMP_TASK_DETACHED.
(struct gomp_task): Replace detach and completion_sem fields with
union containing completion_sem and detach_team.
(struct gomp_team): Remove task_detach_queue.
* task.c: Include assert.h.
(gomp_init_task): Initialize detach_team field.
(task_fulfilled_p): Delete.
(GOMP_task): Use address of task as the event handle.  Remove
initialization of detach field.  Initialize detach_team field for
deferred tasks.
(gomp_barrier_handle_tasks): Remove handling of task_detach_queue.
Set kind of suspended detach task to GOMP_TASK_DETACHED and
decrement task_running_count.  Move finish_cancelled block out of
else branch.
(GOMP_taskwait): Handle tasks with completion events that have not
been fulfilled.
(GOMP_taskgroup_end): Likewise.
(omp_fulfill_event): Use address of task as event handle.  Post to
completion_sem for undeferred tasks.  Clear detach_team if task
has not finished.  For finished tasks, handle post-execution tasks,
post to taskwait_sem and taskgroup_sem if necessary, call
gomp_team_barrier_wake if necessary, and free task.
* testsuite/libgomp.c-c++-common/task-detach-1.c: Fix formatting.
* testsuite/libgomp.c-c++-common/task-detach-2.c: Fix formatting.
* testsuite/libgomp.c-c++-common/task-detach-3.c: Fix formatting.
* testsuite/libgomp.c-c++-common/task-detach-4.c: Fix formatting.
* testsuite/libgomp.c-c++-common/task-detach-5.c: Fix formatting.
Change data-sharing of detach events on enclosing parallel to private.
* testsuite/libgomp.c-c++-common/task-detach-6.c: Likewise.  Remove
taskwait directive.
* testsuite/libgomp.c-c++-common/task-detach-7.c: New.
* testsuite/libgomp.c-c++-common/task-detach-8.c: New.
* testsuite/libgomp.c-c++-common/task-detach-9.c: New.
* testsuite/libgomp.c-c++-common/task-detach-10.c: New.
* testsuite/libgomp.c-c++-common/task-detach-11.c: New.
* testsuite/libgomp.fortran/task-detach-1.f90: Fix formatting.
* testsuite/libgomp.fortran/task-detach-2.f90: Fix formatting.
* testsuite/libgomp.fortran/task-detach-3.f90: Fix formatting.
* testsuite/libgomp.fortran/task-detach-4.f90: Fix formatting.
* testsuite/libgomp.fortran/task-detach-5.f90: Fix formatting.
Change data-sharing of detach events on enclosing parallel to private.
* testsuite/libgomp.fortran/task-detach-6.f90: Likewise.  Remove
taskwait directive.
* testsuite/libgomp.fortran/task-detach-7.f90: New.
* testsuite/libgomp.fortran/task-detach-8.f90: New.
* testsuite/libgomp.fortran/task-detach-9.f90: New.
* testsuite/libgomp.fortran/task-detach-10.f90: New.
* testsuite/libgomp.fortran/task-detach-11.f90: New.
---
 libgomp/libgomp.h  |  18 +-
 libgomp/task.c | 225 +
 libgomp/team.c |   2 -
 .../testsuite/libgomp.c-c++-common/task-detach-1.c |   4 +-
 .../libgomp.c-c++-common/task-detach-10.c  |  45 +
 .../libgomp.c-c++-common/task-detach-11.c  |  13 ++
 .../testsuite/libgomp.c-c++-common/task-detach-2.c |   6 +-
 .../testsuite/libgomp.c-c++-common/task-detach-3.c |   6 +-
 .../testsuite/libgomp.c-c++-common/task-detach-4.c |   4 +-
 .../testsuite/libgomp.c-c++-common/task-detach-5.c |   8 +-
 .../testsuite/libgomp.c-c++-common/task-detach-6.c |   8 +-
 .../testsuite/libgomp.c-c++-common/task-detach-7.c |  45 +
 .../testsuite/libgomp.c-c++-common/task-detach-8.c |  47 +
 .../testsuite/libgomp.c-c++-common/task-detach-9.c |  43 
 .../testsuite/libgomp.fortran/task-detach-1.f90|   4 +-
 .../testsuite/libgomp.fortran/task-detach-10.f90   |  44 
 .../testsuite/libgomp.fortran/task-detach-11.f90   |  13 ++
 .../testsuite/libgomp.fortran/task-detach-2.f90|   6 +-
 .../testsuite/libgomp.fortran/task-detach-3.f90|   6 +-
 .../testsuite/libgomp.fortran/task-detach-4.f90|   

[OG10] [committed] Backport patches for non-rectangular loop collapse

2021-02-09 Thread Kwok Cheung Yeung

Hello

I have backported the following patches for supporting non-rectangular loop 
collapse from mainline to the devel/omp/gcc-10 branch:


7bfdb5a1c694cb9006e0478941e4443b230f5b98 openmp: Fix ICE on non-rectangular loop 
with known 0 iterations
88528328ea560230f728af97110e89396c8267d2 openmp: Improve composite triangular 
loop lowering and expansion
758fdf6514348a40ed424f3244cb25b92a005095 openmp: Add support for non-rectangular 
loops in taskloop construct
4b2c33ef32f483ca78f6c7b3a5ee880aebe75b4c openmp: Handle even some combined 
non-rectangular loops
3dd767906dab7d5456fc4c3a98134582b5f8a2ed openmp: Use more efficient logical -> 
actual computation even if # iterations is computed at runtime
1e507ef879b2bf4ec1c80a07f41c52474ed32ba3 openmp: Compute number of collapsed 
loop iterations more efficiently for some non-rectangular loops

c5f31b373a075254e954e6d690671f68955db6d4 openmp: Fix up loop-21.c
6da60f76333666e98955aa33624a7e95bffe58aa openmp: Adjust outer bounds of non-rect 
loops
8abe8a169150b717c8e1f7de8c1e0d29b9381806 openmp: Optimize triangular loop 
logical iterator to actual iterators computation using search for quadratic 
equation root(s)
551b4fbc89e84c43a9cd202bc537f428b39aab83 openmp: Diagnose non-rectangular loops 
with invalid steps
b2eabb179a3e2eab5eda2cc0829ba88756252189 openmp: Non-rectangular loop support 
for non-composite worksharing loops and distribute
076673fd7c64940b761ce980ea54df7ef6dd2199 openmp: Fix two pastos in non-rect loop 
OpenMP lowering.
875154e999bdf000f95e811cc50a1cbf76c0ce71 openmp: Compute triangular loop number 
of iterations at compile time
3cdeb3993f3901407dcb4320525876c188f31872 openmp: Initial part of OpenMP 5.0 
non-rectangular loop support


Kwok


[PATCH] libgomp: Add documentation for omp_fulfill_event

2021-01-25 Thread Kwok Cheung Yeung

Hello

I forgot to update the libgomp documentation to document the new 
omp_fulfill_event runtime routine introduced by task detach support. Is this 
patch okay for trunk?


Thanks

Kwok
From efeaac839879bc30e0e7e129ca43381192a6f109 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Mon, 25 Jan 2021 07:01:10 -0800
Subject: [PATCH] libgomp: Add documentation for omp_fulfill_event runtime
 function

2021-01-25  Kwok Cheung Yeung  

libgomp/
* libgomp.texi (omp_fulfill_event): New entry.
---
 libgomp/libgomp.texi | 34 ++
 1 file changed, 34 insertions(+)

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 7350b8e..346540b 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -210,6 +210,10 @@ Portable, thread-based, wall clock timer.
 
 * omp_get_wtick::Get timer precision.
 * omp_get_wtime::Elapsed wall clock time.
+
+Support for event objects.
+
+* omp_fulfill_event::Fulfill and destroy OpenMP event.
 @end menu
 
 
@@ -1401,6 +1405,36 @@ guaranteed not to change during the execution of the 
program.
 
 
 
+@node omp_fulfill_event
+@section @code{omp_fulfill_event} -- Fulfill and destroy an OpenMP event
+@table @asis
+@item @emph{Description}:
+Fulfill the event associated with the event handle argument.  Currently, it
+is only used to fulfill events generated by detach clauses on task
+constructs - the effect of fulfilling the event is to allow the task to
+complete.
+
+The result of calling @code{omp_fulfill_event} with an event handle other
+than that generated by a detach clause is undefined.  Calling it with an
+event handle that has already been fulfilled is also undefined.
+
+@item @emph{C/C++}:
+@multitable @columnfractions .20 .80
+@item @emph{Prototype}: @tab @code{void omp_fulfill_event(omp_event_handle_t 
event);}
+@end multitable
+
+@item @emph{Fortran}:
+@multitable @columnfractions .20 .80
+@item @emph{Interface}: @tab @code{subroutine omp_fulfill_event(event)}
+@item   @tab @code{integer (kind=omp_event_handle_kind) :: 
event}
+@end multitable
+
+@item @emph{Reference}:
+@uref{https://www.openmp.org, OpenMP specification v5.0}, Section 3.5.1.
+@end table
+
+
+
 @c -
 @c OpenMP Environment Variables
 @c -
-- 
2.8.1



[OG10][committed] Backport patches for OpenMP task detach support

2021-01-22 Thread Kwok Cheung Yeung

I have backported a number of patches from mainline to the devel/omp/gcc-10 
branch:

* openmp: Add support for the OpenMP 5.0 task detach clause 
(de460a5faff80a2338ccd46f249c964fa34b4c16)


* libgomp: Don't access gomp_sem_t as int using atomics unconditionally 
(2b93ffc7219aa53815ece2adb21f56dd265ec6bc)


* RTEMS: Fix libgomp build (f61fbb53ecf775cd491794f14847c2f1ebc88951)

* openmp: Don't optimize shared to firstprivate on task with depend clause 
(d4d00bc2f8fa2b130e50145db385fecf5858c845)


* openmp: Don't ICE on detach clause with erroneous decl [PR98742] 
(668dc081cccaf5c44e31fe39e741d5644a300bb1)


* Fix gfortran.dg/gomp/task-detach-1.f90 for non 64bit pointers 
(6de2de17049667f8426f9669f992f74230b2c2d7)


* libgomp: Fix up GOMP_task on s390x (7a7702433921e4451d2618bb398436c58443e744)


I have also committed my fix for the intermittent hang sometimes seen in one of 
the tests:


* openmp: Fix intermittent hanging of task-detach-6 libgomp tests [PR98738] 
(ffd581af1d2228bc7c5f5f84e1b6fe42e49cdda2)



Kwok


Re: [PATCH] openmp: Fix intermittent hanging of task-detach-6 libgomp tests [PR98738]

2021-01-21 Thread Kwok Cheung Yeung

On 21/01/2021 7:33 pm, Kwok Cheung Yeung wrote:
With Nvidia and GCN offloading though, task-detach-6 hangs... I _think_ the 
reason why it 'worked' before was because the taskwait allowed tasks with detach 
clauses to always complete immediately after execution. Since that backdoor has 
been closed, task-detach-6 hangs with or without the taskwait.


It turns out that the hang is because the team barrier threads fail to wake up 
when gomp_team_barrier_wake is called from omp_fulfill_event, because it was 
done while task_lock was held. When the lock is freed first, the wake works as 
expected and the test completes.


Is this patch okay for trunk (to be squashed into the previous patch)?

Thanks

Kwok
From 2ee183c22772bc7d80d24ae75d5bd57f419712fd Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Thu, 21 Jan 2021 14:01:16 -0800
Subject: [PATCH] openmp: Fix hangs when task constructs with detach clauses
 are offloaded

2021-01-21  Kwok Cheung Yeung  

libgomp/
* task.c (GOMP_task): Add thread to debug message.
(gomp_barrier_handle_tasks): Do not take address of child_task in
debug message.
(omp_fulfill_event): Release team->task_lock before waking team
barrier threads.
---
 libgomp/task.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/libgomp/task.c b/libgomp/task.c
index dbd6284..60b598e 100644
--- a/libgomp/task.c
+++ b/libgomp/task.c
@@ -492,7 +492,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) 
(void *, void *),
  if (data)
*(void **) data = task;
 
- gomp_debug (0, "New event: %p\n", task);
+ gomp_debug (0, "Thread %d: new event: %p\n", thr->ts.team_id, task);
}
   thr->task = task;
   if (cpyfn)
@@ -1372,7 +1372,7 @@ gomp_barrier_handle_tasks (gomp_barrier_state_t state)
 child_task, MEMMODEL_RELAXED);
  --team->task_detach_count;
  gomp_debug (0, "thread %d: found task with fulfilled event %p\n",
- thr->ts.team_id, &child_task);
+ thr->ts.team_id, child_task);
 
  if (to_free)
{
@@ -2470,8 +2470,12 @@ omp_fulfill_event (omp_event_handle_t event)
   gomp_sem_post (&task->taskgroup->taskgroup_sem);
 }
   if (team && team->nthreads > team->task_running_count)
-gomp_team_barrier_wake (&team->barrier, 1);
-  gomp_mutex_unlock (&team->task_lock);
+{
+  gomp_mutex_unlock (&team->task_lock);
+  gomp_team_barrier_wake (&team->barrier, 1);
+}
+  else
+gomp_mutex_unlock (&team->task_lock);
 }
 
 ialias (omp_fulfill_event)
-- 
2.8.1



[PATCH] openmp: Fix intermittent hanging of task-detach-6 libgomp tests [PR98738]

2021-01-21 Thread Kwok Cheung Yeung

Hello

This patch addresses the intermittent hanging seen in the 
libgomp.c-c++-common/task-detach-6.c and libgomp.fortran/task-detach-6.f90 tests.


The main problem is due to the 'omp taskwait' in the test. GOMP_taskwait can run 
tasks, so for correct semantics it needs to be able to place finished tasks that 
have unfulfilled completion events into the detach queue, rather than just 
finishing them immediately (in effect ignoring the detach clause).


Unfinished tasks in the detach queue are still children of their parent task, so 
they can appear in next_task in the main GOMP_taskwait loop. If next_task is 
fulfilled then it can be finished immediately, otherwise it will wait on 
taskwait_sem.


omp_fulfill_event needs to be able to post the taskwait_sem semaphore as well as 
wake the team barrier. Since the semaphore is located on the parent of the task 
whose completion event is being fulfilled, I have changed the event handle to 
being a pointer to the task instead of just the completion semaphore in order to 
access the parent field.


This type of code is currently used to wake the threads for the team barrier:

  if (team->nthreads > team->task_running_count)
gomp_team_barrier_wake (&team->barrier, 1);

This issues a gomp_team_barrier_wake if any of the threads are not running a 
task (and so might be sleeping). However, detach tasks that are queued waiting 
for a completion event are currently included in task_running_count (because the 
finish_cancelled code executed later decrements it). Since 
gomp_barrier_handle_tasks does not block if there are unfinished detached tasks 
remaining (since during development I found that doing so could cause deadlocks 
in single-threaded code), threads could be sleeping even if team->nthreads == 
team->task_running_count, and this code would fail to wake them. I fixed this by 
decrementing task_running_count when queuing an unfinished detach task, and 
skipping the decrement in finish_cancelled if the task was a queued detach task. 
I added a new gomp_task_kind GOMP_TASK_DETACHED to mark these type of tasks.


I have tried running the task-detach-6 testcase (C and Fortran) 10,000 
iterations at a time using 32 threads, on a x86_64 Linux machine with GCC built 
with --disable-linux-futex, and no hangs. I have checked that it bootstraps, and 
noticed no regressions in the libgomp testsuite when run without offloading.


With Nvidia and GCN offloading though, task-detach-6 hangs... I _think_ the 
reason why it 'worked' before was because the taskwait allowed tasks with detach 
clauses to always complete immediately after execution. Since that backdoor has 
been closed, task-detach-6 hangs with or without the taskwait.


I think GOMP_taskgroup_end and maybe gomp_task_maybe_wait_for_dependencies also 
need the same type of TLC as they can also run tasks, but there are currently no 
tests that exercise it.


The detach support clearly needs more work, but is this particular patch okay 
for trunk?


Thanks

Kwok
From 12cc24c937e9294d5616dd0cd9a754c02ffb26fa Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Thu, 21 Jan 2021 05:38:47 -0800
Subject: [PATCH] openmp: Fix intermittent hanging of task-detach-6 libgomp
 tests [PR98738]

This adds support for the task detach clause to taskwait, and fixes a
number of problems related to semaphores that may lead to a hang in
some circumstances.

2021-01-21  Kwok Cheung Yeung  

libgomp/

PR libgomp/98738
* libgomp.h (enum gomp_task_kind): Add GOMP_TASK_DETACHED.
* task.c (task_fulfilled_p): Check detach field as well.
(GOMP_task): Use address of task as the event handle.
(gomp_barrier_handle_tasks): Fix indentation.  Use address of task
as event handle. Set kind of suspended detach task to
GOMP_TASK_DETACHED and decrement task_running_count.  Move
finish_cancelled block out of else branch.  Skip decrement of
task_running_count if task kind is GOMP_TASK_DETACHED.
(GOMP_taskwait): Finish fulfilled detach tasks.  Update comment.
Queue detach tasks that have not been fulfilled.
(omp_fulfill_event): Use address of task as event handle.  Post
to taskwait_sem and taskgroup_sem if necessary.  Check
task_running_count before calling gomp_team_barrier_wake.
* testsuite/libgomp.c-c++-common/task-detach-5.c (main): Change
data-sharing of detach events on enclosing parallel to private.
* testsuite/libgomp.c-c++-common/task-detach-6.c (main): Likewise.
* testsuite/libgomp.fortran/task-detach-5.f90 (task_detach_5):
Likewise.
* testsuite/libgomp.fortran/task-detach-6.f90 (task_detach_6):
Likewise.
---
 libgomp/libgomp.h  |   5 +-
 libgomp/task.c | 155 ++---
 .../testsuite/libgomp.c-c++-common/task-detach-5.c |   2 +-
 .../testsuite/libgomp.c-c++-common/task-detach-6.c |   2 +-
 .../testsuite/li

Re: [PATCH] [WIP] openmp: Add OpenMP 5.0 task detach clause support

2021-01-16 Thread Kwok Cheung Yeung

Thanks for the review.

On 16/01/2021 9:25 am, Jakub Jelinek wrote:

On Fri, Jan 15, 2021 at 03:07:56PM +, Kwok Cheung Yeung wrote:

+   {
+ tree detach_decl = OMP_CLAUSE_DECL (*detach_seen);
+
+ for (pc = &clauses, c = clauses; c ; c = *pc)
+   {
+ bool remove = false;
+ if ((OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED
+  || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE
+  || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE
+  || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
+ && OMP_CLAUSE_DECL (c) == detach_decl)
+   {
+ error_at (OMP_CLAUSE_LOCATION (c),
+   "the event handle of a %<detach%> clause "
+   "should not be in a data-sharing clause");
+ remove = true;
+   }


I think you don't need this loop, instead you could just check
if (bitmap_bit_p (_head, DECL_UID (detach_decl))
|| bitmap_bit_p (_head, DECL_UID (detach_decl))
|| bitmap_bit_p (_head, DECL_UID (detach_decl)))



I think the main problem with this is that you cannot then point to the location 
of the offending data-sharing clause. Given a task construct with 'detach(x) 
shared(x)', I would tend to think of the 'shared(x)' as being the incorrect part 
here, and so would want the error to point to it. Unless you have any 
objections, I am inclined to keep this as it is?



@@ -2416,6 +2421,64 @@ finish_taskreg_scan (omp_context *ctx)
  TYPE_FIELDS (ctx->srecord_type) = f1;
}
}
+  if (detach_clause)
+   {
+ tree c, field;
+
+ /* Look for a firstprivate clause with the detach event handle.  */
+ for (c = gimple_omp_taskreg_clauses (ctx->stmt);
+  c; c = OMP_CLAUSE_CHAIN (c))
+   {
+ if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_FIRSTPRIVATE)
+   continue;
+ if (maybe_lookup_decl_in_outer_ctx (OMP_CLAUSE_DECL (c), ctx)
+ == OMP_CLAUSE_DECL (detach_clause))
+   break;
+   }
+
+ if (c)
+   field = lookup_field (OMP_CLAUSE_DECL (c), ctx);
+ else
+   {
+ /* The detach event handle is not referenced within the
+task context, so add a temporary field for it here.  */
+ field = build_decl (OMP_CLAUSE_LOCATION (detach_clause),
+ FIELD_DECL, NULL_TREE, ptr_type_node);
+ insert_field_into_struct (ctx->record_type, field);


Can't you just force the firstprivate clause during gimplification, so that
it doesn't go away even if not referenced?
That would mean just forcing in | GOVD_SEEN when it is added.
If not, not a big deal, just thought it could be easier.



I've tried this diff:

case OMP_CLAUSE_DETACH:
- decl = OMP_CLAUSE_DECL (c);
- goto do_notice;
+ flags = GOVD_FIRSTPRIVATE | GOVD_SEEN;
+ goto do_add;

and just asserted that a suitable firstprivate clause is found in 
finish_taskreg_scan, and it seems to work fine :-).



+  #pragma omp task detach (x) detach (y) /* { dg-error "there can be at most one 
'detach' clause in a task construct" } */


It would be on a task construct rather than in a task construct, but the
common wording for this diagnostics is
"too many %qs clauses", "detach"
Please use that wording.


Done, though I don't see the point of using a %qs format code with a constant 
string here...


I have applied your other suggestions, and have retested the gomp.exp and 
libgomp tests. The full testrun started yesterday showed no regressions. If you 
have no further issues then I will commit this later tonight ahead of stage4.


Thanks

Kwok
commit 68f17e5d3f28b4150fc0fa9112671403c4519c05
Author: Kwok Cheung Yeung 
Date:   Sat Jan 16 09:27:28 2021 -0800

More task detach fixes.

diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index 4e9b21b..b938e6a 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -14942,8 +14942,7 @@ c_finish_omp_clauses (tree clauses, enum 
c_omp_region_type ort)
  if (detach_seen)
{
  error_at (OMP_CLAUSE_LOCATION (c),
-   "there can be at most one %<detach%> clause in a "
-   "task construct");
+   "too many %<detach%> clauses on a task construct");
  remove = true;
  break;
}
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index 9dfaea2..c28cde0 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -7425,8 +7425,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type 
ort)
  if (detach_seen)
{
  error_at (OMP_CLAUSE_LOCATION (c),
-   "there can be at most one %<detach%> clause i

Re: [PATCH] [WIP] openmp: Add OpenMP 5.0 task detach clause support

2021-01-15 Thread Kwok Cheung Yeung

On 15/01/2021 3:07 pm, Kwok Cheung Yeung wrote:
I have tested bootstrapping on x86_64 (no offloading) with no issues, and 
running the libgomp testsuite with Nvidia offloading shows no regressions. I 
have also tested all the gomp.exp tests in the main gcc testsuite, also with no 
issues. I am currently still running the full testsuite, but do not anticipate 
any problems.


Okay to commit on trunk, if the full testsuite run does not show any 
regressions?


Found an issue already :-( - the libgomp include files are not found when the 
tests are run via 'make check'. I have now included the relevant parts of the 
include files in the tests themselves. Okay for trunk (to be merged into the 
main patch)?


Thanks

Kwok
diff --git a/gcc/testsuite/c-c++-common/gomp/task-detach-1.c 
b/gcc/testsuite/c-c++-common/gomp/task-detach-1.c
index c7dda82..f50f748 100644
--- a/gcc/testsuite/c-c++-common/gomp/task-detach-1.c
+++ b/gcc/testsuite/c-c++-common/gomp/task-detach-1.c
@@ -1,7 +1,12 @@
 /* { dg-do compile } */
 /* { dg-options "-fopenmp" } */
 
-#include <omp.h>
+typedef enum omp_event_handle_t
+{
+  __omp_event_handle_t_max__ = __UINTPTR_MAX__
+} omp_event_handle_t;
+
+extern void omp_fulfill_event (omp_event_handle_t);
 
 void f (omp_event_handle_t x, omp_event_handle_t y, int z)
 {
diff --git a/gcc/testsuite/g++.dg/gomp/task-detach-1.C 
b/gcc/testsuite/g++.dg/gomp/task-detach-1.C
index 443d3e8..2f0c650 100644
--- a/gcc/testsuite/g++.dg/gomp/task-detach-1.C
+++ b/gcc/testsuite/g++.dg/gomp/task-detach-1.C
@@ -1,7 +1,10 @@
 // { dg-do compile }
 // { dg-options "-fopenmp" }
 
-#include <omp.h>
+typedef enum omp_event_handle_t
+{
+  __omp_event_handle_t_max__ = __UINTPTR_MAX__
+} omp_event_handle_t;
 
 template <typename T>
 void func ()
diff --git a/gcc/testsuite/gcc.dg/gomp/task-detach-1.c 
b/gcc/testsuite/gcc.dg/gomp/task-detach-1.c
index fa7315e..611044d 100644
--- a/gcc/testsuite/gcc.dg/gomp/task-detach-1.c
+++ b/gcc/testsuite/gcc.dg/gomp/task-detach-1.c
@@ -1,7 +1,12 @@
 /* { dg-do compile } */
 /* { dg-options "-fopenmp" } */
 
-#include <omp.h>
+typedef enum omp_event_handle_t
+{
+  __omp_event_handle_t_max__ = __UINTPTR_MAX__
+} omp_event_handle_t;
+
+extern void omp_fulfill_event (omp_event_handle_t);
 
 void f (omp_event_handle_t x)
 {
diff --git a/gcc/testsuite/gfortran.dg/gomp/task-detach-1.f90 
b/gcc/testsuite/gfortran.dg/gomp/task-detach-1.f90
index dc51345..114068e 100644
--- a/gcc/testsuite/gfortran.dg/gomp/task-detach-1.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/task-detach-1.f90
@@ -2,8 +2,10 @@
 ! { dg-options "-fopenmp" }
 
 program task_detach_1
-  use omp_lib
-
+  use iso_c_binding, only: c_intptr_t
+  implicit none
+  
+  integer, parameter :: omp_event_handle_kind = c_intptr_t
   integer (kind=omp_event_handle_kind) :: x, y
   integer :: z
   


Re: [PATCH] [WIP] openmp: Add OpenMP 5.0 task detach clause support

2021-01-15 Thread Kwok Cheung Yeung

On 10/12/2020 2:38 pm, Jakub Jelinek wrote:

On Wed, Dec 09, 2020 at 05:37:24PM +0000, Kwok Cheung Yeung wrote:

--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -14942,6 +14942,11 @@ c_finish_omp_clauses (tree clauses, enum 
c_omp_region_type ort)
  pc = &OMP_CLAUSE_CHAIN (c);
  continue;
  
+	case OMP_CLAUSE_DETACH:

+ t = OMP_CLAUSE_DECL (c);
+ pc = &OMP_CLAUSE_CHAIN (c);
+ continue;
+


If you didn't need to do anything for C for the detach clause, you would
just add:
case OMP_CLAUSE_DETACH:
at the end of the case list that starts below:

case OMP_CLAUSE_IF:
case OMP_CLAUSE_NUM_THREADS:
case OMP_CLAUSE_NUM_TEAMS:


But you actually do need to do something, even for C.

There are two restrictions:
- At most one detach clause can appear on the directive.
- If a detach clause appears on the directive, then a mergeable clause cannot 
appear on the same directive.
that should be checked and diagnosed.  One place to do that would be
like usually in all the FEs separately, that would mean adding
   bool mergeable_seen = false, detach_seen = false;
vars and for those clauses setting the *_seen, plus for DETACH
already complain if detach_seen is already true and remove the clause.
And at the end of the loop if mergeable_seen && detach_seen, diagnose
and remove one of them (perhaps better detach clause).
There is the optional second loop that can be used for the removal...

Testcase coverage should include:
   #pragma omp task detach (x) detach (y)
as well as
   #pragma omp task mergeable detach (x)
and
   #pragma omp task detach (x) mergeable
(and likewise for Fortran).



I have implemented checking for multiple detach clauses and usage with 
mergeable. I have included testcases in c-c++-common/gomp/task-detach-1.c and

gfortran.dg/gomp/task-detach-1.f90.


+  if (cp_lexer_next_token_is_not (parser->lexer, CPP_NAME))
+{
+  cp_parser_error (parser, "expected identifier");
+  return list;
+}
+
+  location_t id_loc = cp_lexer_peek_token (parser->lexer)->location;
+  tree t, identifier = cp_parser_identifier (parser);
+
+  if (identifier == error_mark_node)
+t = error_mark_node;
+  else
+{
+  t = cp_parser_lookup_name_simple
+   (parser, identifier,
+cp_lexer_peek_token (parser->lexer)->location);
+  if (t == error_mark_node)
+   cp_parser_name_lookup_error (parser, identifier, t, NLE_NULL,
+id_loc);


The above doesn't match what cp_parser_omp_var_list_no_open does,
in particular it should use cp_parser_id_expression
instead of cp_parser_identifier etc.



Changed to use cp_parser_id_expression, and added extra logic from 
cp_parser_omp_var_list in looking up the decl.



+  else
+   {
+ tree type = TYPE_MAIN_VARIANT (TREE_TYPE (t));
+ if (!INTEGRAL_TYPE_P (type)
+ || TREE_CODE (type) != ENUMERAL_TYPE
+ || DECL_NAME (TYPE_NAME (type))
+  != get_identifier ("omp_event_handle_t"))
+   {
+ error_at (id_loc, "%<detach%> clause event handle "
+   "has type %qT rather than "
+   "%<omp_event_handle_t%>",
+   type);
+ return list;


You can't do this here for C++, it needs to be done in finish_omp_clauses
instead and only be done if the type is not a dependent type.
Consider (e.g. should be in testsuite)
template <typename T>
void
foo ()
{
   T t;
   #pragma omp task detach (t)
   ;
}

template <typename T>
void
bar ()
{
   T t;
   #pragma omp task detach (t)
   ;
}

void
baz ()
{
   foo <omp_event_handle_t> ();
   bar <int> (); // Instantiating this should error
}



Moved type checking to finish_omp_clauses, and testcase added at 
g++.dg/gomp/task-detach-1.C.



@@ -7394,6 +7394,9 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type 
ort)
}
}
  break;
+   case OMP_CLAUSE_DETACH:
+ t = OMP_CLAUSE_DECL (c);
+ break;
  


Again, restriction checking here, plus check the type if it is
non-dependent, otherwise defer that checking for finish_omp_clauses when
it will not be dependent anymore.

I think you need to handle OMP_CLAUSE_DETACH in cp/pt.c too.



Done. g++.dg/gomp/task-detach-1.C contains a test for templates.


--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -9733,6 +9733,19 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq 
*pre_p,
}
  break;
  
+	case OMP_CLAUSE_DETACH:

+ decl = OMP_CLAUSE_DECL (c);
+ if (outer_ctx)
+   {
+ splay_tree_node on
+   = splay_tree_lookup (outer_ctx->variables,
+(splay_tree_key)decl);
+ if (on == NULL || (on->value & GOVD_DATA_SHARE_CLASS) == 0)
+   omp_firstprivatize_variable (outer_ctx, decl);
+ omp_notice_variable (outer_ctx, decl, true);
+   

Re: [PATCH] [WIP] openmp: Add OpenMP 5.0 task detach clause support

2021-01-11 Thread Kwok Cheung Yeung

Hello

Thanks for the review. Due to the Christmas holidays I have not finished 
addressing all these issues yet, but I expect to be done by the end of this 
week. Can this patch still make it for GCC 11, as I believe stage 4 is starting 
soon?


Thanks

Kwok

On 10/12/2020 2:38 pm, Jakub Jelinek wrote:

On Wed, Dec 09, 2020 at 05:37:24PM +0000, Kwok Cheung Yeung wrote:

--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -14942,6 +14942,11 @@ c_finish_omp_clauses (tree clauses, enum 
c_omp_region_type ort)
  pc = &OMP_CLAUSE_CHAIN (c);
  continue;
  
+	case OMP_CLAUSE_DETACH:

+ t = OMP_CLAUSE_DECL (c);
+ pc = &OMP_CLAUSE_CHAIN (c);
+ continue;
+


If you didn't need to do anything for C for the detach clause, you would
just add:
case OMP_CLAUSE_DETACH:
at the end of the case list that starts below:

case OMP_CLAUSE_IF:
case OMP_CLAUSE_NUM_THREADS:
case OMP_CLAUSE_NUM_TEAMS:


But you actually do need to do something, even for C.

There are two restrictions:
- At most one detach clause can appear on the directive.
- If a detach clause appears on the directive, then a mergeable clause cannot 
appear on the same directive.
that should be checked and diagnosed.  One place to do that would be
like usually in all the FEs separately, that would mean adding
   bool mergeable_seen = false, detach_seen = false;
vars and for those clauses setting the *_seen, plus for DETACH
already complain if detach_seen is already true and remove the clause.
And at the end of the loop if mergeable_seen && detach_seen, diagnose
and remove one of them (perhaps better detach clause).
There is the optional second loop that can be used for the removal...

Testcase coverage should include:
   #pragma omp task detach (x) detach (y)
as well as
   #pragma omp task mergeable detach (x)
and
   #pragma omp task detach (x) mergeable
(and likewise for Fortran).


+  if (cp_lexer_next_token_is_not (parser->lexer, CPP_NAME))
+{
+  cp_parser_error (parser, "expected identifier");
+  return list;
+}
+
+  location_t id_loc = cp_lexer_peek_token (parser->lexer)->location;
+  tree t, identifier = cp_parser_identifier (parser);
+
+  if (identifier == error_mark_node)
+t = error_mark_node;
+  else
+{
+  t = cp_parser_lookup_name_simple
+   (parser, identifier,
+cp_lexer_peek_token (parser->lexer)->location);
+  if (t == error_mark_node)
+   cp_parser_name_lookup_error (parser, identifier, t, NLE_NULL,
+id_loc);


The above doesn't match what cp_parser_omp_var_list_no_open does,
in particular it should use cp_parser_id_expression
instead of cp_parser_identifier etc.


+  else
+   {
+ tree type = TYPE_MAIN_VARIANT (TREE_TYPE (t));
+ if (!INTEGRAL_TYPE_P (type)
+ || TREE_CODE (type) != ENUMERAL_TYPE
+ || DECL_NAME (TYPE_NAME (type))
+  != get_identifier ("omp_event_handle_t"))
+   {
+ error_at (id_loc, "%<detach%> clause event handle "
+   "has type %qT rather than "
+   "%<omp_event_handle_t%>",
+   type);
+ return list;


You can't do this here for C++, it needs to be done in finish_omp_clauses
instead and only be done if the type is not a dependent type.
Consider (e.g. should be in testsuite)
template <typename T>
void
foo ()
{
   T t;
   #pragma omp task detach (t)
   ;
}

template <typename T>
void
bar ()
{
   T t;
   #pragma omp task detach (t)
   ;
}

void
baz ()
{
   foo <omp_event_handle_t> ();
   bar <int> (); // Instantiating this should error
}


@@ -7394,6 +7394,9 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type 
ort)
}
}
  break;
+   case OMP_CLAUSE_DETACH:
+ t = OMP_CLAUSE_DECL (c);
+ break;
  


Again, restriction checking here, plus check the type if it is
non-dependent, otherwise defer that checking for finish_omp_clauses when
it will not be dependent anymore.

I think you need to handle OMP_CLAUSE_DETACH in cp/pt.c too.


--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -9733,6 +9733,19 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq 
*pre_p,
}
  break;
  
+	case OMP_CLAUSE_DETACH:

+ decl = OMP_CLAUSE_DECL (c);
+ if (outer_ctx)
+   {
+ splay_tree_node on
+   = splay_tree_lookup (outer_ctx->variables,
+(splay_tree_key)decl);
+ if (on == NULL || (on->value & GOVD_DATA_SHARE_CLASS) == 0)
+   omp_firstprivatize_variable (outer_ctx, decl);
+ omp_notice_variable (outer_ctx, decl, true);
+   }
+ break;


I don't understand this.  My reading of:
"The event-handle will be considered as if it was specified on a
firstprivate clause. The use of a variable in a detach clause expression of a 

Re: [OG10][committed] Backport openmp: Implicit 'declare target' for C++ static initializers

2020-12-18 Thread Kwok Cheung Yeung

On 18/12/2020 7:27 pm, Kwok Cheung Yeung wrote:

Hello

I have now backported the "openmp: Implicit 'declare target' for C++ static 
initializers" patch from mainline to the devel/omp/gcc-10 branch. The testcase 
required a small tweak as the gimple output has changed since OG10 was branched.


This has been committed as commit 83797c2d47aaa011b73512f6e86cf6a192cade56.



I have now also backported and committed to OG10 the fix for the testcase (as 
commit a3108e0b52bae3783a705ce38d4a1f82ed3443a2), and a patch required for 
implicit 'declare target' for Fortran (OpenMP: Add implicit declare target for 
nested procedures, as commit c490d85b1722651f0a67b373e5e2a059c4a35abe).


Kwok


Re: [PATCH] openmp: Implicit 'declare target' for C++ static initializers

2020-12-18 Thread Kwok Cheung Yeung

On 18/12/2020 7:31 pm, Jakub Jelinek wrote:

On Fri, Dec 18, 2020 at 03:10:52PM +0000, Kwok Cheung Yeung wrote:

2020-12-17  Kwok Cheung Yeung  

gcc/testsuite/
* g++.dg/gomp/declare-target-3.C: New.


Note the test fails on the trunk when one doesn't have offloading
configured.  IL scan tests are always problematic, different between
offloading and no offloading...


Oops. This patch disables the scan for .offload_var_table entries in the 
assembler if offloading is not enabled. The gimple tests appear to be okay though?


Okay for trunk?

Thanks

Kwok
commit f427d4eaddbd1ee4001e057b231c92fdd9fc66f5
Author: Kwok Cheung Yeung 
Date:   Fri Dec 18 12:05:20 2020 -0800

openmp: Fix g++.dg/gomp/declare-target-3.C testcase when offloading is 
disabled

2020-12-18  Kwok Cheung Yeung  

gcc/testsuite/
* g++.dg/gomp/declare-target-3.C: Only check .offload_var_table
entries if offloading is enabled.

diff --git a/gcc/testsuite/g++.dg/gomp/declare-target-3.C 
b/gcc/testsuite/g++.dg/gomp/declare-target-3.C
index d2dedaf..1e23c86 100644
--- a/gcc/testsuite/g++.dg/gomp/declare-target-3.C
+++ b/gcc/testsuite/g++.dg/gomp/declare-target-3.C
@@ -22,10 +22,10 @@ int *g = // Explicitly marked
 // { dg-final { scan-tree-dump "__attribute__\\\(\\\(omp declare 
target\\\)\\\)\\\nint bar \\\(\\\)" "gimple" } }
 // { dg-final { scan-tree-dump "__attribute__\\\(\\\(omp declare 
target\\\)\\\)\\\nint baz \\\(\\\)" "gimple" } }
 // { dg-final { scan-tree-dump "__attribute__\\\(\\\(omp declare 
target\\\)\\\)\\\nint qux \\\(\\\)" "gimple" } }
-// { dg-final { scan-assembler-not "\\\.offload_var_table:\\n.+\\\.quad\\s+a" 
} }
-// { dg-final { scan-assembler "\\\.offload_var_table:\\n.+\\\.quad\\s+b" } }
-// { dg-final { scan-assembler "\\\.offload_var_table:\\n.+\\\.quad\\s+c" } }
-// { dg-final { scan-assembler "\\\.offload_var_table:\\n.+\\\.quad\\s+d" } }
-// { dg-final { scan-assembler "\\\.offload_var_table:\\n.+\\\.quad\\s+e" } }
-// { dg-final { scan-assembler "\\\.offload_var_table:\\n.+\\\.quad\\s+f" } }
-// { dg-final { scan-assembler "\\\.offload_var_table:\\n.+\\\.quad\\s+g" } }
+// { dg-final { scan-assembler-not "\\\.offload_var_table:\\n.+\\\.quad\\s+a" 
{ target { offloading_enabled } } } }
+// { dg-final { scan-assembler "\\\.offload_var_table:\\n.+\\\.quad\\s+b" { 
target { offloading_enabled } } } }
+// { dg-final { scan-assembler "\\\.offload_var_table:\\n.+\\\.quad\\s+c" { 
target { offloading_enabled } } } }
+// { dg-final { scan-assembler "\\\.offload_var_table:\\n.+\\\.quad\\s+d" { 
target { offloading_enabled } } } }
+// { dg-final { scan-assembler "\\\.offload_var_table:\\n.+\\\.quad\\s+e" { 
target { offloading_enabled } } } }
+// { dg-final { scan-assembler "\\\.offload_var_table:\\n.+\\\.quad\\s+f" { 
target { offloading_enabled } } } }
+// { dg-final { scan-assembler "\\\.offload_var_table:\\n.+\\\.quad\\s+g" { 
target { offloading_enabled } } } }


[OG10][committed] Backport openmp: Implicit 'declare target' for C++ static initializers

2020-12-18 Thread Kwok Cheung Yeung

Hello

I have now backported the "openmp: Implicit 'declare target' for C++ static 
initializers" patch from mainline to the devel/omp/gcc-10 branch. The testcase 
required a small tweak as the gimple output has changed since OG10 was branched.


This has been committed as commit 83797c2d47aaa011b73512f6e86cf6a192cade56.

Kwok


Re: [PATCH] openmp: Implicit 'declare target' for C++ static initializers

2020-12-18 Thread Kwok Cheung Yeung

On 08/12/2020 4:24 pm, Jakub Jelinek wrote:

The GCC coding style (apart from libstdc++) is type * rather than type*,
occurs several times in the patch.


Fixed.


+{
+  tree node;
+
+  if (DECL_INITIAL (decl))
+return &DECL_INITIAL (decl);
+
+  for (node = dynamic_initializers; node; node = TREE_CHAIN (node))
+if (TREE_VALUE (node) == decl)
+  return &TREE_PURPOSE (node);


I'm worried with many dynamic initializers this will be worst case
quadratic.  Can't you use instead a hash map?  Note, as this is in the
FE, we might need to worry about PCH and GC.
Thus the hash map needs to be indexed by DECL_UIDs rather than pointers,
so perhaps use decl_tree_map?
Also, I'm worried that nothing releases dynamic_initializers (or the
decl_tree_map replacement).  We need it only during the discovery and not
afterwards, so it would be nice if the omp declare target discovery at the
end called another lang hook that would free the decl_tree_map, so that GC
can take it all.
If trees would remain there afterwards, we'd need to worry about destructive
gimplifier too and would need to unshare the dynamic initializers or
something.

I think it would be best to use omp_ in the hook name(s), and:


I have now changed dynamic_initializers to use a decl_tree_map instead. 
get_decl_init has been renamed to omp_get_decl_init, and I have added a hook 
omp_finish_decl_inits which is called at the end of 
omp_discover_implicit_declare_target to free the decl_tree_map for GC.



--- a/gcc/cp/decl2.c
+++ b/gcc/cp/decl2.c
@@ -4940,6 +4940,11 @@ c_parse_final_cleanups (void)
 loop.  */
vars = prune_vars_needing_no_initialization (&static_aggregates);
  
+  /* Copy the contents of VARS into DYNAMIC_INITIALIZERS.  */

+  for (t = vars; t; t = TREE_CHAIN (t))
+   dynamic_initializers = tree_cons (TREE_PURPOSE (t), TREE_VALUE (t),
+ dynamic_initializers);


Don't add anything there if (!flag_openmp).  We don't need to waste memory
when nobody is going to look at it.


Done.

I have retested all the gomp tests in the main testsuite, retested libgomp, and 
checked bootstrapping. Is this version okay for trunk now?


Thanks

Kwok
From ef4a42c5174372dd0d72dc0efe2c608e693c7877 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Thu, 17 Dec 2020 12:10:18 -0800
Subject: [PATCH] openmp: Implicitly add 'declare target' directives for
 dynamic initializers in C++

2020-12-17  Kwok Cheung Yeung  

gcc/
* langhooks-def.h (lhd_get_decl_init): New.
(lhd_finish_decl_inits): New.
(LANG_HOOKS_GET_DECL_INIT): New.
(LANG_HOOKS_OMP_FINISH_DECL_INITS): New.
(LANG_HOOKS_DECLS): Add LANG_HOOKS_GET_DECL_INIT and
LANG_HOOKS_OMP_FINISH_DECL_INITS.
* langhooks.c (lhd_omp_get_decl_init): New.
(lhd_omp_finish_decl_inits): New.
* langhooks.h (struct lang_hooks_for_decls): Add omp_get_decl_init
and omp_finish_decl_inits.
* omp-offload.c (omp_discover_declare_target_var_r): Use
get_decl_init langhook in place of DECL_INITIAL.  Call
omp_finish_decl_inits langhook at end of function.

gcc/cp/
* cp-lang.c (cxx_get_decl_init): New.
(cxx_omp_finish_decl_inits): New.
(LANG_HOOKS_GET_DECL_INIT): New.
(LANG_HOOKS_OMP_FINISH_DECL_INITS): New.
* cp-tree.h (dynamic_initializers): New.
* decl.c (dynamic_initializers): New.
* decl2.c (c_parse_final_cleanups): Add initializer entries
from vars to dynamic_initializers.

gcc/testsuite/
* g++.dg/gomp/declare-target-3.C: New.
---
 gcc/cp/cp-lang.c | 32 
 gcc/cp/cp-tree.h |  4 
 gcc/cp/decl.c|  4 
 gcc/cp/decl2.c   |  7 ++
 gcc/langhooks-def.h  |  8 ++-
 gcc/langhooks.c  | 16 ++
 gcc/langhooks.h  | 10 +
 gcc/omp-offload.c| 11 ++
 gcc/testsuite/g++.dg/gomp/declare-target-3.C | 31 +++
 9 files changed, 118 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/gomp/declare-target-3.C

diff --git a/gcc/cp/cp-lang.c b/gcc/cp/cp-lang.c
index 5d2aef4..bde11db 100644
--- a/gcc/cp/cp-lang.c
+++ b/gcc/cp/cp-lang.c
@@ -34,6 +34,8 @@ static tree cp_eh_personality (void);
 static tree get_template_innermost_arguments_folded (const_tree);
 static tree get_template_argument_pack_elems_folded (const_tree);
 static tree cxx_enum_underlying_base_type (const_tree);
+static tree * cxx_omp_get_decl_init (tree);
+static void cxx_omp_finish_decl_inits (void);
 
 /* Lang hooks common to C++ and ObjC++ are declared in cp/cp-objcp-common.h;
consequently, there should be very few hooks below.  */
@@ -92,6 +94,12 @@ static tree cxx_enum_underlying_base_type (const_tree

Re: [PATCH] [WIP] openmp: Add OpenMP 5.0 task detach clause support

2020-12-09 Thread Kwok Cheung Yeung

On 09/12/2020 5:53 pm, Jakub Jelinek wrote:

On Wed, Dec 09, 2020 at 05:37:24PM +0000, Kwok Cheung Yeung wrote:

I believe this patch is largely complete now. I have done a bootstrap on
x86_64 and run the testsuites with no regressions. I have also run the
libgomp testsuite with offloading to Nvidia and AMD GCN devices, also with
no regressions. Is this patch okay for trunk (or would it be more
appropriate to wait until GCC 11 is branched off)?


I think it is desirable for GCC 11, doesn't need to be deferred, and sorry
it is taking me so long.  I've paged in the standard wording related to this
yesterday and hoped I'd look at this, but didn't manage, will try to do that
tomorrow or worst case on Friday.


No problem :-), and thanks for looking at the patch.

Kwok


Re: [PATCH] [WIP] openmp: Add OpenMP 5.0 task detach clause support

2020-12-09 Thread Kwok Cheung Yeung

Hello

This is a further update of the patch for task detach support.

- The memory for the event is not mapped on the target. This means that if 
omp_fulfill_event is called from an 'omp target' section with a target that 
does not share memory with the host, the event will not be fulfilled (and a 
segfault will probably occur).


I was thinking of something along the lines of:

#pragma omp task detach (event)
{
}

#pragma omp target
{
   omp_fulfill_event (event);
}

Would something like this be expected to work? I cannot find many examples of 
the detach clause online, and none of them use any offloading constructs.


I have asked on the omp-lang mailing list - this is not expected to work.

- The tasks awaiting event fulfillment currently wait until there are no other 
runnable tasks left. A better approach would be to poll (without blocking) the 
waiting tasks whenever any task completes, immediately removing any 
now-complete tasks and requeuing any dependent tasks.


This has now been implemented. On every iteration of the main loop in 
gomp_barrier_handle_tasks, it first checks to see if any tasks in the detach 
queue have a fulfilled completion event, and if so it will remove the task and 
requeue any dependent tasks.




I have found another problem with the original blocking approach when the tasks 
are on offload devices. On Nvidia and GCN, a bar.sync/s_barrier instruction is 
issued when gomp_team_barrier_wake is called to synchronise the threads. 
However, if some of the barrier threads are stuck waiting for semaphores 
associated with completion events, and the fulfillment of those events are in 
other tasks waiting to run, then the result is a deadlock as the threads cannot 
synchronise without all the semaphores being released.


I have removed the blocking path on gomp_barrier_handle_tasks altogether, and 
omp_fulfill_event now directly wakes the barrier threads to process any tasks 
that are now complete.


I have also ensured that the event handle specified on the detach clause is 
firstprivate by default on enclosing scopes.


I believe this patch is largely complete now. I have done a bootstrap on x86_64 
and run the testsuites with no regressions. I have also run the libgomp 
testsuite with offloading to Nvidia and AMD GCN devices, also with no 
regressions. Is this patch okay for trunk (or would it be more appropriate to 
wait until GCC 11 is branched off)?


Thanks

Kwok
commit 3d82db0fc3623e9dc241bed4c4cfd266574d45e7
Author: Kwok Cheung Yeung 
Date:   Wed Dec 9 09:33:46 2020 -0800

openmp: Add support for the OpenMP 5.0 task detach clause

2020-12-09  Kwok Cheung Yeung  

gcc/
* builtin-types.def (BT_PTR_SIZED_INT): New primitive type.
(BT_FN_PSINT_VOID): New function type.
(BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT): Rename
to...
(BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT_PSINT):
...this.  Add extra argument.
* gimplify.c (gimplify_scan_omp_clauses): Handle OMP_CLAUSE_DETACH.
(gimplify_adjust_omp_clauses): Likewise.
* omp-builtins.def (BUILT_IN_GOMP_TASK): Change function type to
BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT_PSINT.
(BUILT_IN_GOMP_NEW_EVENT): New.
* omp-expand.c (expand_task_call): Add detach argument when generating
call to GOMP_task.
* omp-low.c (scan_sharing_clauses): Setup data environment for detach
clause.
(lower_detach_clause): New.
(lower_omp_taskreg): Call lower_detach_clause for detach clause.  Add
Gimple statements generated for detach clause.
* tree-core.h (enum omp_clause_code): Add OMP_CLAUSE_DETACH.
* tree-pretty-print.c (dump_omp_clause): Handle OMP_CLAUSE_DETACH.
* tree.c (omp_clause_num_ops): Add entry for OMP_CLAUSE_DETACH.
(omp_clause_code_name): Add entry for OMP_CLAUSE_DETACH.
(walk_tree_1): Handle OMP_CLAUSE_DETACH.
* tree.h (OMP_CLAUSE_DETACH_EXPR): New.

gcc/c-family/
* c-pragma.h (pragma_omp_clause): Add PRAGMA_OMP_CLAUSE_DETACH.
Redefine PRAGMA_OACC_CLAUSE_DETACH.

gcc/c/
* c-parser.c (c_parser_omp_clause_detach): New.
(c_parser_omp_all_clauses): Handle PRAGMA_OMP_CLAUSE_DETACH clause.
(OMP_TASK_CLAUSE_MASK): Add mask for PRAGMA_OMP_CLAUSE_DETACH.
* c-typeck.c (c_finish_omp_clauses): Handle PRAGMA_OMP_CLAUSE_DETACH
clause.

gcc/cp/
* parser.c (cp_parser_omp_clause_detach): New.
(cp_parser_omp_all_clauses): Handle PRAGMA_OMP_CLAUSE_DETACH.
(OMP_TASK_CLAUSE_MASK): Add mask for PRAGMA_OMP_CLAUSE_DETACH.
* semantics.c (finish_omp_clauses): Handle OMP_CLAUSE_DETACH clause.

gcc/fortran/
* dump-parse-tree.c (show_omp_clauses): Handle detach clause.
* frontend-passes.c (gfc_code_walker): Walk detach expression.
* gfortran.h

PING Re: [PATCH] openmp: Implicit 'declare target' for C++ static initializers

2020-11-27 Thread Kwok Cheung Yeung

Hello

This patch still needs review.

Thanks

Kwok

On 19/11/2020 6:07 pm, Kwok Cheung Yeung wrote:

On 29/10/2020 10:03 am, Jakub Jelinek wrote:

I'm actually not sure how this can work correctly.
Let's say we have
int foo () { return 1; }
int bar () { return 2; }
int baz () { return 3; }
int qux () { return 4; }
int a = foo ();
int b = bar ();
int c = baz ();
int *d = &c;
int e = qux ();
int f = e + 1;
int *g = &f;
#pragma omp declare target to (b, d, g)
So, for the implicit declare target discovery, a is not declare target to,
nor is foo, and everything else is; b, d, g explicitly, c because it is
referenced in initializer of d, f because it is mentioned in initializer of
g and e because it is mentioned in initializer of f.
Haven't checked if the new function you've added is called before or after
analyze_function calls omp_discover_implicit_declare_target, but I don't
really see how it can work when it is not inside of that function, so that
discovery of new static vars that are implicitly declare target to doesn't
result in marking of its dynamic initializers too.  Perhaps we need a
langhook for that.  But if it is a separate function, either it is called
before the other discovery and will ignore static initializers for vars
that will only be marked as implicit declare target to later, or it is done
afterwards, but then it would really need to duplicate everything what the
other function does, otherwise it wouldn't discover everything.



I have added a new langhook GET_DECL_INIT that by default returns the 
DECL_INITIAL of a variable declaration, but for C++ can also return the dynamic 
initializer if present. omp_discover_implicit_declare_target and 
omp_discover_declare_target_var_r have been changed to use the new langhook 
instead of using DECL_INITIAL.


The dynamic initializer information is stored in a new variable 
dynamic_initializers. The information is originally stored in static_aggregates, 
but this is nulled by calling prune_vars_needing_no_initialization in 
c_parse_final_cleanups. I copy the information into a separate variable before 
it is discarded - this avoids any potential problems that may be caused by 
trying to change the way that static_aggregates currently works.


With this, all the functions and variables in your example are marked correctly:

foo ()
...

__attribute__((omp declare target))
bar ()
...

__attribute__((omp declare target))
baz ()
...

__attribute__((omp declare target))
qux ()
...

.offload_var_table:
     .quad   g
     .quad   8
     .quad   d
     .quad   8
     .quad   b
     .quad   4
     .quad   c
     .quad   4
     .quad   f
     .quad   4
     .quad   e
     .quad   4

Your example is now a compile test in g++.dg/gomp/.


Anyway, that is one thing, the other is even if the implicit declare target
discovery handles those correctly, the question is what should we do
afterwards.  Because the C++ FE normally creates a single function that
performs the dynamic initialization of the TUs variables.  But that function
shouldn't be really declare target to, it initializes not only (explicit or
implicit) declare target to variables, but also host only variables.
So we'll probably need to create next to that host only TU constructor
also a device only constructor function that will only initialize the
declare target to variables.


Even without this patch, G++ currently accepts something like

int foo() { return 1; }
int x = foo();
#pragma omp declare target to(x)

but will not generate the device-side initializer for x, even though x is now 
present on the device. So this part of the implementation is broken with or 
without the patch.


Given that my patch doesn't make the current situation any worse, can I commit 
this portion of it to trunk for now, and leave device-side dynamic 
initialization for later?


Bootstrapped on x86_64 with no offloading, G++ testsuite ran with no 
regressions, and no regressions in the libgomp testsuite with Nvidia offloading.


Thanks,

Kwok


Re: [PATCH] [WIP] openmp: Add OpenMP 5.0 task detach clause support

2020-11-27 Thread Kwok Cheung Yeung

Hello

This is an updated version of the WIP patch for task detach support. Any 
comments would be welcome!


On 11/11/2020 7:06 pm, Kwok Cheung Yeung wrote:

- No error checking at the front-end.


The detach clause is now parsed properly in C, C++ and Fortran, and will raise 
an error if the syntax is incorrect or if the event variable is of the wrong type.


- The memory for the event is not mapped on the target. This means that if 
omp_fulfill_event is called from an 'omp target' section with a target that does 
not share memory with the host, the event will not be fulfilled (and a segfault 
will probably occur).


I was thinking of something along the lines of:

#pragma omp task detach (event)
{
}

#pragma omp target
{
  omp_fulfill_event (event);
}

Would something like this be expected to work? I cannot find many examples of 
the detach clause online, and none of them use any offloading constructs.


- The tasks awaiting event fulfillment currently wait until there are no other 
runnable tasks left. A better approach would be to poll (without blocking) the 
waiting tasks whenever any task completes, immediately removing any now-complete 
tasks and requeuing any dependent tasks.


This has now been implemented. On every iteration of the main loop in 
gomp_barrier_handle_tasks, it first checks to see if any tasks in the detach 
queue have a fulfilled completion event, and if so it will remove the task and 
requeue any dependent tasks.


Thanks

Kwok
From 3611024b39ea5b264ec2fd35ffa64360861052af Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Fri, 27 Nov 2020 11:59:12 -0800
Subject: [PATCH] openmp: Add support for the OpenMP 5.0 task detach clause

2020-11-27  Kwok Cheung Yeung  

gcc/
* builtin-types.def (BT_PTR_SIZED_INT): New primitive type.
(BT_FN_PSINT_VOID): New function type.
(BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT): Rename
to...
(BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT_PSINT):
...this.  Add extra argument.
* gimplify.c (gimplify_scan_omp_clauses): Handle OMP_CLAUSE_DETACH.
(gimplify_adjust_omp_clauses): Likewise.
* omp-builtins.def (BUILT_IN_GOMP_TASK): Change function type to
BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT_PSINT.
(BUILT_IN_GOMP_NEW_EVENT): New.
* omp-expand.c (expand_task_call): Add detach argument when generating
call to GOMP_task.
* omp-low.c (scan_sharing_clauses): Setup data environment for detach
clause.
(lower_detach_clause): New.
(lower_omp_taskreg): Call lower_detach_clause for detach clause.  Add
Gimple statements generated for detach clause.
* tree-core.h (enum omp_clause_code): Add OMP_CLAUSE_DETACH.
* tree-pretty-print.c (dump_omp_clause): Handle OMP_CLAUSE_DETACH.
* tree.c (omp_clause_num_ops): Add entry for OMP_CLAUSE_DETACH.
(omp_clause_code_name): Add entry for OMP_CLAUSE_DETACH.
(walk_tree_1): Handle OMP_CLAUSE_DETACH.
* tree.h (OMP_CLAUSE_DETACH_EXPR): New.

gcc/c-family/
* c-pragma.h (pragma_omp_clause): Add PRAGMA_OMP_CLAUSE_DETACH.
Redefine PRAGMA_OACC_CLAUSE_DETACH.

gcc/c/
* c-parser.c (c_parser_omp_clause_detach): New.
(c_parser_omp_all_clauses): Handle PRAGMA_OMP_CLAUSE_DETACH clause.
(OMP_TASK_CLAUSE_MASK): Add mask for PRAGMA_OMP_CLAUSE_DETACH.
* c-typeck.c (c_finish_omp_clauses): Handle PRAGMA_OMP_CLAUSE_DETACH
clause.

gcc/cp/
* parser.c (cp_parser_omp_clause_detach): New.
(cp_parser_omp_all_clauses): Handle PRAGMA_OMP_CLAUSE_DETACH.
(OMP_TASK_CLAUSE_MASK): Add mask for PRAGMA_OMP_CLAUSE_DETACH.
* semantics.c (finish_omp_clauses): Handle OMP_CLAUSE_DETACH clause.

gcc/fortran/
* dump-parse-tree.c (show_omp_clauses): Handle detach clause.
* frontend-passes.c (gfc_code_walker): Walk detach expression.
* gfortran.h (struct gfc_omp_clauses): Add detach field.
(gfc_c_intptr_kind): New.
* openmp.c (gfc_free_omp_clauses): Free detach clause.
(gfc_match_omp_detach): New.
(enum omp_mask1): Add OMP_CLAUSE_DETACH.
(enum omp_mask2): Remove OMP_CLAUSE_DETACH.
(gfc_match_omp_clauses): Handle OMP_CLAUSE_DETACH for OpenMP.
(OMP_TASK_CLAUSES): Add OMP_CLAUSE_DETACH.
* trans-openmp.c (gfc_trans_omp_clauses): Handle detach clause.
* trans-types.c (gfc_c_intptr_kind): New.
(gfc_init_kinds): Initialize gfc_c_intptr_kind.
* types.def (BT_PTR_SIZED_INT): New type.
(BT_FN_PSINT_VOID): New function type.
(BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT): Rename
to...
(BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT_PSINT):
...this.  Add extra argument.

libgomp/
* fortran.c (omp_fulfill_event_): New

Nested declare target support

2020-11-20 Thread Kwok Cheung Yeung

Hello


New OpenMP 5.0 features that won't be available in GCC 9, are planned for GCC 10
or later versions as time permits:


...

- nested declare target support


You said in an email two years ago that nested declare target was not supported 
yet. I do not see any patches that claim to implement this since then, but when 
I ran a quick test with a trunk build:


#pragma omp declare target
  #pragma omp declare target
int foo() { return 1; }
  #pragma omp end declare target
  int bar() { return 2; }
#pragma omp end declare target

This compiles and appears to do the right thing:

__attribute__((omp declare target, omp declare target block))
foo ()
...

__attribute__((omp declare target, omp declare target block))
bar ()
...

Looking at the C parser:

static void
c_parser_omp_declare_target (c_parser *parser)
{
  ...
  else
{
  c_parser_skip_to_pragma_eol (parser);
  current_omp_declare_target_attribute++;
  return;
}

static void
c_parser_omp_end_declare_target (c_parser *parser)
{
  ...
current_omp_declare_target_attribute--;
}

It looks like this was written to handle nesting to begin with (since at least 
2013) by making current_omp_declare_target_attribute (which effectively tracks 
the nesting level) an integer. Is there anything that is currently missing for 
nested declare target support?


Thanks

Kwok


Re: [PATCH] openmp: Implicit 'declare target' for C++ static initializers

2020-11-19 Thread Kwok Cheung Yeung

On 29/10/2020 10:03 am, Jakub Jelinek wrote:

I'm actually not sure how this can work correctly.
Let's say we have
int foo () { return 1; }
int bar () { return 2; }
int baz () { return 3; }
int qux () { return 4; }
int a = foo ();
int b = bar ();
int c = baz ();
int *d = &c;
int e = qux ();
int f = e + 1;
int *g = &f;
#pragma omp declare target to (b, d, g)
So, for the implicit declare target discovery, a is not declare target to,
nor is foo, and everything else is; b, d, g explicitly, c because it is
referenced in initializer of d, f because it is mentioned in initializer of
g and e because it is mentioned in initializer of f.
Haven't checked if the new function you've added is called before or after
analyze_function calls omp_discover_implicit_declare_target, but I don't
really see how it can work when it is not inside of that function, so that
discovery of new static vars that are implicitly declare target to doesn't
result in marking of its dynamic initializers too.  Perhaps we need a
langhook for that.  But if it is a separate function, either it is called
before the other discovery and will ignore static initializers for vars
that will only be marked as implicit declare target to later, or it is done
afterwards, but then it would really need to duplicate everything what the
other function does, otherwise it wouldn't discover everything.



I have added a new langhook GET_DECL_INIT that by default returns the 
DECL_INITIAL of a variable declaration, but for C++ can also return the dynamic 
initializer if present. omp_discover_implicit_declare_target and 
omp_discover_declare_target_var_r have been changed to use the new langhook 
instead of using DECL_INITIAL.


The dynamic initializer information is stored in a new variable 
dynamic_initializers. The information is originally stored in static_aggregates, 
but this is nulled by calling prune_vars_needing_no_initialization in 
c_parse_final_cleanups. I copy the information into a separate variable before 
it is discarded - this avoids any potential problems that may be caused by 
trying to change the way that static_aggregates currently works.


With this, all the functions and variables in your example are marked correctly:

foo ()
...

__attribute__((omp declare target))
bar ()
...

__attribute__((omp declare target))
baz ()
...

__attribute__((omp declare target))
qux ()
...

.offload_var_table:
.quad   g
.quad   8
.quad   d
.quad   8
.quad   b
.quad   4
.quad   c
.quad   4
.quad   f
.quad   4
.quad   e
.quad   4

Your example is now a compile test in g++.dg/gomp/.


Anyway, that is one thing, the other is even if the implicit declare target
discovery handles those correctly, the question is what should we do
afterwards.  Because the C++ FE normally creates a single function that
performs the dynamic initialization of the TUs variables.  But that function
shouldn't be really declare target to, it initializes not only (explicit or
implicit) declare target to variables, but also host only variables.
So we'll probably need to create next to that host only TU constructor
also a device only constructor function that will only initialize the
declare target to variables.


Even without this patch, G++ currently accepts something like

int foo() { return 1; }
int x = foo();
#pragma omp declare target to(x)

but will not generate the device-side initializer for x, even though x is now 
present on the device. So this part of the implementation is broken with or 
without the patch.


Given that my patch doesn't make the current situation any worse, can I commit 
this portion of it to trunk for now, and leave device-side dynamic 
initialization for later?


Bootstrapped on x86_64 with no offloading, G++ testsuite ran with no 
regressions, and no regressions in the libgomp testsuite with Nvidia offloading.


Thanks,

Kwok
From 0348b149474d0922d79209705e6777e7af271e0d Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Wed, 18 Nov 2020 13:54:01 -0800
Subject: [PATCH] openmp: Implicitly add 'declare target' directives for
 dynamic initializers in C++

2020-11-18  Kwok Cheung Yeung  

gcc/
* langhooks-def.h (lhd_get_decl_init): New.
(LANG_HOOKS_GET_DECL_INIT): New.
(LANG_HOOKS_DECLS): Add LANG_HOOKS_GET_DECL_INIT.
* langhooks.h (struct lang_hooks_for_decls): Add get_decl_init.
* omp-offload.c (omp_discover_declare_target_var_r): Use
get_decl_init langhook in place of DECL_INITIAL.

gcc/cp/
* cp-lang.c (cxx_get_decl_init): New.
(LANG_HOOKS_GET_DECL_INIT): New.
* cp-tree.h (dynamic_initializers): New.
* decl.c (dynamic_initializers): New.
* decl2.c (c_parse_final_cleanups): Copy vars into
dynamic_initializers.

gcc/testsuite/
* g++.dg/gomp/declare-target-3.C: New.
---
 gcc/cp/cp-lang.c | 24 +
 gcc

Re: [PATCH] openmp: Retire nest-var ICV

2020-11-18 Thread Kwok Cheung Yeung

On 18/11/2020 11:41 am, Jakub Jelinek wrote:

On Thu, Nov 12, 2020 at 10:44:35PM +0000, Kwok Cheung Yeung wrote:

+  /* OMP_NESTED is deprecated in OpenMP 5.0.  */
+  if (parse_boolean ("OMP_NESTED", &nested))
+   gomp_global_icv.max_active_levels_var =
+   nested ? gomp_supported_active_levels : 1;


Formatting - = should be on the next line, indented 2 columns further from
gomp_global_icv.



Fixed.


  int
  omp_get_nested (void)
  {
struct gomp_task_icv *icv = gomp_icv (false);
-  return icv->nest_var;
+  return icv->max_active_levels_var > 1
+  && icv->max_active_levels_var > omp_get_active_level ();


Formatting, should be:
   return (icv->max_active_levels_var > 1
  && icv->max_active_levels_var > omp_get_active_level ());



Fixed.


@@ -118,19 +122,21 @@ omp_get_thread_limit (void)
  void
  omp_set_max_active_levels (int max_levels)
  {
+  struct gomp_task_icv *icv = gomp_icv (false);


Should be gomp_icv (true), because it modifies the ICVs rather than
just querying them.  And perhaps move it inside of the if (max_levels >= 0)
if.


Done.


So, let's change gomp_supported_active_levels to say 255 and use
   bool dyn_var;
   unsigned char max_active_levels_var;
   char bind_var;



Done (though I used UCHAR_MAX instead of 255). The change in type requires 
changing a format specifier from %lu to %u in handle_omp_display_env, and the 
use of a temporary when parsing OMP_MAX_ACTIVE_LEVELS in initialize_env.


If there are no objections, I will commit this to master and OG10 shortly. 
Bootstrapped on x86_64 with no offloading, and tested libgomp with Nvidia 
offloading with no regressions.


Thanks

Kwok
From a75481979c86aa1da5b5da641fc776bc71d156f7 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Wed, 18 Nov 2020 10:02:00 -0800
Subject: [PATCH] openmp: Retire nest-var ICV for OpenMP 5.1

This removes the nest-var ICV, expressing nesting in terms of the
max-active-levels-var ICV instead.  The max-active-levels-var ICV
is now per data environment rather than per device.

2020-11-18  Kwok Cheung Yeung  

libgomp/
* env.c (gomp_global_icv): Remove nest_var field.  Add
max_active_levels_var field.
(gomp_max_active_levels_var): Remove.
(parse_boolean): Return true on success.
(handle_omp_display_env): Express OMP_NESTED in terms of
max_active_levels_var.  Change format specifier for
max_active_levels_var.
(initialize_env): Set max_active_levels_var from
OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS and
OMP_PROC_BIND.
* icv.c (omp_set_nested): Express in terms of
max_active_levels_var.
(omp_get_nested): Likewise.
(omp_set_max_active_levels): Use max_active_levels_var field instead
of gomp_max_active_levels_var.
(omp_get_max_active_levels): Likewise.
* libgomp.h (struct gomp_task_icv): Remove nest_var field.  Add
max_active_levels_var field.
(gomp_supported_active_levels): Set to UCHAR_MAX.
(gomp_max_active_levels_var): Delete.
* libgomp.texi (omp_get_nested): Update documentation.
(omp_set_nested): Likewise.
(OMP_MAX_ACTIVE_LEVELS): Likewise.
(OMP_NESTED): Likewise.
(OMP_NUM_THREADS): Likewise.
(OMP_PROC_BIND): Likewise.
* parallel.c (gomp_resolve_num_threads): Replace reference
to nest_var with max_active_levels_var.  Use max_active_levels_var
field instead of gomp_max_active_levels_var.
---
 libgomp/env.c| 44 ++
 libgomp/icv.c| 17 ++-
 libgomp/libgomp.h|  5 ++---
 libgomp/libgomp.texi | 60 ++--
 libgomp/parallel.c   |  4 ++--
 5 files changed, 90 insertions(+), 40 deletions(-)

diff --git a/libgomp/env.c b/libgomp/env.c
index ab22525..5a49ae6 100644
--- a/libgomp/env.c
+++ b/libgomp/env.c
@@ -68,12 +68,11 @@ struct gomp_task_icv gomp_global_icv = {
   .run_sched_chunk_size = 1,
   .default_device_var = 0,
   .dyn_var = false,
-  .nest_var = false,
+  .max_active_levels_var = 1,
   .bind_var = omp_proc_bind_false,
   .target_data = NULL
 };
 
-unsigned long gomp_max_active_levels_var = gomp_supported_active_levels;
 bool gomp_cancel_var = false;
 enum gomp_target_offload_t gomp_target_offload_var
   = GOMP_TARGET_OFFLOAD_DEFAULT;
@@ -959,16 +958,17 @@ parse_spincount (const char *name, unsigned long long 
*pvalue)
 }
 
 /* Parse a boolean value for environment variable NAME and store the
-   result in VALUE.  */
+   result in VALUE.  Return true if one was present and it was
+   successfully parsed.  */
 
-static void
+static bool
 parse_boolean (const char *name, bool *value)
 {
   const char *env;
 
   env = getenv (name);
   if (env == NULL)
-return;
+return false;
 
   while (isspace ((unsigned char) *env))
 ++env;
@@ -987,7 +987,11 @@ parse_b

[committed] [OG10] Backport OpenMP 5.0 features from master

2020-11-13 Thread Kwok Cheung Yeung

Hello

I have backported a couple of patches related to OpenMP 5.0 features from master 
to the devel/omp/gcc-10 branch. These are:


8949b985dbaf07d433bd57d2883e1e5414f20e75: openmp: Add support for the 
omp_get_supported_active_levels runtime library routine


445567b22a3c535be0b1861b393e9a0b050f2b1e: libgomp: Amend documentation for 
omp_get_max_active_levels and omp_get_supported_active_levels


1bfc07d150790fae93184a79a7cce897655cb37b: openmp: Implement support for 
OMP_TARGET_OFFLOAD environment variable


35f258f4bbba7fa044f90b4f14d1bc942db58089: libgomp: Fix up bootstrap in 
libgomp/target.c due to false positive warning


121a8812c45b3155ccbd268b000ad00a778e81e8: libgomp: Hopefully avoid false 
positive warnings in env.c on solaris


74c9882b80bda50b37c9555498de7123c6bdb9e4: openmp: Change omp_get_initial_device 
() to match OpenMP 5.1 requirements


17c5b7e1dc47bab6e6cedbf4b2d88cef3283533e: openmp: Add test for 
OMP_TARGET_OFFLOAD=mandatory for cases where it must not fail


10508db867934264bbc2578f1f454c19fa558fd3: openmp: Mark deprecated symbols in 
OpenMP 5.0


I have tested that these cause no regressions in the libgomp testsuite with both 
AMD GCN and Nvidia offloading.


Kwok


Re: [PATCH] openmp: Retire nest-var ICV

2020-11-12 Thread Kwok Cheung Yeung

On 10/11/2020 6:01 pm, Jakub Jelinek wrote:

One thing is that max-active-levels-var in 5.0 is per-device,
but in 5.1 per-data environment.  The question is if we should implement
the problematic 5.0 way or the 5.1 one.  E.g.:
#include <omp.h>
#include <stdio.h>

int
main ()
{
   #pragma omp parallel
   {
 omp_set_nested (1);
 #pragma omp parallel num_threads(2)
 printf ("Hello, world!\n");
   }
}
which used to be valid in 4.5 (where nest-var used to be per-data
environment) is in 5.0 racy (and in 5.1 will not be racy again).
Though, as these are deprecated APIs, perhaps we can just do the 5.0 way for
now.


Since max-active-levels-var is still current in 5.1, I guess we might as well do 
it properly :-). I have now placed max-active-levels-var into gomp_task_icv. The 
definition of omp_get_nested in 5.1 refers to the active-level-var ICV which is 
currently not implemented, so the comparison is against omp_get_active_level() 
instead.



--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -489,8 +489,11 @@ represent their language-specific counterparts.
  
  Nested parallel regions may be initialized at startup by the

  @env{OMP_NESTED} environment variable or at runtime using
-@code{omp_set_nested}.  If undefined, nested parallel regions are
-disabled by default.
+@code{omp_set_nested}.  Setting the maximum number of nested
+regions to above one using the @env{OMP_MAX_ACTIVE_LEVELS}
+environment variable or @code{omp_set_max_active_levels} will
+also enable nesting.  If undefined, nested parallel regions
+are disabled by default.


This doesn't really describe what env.c does.  If undefined, then
if OMP_NESTED is defined, it will be followed, and if neither is
defined, the code sets the default based on
"OMP_NUM_THREADS or OMP_PROC_BIND is set to a
comma-separated list of more than one value"
as the spec says and only is disabled otherwise.



Similarly.



Again.


I have changed these to more accurately describe what is happening. The 
descriptions are starting to get rather verbose though...



--- a/libgomp/testsuite/libgomp.c/target-5.c
+++ b/libgomp/testsuite/libgomp.c/target-5.c


Why does this testcase need updates?
It doesn't seem to use omp_[sg]et_max_active_levels and so I don't see
why it couldn't use omp_[sg]et_nested.



The problem is with max-active-levels-var (which nesting is now in terms of) 
being per device rather than per data environment. The test expects the nested 
setting to go back to its previous value after leaving a DE that sets it to 
something else.


Anyway, with max-active-levels-var now being per data environment, that is all 
moot now, and the test can remain unchanged.


Is this version okay for trunk? Bootstrapped on x86_64 and libgomp tested with 
no regressions with nvptx offloading.


Thanks

Kwok
commit bcaa3dbf1f130e3a2c7e6033a10be3f61221a951
Author: Kwok Cheung Yeung 
Date:   Thu Nov 12 13:42:28 2020 -0800

openmp: Retire nest-var ICV for OpenMP 5.1

This removes the nest-var ICV, expressing nesting in terms of the
max-active-levels-var ICV instead.  The max-active-levels-var ICV
is now per data environment rather than per device.
    
    2020-11-12  Kwok Cheung Yeung  

libgomp/
* env.c (gomp_global_icv): Remove nest_var field.  Add
max_active_levels_var field.
(gomp_max_active_levels_var): Remove.
(parse_boolean): Return true on success.
(handle_omp_display_env): Express OMP_NESTED in terms of
max_active_levels_var.
(initialize_env): Set max_active_levels_var from
OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS and
OMP_PROC_BIND.
* icv.c (omp_set_nested): Express in terms of
max_active_levels_var.
(omp_get_nested): Likewise.
(omp_set_max_active_levels): Use max_active_levels_var field instead
of gomp_max_active_levels_var.
(omp_get_max_active_levels): Likewise.
* libgomp.h (struct gomp_task_icv): Remove nest_var field.  Add
max_active_levels_var field.
(gomp_max_active_levels_var): Delete.
* libgomp.texi (omp_get_nested): Update documentation.
(omp_set_nested): Likewise.
(OMP_MAX_ACTIVE_LEVELS): Likewise.
(OMP_NESTED): Likewise.
(OMP_NUM_THREADS): Likewise.
(OMP_PROC_BIND): Likewise.
* parallel.c (gomp_resolve_num_threads): Replace reference
to nest_var with max_active_levels_var.  Use max_active_levels_var
field instead of gomp_max_active_levels_var.

diff --git a/libgomp/env.c b/libgomp/env.c
index ab22525..b8ed1bd 100644
--- a/libgomp/env.c
+++ b/libgomp/env.c
@@ -68,12 +68,11 @@ struct gomp_task_icv gomp_global_icv = {
   .run_sched_chunk_size = 1,
   .default_device_var = 0,
   .dyn_var = false,
-  .nest_var = false,
+  .max_active_levels_var = 1,
   .bind_var = omp_proc_bind_false,
   .target_data = NULL
 };
 
-unsigned long gomp_max_active_levels_var = gomp_supported_

[PATCH] [WIP] openmp: Add OpenMP 5.0 task detach clause support

2020-11-11 Thread Kwok Cheung Yeung

Hello

This is a WIP implementation of the OpenMP 5.0 task detach clause. The task 
construct can now take a detach clause, passing in a variable of type 
omp_event_handle_t. When the construct is encountered, space for an event is 
allocated and the event variable is set to point to the new event. When the task 
is run, it is not complete until a new function omp_fulfill_event has been 
called on the event variable, either in the task itself or in another thread of 
execution.


lower_detach_clause generates code to call GOMP_new_event, which allocates, 
initializes and returns a pointer to a gomp_allow_completion_event struct. The 
return value is then type-cast to an omp_event_handle_t and assigned to the event 
variable, before the data environment for the task construct is set up.


The event variable is passed into the call to GOMP_task, where it is assigned to 
a field in the gomp_task struct. If the task is not deferred, then it will wait 
for the detach event to be fulfilled inside GOMP_task, otherwise it needs to be 
handled in gomp_barrier_handle_tasks.


When a task finishes in gomp_barrier_handle_tasks and the detach event has not 
been fulfilled, it is placed onto a separate queue of unfulfilled tasks before 
the current thread continues with another task. When the current thread has no 
more tasks, then it will remove a task from the queue of unfulfilled tasks and 
wait for it to complete. When it does, it is removed and any dependent tasks are 
requeued for execution.


We cannot simply block after a task with an unfulfilled event has finished 
because in the case where there are more tasks than threads, there is the 
possibility that all the threads will be tied up waiting, while a task that 
results in an event getting fulfilled never gets run, causing execution to stall.


The memory allocated for the event is released when the associated task is 
destroyed.


Issues that I can see with the current implementation at the moment are:

- No error checking at the front-end.
- The memory for the event is not mapped on the target. This means that if 
omp_fulfill_event is called from an 'omp target' section with a target that does 
not share memory with the host, the event will not be fulfilled (and a segfault 
will probably occur).
- The tasks awaiting event fulfillment currently wait until there are no other 
runnable tasks left. A better approach would be to poll (without blocking) the 
waiting tasks whenever any task completes, immediately removing any now-complete 
tasks and requeuing any dependent tasks.


This patchset has only been very lightly tested on a x86-64 host. Any 
comments/thoughts/suggestions on this implementation?


Thanks

Kwok
commit 4c3926d9abb1a7e6089a9098e2099e2d574ebfec
Author: Kwok Cheung Yeung 
Date:   Tue Nov 3 03:06:26 2020 -0800

openmp: Add support for the OpenMP 5.0 task detach clause

2020-11-11  Kwok Cheung Yeung  

gcc/
* builtin-types.def (BT_PTR_SIZED_INT): New primitive type.
(BT_FN_PSINT_VOID): New function type.
(BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT): Rename
to...
(BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT_PSINT):
...this.  Add extra argument.
* gimplify.c (gimplify_scan_omp_clauses): Handle OMP_CLAUSE_DETACH.
(gimplify_adjust_omp_clauses): Likewise.
* omp-builtins.def (BUILT_IN_GOMP_TASK): Change function type to
BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT_PSINT.
(BUILT_IN_GOMP_NEW_EVENT): New.
* omp-expand.c (expand_task_call): Add detach argument when generating
call to GOMP_task.
* omp-low.c (scan_sharing_clauses): Setup data environment for detach
clause.
(lower_detach_clause): New.
(lower_omp_taskreg): Call lower_detach_clause for detach clause.  Add
Gimple statements generated for detach clause.
* tree-core.h (enum omp_clause_code): Add OMP_CLAUSE_DETACH.
* tree-pretty-print.c (dump_omp_clause): Handle OMP_CLAUSE_DETACH.
* tree.c (omp_clause_num_ops): Add entry for OMP_CLAUSE_DETACH.
(omp_clause_code_name): Add entry for OMP_CLAUSE_DETACH.
(walk_tree_1): Handle OMP_CLAUSE_DETACH.
* tree.h (OMP_CLAUSE_DETACH_EXPR): New.

gcc/c-family/
* c-pragma.h (pragma_omp_clause): Add PRAGMA_OMP_CLAUSE_DETACH.
Redefine PRAGMA_OACC_CLAUSE_DETACH.

gcc/c/
* c-parser.c (c_parser_omp_clause_detach): New.
(c_parser_omp_all_clauses): Handle PRAGMA_OMP_CLAUSE_DETACH clause.
(OMP_TASK_CLAUSE_MASK): Add mask for PRAGMA_OMP_CLAUSE_DETACH.
* c-typeck.c (c_finish_omp_clauses): Handle PRAGMA_OMP_CLAUSE_DETACH
clause.

gcc/cp/
* parser.c (cp_parser_omp_all_clauses): Handle
PRAGMA_OMP_CLAUSE_DETACH.
(OMP_TASK_CLAUSE_MASK): Add mask for PRAGMA_OMP_CLAUSE_DETACH.
* semantics.c

Re: [PATCH] openmp: Retire nest-var ICV

2020-11-09 Thread Kwok Cheung Yeung

On 06/11/2020 8:33 pm, Tobias Burnus wrote:

Hello Kwok, hi Jakub,

On 06.11.20 21:13, Kwok Cheung Yeung wrote:

In addition to deprecating the omp_(get|set)_nested() functions and OMP_NESTED 
environment variable, OpenMP 5.0 also removes the nest-var ICV altogether, 
defining it in terms of the max-active-levels-var ICV instead. [...]


Shouldn't libgomp/libgomp.texi be also updated?

Tobias


I have added some documentation regarding the relationship between the nesting 
setting and the current maximum number of active levels. The documentation does not 
detail ICVs though, so we probably don't need to explicitly state that one is in 
terms of another?


Is this version okay for trunk?

Thanks

Kwok
commit b4feb16f3c84b8f82163a4cbba6a31d55fbb8e5b
Author: Kwok Cheung Yeung 
Date:   Mon Nov 9 09:34:39 2020 -0800

openmp: Retire nest-var ICV for OpenMP 5.0

This removes the nest-var ICV, expressing nesting in terms of the
max-active-levels-var ICV instead.

2020-11-09  Kwok Cheung Yeung  

libgomp/
* env.c (gomp_global_icv): Remove nest_var field.
(gomp_max_active_levels_var): Initialize to 1.
(parse_boolean): Return true on success.
(handle_omp_display_env): Express OMP_NESTED in terms of
gomp_max_active_levels_var.
(initialize_env): Set gomp_max_active_levels_var from
OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS and
OMP_PROC_BIND.
* icv.c (omp_set_nested): Express in terms of
gomp_max_active_levels_var.
(omp_get_nested): Likewise.
* libgomp.h (struct gomp_task_icv): Remove nest_var field.
* libgomp.texi (omp_get_nested): Update documentation.
(omp_set_nested): Likewise.
(OMP_MAX_ACTIVE_LEVELS): Likewise.
(OMP_NESTED): Likewise.
(OMP_NUM_THREADS): Likewise.
(OMP_PROC_BIND): Likewise.
* parallel.c (gomp_resolve_num_threads): Replace reference
to nest_var with gomp_max_active_levels_var.
* testsuite/libgomp.c/target-5.c: Remove additional options.
(main): Remove references to omp_get_nested and omp_set_nested.

diff --git a/libgomp/env.c b/libgomp/env.c
index ab22525..75d0fe2 100644
--- a/libgomp/env.c
+++ b/libgomp/env.c
@@ -68,12 +68,11 @@ struct gomp_task_icv gomp_global_icv = {
   .run_sched_chunk_size = 1,
   .default_device_var = 0,
   .dyn_var = false,
-  .nest_var = false,
   .bind_var = omp_proc_bind_false,
   .target_data = NULL
 };
 
-unsigned long gomp_max_active_levels_var = gomp_supported_active_levels;
+unsigned long gomp_max_active_levels_var = 1;
 bool gomp_cancel_var = false;
 enum gomp_target_offload_t gomp_target_offload_var
   = GOMP_TARGET_OFFLOAD_DEFAULT;
@@ -959,16 +958,17 @@ parse_spincount (const char *name, unsigned long long 
*pvalue)
 }
 
 /* Parse a boolean value for environment variable NAME and store the
-   result in VALUE.  */
+   result in VALUE.  Return true if one was present and it was
+   successfully parsed.  */
 
-static void
+static bool
 parse_boolean (const char *name, bool *value)
 {
   const char *env;
 
   env = getenv (name);
   if (env == NULL)
-return;
+return false;
 
   while (isspace ((unsigned char) *env))
 ++env;
@@ -987,7 +987,11 @@ parse_boolean (const char *name, bool *value)
   while (isspace ((unsigned char) *env))
 ++env;
   if (*env != '\0')
-gomp_error ("Invalid value for environment variable %s", name);
+{
+  gomp_error ("Invalid value for environment variable %s", name);
+  return false;
+}
+  return true;
 }
 
 /* Parse the OMP_WAIT_POLICY environment variable and return the value.  */
@@ -1252,7 +1256,7 @@ handle_omp_display_env (unsigned long stacksize, int 
wait_policy)
   fprintf (stderr, "  OMP_DYNAMIC = '%s'\n",
   gomp_global_icv.dyn_var ? "TRUE" : "FALSE");
   fprintf (stderr, "  OMP_NESTED = '%s'\n",
-  gomp_global_icv.nest_var ? "TRUE" : "FALSE");
+  gomp_max_active_levels_var > 1 ? "TRUE" : "FALSE");
 
   fprintf (stderr, "  OMP_NUM_THREADS = '%lu", gomp_global_icv.nthreads_var);
   for (i = 1; i < gomp_nthreads_var_list_len; i++)
@@ -1417,16 +1421,11 @@ initialize_env (void)
 
   parse_schedule ();
   parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var);
-  parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var);
   parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var);
   parse_boolean ("OMP_DISPLAY_AFFINITY", &gomp_display_affinity_var);
   parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true);
   parse_target_offload ("OMP_TARGET_OFFLOAD", &gomp_target_offload_var);
   parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true);
-  parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var,
-  true);
-  if (gomp_max_active_

[PATCH] openmp: Retire nest-var ICV

2020-11-06 Thread Kwok Cheung Yeung

Hello

In addition to deprecating the omp_(get|set)_nested() functions and OMP_NESTED 
environment variable, OpenMP 5.0 also removes the nest-var ICV altogether, 
defining it in terms of the max-active-levels-var ICV instead.


This patch removes the ICV, and implements the handling of 
omp_(get|set)_nested() in terms of max-active-levels-var as defined in the spec.


The initial value of max-active-levels-var now depends on the number of items in 
OMP_NUM_THREADS and OMP_PROC_BIND as defined in section 2.5.2 of the spec. 
OMP_NESTED now changes the value of max-active-levels-var. If OMP_NESTED is 
false and OMP_MAX_ACTIVE_LEVELS is > 1, I have opted to use the value specified 
by OMP_MAX_ACTIVE_LEVELS, as OMP_NESTED is deprecated in OpenMP 5.0 (the spec 
says this is implementation defined in section 6.9).


The default value of max-active-levels-var is implementation defined (section 
2.5.2). It was previously set to the maximum supported number, but I think it 
should be 1 now, since OMP_NESTED defaults to false on OpenMP 4.5, and this 
replicates that behaviour.


This change regresses the testcase libgomp.c/target-5.c because nest-var is 
per data environment, while max-active-levels-var is per-device. The change in 
semantics causes the test for the nesting setting to fail, because now any 
changes to the nesting setting apply to the whole device, and not just to the 
current data environment. I just deleted this part of the testing as the test 
looks like it is testing per data environment ICVs.


Bootstrapped on x86_64 with no offloading, and libgomp testing carried out with 
nvptx offloading with no regressions.


Okay for trunk?

Thanks

Kwok
commit aad8afea37b33b4d5836b2b64be8f4dab6d74509
Author: Kwok Cheung Yeung 
Date:   Wed Nov 4 15:34:12 2020 -0800

openmp: Retire nest-var ICV for OpenMP 5.0

This removes the nest-var ICV, expressing nesting in terms of the
max-active-levels-var ICV instead.

2020-11-06  Kwok Cheung Yeung  

libgomp/
* env.c (gomp_global_icv): Remove nest_var field.
(gomp_max_active_levels_var): Initialize to 1.
(parse_boolean): Return true on success.
(handle_omp_display_env): Express OMP_NESTED in terms of
gomp_max_active_levels_var.
(initialize_env): Set gomp_max_active_levels_var from
OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS and
OMP_PROC_BIND.
* icv.c (omp_set_nested): Express in terms of
gomp_max_active_levels_var.
(omp_get_nested): Likewise.
* libgomp.h (struct gomp_task_icv): Remove nest_var field.
* parallel.c (gomp_resolve_num_threads): Replace reference
to nest_var with gomp_max_active_levels_var.
* testsuite/libgomp.c/target-5.c: Remove additional options.
(main): Remove references to omp_get_nested and omp_set_nested.

diff --git a/libgomp/env.c b/libgomp/env.c
index ab22525..75d0fe2 100644
--- a/libgomp/env.c
+++ b/libgomp/env.c
@@ -68,12 +68,11 @@ struct gomp_task_icv gomp_global_icv = {
   .run_sched_chunk_size = 1,
   .default_device_var = 0,
   .dyn_var = false,
-  .nest_var = false,
   .bind_var = omp_proc_bind_false,
   .target_data = NULL
 };
 
-unsigned long gomp_max_active_levels_var = gomp_supported_active_levels;
+unsigned long gomp_max_active_levels_var = 1;
 bool gomp_cancel_var = false;
 enum gomp_target_offload_t gomp_target_offload_var
   = GOMP_TARGET_OFFLOAD_DEFAULT;
@@ -959,16 +958,17 @@ parse_spincount (const char *name, unsigned long long 
*pvalue)
 }
 
 /* Parse a boolean value for environment variable NAME and store the
-   result in VALUE.  */
+   result in VALUE.  Return true if one was present and it was
+   successfully parsed.  */
 
-static void
+static bool
 parse_boolean (const char *name, bool *value)
 {
   const char *env;
 
   env = getenv (name);
   if (env == NULL)
-return;
+return false;
 
   while (isspace ((unsigned char) *env))
 ++env;
@@ -987,7 +987,11 @@ parse_boolean (const char *name, bool *value)
   while (isspace ((unsigned char) *env))
 ++env;
   if (*env != '\0')
-gomp_error ("Invalid value for environment variable %s", name);
+{
+  gomp_error ("Invalid value for environment variable %s", name);
+  return false;
+}
+  return true;
 }
 
 /* Parse the OMP_WAIT_POLICY environment variable and return the value.  */
@@ -1252,7 +1256,7 @@ handle_omp_display_env (unsigned long stacksize, int 
wait_policy)
   fprintf (stderr, "  OMP_DYNAMIC = '%s'\n",
   gomp_global_icv.dyn_var ? "TRUE" : "FALSE");
   fprintf (stderr, "  OMP_NESTED = '%s'\n",
-  gomp_global_icv.nest_var ? "TRUE" : "FALSE");
+  gomp_max_active_levels_var > 1 ? "TRUE" : "FALSE");
 
   fprintf (stderr, "  OMP_NUM_THREADS = '%lu", gomp_global_icv.nthreads_var);
   for (i = 1; i < gomp_nthreads_

Re: deprecations in OpenMP 5.0

2020-11-05 Thread Kwok Cheung Yeung

On 04/11/2020 2:33 pm, Jakub Jelinek wrote:

LGTM, except:


+  omp_lock_hint_contended __GOMP_DEPRECATED_5_0 = omp_sync_hint_contended,
omp_sync_hint_nonspeculative = 4,
-  omp_lock_hint_nonspeculative = omp_sync_hint_nonspeculative,
+  omp_lock_hint_nonspeculative __GOMP_DEPRECATED_5_0 = 
omp_sync_hint_nonspeculative,


The above line is too long and needs wrapping.



Fixed.


But it would be nice to also add -Wno-deprecated to dg-additional-options of
tests that do use those.
Perhaps for testing replace the 201811 temporarily with 201511 and run make
check.



I have run the tests (with _OPENMP >= 201511) and added 
-Wno-deprecated-declarations option to the testcases that trigger the 
deprecation warning.


I also found a bug in the previous version of the patch - C++ doesn't like 
having an attribute come before the throw clause at the end of a function 
declaration. This is now fixed.


Bootstrapped on x86_64 with no offloading, and tested with nvptx offloading. Is 
this version okay for trunk?



--- a/libgomp/omp_lib.f90.in
+++ b/libgomp/omp_lib.f90.in
@@ -644,4 +644,8 @@
end function
  end interface
  
+#if _OPENMP >= 201811

+!GCC$ ATTRIBUTES DEPRECATED :: omp_get_nested, omp_set_nested
+#endif
+
end module omp_lib


Also, what about omp_lib.h?  Do you plan to change it only when we switch
_OPENMP macro?  I mean, we can't rely on preprocessing in that case...



Since we can't rely on having access to the preprocessor, I don't see what else 
we could do at the moment, except maybe extend the DEPRECATED attribute to take 
a condition (openmp_version >= 201811), and not print when false? Probably more 
trouble than it is worth, and it differs from the behaviour of the C attribute.


Kwok
commit a944f2ab445bb226f65239429d13efdf69a98e4b
Author: Kwok Cheung Yeung 
Date:   Thu Nov 5 10:11:23 2020 -0800

openmp: Mark deprecated symbols in OpenMP 5.0

2020-11-05  Ulrich Drepper  
    Kwok Cheung Yeung  

libgomp/
* Makefile.am (%.mod): Add -cpp and -fopenmp to compile flags.
* Makefile.in: Regenerate.
* fortran.c: Wrap uses of omp_set_nested and omp_get_nested with
pragmas to ignore -Wdeprecated-declarations warnings.
* icv.c: Likewise.
* omp.h.in (__GOMP_DEPRECATED_5_0): Define.
Mark omp_lock_hint_* enum values, omp_lock_hint_t, omp_set_nested,
and omp_get_nested with __GOMP_DEPRECATED_5_0.
* omp_lib.f90.in: Mark omp_get_nested and omp_set_nested as
deprecated.
* testsuite/libgomp.c++/affinity-1.C: Add -Wno-deprecated-declarations
to test options.
* testsuite/libgomp.c/affinity-1.c: Likewise.
* testsuite/libgomp.c/affinity-2.c: Likewise.
* testsuite/libgomp.c/appendix-a/a.15.1.c: Likewise.
* testsuite/libgomp.c/lib-1.c: Likewise.
* testsuite/libgomp.c/nested-1.c: Likewise.
* testsuite/libgomp.c/nested-2.c: Likewise.
* testsuite/libgomp.c/nested-3.c: Likewise.
* testsuite/libgomp.c/pr32362-1.c: Likewise.
* testsuite/libgomp.c/pr32362-2.c: Likewise.
* testsuite/libgomp.c/pr32362-3.c: Likewise.
* testsuite/libgomp.c/pr35549.c: Likewise.
* testsuite/libgomp.c/pr42942.c: Likewise.
* testsuite/libgomp.c/pr61200.c: Likewise.
* testsuite/libgomp.c/sort-1.c: Likewise.
* testsuite/libgomp.c/target-5.c: Likewise.
* testsuite/libgomp.c/target-6.c: Likewise.
* testsuite/libgomp.c/teams-1.c: Likewise.
* testsuite/libgomp.c/thread-limit-1.c: Likewise.
* testsuite/libgomp.c/thread-limit-2.c: Likewise.
* testsuite/libgomp.c/thread-limit-4.c: Likewise.
* testsuite/libgomp.fortran/affinity1.f90: Likewise.
* testsuite/libgomp.fortran/lib1.f90: Likewise.
* testsuite/libgomp.fortran/lib2.f: Likewise.
* testsuite/libgomp.fortran/nested1.f90: Likewise.
* testsuite/libgomp.fortran/teams1.f90: Likewise.

diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index 586c930..4cf1f58 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -92,7 +92,7 @@ openacc_kinds.mod: openacc.mod
 openacc.mod: openacc.lo
:
 %.mod: %.f90
-   $(FC) $(FCFLAGS) -fsyntax-only $<
+   $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only $<
 fortran.lo: libgomp_f.h
 fortran.o: libgomp_f.h
 env.lo: libgomp_f.h
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 00d5e29..eb868b3 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -1382,7 +1382,7 @@ openacc_kinds.mod: openacc.mod
 openacc.mod: openacc.lo
:
 %.mod: %.f90
-   $(FC) $(FCFLAGS) -fsyntax-only $<
+   $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only $<
 fortran.lo: libgomp_f.h
 fortran.o: libgomp_f.h
 env.lo: libgomp_f.h
diff --git a/libgomp/fortran.c b/libgomp/fortran.c
index 029dec1..cd719f9 100644
--- a/libgomp/fortran.c
+++ b/libgomp/fortran.c
@@ -47,10 +47,

Re: deprecations in OpenMP 5.0

2020-11-04 Thread Kwok Cheung Yeung

On 28/10/2020 4:06 pm, Jakub Jelinek wrote:

On Wed, Oct 28, 2020 at 03:41:25PM +0000, Kwok Cheung Yeung wrote:

What if we made the definition of __GOMP_DEPRECATED in the original patch
conditional on the current value of __OPENMP__? i.e. Something like:

+#if defined(__GNUC__) && __OPENMP__ >= 201811L
+# define __GOMP_DEPRECATED __attribute__((__deprecated__))
+#else
+# define __GOMP_DEPRECATED
+#endif

In that case, __GOMP_DEPRECATED will not do anything until __OPENMP__ is
updated to reflect OpenMP 5.0, but when it is, the functions will
immediately be marked deprecated without any further work.


That could work, but the macro name would need to incorporate the exact
OpenMP version.
Because some APIs can be deprecated in OpenMP 5.0, others in 5.1 or in 5.2
(all to be removed in 6.0), others in 6.0/6.1 etc. to be removed in 7.0 etc.


I've renamed __GOMP_DEPRECATED to __GOMP_DEPRECATED_5_0.



However, GFortran does not support the deprecated attribute, so how should
it behave? My first thought would be to print out a warning message at
runtime the first time a deprecated function is called (printing it out
every time would probably be too annoying), and maybe add an environment
variable that can be set to disable the warning. A similar runtime warning
could also be printed if the OMP_NESTED environment variable is set. Again,
printing these warnings could be suppressed until the value of __OPENMP__ is
bumped up.


I'm against such runtime diagnostics, that is perhaps good for some
sanitization, but not normal usage.  Perhaps better implement deprecated
attribute in gfortran?



I have used Tobias' recently added patch for Fortran deprecation support to mark 
omp_get_nested and omp_set_nested as deprecated. If the omp_lock_hint_* integer 
parameters are marked though, then the deprecation warnings will fire the moment 
omp_lib is used from a Fortran program, even if they are not referenced in the 
program itself - a bug perhaps?


I have added '-cpp' (for preprocessor support) and '-fopenmp' (for the _OPENMP 
define) to the Makefile when compiling the omp_lib.f90.


Would a warning message be acceptable if OMP_NESTED is used? Obviously this 
cannot be done at compile-time.


Is this patch okay for trunk? We could add the deprecations for omp_lock_hint_* 
later when the deprecations for parameters are fixed. I have checked that it 
bootstraps on x86_64.


Kwok
From 6e8fc46bdcaf44da11d46968a488fdd990ae Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Wed, 4 Nov 2020 03:59:44 -0800
Subject: [PATCH] openmp: Mark deprecated symbols in OpenMP 5.0

2020-11-04  Ulrich Drepper  
    Kwok Cheung Yeung  

libgomp/
* Makefile.am (%.mod): Add -cpp and -fopenmp to compile flags.
* Makefile.in: Regenerate.
* fortran.c: Wrap uses of omp_set_nested and omp_get_nested with
pragmas to ignore -Wdeprecated-declarations warnings.
* icv.c: Likewise.
* omp.h.in (__GOMP_DEPRECATED_5_0): Define.
Mark omp_lock_hint_* enum values, omp_lock_hint_t, omp_set_nested,
and omp_get_nested with __GOMP_DEPRECATED_5_0.
* omp_lib.f90.in: Mark omp_get_nested and omp_set_nested as
deprecated.
---
 libgomp/Makefile.am|  2 +-
 libgomp/Makefile.in|  2 +-
 libgomp/fortran.c  | 13 +++--
 libgomp/icv.c  | 10 --
 libgomp/omp.h.in   | 22 ++
 libgomp/omp_lib.f90.in |  4 
 6 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index 586c930..4cf1f58 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -92,7 +92,7 @@ openacc_kinds.mod: openacc.mod
 openacc.mod: openacc.lo
:
 %.mod: %.f90
-   $(FC) $(FCFLAGS) -fsyntax-only $<
+   $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only $<
 fortran.lo: libgomp_f.h
 fortran.o: libgomp_f.h
 env.lo: libgomp_f.h
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 00d5e29..eb868b3 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -1382,7 +1382,7 @@ openacc_kinds.mod: openacc.mod
 openacc.mod: openacc.lo
:
 %.mod: %.f90
-   $(FC) $(FCFLAGS) -fsyntax-only $<
+   $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only $<
 fortran.lo: libgomp_f.h
 fortran.o: libgomp_f.h
 env.lo: libgomp_f.h
diff --git a/libgomp/fortran.c b/libgomp/fortran.c
index 029dec1..cd719f9 100644
--- a/libgomp/fortran.c
+++ b/libgomp/fortran.c
@@ -47,10 +47,13 @@ ialias_redirect (omp_test_lock)
 ialias_redirect (omp_test_nest_lock)
 # endif
 ialias_redirect (omp_set_dynamic)
-ialias_redirect (omp_set_nested)
-ialias_redirect (omp_set_num_threads)
 ialias_redirect (omp_get_dynamic)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ialias_redirect (omp_set_nested)
 ialias_redirect (omp_get_nested)
+#pragma GCC diagnostic pop
+ialias_redirect (omp_set_num_threads)
 ialias_redirect (omp_in_parallel)
 ial

  1   2   3   >