[PATCH v5] libgfortran: Replace mutex with rwlock

2023-05-08 Thread Lipeng Zhu via Fortran
This patch try to introduce the rwlock and split the read/write to
unit_root tree and unit_cache with rwlock instead of the mutex to
increase CPU efficiency. In the get_gfc_unit function, the percentage
to step into the insert_unit function is around 30%, in most instances,
we can get the unit in the phase of reading the unit_cache or unit_root
tree. So split the read/write phase by rwlock would be an approach to
make it more parallel.

BTW, the IPC metrics can gain around 9x in our test
server with 220 cores. The benchmark we used is
https://github.com/rwesson/NEAT

libgcc/ChangeLog:

* gthr-posix.h (__GTHREAD_RWLOCK_INIT): New macro
(__gthrw): New function
(__gthread_rwlock_rdlock): New function
(__gthread_rwlock_tryrdlock): New function
(__gthread_rwlock_wrlock): New function
(__gthread_rwlock_trywrlock): New function
(__gthread_rwlock_unlock): New function

libgfortran/ChangeLog:

* io/async.c (DEBUG_LINE): New
* io/async.h (RWLOCK_DEBUG_ADD): New macro
(CHECK_RDLOCK): New macro
(CHECK_WRLOCK): New macro
(TAIL_RWLOCK_DEBUG_QUEUE): New macro
(IN_RWLOCK_DEBUG_QUEUE): New macro
(RDLOCK): New macro
(WRLOCK): New macro
(RWUNLOCK): New macro
(RD_TO_WRLOCK): New macro
(INTERN_RDLOCK): New macro
(INTERN_WRLOCK): New macro
(INTERN_RWUNLOCK): New macro
* io/io.h (internal_proto): Define unit_rwlock
* io/transfer.c (st_read_done_worker): Relace unit_lock with unit_rwlock
(st_write_done_worker): Relace unit_lock with unit_rwlock
* io/unit.c (get_gfc_unit): Relace unit_lock with unit_rwlock
(if): Relace unit_lock with unit_rwlock
(close_unit_1): Relace unit_lock with unit_rwlock
(close_units): Relace unit_lock with unit_rwlock
(newunit_alloc): Relace unit_lock with unit_rwlock
* io/unix.c (flush_all_units): Relace unit_lock with unit_rwlock

---
v1 -> v2:
Limit the pthread_rwlock usage in libgcc only when __cplusplus isn't defined.

v2 -> v3:
Rebase the patch with trunk branch.

v3 -> v4:
Update the comments.

v4 -> v5:
Fix typos and code formatter.

Reviewed-by: Hongjiu Lu 
Signed-off-by: Lipeng Zhu 
---
 libgcc/gthr-posix.h   |  60 +++
 libgfortran/io/async.c|   4 +
 libgfortran/io/async.h| 154 +-
 libgfortran/io/io.h   |  15 ++--
 libgfortran/io/transfer.c |   8 +-
 libgfortran/io/unit.c |  71 +++---
 libgfortran/io/unix.c |  16 ++--
 7 files changed, 281 insertions(+), 47 deletions(-)

diff --git a/libgcc/gthr-posix.h b/libgcc/gthr-posix.h
index aebcfdd9f4c..73283082997 100644
--- a/libgcc/gthr-posix.h
+++ b/libgcc/gthr-posix.h
@@ -48,6 +48,9 @@ typedef pthread_t __gthread_t;
 typedef pthread_key_t __gthread_key_t;
 typedef pthread_once_t __gthread_once_t;
 typedef pthread_mutex_t __gthread_mutex_t;
+#ifndef __cplusplus
+typedef pthread_rwlock_t __gthread_rwlock_t;
+#endif
 typedef pthread_mutex_t __gthread_recursive_mutex_t;
 typedef pthread_cond_t __gthread_cond_t;
 typedef struct timespec __gthread_time_t;
@@ -58,6 +61,9 @@ typedef struct timespec __gthread_time_t;
 
 #define __GTHREAD_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER
 #define __GTHREAD_MUTEX_INIT_FUNCTION __gthread_mutex_init_function
+#ifndef __cplusplus
+#define __GTHREAD_RWLOCK_INIT PTHREAD_RWLOCK_INITIALIZER
+#endif
 #define __GTHREAD_ONCE_INIT PTHREAD_ONCE_INIT
 #if defined(PTHREAD_RECURSIVE_MUTEX_INITIALIZER)
 #define __GTHREAD_RECURSIVE_MUTEX_INIT PTHREAD_RECURSIVE_MUTEX_INITIALIZER
@@ -135,6 +141,13 @@ __gthrw(pthread_mutexattr_init)
 __gthrw(pthread_mutexattr_settype)
 __gthrw(pthread_mutexattr_destroy)
 
+#ifndef __cplusplus
+__gthrw(pthread_rwlock_rdlock)
+__gthrw(pthread_rwlock_tryrdlock)
+__gthrw(pthread_rwlock_wrlock)
+__gthrw(pthread_rwlock_trywrlock)
+__gthrw(pthread_rwlock_unlock)
+#endif
 
 #if defined(_LIBOBJC) || defined(_LIBOBJC_WEAK)
 /* Objective-C.  */
@@ -885,6 +898,53 @@ __gthread_cond_destroy (__gthread_cond_t* __cond)
   return __gthrw_(pthread_cond_destroy) (__cond);
 }
 
+#ifndef __cplusplus
+static inline int
+__gthread_rwlock_rdlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_rdlock) (__rwlock);
+  else
+return 0;
+}
+
+static inline int
+__gthread_rwlock_tryrdlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_tryrdlock) (__rwlock);
+  else
+return 0;
+}
+
+static inline int
+__gthread_rwlock_wrlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_wrlock) (__rwlock);
+  else
+return 0;
+}
+
+static inline int
+__gthread_rwlock_trywrlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_trywrlock) (__rwlock);
+  else
+return 0;
+}
+
+static inline int
+__gthread_rwlock_unlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_unlock) (__rwlock);
+  else
+ 

Re: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-05-08 Thread Zhu, Lipeng via Fortran




On 1/1/1970 8:00 AM, Bernhard Reutner-Fischer wrote:

On Mon,  8 May 2023 17:44:43 +0800
Lipeng Zhu  wrote:


This patch try to introduce the rwlock and split the read/write to
unit_root tree and unit_cache with rwlock instead of the mutex to
increase CPU efficiency. In the get_gfc_unit function, the percentage
to step into the insert_unit function is around 30%, in most
instances, we can get the unit in the phase of reading the unit_cache
or unit_root tree. So split the read/write phase by rwlock would be an
approach to make it more parallel.

BTW, the IPC metrics can gain around 9x in our test server with 220
cores. The benchmark we used is https://github.com/rwesson/NEAT


See commentary typos below.
You did not state if you regression tested the patch?
I use valgrind --tool=helgrind or --tool=drd to test 'make 
check-fortran'. Is it necessary to add an additional unit test for this 
patch?



Other than that it LGTM but i cannot approve it.
Thank you for your kind help for this patch, is there anything that I 
can do or can you help to push this patch forward?





diff --git a/libgfortran/io/async.h b/libgfortran/io/async.h index
ad226c8e856..0033cc74252 100644
--- a/libgfortran/io/async.h
+++ b/libgfortran/io/async.h
@@ -210,6 +210,128 @@
  DEBUG_PRINTF ("%s" DEBUG_RED "ACQ:" DEBUG_NORM " %-30s %78p\n", 
aio_prefix, #mutex,

mutex); \

Thanks, corrected in Patch v5.


} while (0)
  
+#ifdef __GTHREAD_RWLOCK_INIT

+#define RWLOCK_DEBUG_ADD(rwlock) do {  \
+aio_rwlock_debug *n;   \
+n = xmalloc (sizeof(aio_rwlock_debug));\


Missing space before the open brace: sizeof (


Thanks, corrected in Patch v5.


diff --git a/libgfortran/io/unit.c b/libgfortran/io/unit.c index
82664dc5f98..62f1db21d34 100644
--- a/libgfortran/io/unit.c
+++ b/libgfortran/io/unit.c
@@ -33,34 +33,36 @@ see the files COPYING3 and COPYING.RUNTIME
respectively.  If not, see
  
  
  /* IO locking rules:

-   UNIT_LOCK is a master lock, protecting UNIT_ROOT tree and UNIT_CACHE.
+   UNIT_RWLOCK is a master lock, protecting UNIT_ROOT tree and UNIT_CACHE.
+   And use the rwlock to spilt read and write phase to UNIT_ROOT tree
+   and UNIT_CACHE to increase CPU efficiency.


s/spilt/split. Maybe:

Using an rwlock improves efficiency by allowing us to separate readers and 
writers of both UNIT_ROOT
and UNIT_CACHE.


Thanks, corrected in Patch v5.


@@ -350,6 +356,17 @@ retry:
if (c == 0)
break;
  }
+  /* We did not find a unit in the cache nor in the unit list, create a new
+(locked) unit and insert into the unit list and cache.
+Manipulating either or both the unit list and the unit cache requires to
+hold a write-lock [for obvious reasons]:
+1. By separating the read/write lock, it will greatly reduce the contention
+   at the read part, while write part is not always necessary or most
+   unlikely once the unit hit in cache.


+By separating the read/write lock, we will greatly reduce the contention
+on the read part, while the write part is unlikely once the unit hits
+the cache.


+2. We try to balance the implementation complexity and the performance
+   gains that fit into current cases we observed by just using a
+   pthread_rwlock. */


Let's drop 2.


Got it, thanks!

thanks,


Re: [patch, fortran] PR109662 Namelist input with comma after name accepted

2023-05-08 Thread Harald Anlauf via Fortran

Steve,

On 5/8/23 02:13, Steve Kargl via Gcc-patches wrote:

Harald,
Thanks for keeping us honest.  I didn't check what other
separators might cause a problem.

After 2 decades of working on gfortran, I've come to conclusion
that -std=f2018 should be the default.  When f2023 is ratified,
the default becomes -std=f2023.  All GNU fortran extension
should be behind an option, and we should be aggressive
eliminating extensions.

Yes, this means that 'real*4' and similar would require
a -fallow-nonstandard-declaration option.



please don't let us get off-topic.

The issue behind the PR was F2018: 13.11.3.1, Namelist input,
which has

Input for a namelist input statement consists of
(1) optional blanks and namelist comments,
(2) the character & followed immediately by the namelist-group-name as
specified in the NAMELIST statement,
(3) one or more blanks,

where "blanks" was to be interpreted.  Separators are discussed
separately.

Jerry has resolved "," and ";".  Good.

There is another weird issue that is visible in the testcase
output in my previous mail for "!".  Reducing that further now
suggests that the EOF condition of the namelist read of the
single line affects the namelist read of the next multi-line read.

So this one is actually a different bug, likely libgfortran's
internal state.



Re: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-05-08 Thread Bernhard Reutner-Fischer via Fortran
On Mon,  8 May 2023 17:44:43 +0800
Lipeng Zhu  wrote:

> This patch try to introduce the rwlock and split the read/write to
> unit_root tree and unit_cache with rwlock instead of the mutex to
> increase CPU efficiency. In the get_gfc_unit function, the percentage
> to step into the insert_unit function is around 30%, in most instances,
> we can get the unit in the phase of reading the unit_cache or unit_root
> tree. So split the read/write phase by rwlock would be an approach to
> make it more parallel.
> 
> BTW, the IPC metrics can gain around 9x in our test
> server with 220 cores. The benchmark we used is
> https://github.com/rwesson/NEAT

See commentary typos below.
You did not state if you regression tested the patch?
Other than that it LGTM but i cannot approve it.

> diff --git a/libgfortran/io/async.h b/libgfortran/io/async.h
> index ad226c8e856..0033cc74252 100644
> --- a/libgfortran/io/async.h
> +++ b/libgfortran/io/async.h
> @@ -210,6 +210,128 @@
>  DEBUG_PRINTF ("%s" DEBUG_RED "ACQ:" DEBUG_NORM " %-30s %78p\n", 
> aio_prefix, #mutex, mutex); \
>} while (0)
>  
> +#ifdef __GTHREAD_RWLOCK_INIT
> +#define RWLOCK_DEBUG_ADD(rwlock) do {\
> +aio_rwlock_debug *n; \
> +n = xmalloc (sizeof(aio_rwlock_debug));  \

Missing space before the open brace: sizeof (

> diff --git a/libgfortran/io/unit.c b/libgfortran/io/unit.c
> index 82664dc5f98..62f1db21d34 100644
> --- a/libgfortran/io/unit.c
> +++ b/libgfortran/io/unit.c
> @@ -33,34 +33,36 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
> If not, see
>  
>  
>  /* IO locking rules:
> -   UNIT_LOCK is a master lock, protecting UNIT_ROOT tree and UNIT_CACHE.
> +   UNIT_RWLOCK is a master lock, protecting UNIT_ROOT tree and UNIT_CACHE.
> +   And use the rwlock to spilt read and write phase to UNIT_ROOT tree
> +   and UNIT_CACHE to increase CPU efficiency.

s/spilt/split. Maybe:

Using an rwlock improves efficiency by allowing us to separate readers
and writers of both UNIT_ROOT and UNIT_CACHE.

> @@ -350,6 +356,17 @@ retry:
>if (c == 0)
>   break;
>  }
> +  /* We did not find a unit in the cache nor in the unit list, create a new
> +(locked) unit and insert into the unit list and cache.
> +Manipulating either or both the unit list and the unit cache requires to
> +hold a write-lock [for obvious reasons]:
> +1. By separating the read/write lock, it will greatly reduce the 
> contention
> +   at the read part, while write part is not always necessary or most
> +   unlikely once the unit hit in cache.

+By separating the read/write lock, we will greatly reduce the contention
+on the read part, while the write part is unlikely once the unit hits
+the cache.

> +2. We try to balance the implementation complexity and the performance
> +   gains that fit into current cases we observed by just using a
> +   pthread_rwlock. */

Let's drop 2.
thanks,


[PATCH v4] libgfortran: Replace mutex with rwlock

2023-05-08 Thread Lipeng Zhu via Fortran
This patch try to introduce the rwlock and split the read/write to
unit_root tree and unit_cache with rwlock instead of the mutex to
increase CPU efficiency. In the get_gfc_unit function, the percentage
to step into the insert_unit function is around 30%, in most instances,
we can get the unit in the phase of reading the unit_cache or unit_root
tree. So split the read/write phase by rwlock would be an approach to
make it more parallel.

BTW, the IPC metrics can gain around 9x in our test
server with 220 cores. The benchmark we used is
https://github.com/rwesson/NEAT

libgcc/ChangeLog:

* gthr-posix.h (__GTHREAD_RWLOCK_INIT): New macro
(__gthrw): New function
(__gthread_rwlock_rdlock): New function
(__gthread_rwlock_tryrdlock): New function
(__gthread_rwlock_wrlock): New function
(__gthread_rwlock_trywrlock): New function
(__gthread_rwlock_unlock): New function

libgfortran/ChangeLog:

* io/async.c (DEBUG_LINE): New
* io/async.h (RWLOCK_DEBUG_ADD): New macro
(CHECK_RDLOCK): New macro
(CHECK_WRLOCK): New macro
(TAIL_RWLOCK_DEBUG_QUEUE): New macro
(IN_RWLOCK_DEBUG_QUEUE): New macro
(RDLOCK): New macro
(WRLOCK): New macro
(RWUNLOCK): New macro
(RD_TO_WRLOCK): New macro
(INTERN_RDLOCK): New macro
(INTERN_WRLOCK): New macro
(INTERN_RWUNLOCK): New macro
* io/io.h (internal_proto): Define unit_rwlock
* io/transfer.c (st_read_done_worker): Relace unit_lock with unit_rwlock
(st_write_done_worker): Relace unit_lock with unit_rwlock
* io/unit.c (get_gfc_unit): Relace unit_lock with unit_rwlock
(if): Relace unit_lock with unit_rwlock
(close_unit_1): Relace unit_lock with unit_rwlock
(close_units): Relace unit_lock with unit_rwlock
(newunit_alloc): Relace unit_lock with unit_rwlock
* io/unix.c (flush_all_units): Relace unit_lock with unit_rwlock

---
v1 -> v2:
Limit the pthread_rwlock usage in libgcc only when __cplusplus isn't defined.

v2 -> v3:
Rebase the patch with trunk branch.

v3 -> v4:
Update the comments.

Reviewed-by: Hongjiu Lu 
Signed-off-by: Lipeng Zhu 
---
 libgcc/gthr-posix.h   |  60 +++
 libgfortran/io/async.c|   4 +
 libgfortran/io/async.h| 151 ++
 libgfortran/io/io.h   |  15 ++--
 libgfortran/io/transfer.c |   8 +-
 libgfortran/io/unit.c |  75 +++
 libgfortran/io/unix.c |  16 ++--
 7 files changed, 283 insertions(+), 46 deletions(-)

diff --git a/libgcc/gthr-posix.h b/libgcc/gthr-posix.h
index aebcfdd9f4c..73283082997 100644
--- a/libgcc/gthr-posix.h
+++ b/libgcc/gthr-posix.h
@@ -48,6 +48,9 @@ typedef pthread_t __gthread_t;
 typedef pthread_key_t __gthread_key_t;
 typedef pthread_once_t __gthread_once_t;
 typedef pthread_mutex_t __gthread_mutex_t;
+#ifndef __cplusplus
+typedef pthread_rwlock_t __gthread_rwlock_t;
+#endif
 typedef pthread_mutex_t __gthread_recursive_mutex_t;
 typedef pthread_cond_t __gthread_cond_t;
 typedef struct timespec __gthread_time_t;
@@ -58,6 +61,9 @@ typedef struct timespec __gthread_time_t;
 
 #define __GTHREAD_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER
 #define __GTHREAD_MUTEX_INIT_FUNCTION __gthread_mutex_init_function
+#ifndef __cplusplus
+#define __GTHREAD_RWLOCK_INIT PTHREAD_RWLOCK_INITIALIZER
+#endif
 #define __GTHREAD_ONCE_INIT PTHREAD_ONCE_INIT
 #if defined(PTHREAD_RECURSIVE_MUTEX_INITIALIZER)
 #define __GTHREAD_RECURSIVE_MUTEX_INIT PTHREAD_RECURSIVE_MUTEX_INITIALIZER
@@ -135,6 +141,13 @@ __gthrw(pthread_mutexattr_init)
 __gthrw(pthread_mutexattr_settype)
 __gthrw(pthread_mutexattr_destroy)
 
+#ifndef __cplusplus
+__gthrw(pthread_rwlock_rdlock)
+__gthrw(pthread_rwlock_tryrdlock)
+__gthrw(pthread_rwlock_wrlock)
+__gthrw(pthread_rwlock_trywrlock)
+__gthrw(pthread_rwlock_unlock)
+#endif
 
 #if defined(_LIBOBJC) || defined(_LIBOBJC_WEAK)
 /* Objective-C.  */
@@ -885,6 +898,53 @@ __gthread_cond_destroy (__gthread_cond_t* __cond)
   return __gthrw_(pthread_cond_destroy) (__cond);
 }
 
+#ifndef __cplusplus
+static inline int
+__gthread_rwlock_rdlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_rdlock) (__rwlock);
+  else
+return 0;
+}
+
+static inline int
+__gthread_rwlock_tryrdlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_tryrdlock) (__rwlock);
+  else
+return 0;
+}
+
+static inline int
+__gthread_rwlock_wrlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_wrlock) (__rwlock);
+  else
+return 0;
+}
+
+static inline int
+__gthread_rwlock_trywrlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_trywrlock) (__rwlock);
+  else
+return 0;
+}
+
+static inline int
+__gthread_rwlock_unlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_unlock) (__rwlock);
+  else
+return 0;
+}
+#endif
+
 #endif /* 

Re: [PATCH v3] libgfortran: Replace mutex with rwlock

2023-05-08 Thread Zhu, Lipeng via Fortran




On 1/1/1970 8:00 AM, Bernhard Reutner-Fischer wrote:

Hi!

[please do not top-post]


Sure, thanks for the reminder.


On Thu, 20 Apr 2023 21:13:08 +0800
"Zhu, Lipeng"  wrote:


Hi Bernhard,

Thanks for your questions and suggestions.
The rwlock could allow multiple threads to have concurrent read-only
access to the cache/unit list, only a single writer is allowed.


right.


Write lock will not be acquired until all read lock are released.


So i must have confused rwlock with something else, something that allows self 
to hold a read-lock and
upgrade that to a write-lock, purposely starving all successive incoming 
readers. I.e. just toggle your
RD_TO_WRLOCK impl, here, atomically. This proved to be benefical in some 
situations in the past.
Doesn't seem to work with your rwlock, does it

Pthread API doesn't support the upgrade rdlock to wrlock directly, the 
calling thread may deadlock if at the time the call is made it holds the 
read-write lock (whether a read or write lock). 
https://linux.die.net/man/3/pthread_rwlock_wrlock.
That is why we implement RD_TO_WRLOCK like this: release read lock 
firstly and then acquire write lock.



And I didn't change the mutex scope when refactor the code, only make
a more fine-grained distinction for the read/write cache/unit list.


Yes of course, i can see you did that.


I complete the comment according to your template, I will insert the
comment in the source code in next version patch with other refinement
by your suggestions.
"
Now we did not get a unit in cache and unit list, so we need to create
a new unit, and update it to cache and unit list.


s/Now/By now/ or s/Now w/W/ and s/get/find/ "
We did not find a unit in the cache nor in the unit list, create a new
(locked) unit and insert into the unit list and cache.
Manipulating either or both the unit list and the unit cache requires to hold a 
write-lock [for obvious
reasons]"

Superfluous when talking about pthread_rwlock_wrlock since that implies that 
even the process
acquiring the wrlock has to first release it's very own rdlock.


Thanks for the correction, I will update the comments in next v4 patch.


Prior to update the cache and list, we need to release all read locks,
and then immediately to acquire write lock, thus ensure the exclusive
update to the cache and unit list.
Either way, we will manipulate the cache and/or the unit list so we
must take a write lock now.
We don't take the write bit in *addition* to the read lock because:
1. It will needlessly complicate releasing the respective lock;


Under pthread_rwlock_wrlock it will deadlock, so that's wrong?
Drop that point then? If not, what's your reasoning / observation?

Under my lock, you hold the R, additionally take the W and then immediately 
release the R because you
yourself won't read, just write.
But mine's not the pthread_rwlock you talk about, admittedly.

Yes, pthread_rwlock doesn't support upgrade rdlock to wrlock directly, 
so we can’t hold rdlock while trying to acquire wrlock. I will drop the 
first point and update the comment accordingly.



2. By separate the read/write lock, it will greatly reduce the
contention at the read part, while write part is not always necessary
or most unlikely once the unit hit in cache;


We know that.


3. We try to balance the implementation complexity and the performance
gains that fit into current cases we observed.


.. by just using a pthread_rwlock. And that's the top point iff you keep it at 
that. That's a fair step, sure.
BTW, did you look at the RELEASE semantics, respectively the note that one day 
(and now is that very
day), we might improve on the release semantics? Can of course be incremental 
AFAIC

Not quite understand your question about looking at the RELEASE 
semantics. Can you be more specific?



"


If folks agree on this first step then you have my OK with a catchy malloc and 
the discussion recorded
here on the list. A second step would be RELEASE.
And, depending on the underlying capabilities of available locks, further 
tweaks, obviously.

PS: and, please, don't top-post

thanks,



Best Regards,
Zhu, Lipeng

On 1/1/1970 8:00 AM, Bernhard Reutner-Fischer wrote:

On 19 April 2023 09:06:28 CEST, Lipeng Zhu via Fortran  
wrote:

This patch try to introduce the rwlock and split the read/write to
unit_root tree and unit_cache with rwlock instead of the mutex to
increase CPU efficiency. In the get_gfc_unit function, the
percentage to step into the insert_unit function is around 30%, in
most instances, we can get the unit in the phase of reading the
unit_cache or unit_root tree. So split the read/write phase by
rwlock would be an approach to make it more parallel.

BTW, the IPC metrics can gain around 9x in our test server with 220
cores. The benchmark we used is https://github.com/rwesson/NEAT
  
   

+#define RD_TO_WRLOCK(rwlock) \
+  RWUNLOCK (rwlock);\
+  WRLOCK (rwlock);
+#endif
+


   

diff --git a/libgfortran/io/unit.c b/libgfortran/io/unit.c index

Re: [PATCH][stage1] Remove conditionals around free()

2023-05-08 Thread Bernhard Reutner-Fischer via Fortran
On Wed, 1 Mar 2023 16:07:14 -0800
Steve Kargl  wrote:

> On Wed, Mar 01, 2023 at 10:28:56PM +0100, Bernhard Reutner-Fischer via 
> Fortran wrote:
> >  libgfortran/caf/single.c |6 ++
> >  libgfortran/io/async.c   |6 ++
> >  libgfortran/io/format.c  |3 +--
> >  libgfortran/io/transfer.c|6 ++
> >  libgfortran/io/unix.c|3 +--  
> 
> The Fortran ones are OK.
> 

I've pushed the fortran and libgfortran bits as r14-568-gca2f64d5d08c16
thanks,


Re: ping Re: [PATCH 3/3] Fortran: Fix mpz and mpfr memory leaks

2023-05-08 Thread Bernhard Reutner-Fischer via Fortran
On Mon, 17 Apr 2023 15:18:27 -0700
Steve Kargl  wrote:
> On Mon, Apr 17, 2023 at 09:47:50PM +0200, Bernhard Reutner-Fischer via 
> Fortran wrote:
> > On Mon, 03 Apr 2023 23:42:06 +0200
> > Bernhard Reutner-Fischer  wrote:
> >   
> > > On 3 April 2023 21:50:49 CEST, Harald Anlauf  wrote:  
> > > >Hi Bernhard,
> > > >
> > > >there is neither context nor a related PR with a testcase showing
> > > >that this patch fixes issues seen there.
> > > 
> > > Yes, i forgot to mention the PR:
> > > 
> > > PR fortran/68800
> > > 
> > > I did not construct individual test cases but it should be obvious that 
> > > we should not leak these.
> > >   
> > > >
> > > >On 4/2/23 17:05, Bernhard Reutner-Fischer via Gcc-patches wrote:
> > > >> From: Bernhard Reutner-Fischer 
> > > >> 
> > > >> Cc: fortran@gcc.gnu.org
> > > >> 
> > > >> gcc/fortran/ChangeLog:
> > > >> 
> > > >>* array.cc (gfc_ref_dimen_size): Free mpz memory before ICEing.
> > > >>* expr.cc (find_array_section): Fix mpz memory leak.
> > > >>* simplify.cc (gfc_simplify_reshape): Fix mpz memory leaks in
> > > >>error paths.
> > > >>(gfc_simplify_set_exponent): Fix mpfr memory leak.
> > > >> ---
> > > >>   gcc/fortran/array.cc| 3 +++
> > > >>   gcc/fortran/expr.cc | 8 
> > > >>   gcc/fortran/simplify.cc | 7 ++-
> > > >>   3 files changed, 13 insertions(+), 5 deletions(-)
> > > >> 
> > > >> diff --git a/gcc/fortran/array.cc b/gcc/fortran/array.cc
> > > >> index be5eb8b6a0f..8b1e816a859 100644
> > > >> --- a/gcc/fortran/array.cc
> > > >> +++ b/gcc/fortran/array.cc
> > > >> @@ -2541,6 +2541,9 @@ gfc_ref_dimen_size (gfc_array_ref *ar, int 
> > > >> dimen, mpz_t *result, mpz_t *end)
> > > >> return t;
> > > >> 
> > > >>   default:
> > > >> +  mpz_clear (lower);
> > > >> +  mpz_clear (stride);
> > > >> +  mpz_clear (upper);
> > > >> gfc_internal_error ("gfc_ref_dimen_size(): Bad dimen_type");
> > > >>   }
> > > >
> > > >  What is the point of clearing variables before issuing
> > > >  a gfc_internal_error?
> > > 
> > > To make it obvious that we are aware that we allocated these.  
> 
> I must admit I agree with Harald on this portion
> of the diff.  What's the point?  There is alot more
> allocated than just those 3 mzp_t variables when the
> internal error occurs.  For example, gfortran does not
> walk the namespaces and free those before the ICE.
> I suppose silencing valgrind might be sufficient 
> reason for the clutter.  So, ok.

I've dropped this hunk and pushed the rest as r14-567-g2521390dd2f8e5
Harald fixed the leak of expr in gfc_simplify_set_exponent in the
meantime.
thanks,