RE: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-12-06 Thread Zhu, Lipeng
> > [CCing Ian as libgcc maintainer]
> >
> > On Wed, 1 Nov 2023 10:14:37 +
> > "Zhu, Lipeng"  wrote:
> >
> > > > >
> > > > > Hi Lipeng,
> > > > >
> > > > > >>> Sure, as your comments, in the patch V6, I added 3 test
> > > > > >>> cases with OpenMP to test different cases in concurrency
> respectively:
> > > > > >>> 1. find and create unit very frequently to stress read lock
> > > > > >>> and write
> > lock.
> > > > > >>> 2. only access the unit which exist in cache to stress read lock.
> > > > > >>> 3. access the same unit in concurrency.
> > > > > >>> For the third test case, it also help to find a bug:  When
> > > > > >>> unit can't be found in cache nor unit list in read phase,
> > > > > >>> then threads will try to acquire write lock to insert the
> > > > > >>> same unit, this will cause duplicate key
> > > > > >> error.
> > > > > >>> To fix this bug, I get the unit from unit list once again
> > > > > >>> before insert in write
> > > > > >> lock.
> > > > > >>> More details you can refer the patch v6.
> > > > > >>>
> > > > > >>
> > > > > >> Could you help to review this update? I really appreciate
> > > > > >> your
> > assistance.
> > > > > >>
> > > > >
> > > > > > Could you help to review this update?  Any concern will be
> > appreciated.
> > > > >
> > > > > Fortran parts are OK (I think I wrote that already), we need
> > > > > somebody for the non-Fortran parts.
> > > > >
> > > > Hi Thomas,
> > > >
> > > > Thanks for your response. Very appreciate for your patience and help.
> > > >
> > > > > Jakub, could you maybe take a look?
> > > > >
> > > > > Best regards
> > > > >
> > > > >   Thomas
> > > >
> > > > Hi Jakub,
> > > >
> > > > Can you help to take a look at the change for libgcc part that
> > > > added several rwlock macros in libgcc/gthr-posix.h?
> > > >
> > >
> > > Hi Jakub,
> > >
> > > Could you help to review this, any comment will be greatly appreciated.
> >
> > Latest version is at
> > https://inbox.sourceware.org/gcc-patches/20230818031818.2161842-1-
> > lipeng@intel.com/
> >
> Thanks Bernhard.
> 
> Hi Ian,
> Could you help to review the changes for libgcc part?
> Very looking forward to your help.
> 
> > >
> > > > Best Regards,
> > > > Lipeng Zhu
> > >

Hi Jakub, 

Could you help to review this patch for the changes in libgcc/gthr-posix.h.
Just as Thomas commented: "Fortran parts are OK".  We need your 
comments for the non-fortran parts.  Very appreciated for your help.
Latest version is at 
https://inbox.sourceware.org/gcc-patches/20230818031818.2161842-1-lipeng@intel.com/

Lipeng Zhu,
Best Regards.


RE: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-11-23 Thread Zhu, Lipeng
> [CCing Ian as libgcc maintainer]
> 
> On Wed, 1 Nov 2023 10:14:37 +
> "Zhu, Lipeng"  wrote:
> 
> > > >
> > > > Hi Lipeng,
> > > >
> > > > >>> Sure, as your comments, in the patch V6, I added 3 test cases
> > > > >>> with OpenMP to test different cases in concurrency respectively:
> > > > >>> 1. find and create unit very frequently to stress read lock and 
> > > > >>> write
> lock.
> > > > >>> 2. only access the unit which exist in cache to stress read lock.
> > > > >>> 3. access the same unit in concurrency.
> > > > >>> For the third test case, it also help to find a bug:  When
> > > > >>> unit can't be found in cache nor unit list in read phase, then
> > > > >>> threads will try to acquire write lock to insert the same
> > > > >>> unit, this will cause duplicate key
> > > > >> error.
> > > > >>> To fix this bug, I get the unit from unit list once again
> > > > >>> before insert in write
> > > > >> lock.
> > > > >>> More details you can refer the patch v6.
> > > > >>>
> > > > >>
> > > > >> Could you help to review this update? I really appreciate your
> assistance.
> > > > >>
> > > >
> > > > > Could you help to review this update?  Any concern will be
> appreciated.
> > > >
> > > > Fortran parts are OK (I think I wrote that already), we need
> > > > somebody for the non-Fortran parts.
> > > >
> > > Hi Thomas,
> > >
> > > Thanks for your response. Very appreciate for your patience and help.
> > >
> > > > Jakub, could you maybe take a look?
> > > >
> > > > Best regards
> > > >
> > > > Thomas
> > >
> > > Hi Jakub,
> > >
> > > Can you help to take a look at the change for libgcc part that added
> > > several rwlock macros in libgcc/gthr-posix.h?
> > >
> >
> > Hi Jakub,
> >
> > Could you help to review this, any comment will be greatly appreciated.
> 
> Latest version is at
> https://inbox.sourceware.org/gcc-patches/20230818031818.2161842-1-
> lipeng@intel.com/
> 
Thanks Bernhard.

Hi Ian, 
Could you help to review the changes for libgcc part?  
Very looking forward to your help.

> >
> > > Best Regards,
> > > Lipeng Zhu
> >



Re: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-11-02 Thread Bernhard Reutner-Fischer
[CCing Ian as libgcc maintainer]

On Wed, 1 Nov 2023 10:14:37 +
"Zhu, Lipeng"  wrote:

> > >
> > > Hi Lipeng,
> > >  
> > > >>> Sure, as your comments, in the patch V6, I added 3 test cases with
> > > >>> OpenMP to test different cases in concurrency respectively:
> > > >>> 1. find and create unit very frequently to stress read lock and write 
> > > >>> lock.
> > > >>> 2. only access the unit which exist in cache to stress read lock.
> > > >>> 3. access the same unit in concurrency.
> > > >>> For the third test case, it also help to find a bug:  When unit
> > > >>> can't be found in cache nor unit list in read phase, then threads
> > > >>> will try to acquire write lock to insert the same unit, this will
> > > >>> cause duplicate key  
> > > >> error.  
> > > >>> To fix this bug, I get the unit from unit list once again before
> > > >>> insert in write  
> > > >> lock.  
> > > >>> More details you can refer the patch v6.
> > > >>>  
> > > >>
> > > >> Could you help to review this update? I really appreciate your 
> > > >> assistance.
> > > >>  
> > >  
> > > > Could you help to review this update?  Any concern will be appreciated. 
> > > >  
> > >
> > > Fortran parts are OK (I think I wrote that already), we need somebody
> > > for the non-Fortran parts.
> > >  
> > Hi Thomas,
> > 
> > Thanks for your response. Very appreciate for your patience and help.
> >   
> > > Jakub, could you maybe take a look?
> > >
> > > Best regards
> > >
> > >   Thomas  
> > 
> > Hi Jakub,
> > 
> > Can you help to take a look at the change for libgcc part that added several
> > rwlock macros in libgcc/gthr-posix.h?
> >   
> 
> Hi Jakub,
> 
> Could you help to review this, any comment will be greatly appreciated.

Latest version is at
https://inbox.sourceware.org/gcc-patches/20230818031818.2161842-1-lipeng@intel.com/

> 
> > Best Regards,
> > Lipeng Zhu  
> 



RE: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-11-01 Thread Zhu, Lipeng
> >
> > Hi Lipeng,
> >
> > >>> Sure, as your comments, in the patch V6, I added 3 test cases with
> > >>> OpenMP to test different cases in concurrency respectively:
> > >>> 1. find and create unit very frequently to stress read lock and write 
> > >>> lock.
> > >>> 2. only access the unit which exist in cache to stress read lock.
> > >>> 3. access the same unit in concurrency.
> > >>> For the third test case, it also help to find a bug:  When unit
> > >>> can't be found in cache nor unit list in read phase, then threads
> > >>> will try to acquire write lock to insert the same unit, this will
> > >>> cause duplicate key
> > >> error.
> > >>> To fix this bug, I get the unit from unit list once again before
> > >>> insert in write
> > >> lock.
> > >>> More details you can refer the patch v6.
> > >>>
> > >>
> > >> Could you help to review this update? I really appreciate your 
> > >> assistance.
> > >>
> >
> > > Could you help to review this update?  Any concern will be appreciated.
> >
> > Fortran parts are OK (I think I wrote that already), we need somebody
> > for the non-Fortran parts.
> >
> Hi Thomas,
> 
> Thanks for your response. Very appreciate for your patience and help.
> 
> > Jakub, could you maybe take a look?
> >
> > Best regards
> >
> > Thomas
> 
> Hi Jakub,
> 
> Can you help to take a look at the change for libgcc part that added several
> rwlock macros in libgcc/gthr-posix.h?
> 

Hi Jakub,

Could you help to review this, any comment will be greatly appreciated.

> Best Regards,
> Lipeng Zhu



RE: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-10-23 Thread Zhu, Lipeng
> 
> Hi Lipeng,
> 
> >>> Sure, as your comments, in the patch V6, I added 3 test cases with
> >>> OpenMP to test different cases in concurrency respectively:
> >>> 1. find and create unit very frequently to stress read lock and write 
> >>> lock.
> >>> 2. only access the unit which exist in cache to stress read lock.
> >>> 3. access the same unit in concurrency.
> >>> For the third test case, it also help to find a bug:  When unit
> >>> can't be found in cache nor unit list in read phase, then threads
> >>> will try to acquire write lock to insert the same unit, this will
> >>> cause duplicate key
> >> error.
> >>> To fix this bug, I get the unit from unit list once again before
> >>> insert in write
> >> lock.
> >>> More details you can refer the patch v6.
> >>>
> >>
> >> Could you help to review this update? I really appreciate your assistance.
> >>
> 
> > Could you help to review this update?  Any concern will be appreciated.
> 
> Fortran parts are OK (I think I wrote that already), we need somebody for the
> non-Fortran parts.
> 
Hi Thomas,

Thanks for your response. Very appreciate for your patience and help.

> Jakub, could you maybe take a look?
> 
> Best regards
> 
>   Thomas

Hi Jakub,

Can you help to take a look at the change for libgcc part that added 
several rwlock macros in libgcc/gthr-posix.h?

Best Regards,
Lipeng Zhu


Re: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-10-22 Thread Thomas Koenig

Hi Lipeng,


Sure, as your comments, in the patch V6, I added 3 test cases with
OpenMP to test different cases in concurrency respectively:
1. find and create unit very frequently to stress read lock and write lock.
2. only access the unit which exist in cache to stress read lock.
3. access the same unit in concurrency.
For the third test case, it also help to find a bug:  When unit can't
be found in cache nor unit list in read phase, then threads will try
to acquire write lock to insert the same unit, this will cause duplicate key

error.

To fix this bug, I get the unit from unit list once again before insert in write

lock.

More details you can refer the patch v6.



Could you help to review this update? I really appreciate your assistance.




Could you help to review this update?  Any concern will be appreciated.


Fortran parts are OK (I think I wrote that already), we need somebody
for the non-Fortran parts.

Jakub, could you maybe take a look?

Best regards

Thomas



RE: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-10-22 Thread Zhu, Lipeng
> 
> > Hi Thomas,
> >
> > >
> > > Hi Lipeng,
> > >
> > > > May I know any comment or concern on this patch, thanks for your
> > > > time
> > > > 
> > >
> > > Thanks for your patience in getting this reviewed.
> > >
> > > A few remarks / questions.
> > >
> > > Which strategy is used in this implementation, read-preferring or
> > > write- preferring?  And if read-preferring is used, is there a
> > > danger of deadlock if people do unreasonable things?
> > > Maybe you could explain that, also in a comment in the code.
> > >
> >
> > Yes, the implementation use the read-preferring strategy, and comments
> > in code.
> > When adding the test cases, I didn’t meet the situation which may
> > cause the deadlock.
> > Maybe you can give more guidance about that.
> >
> > > Can you add some sort of torture test case(s) which does a lot of
> > > opening/closing/reading/writing, possibly with asynchronous I/O
> > > and/or pthreads, to catch possible problems?  If there is a system
> > > dependency or some race condition, chances are that regression testers
> will catch this.
> > >
> >
> > Sure, as your comments, in the patch V6, I added 3 test cases with
> > OpenMP to test different cases in concurrency respectively:
> > 1. find and create unit very frequently to stress read lock and write lock.
> > 2. only access the unit which exist in cache to stress read lock.
> > 3. access the same unit in concurrency.
> > For the third test case, it also help to find a bug:  When unit can't
> > be found in cache nor unit list in read phase, then threads will try
> > to acquire write lock to insert the same unit, this will cause duplicate key
> error.
> > To fix this bug, I get the unit from unit list once again before insert in 
> > write
> lock.
> > More details you can refer the patch v6.
> >
> 
> Could you help to review this update? I really appreciate your assistance.
> 

Hi Thomas, Bernhard,

Could you help to review this update?  Any concern will be appreciated.

Regards,
Lipeng Zhu
> > > With this, the libgfortran parts are OK, unless somebody else has
> > > more comments, so give this a couple of days.  I cannot approve the
> > > libgcc parts, that would be somebody else (Jakub?)
> > >
> > > Best regards
> > >
> > >   Thomas
> > >
> >
> > Best Regards,
> > Lipeng Zhu
> 
> Best Regards,
> Lipeng Zhu


RE: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-09-14 Thread Zhu, Lipeng via Gcc-patches
> Hi Thomas,
> 
> >
> > Hi Lipeng,
> >
> > > May I know any comment or concern on this patch, thanks for your
> > > time
> > > 
> >
> > Thanks for your patience in getting this reviewed.
> >
> > A few remarks / questions.
> >
> > Which strategy is used in this implementation, read-preferring or
> > write- preferring?  And if read-preferring is used, is there a danger
> > of deadlock if people do unreasonable things?
> > Maybe you could explain that, also in a comment in the code.
> >
> 
> Yes, the implementation use the read-preferring strategy, and comments in
> code.
> When adding the test cases, I didn’t meet the situation which may cause the
> deadlock.
> Maybe you can give more guidance about that.
> 
> > Can you add some sort of torture test case(s) which does a lot of
> > opening/closing/reading/writing, possibly with asynchronous I/O and/or
> > pthreads, to catch possible problems?  If there is a system dependency
> > or some race condition, chances are that regression testers will catch this.
> >
> 
> Sure, as your comments, in the patch V6, I added 3 test cases with OpenMP to
> test different cases in concurrency respectively:
> 1. find and create unit very frequently to stress read lock and write lock.
> 2. only access the unit which exist in cache to stress read lock.
> 3. access the same unit in concurrency.
> For the third test case, it also help to find a bug:  When unit can't be 
> found in
> cache nor unit list in read phase, then threads will try to acquire write 
> lock to
> insert the same unit, this will cause duplicate key error.
> To fix this bug, I get the unit from unit list once again before insert in 
> write lock.
> More details you can refer the patch v6.
> 

Could you help to review this update? I really appreciate your assistance.

> > With this, the libgfortran parts are OK, unless somebody else has more
> > comments, so give this a couple of days.  I cannot approve the libgcc
> > parts, that would be somebody else (Jakub?)
> >
> > Best regards
> >
> > Thomas
> >
> 
> Best Regards,
> Lipeng Zhu

Best Regards,
Lipeng Zhu


RE: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-08-17 Thread Zhu, Lipeng via Gcc-patches
Hi Thomas,

> 
> Hi Lipeng,
> 
> > May I know any comment or concern on this patch, thanks for your time
> > 
> 
> Thanks for your patience in getting this reviewed.
> 
> A few remarks / questions.
> 
> Which strategy is used in this implementation, read-preferring or write-
> preferring?  And if read-preferring is used, is there a danger of deadlock if
> people do unreasonable things?
> Maybe you could explain that, also in a comment in the code.
> 

Yes, the implementation use the read-preferring strategy, and comments in code.
When adding the test cases, I didn’t meet the situation which may cause the 
deadlock.
Maybe you can give more guidance about that.

> Can you add some sort of torture test case(s) which does a lot of
> opening/closing/reading/writing, possibly with asynchronous I/O and/or
> pthreads, to catch possible problems?  If there is a system dependency or
> some race condition, chances are that regression testers will catch this.
> 

Sure, as your comments, in the patch V6, I added 3 test cases with OpenMP to 
test different cases in concurrency respectively:
1. find and create unit very frequently to stress read lock and write lock.
2. only access the unit which exist in cache to stress read lock.
3. access the same unit in concurrency.
For the third test case, it also help to find a bug:  When unit can't be found 
in cache nor unit list in read phase, then threads will try to acquire write 
lock to insert the same unit, this will cause duplicate key error.  
To fix this bug, I get the unit from unit list once again before insert in 
write lock. More details you can refer the patch v6.

> With this, the libgfortran parts are OK, unless somebody else has more
> comments, so give this a couple of days.  I cannot approve the libgcc parts,
> that would be somebody else (Jakub?)
> 
> Best regards
> 
>   Thomas
> 

Best Regards,
Lipeng Zhu


Re: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-05-25 Thread Zhu, Lipeng via Gcc-patches




On 1/1/1970 8:00 AM, Thomas Koenig wrote:

Hi Lipeng,


May I know any comment or concern on this patch, thanks for your time :)



Thanks for your patience in getting this reviewed.

A few remarks / questions.

Which strategy is used in this implementation, read-preferring or 
write-preferring?  And if read-
preferring is used, is there a danger of deadlock if people do unreasonable 
things?
Maybe you could explain that, also in a comment in the code >
Can you add some sort of torture test case(s) which does a lot of 
opening/closing/reading/writing,
possibly with asynchronous I/O and/or pthreads, to catch possible problems?  If 
there is a system
dependency or some race condition, chances are that regression testers will 
catch this.


Hi Thomas,

Thanks for your time for the review.
Sure, I will add test case according to your suggestions and update the 
comment based on the implementation of "read-preferring" strategy.


Thanks,
Lipeng Zhu


With this, the libgfortran parts are OK, unless somebody else has more 
comments, so give this a couple
of days.  I cannot approve the libgcc parts, that would be somebody else 
(Jakub?)

Best regards

Thomas




Re: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-05-24 Thread Thomas Koenig via Gcc-patches

Hi Lipeng,


May I know any comment or concern on this patch, thanks for your time 


Thanks for your patience in getting this reviewed.

A few remarks / questions.

Which strategy is used in this implementation, read-preferring or
write-preferring?  And if read-preferring is used, is there
a danger of deadlock if people do unreasonable things?
Maybe you could explain that, also in a comment in the code.

Can you add some sort of torture test case(s) which does a lot of
opening/closing/reading/writing, possibly with asynchronous
I/O and/or pthreads, to catch possible problems?  If there is a
system dependency or some race condition, chances are that regression
testers will catch this.

With this, the libgfortran parts are OK, unless somebody else has more
comments, so give this a couple of days.  I cannot approve the libgcc
parts, that would be somebody else (Jakub?)

Best regards

Thomas




Re: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-05-22 Thread Zhu, Lipeng via Gcc-patches




On 5/16/2023 3:08 PM, Zhu, Lipeng wrote:



On 5/9/2023 10:32 AM, Zhu, Lipeng wrote:



On 1/1/1970 8:00 AM, Bernhard Reutner-Fischer wrote:

On Mon,  8 May 2023 17:44:43 +0800
Lipeng Zhu  wrote:


This patch try to introduce the rwlock and split the read/write to
unit_root tree and unit_cache with rwlock instead of the mutex to
increase CPU efficiency. In the get_gfc_unit function, the percentage
to step into the insert_unit function is around 30%, in most
instances, we can get the unit in the phase of reading the unit_cache
or unit_root tree. So split the read/write phase by rwlock would be an
approach to make it more parallel.

BTW, the IPC metrics can gain around 9x in our test server with 220
cores. The benchmark we used is https://github.com/rwesson/NEAT


See commentary typos below.
You did not state if you regression tested the patch?
I use valgrind --tool=helgrind or --tool=drd to test 'make 
check-fortran'. Is it necessary to add an additional unit test for 
this patch?



Other than that it LGTM but i cannot approve it.
Thank you for your kind help for this patch, is there anything that I 
can do or can you help to push this patch forward?



Hi Bernhard,

Is there any other refinement that need I to do for this patch?

Thanks.




May I know any comment or concern on this patch, thanks for your time :)




diff --git a/libgfortran/io/async.h b/libgfortran/io/async.h index
ad226c8e856..0033cc74252 100644
--- a/libgfortran/io/async.h
+++ b/libgfortran/io/async.h
@@ -210,6 +210,128 @@
  DEBUG_PRINTF ("%s" DEBUG_RED "ACQ:" DEBUG_NORM " %-30s 
%78p\n", aio_prefix, #mutex,

mutex); \

Thanks, corrected in Patch v5.


    } while (0)
+#ifdef __GTHREAD_RWLOCK_INIT
+#define RWLOCK_DEBUG_ADD(rwlock) do {    \
+    aio_rwlock_debug *n;    \
+    n = xmalloc (sizeof(aio_rwlock_debug));    \


Missing space before the open brace: sizeof (


Thanks, corrected in Patch v5.


diff --git a/libgfortran/io/unit.c b/libgfortran/io/unit.c index
82664dc5f98..62f1db21d34 100644
--- a/libgfortran/io/unit.c
+++ b/libgfortran/io/unit.c
@@ -33,34 +33,36 @@ see the files COPYING3 and COPYING.RUNTIME
respectively.  If not, see
  /* IO locking rules:
-   UNIT_LOCK is a master lock, protecting UNIT_ROOT tree and 
UNIT_CACHE.
+   UNIT_RWLOCK is a master lock, protecting UNIT_ROOT tree and 
UNIT_CACHE.

+   And use the rwlock to spilt read and write phase to UNIT_ROOT tree
+   and UNIT_CACHE to increase CPU efficiency.


s/spilt/split. Maybe:

Using an rwlock improves efficiency by allowing us to separate 
readers and writers of both UNIT_ROOT

and UNIT_CACHE.


Thanks, corrected in Patch v5.


@@ -350,6 +356,17 @@ retry:
    if (c == 0)
  break;
  }
+  /* We did not find a unit in the cache nor in the unit list, 
create a new

+    (locked) unit and insert into the unit list and cache.
+    Manipulating either or both the unit list and the unit cache 
requires to

+    hold a write-lock [for obvious reasons]:
+    1. By separating the read/write lock, it will greatly reduce 
the contention
+   at the read part, while write part is not always necessary 
or most

+   unlikely once the unit hit in cache.


+    By separating the read/write lock, we will greatly reduce the 
contention
+    on the read part, while the write part is unlikely once the unit 
hits

+    the cache.

+    2. We try to balance the implementation complexity and the 
performance

+   gains that fit into current cases we observed by just using a
+   pthread_rwlock. */


Let's drop 2.


Got it, thanks!

thanks,


Re: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-05-16 Thread Zhu, Lipeng via Gcc-patches




On 5/9/2023 10:32 AM, Zhu, Lipeng wrote:



On 1/1/1970 8:00 AM, Bernhard Reutner-Fischer wrote:

On Mon,  8 May 2023 17:44:43 +0800
Lipeng Zhu  wrote:


This patch try to introduce the rwlock and split the read/write to
unit_root tree and unit_cache with rwlock instead of the mutex to
increase CPU efficiency. In the get_gfc_unit function, the percentage
to step into the insert_unit function is around 30%, in most
instances, we can get the unit in the phase of reading the unit_cache
or unit_root tree. So split the read/write phase by rwlock would be an
approach to make it more parallel.

BTW, the IPC metrics can gain around 9x in our test server with 220
cores. The benchmark we used is https://github.com/rwesson/NEAT


See commentary typos below.
You did not state if you regression tested the patch?
I use valgrind --tool=helgrind or --tool=drd to test 'make 
check-fortran'. Is it necessary to add an additional unit test for this 
patch?



Other than that it LGTM but i cannot approve it.
Thank you for your kind help for this patch, is there anything that I 
can do or can you help to push this patch forward?



Hi Bernhard,

Is there any other refinement that need I to do for this patch?

Thanks.





diff --git a/libgfortran/io/async.h b/libgfortran/io/async.h index
ad226c8e856..0033cc74252 100644
--- a/libgfortran/io/async.h
+++ b/libgfortran/io/async.h
@@ -210,6 +210,128 @@
  DEBUG_PRINTF ("%s" DEBUG_RED "ACQ:" DEBUG_NORM " %-30s %78p\n", 
aio_prefix, #mutex,

mutex); \

Thanks, corrected in Patch v5.


    } while (0)
+#ifdef __GTHREAD_RWLOCK_INIT
+#define RWLOCK_DEBUG_ADD(rwlock) do {    \
+    aio_rwlock_debug *n;    \
+    n = xmalloc (sizeof(aio_rwlock_debug));    \


Missing space before the open brace: sizeof (


Thanks, corrected in Patch v5.


diff --git a/libgfortran/io/unit.c b/libgfortran/io/unit.c index
82664dc5f98..62f1db21d34 100644
--- a/libgfortran/io/unit.c
+++ b/libgfortran/io/unit.c
@@ -33,34 +33,36 @@ see the files COPYING3 and COPYING.RUNTIME
respectively.  If not, see
  /* IO locking rules:
-   UNIT_LOCK is a master lock, protecting UNIT_ROOT tree and 
UNIT_CACHE.
+   UNIT_RWLOCK is a master lock, protecting UNIT_ROOT tree and 
UNIT_CACHE.

+   And use the rwlock to spilt read and write phase to UNIT_ROOT tree
+   and UNIT_CACHE to increase CPU efficiency.


s/spilt/split. Maybe:

Using an rwlock improves efficiency by allowing us to separate readers 
and writers of both UNIT_ROOT

and UNIT_CACHE.


Thanks, corrected in Patch v5.


@@ -350,6 +356,17 @@ retry:
    if (c == 0)
  break;
  }
+  /* We did not find a unit in the cache nor in the unit list, 
create a new

+    (locked) unit and insert into the unit list and cache.
+    Manipulating either or both the unit list and the unit cache 
requires to

+    hold a write-lock [for obvious reasons]:
+    1. By separating the read/write lock, it will greatly reduce the 
contention
+   at the read part, while write part is not always necessary or 
most

+   unlikely once the unit hit in cache.


+    By separating the read/write lock, we will greatly reduce the 
contention
+    on the read part, while the write part is unlikely once the unit 
hits

+    the cache.

+    2. We try to balance the implementation complexity and the 
performance

+   gains that fit into current cases we observed by just using a
+   pthread_rwlock. */


Let's drop 2.


Got it, thanks!

thanks,


Re: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-05-08 Thread Zhu, Lipeng via Gcc-patches




On 1/1/1970 8:00 AM, Bernhard Reutner-Fischer wrote:

On Mon,  8 May 2023 17:44:43 +0800
Lipeng Zhu  wrote:


This patch try to introduce the rwlock and split the read/write to
unit_root tree and unit_cache with rwlock instead of the mutex to
increase CPU efficiency. In the get_gfc_unit function, the percentage
to step into the insert_unit function is around 30%, in most
instances, we can get the unit in the phase of reading the unit_cache
or unit_root tree. So split the read/write phase by rwlock would be an
approach to make it more parallel.

BTW, the IPC metrics can gain around 9x in our test server with 220
cores. The benchmark we used is https://github.com/rwesson/NEAT


See commentary typos below.
You did not state if you regression tested the patch?
I use valgrind --tool=helgrind or --tool=drd to test 'make 
check-fortran'. Is it necessary to add an additional unit test for this 
patch?



Other than that it LGTM but i cannot approve it.
Thank you for your kind help for this patch, is there anything that I 
can do or can you help to push this patch forward?





diff --git a/libgfortran/io/async.h b/libgfortran/io/async.h index
ad226c8e856..0033cc74252 100644
--- a/libgfortran/io/async.h
+++ b/libgfortran/io/async.h
@@ -210,6 +210,128 @@
  DEBUG_PRINTF ("%s" DEBUG_RED "ACQ:" DEBUG_NORM " %-30s %78p\n", 
aio_prefix, #mutex,

mutex); \

Thanks, corrected in Patch v5.


} while (0)
  
+#ifdef __GTHREAD_RWLOCK_INIT

+#define RWLOCK_DEBUG_ADD(rwlock) do {  \
+aio_rwlock_debug *n;   \
+n = xmalloc (sizeof(aio_rwlock_debug));\


Missing space before the open brace: sizeof (


Thanks, corrected in Patch v5.


diff --git a/libgfortran/io/unit.c b/libgfortran/io/unit.c index
82664dc5f98..62f1db21d34 100644
--- a/libgfortran/io/unit.c
+++ b/libgfortran/io/unit.c
@@ -33,34 +33,36 @@ see the files COPYING3 and COPYING.RUNTIME
respectively.  If not, see
  
  
  /* IO locking rules:

-   UNIT_LOCK is a master lock, protecting UNIT_ROOT tree and UNIT_CACHE.
+   UNIT_RWLOCK is a master lock, protecting UNIT_ROOT tree and UNIT_CACHE.
+   And use the rwlock to spilt read and write phase to UNIT_ROOT tree
+   and UNIT_CACHE to increase CPU efficiency.


s/spilt/split. Maybe:

Using an rwlock improves efficiency by allowing us to separate readers and 
writers of both UNIT_ROOT
and UNIT_CACHE.


Thanks, corrected in Patch v5.


@@ -350,6 +356,17 @@ retry:
if (c == 0)
break;
  }
+  /* We did not find a unit in the cache nor in the unit list, create a new
+(locked) unit and insert into the unit list and cache.
+Manipulating either or both the unit list and the unit cache requires to
+hold a write-lock [for obvious reasons]:
+1. By separating the read/write lock, it will greatly reduce the contention
+   at the read part, while write part is not always necessary or most
+   unlikely once the unit hit in cache.


+By separating the read/write lock, we will greatly reduce the contention
+on the read part, while the write part is unlikely once the unit hits
+the cache.


+2. We try to balance the implementation complexity and the performance
+   gains that fit into current cases we observed by just using a
+   pthread_rwlock. */


Let's drop 2.


Got it, thanks!

thanks,


Re: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-05-08 Thread Bernhard Reutner-Fischer via Gcc-patches
On Mon,  8 May 2023 17:44:43 +0800
Lipeng Zhu  wrote:

> This patch try to introduce the rwlock and split the read/write to
> unit_root tree and unit_cache with rwlock instead of the mutex to
> increase CPU efficiency. In the get_gfc_unit function, the percentage
> to step into the insert_unit function is around 30%, in most instances,
> we can get the unit in the phase of reading the unit_cache or unit_root
> tree. So split the read/write phase by rwlock would be an approach to
> make it more parallel.
> 
> BTW, the IPC metrics can gain around 9x in our test
> server with 220 cores. The benchmark we used is
> https://github.com/rwesson/NEAT

See commentary typos below.
You did not state if you regression tested the patch?
Other than that it LGTM but i cannot approve it.

> diff --git a/libgfortran/io/async.h b/libgfortran/io/async.h
> index ad226c8e856..0033cc74252 100644
> --- a/libgfortran/io/async.h
> +++ b/libgfortran/io/async.h
> @@ -210,6 +210,128 @@
>  DEBUG_PRINTF ("%s" DEBUG_RED "ACQ:" DEBUG_NORM " %-30s %78p\n", 
> aio_prefix, #mutex, mutex); \
>} while (0)
>  
> +#ifdef __GTHREAD_RWLOCK_INIT
> +#define RWLOCK_DEBUG_ADD(rwlock) do {\
> +aio_rwlock_debug *n; \
> +n = xmalloc (sizeof(aio_rwlock_debug));  \

Missing space before the open brace: sizeof (

> diff --git a/libgfortran/io/unit.c b/libgfortran/io/unit.c
> index 82664dc5f98..62f1db21d34 100644
> --- a/libgfortran/io/unit.c
> +++ b/libgfortran/io/unit.c
> @@ -33,34 +33,36 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
> If not, see
>  
>  
>  /* IO locking rules:
> -   UNIT_LOCK is a master lock, protecting UNIT_ROOT tree and UNIT_CACHE.
> +   UNIT_RWLOCK is a master lock, protecting UNIT_ROOT tree and UNIT_CACHE.
> +   And use the rwlock to spilt read and write phase to UNIT_ROOT tree
> +   and UNIT_CACHE to increase CPU efficiency.

s/spilt/split. Maybe:

Using an rwlock improves efficiency by allowing us to separate readers
and writers of both UNIT_ROOT and UNIT_CACHE.

> @@ -350,6 +356,17 @@ retry:
>if (c == 0)
>   break;
>  }
> +  /* We did not find a unit in the cache nor in the unit list, create a new
> +(locked) unit and insert into the unit list and cache.
> +Manipulating either or both the unit list and the unit cache requires to
> +hold a write-lock [for obvious reasons]:
> +1. By separating the read/write lock, it will greatly reduce the 
> contention
> +   at the read part, while write part is not always necessary or most
> +   unlikely once the unit hit in cache.

+By separating the read/write lock, we will greatly reduce the contention
+on the read part, while the write part is unlikely once the unit hits
+the cache.

> +2. We try to balance the implementation complexity and the performance
> +   gains that fit into current cases we observed by just using a
> +   pthread_rwlock. */

Let's drop 2.
thanks,


[PATCH v4] libgfortran: Replace mutex with rwlock

2023-05-08 Thread Lipeng Zhu via Gcc-patches
This patch try to introduce the rwlock and split the read/write to
unit_root tree and unit_cache with rwlock instead of the mutex to
increase CPU efficiency. In the get_gfc_unit function, the percentage
to step into the insert_unit function is around 30%, in most instances,
we can get the unit in the phase of reading the unit_cache or unit_root
tree. So split the read/write phase by rwlock would be an approach to
make it more parallel.

BTW, the IPC metrics can gain around 9x in our test
server with 220 cores. The benchmark we used is
https://github.com/rwesson/NEAT

libgcc/ChangeLog:

* gthr-posix.h (__GTHREAD_RWLOCK_INIT): New macro
(__gthrw): New function
(__gthread_rwlock_rdlock): New function
(__gthread_rwlock_tryrdlock): New function
(__gthread_rwlock_wrlock): New function
(__gthread_rwlock_trywrlock): New function
(__gthread_rwlock_unlock): New function

libgfortran/ChangeLog:

* io/async.c (DEBUG_LINE): New
* io/async.h (RWLOCK_DEBUG_ADD): New macro
(CHECK_RDLOCK): New macro
(CHECK_WRLOCK): New macro
(TAIL_RWLOCK_DEBUG_QUEUE): New macro
(IN_RWLOCK_DEBUG_QUEUE): New macro
(RDLOCK): New macro
(WRLOCK): New macro
(RWUNLOCK): New macro
(RD_TO_WRLOCK): New macro
(INTERN_RDLOCK): New macro
(INTERN_WRLOCK): New macro
(INTERN_RWUNLOCK): New macro
* io/io.h (internal_proto): Define unit_rwlock
* io/transfer.c (st_read_done_worker): Relace unit_lock with unit_rwlock
(st_write_done_worker): Relace unit_lock with unit_rwlock
* io/unit.c (get_gfc_unit): Relace unit_lock with unit_rwlock
(if): Relace unit_lock with unit_rwlock
(close_unit_1): Relace unit_lock with unit_rwlock
(close_units): Relace unit_lock with unit_rwlock
(newunit_alloc): Relace unit_lock with unit_rwlock
* io/unix.c (flush_all_units): Relace unit_lock with unit_rwlock

---
v1 -> v2:
Limit the pthread_rwlock usage in libgcc only when __cplusplus isn't defined.

v2 -> v3:
Rebase the patch with trunk branch.

v3 -> v4:
Update the comments.

Reviewed-by: Hongjiu Lu 
Signed-off-by: Lipeng Zhu 
---
 libgcc/gthr-posix.h   |  60 +++
 libgfortran/io/async.c|   4 +
 libgfortran/io/async.h| 151 ++
 libgfortran/io/io.h   |  15 ++--
 libgfortran/io/transfer.c |   8 +-
 libgfortran/io/unit.c |  75 +++
 libgfortran/io/unix.c |  16 ++--
 7 files changed, 283 insertions(+), 46 deletions(-)

diff --git a/libgcc/gthr-posix.h b/libgcc/gthr-posix.h
index aebcfdd9f4c..73283082997 100644
--- a/libgcc/gthr-posix.h
+++ b/libgcc/gthr-posix.h
@@ -48,6 +48,9 @@ typedef pthread_t __gthread_t;
 typedef pthread_key_t __gthread_key_t;
 typedef pthread_once_t __gthread_once_t;
 typedef pthread_mutex_t __gthread_mutex_t;
+#ifndef __cplusplus
+typedef pthread_rwlock_t __gthread_rwlock_t;
+#endif
 typedef pthread_mutex_t __gthread_recursive_mutex_t;
 typedef pthread_cond_t __gthread_cond_t;
 typedef struct timespec __gthread_time_t;
@@ -58,6 +61,9 @@ typedef struct timespec __gthread_time_t;
 
 #define __GTHREAD_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER
 #define __GTHREAD_MUTEX_INIT_FUNCTION __gthread_mutex_init_function
+#ifndef __cplusplus
+#define __GTHREAD_RWLOCK_INIT PTHREAD_RWLOCK_INITIALIZER
+#endif
 #define __GTHREAD_ONCE_INIT PTHREAD_ONCE_INIT
 #if defined(PTHREAD_RECURSIVE_MUTEX_INITIALIZER)
 #define __GTHREAD_RECURSIVE_MUTEX_INIT PTHREAD_RECURSIVE_MUTEX_INITIALIZER
@@ -135,6 +141,13 @@ __gthrw(pthread_mutexattr_init)
 __gthrw(pthread_mutexattr_settype)
 __gthrw(pthread_mutexattr_destroy)
 
+#ifndef __cplusplus
+__gthrw(pthread_rwlock_rdlock)
+__gthrw(pthread_rwlock_tryrdlock)
+__gthrw(pthread_rwlock_wrlock)
+__gthrw(pthread_rwlock_trywrlock)
+__gthrw(pthread_rwlock_unlock)
+#endif
 
 #if defined(_LIBOBJC) || defined(_LIBOBJC_WEAK)
 /* Objective-C.  */
@@ -885,6 +898,53 @@ __gthread_cond_destroy (__gthread_cond_t* __cond)
   return __gthrw_(pthread_cond_destroy) (__cond);
 }
 
+#ifndef __cplusplus
+static inline int
+__gthread_rwlock_rdlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_rdlock) (__rwlock);
+  else
+return 0;
+}
+
+static inline int
+__gthread_rwlock_tryrdlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_tryrdlock) (__rwlock);
+  else
+return 0;
+}
+
+static inline int
+__gthread_rwlock_wrlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_wrlock) (__rwlock);
+  else
+return 0;
+}
+
+static inline int
+__gthread_rwlock_trywrlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_trywrlock) (__rwlock);
+  else
+return 0;
+}
+
+static inline int
+__gthread_rwlock_unlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+return __gthrw_(pthread_rwlock_unlock) (__rwlock);
+  else
+return 0;
+}
+#endif
+
 #endif /*