Re: Performance question concerning chicken flonum vs "foreign flonum"

2021-11-07 Thread Christian Himpe


felix.winkelm...@bevuta.com schrieb am 2021-11-07:
> > Dear Felix,
> >
> > Thank you for the patch. I built the current git head with your patch.
> > After importing chicken.flonum, I get the following error when calling fp*+:
> >

> I'm terribly sorry. I'm an ass, I didn't even test it in the interpreter. Please
> find attached a revised patch.


> felix

Dear felix,

the latest patch works. I extended my test code and here are the results:

without -C -mfma:

csc -O5 -d0 -C -O3 fma-test.scm && ./fma-test
7.998s CPU time, 0/225861 GCs (major/minor), maximum live heap: 30.78 MiB
10.104s CPU time, 0/256410 GCs (major/minor), maximum live heap: 30.78 MiB
10.69s CPU time, 0/311364 GCs (major/minor), maximum live heap: 30.78 MiB

with -C -mfma:

csc -O5 -d0 -C -O3 -C -mfma fma-test.scm && ./fma-test 
7.697s CPU time, 0/238095 GCs (major/minor), maximum live heap: 30.78 MiB
9.135s CPU time, 0/262467 GCs (major/minor), maximum live heap: 30.78 MiB
11.008s CPU time, 0/317460 GCs (major/minor), maximum live heap: 30.78 MiB

It seems the number of GCs is a lot higher than for fp*/fp+ or c99-fma, with or
without the fma compiler flag. So currently, there seems to be no benefit in
integrating c99's fma as fp*+ besides a slightly smaller rounding error. At
least for me, this is unexpected.

Thank you for providing the patch. If you want to try something else in this
regard in the future, I am happy to test further patches.

Cheers

Christian



Re: New egg: nng

2021-11-07 Thread Mario Domenech Goulart
Hi Ariela,

On Fri, 05 Nov 2021 09:31:54 -0300 Ariela Wenner  wrote:

> Welp... that's a bummer. I was sure it was a timing issue with the tests.
>
> I'll keep poking at it on different machines to see what I'm missing.
>
> Thanks for giving it a try! Cheers!

Thank you for your efforts and for investigating the issue!  It's nice
that you actually have tests and they seem to be catching issues
(assuming the problem is not in the tests themselves).

All the best.
Mario
-- 
http://parenteses.org/mario



Re: Performance question concerning chicken flonum vs "foreign flonum"

2021-11-07 Thread felix . winkelmann
> Dear Felix,
>
> Thank you for the patch. I built the current git head with your patch.
> After importing chicken.flonum, I get the following error when calling fp*+:
>

I'm terribly sorry. I'm an ass, I didn't even test it in the interpreter. Please
find attached a revised patch.


felix
From 29b7abfd1a990e1fe4fc10f3d2532eadd079151f Mon Sep 17 00:00:00 2001
From: felix 
Date: Sun, 7 Nov 2021 13:48:31 +0100
Subject: [PATCH] Add support for fused-multiply-add

(suggested by Christian Himpe on chicken-users)
---
 NEWS   | 4 
 c-platform.scm | 3 ++-
 chicken.h  | 2 ++
 lfa2.scm   | 2 ++
 library.scm| 6 ++
 manual/Module (chicken flonum) | 3 ++-
 types.db   | 3 +++
 7 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/NEWS b/NEWS
index de01c00e..69fe5054 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,10 @@
   - Default "cc" on BSD systems for building CHICKEN to avoid ABI problems
 when linking with C++ code.
 
+- Core libraries
+  - Added "fp*+" (fused multiply-add) to "chicken.flonum" module 
+(suggested by Christian Himpe).
+
 5.3.0rc4
 
 - Compiler
diff --git a/c-platform.scm b/c-platform.scm
index 00960c82..e59b1f1c 100644
--- a/c-platform.scm
+++ b/c-platform.scm
@@ -149,7 +149,7 @@
 
 (define-constant +flonum-bindings+
   (map (lambda (x) (symbol-append 'chicken.flonum# x))
-   '(fp/? fp+ fp- fp* fp/ fp> fp< fp= fp>= fp<= fpmin fpmax fpneg fpgcd
+   '(fp/? fp+ fp- fp* fp/ fp> fp< fp= fp>= fp<= fpmin fpmax fpneg fpgcd fp*+
 fpfloor fpceiling fptruncate fpround fpsin fpcos fptan fpasin fpacos
 fpatan fpatan2 fpexp fpexpt fplog fpsqrt fpabs fpinteger?)))
 
@@ -652,6 +652,7 @@
 (rewrite 'chicken.flonum#fp/? 16 2 "C_a_i_flonum_quotient_checked" #f words-per-flonum)
 (rewrite 'chicken.flonum#fpneg 16 1 "C_a_i_flonum_negate" #f words-per-flonum)
 (rewrite 'chicken.flonum#fpgcd 16 2 "C_a_i_flonum_gcd" #f words-per-flonum)
+(rewrite 'chicken.flonum#fp*+ 16 3 "C_a_i_flonum_multiply_add" #f words-per-flonum)
 
 (rewrite 'scheme#zero? 5 "C_eqp" 0 'fixnum)
 (rewrite 'scheme#zero? 2 1 "C_u_i_zerop2" #f)
diff --git a/chicken.h b/chicken.h
index 7e51a38f..ba075471 100644
--- a/chicken.h
+++ b/chicken.h
@@ -1204,6 +1204,7 @@ typedef void (C_ccall *C_proc)(C_word, C_word *) C_noret;
 #define C_a_i_flonum_plus(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) + C_flonum_magnitude(n2))
 #define C_a_i_flonum_difference(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) - C_flonum_magnitude(n2))
 #define C_a_i_flonum_times(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) * C_flonum_magnitude(n2))
+#define C_a_i_flonum_multiply_add(ptr, c, n1, n2, n3) C_flonum(ptr, fma(C_flonum_magnitude(n1), C_flonum_magnitude(n2), C_flonum_magnitude(n3)))
 #define C_a_i_flonum_quotient(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) / C_flonum_magnitude(n2))
 #define C_a_i_flonum_negate(ptr, c, n)  C_flonum(ptr, -C_flonum_magnitude(n))
 #define C_a_u_i_flonum_signum(ptr, n, x) (C_flonum_magnitude(x) == 0.0 ? (x) : ((C_flonum_magnitude(x) < 0.0) ? C_flonum(ptr, -1.0) : C_flonum(ptr, 1.0)))
@@ -1513,6 +1514,7 @@ typedef void (C_ccall *C_proc)(C_word, C_word *) C_noret;
 #define C_ub_i_flonum_difference(x, y)  ((x) - (y))
 #define C_ub_i_flonum_times(x, y)   ((x) * (y))
 #define C_ub_i_flonum_quotient(x, y)((x) / (y))
+#define C_ub_i_flonum_multiply_add(x, y, z)fma((x), (y), (z))
 
 #define C_ub_i_flonum_equalp(n1, n2)C_mk_bool((n1) == (n2))
 #define C_ub_i_flonum_greaterp(n1, n2)  C_mk_bool((n1) > (n2))
diff --git a/lfa2.scm b/lfa2.scm
index 45057578..e4bd308e 100644
--- a/lfa2.scm
+++ b/lfa2.scm
@@ -191,6 +191,7 @@
 ("C_a_i_flonum_sqrt" float)
 ("C_a_i_flonum_tan" float)
 ("C_a_i_flonum_times" float)
+("C_a_i_flonum_multiply_add" float)
 ("C_a_i_flonum_truncate" float)
 ("C_a_u_i_f64vector_ref" float)
 ("C_a_u_i_f32vector_ref" float)
@@ -201,6 +202,7 @@
   '(("C_a_i_flonum_plus" "C_ub_i_flonum_plus" op)
 ("C_a_i_flonum_difference" "C_ub_i_flonum_difference" op)
 ("C_a_i_flonum_times" "C_ub_i_flonum_times" op)
+("C_a_i_flonum_multiply_add" "C_ub_i_flonum_multiply_add" op)
 ("C_a_i_flonum_quotient" "C_ub_i_flonum_quotient" op)
 ("C_flonum_equalp" "C_ub_i_flonum_equalp" pred)
 ("C_flonum_greaterp" "C_ub_i_flonum_greaterp" pred)
diff --git a/library.scm b/library.scm
index 6c6a6942..45182e84 100644
--- a/library.scm
+++ b/library.scm
@@ -1590,6 +1590,12 @@ EOF
   (fp-check-flonums x y 'fp/)
   (##core#inline_allocate ("C_a_i_flonum_quotient" 4) x y) )
 
+(define (fp*+ x y z) 
+  (unless (and (flonum? x) (flonum? y) (flonum? z))
+(##sys#error-hook (foreign-value "C_BAD_ARGUMENT_TYPE_NO_FLONUM_ERROR" int)
+  'fp*+ x y z) )
+  (##core#inline_allocate ("C_a_i_flonum_multiply_add" 4) x y z) )
+
 (define (fpgcd x y)
   (fp-check-flonums x y 'fpgcd)
   (##core#inline_allocate 

Re: Performance question concerning chicken flonum vs "foreign flonum"

2021-11-07 Thread Christian Himpe
Dear Felix,

Thank you for the patch. I built the current git head with your patch.
After importing chicken.flonum, I get the following error when calling fp*+:

#;2> (fp*+ 1.0 2.0 3.0)

Error: unbound variable: g18021803

Call history:

  (fp*+ 1.0 2.0 3.0)
(fp*+ 1.0 2.0 3.0)<--

But, fp*+ is found:

#;2> (procedure? fp*+)
#t

I performed the following build steps:

git clone git://code.call-cc.org/chicken-core
cd, mv etc.
patch -p1 < 0001-Add-support-for-fused-multiply-add.patch
make PREFIX=XXX PLATFORM=linux OPTIMIZE_FOR_SPEED=1 CHICKEN=XXX/chicken52/bin/chicken
make PREFIX=XXX PLATFORM=linux install

Best

Christian



felix.winkelm...@bevuta.com schrieb am 2021-11-07:
> Hi!

> Here a patch against the current git HEAD, adding support for "fp*+". Please 
> give it a try, if you want.
> This is experimental, if people consider this worthwhile, I can submit it for 
> adding to the core
> system. Note that you still may need passing extra C-compiler options to 
> enable inlining of
> the fma(3) call.


> cheers,
> felix

-- 
Dr. rer. nat. Christian Himpe
University of Münster / Applied Mathematics Münster
Orléans-Ring 10 / 48149 Münster / Germany
https://himpe.science



Re: Performance question concerning chicken flonum vs "foreign flonum"

2021-11-07 Thread felix . winkelmann
Hi!

Here is a patch against the current git HEAD, adding support for "fp*+". Please
give it a try, if you want.
This is experimental; if people consider this worthwhile, I can submit it for
inclusion in the core system. Note that you may still need to pass extra
C-compiler options to enable inlining of the fma(3) call.


cheers,
felix
From 0f9c68a2b3954eb7c7d2a6075d6b4dfa3dcfb2a5 Mon Sep 17 00:00:00 2001
From: felix 
Date: Sun, 7 Nov 2021 13:48:31 +0100
Subject: [PATCH] Add support for fused-multiply-add

(suggested by Christian Himpe on chicken-users)
---
 NEWS   | 4 
 c-platform.scm | 3 ++-
 chicken.h  | 2 ++
 lfa2.scm   | 2 ++
 library.scm| 4 
 manual/Module (chicken flonum) | 3 ++-
 types.db   | 3 +++
 7 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/NEWS b/NEWS
index de01c00e..69fe5054 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,10 @@
   - Default "cc" on BSD systems for building CHICKEN to avoid ABI problems
 when linking with C++ code.
 
+- Core libraries
+  - Added "fp*+" (fused multiply-add) to "chicken.flonum" module 
+(suggested by Christian Himpe).
+
 5.3.0rc4
 
 - Compiler
diff --git a/c-platform.scm b/c-platform.scm
index 00960c82..e59b1f1c 100644
--- a/c-platform.scm
+++ b/c-platform.scm
@@ -149,7 +149,7 @@
 
 (define-constant +flonum-bindings+
   (map (lambda (x) (symbol-append 'chicken.flonum# x))
-   '(fp/? fp+ fp- fp* fp/ fp> fp< fp= fp>= fp<= fpmin fpmax fpneg fpgcd
+   '(fp/? fp+ fp- fp* fp/ fp> fp< fp= fp>= fp<= fpmin fpmax fpneg fpgcd fp*+
 fpfloor fpceiling fptruncate fpround fpsin fpcos fptan fpasin fpacos
 fpatan fpatan2 fpexp fpexpt fplog fpsqrt fpabs fpinteger?)))
 
@@ -652,6 +652,7 @@
 (rewrite 'chicken.flonum#fp/? 16 2 "C_a_i_flonum_quotient_checked" #f words-per-flonum)
 (rewrite 'chicken.flonum#fpneg 16 1 "C_a_i_flonum_negate" #f words-per-flonum)
 (rewrite 'chicken.flonum#fpgcd 16 2 "C_a_i_flonum_gcd" #f words-per-flonum)
+(rewrite 'chicken.flonum#fp*+ 16 3 "C_a_i_flonum_multiply_add" #f words-per-flonum)
 
 (rewrite 'scheme#zero? 5 "C_eqp" 0 'fixnum)
 (rewrite 'scheme#zero? 2 1 "C_u_i_zerop2" #f)
diff --git a/chicken.h b/chicken.h
index 7e51a38f..ba075471 100644
--- a/chicken.h
+++ b/chicken.h
@@ -1204,6 +1204,7 @@ typedef void (C_ccall *C_proc)(C_word, C_word *) C_noret;
 #define C_a_i_flonum_plus(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) + C_flonum_magnitude(n2))
 #define C_a_i_flonum_difference(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) - C_flonum_magnitude(n2))
 #define C_a_i_flonum_times(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) * C_flonum_magnitude(n2))
+#define C_a_i_flonum_multiply_add(ptr, c, n1, n2, n3) C_flonum(ptr, fma(C_flonum_magnitude(n1), C_flonum_magnitude(n2), C_flonum_magnitude(n3)))
 #define C_a_i_flonum_quotient(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) / C_flonum_magnitude(n2))
 #define C_a_i_flonum_negate(ptr, c, n)  C_flonum(ptr, -C_flonum_magnitude(n))
 #define C_a_u_i_flonum_signum(ptr, n, x) (C_flonum_magnitude(x) == 0.0 ? (x) : ((C_flonum_magnitude(x) < 0.0) ? C_flonum(ptr, -1.0) : C_flonum(ptr, 1.0)))
@@ -1513,6 +1514,7 @@ typedef void (C_ccall *C_proc)(C_word, C_word *) C_noret;
 #define C_ub_i_flonum_difference(x, y)  ((x) - (y))
 #define C_ub_i_flonum_times(x, y)   ((x) * (y))
 #define C_ub_i_flonum_quotient(x, y)((x) / (y))
+#define C_ub_i_flonum_multiply_add(x, y, z)fma((x), (y), (z))
 
 #define C_ub_i_flonum_equalp(n1, n2)C_mk_bool((n1) == (n2))
 #define C_ub_i_flonum_greaterp(n1, n2)  C_mk_bool((n1) > (n2))
diff --git a/lfa2.scm b/lfa2.scm
index 45057578..e4bd308e 100644
--- a/lfa2.scm
+++ b/lfa2.scm
@@ -191,6 +191,7 @@
 ("C_a_i_flonum_sqrt" float)
 ("C_a_i_flonum_tan" float)
 ("C_a_i_flonum_times" float)
+("C_a_i_flonum_multiply_add" float)
 ("C_a_i_flonum_truncate" float)
 ("C_a_u_i_f64vector_ref" float)
 ("C_a_u_i_f32vector_ref" float)
@@ -201,6 +202,7 @@
   '(("C_a_i_flonum_plus" "C_ub_i_flonum_plus" op)
 ("C_a_i_flonum_difference" "C_ub_i_flonum_difference" op)
 ("C_a_i_flonum_times" "C_ub_i_flonum_times" op)
+("C_a_i_flonum_multiply_add" "C_ub_i_flonum_multiply_add" op)
 ("C_a_i_flonum_quotient" "C_ub_i_flonum_quotient" op)
 ("C_flonum_equalp" "C_ub_i_flonum_equalp" pred)
 ("C_flonum_greaterp" "C_ub_i_flonum_greaterp" pred)
diff --git a/library.scm b/library.scm
index 6c6a6942..2fb82557 100644
--- a/library.scm
+++ b/library.scm
@@ -1590,6 +1590,10 @@ EOF
   (fp-check-flonums x y 'fp/)
   (##core#inline_allocate ("C_a_i_flonum_quotient" 4) x y) )
 
+(define (fp*+ x y z) 
+  (fp-check-flonums x y z 'fp*+)
+  (##core#inline_allocate ("C_a_i_flonum_multiply_add" 4) x y z) )
+
 (define (fpgcd x y)
   (fp-check-flonums x y 'fpgcd)
   (##core#inline_allocate ("C_a_i_flonum_gcd" 4) x y))
diff --git a/manual/Module (chicken flonum) b/manual/Module