Re: Performance question concerning chicken flonum vs "foreign flonum"
felix.winkelm...@bevuta.com schrieb am 2021-11-07: > > Dear Felix, > > > > Thank you for the patch. I built the current git head with your patch. > > After importing chicken.flonum, I get the following error when calling fp*+: > > > I'm terribly sorry. I'm an ass, I didn't even test it in the interpreter. > Please find attached a revised patch. > felix Dear felix, the latest patch works. I extended my test code and here are the results: without -C -mfma: csc -O5 -d0 -C -O3 fma-test.scm && ./fma-test 7.998s CPU time, 0/225861 GCs (major/minor), maximum live heap: 30.78 MiB 10.104s CPU time, 0/256410 GCs (major/minor), maximum live heap: 30.78 MiB 10.69s CPU time, 0/311364 GCs (major/minor), maximum live heap: 30.78 MiB with -C -mfma: csc -O5 -d0 -C -O3 -C -mfma fma-test.scm && ./fma-test 7.697s CPU time, 0/238095 GCs (major/minor), maximum live heap: 30.78 MiB 9.135s CPU time, 0/262467 GCs (major/minor), maximum live heap: 30.78 MiB 11.008s CPU time, 0/317460 GCs (major/minor), maximum live heap: 30.78 MiB It seems the number of GCs is a lot higher than for fp*/fp+ or c99-fma, with or without the fma compiler flag. So currently, there seems to be no benefit in integrating c99's fma as fp*+ besides a slightly better rounding error. At least for me, this is unexpected. Thank you for providing the patch. If you want to test something in this regard in the future, I am happy to test further patches. Cheers Christian
Re: New egg: nng
Hi Ariela, On Fri, 05 Nov 2021 09:31:54 -0300 Ariela Wenner wrote: > Welp... that's a bummer. I was sure it was a timing issue with the tests. > > I'll keep poking at it on different machines to see what I'm missing. > > Thanks for giving it a try! Cheers! Thank you for your efforts and for investigating the issue! It's nice that you actually have tests and they seem to be catching issues (assuming the problem is not in the tests themselves). All the best. Mario -- http://parenteses.org/mario
Re: Performance question concerning chicken flonum vs "foreign flonum"
> Dear Felix, > > Thank you for the patch. I built the current git head with your patch. > After importing chicken.flonum, I get the following error when calling fp*+: > I'm terribly sorry. I'm an ass, I didn't even test it in the interpreter. Please find attached a revised patch. felix From 29b7abfd1a990e1fe4fc10f3d2532eadd079151f Mon Sep 17 00:00:00 2001 From: felix Date: Sun, 7 Nov 2021 13:48:31 +0100 Subject: [PATCH] Add support for fused-multiply-add (suggested by Christian Himpe on chicken-users) --- NEWS | 4 c-platform.scm | 3 ++- chicken.h | 2 ++ lfa2.scm | 2 ++ library.scm| 6 ++ manual/Module (chicken flonum) | 3 ++- types.db | 3 +++ 7 files changed, 21 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index de01c00e..69fe5054 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,10 @@ - Default "cc" on BSD systems for building CHICKEN to avoid ABI problems when linking with C++ code. +- Core libraries + - Added "fp*+" (fused multiply-add) to "chicken.flonum" module +(suggested by Christian Himpe). + 5.3.0rc4 - Compiler diff --git a/c-platform.scm b/c-platform.scm index 00960c82..e59b1f1c 100644 --- a/c-platform.scm +++ b/c-platform.scm @@ -149,7 +149,7 @@ (define-constant +flonum-bindings+ (map (lambda (x) (symbol-append 'chicken.flonum# x)) - '(fp/? fp+ fp- fp* fp/ fp> fp< fp= fp>= fp<= fpmin fpmax fpneg fpgcd + '(fp/? fp+ fp- fp* fp/ fp> fp< fp= fp>= fp<= fpmin fpmax fpneg fpgcd fp*+ fpfloor fpceiling fptruncate fpround fpsin fpcos fptan fpasin fpacos fpatan fpatan2 fpexp fpexpt fplog fpsqrt fpabs fpinteger?))) @@ -652,6 +652,7 @@ (rewrite 'chicken.flonum#fp/? 16 2 "C_a_i_flonum_quotient_checked" #f words-per-flonum) (rewrite 'chicken.flonum#fpneg 16 1 "C_a_i_flonum_negate" #f words-per-flonum) (rewrite 'chicken.flonum#fpgcd 16 2 "C_a_i_flonum_gcd" #f words-per-flonum) +(rewrite 'chicken.flonum#fp*+ 16 3 "C_a_i_flonum_multiply_add" #f words-per-flonum) (rewrite 'scheme#zero? 5 "C_eqp" 0 'fixnum) (rewrite 'scheme#zero? 
2 1 "C_u_i_zerop2" #f) diff --git a/chicken.h b/chicken.h index 7e51a38f..ba075471 100644 --- a/chicken.h +++ b/chicken.h @@ -1204,6 +1204,7 @@ typedef void (C_ccall *C_proc)(C_word, C_word *) C_noret; #define C_a_i_flonum_plus(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) + C_flonum_magnitude(n2)) #define C_a_i_flonum_difference(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) - C_flonum_magnitude(n2)) #define C_a_i_flonum_times(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) * C_flonum_magnitude(n2)) +#define C_a_i_flonum_multiply_add(ptr, c, n1, n2, n3) C_flonum(ptr, fma(C_flonum_magnitude(n1), C_flonum_magnitude(n2), C_flonum_magnitude(n3))) #define C_a_i_flonum_quotient(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) / C_flonum_magnitude(n2)) #define C_a_i_flonum_negate(ptr, c, n) C_flonum(ptr, -C_flonum_magnitude(n)) #define C_a_u_i_flonum_signum(ptr, n, x) (C_flonum_magnitude(x) == 0.0 ? (x) : ((C_flonum_magnitude(x) < 0.0) ? C_flonum(ptr, -1.0) : C_flonum(ptr, 1.0))) @@ -1513,6 +1514,7 @@ typedef void (C_ccall *C_proc)(C_word, C_word *) C_noret; #define C_ub_i_flonum_difference(x, y) ((x) - (y)) #define C_ub_i_flonum_times(x, y) ((x) * (y)) #define C_ub_i_flonum_quotient(x, y)((x) / (y)) +#define C_ub_i_flonum_multiply_add(x, y, z)fma((x), (y), (z)) #define C_ub_i_flonum_equalp(n1, n2)C_mk_bool((n1) == (n2)) #define C_ub_i_flonum_greaterp(n1, n2) C_mk_bool((n1) > (n2)) diff --git a/lfa2.scm b/lfa2.scm index 45057578..e4bd308e 100644 --- a/lfa2.scm +++ b/lfa2.scm @@ -191,6 +191,7 @@ ("C_a_i_flonum_sqrt" float) ("C_a_i_flonum_tan" float) ("C_a_i_flonum_times" float) +("C_a_i_flonum_multiply_add" float) ("C_a_i_flonum_truncate" float) ("C_a_u_i_f64vector_ref" float) ("C_a_u_i_f32vector_ref" float) @@ -201,6 +202,7 @@ '(("C_a_i_flonum_plus" "C_ub_i_flonum_plus" op) ("C_a_i_flonum_difference" "C_ub_i_flonum_difference" op) ("C_a_i_flonum_times" "C_ub_i_flonum_times" op) +("C_a_i_flonum_multiply_add" "C_ub_i_flonum_multiply_add" op) 
("C_a_i_flonum_quotient" "C_ub_i_flonum_quotient" op) ("C_flonum_equalp" "C_ub_i_flonum_equalp" pred) ("C_flonum_greaterp" "C_ub_i_flonum_greaterp" pred) diff --git a/library.scm b/library.scm index 6c6a6942..45182e84 100644 --- a/library.scm +++ b/library.scm @@ -1590,6 +1590,12 @@ EOF (fp-check-flonums x y 'fp/) (##core#inline_allocate ("C_a_i_flonum_quotient" 4) x y) ) +(define (fp*+ x y z) + (unless (and (flonum? x) (flonum? y) (flonum? z)) +(##sys#error-hook (foreign-value "C_BAD_ARGUMENT_TYPE_NO_FLONUM_ERROR" int) + 'fp*+ x y z) ) + (##core#inline_allocate ("C_a_i_flonum_multiply_add" 4) x y z) ) + (define (fpgcd x y) (fp-check-flonums x y 'fpgcd) (##core#inline_allocate
Re: Performance question concerning chicken flonum vs "foreign flonum"
Dear Felix, Thank you for the patch. I built the current git head with your patch. After importing chicken.flonum, I get the following error when calling fp*+: #;2> (fp*+ 1.0 2.0 3.0) Error: unbound variable: g18021803 Call history: (fp*+ 1.0 2.0 3.0) (fp*+ 1.0 2.0 3.0)<-- But fp*+ is found: #;2> (procedure? fp*+) #t I performed the following build steps: git clone git://code.call-cc.org/chicken-core cd, mv etc. patch -p1 < 0001-Add-support-for-fused-multiply-add.patch make PREFIX=XXX PLATFORM=linux OPTIMIZE_FOR_SPEED=1 CHICKEN=XXX/chicken52/bin/chicken make PREFIX=XXX PLATFORM=linux install Best Christian felix.winkelm...@bevuta.com schrieb am 2021-11-07: > Hi! > Here is a patch against the current git HEAD, adding support for "fp*+". Please > give it a try, if you want. > This is experimental; if people consider this worthwhile, I can submit it for > adding to the core > system. Note that you may still need to pass extra C-compiler options to > enable inlining of > the fma(3) call. > cheers, > felix -- Dr. rer. nat. Christian Himpe University of Münster / Applied Mathematics Münster Orléans-Ring 10 / 48149 Münster / Germany https://himpe.science
Re: Performance question concerning chicken flonum vs "foreign flonum"
Hi! Here a patch against the current git HEAD, adding support for "fp*+". Please give it a try, if you want. This is experimental, if people consider this worthwhile, I can submit it for adding to the core system. Note that you still may need passing extra C-compiler options to enable inlining of the fma(3) call. cheers, felix From 0f9c68a2b3954eb7c7d2a6075d6b4dfa3dcfb2a5 Mon Sep 17 00:00:00 2001 From: felix Date: Sun, 7 Nov 2021 13:48:31 +0100 Subject: [PATCH] Add support for fused-multiply-add (suggested by Christian Himpe on chicken-users) --- NEWS | 4 c-platform.scm | 3 ++- chicken.h | 2 ++ lfa2.scm | 2 ++ library.scm| 4 manual/Module (chicken flonum) | 3 ++- types.db | 3 +++ 7 files changed, 19 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index de01c00e..69fe5054 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,10 @@ - Default "cc" on BSD systems for building CHICKEN to avoid ABI problems when linking with C++ code. +- Core libraries + - Added "fp*+" (fused multiply-add) to "chicken.flonum" module +(suggested by Christian Himpe). + 5.3.0rc4 - Compiler diff --git a/c-platform.scm b/c-platform.scm index 00960c82..e59b1f1c 100644 --- a/c-platform.scm +++ b/c-platform.scm @@ -149,7 +149,7 @@ (define-constant +flonum-bindings+ (map (lambda (x) (symbol-append 'chicken.flonum# x)) - '(fp/? fp+ fp- fp* fp/ fp> fp< fp= fp>= fp<= fpmin fpmax fpneg fpgcd + '(fp/? fp+ fp- fp* fp/ fp> fp< fp= fp>= fp<= fpmin fpmax fpneg fpgcd fp*+ fpfloor fpceiling fptruncate fpround fpsin fpcos fptan fpasin fpacos fpatan fpatan2 fpexp fpexpt fplog fpsqrt fpabs fpinteger?))) @@ -652,6 +652,7 @@ (rewrite 'chicken.flonum#fp/? 16 2 "C_a_i_flonum_quotient_checked" #f words-per-flonum) (rewrite 'chicken.flonum#fpneg 16 1 "C_a_i_flonum_negate" #f words-per-flonum) (rewrite 'chicken.flonum#fpgcd 16 2 "C_a_i_flonum_gcd" #f words-per-flonum) +(rewrite 'chicken.flonum#fp*+ 16 3 "C_a_i_flonum_multiply_add" #f words-per-flonum) (rewrite 'scheme#zero? 5 "C_eqp" 0 'fixnum) (rewrite 'scheme#zero? 
2 1 "C_u_i_zerop2" #f) diff --git a/chicken.h b/chicken.h index 7e51a38f..ba075471 100644 --- a/chicken.h +++ b/chicken.h @@ -1204,6 +1204,7 @@ typedef void (C_ccall *C_proc)(C_word, C_word *) C_noret; #define C_a_i_flonum_plus(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) + C_flonum_magnitude(n2)) #define C_a_i_flonum_difference(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) - C_flonum_magnitude(n2)) #define C_a_i_flonum_times(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) * C_flonum_magnitude(n2)) +#define C_a_i_flonum_multiply_add(ptr, c, n1, n2, n3) C_flonum(ptr, fma(C_flonum_magnitude(n1), C_flonum_magnitude(n2), C_flonum_magnitude(n3))) #define C_a_i_flonum_quotient(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) / C_flonum_magnitude(n2)) #define C_a_i_flonum_negate(ptr, c, n) C_flonum(ptr, -C_flonum_magnitude(n)) #define C_a_u_i_flonum_signum(ptr, n, x) (C_flonum_magnitude(x) == 0.0 ? (x) : ((C_flonum_magnitude(x) < 0.0) ? C_flonum(ptr, -1.0) : C_flonum(ptr, 1.0))) @@ -1513,6 +1514,7 @@ typedef void (C_ccall *C_proc)(C_word, C_word *) C_noret; #define C_ub_i_flonum_difference(x, y) ((x) - (y)) #define C_ub_i_flonum_times(x, y) ((x) * (y)) #define C_ub_i_flonum_quotient(x, y)((x) / (y)) +#define C_ub_i_flonum_multiply_add(x, y, z)fma((x), (y), (z)) #define C_ub_i_flonum_equalp(n1, n2)C_mk_bool((n1) == (n2)) #define C_ub_i_flonum_greaterp(n1, n2) C_mk_bool((n1) > (n2)) diff --git a/lfa2.scm b/lfa2.scm index 45057578..e4bd308e 100644 --- a/lfa2.scm +++ b/lfa2.scm @@ -191,6 +191,7 @@ ("C_a_i_flonum_sqrt" float) ("C_a_i_flonum_tan" float) ("C_a_i_flonum_times" float) +("C_a_i_flonum_multiply_add" float) ("C_a_i_flonum_truncate" float) ("C_a_u_i_f64vector_ref" float) ("C_a_u_i_f32vector_ref" float) @@ -201,6 +202,7 @@ '(("C_a_i_flonum_plus" "C_ub_i_flonum_plus" op) ("C_a_i_flonum_difference" "C_ub_i_flonum_difference" op) ("C_a_i_flonum_times" "C_ub_i_flonum_times" op) +("C_a_i_flonum_multiply_add" "C_ub_i_flonum_multiply_add" op) 
("C_a_i_flonum_quotient" "C_ub_i_flonum_quotient" op) ("C_flonum_equalp" "C_ub_i_flonum_equalp" pred) ("C_flonum_greaterp" "C_ub_i_flonum_greaterp" pred) diff --git a/library.scm b/library.scm index 6c6a6942..2fb82557 100644 --- a/library.scm +++ b/library.scm @@ -1590,6 +1590,10 @@ EOF (fp-check-flonums x y 'fp/) (##core#inline_allocate ("C_a_i_flonum_quotient" 4) x y) ) +(define (fp*+ x y z) + (fp-check-flonums x y z 'fp*+) + (##core#inline_allocate ("C_a_i_flonum_multiply_add" 4) x y z) ) + (define (fpgcd x y) (fp-check-flonums x y 'fpgcd) (##core#inline_allocate ("C_a_i_flonum_gcd" 4) x y)) diff --git a/manual/Module (chicken flonum) b/manual/Module