On Fri, Feb 7, 2020 at 4:54 PM Andres Freund <and...@anarazel.de> wrote:
> On February 6, 2020 11:42:30 PM PST, keisuke kuroda 
> <keisuke.kuroda.3...@gmail.com> wrote:
> >Hi,
> >
> >I have been testing with newer compiler (clang-7)
> >and result is a bit different at least with clang-7.
> >Compiling PG 12.1 (even without patch) with clang-7
> >results in __isinf() no longer being a bottleneck,
> >that is, you don't see it in profiler at all.
>
> I don't think that's necessarily the right conclusion. What's quite possibly 
> happening is that you do not see the external isinf function anymore, because 
> it is implemented as an intrinsic,  but that there still are more 
> computations being done. Due to the changed order of the isinf checks. You'd 
> have to compare with 11 using the same compiler.

I did some tests using two relatively recent compilers: gcc 8 and
clang-7 and here are the results:

Setup:

create table realtest (a real, b real, c real, d real, e real);
insert into realtest select i, i, i, i, i from generate_series(1, 1000000) i;

Test query:

/tmp/query.sql
select avg(2*dsqrt(a)), avg(2*dsqrt(b)), avg(2*dsqrt(c)),
avg(2*dsqrt(d)), avg(2*dsqrt(e)) from realtest;

pgbench -n -T 60 -f /tmp/query.sql

Latency and profiling results:

gcc 8 (gcc (GCC) 8.3.1 20190311 (Red Hat 8.3.1-3))
====

11.6

latency average = 463.968 ms

    40.62%  postgres  postgres           [.] ExecInterpExpr
     9.74%  postgres  postgres           [.] float8_accum
     6.12%  postgres  libc-2.17.so       [.] __isinf
     5.96%  postgres  postgres           [.] float8mul
     5.33%  postgres  postgres           [.] dsqrt
     3.90%  postgres  postgres           [.] ftod
     3.53%  postgres  postgres           [.] Float8GetDatum
     2.34%  postgres  postgres           [.] DatumGetFloat8
     2.15%  postgres  postgres           [.] AggCheckCallContext
     2.03%  postgres  postgres           [.] slot_deform_tuple
     1.95%  postgres  libm-2.17.so       [.] __sqrt
     1.19%  postgres  postgres           [.] check_float8_array

HEAD

latency average = 549.071 ms

    31.74%  postgres  postgres           [.] ExecInterpExpr
    11.02%  postgres  libc-2.17.so       [.] __isinf
    10.58%  postgres  postgres           [.] float8_accum
     4.84%  postgres  postgres           [.] check_float8_val
     4.66%  postgres  postgres           [.] dsqrt
     3.91%  postgres  postgres           [.] float8mul
     3.56%  postgres  postgres           [.] ftod
     3.26%  postgres  postgres           [.] Float8GetDatum
     2.91%  postgres  postgres           [.] float8_mul
     2.30%  postgres  postgres           [.] DatumGetFloat8
     2.19%  postgres  postgres           [.] slot_deform_heap_tuple
     1.81%  postgres  postgres           [.] AggCheckCallContext
     1.31%  postgres  libm-2.17.so       [.] __sqrt
     1.25%  postgres  postgres           [.] check_float8_array

HEAD + patch

latency average = 546.624 ms

    33.51%  postgres  postgres           [.] ExecInterpExpr
    10.35%  postgres  postgres           [.] float8_accum
    10.06%  postgres  libc-2.17.so       [.] __isinf
     4.58%  postgres  postgres           [.] dsqrt
     4.14%  postgres  postgres           [.] check_float8_val
     4.03%  postgres  postgres           [.] ftod
     3.54%  postgres  postgres           [.] float8mul
     2.96%  postgres  postgres           [.] Float8GetDatum
     2.38%  postgres  postgres           [.] slot_deform_heap_tuple
     2.23%  postgres  postgres           [.] DatumGetFloat8
     2.09%  postgres  postgres           [.] float8_mul
     1.88%  postgres  postgres           [.] AggCheckCallContext
     1.65%  postgres  libm-2.17.so       [.] __sqrt
     1.22%  postgres  postgres           [.] check_float8_array


clang-7 (clang version 7.0.1 (tags/RELEASE_701/final))
=====

11.6

latency average = 419.014 ms

    47.57%  postgres  postgres           [.] ExecInterpExpr
     7.99%  postgres  postgres           [.] float8_accum
     5.96%  postgres  postgres           [.] dsqrt
     4.88%  postgres  postgres           [.] float8mul
     4.23%  postgres  postgres           [.] ftod
     3.30%  postgres  postgres           [.] slot_deform_tuple
     3.19%  postgres  postgres           [.] DatumGetFloat8
     1.92%  postgres  libm-2.17.so       [.] __sqrt
     1.72%  postgres  postgres           [.] check_float8_array

HEAD

latency average = 452.958 ms

    40.55%  postgres  postgres           [.] ExecInterpExpr
    10.61%  postgres  postgres           [.] float8_accum
     4.58%  postgres  postgres           [.] dsqrt
     3.59%  postgres  postgres           [.] slot_deform_heap_tuple
     3.54%  postgres  postgres           [.] check_float8_val
     3.48%  postgres  postgres           [.] ftod
     3.42%  postgres  postgres           [.] float8mul
     3.22%  postgres  postgres           [.] DatumGetFloat8
     2.69%  postgres  postgres           [.] Float8GetDatum
     2.46%  postgres  postgres           [.] float8_mul
     2.29%  postgres  libm-2.17.so       [.] __sqrt
     1.47%  postgres  postgres           [.] check_float8_array

HEAD + patch

latency average = 452.533 ms

    41.05%  postgres  postgres           [.] ExecInterpExpr
    10.15%  postgres  postgres           [.] float8_accum
     5.62%  postgres  postgres           [.] dsqrt
     3.86%  postgres  postgres           [.] check_float8_val
     3.27%  postgres  postgres           [.] float8mul
     3.09%  postgres  postgres           [.] slot_deform_heap_tuple
     2.91%  postgres  postgres           [.] ftod
     2.88%  postgres  postgres           [.] DatumGetFloat8
     2.62%  postgres  postgres           [.] float8_mul
     2.03%  postgres  libm-2.17.so       [.] __sqrt
     2.00%  postgres  postgres           [.] check_float8_array

The patch mentioned above is this:

diff --git a/src/include/utils/float.h b/src/include/utils/float.h
index e2c5dc0f57..dc97d19293 100644
--- a/src/include/utils/float.h
+++ b/src/include/utils/float.h
@@ -136,12 +136,12 @@ static inline void
 check_float4_val(const float4 val, const bool inf_is_valid,
                  const bool zero_is_valid)
 {
-    if (!inf_is_valid && unlikely(isinf(val)))
+    if (unlikely(isinf(val)) && !inf_is_valid)
         ereport(ERROR,
                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
                  errmsg("value out of range: overflow")));

-    if (!zero_is_valid && unlikely(val == 0.0))
+    if (unlikely(val == 0.0) && !zero_is_valid)
         ereport(ERROR,
                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
                  errmsg("value out of range: underflow")));
@@ -151,12 +151,12 @@ static inline void
 check_float8_val(const float8 val, const bool inf_is_valid,
                  const bool zero_is_valid)
 {
-    if (!inf_is_valid && unlikely(isinf(val)))
+    if (unlikely(isinf(val)) && !inf_is_valid)
         ereport(ERROR,
                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
                  errmsg("value out of range: overflow")));

-    if (!zero_is_valid && unlikely(val == 0.0))
+    if (unlikely(val == 0.0) && !zero_is_valid)
         ereport(ERROR,
                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
                  errmsg("value out of range: underflow")));

So, the patch appears to do very little here. I can only conclude that
the check_float{8|4}_val() (PG 12) is slower than CHECKFLOATVAL() (PG
11) due to arguments being evaluated first.  It's entirely possible
though that the patch shown above is not enough.

Thanks,
Amit


Reply via email to