On Thu, Sep 8, 2016 at 10:41 PM, Ashutosh Bapat <
[email protected]> wrote:
>
>
> I think we should try to measure performance gain because of aggregate
> pushdown. The EXPLAIN
> doesn't show actual improvement in the execution times.
>
I did performance testing for aggregate push down and see good performance
with the patch.
Attached is the script I have used to get the performance numbers along with
the results I got with and without patch (pg_agg_push_down_v3.patch). Also
attached few GNU plots from the readings I got. These were run on my local
VM having following details:
Linux centos7 3.10.0-327.28.3.el7.x86_64 #1 SMP Thu Aug 18 19:05:49 UTC
2016 x86_64 x86_64 x86_64 GNU/Linux
RAM alloted: 8 GB
CPUs alloted: 8
postgresql.conf is default.
With aggregate push down I see around 12x performance for count(*)
operation.
In another test, I have observed that if number of groups returned from
remote server is same as that of input rows, then aggregate push down
performs
slightly poor which I think is expected. However in all other cases where
number of groups are less than input rows, pushing down aggregate gives
better
performance than performing grouping on the local server.
I did this performance testing on my local setup. I would expected even
better
numbers on a specialized high-end performance machine. I would be more than
happy if someone does this testing on such high-end machine.
Let me know if you need any help.
Thanks
--
Jeevan B Chalke
Principal Software Engineer, Product Development
EnterpriseDB Corporation
The Enterprise PostgreSQL Company
=== WITH PATCH ===
query | rows
| avg_exe_time | std_dev_exe_time | min_exe_time | max_exe_time
---------------------------------------------------------------------+---------+--------------+------------------+--------------+--------------
select count(*) from fperf1 | 1
| 141.8686 | 4.4668353500495 | 138.903 | 152.241
select c2, avg(c1) from fperf1 group by c2 having count(*) < 333334 | 2
| 405.7661 | 11.9403142844368 | 400.689 | 439.675
select c2, sum(c1) from fperf1 group by c2 | 3
| 363.2299 | 4.29278180851687 | 354.815 | 369.739
select c3, avg(c1), sum(c2) from fperf1 group by c3 | 5
| 454.4478 | 3.98680590057494 | 447.248 | 457.955
select c4, avg(c1), sum(c2) from fperf1 group by c4 | 10
| 501.0197 | 5.26951028823454 | 491.726 | 508.574
select c5, avg(c1), sum(c2) from fperf1 group by c5 | 100
| 490.0783 | 5.64261263462349 | 480.78 | 496.662
select c6, avg(c1), sum(c2) from fperf1 group by c6 | 1000
| 582.6842 | 9.9196984474776 | 564.425 | 592.69
select c1%1, avg(c1), sum(c2) from fperf1 group by c1%1 | 1
| 901.1682 | 9.58382273302904 | 888.383 | 923.386
select c1%10, avg(c1), sum(c2) from fperf1 group by c1%10 | 10
| 1032.1959 | 6.89087326268629 | 1018.598 | 1045.423
select c1%100, avg(c1), sum(c2) from fperf1 group by c1%100 | 100
| 1076.3834 | 11.1022883947539 | 1061.305 | 1093.892
select c1%1000, avg(c1), sum(c2) from fperf1 group by c1%1000 | 1000
| 1113.6001 | 11.2143472634172 | 1092.863 | 1133.007
select c1%10000, avg(c1), sum(c2) from fperf1 group by c1%10000 | 10000
| 1182.1374 | 32.5953859659133 | 1148.961 | 1231.296
select c1%100000, avg(c1), sum(c2) from fperf1 group by c1%100000 | 100000
| 1467.1811 | 14.3535175437048 | 1443.95 | 1485.645
select c1%1000000, avg(c1), sum(c2) from fperf1 group by c1%1000000 | 1000000
| 5466.2306 | 633.367848489717 | 5127.339 | 7248.381
(14 rows)
=== WITHOUT PATCH ===
query | rows
| avg_exe_time | std_dev_exe_time | min_exe_time | max_exe_time
---------------------------------------------------------------------+---------+--------------+------------------+--------------+--------------
select count(*) from fperf1 | 1
| 1674.5339 | 27.1549108570754 | 1637.345 | 1725.057
select c2, avg(c1) from fperf1 group by c2 having count(*) < 333334 | 2
| 2467.8368 | 22.606929678949 | 2437.674 | 2506.438
select c2, sum(c1) from fperf1 group by c2 | 3
| 2387.39 | 34.3686766983568 | 2350.396 | 2444.313
select c3, avg(c1), sum(c2) from fperf1 group by c3 | 5
| 2702.3344 | 28.0312843452488 | 2665.317 | 2746.862
select c4, avg(c1), sum(c2) from fperf1 group by c4 | 10
| 2850.9818 | 42.5758532759606 | 2813.562 | 2946.991
select c5, avg(c1), sum(c2) from fperf1 group by c5 | 100
| 3519.9419 | 34.0792701064716 | 3473.709 | 3583.043
select c6, avg(c1), sum(c2) from fperf1 group by c6 | 1000
| 3030.4009 | 27.6855914248393 | 3008.713 | 3094.22
select c1%1, avg(c1), sum(c2) from fperf1 group by c1%1 | 1
| 2577.7204 | 32.2177922141319 | 2551.266 | 2644.744
select c1%10, avg(c1), sum(c2) from fperf1 group by c1%10 | 10
| 2615.7951 | 19.8623889434851 | 2592.613 | 2646.696
select c1%100, avg(c1), sum(c2) from fperf1 group by c1%100 | 100
| 2611.5906 | 26.5091683017013 | 2578.586 | 2664.936
select c1%1000, avg(c1), sum(c2) from fperf1 group by c1%1000 | 1000
| 2624.3797 | 18.1034474559238 | 2602.988 | 2665.453
select c1%10000, avg(c1), sum(c2) from fperf1 group by c1%10000 | 10000
| 2702.8801 | 26.5523021159207 | 2663.27 | 2742.34
select c1%100000, avg(c1), sum(c2) from fperf1 group by c1%100000 | 100000
| 3019.5939 | 34.1965254942112 | 2978.507 | 3076.033
select c1%1000000, avg(c1), sum(c2) from fperf1 group by c1%1000000 | 1000000
| 4555.6334 | 46.5314774013736 | 4496.797 | 4647.332
(14 rows)
drop foreign table fperf1;
drop user mapping for current_user server srv;
drop server srv;
drop table perf1;
drop extension postgres_fdw;
create extension postgres_fdw;
create table perf1(
c1 int primary key,
c2 int not null,
c3 int,
c4 text,
c5 timestamptz,
c6 char(10)
);
insert into perf1 select
id,
id % 3,
id % 5,
to_char(id%10, 'FM0000000'),
'1970-01-01'::timestamptz + ((id % 100) || ' days')::interval,
id % 1000
from generate_series(1, 1000000) id;
do $d$
begin
execute $$create server srv foreign data wrapper postgres_fdw
options (dbname '$$||current_database()||$$',
port '$$||current_setting('port')||$$'
)$$;
end;
$d$;
create user mapping for current_user server srv;
create foreign table fperf1(
c1 int,
c2 int,
c3 int,
c4 text,
c5 timestamptz,
c6 char(10)
) server srv options (table_name 'perf1');
analyze perf1;
-- This part is taken from Ashutosh Bapat's script posted for
-- partitionwise join and modified per my requirement.
-- Table to collect data
drop table perf;
create table perf(query text, rows bigint,
avg_exe_time float, std_dev_exe_time float,
min_exe_time float, max_exe_time float);
drop function query_execution_stats(num_samples int);
create function query_execution_stats(num_samples int)
RETURNS void LANGUAGE plpgsql AS $$
DECLARE
q record;
plan json;
e json;
avg_exe_time float;
std_dev_exe_time float;
min_exe_time float;
max_exe_time float;
BEGIN
CREATE TEMPORARY TABLE query_exe_times(exe_time float);
FOR q IN select query from querytab LOOP
-- Execute query a few times to warm the cache
FOR i IN 1 .. num_samples/5 LOOP
EXECUTE q.query;
END LOOP;
TRUNCATE query_exe_times;
RAISE NOTICE 'Perf testing query: %', q.query;
EXECUTE 'EXPLAIN (analyze, verbose, format json) ' || q.query INTO e;
-- RAISE NOTICE 'Remote SQL: %', e->0->'Plan'->'Remote SQL';
FOR i IN 1 .. num_samples LOOP
EXECUTE 'EXPLAIN (analyze, format json) ' || q.query INTO plan;
INSERT INTO query_exe_times VALUES ((plan->0->'Execution Time')::text::float);
-- RAISE NOTICE ' completed % samples', i;
END LOOP;
SELECT avg(exe_time), stddev(exe_time), min(exe_time), max(exe_time)
INTO avg_exe_time, std_dev_exe_time, min_exe_time, max_exe_time
FROM query_exe_times;
INSERT INTO perf VALUES (q.query, (e->0->'Plan'->'Actual Rows')::text::bigint,
avg_exe_time, std_dev_exe_time, min_exe_time, max_exe_time);
END LOOP;
DROP TABLE query_exe_times;
END;
$$;
--Run queries now
drop table querytab;
create table querytab(query text);
-- Simple count(*) on large table
insert into querytab values ('select count(*) from fperf1');
insert into querytab values ('select c2, avg(c1) from fperf1 group by c2 having count(*) < 333334');
insert into querytab values ('select c2, sum(c1) from fperf1 group by c2');
insert into querytab values ('select c3, avg(c1), sum(c2) from fperf1 group by c3');
insert into querytab values ('select c4, avg(c1), sum(c2) from fperf1 group by c4');
insert into querytab values ('select c5, avg(c1), sum(c2) from fperf1 group by c5');
insert into querytab values ('select c6, avg(c1), sum(c2) from fperf1 group by c6');
insert into querytab values ('select c1%1, avg(c1), sum(c2) from fperf1 group by c1%1');
insert into querytab values ('select c1%10, avg(c1), sum(c2) from fperf1 group by c1%10');
insert into querytab values ('select c1%100, avg(c1), sum(c2) from fperf1 group by c1%100');
insert into querytab values ('select c1%1000, avg(c1), sum(c2) from fperf1 group by c1%1000');
insert into querytab values ('select c1%10000, avg(c1), sum(c2) from fperf1 group by c1%10000');
insert into querytab values ('select c1%100000, avg(c1), sum(c2) from fperf1 group by c1%100000');
insert into querytab values ('select c1%1000000, avg(c1), sum(c2) from fperf1 group by c1%1000000');
\set num_samples 10
select query_execution_stats(:num_samples);
select * from perf;
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers