Query performance issue

2020-09-04 Thread Nagaraj Raj
 I have a query which runs frequently on the DB and is very slow; it is doing a 
'seqscan'. I tried to optimize it by adding indexes in different ways, but 
nothing helps.
Any suggestions?

Query:
EXPLAIN ANALYZE select serial_no,receivingplant,sku,r3_eventtime from (select 
serial_no,receivingplant,sku,eventtime as r3_eventtime, row_number() over 
(partition by serial_no order by eventtime desc) as mpos from 
receiving_item_delivered_received where eventtype='LineItemdetailsReceived' and 
replenishmenttype = 'DC2SWARRANTY' and coalesce(serial_no,'') <> '') Rec where 
mpos = 1;

Query Planner: 
"Subquery Scan on rec  (cost=70835.30..82275.49 rows=1760 width=39) (actual 
time=2322.999..3451.783 rows=333451 loops=1)""  Filter: (rec.mpos = 1)""  Rows 
Removed by Filter: 19900""  ->  WindowAgg  (cost=70835.30..77875.42 rows=352006 
width=47) (actual time=2322.997..3414.384 rows=353351 loops=1)""        ->  
Sort  (cost=70835.30..71715.31 rows=352006 width=39) (actual 
time=2322.983..3190.090 rows=353351 loops=1)""              Sort Key: 
receiving_item_delivered_received.serial_no, 
receiving_item_delivered_received.eventtime DESC""              Sort Method: 
external merge  Disk: 17424kB""              ->  Seq Scan on 
receiving_item_delivered_received  (cost=0.00..28777.82 rows=352006 width=39) 
(actual time=0.011..184.677 rows=353351 loops=1)""                    Filter: 
(((COALESCE(serial_no, ''::character varying))::text <> ''::text) AND 
((eventtype)::text = 'LineItemdetailsReceived'::text) AND 
((replenishmenttype)::text = 'DC2SWARRANTY'::text))""                    Rows 
Removed by Filter: 55953""Planning Time: 0.197 ms""Execution Time: 3466.985 ms"
Table DDL: 
CREATE TABLE receiving_item_delivered_received(    load_dttm timestamp with 
time zone,    iamuniqueid character varying(200)  ,    batchid character 
varying(200)  ,    eventid character varying(200)  ,    eventtype character 
varying(200)  ,    eventversion character varying(200)  ,    eventtime 
timestamp with time zone,    eventproducerid character varying(200)  ,    
deliverynumber character varying(200)  ,    activityid character varying(200)  
,    applicationid character varying(200)  ,    channelid character 
varying(200)  ,    interactionid character varying(200)  ,    sessionid 
character varying(200)  ,    receivingplant character varying(200)  ,    
deliverydate date,    shipmentdate date,    shippingpoint character 
varying(200)  ,    replenishmenttype character varying(200)  ,    
numberofpackages character varying(200)  ,    carrier_id character varying(200) 
 ,    carrier_name character varying(200)  ,    billoflading character 
varying(200)  ,    pro_no character varying(200)  ,    partner_id character 
varying(200)  ,    deliveryitem character varying(200)  ,    ponumber character 
varying(200)  ,    poitem character varying(200)  ,    tracking_no character 
varying(200)  ,    serial_no character varying(200)  ,    sto_no character 
varying(200)  ,    sim_no character varying(200)  ,    sku character 
varying(200)  ,    quantity numeric(15,2),    uom character varying(200)  );

-- Index: receiving_item_delivered_rece_eventtype_replenishmenttype_c_idx
-- DROP INDEX receiving_item_delivered_rece_eventtype_replenishmenttype_c_idx;
CREATE INDEX receiving_item_delivered_rece_eventtype_replenishmenttype_c_idx    
ON receiving_item_delivered_received USING btree    (eventtype  , 
replenishmenttype  , COALESCE(serial_no, ''::character varying)  )    ;-- 
Index: receiving_item_delivered_rece_serial_no_eventtype_replenish_idx
-- DROP INDEX receiving_item_delivered_rece_serial_no_eventtype_replenish_idx;
CREATE INDEX receiving_item_delivered_rece_serial_no_eventtype_replenish_idx    
ON receiving_item_delivered_received USING btree    (serial_no  , eventtype  , 
replenishmenttype  )        WHERE eventtype::text = 
'LineItemdetailsReceived'::text AND replenishmenttype::text = 
'DC2SWARRANTY'::text AND COALESCE(serial_no, ''::character varying)::text <> 
''::text;-- Index: 
receiving_item_delivered_recei_eventtype_replenishmenttype_idx1
-- DROP INDEX receiving_item_delivered_recei_eventtype_replenishmenttype_idx1;
CREATE INDEX receiving_item_delivered_recei_eventtype_replenishmenttype_idx1    
ON receiving_item_delivered_received USING btree    (eventtype  , 
replenishmenttype  )        WHERE eventtype::text = 
'LineItemdetailsReceived'::text AND replenishmenttype::text = 
'DC2SWARRANTY'::text;-- Index: 
receiving_item_delivered_receiv_eventtype_replenishmenttype_idx
-- DROP INDEX receiving_item_delivered_receiv_eventtype_replenishmenttype_idx;
CREATE INDEX receiving_item_delivered_receiv_eventtype_replenishmenttype_idx    
ON receiving_item_delivered_received USING btree    (eventtype  , 
replenishmenttype  )    ;-- Index: 
receiving_item_delivered_received_eventtype_idx
-- DROP INDEX receiving_item_delivered_received_eventtype_idx;
CREATE INDEX receiving_item_delivered_received_eventtype_idx    ON 
receiving_item_delivered_received USIN

Re: Query performance issue

2020-09-04 Thread Thomas Kellerer

Nagaraj Raj schrieb am 04.09.2020 um 23:18:

I have a query which will more often run on DB and very slow and it
is doing 'seqscan'. I was trying to optimize it by adding indexes in
different ways but nothing helps.

EXPLAIN ANALYZE select serial_no,receivingplant,sku,r3_eventtime
from (select serial_no,receivingplant,sku,eventtime as r3_eventtime, 
row_number() over (partition by serial_no order by eventtime desc) as mpos
from receiving_item_delivered_received
where eventtype='LineItemdetailsReceived'
and replenishmenttype = 'DC2SWARRANTY'
and coalesce(serial_no,'') <> ''
) Rec where mpos = 1;


Query Planner:

"Subquery Scan on rec  (cost=70835.30..82275.49 rows=1760 width=39) (actual 
time=2322.999..3451.783 rows=333451 loops=1)"
"  Filter: (rec.mpos = 1)"
"  Rows Removed by Filter: 19900"
"  ->  WindowAgg  (cost=70835.30..77875.42 rows=352006 width=47) (actual 
time=2322.997..3414.384 rows=353351 loops=1)"
"        ->  Sort  (cost=70835.30..71715.31 rows=352006 width=39) (actual 
time=2322.983..3190.090 rows=353351 loops=1)"
"              Sort Key: receiving_item_delivered_received.serial_no, 
receiving_item_delivered_received.eventtime DESC"
"              Sort Method: external merge  Disk: 17424kB"
"              ->  Seq Scan on receiving_item_delivered_received  
(cost=0.00..28777.82 rows=352006 width=39) (actual time=0.011..184.677 rows=353351 
loops=1)"
"                    Filter: (((COALESCE(serial_no, ''::character varying))::text <> 
''::text) AND ((eventtype)::text = 'LineItemdetailsReceived'::text) AND 
((replenishmenttype)::text = 'DC2SWARRANTY'::text))"
"                    Rows Removed by Filter: 55953"
"Planning Time: 0.197 ms"
"Execution Time: 3466.985 ms"


The query retrieves nearly all rows from the table 353351 of 409304 and the Seq 
Scan takes less than 200ms, so that's not your bottleneck.
Adding indexes won't change that.

The majority of the time is spent in the sort step which is done on disk.
Try to increase work_mem until the "external merge" disappears and is done in 
memory.

Thomas




Re: Query performance issue

2020-09-04 Thread Nagaraj Raj
 query planner:SPJe | explain.depesz.com


| 
| 
|  | 
SPJe | explain.depesz.com


 |

 |

 |




On Friday, September 4, 2020, 02:19:06 PM PDT, Nagaraj Raj 
 wrote:  
 
  I have a query which will more often run on DB and very slow and it is doing 
'seqscan'. I was trying to optimize it by adding indexes in different ways but 
nothing helps.
Any suggestions?

Query:
EXPLAIN ANALYZE select serial_no,receivingplant,sku,r3_eventtime from (select 
serial_no,receivingplant,sku,eventtime as r3_eventtime, row_number() over 
(partition by serial_no order by eventtime desc) as mpos from 
receiving_item_delivered_received where eventtype='LineItemdetailsReceived' and 
replenishmenttype = 'DC2SWARRANTY' and coalesce(serial_no,'') <> '') Rec where 
mpos = 1;

Query Planner: 
"Subquery Scan on rec  (cost=70835.30..82275.49 rows=1760 width=39) (actual 
time=2322.999..3451.783 rows=333451 loops=1)""  Filter: (rec.mpos = 1)""  Rows 
Removed by Filter: 19900""  ->  WindowAgg  (cost=70835.30..77875.42 rows=352006 
width=47) (actual time=2322.997..3414.384 rows=353351 loops=1)""        ->  
Sort  (cost=70835.30..71715.31 rows=352006 width=39) (actual 
time=2322.983..3190.090 rows=353351 loops=1)""              Sort Key: 
receiving_item_delivered_received.serial_no, 
receiving_item_delivered_received.eventtime DESC""              Sort Method: 
external merge  Disk: 17424kB""              ->  Seq Scan on 
receiving_item_delivered_received  (cost=0.00..28777.82 rows=352006 width=39) 
(actual time=0.011..184.677 rows=353351 loops=1)""                    Filter: 
(((COALESCE(serial_no, ''::character varying))::text <> ''::text) AND 
((eventtype)::text = 'LineItemdetailsReceived'::text) AND 
((replenishmenttype)::text = 'DC2SWARRANTY'::text))""                    Rows 
Removed by Filter: 55953""Planning Time: 0.197 ms""Execution Time: 3466.985 ms"
Table DDL: 
CREATE TABLE receiving_item_delivered_received(    load_dttm timestamp with 
time zone,    iamuniqueid character varying(200)  ,    batchid character 
varying(200)  ,    eventid character varying(200)  ,    eventtype character 
varying(200)  ,    eventversion character varying(200)  ,    eventtime 
timestamp with time zone,    eventproducerid character varying(200)  ,    
deliverynumber character varying(200)  ,    activityid character varying(200)  
,    applicationid character varying(200)  ,    channelid character 
varying(200)  ,    interactionid character varying(200)  ,    sessionid 
character varying(200)  ,    receivingplant character varying(200)  ,    
deliverydate date,    shipmentdate date,    shippingpoint character 
varying(200)  ,    replenishmenttype character varying(200)  ,    
numberofpackages character varying(200)  ,    carrier_id character varying(200) 
 ,    carrier_name character varying(200)  ,    billoflading character 
varying(200)  ,    pro_no character varying(200)  ,    partner_id character 
varying(200)  ,    deliveryitem character varying(200)  ,    ponumber character 
varying(200)  ,    poitem character varying(200)  ,    tracking_no character 
varying(200)  ,    serial_no character varying(200)  ,    sto_no character 
varying(200)  ,    sim_no character varying(200)  ,    sku character 
varying(200)  ,    quantity numeric(15,2),    uom character varying(200)  );

-- Index: receiving_item_delivered_rece_eventtype_replenishmenttype_c_idx
-- DROP INDEX receiving_item_delivered_rece_eventtype_replenishmenttype_c_idx;
CREATE INDEX receiving_item_delivered_rece_eventtype_replenishmenttype_c_idx    
ON receiving_item_delivered_received USING btree    (eventtype  , 
replenishmenttype  , COALESCE(serial_no, ''::character varying)  )    ;-- 
Index: receiving_item_delivered_rece_serial_no_eventtype_replenish_idx
-- DROP INDEX receiving_item_delivered_rece_serial_no_eventtype_replenish_idx;
CREATE INDEX receiving_item_delivered_rece_serial_no_eventtype_replenish_idx    
ON receiving_item_delivered_received USING btree    (serial_no  , eventtype  , 
replenishmenttype  )        WHERE eventtype::text = 
'LineItemdetailsReceived'::text AND replenishmenttype::text = 
'DC2SWARRANTY'::text AND COALESCE(serial_no, ''::character varying)::text <> 
''::text;-- Index: 
receiving_item_delivered_recei_eventtype_replenishmenttype_idx1
-- DROP INDEX receiving_item_delivered_recei_eventtype_replenishmenttype_idx1;
CREATE INDEX receiving_item_delivered_recei_eventtype_replenishmenttype_idx1    
ON receiving_item_delivered_received USING btree    (eventtype  , 
replenishmenttype  )        WHERE eventtype::text = 
'LineItemdetailsReceived'::text AND replenishmenttype::text = 
'DC2SWARRANTY'::text;-- Index: 
receiving_item_delivered_receiv_eventtype_replenishmenttype_idx
-- DROP INDEX receiving_item_delivered_receiv_eventtype_replenishmenttype_idx;
CREATE INDEX receiving_item_delivered_receiv_eventtype_replenishmenttype_idx    
ON receiving_item_delivered_received USING btree    (eventtype  , 
replenishmenttype  )    ;-- Index: 
receiving_item_delivered_received_eventtyp

Re: Query performance issue

2020-09-04 Thread Tomas Vondra

On Fri, Sep 04, 2020 at 09:18:41PM +, Nagaraj Raj wrote:

I have a query which will more often run on DB and very slow and it is doing 
'seqscan'. I was trying to optimize it by adding indexes in different ways but 
nothing helps.
Any suggestions?



1) It's rather difficult to read the query plan as it's mangled by your
e-mail client. I recommend to check how to prevent the client from doing
that, or attaching the plan as a file.

2) The whole query takes ~3500ms, and the seqscan only accounts for
~200ms, so it's very clearly not the main issue.

3) Most of the time is spent in sort, so the one thing you can do is
either increasing work_mem, or adding index providing that ordering.
Even better if you include all necessary columns to allow IOS.


regards

--
Tomas Vondra  http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: Query performance issue

2020-09-04 Thread Michael Lewis
CREATE INDEX receiving_item_delivered_received
ON receiving_item_delivered_received USING btree ( eventtype,
replenishmenttype, serial_no, eventtime DESC );

>
More work_mem as Tomas suggests, but also, the above index should find the
candidate rows by the first two keys, and then be able to skip the sort by
reading just that portion of the index that matches

eventtype='LineItemdetailsReceived'
and replenishmenttype = 'DC2SWARRANTY'


Re: Query performance issue

2020-09-04 Thread Michael Lewis
Note- you may need to vacuum* the table to get full benefit of index only
scan by updating the visibility map. I think index only scan is skipped in
favor of just checking visibility when the visibility map is stale.

*NOT full


Re: Query performance issue

2020-09-04 Thread Nagaraj Raj
 Sorry, I have attached the wrong query planner, which executed in lower 
environment which has fewer resources:
Updated one,eVFiF | explain.depesz.com


| 
| 
|  | 
eVFiF | explain.depesz.com


 |

 |

 |



Thanks, Rj

On Friday, September 4, 2020, 02:39:57 PM PDT, Michael Lewis 
 wrote:  
 
 CREATE INDEX receiving_item_delivered_received ON 
receiving_item_delivered_received USING btree ( eventtype, replenishmenttype, 
serial_no, eventtime DESC );
  

More work_mem as Tomas suggests, but also, the above index should find the 
candidate rows by the first two keys, and then be able to skip the sort by 
reading just that portion of the index that matches 

eventtype='LineItemdetailsReceived' and replenishmenttype = 'DC2SWARRANTY'
  

Re: Query performance issue

2020-09-04 Thread Michael Lewis
"Subquery Scan on rec (cost=1628601.89..1676580.92 rows=7381 width=41)
(actual time=22171.986..23549.079 rows=1236042 loops=1)" " Filter:
(rec.mpos = 1)" " Rows Removed by Filter: 228737" " Buffers: shared hit=45
read=1166951" " I/O Timings: read=29.530" " -> WindowAgg
(cost=1628601.89..1658127.45 rows=1476278 width=49) (actual
time=22171.983..23379.219 rows=1464779 loops=1)" " Buffers: shared hit=45
read=1166951" " I/O Timings: read=29.530" " -> Sort
(cost=1628601.89..1632292.58 rows=1476278 width=41) (actual
time=22171.963..22484.044 rows=1464779 loops=1)" " Sort Key:
receiving_item_delivered_received.serial_no,
receiving_item_delivered_received.eventtime DESC" " Sort Method: quicksort
Memory: 163589kB" " Buffers: shared hit=45 read=1166951" " I/O Timings:
read=29.530" " -> Gather (cost=1000.00..1477331.13 rows=1476278 width=41)
(actual time=1.296..10428.060 rows=1464779 loops=1)" " Workers Planned: 2" "
Workers Launched: 2" " Buffers: shared hit=39 read=1166951" " I/O Timings:
read=29.530" " -> Parallel Seq Scan on receiving_item_delivered_received
(cost=0.00..1328703.33 rows=615116 width=41) (actual time=1.262..10150.325
rows=488260 loops=3)" " Filter: (((COALESCE(serial_no, ''::character
varying))::text <> ''::text) AND ((eventtype)::text =
'LineItemdetailsReceived'::text) AND ((replenishmenttype)::text =
'DC2SWARRANTY'::text))" " Rows Removed by Filter: 6906258" " Buffers:
shared hit=39 read=1166951" " I/O Timings: read=29.530" "Planning Time:
0.375 ms" "Execution Time: 23617.348 ms"


That is doing a lot of reading from disk. What do you have shared_buffers
set to? I'd expect better cache hits unless it is quite low or this is a
query that differs greatly from the typical work.

Also, did you try adding the index I suggested? That lowest node has 488k
rows coming out of it after throwing away 6.9 million. I would expect an
index on only eventtype, replenishmenttype to be quite helpful. I don't
assume you have tons of rows where serial_no is null.


Re: Query performance issue

2020-09-04 Thread Nagaraj Raj
 Hi Michael,
I added the index as you suggested, and the planner is now going through a bitmap 
index scan and heap scan; the new plan is: HaOx | explain.depesz.com


| 
| 
|  | 
HaOx | explain.depesz.com


 |

 |

 |


Mem config: 
Aurora PostgreSQL 11.7 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.9.3, 
64-bit
vCPU = 64, RAM = 512 GB
shared_buffers = 355 GB, work_mem = 214 MB
maintenance_work_mem = 8363 MB, effective_cache_size = 355 GB

Thanks,Rj
On Friday, September 4, 2020, 02:55:50 PM PDT, Michael Lewis 
 wrote:  
 
 "Subquery Scan on rec (cost=1628601.89..1676580.92 rows=7381 width=41) (actual 
time=22171.986..23549.079 rows=1236042 loops=1)"" Filter: (rec.mpos = 1)"" Rows 
Removed by Filter: 228737"" Buffers: shared hit=45 read=1166951"" I/O Timings: 
read=29.530"" -> WindowAgg (cost=1628601.89..1658127.45 rows=1476278 width=49) 
(actual time=22171.983..23379.219 rows=1464779 loops=1)"" Buffers: shared 
hit=45 read=1166951"" I/O Timings: read=29.530"" -> Sort 
(cost=1628601.89..1632292.58 rows=1476278 width=41) (actual 
time=22171.963..22484.044 rows=1464779 loops=1)"" Sort Key: 
receiving_item_delivered_received.serial_no, 
receiving_item_delivered_received.eventtime DESC"" Sort Method: quicksort 
Memory: 163589kB"" Buffers: shared hit=45 read=1166951"" I/O Timings: 
read=29.530"" -> Gather (cost=1000.00..1477331.13 rows=1476278 width=41) 
(actual time=1.296..10428.060 rows=1464779 loops=1)"" Workers Planned: 2"" 
Workers Launched: 2"" Buffers: shared hit=39 read=1166951"" I/O Timings: 
read=29.530"" -> Parallel Seq Scan on receiving_item_delivered_received 
(cost=0.00..1328703.33 rows=615116 width=41) (actual time=1.262..10150.325 
rows=488260 loops=3)"" Filter: (((COALESCE(serial_no, ''::character 
varying))::text <> ''::text) AND ((eventtype)::text = 
'LineItemdetailsReceived'::text) AND ((replenishmenttype)::text = 
'DC2SWARRANTY'::text))"" Rows Removed by Filter: 6906258"" Buffers: shared 
hit=39 read=1166951"" I/O Timings: read=29.530""Planning Time: 0.375 
ms""Execution Time: 23617.348 ms"

That is doing a lot of reading from disk. What do you have shared_buffers set 
to? I'd expect better cache hits unless it is quite low or this is a query that 
differs greatly from the typical work.
Also, did you try adding the index I suggested? That lowest node has 488k rows 
coming out of it after throwing away 6.9 million. I would expect an index on 
only eventtype, replenishmenttype to be quite helpful. I don't assume you have 
tons of rows where serial_no is null.