On 24/08/16 16:52, Mark Kirkwood wrote:
On 24/08/16 16:33, Amit Kapila wrote:
On Wed, Aug 24, 2016 at 9:53 AM, Mark Kirkwood
<mark.kirkw...@catalyst.net.nz> wrote:
On 24/08/16 15:36, Amit Kapila wrote:
On Wed, Aug 24, 2016 at 8:54 AM, Mark Kirkwood
<mark.kirkw...@catalyst.net.nz> wrote:
Can you get the call stacks?

For every stuck backend? (just double checking)...
Yeah.


Cool,

I managed to reproduce with a reduced workload of 4 backends, then noticed that the traces for 3 of 'em were all the same. So I've attached the 2 unique ones, plus noted what pg_stat_activity thought the wait event was, in case that is useful.

...actually I was wrong there, only 2 of them were the same. So I've attached a new log of bt's of them all.


$ pgbench -c 4 -T600 bench

postgres=# SELECT pid, state,now()-xact_start AS 
wait,wait_event_type,wait_event,query FROM pg_stat_activity WHERE 
datname='bench' ORDER BY wait DESC;  pid  | state  |      wait       | 
wait_event_type |   wait_event   |                                              
       query                                   
                   
-------+--------+-----------------+-----------------+----------------+---------------------------------------------------------------------------------------------
-------------------
 16549 | active | 00:06:00.252146 | LWLockTranche   | buffer_content | INSERT 
INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (190, 24, 9513890, 
-2566, C
URRENT_TIMESTAMP);
 16548 | active | 00:06:00.235003 | LWLockTranche   | buffer_content | INSERT 
INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (445, 3, 5688076, 
1021, CUR
RENT_TIMESTAMP);
 16547 | active | 00:06:00.218032 | LWLockTranche   | buffer_content | INSERT 
INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (803, 97, 6871273, 
-35, CUR
RENT_TIMESTAMP);
 16546 | active | 00:06:00.192543 | Lock            | transactionid  | UPDATE 
pgbench_branches SET bbalance = bbalance + -2823 WHERE bid = 3;
(4 rows)

$ gdb 16546
(gdb) bt
#0  0x00007f51daa3a133 in __epoll_wait_nocancel () at 
../sysdeps/unix/syscall-template.S:84
#1  0x00000000006abdb6 in WaitEventSetWaitBlock (nevents=1, 
occurred_events=0x7fffd197d3a0, cur_timeout=-1, set=0xdc9288)
    at latch.c:987
#2  WaitEventSetWait (set=set@entry=0xdc9288, timeout=timeout@entry=-1, 
occurred_events=occurred_events@entry=0x7fffd197d3a0, 
    nevents=nevents@entry=1) at latch.c:941
#3  0x00000000006ac1ca in WaitLatchOrSocket (latch=0x7f51d9fd969c, 
wakeEvents=wakeEvents@entry=1, sock=sock@entry=-1, 
    timeout=-1, timeout@entry=0) at latch.c:347
#4  0x00000000006ac27d in WaitLatch (latch=<optimized out>, 
wakeEvents=wakeEvents@entry=1, timeout=timeout@entry=0)
    at latch.c:302
#5  0x00000000006bb5e3 in ProcSleep (locallock=locallock@entry=0xd3d690, 
    lockMethodTable=lockMethodTable@entry=0x9149a0 <default_lockmethod>) at 
proc.c:1219
#6  0x00000000006b639d in WaitOnLock (locallock=locallock@entry=0xd3d690, 
owner=owner@entry=0xdc4cb0) at lock.c:1703
#7  0x00000000006b7a77 in LockAcquireExtended 
(locktag=locktag@entry=0x7fffd197d700, lockmode=lockmode@entry=5, 
    sessionLock=sessionLock@entry=0 '\000', dontWait=dontWait@entry=0 '\000', 
reportMemoryError=reportMemoryError@entry=1 '\001')
    at lock.c:998
#8  0x00000000006b7d51 in LockAcquire (locktag=locktag@entry=0x7fffd197d700, 
lockmode=lockmode@entry=5, 
    sessionLock=sessionLock@entry=0 '\000', dontWait=dontWait@entry=0 '\000') 
at lock.c:688
#9  0x00000000006b58fe in XactLockTableWait (xid=xid@entry=7667, 
rel=rel@entry=0x7f51db5c6398, ctid=ctid@entry=0x7fffd197d844, 
    oper=oper@entry=XLTW_Update) at lmgr.c:587
#10 0x00000000004a5d29 in heap_update (relation=relation@entry=0x7f51db5c6398, 
otid=otid@entry=0x7fffd197d9e0, 
    newtup=newtup@entry=0xd4b3d0, cid=2, crosscheck=0x0, wait=wait@entry=1 
'\001', hufd=0x7fffd197d8f0, lockmode=0x7fffd197d8ec)
    at heapam.c:3755
#11 0x00000000005dbeaa in ExecUpdate (tupleid=tupleid@entry=0x7fffd197d9e0, 
oldtuple=oldtuple@entry=0x0, 
    slot=slot@entry=0xd4adb0, planSlot=planSlot@entry=0xdc8ea0, 
epqstate=epqstate@entry=0xdc76c8, estate=estate@entry=0xdc73d0, 
    canSetTag=1 '\001') at nodeModifyTable.c:922
#12 0x00000000005dc633 in ExecModifyTable (node=node@entry=0xdc7620) at 
nodeModifyTable.c:1501
#13 0x00000000005c3948 in ExecProcNode (node=node@entry=0xdc7620) at 
execProcnode.c:396
#14 0x00000000005bfdbf in ExecutePlan (dest=0xdc9e60, direction=<optimized 
out>, numberTuples=0, sendTuples=<optimized out>, 
    operation=CMD_UPDATE, use_parallel_mode=<optimized out>, 
planstate=0xdc7620, estate=0xdc73d0) at execMain.c:1567
#15 standard_ExecutorRun (queryDesc=0xdc6fc0, direction=<optimized out>, 
count=0) at execMain.c:338
#16 0x00000000006ce669 in ProcessQuery (plan=<optimized out>, 
    sourceText=0xd8ff70 "UPDATE pgbench_branches SET bbalance = bbalance + 
-2823 WHERE bid = 3;", params=0x0, dest=0xdc9e60, 
    completionTag=0x7fffd197dfc0 "") at pquery.c:187
#17 0x00000000006ce89c in PortalRunMulti (portal=portal@entry=0xd35000, 
isTopLevel=isTopLevel@entry=1 '\001', 
    setHoldSnapshot=setHoldSnapshot@entry=0 '\000', dest=dest@entry=0xdc9e60, 
altdest=altdest@entry=0xdc9e60, 
    completionTag=completionTag@entry=0x7fffd197dfc0 "") at pquery.c:1303
#18 0x00000000006cf34e in PortalRun (portal=portal@entry=0xd35000, 
count=count@entry=9223372036854775807, 
    isTopLevel=isTopLevel@entry=1 '\001', dest=dest@entry=0xdc9e60, 
altdest=altdest@entry=0xdc9e60, 
    completionTag=completionTag@entry=0x7fffd197dfc0 "") at pquery.c:815
#19 0x00000000006cc45b in exec_simple_query (
    query_string=0xd8ff70 "UPDATE pgbench_branches SET bbalance = bbalance + 
-2823 WHERE bid = 3;") at postgres.c:1094
#20 PostgresMain (argc=<optimized out>, argv=argv@entry=0xd16a20, 
dbname=0xd16950 "bench", username=<optimized out>)
    at postgres.c:4074
#21 0x000000000046d509 in BackendRun (port=0xd396e0) at postmaster.c:4262
#22 BackendStartup (port=0xd396e0) at postmaster.c:3936
#23 ServerLoop () at postmaster.c:1693
#24 0x000000000066fb91 in PostmasterMain (argc=argc@entry=1, 
argv=argv@entry=0xd15b50) at postmaster.c:1301
#25 0x000000000046e2b0 in main (argc=1, argv=0xd15b50) at main.c:228


$ gdb 16547
(gdb) bt
#0  0x00007f51daa3b387 in semop () at ../sysdeps/unix/syscall-template.S:84
#1  0x0000000000660331 in PGSemaphoreLock (sema=sema@entry=0x7f51d9fd9368) at 
pg_sema.c:387
#2  0x00000000006bde9b in LWLockAcquire (lock=0x7f51d1a29da4, 
mode=LW_EXCLUSIVE) at lwlock.c:1288
#3  0x00000000004978a4 in _hash_doinsert (rel=rel@entry=0x7f51db5c9308, 
itup=itup@entry=0xdc90d8) at hashinsert.c:104
#4  0x000000000049617f in hashinsert (rel=0x7f51db5c9308, values=<optimized 
out>, isnull=<optimized out>, ht_ctid=0xdc8f9c, 
    heapRel=<optimized out>, checkUnique=<optimized out>) at hash.c:247
#5  0x00000000005bf060 in ExecInsertIndexTuples (slot=slot@entry=0xdc7a90, 
tupleid=tupleid@entry=0xdc8f9c, 
    estate=estate@entry=0xdc73d0, noDupErr=noDupErr@entry=0 '\000', 
specConflict=specConflict@entry=0x0, 
    arbiterIndexes=arbiterIndexes@entry=0x0) at execIndexing.c:388
#6  0x00000000005dd011 in ExecInsert (canSetTag=1 '\001', estate=0xdc73d0, 
onconflict=<optimized out>, arbiterIndexes=0x0, 
    planSlot=0xdc7a90, slot=0xdc7a90, mtstate=0xdc7620) at nodeModifyTable.c:481
#7  ExecModifyTable (node=node@entry=0xdc7620) at nodeModifyTable.c:1496
#8  0x00000000005c3948 in ExecProcNode (node=node@entry=0xdc7620) at 
execProcnode.c:396
#9  0x00000000005bfdbf in ExecutePlan (dest=0xdb7bf8, direction=<optimized 
out>, numberTuples=0, sendTuples=<optimized out>, 
    operation=CMD_INSERT, use_parallel_mode=<optimized out>, 
planstate=0xdc7620, estate=0xdc73d0) at execMain.c:1567
#10 standard_ExecutorRun (queryDesc=0xdc6fc0, direction=<optimized out>, 
count=0) at execMain.c:338
#11 0x00000000006ce669 in ProcessQuery (plan=<optimized out>, 
    sourceText=0xd8ff70 "INSERT INTO pgbench_history (tid, bid, aid, delta, 
mtime) VALUES (803, 97, 6871273, -35, CURRENT_TIMESTAMP);", params=0x0, 
dest=0xdb7bf8, completionTag=0x7fffd197dfc0 "") at pquery.c:187
#12 0x00000000006ce89c in PortalRunMulti (portal=portal@entry=0xd35000, 
isTopLevel=isTopLevel@entry=1 '\001', 
    setHoldSnapshot=setHoldSnapshot@entry=0 '\000', dest=dest@entry=0xdb7bf8, 
altdest=altdest@entry=0xdb7bf8, 
    completionTag=completionTag@entry=0x7fffd197dfc0 "") at pquery.c:1303
#13 0x00000000006cf34e in PortalRun (portal=portal@entry=0xd35000, 
count=count@entry=9223372036854775807, 
    isTopLevel=isTopLevel@entry=1 '\001', dest=dest@entry=0xdb7bf8, 
altdest=altdest@entry=0xdb7bf8, 
    completionTag=completionTag@entry=0x7fffd197dfc0 "") at pquery.c:815
#14 0x00000000006cc45b in exec_simple_query (
    query_string=0xd8ff70 "INSERT INTO pgbench_history (tid, bid, aid, delta, 
mtime) VALUES (803, 97, 6871273, -35, CURRENT_TIMESTAMP);") at postgres.c:1094
#15 PostgresMain (argc=<optimized out>, argv=argv@entry=0xd16a20, 
dbname=0xd16950 "bench", username=<optimized out>)
    at postgres.c:4074
#16 0x000000000046d509 in BackendRun (port=0xd396e0) at postmaster.c:4262
#17 BackendStartup (port=0xd396e0) at postmaster.c:3936
#18 ServerLoop () at postmaster.c:1693
#19 0x000000000066fb91 in PostmasterMain (argc=argc@entry=1, 
argv=argv@entry=0xd15b50) at postmaster.c:1301
#20 0x000000000046e2b0 in main (argc=1, argv=0xd15b50) at main.c:228


$ gdb 16548
(gdb) bt
#0  0x00007f51daa3b387 in semop () at ../sysdeps/unix/syscall-template.S:84
#1  0x0000000000660331 in PGSemaphoreLock (sema=sema@entry=0x7f51d9fd9040) at 
pg_sema.c:387
#2  0x00000000006bde9b in LWLockAcquire (lock=0x7f51d1a29da4, 
mode=LW_EXCLUSIVE) at lwlock.c:1288
#3  0x00000000004978a4 in _hash_doinsert (rel=rel@entry=0x7f51db5c9308, 
itup=itup@entry=0xdc90d8) at hashinsert.c:104
#4  0x000000000049617f in hashinsert (rel=0x7f51db5c9308, values=<optimized 
out>, isnull=<optimized out>, ht_ctid=0xdc8f9c, 
    heapRel=<optimized out>, checkUnique=<optimized out>) at hash.c:247
#5  0x00000000005bf060 in ExecInsertIndexTuples (slot=slot@entry=0xdc7a90, 
tupleid=tupleid@entry=0xdc8f9c, 
    estate=estate@entry=0xdc73d0, noDupErr=noDupErr@entry=0 '\000', 
specConflict=specConflict@entry=0x0, 
    arbiterIndexes=arbiterIndexes@entry=0x0) at execIndexing.c:388
#6  0x00000000005dd011 in ExecInsert (canSetTag=1 '\001', estate=0xdc73d0, 
onconflict=<optimized out>, arbiterIndexes=0x0, 
    planSlot=0xdc7a90, slot=0xdc7a90, mtstate=0xdc7620) at nodeModifyTable.c:481
#7  ExecModifyTable (node=node@entry=0xdc7620) at nodeModifyTable.c:1496
#8  0x00000000005c3948 in ExecProcNode (node=node@entry=0xdc7620) at 
execProcnode.c:396
#9  0x00000000005bfdbf in ExecutePlan (dest=0xdb7bf8, direction=<optimized 
out>, numberTuples=0, sendTuples=<optimized out>, 
    operation=CMD_INSERT, use_parallel_mode=<optimized out>, 
planstate=0xdc7620, estate=0xdc73d0) at execMain.c:1567
#10 standard_ExecutorRun (queryDesc=0xdc6fc0, direction=<optimized out>, 
count=0) at execMain.c:338
#11 0x00000000006ce669 in ProcessQuery (plan=<optimized out>, 
    sourceText=0xd8ff70 "INSERT INTO pgbench_history (tid, bid, aid, delta, 
mtime) VALUES (445, 3, 5688076, 1021, CURRENT_TIMESTAMP);", params=0x0, 
dest=0xdb7bf8, completionTag=0x7fffd197dfc0 "") at pquery.c:187
#12 0x00000000006ce89c in PortalRunMulti (portal=portal@entry=0xd35000, 
isTopLevel=isTopLevel@entry=1 '\001', 
    setHoldSnapshot=setHoldSnapshot@entry=0 '\000', dest=dest@entry=0xdb7bf8, 
altdest=altdest@entry=0xdb7bf8, 
    completionTag=completionTag@entry=0x7fffd197dfc0 "") at pquery.c:1303
#13 0x00000000006cf34e in PortalRun (portal=portal@entry=0xd35000, 
count=count@entry=9223372036854775807, 
    isTopLevel=isTopLevel@entry=1 '\001', dest=dest@entry=0xdb7bf8, 
altdest=altdest@entry=0xdb7bf8, 
    completionTag=completionTag@entry=0x7fffd197dfc0 "") at pquery.c:815
#14 0x00000000006cc45b in exec_simple_query (
    query_string=0xd8ff70 "INSERT INTO pgbench_history (tid, bid, aid, delta, 
mtime) VALUES (445, 3, 5688076, 1021, CURRENT_TIMESTAMP);") at postgres.c:1094
#15 PostgresMain (argc=<optimized out>, argv=argv@entry=0xd16a20, 
dbname=0xd16950 "bench", username=<optimized out>)
    at postgres.c:4074
#16 0x000000000046d509 in BackendRun (port=0xd396e0) at postmaster.c:4262
#17 BackendStartup (port=0xd396e0) at postmaster.c:3936
#18 ServerLoop () at postmaster.c:1693
#19 0x000000000066fb91 in PostmasterMain (argc=argc@entry=1, 
argv=argv@entry=0xd15b50) at postmaster.c:1301
#20 0x000000000046e2b0 in main (argc=1, argv=0xd15b50) at main.c:228


$ gdb 16549
(gdb) bt
#0  0x00007f51daa3b387 in semop () at ../sysdeps/unix/syscall-template.S:84
#1  0x0000000000660331 in PGSemaphoreLock (sema=sema@entry=0x7f51d9fd8d18) at 
pg_sema.c:387
#2  0x00000000006bde9b in LWLockAcquire (lock=0x7f51d1a29de4, 
mode=LW_EXCLUSIVE) at lwlock.c:1288
#3  0x000000000049a625 in _hash_getbuf_with_strategy 
(rel=rel@entry=0x7f51db5c9308, blkno=blkno@entry=13, access=access@entry=2, 
    flags=flags@entry=1, bstrategy=bstrategy@entry=0x0) at hashpage.c:252
#4  0x0000000000498040 in _hash_freeovflpage (rel=rel@entry=0x7f51db5c9308, 
bucketbuf=bucketbuf@entry=6885, 
    ovflbuf=ovflbuf@entry=10782, wbuf=wbuf@entry=6886, 
itups=itups@entry=0x7fffd1979b90, 
    itup_offsets=itup_offsets@entry=0xdc97f0, tups_size=0x7fffd197a850, 
nitups=43, bstrategy=0x0) at hashovfl.c:517
#5  0x0000000000499985 in _hash_squeezebucket (rel=rel@entry=0x7f51db5c9308, 
bucket=bucket@entry=7, 
    bucket_blkno=bucket_blkno@entry=11, bucket_buf=bucket_buf@entry=6885, 
bstrategy=bstrategy@entry=0x0) at hashovfl.c:1010
#6  0x0000000000496caf in hashbucketcleanup (rel=rel@entry=0x7f51db5c9308, 
bucket_buf=bucket_buf@entry=6885, 
    bucket_blkno=bucket_blkno@entry=11, bstrategy=bstrategy@entry=0x0, 
maxbucket=22, highmask=31, lowmask=15, 
    tuples_removed=0x0, num_index_tuples=0x0, bucket_has_garbage=1 '\001', 
delay=0 '\000', callback=0x0, callback_state=0x0)
    at hash.c:937
#7  0x000000000049b353 in _hash_expandtable (rel=rel@entry=0x7f51db5c9308, 
metabuf=metabuf@entry=2459) at hashpage.c:785
#8  0x0000000000497bae in _hash_doinsert (rel=rel@entry=0x7f51db5c9308, 
itup=itup@entry=0xdc90d8) at hashinsert.c:313
#9  0x000000000049617f in hashinsert (rel=0x7f51db5c9308, values=<optimized 
out>, isnull=<optimized out>, ht_ctid=0xdc8f9c, 
    heapRel=<optimized out>, checkUnique=<optimized out>) at hash.c:247
#10 0x00000000005bf060 in ExecInsertIndexTuples (slot=slot@entry=0xdc7a90, 
tupleid=tupleid@entry=0xdc8f9c, 
    estate=estate@entry=0xdc73d0, noDupErr=noDupErr@entry=0 '\000', 
specConflict=specConflict@entry=0x0, 
    arbiterIndexes=arbiterIndexes@entry=0x0) at execIndexing.c:388
#11 0x00000000005dd011 in ExecInsert (canSetTag=1 '\001', estate=0xdc73d0, 
onconflict=<optimized out>, arbiterIndexes=0x0, 
    planSlot=0xdc7a90, slot=0xdc7a90, mtstate=0xdc7620) at nodeModifyTable.c:481
#12 ExecModifyTable (node=node@entry=0xdc7620) at nodeModifyTable.c:1496
#13 0x00000000005c3948 in ExecProcNode (node=node@entry=0xdc7620) at 
execProcnode.c:396
#14 0x00000000005bfdbf in ExecutePlan (dest=0xdb7bf8, direction=<optimized 
out>, numberTuples=0, sendTuples=<optimized out>, 
    operation=CMD_INSERT, use_parallel_mode=<optimized out>, 
planstate=0xdc7620, estate=0xdc73d0) at execMain.c:1567
#15 standard_ExecutorRun (queryDesc=0xdc6fc0, direction=<optimized out>, 
count=0) at execMain.c:338
#16 0x00000000006ce669 in ProcessQuery (plan=<optimized out>, 
    sourceText=0xd8ff70 "INSERT INTO pgbench_history (tid, bid, aid, delta, 
mtime) VALUES (190, 24, 9513890, -2566, CURRENT_TIMESTAMP);", params=0x0, 
dest=0xdb7bf8, completionTag=0x7fffd197dfc0 "") at pquery.c:187
#17 0x00000000006ce89c in PortalRunMulti (portal=portal@entry=0xd35000, 
isTopLevel=isTopLevel@entry=1 '\001', 
    setHoldSnapshot=setHoldSnapshot@entry=0 '\000', dest=dest@entry=0xdb7bf8, 
altdest=altdest@entry=0xdb7bf8, 
    completionTag=completionTag@entry=0x7fffd197dfc0 "") at pquery.c:1303
#18 0x00000000006cf34e in PortalRun (portal=portal@entry=0xd35000, 
count=count@entry=9223372036854775807, 
    isTopLevel=isTopLevel@entry=1 '\001', dest=dest@entry=0xdb7bf8, 
altdest=altdest@entry=0xdb7bf8, 
    completionTag=completionTag@entry=0x7fffd197dfc0 "") at pquery.c:815
#19 0x00000000006cc45b in exec_simple_query (
    query_string=0xd8ff70 "INSERT INTO pgbench_history (tid, bid, aid, delta, 
mtime) VALUES (190, 24, 9513890, -2566, CURRENT_TIMESTAMP);") at postgres.c:1094
#20 PostgresMain (argc=<optimized out>, argv=argv@entry=0xd16a20, 
dbname=0xd16950 "bench", username=<optimized out>)
    at postgres.c:4074
#21 0x000000000046d509 in BackendRun (port=0xd396e0) at postmaster.c:4262
#22 BackendStartup (port=0xd396e0) at postmaster.c:3936
#23 ServerLoop () at postmaster.c:1693
#24 0x000000000066fb91 in PostmasterMain (argc=argc@entry=1, 
argv=argv@entry=0xd15b50) at postmaster.c:1301
#25 0x000000000046e2b0 in main (argc=1, argv=0xd15b50) at main.c:228

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to