HI, this is the test case:Use stap track pg_clog/* fsync call. [root@digoal ~]# cat trc.stp global f_start[999999]
probe process("/opt/pgsql/bin/postgres").function("SlruPhysicalWritePage@/opt/soft_bak/postgresql-9.4.4/src/backend/access/transam/slru.c").call { f_start[execname(), pid(), tid(), cpu()] = gettimeofday_ms() printf("%s <- time:%d, pp:%s, par:%s\n", thread_indent(-1), gettimeofday_ms(), pp(), $$parms$$) # printf("%s -> time:%d, pp:%s\n", thread_indent(1), f_start[execname(), pid(), tid(), cpu()], pp() ) } probe process("/opt/pgsql/bin/postgres").function("SlruPhysicalWritePage@/opt/soft_bak/postgresql-9.4.4/src/backend/access/transam/slru.c").return { t=gettimeofday_ms() a=execname() b=cpu() c=pid() d=pp() e=tid() if (f_start[a,c,e,b]) { printf("%s <- time:%d, pp:%s, par:%s\n", thread_indent(-1), t - f_start[a,c,e,b], d, $return$$) # printf("%s <- time:%d, pp:%s\n", thread_indent(-1), t - f_start[a,c,e,b], d) } } probe process("/opt/pgsql/bin/postgres").function("OpenTransientFile@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c").call { f_start[execname(), pid(), tid(), cpu()] = gettimeofday_ms() printf("%s <- time:%d, pp:%s, par:%s\n", thread_indent(-1), gettimeofday_ms(), pp(), $$parms$$) # printf("%s -> time:%d, pp:%s\n", thread_indent(1), f_start[execname(), pid(), tid(), cpu()], pp() ) } probe process("/opt/pgsql/bin/postgres").function("OpenTransientFile@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c").return { t=gettimeofday_ms() a=execname() b=cpu() c=pid() d=pp() e=tid() if (f_start[a,c,e,b]) { printf("%s <- time:%d, pp:%s, par:%s\n", thread_indent(-1), t - f_start[a,c,e,b], d, $return$$) # printf("%s <- time:%d, pp:%s\n", thread_indent(-1), t - f_start[a,c,e,b], d) } } probe process("/opt/pgsql/bin/postgres").function("pg_fsync@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c").call { f_start[execname(), pid(), tid(), cpu()] = gettimeofday_ms() printf("%s <- time:%d, pp:%s, par:%s\n", thread_indent(-1), gettimeofday_ms(), pp(), $$parms$$) # printf("%s -> time:%d, pp:%s\n", thread_indent(1), f_start[execname(), pid(), tid(), cpu()], pp() ) } probe process("/opt/pgsql/bin/postgres").function("pg_fsync@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c").return { t=gettimeofday_ms() a=execname() b=cpu() c=pid() d=pp() e=tid() if (f_start[a,c,e,b]) { printf("%s <- time:%d, pp:%s, par:%s\n", thread_indent(-1), t - f_start[a,c,e,b], d, $return$$) # printf("%s <- time:%d, pp:%s\n", thread_indent(-1), t - f_start[a,c,e,b], d) } } Use pgbench like this: postgres@digoal-> cat 7.sql select txid_current(); postgres@digoal-> pgbench -M prepared -n -r -P 1 -f ./7.sql -c 1 -j 1 -T 100000 About 32K tps. progress: 240.0 s, 31164.4 tps, lat 0.031 ms stddev 0.183 progress: 241.0 s, 33243.3 tps, lat 0.029 ms stddev 0.127 progress: 242.0 s, 32567.3 tps, lat 0.030 ms stddev 0.179 progress: 243.0 s, 33656.6 tps, lat 0.029 ms stddev 0.038 progress: 244.0 s, 33948.1 tps, lat 0.029 ms stddev 0.021 progress: 245.0 s, 32996.8 tps, lat 0.030 ms stddev 0.046 progress: 246.0 s, 34156.7 tps, lat 0.029 ms stddev 0.015 progress: 247.0 s, 33259.5 tps, lat 0.029 ms stddev 0.074 progress: 248.0 s, 32979.6 tps, lat 0.030 ms stddev 0.043 progress: 249.0 s, 32892.6 tps, lat 0.030 ms stddev 0.039 progress: 250.0 s, 33090.7 tps, lat 0.029 ms stddev 0.020 progress: 251.0 s, 33238.3 tps, lat 0.029 ms stddev 0.017 progress: 252.0 s, 32341.3 tps, lat 0.030 ms stddev 0.045 progress: 253.0 s, 31999.0 tps, lat 0.030 ms stddev 0.167 progress: 254.0 s, 33332.6 tps, lat 0.029 ms stddev 0.056 progress: 255.0 s, 30394.6 tps, lat 0.032 ms stddev 0.027 progress: 256.0 s, 31862.7 tps, lat 0.031 ms stddev 0.023 progress: 257.0 s, 31574.0 tps, lat 0.031 ms stddev 0.112 Trace the PID: postgres@digoal-> ps -ewf|grep postgres postgres 2921 1883 29 09:37 pts/1 00:00:05 pgbench -M prepared -n -r -P 1 -f ./7.sql -c 1 -j 1 -T 100000 postgres 2924 1841 66 09:37 ? 00:00:13 postgres: postgres postgres [local] SELECT [root@digoal ~]# stap -vp 5 -DMAXSKIPPED=9999999 -DSTP_NO_OVERLOAD -DMAXTRYLOCK=100 ./trc.stp -x 2924 >./stap.log 2>&1 0 postgres(2924): -> time:1441503927731, pp:process("/opt/pgsql9.4.4/bin/postgres").function("SlruPhysicalWritePage@/opt/soft_bak/postgresql-9.4.4/src/backend/access/transam/slru.c:699").call, par:ctl={.shared=0x7f74a9fe39c0, .do_fsync='\001', .PagePrecedes=0x4b1960, .Dir="pg_clog"} pageno=12350 slotno=10 fdata=ERROR 31 postgres(2924): -> time:1441503927731, pp:process("/opt/pgsql9.4.4/bin/postgres").function("OpenTransientFile@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:1710").call, par:fileName="pg_clog/0181" fileFlags=66 fileMode=384 53 postgres(2924): <- time:0, pp:process("/opt/pgsql9.4.4/bin/postgres").function("OpenTransientFile@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:1710").return, par:14 102 postgres(2924): -> time:1441503927731, pp:process("/opt/pgsql9.4.4/bin/postgres").function("pg_fsync@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:315").call, par:fd=14 1096 postgres(2924): <- time:1, pp:process("/opt/pgsql9.4.4/bin/postgres").function("pg_fsync@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:315").return, par:0 1113 postgres(2924): <- time:1, pp:process("/opt/pgsql9.4.4/bin/postgres").function("SlruPhysicalWritePage@/opt/soft_bak/postgresql-9.4.4/src/backend/access/transam/slru.c:699").return, par:'\001' 1105302 postgres(2924): -> time:1441503928836, pp:process("/opt/pgsql9.4.4/bin/postgres").function("SlruPhysicalWritePage@/opt/soft_bak/postgresql-9.4.4/src/backend/access/transam/slru.c:699").call, par:ctl={.shared=0x7f74a9fe39c0, .do_fsync='\001', .PagePrecedes=0x4b1960, .Dir="pg_clog"} pageno=12351 slotno=11 fdata=ERROR 1105329 postgres(2924): -> time:1441503928836, pp:process("/opt/pgsql9.4.4/bin/postgres").function("OpenTransientFile@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:1710").call, par:fileName="pg_clog/0181" fileFlags=66 fileMode=384 1105348 postgres(2924): <- time:0, pp:process("/opt/pgsql9.4.4/bin/postgres").function("OpenTransientFile@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:1710").return, par:14 1105405 postgres(2924): -> time:1441503928836, pp:process("/opt/pgsql9.4.4/bin/postgres").function("pg_fsync@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:315").call, par:fd=14 1106440 postgres(2924): <- time:1, pp:process("/opt/pgsql9.4.4/bin/postgres").function("pg_fsync@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:315").return, par:0 1106452 postgres(2924): <- time:1, pp:process("/opt/pgsql9.4.4/bin/postgres").function("SlruPhysicalWritePage@/opt/soft_bak/postgresql-9.4.4/src/backend/access/transam/slru.c:699").return, par:'\001' 2087891 postgres(2924): -> time:1441503929819, pp:process("/opt/pgsql9.4.4/bin/postgres").function("SlruPhysicalWritePage@/opt/soft_bak/postgresql-9.4.4/src/backend/access/transam/slru.c:699").call, par:ctl={.shared=0x7f74a9fe39c0, .do_fsync='\001', .PagePrecedes=0x4b1960, .Dir="pg_clog"} pageno=12352 slotno=12 fdata=ERROR 2087917 postgres(2924): -> time:1441503929819, pp:process("/opt/pgsql9.4.4/bin/postgres").function("OpenTransientFile@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:1710").call, par:fileName="pg_clog/0182" fileFlags=66 fileMode=384 2087958 postgres(2924): <- time:0, pp:process("/opt/pgsql9.4.4/bin/postgres").function("OpenTransientFile@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:1710").return, par:14 2088013 postgres(2924): -> time:1441503929819, pp:process("/opt/pgsql9.4.4/bin/postgres").function("pg_fsync@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:315").call, par:fd=14 2089250 postgres(2924): <- time:1, pp:process("/opt/pgsql9.4.4/bin/postgres").function("pg_fsync@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:315").return, par:0 2089265 postgres(2924): <- time:1, pp:process("/opt/pgsql9.4.4/bin/postgres").function("SlruPhysicalWritePage@/opt/soft_bak/postgresql-9.4.4/src/backend/access/transam/slru.c:699").return, par:'\001' fsync interval about 1 sec, every 32KB xacts, per CLOG PAGE. postgres=# select 1441503928836-1441503927731; ?column? ---------- 1105 (1 row) postgres=# select 1441503929819-1441503928836; ?column? ---------- 983 (1 row) ------------------------------------------------------------------发件人:Amit Kapila <amit.kapil...@gmail.com>发送时间:2015年9月8日(星期二) 15:37收件人:周正中(德歌) <dege....@alibaba-inc.com>抄 送:Andres Freund <and...@anarazel.de>,张广舟(明虚) <guangzhou....@alibaba-inc.com>,pgsql-hackers <pgsql-hackers@postgresql.org>,范孝剑(康贤) <funnyxj....@alibaba-inc.com>,曾文旌(义从) <wenjing....@alibaba-inc.com>,窦贤明(执白) <xianming....@alibaba-inc.com>,萧少聪(铁庵) <shaocong....@alibaba-inc.com>,陈新坚(惧留孙) <xinjian.c...@alibaba-inc.com>主 题:Re: [HACKERS] 答复:[HACKERS] about fsync in CLOG buffer write On Sun, Sep 6, 2015 at 7:34 AM, 周正中(德歌) <dege....@alibaba-inc.com> wrote: >On 2015-09-10 19:39:59 +0800, 张广舟(明虚) wrote: >> We found there is a fsync call when CLOG buffer >> is written out in SlruPhysicalWritePage(). It is often called when a backend >> needs to check transaction status with SimpleLruReadPage(). >That's when there's not enough buffers available some other, and your >case dirty, needs to be written out. >You could try increasing the max (32) in CLOGShmemBuffers() further.I think >increasing CLOGShmemBuffers have no help for our case, because PG will call pg_fsync once for every CLOG PAGE(BLOCKSZ) when ExtendCLOG。 Ideally that should not happen every time, CLOG page is written onlywhen there is no non-dirty CLOG buffer to accommodate a new pageduring ExtendCLOG. I think it will happen only if there is no flush ofCLOG pages by Checkpoint or by any other process trying to committhe transaction during last 32 pages worth of transactions (which is4 (TRANS_PER_BYTE) * 8192 (BLKSIZE) * 32 (CLOG_BUFFERS)). What kind of workload you are using, can you once try by commentingthe sync call and see if there is any gain (I understand that is not right,but at least we will come to know how much we can save by moving sucha sync to checkpoint). With Regards, Amit Kapila. EnterpriseDB: http://www.enterprisedb.com