Hi,
On 2017-03-30 13:10:41 +1100, Haribabu Kommi wrote:
> diff --git a/src/backend/access/transam/xlog.c
> b/src/backend/access/transam/xlog.c
> index 5d58f09..a29c108 100644
> --- a/src/backend/access/transam/xlog.c
> +++ b/src/backend/access/transam/xlog.c
> @@ -600,6 +600,9 @@ typedef struct XLogCtlData
> */
> XLogwrtResult LogwrtResult;
>
> + /* Protected by WALWriteLock */
> + PgStat_WalWritesCounts stats;
> /*
> + * Check whether the current process is a normal backend or not.
> + * This function checks for the background processes that does
> + * some WAL write activity only and other background processes
> + * are not considered. It considers all the background workers
> + * as WAL write activity workers.
> + *
> + * Returns FALSE - when the current process is a normal backend
> + * TRUE - when the current process a background process/worker
> + */
I don't think we commonly capitalize true/false these days.
> +static bool
> +am_background_process()
> +{
> + /* check whether current process is a background process/worker? */
> + if (AmBackgroundWriterProcess() ||
> + AmWalWriterProcess() ||
> + AmCheckpointerProcess() ||
> + AmStartupProcess() ||
> + IsBackgroundWorker ||
> + am_walsender ||
> + am_autovacuum_worker)
> + {
> + return true;
> + }
> +
> + return false;
> +}
Uhm, wouldn't inverting this be a lot easier and future proof? There's
far fewer non-background processes.
> +/*
> * Write and/or fsync the log at least as far as WriteRqst indicates.
> *
> * If flexible == TRUE, we don't have to write as far as WriteRqst, but
> @@ -2341,6 +2377,9 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
> int npages;
> int startidx;
> uint32 startoffset;
> + instr_time io_start,
> + io_time;
> + bool is_background_process = am_background_process();
>
> /* We should always be inside a critical section here */
> Assert(CritSectionCount > 0);
> @@ -2458,6 +2497,11 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
> /* OK to write the page(s) */
> from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
> nbytes = npages * (Size) XLOG_BLCKSZ;
> +
> + /* Start timer to acquire start time of the wal write */
> + if (track_io_timing)
> + INSTR_TIME_SET_CURRENT(io_start);
> +
I'm more than a bit hesitant adding overhead to WAL writing - it's
already quite a bit of a bottleneck. Normally track_io_timing just
makes things a tiny bit slower, but doesn't cause a scalability issue,
here it does. This is all done under an already highly contended lock.
> nleft = nbytes;
> do
> {
> @@ -2480,6 +2524,34 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
> from += written;
> } while (nleft > 0);
>
> + /* calculate the total time spent for wal writing */
> + if (track_io_timing)
> + {
> + INSTR_TIME_SET_CURRENT(io_time);
> + INSTR_TIME_SUBTRACT(io_time, io_start);
> +
> + if (is_background_process)
> + XLogCtl->stats.total_write_time +=
> INSTR_TIME_GET_MILLISEC(io_time);
> + else
> + XLogCtl->stats.backend_total_write_time
> += INSTR_TIME_GET_MILLISEC(io_time);
> + }
> + else
> + {
> + XLogCtl->stats.total_write_time = 0;
> + XLogCtl->stats.backend_total_write_time = 0;
> + }
> +
> + /* check whether writer is a normal backend or not? */
> + if (is_background_process)
> + XLogCtl->stats.writes++;
> + else
> + XLogCtl->stats.backend_writes++;
> +
> + if (is_background_process)
> + XLogCtl->stats.write_blocks += npages;
> + else
> + XLogCtl->stats.backend_write_blocks += npages;
> +
> /* Update state for write */
> openLogOff += nbytes;
> npages = 0;
> @@ -2499,8 +2571,29 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
> */
> if (finishing_seg)
> {
> + /* Start timer to acquire start time of the wal
> sync */
> + if (track_io_timing)
> + INSTR_TIME_SET_CURRENT(io_start);
> +
> issue_xlog_fsync(openLogFile, openLogSegNo);
>
> + /* calculate the total time spent for wal sync
> */
> + if (track_io_timing)
> + {
> + INSTR_TIME_SET_CURRENT(io_time);
> + INSTR_TIME_SUBTRACT(io_time, io_start);
> +
> + if (is_background_process)
> + XLogCtl->stats.total_sync_time
> += INSTR_TIME_GET_MILLISEC(io_time);
> + else
> +
> XLogCtl->stats.backend_total_sync_time += INSTR_TIME_GET_MILLISEC(io_time);
> + }
> + else
> + {
> + XLogCtl->stats.total_sync_time = 0;
> + XLogCtl->stats.backend_total_sync_time
> = 0;
> + }
> +
> /* signal that we need to wakeup walsenders
> later */
> WalSndWakeupRequest();
>
> @@ -2568,7 +2661,28 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
> openLogOff = 0;
> }
>
> + /* Start timer to acquire start time of the wal sync */
> + if (track_io_timing)
> + INSTR_TIME_SET_CURRENT(io_start);
> +
> issue_xlog_fsync(openLogFile, openLogSegNo);
> +
> + /* calculate the total time spent for wal sync */
> + if (track_io_timing)
> + {
> + INSTR_TIME_SET_CURRENT(io_time);
> + INSTR_TIME_SUBTRACT(io_time, io_start);
> +
> + if (is_background_process)
> + XLogCtl->stats.total_sync_time +=
> INSTR_TIME_GET_MILLISEC(io_time);
> + else
> + XLogCtl->stats.backend_total_sync_time
> += INSTR_TIME_GET_MILLISEC(io_time);
> + }
> + else
> + {
> + XLogCtl->stats.total_sync_time = 0;
> + XLogCtl->stats.backend_total_sync_time = 0;
> + }
> }
I'm *very* doubtful about this, but even if we do it, this needs careful
benchmarking.
> /* signal that we need to wakeup walsenders later */
> @@ -3043,6 +3157,9 @@ XLogBackgroundFlush(void)
> {
> XLogWrite(WriteRqst, flexible);
> }
> +
> + /* Collect all the wal write shared counters into local, and report it
> to stats collector */
> + memcpy(&LocalWalWritesStats.stats, &XLogCtl->stats,
> sizeof(PgStat_WalWritesCounts));
> LWLockRelease(WALWriteLock);
Hm. I think in a good number of workloads hti sis never reached, because
we return early.
I think this is an interesting feature, but I don't think it's ready,
and it was submitted very late in the v10 release cycle. Therefore I
think this should be moved to the next CF.
Greetings,
Andres Freund
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers