Once more, with feeling. On Sun, 2007-04-01 at 12:11 +0100, Simon Riggs wrote: > Resending... > > -------- Forwarded Message -------- > From: Simon Riggs <[EMAIL PROTECTED]> > To: [email protected] > Cc: [email protected] > Subject: Deferred Transactions, Transaction Guarantee and COMMIT without > waiting > Date: Sat, 31 Mar 2007 22:09:23 +0100 > > Here's the next version (v10) of the patch, ready for review. > > I've struggled with what to call all of the new concepts inherent in > this patch, but I think I've got something now. COMMIT NOWAIT doesn't > describe this feature, since there is no command of that name in the > implementation that we've agreed. So what's it called? > > This patch implements a feature called Deferred Fsync Transactions, or > Deferred Transactions for short. The idea is we don't fsync at commit, > but we defer that briefly, letting a new WAL Writer process perform the > fsync at regular intervals of 50-250 ms. It's a much safer version of > fsync = off, yet retaining most of the speed *and* it can be used for > some transactions and not others. > > Deferred Transactions provide considerable additional performance in a > range of circumstances, but at the cost that a handful of committed > transactions will definitely be lost if the server crashes. > > To remind everybody of the risks, this feature is enabled using a > parameter named transaction_guarantee. The default mode is "on" > reminding us that PostgreSQL provides a strong default guarantee that if > a transaction is committed, it stays committed. If you prefer > performance at the risk of data loss, then you can opt to relax the > standard level of protection and request transaction_guarantee = off > > The data loss isn't random, nor is it indeterminate, but it is certain. > We will say that a transaction is committed, but it isn't until it has > reached disk. 
So all transactions that have reached the commit point, > but not yet reached disk will certainly be lost - probably best to use a > guideline figure of 1000 transactions when assessing the business > impact of such loss. The risk is very similar to normal transactions > waiting to write to disk, but the important difference is we will have > replied to the client that the transaction is safely on disk, when it is > not. > > Relaxing the transaction guarantee in this way is completely > controllable by users. Guaranteed and Unguaranteed transactions can > co-exist safely without increased risk for more important data. > > v10 fixes a number of lurking bugs present in v9. There are no > outstanding bugs, after a range of tests, though more are needed. > > wal_writer_delay (in ms; default 0, meaning disabled) enables this feature > at server start when set to a value greater than zero. > Once enabled, individual sessions or transactions may request > transaction_guarantee = off, or it may be set for the whole server. > > It also provides additional instrumentation, with new parameters: > trace_commit = on will show details of each commit (high volume) > trace_bg_flush = on will give more frequent summaries of monitoring data > > The patch needs a reviewer's guide, which I'll write next week. > > patch -p0 < transaction_guarantee.v10.patch > with additional files: > src/backend/postmaster/walwriter.c > src/include/postmaster/walwriter.h
-- Simon Riggs EnterpriseDB http://www.enterprisedb.com
transaction_guarantee.v10.patch.gz
Description: GNU Zip compressed data
/*-------------------------------------------------------------------------
*
* walwriter.c
*
* PostgreSQL WAL Writer
*
* Initial author: Simon Riggs [EMAIL PROTECTED]
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/WALWriter.c,v 1.29 2007/02/10 14:58:54 petere Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <fcntl.h>
#include <signal.h>
#include <time.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <unistd.h>
#include "access/xact.h"
#include "access/xlog.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "postmaster/fork_process.h"
#include "postmaster/postmaster.h"
#include "postmaster/walwriter.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/sinval.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
/* ----------
* Timer definitions.
* ----------
*/
#define WALWRITER_RESTART_INTERVAL 2 /* How often to attempt to restart a
* failed WALWriter; in seconds. */
/* ----------
* Local data
* ----------
*/
/*
* Time of the most recent launch attempt, as recorded by StartWALWriter().
* Compared against WALWRITER_RESTART_INTERVAL to throttle respawn attempts.
*/
static time_t last_WALWriter_start_time;
/*
* Working memory context for the WAL writer. It is created in WALWriterMain()
* and reset there during error recovery.
*/
static MemoryContext walwriter_cxt;
/*
* Flags set by interrupt handlers for later service in the main loop.
*
* got_SIGHUP is set by WALWriterSigHupHandler() and makes the main loop
* re-read the configuration file. wakened is set by WALWriter_waken()
* (installed for SIGUSR1) and makes the main loop skip its sleep.
*/
static volatile sig_atomic_t got_SIGHUP = false;
static volatile sig_atomic_t wakened = false;
/* ----------
* Local function forward declarations
* ----------
*/
#ifdef EXEC_BACKEND
static pid_t WALWriter_forkexec(void);
#endif
/*
* Delay between WAL writer flush cycles. This is a global definition, not a
* forward declaration; walwriter.h declares it extern and treats the WAL
* writer as active only when the value is greater than zero.
* NOTE(review): the patch description gives this setting in milliseconds --
* confirm against the GUC definition.
*/
int WALWriterDelay = 0;
NON_EXEC_STATIC void WALWriterMain(int argc, char *argv[]);
static void WALWriter_exit(SIGNAL_ARGS);
static void WALWriterSigHupHandler(SIGNAL_ARGS);
static void WALWriter_waken(SIGNAL_ARGS);
static void WALWriter_MainLoop(void);
/* ------------------------------------------------------------
* Public functions called from postmaster follow
* ------------------------------------------------------------
*/
/*
 * StartWALWriter
 *
 * Postmaster-side launcher for the WAL writer. Invoked at startup and
 * again whenever a previous WAL writer has gone away.
 *
 * Returns the child's PID on success, or 0 when nothing was started
 * (WAL writer not active, launch attempted too recently, or fork failure).
 * A zero return is not fatal: the postmaster main loop calls us again later.
 */
int
StartWALWriter(void)
{
    time_t       now;
    unsigned int elapsed;
    pid_t        child;

    /* Nothing to launch when the WAL writer is not configured to run. */
    if (!WALWriterActive())
        return 0;

    /*
     * Throttle respawns: if the WAL writer keeps dying right after launch,
     * do not retry more often than once per WALWRITER_RESTART_INTERVAL
     * seconds.
     */
    now = time(NULL);
    elapsed = (unsigned int) (now - last_WALWriter_start_time);
    if (elapsed < (unsigned int) WALWRITER_RESTART_INTERVAL)
        return 0;
    last_WALWriter_start_time = now;

#ifdef EXEC_BACKEND
    child = WALWriter_forkexec();
#else
    child = fork_process();
#endif

    /* Fork failed: log it and let the postmaster retry later. */
    if (child == -1)
    {
        ereport(LOG,
                (errmsg("could not fork WALWriter: %m")));
        return 0;
    }

#ifndef EXEC_BACKEND
    if (child == 0)
    {
        /* We are the new child: drop what belongs to the postmaster. */
        ClosePostmasterPorts(false);    /* postmaster's sockets */
        on_exit_reset();                /* postmaster's on-exit routines */

        WALWriterMain(0, NULL);

        /* shouldn't get here */
        return 0;
    }
#endif

    /* In the postmaster: hand back the child's PID. */
    return (int) child;
}
/* ------------------------------------------------------------
* Local functions called by WALWriter follow
* ------------------------------------------------------------
*/
#ifdef EXEC_BACKEND
/*
 * WALWriter_forkexec()
 *
 * EXEC_BACKEND only: assemble the argument vector for the WAL writer
 * subprocess and pass it to postmaster_forkexec(), returning its result.
 */
static pid_t
WALWriter_forkexec(void)
{
    char   *argvec[10];
    int     nargs;

    argvec[0] = "postgres";
    argvec[1] = "--forkwalwriter";
    argvec[2] = NULL;           /* filled in by postmaster_forkexec */
    nargs = 3;

    /* Terminate the vector; the terminator is not counted in nargs. */
    argvec[nargs] = NULL;

    Assert(nargs < lengthof(argvec));

    return postmaster_forkexec(nargs, argvec);
}
#endif /* EXEC_BACKEND */
/*
* WALWriterMain
*
* Entry point of the WAL writer process. Sets up signal handling, process
* identification, shared-memory access and a private memory context,
* installs an error-recovery point, then runs WALWriter_MainLoop() until
* that returns, after which the process exits with status 0.
*
* The argc/argv parameters are valid only in EXEC_BACKEND case. However,
* since we don't use 'em, it hardly matters...
*/
NON_EXEC_STATIC void
WALWriterMain(int argc, char *argv[])
{
sigjmp_buf local_sigjmp_buf;
IsUnderPostmaster = true; /* we are a postmaster subprocess now */
MyProcPid = getpid(); /* reset MyProcPid */
/*
* If possible, make this process a group leader, so that the postmaster
* can signal any child processes too.
*/
#ifdef HAVE_SETSID
if (setsid() < 0)
elog(FATAL, "setsid() failed: %m");
#endif
/*
* Ignore all signals usually bound to some action in the postmaster,
* except for SIGHUP, SIGUSR1 and SIGQUIT.
*
* SIGHUP requests a configuration reload, SIGQUIT makes the process exit
* immediately, and SIGUSR1 sets the "wakened" flag so the main loop skips
* its next sleep.
*/
pqsignal(SIGHUP, WALWriterSigHupHandler);
pqsignal(SIGINT, SIG_IGN);
pqsignal(SIGTERM, SIG_IGN); /* Not executing transactions */
pqsignal(SIGQUIT, WALWriter_exit);
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, WALWriter_waken); /* XXX: May want this later */
pqsignal(SIGUSR2, SIG_IGN);
/* Restore default handling for the remaining signals set here */
pqsignal(SIGCHLD, SIG_DFL);
pqsignal(SIGTTIN, SIG_DFL);
pqsignal(SIGTTOU, SIG_DFL);
pqsignal(SIGCONT, SIG_DFL);
pqsignal(SIGWINCH, SIG_DFL);
/*
* Identify myself via ps
*/
init_ps_display("WAL writer process", "", "", "");
SetProcessingMode(InitProcessing);
/* Early initialization */
BaseInit();
/*
* Create a per-backend PGPROC struct in shared memory, except in the
* EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
* this before we can use LWLocks (and in the EXEC_BACKEND case we already
* had to do some stuff with LWLocks).
*/
#ifndef EXEC_BACKEND
InitAuxiliaryProcess();
#endif
/*
* Create a memory context that we will do all our work in. We do this so
* that we can reset the context during error recovery and thereby avoid
* possible memory leaks.
*/
walwriter_cxt = AllocSetContextCreate(TopMemoryContext,
"WAL Writer",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
MemoryContextSwitchTo(walwriter_cxt);
/*
* If an exception is encountered, processing resumes here.
*
* This code is heavily based on bgwriter.c, q.v.
*
* Note that after the recovery block below completes, control falls
* through to the statements following it, so InitXLOGAccess() is called
* again, the "WAL writer started" message is logged again, and the main
* loop is re-entered.
*/
if (sigsetjmp(local_sigjmp_buf, 1) != 0)
{
/* since not using PG_TRY, must reset error stack by hand */
error_context_stack = NULL;
/* Prevents interrupts while cleaning up */
HOLD_INTERRUPTS();
/* Report the error to the server log */
EmitErrorReport();
/*
* These operations are really just a minimal subset of
* AbortTransaction(). We don't have very many resources to worry
* about, but we do have LWLocks.
*/
LWLockReleaseAll();
/*
* Now return to normal top-level context and clear ErrorContext for
* next time.
*/
MemoryContextSwitchTo(walwriter_cxt);
FlushErrorState();
/* Flush any leaked data in the top-level context */
MemoryContextResetAndDeleteChildren(walwriter_cxt);
/*
* NOTE(review): the original comment at this point read "Make sure
* pgstat also considers our stat data as gone", but no statement follows
* it; presumably left over from the code this was modelled on.
*/
/* Now we can allow interrupts again */
RESUME_INTERRUPTS();
/*
* Sleep at least 1 second after any error. We don't want to be
* filling the error logs as fast as we can.
*/
pg_usleep(1000000L);
}
/* We can now handle ereport(ERROR) */
PG_exception_stack = &local_sigjmp_buf;
InitXLOGAccess();
ereport(LOG,
(errmsg("WAL writer started")));
/* Unblock signals so the handlers installed above can run */
PG_SETMASK(&UnBlockSig);
WALWriter_MainLoop();
/*
* The main loop returns when the WAL writer is no longer active after a
* configuration reload, or when the postmaster is no longer alive.
*/
ereport(LOG,
(errmsg("WAL writer shutting down")));
/*
* NOTE(review): this calls plain exit(); confirm whether proc_exit()
* should be used instead so that registered exit callbacks run.
*/
exit(0);
}
/*
* SIGQUIT signal handler for WALWriter process
*
* Blocks further signals and terminates the process immediately with exit
* status 0; no pending work is completed first.
*/
static void
WALWriter_exit(SIGNAL_ARGS)
{
/* Block signals so nothing interrupts us on the way out */
PG_SETMASK(&BlockSig);
/*
* For now, we just nail the doors shut and get out of town.
*/
exit(0);
}
/*
* SIGHUP: set flag to re-read config file at next convenient time
*
* The handler only records the request; WALWriter_MainLoop() notices the
* flag, clears it and calls ProcessConfigFile().
*/
static void
WALWriterSigHupHandler(SIGNAL_ARGS)
{
got_SIGHUP = true;
}
/*
* SIGUSR1 signal handler for WALWriter process
*
* Records that a wakeup was requested. WALWriter_MainLoop() clears the flag
* before each flush and skips its sleep if the flag has been set again by
* the time the flush completes.
*/
static void
WALWriter_waken(SIGNAL_ARGS)
{
wakened = true;
}
/*
 * WALWriter_MainLoop
 *
 * Body of the WAL writer: repeatedly flush deferred-fsync transactions and
 * then sleep for the configured delay.
 *
 * The loop ends, and the function returns, when either
 *   - a configuration reload leaves the WAL writer inactive
 *     (WALWriterActive() is false), or
 *   - PostmasterIsAlive() reports that the postmaster has gone away.
 */
static void
WALWriter_MainLoop(void)
{
    do
    {
        /* Check for config update */
        if (got_SIGHUP)
        {
            got_SIGHUP = false;
            ProcessConfigFile(PGC_SIGHUP);
            if (!WALWriterActive())
                break;          /* user wants us to shut down */
        }

        /*
         * Clear the wakeup flag before flushing, so that a SIGUSR1 arriving
         * during the flush is still seen by the test below.
         */
        wakened = false;

        /*
         * Do what we're here for and keep doing it as long as we're busy.
         * NOTE(review): the meaning of the two boolean arguments is defined
         * with FlushAnyDeferredFsyncTransactions(), not in this file.
         */
        FlushAnyDeferredFsyncTransactions(true, false);

        /*
         * Sleep unless a signal asked for attention in the meantime.
         *
         * WALWriterDelay is expressed in milliseconds, whereas pg_usleep()
         * takes microseconds, so scale it; passing the raw value would make
         * the loop sleep 1000 times too briefly.
         */
        if (!got_SIGHUP && !wakened)
            pg_usleep(WALWriterDelay * 1000L);
    } while (PostmasterIsAlive(true));
}
/*-------------------------------------------------------------------------
 *
 * walwriter.h
 *    WALWriter definitions
 *
 * Declares the WAL writer's delay setting, the test for whether the WAL
 * writer should run at all, and the postmaster entry point that starts it.
 *
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *    $PostgreSQL: pgsql/src/backend/postmaster/WALWriter.c,v 1.29 2007/02/10 14:58:54 petere Exp $
 *
 *-------------------------------------------------------------------------
 */
#ifndef WALWRITER_H
#define WALWRITER_H

/* Delay between WAL writer cycles; defined in walwriter.c */
extern int WALWriterDelay;

/* The WAL writer runs only when a positive delay has been configured */
#define WALWriterActive() (WALWriterDelay > 0)

/* Start the WAL writer; returns the child's PID, or 0 if none was started */
extern int StartWALWriter(void);

#endif   /* WALWRITER_H */
---------------------------(end of broadcast)---------------------------
TIP 7: You can help support the PostgreSQL project by donating at
http://www.postgresql.org/about/donate
