Once more, with feeling. On Sun, 2007-04-01 at 12:11 +0100, Simon Riggs wrote: > Resending... > > -------- Forwarded Message -------- > From: Simon Riggs <[EMAIL PROTECTED]> > To: pgsql-patches@postgresql.org > Cc: pgsql-hackers@postgresql.org > Subject: Deferred Transactions, Transaction Guarantee and COMMIT without > waiting > Date: Sat, 31 Mar 2007 22:09:23 +0100 > > Here's the next version (v10) of the patch, ready for review. > > I've struggled with what to call all of the new concepts inherent in > this patch, but I think I've got something now. COMMIT NOWAIT doesn't > describe this feature, since there is no command of that name in the > implementation that we've agreed. So what's it called? > > This patch implements a feature called Deferred Fsync Transactions, or > Deferred Transactions for short. The idea is we don't fsync at commit, > but we defer that briefly, letting a new WAL Writer process perform the > fsync at regular intervals of 50-250 ms. It's a much safer version of > fsync = off, yet retaining most of the speed *and* it can be used for > some transactions and not others. > > Deferred Transactions provide considerable additional performance in a > range of circumstances, but at the cost that a handful of committed > transactions will definitely be lost if the server crashes. > > To remind everybody of the risks, this feature is enabled using a > parameter named transaction_guarantee. The default mode is "on" > reminding us that PostgreSQL provides a strong default guarantee that if > a transaction is committed, it stays committed. If you prefer > performance at the risk of data loss, then you can opt to relax the > standard level of protection and request transaction_guarantee = off > > The data loss isn't random, nor is it indeterminate, but it is certain. > We will say that a transaction is committed, but it isn't until it has > reached disk. 
So all transactions that have reached the commit point, > but not yet reached disk will certainly be lost - probably best to use a > guideline figure of 1000 transactions when assessing the business > impact of such loss. The risk is very similar to normal transactions > waiting to write to disk, but the important difference is we will have > replied to the client that the transaction is safely on disk, when it is > not. > > Relaxing the transaction guarantee in this way is completely > controllable by users. Guaranteed and Unguaranteed transactions can > co-exist safely without increased risk for more important data. > > v10 fixes a number of lurking bugs present in v9. There are no > outstanding bugs, after a range of tests, though more are needed. > > Setting wal_writer_delay to a value greater than 0 ms (the default is 0, > which leaves the WAL Writer disabled) enables this feature at server start. > Once enabled, individual sessions or transactions may request > transaction_guarantee = off, or it may be set for the whole server. > > It also provides additional instrumentation, with new parameters: > trace_commit = on will show details of each commit (high volume) > trace_bg_flush = on will give more frequent summaries of monitoring data > > The patch needs a reviewer's guide, which I'll write next week. > > patch -p0 < transaction_guarantee.v10.patch > with additional files: > src/backend/postmaster/walwriter.c > src/include/postmaster/walwriter.h
-- Simon Riggs EnterpriseDB http://www.enterprisedb.com
transaction_guarantee.v10.patch.gz
Description: GNU Zip compressed data
/*-------------------------------------------------------------------------
 *
 * walwriter.c
 *
 *	PostgreSQL WAL Writer
 *
 *	A postmaster child process that periodically calls
 *	FlushAnyDeferredFsyncTransactions(), sleeping WALWriterDelay
 *	milliseconds between rounds.  The process is only started while
 *	WALWriterActive() is true (i.e. WALWriterDelay > 0).
 *
 *	Initial author: Simon Riggs		[EMAIL PROTECTED]
 *
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL$
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <fcntl.h>
#include <signal.h>
#include <time.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <unistd.h>

#include "access/xact.h"
#include "access/xlog.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "postmaster/fork_process.h"
#include "postmaster/postmaster.h"
#include "postmaster/walwriter.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/sinval.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"


/* ----------
 * Timer definitions.
 * ----------
 */
#define WALWRITER_RESTART_INTERVAL 2	/* How often to attempt to restart a
										 * failed WALWriter; in seconds. */

/* ----------
 * Local data
 * ----------
 */
static time_t last_WALWriter_start_time;

/* Memory context for long-lived data */
static MemoryContext walwriter_cxt;

/*
 * Flags set by interrupt handlers for later service in the main loop.
 */
static volatile sig_atomic_t got_SIGHUP = false;
static volatile sig_atomic_t wakened = false;

/* ----------
 * Local function forward declarations
 * ----------
 */
#ifdef EXEC_BACKEND
static pid_t WALWriter_forkexec(void);
#endif

/*
 * Sleep time between WAL writer rounds, in milliseconds.  A value of zero
 * (the initial value) means no WAL writer is wanted; see WALWriterActive().
 */
int			WALWriterDelay = 0;

NON_EXEC_STATIC void WALWriterMain(int argc, char *argv[]);
static void WALWriter_exit(SIGNAL_ARGS);
static void WALWriterSigHupHandler(SIGNAL_ARGS);
static void WALWriter_waken(SIGNAL_ARGS);
static void WALWriter_MainLoop(void);


/* ------------------------------------------------------------
 * Public functions called from postmaster follow
 * ------------------------------------------------------------
 */

/*
 * StartWALWriter
 *
 *	Called from postmaster at startup or after an existing WALWriter
 *	died.  Attempt to fire up a fresh WALWriter process.
 *
 *	Returns PID of child process, or 0 if fail (or if no WALWriter is
 *	wanted, or if it is too soon since the previous start attempt).
 *
 *	Note: if fail, we will be called again from the postmaster main loop.
 */
int
StartWALWriter(void)
{
	time_t		curtime;
	pid_t		WALWriterPid;

	/*
	 * Do nothing if no WALWriter needed
	 */
	if (!WALWriterActive())
		return 0;

	/*
	 * Do nothing if too soon since last WALWriter start.  This is a safety
	 * valve to protect against continuous respawn attempts if the WALWriter
	 * is dying immediately at launch.  Note that since we will be re-called
	 * from the postmaster main loop, we will get another chance later.
	 */
	curtime = time(NULL);
	if ((unsigned int) (curtime - last_WALWriter_start_time) <
		(unsigned int) WALWRITER_RESTART_INTERVAL)
		return 0;
	last_WALWriter_start_time = curtime;

#ifdef EXEC_BACKEND
	switch ((WALWriterPid = WALWriter_forkexec()))
#else
	switch ((WALWriterPid = fork_process()))
#endif
	{
		case -1:
			ereport(LOG,
					(errmsg("could not fork WALWriter: %m")));
			return 0;

#ifndef EXEC_BACKEND
		case 0:
			/* in postmaster child ... */
			/* Close the postmaster's sockets */
			ClosePostmasterPorts(false);

			/* Lose the postmaster's on-exit routines */
			on_exit_reset();

			WALWriterMain(0, NULL);
			break;
#endif

		default:
			return (int) WALWriterPid;
	}

	/* shouldn't get here */
	return 0;
}

/* ------------------------------------------------------------
 * Local functions called by WALWriter follow
 * ------------------------------------------------------------
 */


#ifdef EXEC_BACKEND

/*
 * WALWriter_forkexec() -
 *
 * Format up the arglist for, then fork and exec, WALWriter process
 *
 * Returns whatever postmaster_forkexec() returns.
 */
static pid_t
WALWriter_forkexec(void)
{
	char	   *av[10];
	int			ac = 0;

	av[ac++] = "postgres";

	av[ac++] = "--forkwalwriter";

	av[ac++] = NULL;			/* filled in by postmaster_forkexec */

	av[ac] = NULL;
	Assert(ac < lengthof(av));

	return postmaster_forkexec(ac, av);
}
#endif   /* EXEC_BACKEND */


/*
 * WALWriterMain
 *
 *	Entry point of the WAL writer process: installs signal handlers, does
 *	process initialization, sets up error recovery, then runs
 *	WALWriter_MainLoop() and exits with status 0 once the loop returns.
 *
 *	The argc/argv parameters are valid only in EXEC_BACKEND case.  However,
 *	since we don't use 'em, it hardly matters...
 */
NON_EXEC_STATIC void
WALWriterMain(int argc, char *argv[])
{
	sigjmp_buf	local_sigjmp_buf;

	IsUnderPostmaster = true;	/* we are a postmaster subprocess now */

	MyProcPid = getpid();		/* reset MyProcPid */

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too.
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	/*
	 * Ignore all signals usually bound to some action in the postmaster,
	 * except for SIGHUP, SIGUSR1 and SIGQUIT.
	 */
	pqsignal(SIGHUP, WALWriterSigHupHandler);
	pqsignal(SIGINT, SIG_IGN);
	pqsignal(SIGTERM, SIG_IGN);	/* Not executing transactions */
	pqsignal(SIGQUIT, WALWriter_exit);
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, WALWriter_waken);
	/* XXX: May want this later */
	pqsignal(SIGUSR2, SIG_IGN);
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/*
	 * Identify myself via ps
	 */
	init_ps_display("WAL writer process", "", "", "");

	SetProcessingMode(InitProcessing);

	/* Early initialization */
	BaseInit();

	/*
	 * Create a per-backend PGPROC struct in shared memory, except in the
	 * EXEC_BACKEND case where this was done in SubPostmasterMain.  We must
	 * do this before we can use LWLocks (and in the EXEC_BACKEND case we
	 * already had to do some stuff with LWLocks).
	 */
#ifndef EXEC_BACKEND
	InitAuxiliaryProcess();
#endif

	/*
	 * Create a memory context that we will do all our work in.  We do this
	 * so that we can reset the context during error recovery and thereby
	 * avoid possible memory leaks.
	 */
	walwriter_cxt = AllocSetContextCreate(TopMemoryContext,
										  "WAL Writer",
										  ALLOCSET_DEFAULT_MINSIZE,
										  ALLOCSET_DEFAULT_INITSIZE,
										  ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(walwriter_cxt);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * This code is heavily based on bgwriter.c, q.v.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevents interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().  We don't have very many resources to worry
		 * about, but we do have LWLocks.
		 */
		LWLockReleaseAll();

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(walwriter_cxt);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(walwriter_cxt);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  We don't want to be
		 * filling the error logs as fast as we can.
		 */
		pg_usleep(1000000L);
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	InitXLOGAccess();

	ereport(LOG,
			(errmsg("WAL writer started")));

	PG_SETMASK(&UnBlockSig);

	WALWriter_MainLoop();

	ereport(LOG,
			(errmsg("WAL writer shutting down")));

	exit(0);
}

/* SIGQUIT signal handler for WALWriter process */
static void
WALWriter_exit(SIGNAL_ARGS)
{
	PG_SETMASK(&BlockSig);

	/*
	 * For now, we just nail the doors shut and get out of town.
	 */
	exit(0);
}

/* SIGHUP: set flag to re-read config file at next convenient time */
static void
WALWriterSigHupHandler(SIGNAL_ARGS)
{
	got_SIGHUP = true;
}

/* SIGUSR1 signal handler for WALWriter process: note that we were woken */
static void
WALWriter_waken(SIGNAL_ARGS)
{
	wakened = true;
}

/*
 * WALWriter_MainLoop
 *
 *	Repeats until the postmaster is no longer alive, or until a config
 *	reload leaves WALWriterActive() false:
 *		1. re-read the config file if SIGHUP arrived,
 *		2. call FlushAnyDeferredFsyncTransactions(),
 *		3. sleep for WALWriterDelay milliseconds, unless a SIGHUP or
 *		   SIGUSR1 arrived while we were working.
 */
static void
WALWriter_MainLoop(void)
{
	do
	{
		/* Check for config update */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
			if (!WALWriterActive())
				break;			/* user wants us to shut down */
		}

		/*
		 * Clear the wakeup flag before working, so that a SIGUSR1 arriving
		 * during the flush suppresses the sleep below.
		 */
		wakened = false;

		/*
		 * Do what we're here for and keep doing it as long as we're busy
		 */
		FlushAnyDeferredFsyncTransactions(true, false);

		/*
		 * WALWriterDelay is expressed in milliseconds, whereas pg_usleep()
		 * takes microseconds, so scale it.  Passing the raw value would
		 * sleep 1000 times too briefly and turn this loop into a near
		 * busy-wait.
		 */
		if (!got_SIGHUP && !wakened)
			pg_usleep(WALWriterDelay * 1000L);

	} while (PostmasterIsAlive(true));
}
/*-------------------------------------------------------------------------
 *
 * walwriter.h
 *	  WALWriter definitions
 *
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  $PostgreSQL$
 *
 *-------------------------------------------------------------------------
 */
#ifndef WALWRITER_H
#define WALWRITER_H

/* Delay setting for the WAL writer; defined in walwriter.c */
extern int	WALWriterDelay;

/* True when a WAL writer process is wanted, i.e. the delay is positive */
#define WALWriterActive()	(WALWriterDelay > 0)

/* Returns the PID of the started WAL writer child, or 0 if none was started */
extern int	StartWALWriter(void);

#endif   /* WALWRITER_H */
---------------------------(end of broadcast)--------------------------- TIP 7: You can help support the PostgreSQL project by donating at http://www.postgresql.org/about/donate