On Tue, Jan 25, 2011 at 16:34, Magnus Hagander <mag...@hagander.net> wrote: > On Tue, Jan 25, 2011 at 15:04, Fujii Masao <masao.fu...@gmail.com> wrote: >> On Tue, Jan 25, 2011 at 10:28 PM, Fujii Masao <masao.fu...@gmail.com> wrote: >>>> (the discussed changse above have been applied and pushed to github) >>> >>> Thanks! I'll test and review that. >> >> WAL file might get recycled or removed while walsender is reading it. >> So the WAL file which pg_basebackup seemingly received successfully >> might be actually invalid. Shouldn't we need to check that what we read >> is valid as XLogRead does?
We should, and the easiest way is to actually use XLogRead() since the code is already there. How about the way in this patch? -- Magnus Hagander Me: http://www.hagander.net/ Work: http://www.redpill-linpro.com/
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index 73f26b4..0775b7a 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -1460,7 +1460,7 @@ The commands accepted in walsender mode are: </varlistentry> <varlistentry> - <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>]</term> + <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>]</term> <listitem> <para> Instructs the server to start streaming a base backup. @@ -1505,6 +1505,18 @@ The commands accepted in walsender mode are: </para> </listitem> </varlistentry> + + <varlistentry> + <term><literal>WAL</literal></term> + <listitem> + <para> + Include the necessary WAL segments in the backup. This will include + all the files between start and stop backup in the + <filename>pg_xlog</filename> directory of the base directory tar + file. + </para> + </listitem> + </varlistentry> </variablelist> </para> <para> @@ -1561,7 +1573,10 @@ The commands accepted in walsender mode are: </listitem> <listitem> <para> - <filename>pg_xlog</> (including subdirectories) + <filename>pg_xlog</>, including subdirectories. If the backup is run + with wal files included, a synthesized version of pg_xlog will be + included, but it will only contain the files necessary for the + backup to work, not the rest of the contents. </para> </listitem> </itemizedlist> diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml index 321c8ca..60ffa3c 100644 --- a/doc/src/sgml/ref/pg_basebackup.sgml +++ b/doc/src/sgml/ref/pg_basebackup.sgml @@ -145,6 +145,31 @@ PostgreSQL documentation </varlistentry> <varlistentry> + <term><option>-x</option></term> + <term><option>--xlog</option></term> + <listitem> + <para> + Includes the required transaction log files (WAL files) in the + backup. This will include all transaction logs generated during + the backup. If this option is specified, it is possible to start + a postmaster directly in the extracted directory without the need + to consult the log archive, thus making this a completely standalone + backup. + </para> + <note> + <para> + The transaction log files are collected at the end of the backup. + Therefore, it is necessary for the + <xref linkend="guc-wal-keep-segments"> parameter to be set high + enough that the log is not removed before the end of the backup. + If the log has been rotated when it's time to transfer it, the + backup will fail and be unusable. + </para> + </note> + </listitem> + </varlistentry> + + <varlistentry> <term><option>-Z <replaceable class="parameter">level</replaceable></option></term> <term><option>--compress=<replaceable class="parameter">level</replaceable></option></term> <listitem> diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 251ed8e..819419e 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -37,6 +37,7 @@ typedef struct const char *label; bool progress; bool fastcheckpoint; + bool includewal; } basebackup_options; @@ -46,11 +47,17 @@ static void _tarWriteHeader(char *filename, char *linktarget, struct stat * statbuf); static void send_int8_string(StringInfoData *buf, int64 intval); static void SendBackupHeader(List *tablespaces); -static void SendBackupDirectory(char *location, char *spcoid); static void base_backup_cleanup(int code, Datum arg); static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir); static void parse_basebackup_options(List *options, basebackup_options *opt); +/* + * Size of each block sent into the tar stream for larger files. + * + * XLogSegSize *MUST* be evenly dividable by this + */ +#define TAR_SEND_SIZE 32768 + typedef struct { char *oid; @@ -78,7 +85,10 @@ base_backup_cleanup(int code, Datum arg) static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir) { - do_pg_start_backup(opt->label, opt->fastcheckpoint); + XLogRecPtr startptr; + XLogRecPtr endptr; + + startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint); PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0); { @@ -87,12 +97,6 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) struct dirent *de; tablespaceinfo *ti; - - /* Add a node for the base directory */ - ti = palloc0(sizeof(tablespaceinfo)); - ti->size = opt->progress ? sendDir(".", 1, true) : -1; - tablespaces = lappend(tablespaces, ti); - /* Collect information about all tablespaces */ while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL) { @@ -120,6 +124,10 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) tablespaces = lappend(tablespaces, ti); } + /* Add a node for the base directory at the end */ + ti = palloc0(sizeof(tablespaceinfo)); + ti->size = opt->progress ? sendDir(".", 1, true) : -1; + tablespaces = lappend(tablespaces, ti); /* Send tablespace header */ SendBackupHeader(tablespaces); @@ -128,13 +136,106 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) foreach(lc, tablespaces) { tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc); + StringInfoData buf; + + /* Send CopyOutResponse message */ + pq_beginmessage(&buf, 'H'); + pq_sendbyte(&buf, 0); /* overall format */ + pq_sendint(&buf, 0, 2); /* natts */ + pq_endmessage(&buf); + + sendDir(ti->path == NULL ? "." : ti->path, + ti->path == NULL ? 1 : strlen(ti->path), + false); - SendBackupDirectory(ti->path, ti->oid); + /* + * If we're including WAL, and this is the main data directory + * we don't terminate the tar stream here. Instead, we will append + * the xlog files below and terminate it then. This is safe since + * the main data directory is always sent *last*. + */ + if (opt->includewal && ti->path == NULL) + { + Assert(lnext(lc) == NULL); + } + else + pq_putemptymessage('c'); /* CopyDone */ } } PG_END_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0); - do_pg_stop_backup(); + endptr = do_pg_stop_backup(); + + if (opt->includewal) + { + /* + * We've left the last tar file "open", so we can now append the + * required WAL files to it. + */ + uint32 logid, logseg; + uint32 endlogid, endlogseg; + struct stat statbuf; + + MemSet(&statbuf, 0, sizeof(statbuf)); + statbuf.st_mode=S_IRUSR | S_IWUSR; +#ifndef WIN32 + statbuf.st_uid=getuid(); + statbuf.st_gid=getgid(); +#endif + statbuf.st_size=XLogSegSize; + statbuf.st_mtime=time(NULL); + + XLByteToSeg(startptr, logid, logseg); + XLByteToSeg(endptr, endlogid, endlogseg); + elog(LOG, "Going to send wal from %u.%u to %u.%u", + logid, logseg, + endlogid, endlogseg); + + while (true) + { + /* Send another xlog segment */ + char xlogname[MAXFNAMELEN]; + char fn[MAXPGPATH]; + int i; + + /* Send the current WAL file */ + XLogFileName(xlogname, ThisTimeLineID, logid, logseg); + + sprintf(fn, "./pg_xlog/%s", xlogname); + _tarWriteHeader(fn, NULL, &statbuf); + + /* Send the actual WAL file contents, by reading block-by-block */ + for (i = 0; i < XLogSegSize / TAR_SEND_SIZE; i++) + { + char buf[TAR_SEND_SIZE]; + XLogRecPtr ptr; + + ptr.xlogid = logid; + ptr.xrecoff = logseg * XLogSegSize + TAR_SEND_SIZE * i; + + XLogRead(buf, ptr, TAR_SEND_SIZE); + if (pq_putmessage('d', buf, TAR_SEND_SIZE)) + ereport(ERROR, + (errmsg("base backup could not send data, aborting backup"))); + } + /* + * Files are always fixed size, and always end on a 512 byte + * boundary, so padding is never necessary. + */ + + + /* Advance to the next WAL file */ + NextLogSeg(logid, logseg); + + /* Have we reached our stop position yet? */ + if (logid > endlogid || + (logid == endlogid && logseg >= endlogseg)) + break; + } + + /* Send CopyDone message for the last tar file*/ + pq_putemptymessage('c'); + } } /* @@ -147,6 +248,7 @@ parse_basebackup_options(List *options, basebackup_options *opt) bool o_label = false; bool o_progress = false; bool o_fast = false; + bool o_wal = false; MemSet(opt, 0, sizeof(*opt)); foreach(lopt, options) @@ -180,6 +282,15 @@ parse_basebackup_options(List *options, basebackup_options *opt) opt->fastcheckpoint = true; o_fast = true; } + else if (strcmp(defel->defname, "wal") == 0) + { + if (o_wal) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("duplicate option \"%s\"", defel->defname))); + opt->includewal = true; + o_wal = true; + } else elog(ERROR, "option \"%s\" not recognized", defel->defname); @@ -316,26 +427,6 @@ SendBackupHeader(List *tablespaces) pq_puttextmessage('C', "SELECT"); } -static void -SendBackupDirectory(char *location, char *spcoid) -{ - StringInfoData buf; - - /* Send CopyOutResponse message */ - pq_beginmessage(&buf, 'H'); - pq_sendbyte(&buf, 0); /* overall format */ - pq_sendint(&buf, 0, 2); /* natts */ - pq_endmessage(&buf); - - /* tar up the data directory if NULL, otherwise the tablespace */ - sendDir(location == NULL ? "." : location, - location == NULL ? 1 : strlen(location), - false); - - /* Send CopyDone message */ - pq_putemptymessage('c'); -} - static int64 sendDir(char *path, int basepathlen, bool sizeonly) @@ -506,7 +597,7 @@ static void sendFile(char *filename, int basepathlen, struct stat * statbuf) { FILE *fp; - char buf[32768]; + char buf[TAR_SEND_SIZE]; size_t cnt; pgoff_t len = 0; size_t pad; diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y index 879a0bd..e1a4a51 100644 --- a/src/backend/replication/repl_gram.y +++ b/src/backend/replication/repl_gram.y @@ -71,6 +71,7 @@ Node *replication_parse_result; %token K_LABEL %token K_PROGRESS %token K_FAST +%token K_WAL %token K_START_REPLICATION %type <node> command @@ -106,7 +107,7 @@ identify_system: ; /* - * BASE_BACKUP [LABEL <label>] [PROGRESS] [FAST] + * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] */ base_backup: K_BASE_BACKUP base_backup_opt_list @@ -136,7 +137,12 @@ base_backup_opt: $$ = makeDefElem("fast", (Node *)makeInteger(TRUE)); } - + | K_WAL + { + $$ = makeDefElem("wal", + (Node *)makeInteger(TRUE)); + } + ; /* * START_REPLICATION %X/%X diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l index e6dfb04..87e77d9 100644 --- a/src/backend/replication/repl_scanner.l +++ b/src/backend/replication/repl_scanner.l @@ -61,6 +61,7 @@ FAST { return K_FAST; } IDENTIFY_SYSTEM { return K_IDENTIFY_SYSTEM; } LABEL { return K_LABEL; } PROGRESS { return K_PROGRESS; } +WAL { return K_WAL; } START_REPLICATION { return K_START_REPLICATION; } "," { return ','; } ";" { return ';'; } diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 44efa9f..f70458e 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -105,7 +105,6 @@ static int WalSndLoop(void); static void InitWalSnd(void); static void WalSndHandshake(void); static void WalSndKill(int code, Datum arg); -static void XLogRead(char *buf, XLogRecPtr recptr, Size nbytes); static bool XLogSend(char *msgbuf, bool *caughtup); static void CheckClosedConnection(void); static void IdentifySystem(void); @@ -649,8 +648,13 @@ WalSndKill(int code, Datum arg) * * XXX probably this should be improved to suck data directly from the * WAL buffers when possible. + * + * Will open, and keep open, one WAL segment stored in the global file + * descriptor sendFile. This means if XLogRead is used once, there will + * always be one descriptor left open until the process ends, but never + * more than one. */ -static void +void XLogRead(char *buf, XLogRecPtr recptr, Size nbytes) { XLogRecPtr startRecPtr = recptr; diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 5363034..ef2718a 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -33,6 +33,7 @@ char *label = "pg_basebackup base backup"; bool showprogress = false; int verbose = 0; int compresslevel = 0; +bool includewal = false; bool fastcheckpoint = false; char *dbhost = NULL; char *dbuser = NULL; @@ -124,6 +125,7 @@ usage(void) printf(_("\nOptions controlling the output:\n")); printf(_(" -D, --pgdata=directory receive base backup into directory\n")); printf(_(" -F, --format=p|t output format (plain, tar)\n")); + printf(_(" -x, --xlog include required WAL files in backup\n")); printf(_(" -Z, --compress=0-9 compress tar output\n")); printf(_("\nGeneral options:\n")); printf(_(" -c, --checkpoint=fast|spread\n" @@ -200,16 +202,20 @@ verify_dir_is_empty_or_create(char *dirname) static void progress_report(int tablespacenum, char *fn) { + int percent = (int) ((totaldone / 1024) * 100 / totalsize); + if (percent > 100) + percent = 100; + if (verbose) fprintf(stderr, INT64_FORMAT "/" INT64_FORMAT " kB (%i%%) %i/%i tablespaces (%-30s)\r", totaldone / 1024, totalsize, - (int) ((totaldone / 1024) * 100 / totalsize), + percent, tablespacenum, tablespacecount, fn); else fprintf(stderr, INT64_FORMAT "/" INT64_FORMAT " kB (%i%%) %i/%i tablespaces\r", totaldone / 1024, totalsize, - (int) ((totaldone / 1024) * 100 / totalsize), + percent, tablespacenum, tablespacecount); } @@ -746,9 +752,10 @@ BaseBackup() conn = GetConnection(); PQescapeStringConn(conn, escaped_label, label, sizeof(escaped_label), &i); - snprintf(current_path, sizeof(current_path), "BASE_BACKUP LABEL '%s' %s %s", + snprintf(current_path, sizeof(current_path), "BASE_BACKUP LABEL '%s' %s %s %s", escaped_label, showprogress ? "PROGRESS" : "", + includewal ? "WAL" : "", fastcheckpoint ? "FAST" : ""); if (PQsendQuery(conn, current_path) == 0) @@ -789,7 +796,7 @@ BaseBackup() * first once since it can be relocated, and it will be checked before * we do anything anyway. */ - if (format == 'p' && i > 0) + if (format == 'p' && !PQgetisnull(res, i, 1)) verify_dir_is_empty_or_create(PQgetvalue(res, i, 1)); } @@ -848,6 +855,7 @@ main(int argc, char **argv) {"pgdata", required_argument, NULL, 'D'}, {"format", required_argument, NULL, 'F'}, {"checkpoint", required_argument, NULL, 'c'}, + {"xlog", no_argument, NULL, 'x'}, {"compress", required_argument, NULL, 'Z'}, {"label", required_argument, NULL, 'l'}, {"host", required_argument, NULL, 'h'}, @@ -881,7 +889,7 @@ main(int argc, char **argv) } } - while ((c = getopt_long(argc, argv, "D:F:l:Z:c:h:p:U:wWvP", + while ((c = getopt_long(argc, argv, "D:F:l:Z:c:h:p:U:xwWvP", long_options, &option_index)) != -1) { switch (c) @@ -901,6 +909,9 @@ main(int argc, char **argv) exit(1); } break; + case 'x': + includewal = true; + break; case 'l': label = xstrdup(optarg); break; diff --git a/src/include/replication/walsender.h b/src/include/replication/walsender.h index bd9e193..8a87311 100644 --- a/src/include/replication/walsender.h +++ b/src/include/replication/walsender.h @@ -67,6 +67,8 @@ extern Size WalSndShmemSize(void); extern void WalSndShmemInit(void); extern void WalSndWakeup(void); extern void WalSndSetState(WalSndState state); +extern void XLogRead(char *buf, XLogRecPtr recptr, Size nbytes); + extern Datum pg_stat_get_wal_senders(PG_FUNCTION_ARGS);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers