Re: [BUGS] pg_basebackup fails if a data file is removed

2012-12-21 Thread Heikki Linnakangas

On 21.12.2012 15:30, Magnus Hagander wrote:

On Fri, Dec 21, 2012 at 2:28 PM, Heikki Linnakangas
  wrote:

When pg_basebackup copies data files, it does basically this:


if (lstat(pathbuf,&statbuf) != 0)
{
 if (errno != ENOENT)
 ereport(ERROR,
 (errcode_for_file_access(),
  errmsg("could not stat file or directory
\"%s\": %m",
 pathbuf)));

 /* If the file went away while scanning, it's no error. */
 continue;
}



...
sendFile(pathbuf, pathbuf + basepathlen + 1,&statbuf);


There's a race condition there. If the file is removed after the lstat call,
and before sendFile opens the file, the backup fails with an error. It's a
fairly tight window, so it's difficult to run into by accident, but by
putting a breakpoint with a debugger there it's quite easy to reproduce, by
e.g doing a VACUUM FULL on the table about to be copied.

A straightforward fix is to allow sendFile() to ignore ENOENT. Patch
attached.


Looks good to me.


Ok, committed.


Nice spot - don't tell me you actually ran into it
during testing? :)


Heh, no, eyeballing the code.

- Heikki


--
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs


Re: [BUGS] pg_basebackup fails if a data file is removed

2012-12-21 Thread Magnus Hagander
On Fri, Dec 21, 2012 at 2:28 PM, Heikki Linnakangas
 wrote:
> When pg_basebackup copies data files, it does basically this:
>
>> if (lstat(pathbuf, &statbuf) != 0)
>> {
>> if (errno != ENOENT)
>> ereport(ERROR,
>> (errcode_for_file_access(),
>>  errmsg("could not stat file or directory
>> \"%s\": %m",
>> pathbuf)));
>>
>> /* If the file went away while scanning, it's no error. */
>> continue;
>> }
>
>> ...
>> sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf);
>
> There's a race condition there. If the file is removed after the lstat call,
> and before sendFile opens the file, the backup fails with an error. It's a
> fairly tight window, so it's difficult to run into by accident, but by
> putting a breakpoint with a debugger there it's quite easy to reproduce, by
> e.g doing a VACUUM FULL on the table about to be copied.
>
> A straightforward fix is to allow sendFile() to ignore ENOENT. Patch
> attached.

Looks good to me. Nice spot - don't tell me you actually ran into it
during testing? :)

--
 Magnus Hagander
 Me: http://www.hagander.net/
 Work: http://www.redpill-linpro.com/


-- 
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs


[BUGS] pg_basebackup fails if a data file is removed

2012-12-21 Thread Heikki Linnakangas

When pg_basebackup copies data files, it does basically this:


if (lstat(pathbuf, &statbuf) != 0)
{
if (errno != ENOENT)
ereport(ERROR,
(errcode_for_file_access(),
 errmsg("could not stat file or directory \"%s\": 
%m",
pathbuf)));

/* If the file went away while scanning, it's no error. */
continue;
}

> ...
> sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf);

There's a race condition there. If the file is removed after the lstat 
call, and before sendFile opens the file, the backup fails with an 
error. It's a fairly tight window, so it's difficult to run into by 
accident, but by putting a breakpoint with a debugger there it's quite 
easy to reproduce, by e.g doing a VACUUM FULL on the table about to be 
copied.


A straightforward fix is to allow sendFile() to ignore ENOENT. Patch 
attached.


- Heikki
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 1b234c6..bc95215 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -44,8 +44,8 @@ typedef struct
 
 
 static int64 sendDir(char *path, int basepathlen, bool sizeonly);
-static void sendFile(char *readfilename, char *tarfilename,
-		 struct stat * statbuf);
+static bool sendFile(char *readfilename, char *tarfilename,
+		 struct stat * statbuf, bool missing_ok);
 static void sendFileWithContent(const char *filename, const char *content);
 static void _tarWriteHeader(const char *filename, const char *linktarget,
 struct stat * statbuf);
@@ -194,7 +194,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
 	XLOG_CONTROL_FILE)));
 }
 
-sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf);
+sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, false);
 			}
 
 			/*
@@ -715,11 +715,18 @@ sendDir(char *path, int basepathlen, bool sizeonly)
 		}
 		else if (S_ISREG(statbuf.st_mode))
 		{
-			/* Add size, rounded up to 512byte block */
-			size += ((statbuf.st_size + 511) & ~511);
+			bool sent = false;
+
 			if (!sizeonly)
-sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf);
-			size += 512;		/* Size of the header of the file */
+sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf,
+true);
+
+			if (sent || sizeonly)
+			{
+/* Add size, rounded up to 512byte block */
+size += ((statbuf.st_size + 511) & ~511);
+size += 512;		/* Size of the header of the file */
+			}
 		}
 		else
 			ereport(WARNING,
@@ -779,9 +786,17 @@ _tarChecksum(char *header)
 	return sum;
 }
 
-/* Given the member, write the TAR header & send the file */
-static void
-sendFile(char *readfilename, char *tarfilename, struct stat * statbuf)
+/*
+ * Given the member, write the TAR header & send the file.
+ *
+ * If 'missing_ok' is true, will not throw an error if the file is not found.
+ *
+ * Returns true if the file was successfully sent, false if 'missing_ok',
+ * and the file did not exist.
+ */
+static bool
+sendFile(char *readfilename, char *tarfilename, struct stat *statbuf,
+		 bool missing_ok)
 {
 	FILE	   *fp;
 	char		buf[TAR_SEND_SIZE];
@@ -791,9 +806,13 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf)
 
 	fp = AllocateFile(readfilename, "rb");
 	if (fp == NULL)
+	{
+		if (errno == ENOENT && missing_ok)
+			return false;
 		ereport(ERROR,
 (errcode_for_file_access(),
  errmsg("could not open file \"%s\": %m", readfilename)));
+	}
 
 	/*
 	 * Some compilers will throw a warning knowing this test can never be true
@@ -847,6 +866,8 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf)
 	}
 
 	FreeFile(fp);
+
+	return true;
 }
 
 

-- 
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs