Magnus Hagander <mag...@hagander.net> writes:
> We can't get away with just comparing the relative part of the pathname.
> Because it will fail if there is another path with exactly the same length,
> containing the tablespace.

Actually… yeah.

> I think we might want to store a value in the tablespaceinfo struct
> indicating whether it's actually inside PGDATA (since we have the full path
> at that point), and then skip it based on that instead. Or store and pass
> the value of getcwd() perhaps.

I think it's best to stuff in the tablespaceinfo struct either NIL or
the relative path of the tablespace when found in $PGDATA, as done in
the attached.

> I've attached a slightly updated patch - I changed around a bit of logic
> order and updated some comments during my review. And added error-checking.

Thanks! I started again from your version for v3.

Regards,
-- 
Dimitri Fontaine
http://2ndQuadrant.fr     PostgreSQL : Expertise, Formation et Support

*** a/src/backend/replication/basebackup.c
--- b/src/backend/replication/basebackup.c
***************
*** 45,51 **** typedef struct
  } basebackup_options;
  
  
! static int64 sendDir(char *path, int basepathlen, bool sizeonly);
  static int64 sendTablespace(char *path, bool sizeonly);
  static bool sendFile(char *readfilename, char *tarfilename,
  		 struct stat * statbuf, bool missing_ok);
--- 45,51 ----
  } basebackup_options;
  
  
! static int64 sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces);
  static int64 sendTablespace(char *path, bool sizeonly);
  static bool sendFile(char *readfilename, char *tarfilename,
  		 struct stat * statbuf, bool missing_ok);
***************
*** 72,77 **** typedef struct
--- 72,78 ----
  {
  	char	   *oid;
  	char	   *path;
+ 	char       *rpath;			/* relative path within PGDATA, or nil. */
  	int64		size;
  } tablespaceinfo;
  
***************
*** 100,105 **** perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
--- 101,119 ----
  	XLogRecPtr	endptr;
  	TimeLineID	endtli;
  	char	   *labelfile;
+ 	char	    cwd[MAXPGPATH];
+ 	int         rootpathlen;
+ 
+ 	/*
+ 	 * We need to compute rootpathlen to allow for skipping tablespaces
+ 	 * installed within PGDATA.
+ 	 */
+ 	if (!getcwd(cwd, MAXPGPATH))
+ 		ereport(ERROR,
+ 				(errcode_for_file_access(),
+ 				 errmsg("could not determine current directory: %m")));
+ 
+ 	rootpathlen = strlen(cwd);
  
  	backup_started_in_recovery = RecoveryInProgress();
  
***************
*** 119,124 **** perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
--- 133,139 ----
  		{
  			char		fullpath[MAXPGPATH];
  			char		linkpath[MAXPGPATH];
+ 			char		*relpath = NULL;
  			int			rllen;
  
  			/* Skip special stuff */
***************
*** 145,153 **** perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
--- 160,178 ----
  			}
  			linkpath[rllen] = '\0';
  
+ 			/*
+ 			 * Relpath is the relative path of the tablespace linkpath when
+ 			 * the realname is found within PGDATA, or NULL.
+ 			 */
+ 			if (rllen > rootpathlen
+ 				&& strncmp(linkpath, cwd, rootpathlen) == 0
+ 				&& linkpath[rootpathlen] == '/')
+ 				relpath = linkpath + rootpathlen + 1;
+ 
  			ti = palloc(sizeof(tablespaceinfo));
  			ti->oid = pstrdup(de->d_name);
  			ti->path = pstrdup(linkpath);
+ 			ti->rpath = relpath ? pstrdup(relpath) : NULL;
  			ti->size = opt->progress ? sendTablespace(fullpath, true) : -1;
  			tablespaces = lappend(tablespaces, ti);
  #else
***************
*** 165,171 **** perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
  
  		/* Add a node for the base directory at the end */
  		ti = palloc0(sizeof(tablespaceinfo));
! 		ti->size = opt->progress ? sendDir(".", 1, true) : -1;
  		tablespaces = lappend(tablespaces, ti);
  
  		/* Send tablespace header */
--- 190,196 ----
  
  		/* Add a node for the base directory at the end */
  		ti = palloc0(sizeof(tablespaceinfo));
! 		ti->size = opt->progress ? sendDir(".", 1, true, tablespaces) : -1;
  		tablespaces = lappend(tablespaces, ti);
  
  		/* Send tablespace header */
***************
*** 191,197 **** perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
  				sendFileWithContent(BACKUP_LABEL_FILE, labelfile);
  
  				/* ... then the bulk of the files ... */
! 				sendDir(".", 1, false);
  
  				/* ... and pg_control after everything else. */
  				if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
--- 216,222 ----
  				sendFileWithContent(BACKUP_LABEL_FILE, labelfile);
  
  				/* ... then the bulk of the files ... */
! 				sendDir(".", 1, false, tablespaces);
  
  				/* ... and pg_control after everything else. */
  				if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
***************
*** 778,785 **** sendTablespace(char *path, bool sizeonly)
  		_tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf);
  	size = 512;					/* Size of the header just added */
  
! 	/* Send all the files in the tablespace version directory */
! 	size += sendDir(pathbuf, strlen(path), sizeonly);
  
  	return size;
  }
--- 803,815 ----
  		_tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf);
  	size = 512;					/* Size of the header just added */
  
! 	/*
! 	 * Send all the files in the tablespace version directory.
! 	 * Since we don't protect against nested tablespaces anywhere
! 	 * other than the root directory, no need to pass rootpathlen
! 	 * and tablespaces list.
! 	 */
! 	size += sendDir(pathbuf, strlen(path), sizeonly, NIL);
  
  	return size;
  }
***************
*** 788,796 **** sendTablespace(char *path, bool sizeonly)
   * Include all files from the given directory in the output tar stream. If
   * 'sizeonly' is true, we just calculate a total length and return it, without
   * actually sending anything.
   */
  static int64
! sendDir(char *path, int basepathlen, bool sizeonly)
  {
  	DIR		   *dir;
  	struct dirent *de;
--- 818,829 ----
   * Include all files from the given directory in the output tar stream. If
   * 'sizeonly' is true, we just calculate a total length and return it, without
   * actually sending anything.
+  *
+  * Omit any directory listed in tablepaces, so as to avoid backuping
+  * tablespaces twice when users created their tablespaces inside PGDATA.
   */
  static int64
! sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
  {
  	DIR		   *dir;
  	struct dirent *de;
***************
*** 931,936 **** sendDir(char *path, int basepathlen, bool sizeonly)
--- 964,972 ----
  		}
  		else if (S_ISDIR(statbuf.st_mode))
  		{
+ 			bool skip_this_dir = false;
+ 			ListCell *lc;
+ 
  			/*
  			 * Store a directory entry in the tar file so we can get the
  			 * permissions right.
***************
*** 939,946 **** sendDir(char *path, int basepathlen, bool sizeonly)
  				_tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
  			size += 512;		/* Size of the header just added */
  
! 			/* call ourselves recursively for a directory */
! 			size += sendDir(pathbuf, basepathlen, sizeonly);
  		}
  		else if (S_ISREG(statbuf.st_mode))
  		{
--- 975,1003 ----
  				_tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
  			size += 512;		/* Size of the header just added */
  
! 			/*
! 			 * call ourselves recursively for a directory, unless it happens
! 			 * to be a separate tablespace installed within PGDATA.
! 			 */
! 			foreach(lc, tablespaces)
! 			{
! 				tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
! 
! 				/*
! 				 * ti->rpath is the tablespace relative path within PGDATA, or
! 				 * NULL if the tablespace has been properly installed
! 				 * somewhere else.
! 				 *
! 				 * Don't forget to omit the leading "./" in comparing:
! 				 */
! 				if (ti->rpath && strcmp(ti->rpath, pathbuf + 2) == 0)
! 				{
! 					skip_this_dir = true;
! 					break;
! 				}
! 			}
! 			if (!skip_this_dir)
! 				size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces);
  		}
  		else if (S_ISREG(statbuf.st_mode))
  		{
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to