On Fri, Jul 26, 2013 at 01:27:34PM -0400, Bruce Momjian wrote:
> On Thu, Jul 25, 2013 at 10:57:28AM -0400, Bruce Momjian wrote:
> > Everyone should be aware that the 9.3 pg_upgrade -j/--jobs option on
> > Windows is currently broken, and hopefully will be fixed by the next
> > beta.
> >
> > Someone at PGDay UK told me they were getting pg_upgrade -j crashes on
> > Windows. Andrew Dunstan was able to reproduce the crash, and that has
> > been fixed, but there is still a race condition that I am trying to
> > diagnose.
>
> After three days of testing and creating a Windows MinGW build
> environment (with Andrew's help), I have come up with the attached patch
> which fixes the pg_upgrade -j race condition on Windows. In summary,
> creating a file with fopen() from a non-primary thread and then calling
> system() soon after can result in a file-in-use error. The solution was
> to issue the fopen() after system() in such cases.
>
> This would be applied to head and 9.3.
Sorry, patch attached.
--
Bruce Momjian <[email protected]> http://momjian.us
EnterpriseDB http://enterprisedb.com
+ It's impossible for everything to be true. +
diff --git a/contrib/pg_upgrade/exec.c b/contrib/pg_upgrade/exec.c
new file mode 100644
index ef123a8..3ec6e1c
*** a/contrib/pg_upgrade/exec.c
--- b/contrib/pg_upgrade/exec.c
*************** static void validate_exec(const char *di
*** 21,26 ****
--- 21,27 ----
#ifdef WIN32
static int win32_check_directory_write_permissions(void);
+ DWORD mainThreadId = 0;
#endif
*************** static int win32_check_directory_write_p
*** 37,48 ****
* If throw_error is true, this raises a PG_FATAL error and pg_upgrade
* terminates; otherwise it is just reported as PG_REPORT and exec_prog()
* returns false.
*/
bool
exec_prog(const char *log_file, const char *opt_log_file,
bool throw_error, const char *fmt,...)
{
! int result;
int written;
#define MAXCMDLEN (2 * MAXPGPATH)
--- 38,51 ----
* If throw_error is true, this raises a PG_FATAL error and pg_upgrade
* terminates; otherwise it is just reported as PG_REPORT and exec_prog()
* returns false.
+ *
+ * The code requires it be called first from the primary thread on Windows.
*/
bool
exec_prog(const char *log_file, const char *opt_log_file,
bool throw_error, const char *fmt,...)
{
! int result = 0;
int written;
#define MAXCMDLEN (2 * MAXPGPATH)
*************** exec_prog(const char *log_file, const ch
*** 50,55 ****
--- 53,64 ----
FILE *log;
va_list ap;
+ #ifdef WIN32
+ /* We assume we are called from the primary thread first */
+ if (mainThreadId == 0)
+ mainThreadId = GetCurrentThreadId();
+ #endif
+
written = strlcpy(cmd, SYSTEMQUOTE, sizeof(cmd));
va_start(ap, fmt);
written += vsnprintf(cmd + written, MAXCMDLEN - written, fmt, ap);
*************** exec_prog(const char *log_file, const ch
*** 61,66 ****
--- 70,91 ----
if (written >= MAXCMDLEN)
pg_log(PG_FATAL, "command too long\n");
+ pg_log(PG_VERBOSE, "%s\n", cmd);
+
+ #ifdef WIN32
+ /*
+ * For some reason, Windows issues a file-in-use error if we write data
+ * to the log file from a non-primary thread just before we create a
+ * subprocess that also writes to the same log file. One fix is to
+ * sleep for 100ms. A cleaner fix is to write to the log file _after_
+ * the subprocess has completed, so we do this only when writing from
+ * a non-primary thread. fflush(), running system() twice, and
+ * pre-creating the file do not see to help.
+ */
+ if (mainThreadId != GetCurrentThreadId())
+ result = system(cmd);
+ #endif
+
log = fopen(log_file, "a");
#ifdef WIN32
*************** exec_prog(const char *log_file, const ch
*** 84,94 ****
if (log == NULL)
pg_log(PG_FATAL, "cannot write to log file %s\n", log_file);
#ifdef WIN32
! fprintf(log, "\n\n");
#endif
- pg_log(PG_VERBOSE, "%s\n", cmd);
fprintf(log, "command: %s\n", cmd);
/*
* In Windows, we must close the log file at this point so the file is not
--- 109,126 ----
if (log == NULL)
pg_log(PG_FATAL, "cannot write to log file %s\n", log_file);
+
#ifdef WIN32
! /* Are we printing "command:" before its output? */
! if (mainThreadId == GetCurrentThreadId())
! fprintf(log, "\n\n");
#endif
fprintf(log, "command: %s\n", cmd);
+ #ifdef WIN32
+ /* Are we printing "command:" after its output? */
+ if (mainThreadId != GetCurrentThreadId())
+ fprintf(log, "\n\n");
+ #endif
/*
* In Windows, we must close the log file at this point so the file is not
*************** exec_prog(const char *log_file, const ch
*** 96,102 ****
*/
fclose(log);
! result = system(cmd);
if (result != 0)
{
--- 128,138 ----
*/
fclose(log);
! #ifdef WIN32
! /* see comment above */
! if (mainThreadId == GetCurrentThreadId())
! #endif
! result = system(cmd);
if (result != 0)
{
*************** exec_prog(const char *log_file, const ch
*** 118,124 ****
}
#ifndef WIN32
-
/*
* We can't do this on Windows because it will keep the "pg_ctl start"
* output filename open until the server stops, so we do the \n\n above on
--- 154,159 ----
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers