On Mon, Oct 22, 2007 at 01:19:24PM -0700, Trevor Talbot wrote:
> On 10/22/07, Magnus Hagander <[EMAIL PROTECTED]> wrote:
> > Trevor Talbot wrote:
>
> > > I'd probably take the approach of combining win32_waitpid() and
> > > threads. You'd end up with 1 thread per 64 backends; when something
> > > interesting happens the thread could push the info onto a queue, which
> > > the new win32_waitpid() would check. Use APCs to add new backends to
> > > threads with free slots.
> >
> > I was planning to make it even easier and let Windows do the job for us,
> > just using RegisterWaitForSingleObject(). Does the same - one thread per
> > 64 backends, but we don't have to deal with the queueing ourselves.
>
> Oh, good call -- I keep forgetting the native thread pool exists.
Taking this one to -hackers once and for all now...
Can you try the attached patch? See how many backends you can get up to.
This patch changes from using a single thread for each backend started to
using the builtin threadpool functionality. It also replaces the pid/handle
arrays with an i/o completion port. The net result is also, imho, much more
readable code :-)
Beware - there's still plenty of debugging code in there :-)
//Magnus
Index: src/backend/postmaster/postmaster.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/postmaster/postmaster.c,v
retrieving revision 1.542
diff -c -r1.542 postmaster.c
*** src/backend/postmaster/postmaster.c 26 Sep 2007 22:36:30 -0000 1.542
--- src/backend/postmaster/postmaster.c 26 Oct 2007 11:46:45 -0000
***************
*** 331,344 ****
#ifdef EXEC_BACKEND
#ifdef WIN32
- static void win32_AddChild(pid_t pid, HANDLE handle);
- static void win32_RemoveChild(pid_t pid);
static pid_t win32_waitpid(int *exitstatus);
! static DWORD WINAPI win32_sigchld_waiter(LPVOID param);
! static pid_t *win32_childPIDArray;
! static HANDLE *win32_childHNDArray;
! static unsigned long win32_numChildren = 0;
HANDLE PostmasterHandle;
#endif
--- 331,347 ----
#ifdef EXEC_BACKEND
#ifdef WIN32
static pid_t win32_waitpid(int *exitstatus);
! static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN
TimerOrWaitFired);
! static HANDLE win32ChildQueue;
!
! typedef struct
! {
! HANDLE waitHandle;
! HANDLE procHandle;
! DWORD procId;
! } win32_deadchild_waitinfo;
HANDLE PostmasterHandle;
#endif
***************
*** 899,914 ****
#ifdef WIN32
/*
! * Initialize the child pid/HANDLE arrays for signal handling.
*/
! win32_childPIDArray = (pid_t *)
! malloc(mul_size(NUM_BACKENDARRAY_ELEMS, sizeof(pid_t)));
! win32_childHNDArray = (HANDLE *)
! malloc(mul_size(NUM_BACKENDARRAY_ELEMS, sizeof(HANDLE)));
! if (!win32_childPIDArray || !win32_childHNDArray)
ereport(FATAL,
! (errcode(ERRCODE_OUT_OF_MEMORY),
! errmsg("out of memory")));
/*
* Set up a handle that child processes can use to check whether the
--- 902,913 ----
#ifdef WIN32
/*
! * Initialize I/O completion port used to deliver list of dead children.
*/
! win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0,
1);
! if (win32ChildQueue == NULL)
ereport(FATAL,
! (errmsg("could not create I/O completion port for child
queue")));
/*
* Set up a handle that child processes can use to check whether the
***************
*** 2072,2083 ****
#define LOOPHEADER() (exitstatus = status.w_status)
#else /* WIN32 */
#define LOOPTEST() ((pid = win32_waitpid(&exitstatus)) > 0)
! /*
! * We need to do this here, and not in CleanupBackend, since this is
! * to be called on all children when we are done with them. Could move
! * to LogChildExit, but that seems like asking for future trouble...
! */
! #define LOOPHEADER() (win32_RemoveChild(pid))
#endif /* WIN32 */
#endif /* HAVE_WAITPID */
--- 2071,2077 ----
#define LOOPHEADER() (exitstatus = status.w_status)
#else /* WIN32 */
#define LOOPTEST() ((pid = win32_waitpid(&exitstatus)) > 0)
! #define LOOPHEADER()
#endif /* WIN32 */
#endif /* HAVE_WAITPID */
***************
*** 3332,3343 ****
int i;
int j;
char cmdLine[MAXPGPATH * 2];
- HANDLE childHandleCopy;
- HANDLE waiterThread;
HANDLE paramHandle;
BackendParameters *param;
SECURITY_ATTRIBUTES sa;
char paramHandleStr[32];
/* Make sure caller set up argv properly */
Assert(argc >= 3);
--- 3326,3336 ----
int i;
int j;
char cmdLine[MAXPGPATH * 2];
HANDLE paramHandle;
BackendParameters *param;
SECURITY_ATTRIBUTES sa;
char paramHandleStr[32];
+ win32_deadchild_waitinfo *childinfo;
/* Make sure caller set up argv properly */
Assert(argc >= 3);
***************
*** 3346,3358 ****
Assert(argv[2] == NULL);
/* Verify that there is room in the child list */
if (win32_numChildren >= NUM_BACKENDARRAY_ELEMS)
{
elog(LOG, "no room for child entry in backend list");
/* Report same error as for a fork failure on Unix */
! errno = EAGAIN;
return -1;
! }
/* Set up shared memory for parameter passing */
ZeroMemory(&sa, sizeof(sa));
--- 3339,3352 ----
Assert(argv[2] == NULL);
/* Verify that there is room in the child list */
+ /* XXXX still need to verify? Or not?
if (win32_numChildren >= NUM_BACKENDARRAY_ELEMS)
{
elog(LOG, "no room for child entry in backend list");
/* Report same error as for a fork failure on Unix */
! /* errno = EAGAIN;
return -1;
! }*/
/* Set up shared memory for parameter passing */
ZeroMemory(&sa, sizeof(sa));
***************
*** 3463,3496 ****
return -1;
}
! if (!IsUnderPostmaster)
! {
! /* We are the Postmaster creating a child... */
! win32_AddChild(pi.dwProcessId, pi.hProcess);
! }
!
! /* Set up the thread to handle the SIGCHLD for this process */
! if (DuplicateHandle(GetCurrentProcess(),
! pi.hProcess,
! GetCurrentProcess(),
! &childHandleCopy,
! 0,
! FALSE,
! DUPLICATE_SAME_ACCESS) == 0)
ereport(FATAL,
! (errmsg_internal("could not duplicate child handle: error
code %d",
(int) GetLastError())));
! waiterThread = CreateThread(NULL, 64 * 1024, win32_sigchld_waiter,
! (LPVOID)
childHandleCopy, 0, NULL);
! if (!waiterThread)
! ereport(FATAL,
! (errmsg_internal("could not create sigchld
waiter thread: error code %d",
! (int)
GetLastError())));
! CloseHandle(waiterThread);
- if (IsUnderPostmaster)
- CloseHandle(pi.hProcess);
CloseHandle(pi.hThread);
return pi.dwProcessId;
--- 3457,3482 ----
return -1;
}
! /* Queue a SIGCHLD handler for this process */
! childinfo = malloc(sizeof(win32_deadchild_waitinfo));
! if (!childinfo)
ereport(FATAL,
! (errcode(ERRCODE_OUT_OF_MEMORY),
! errmsg("out of memory")));
! childinfo->procHandle = pi.hProcess;
! childinfo->procId = pi.dwProcessId;
! if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
!
pi.hProcess,
!
pgwin32_deadchild_callback,
!
childinfo,
!
INFINITE,
!
WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
! ereport(FATAL,
! (errmsg_internal("could not register process for wait: error
code %d",
(int) GetLastError())));
! /* Don't close pi.hProcess here - the wait thread needs access to it */
CloseHandle(pi.hThread);
return pi.dwProcessId;
***************
*** 4500,4636 ****
#ifdef WIN32
- /*
- * Note: The following three functions must not be interrupted (eg. by
- * signals). As the Postgres Win32 signalling architecture (currently)
- * requires polling, or APC checking functions which aren't used here, this
- * is not an issue.
- *
- * We keep two separate arrays, instead of a single array of pid/HANDLE
- * structs, to avoid having to re-create a handle array for
- * WaitForMultipleObjects on each call to win32_waitpid.
- */
-
- static void
- win32_AddChild(pid_t pid, HANDLE handle)
- {
- Assert(win32_childPIDArray && win32_childHNDArray);
- if (win32_numChildren < NUM_BACKENDARRAY_ELEMS)
- {
- win32_childPIDArray[win32_numChildren] = pid;
- win32_childHNDArray[win32_numChildren] = handle;
- ++win32_numChildren;
- }
- else
- ereport(FATAL,
- (errmsg_internal("no room for child entry with
pid %lu",
- (unsigned
long) pid)));
- }
-
- static void
- win32_RemoveChild(pid_t pid)
- {
- int i;
-
- Assert(win32_childPIDArray && win32_childHNDArray);
-
- for (i = 0; i < win32_numChildren; i++)
- {
- if (win32_childPIDArray[i] == pid)
- {
- CloseHandle(win32_childHNDArray[i]);
-
- /* Swap last entry into the "removed" one */
- --win32_numChildren;
- win32_childPIDArray[i] =
win32_childPIDArray[win32_numChildren];
- win32_childHNDArray[i] =
win32_childHNDArray[win32_numChildren];
- return;
- }
- }
-
- ereport(WARNING,
- (errmsg_internal("could not find child entry with pid
%lu",
- (unsigned long) pid)));
- }
-
static pid_t
win32_waitpid(int *exitstatus)
{
/*
! * Note: Do NOT use WaitForMultipleObjectsEx, as we don't want to run
! * queued APCs here.
*/
! int index;
! DWORD exitCode;
! DWORD ret;
! unsigned long offset;
!
! Assert(win32_childPIDArray && win32_childHNDArray);
! elog(DEBUG3, "waiting on %lu children", win32_numChildren);
!
! for (offset = 0; offset < win32_numChildren; offset +=
MAXIMUM_WAIT_OBJECTS)
{
! unsigned long num = Min(MAXIMUM_WAIT_OBJECTS, win32_numChildren
- offset);
!
! ret = WaitForMultipleObjects(num, &win32_childHNDArray[offset],
FALSE, 0);
! switch (ret)
{
! case WAIT_FAILED:
! ereport(LOG,
! (errmsg_internal("failed to
wait on %lu of %lu children: error code %d",
! num,
win32_numChildren, (int) GetLastError())));
! return -1;
!
! case WAIT_TIMEOUT:
! /* No children (in this chunk) have finished */
! break;
!
! default:
!
! /*
! * Get the exit code, and return the PID of,
the respective
! * process
! */
! index = offset + ret - WAIT_OBJECT_0;
! Assert(index >= 0 && index < win32_numChildren);
! if
(!GetExitCodeProcess(win32_childHNDArray[index], &exitCode))
! {
! /*
! * If we get this far, this should
never happen, but, then
! * again... No choice other than to
assume a catastrophic
! * failure.
! */
! ereport(FATAL,
! (errmsg_internal("failed to get exit
code for child %lu",
! (unsigned long)
win32_childPIDArray[index])));
! }
! *exitstatus = (int) exitCode;
! return win32_childPIDArray[index];
}
}
- /* No children have finished */
return -1;
}
/*
! * Note! Code below executes on separate threads, one for
! * each child process created
*/
! static DWORD WINAPI
! win32_sigchld_waiter(LPVOID param)
{
! HANDLE procHandle = (HANDLE) param;
! DWORD r = WaitForSingleObject(procHandle, INFINITE);
! if (r == WAIT_OBJECT_0)
! pg_queue_signal(SIGCHLD);
! else
! write_stderr("could not wait on child process handle: error
code %d\n",
! (int) GetLastError());
! CloseHandle(procHandle);
! return 0;
}
#endif /* WIN32 */
--- 4486,4549 ----
#ifdef WIN32
static pid_t
win32_waitpid(int *exitstatus)
{
+ DWORD dwd;
+ ULONG_PTR key;
+ OVERLAPPED* ovl;
/*
! * Check if there are any dead children. If there are, return the pid
of the first one that died.
*/
! if (GetQueuedCompletionStatus(win32ChildQueue,&dwd,&key,&ovl,0))
{
! *exitstatus = (int)key;
{
! char t[128];
! sprintf(t,"Unqueued dead child pid %i, exitcode %i",
dwd, (int)key);
! OutputDebugString(t);
}
+ return dwd;
}
+ OutputDebugString("No dead children to unqueue");
return -1;
}
/*
! * Note! Code below executes on a thread pool! All operations must
! * be thread safe! Note that elog() and friends must *not* be used.
*/
! static void WINAPI
! pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
{
! win32_deadchild_waitinfo *childinfo = (win32_deadchild_waitinfo
*)lpParameter;
! DWORD exitcode;
! if (TimerOrWaitFired)
! return; /* timeout. Should not happen. */
! /* Remove handle from wait - required even though it's set to wait only
once */
! UnregisterWaitEx(childinfo->waitHandle, NULL);
!
! /* Post I/O completion about dead pid. First, figure out the exit code
*/
! if (!GetExitCodeProcess(childinfo->procHandle, &exitcode))
! exitcode = 255; /* should never happen */
!
!
PostQueuedCompletionStatus(win32ChildQueue,childinfo->procId,(ULONG_PTR)exitcode,NULL);
! {
! char t[128];
! sprintf(t,"Posted completion status for child %i, exitcode %i",
childinfo->procId, exitcode);
! OutputDebugString(t);
! }
!
! /* Handle is per-process, so we close it hear instead of in the caller
*/
! CloseHandle(childinfo->procHandle);
!
! free(childinfo);
!
! /* Queue SIGCHLD signal */
! pg_queue_signal(SIGCHLD);
}
#endif /* WIN32 */
Index: src/include/port/win32.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/port/win32.h,v
retrieving revision 1.76
diff -c -r1.76 win32.h
*** src/include/port/win32.h 25 Jul 2007 12:22:53 -0000 1.76
--- src/include/port/win32.h 26 Oct 2007 11:16:48 -0000
***************
*** 4,9 ****
--- 4,10 ----
#define WIN32_ONLY_COMPILER
#endif
+ #define _WIN32_WINNT 0x0500
/*
* Always build with SSPI support. Keep it as a #define in case
* we want a switch to disable it sometime in the future.
---------------------------(end of broadcast)---------------------------
TIP 2: Don't 'kill -9' the postmaster