On Mon, Nov 7, 2016 at 10:14 AM, Tsunakawa, Takayuki
<[email protected]> wrote:
> From: [email protected]
>> [mailto:[email protected]] On Behalf Of Ashutosh Bapat
>> I am not sure if following condition is a good idea in ServerLoop()
>> 1650 if (pmState == PM_WAIT_DEAD_END || ClosedSockets)
>>
>> There are no sockets to listen on, so select in the else condition is going
>> to sleep for timeout determined based on the sequence expected.
>> Just before we close sockets in pmdie() it sets AbortStartTime, which
>> determines the timeout for the sleep here. So, it doesn't make sense to
>> ignore it. Instead may be we should change the default 60s sleep to 100ms
>> sleep in DetermineSleepTime().
>
> That sounds better. I modified cleaned ServerLoop() by pushing the existing
> 100ms logic into DetermineSleepTime().
I have changed some comments around this block. See attached patch.
Let me know if that looks good.
>
>
>> While the postmaster is terminating children, a new connection request may
>> arrive. We should probably close listening sockets before terminating
>> children in pmdie().
>
> I moved ClosePostmasterSocket() call before terminating children in the
> immediate shutdown case. I didn't change the behavior of smart and fast
> shutdown modes, because they may take a long time to complete due to
> checkpointing etc. Users will want to know what's happening during shutdown
> or after pg_ctl stop times out, by getting the message "FATAL: the database
> system is shutting down" when they try to connect to the database. The
> immediate shutdown or crash should better be as fast as possible.
OK.
--
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 24add74..52c0f46 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -265,20 +265,22 @@ static StartupStatusEnum StartupStatus = STARTUP_NOT_RUNNING;
/* Startup/shutdown state */
#define NoShutdown 0
#define SmartShutdown 1
#define FastShutdown 2
#define ImmediateShutdown 3
static int Shutdown = NoShutdown;
static bool FatalError = false; /* T if recovering from backend crash */
+static bool ClosedSockets = false; /* T if postmaster's listening sockets
+ have been closed. */
/*
* We use a simple state machine to control startup, shutdown, and
* crash recovery (which is rather like shutdown followed by startup).
*
* After doing all the postmaster initialization work, we enter PM_STARTUP
* state and the startup process is launched. The startup process begins by
* reading the control file and other preliminary initialization steps.
* In a normal startup, or after crash recovery, the startup process exits
* with exit code 0 and we switch to PM_RUN state. However, archive recovery
@@ -372,20 +374,21 @@ static DNSServiceRef bonjour_sdref = NULL;
/*
* postmaster.c - function prototypes
*/
static void CloseServerPorts(int status, Datum arg);
static void unlink_external_pid_file(int status, Datum arg);
static void getInstallationPaths(const char *argv0);
static void checkDataDir(void);
static Port *ConnCreate(int serverFd);
static void ConnFree(Port *port);
+static void ClosePostmasterSockets(void);
static void reset_shared(int port);
static void SIGHUP_handler(SIGNAL_ARGS);
static void pmdie(SIGNAL_ARGS);
static void reaper(SIGNAL_ARGS);
static void sigusr1_handler(SIGNAL_ARGS);
static void startup_die(SIGNAL_ARGS);
static void dummy_handler(SIGNAL_ARGS);
static void StartupPacketTimeoutHandler(void);
static void CleanupBackend(int pid, int exitstatus);
static bool CleanupBackgroundWorker(int pid, int exitstatus);
@@ -1513,20 +1516,28 @@ checkDataDir(void)
* background tasks handled by ServerLoop get done even when no requests are
* arriving. However, if there are background workers waiting to be started,
* we don't actually sleep so that they are quickly serviced. Other exception
* cases are as shown in the code.
*/
static void
DetermineSleepTime(struct timeval * timeout)
{
TimestampTz next_wakeup = 0;
+ /* In PM_WAIT_DEAD_END state, sleeping for 100ms seems reasonable. */
+ if (pmState == PM_WAIT_DEAD_END)
+ {
+ timeout->tv_sec = 0;
+ timeout->tv_usec = 100000L;
+ return;
+ }
+
/*
* Normal case: either there are no background workers at all, or we're in
* a shutdown sequence (during which we ignore bgworkers altogether).
*/
if (Shutdown > NoShutdown ||
(!StartWorkerNeeded && !HaveCrashedWorker))
{
if (AbortStartTime != 0)
{
/* time left to abort; clamp to 0 in case it already expired */
@@ -1623,57 +1634,52 @@ ServerLoop(void)
last_lockfile_recheck_time = last_touch_time = time(NULL);
nSockets = initMasks(&readmask);
for (;;)
{
fd_set rmask;
int selres;
time_t now;
+ /* must set timeout each time; some OSes change it! */
+ struct timeval timeout;
/*
* Wait for a connection request to arrive.
*
* We block all signals except while sleeping. That makes it safe for
* signal handlers, which again block all signals while executing, to
* do nontrivial work.
*
* If we are in PM_WAIT_DEAD_END state, then we don't want to accept
* any new connections, so we don't call select(), and just sleep.
*/
- memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
- if (pmState == PM_WAIT_DEAD_END)
- {
- PG_SETMASK(&UnBlockSig);
+ /* Needs to run with blocked signals! */
+ DetermineSleepTime(&timeout);
- pg_usleep(100000L); /* 100 msec seems reasonable */
- selres = 0;
+ PG_SETMASK(&UnBlockSig);
- PG_SETMASK(&BlockSig);
+ if (pmState == PM_WAIT_DEAD_END || ClosedSockets)
+ {
+ pg_usleep(timeout.tv_sec * 1000000L + timeout.tv_usec);
+ selres = 0;
}
else
{
- /* must set timeout each time; some OSes change it! */
- struct timeval timeout;
-
- /* Needs to run with blocked signals! */
- DetermineSleepTime(&timeout);
-
- PG_SETMASK(&UnBlockSig);
-
+ memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
selres = select(nSockets, &rmask, NULL, NULL, &timeout);
-
- PG_SETMASK(&BlockSig);
}
+ PG_SETMASK(&BlockSig);
+
/* Now check the select() result */
if (selres < 0)
{
if (errno != EINTR && errno != EWOULDBLOCK)
{
ereport(LOG,
(errcode_for_socket_access(),
errmsg("select() failed in postmaster: %m")));
return STATUS_ERROR;
}
@@ -2382,57 +2388,68 @@ ConnFree(Port *conn)
#ifdef USE_SSL
secure_close(conn);
#endif
if (conn->gss)
free(conn->gss);
free(conn);
}
/*
+ * ClosePostmasterSockets -- close all the postmaster's listen sockets
+ */
+static void
+ClosePostmasterSockets(void)
+{
+ int i;
+
+ for (i = 0; i < MAXLISTEN; i++)
+ {
+ if (ListenSocket[i] != PGINVALID_SOCKET)
+ {
+ StreamClose(ListenSocket[i]);
+ ListenSocket[i] = PGINVALID_SOCKET;
+ }
+ }
+
+ ClosedSockets = true;
+}
+
+/*
* ClosePostmasterPorts -- close all the postmaster's open sockets
*
* This is called during child process startup to release file descriptors
* that are not needed by that child process. The postmaster still has
* them open, of course.
*
* Note: we pass am_syslogger as a boolean because we don't want to set
* the global variable yet when this is called.
*/
void
ClosePostmasterPorts(bool am_syslogger)
{
- int i;
-
#ifndef WIN32
/*
* Close the write end of postmaster death watch pipe. It's important to
* do this as early as possible, so that if postmaster dies, others won't
* think that it's still running because we're holding the pipe open.
*/
if (close(postmaster_alive_fds[POSTMASTER_FD_OWN]))
ereport(FATAL,
(errcode_for_file_access(),
errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
postmaster_alive_fds[POSTMASTER_FD_OWN] = -1;
#endif
/* Close the listen sockets */
- for (i = 0; i < MAXLISTEN; i++)
- {
- if (ListenSocket[i] != PGINVALID_SOCKET)
- {
- StreamClose(ListenSocket[i]);
- ListenSocket[i] = PGINVALID_SOCKET;
- }
- }
+ ClosePostmasterSockets();
/* If using syslogger, close the read side of the pipe */
if (!am_syslogger)
{
#ifndef WIN32
if (syslogPipe[0] >= 0)
close(syslogPipe[0]);
syslogPipe[0] = -1;
#else
if (syslogPipe[0])
@@ -2667,20 +2684,26 @@ pmdie(SIGNAL_ARGS)
*/
if (Shutdown >= ImmediateShutdown)
break;
Shutdown = ImmediateShutdown;
ereport(LOG,
(errmsg("received immediate shutdown request")));
#ifdef USE_SYSTEMD
sd_notify(0, "STOPPING=1");
#endif
+ /*
+ * Close the listen sockets to avoid wasting resources by creating
+ * dead-end children, so that postmaster stops as fast as possible.
+ */
+ ClosePostmasterSockets();
+
TerminateChildren(SIGQUIT);
pmState = PM_WAIT_BACKENDS;
/* set stopwatch for them to die */
AbortStartTime = time(NULL);
/*
* Now wait for backends to exit. If there are none,
* PostmasterStateMachine will take the next step.
*/
@@ -3214,20 +3237,29 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
* signalled children, nonzero exit status is to be expected, so don't
* clutter log.
*/
take_action = !FatalError && Shutdown != ImmediateShutdown;
if (take_action)
{
LogChildExit(LOG, procname, pid, exitstatus);
ereport(LOG,
(errmsg("terminating any other active server processes")));
+
+ /*
+ * If the startup process failed, or the user does not want an automatic
+ * restart after backend crashes, close the listen sockets to avoid wasting
+ * resources by creating dead-end children, so that postmaster stops as
+ * fast as possible.
+ */
+ if (StartupStatus == STARTUP_CRASHED || !restart_after_crash)
+ ClosePostmasterSockets();
}
/* Process background workers. */
slist_foreach(siter, &BackgroundWorkerList)
{
RegisteredBgWorker *rw;
rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
if (rw->rw_pid == 0)
continue; /* not running */
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers