On Mon, Nov 7, 2016 at 10:14 AM, Tsunakawa, Takayuki <tsunakawa.ta...@jp.fujitsu.com> wrote: > From: pgsql-hackers-ow...@postgresql.org >> [mailto:pgsql-hackers-ow...@postgresql.org] On Behalf Of Ashutosh Bapat >> I am not sure if following condition is a good idea in ServerLoop() >> 1650 if (pmState == PM_WAIT_DEAD_END || ClosedSockets) >> >> There are no sockets to listen on, so select in the else condition is going >> to sleep for timeout determined based on the sequence expected. >> Just before we close sockets in pmdie() it sets AbortStartTime, which >> determines the timeout for the sleep here. So, it doesn't make sense to >> ignore it. Instead may be we should change the default 60s sleep to 100ms >> sleep in DetermineSleepTime(). > > That sounds better. I modified cleaned ServerLoop() by pushing the existing > 100ms logic into DetermineSleepTime().
I have changed some comments around this block. See attached patch. Let me know if that looks good. > > >> While the postmaster is terminating children, a new connection request may >> arrive. We should probably close listening sockets before terminating >> children in pmdie(). > > I moved ClosePostmasterSocket() call before terminating children in the > immediate shutdown case. I didn't change the behavior of smart and fast > shutdown modes, because they may take a long time to complete due to > checkpointing etc. Users will want to know what's happening during shutdown > or after pg_ctl stop times out, by getting the message "FATAL: the database > system is shutting down" when they try to connect to the database. The > immediate shutdown or crash should better be as fast as possible. OK. -- Best Wishes, Ashutosh Bapat EnterpriseDB Corporation The Postgres Database Company
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 24add74..52c0f46 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -265,20 +265,22 @@ static StartupStatusEnum StartupStatus = STARTUP_NOT_RUNNING; /* Startup/shutdown state */ #define NoShutdown 0 #define SmartShutdown 1 #define FastShutdown 2 #define ImmediateShutdown 3 static int Shutdown = NoShutdown; static bool FatalError = false; /* T if recovering from backend crash */ +static bool ClosedSockets = false; /* T if postmaster's listening sockets + have been closed. */ /* * We use a simple state machine to control startup, shutdown, and * crash recovery (which is rather like shutdown followed by startup). * * After doing all the postmaster initialization work, we enter PM_STARTUP * state and the startup process is launched. The startup process begins by * reading the control file and other preliminary initialization steps. * In a normal startup, or after crash recovery, the startup process exits * with exit code 0 and we switch to PM_RUN state. However, archive recovery @@ -372,20 +374,21 @@ static DNSServiceRef bonjour_sdref = NULL; /* * postmaster.c - function prototypes */ static void CloseServerPorts(int status, Datum arg); static void unlink_external_pid_file(int status, Datum arg); static void getInstallationPaths(const char *argv0); static void checkDataDir(void); static Port *ConnCreate(int serverFd); static void ConnFree(Port *port); +static void ClosePostmasterSockets(void); static void reset_shared(int port); static void SIGHUP_handler(SIGNAL_ARGS); static void pmdie(SIGNAL_ARGS); static void reaper(SIGNAL_ARGS); static void sigusr1_handler(SIGNAL_ARGS); static void startup_die(SIGNAL_ARGS); static void dummy_handler(SIGNAL_ARGS); static void StartupPacketTimeoutHandler(void); static void CleanupBackend(int pid, int exitstatus); static bool CleanupBackgroundWorker(int pid, int exitstatus); @@ -1513,20 +1516,28 @@ checkDataDir(void) * background tasks handled by ServerLoop get done even when no requests are * arriving. However, if there are background workers waiting to be started, * we don't actually sleep so that they are quickly serviced. Other exception * cases are as shown in the code. */ static void DetermineSleepTime(struct timeval * timeout) { TimestampTz next_wakeup = 0; + /* In PM_WAIT_DEAD_END state, sleeping for 100ms seems reasonable. */ + if (pmState == PM_WAIT_DEAD_END) + { + timeout->tv_sec = 0; + timeout->tv_usec = 100000L; + return; + } + /* * Normal case: either there are no background workers at all, or we're in * a shutdown sequence (during which we ignore bgworkers altogether). */ if (Shutdown > NoShutdown || (!StartWorkerNeeded && !HaveCrashedWorker)) { if (AbortStartTime != 0) { /* time left to abort; clamp to 0 in case it already expired */ @@ -1623,57 +1634,52 @@ ServerLoop(void) last_lockfile_recheck_time = last_touch_time = time(NULL); nSockets = initMasks(&readmask); for (;;) { fd_set rmask; int selres; time_t now; + /* must set timeout each time; some OSes change it! */ + struct timeval timeout; /* * Wait for a connection request to arrive. * * We block all signals except while sleeping. That makes it safe for * signal handlers, which again block all signals while executing, to * do nontrivial work. * * If we are in PM_WAIT_DEAD_END state, then we don't want to accept * any new connections, so we don't call select(), and just sleep. */ - memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set)); - if (pmState == PM_WAIT_DEAD_END) - { - PG_SETMASK(&UnBlockSig); + /* Needs to run with blocked signals! */ + DetermineSleepTime(&timeout); - pg_usleep(100000L); /* 100 msec seems reasonable */ - selres = 0; + PG_SETMASK(&UnBlockSig); - PG_SETMASK(&BlockSig); + if (pmState == PM_WAIT_DEAD_END || ClosedSockets) + { + pg_usleep(timeout.tv_sec * 1000000L + timeout.tv_usec); + selres = 0; } else { - /* must set timeout each time; some OSes change it! */ - struct timeval timeout; - - /* Needs to run with blocked signals! */ - DetermineSleepTime(&timeout); - - PG_SETMASK(&UnBlockSig); - + memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set)); selres = select(nSockets, &rmask, NULL, NULL, &timeout); - - PG_SETMASK(&BlockSig); } + PG_SETMASK(&BlockSig); + /* Now check the select() result */ if (selres < 0) { if (errno != EINTR && errno != EWOULDBLOCK) { ereport(LOG, (errcode_for_socket_access(), errmsg("select() failed in postmaster: %m"))); return STATUS_ERROR; } @@ -2382,57 +2388,68 @@ ConnFree(Port *conn) #ifdef USE_SSL secure_close(conn); #endif if (conn->gss) free(conn->gss); free(conn); } /* + * ClosePostmasterSockets -- close all the postmaster's listen sockets + */ +static void +ClosePostmasterSockets(void) +{ + int i; + + for (i = 0; i < MAXLISTEN; i++) + { + if (ListenSocket[i] != PGINVALID_SOCKET) + { + StreamClose(ListenSocket[i]); + ListenSocket[i] = PGINVALID_SOCKET; + } + } + + ClosedSockets = true; +} + +/* * ClosePostmasterPorts -- close all the postmaster's open sockets * * This is called during child process startup to release file descriptors * that are not needed by that child process. The postmaster still has * them open, of course. * * Note: we pass am_syslogger as a boolean because we don't want to set * the global variable yet when this is called. */ void ClosePostmasterPorts(bool am_syslogger) { - int i; - #ifndef WIN32 /* * Close the write end of postmaster death watch pipe. It's important to * do this as early as possible, so that if postmaster dies, others won't * think that it's still running because we're holding the pipe open. */ if (close(postmaster_alive_fds[POSTMASTER_FD_OWN])) ereport(FATAL, (errcode_for_file_access(), errmsg_internal("could not close postmaster death monitoring pipe in child process: %m"))); postmaster_alive_fds[POSTMASTER_FD_OWN] = -1; #endif /* Close the listen sockets */ - for (i = 0; i < MAXLISTEN; i++) - { - if (ListenSocket[i] != PGINVALID_SOCKET) - { - StreamClose(ListenSocket[i]); - ListenSocket[i] = PGINVALID_SOCKET; - } - } + ClosePostmasterSockets(); /* If using syslogger, close the read side of the pipe */ if (!am_syslogger) { #ifndef WIN32 if (syslogPipe[0] >= 0) close(syslogPipe[0]); syslogPipe[0] = -1; #else if (syslogPipe[0]) @@ -2667,20 +2684,26 @@ pmdie(SIGNAL_ARGS) */ if (Shutdown >= ImmediateShutdown) break; Shutdown = ImmediateShutdown; ereport(LOG, (errmsg("received immediate shutdown request"))); #ifdef USE_SYSTEMD sd_notify(0, "STOPPING=1"); #endif + /* + * Close the listen sockets to avoid wasting resources by creating + * dead-end children, so that postmaster stops as fast as possible. + */ + ClosePostmasterSockets(); + TerminateChildren(SIGQUIT); pmState = PM_WAIT_BACKENDS; /* set stopwatch for them to die */ AbortStartTime = time(NULL); /* * Now wait for backends to exit. If there are none, * PostmasterStateMachine will take the next step. */ @@ -3214,20 +3237,29 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) * signalled children, nonzero exit status is to be expected, so don't * clutter log. */ take_action = !FatalError && Shutdown != ImmediateShutdown; if (take_action) { LogChildExit(LOG, procname, pid, exitstatus); ereport(LOG, (errmsg("terminating any other active server processes"))); + + /* + * If the startup process failed, or the user does not want an automatic + * restart after backend crashes, close the listen sockets to avoid wasting + * resources by creating dead-end children, so that postmaster stops as + * fast as possible. + */ + if (StartupStatus == STARTUP_CRASHED || !restart_after_crash) + ClosePostmasterSockets(); } /* Process background workers. */ slist_foreach(siter, &BackgroundWorkerList) { RegisteredBgWorker *rw; rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur); if (rw->rw_pid == 0) continue; /* not running */
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers