On Wed, 2010-01-13 at 19:58 +0100, Andres Freund wrote:
> > I am still testing the patch, so I should be confident to commit tomorrow
> > barring issues.
> I have only looked at it briefly because right now I don't have the time (going
> to
> eat at a friend's place...) but I think I spotted an issue:
> The IsAbortedTransactionBlockState() check in RecoveryConflictInterrupt is
> not
> correct right now because that returns true for TBLOCK_SUBABORT as well.
> Wouldn't that mess with the case where we're in a failed subxact and then
> roll back only that subxact?
Well spotted, yes.
--
Simon Riggs www.2ndQuadrant.com
*** a/src/backend/access/transam/xact.c
--- b/src/backend/access/transam/xact.c
***************
*** 313,320 **** IsTransactionState(void)
/*
* IsAbortedTransactionBlockState
*
! * This returns true if we are currently running a query
! * within an aborted transaction block.
*/
bool
IsAbortedTransactionBlockState(void)
--- 313,319 ----
/*
* IsAbortedTransactionBlockState
*
! * This returns true if we are within an aborted transaction block.
*/
bool
IsAbortedTransactionBlockState(void)
*** a/src/backend/storage/ipc/procarray.c
--- b/src/backend/storage/ipc/procarray.c
***************
*** 324,329 **** ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
--- 324,330 ----
/* must be cleared with xid/xmin: */
proc->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
proc->inCommit = false; /* be sure this is cleared in abort */
+ proc->recoveryConflictPending = false;
/* Clear the subtransaction-XID cache too while holding the lock */
proc->subxids.nxids = 0;
***************
*** 350,355 **** ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
--- 351,357 ----
/* must be cleared with xid/xmin: */
proc->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
proc->inCommit = false; /* be sure this is cleared in abort */
+ proc->recoveryConflictPending = false;
Assert(proc->subxids.nxids == 0);
Assert(proc->subxids.overflowed == false);
***************
*** 377,383 **** ProcArrayClearTransaction(PGPROC *proc)
proc->xid = InvalidTransactionId;
proc->lxid = InvalidLocalTransactionId;
proc->xmin = InvalidTransactionId;
! proc->recoveryConflictMode = 0;
/* redundant, but just in case */
proc->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
--- 379,385 ----
proc->xid = InvalidTransactionId;
proc->lxid = InvalidLocalTransactionId;
proc->xmin = InvalidTransactionId;
! proc->recoveryConflictPending = false;
/* redundant, but just in case */
proc->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
***************
*** 1665,1671 **** GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid,
if (proc->pid == 0)
continue;
! if (skipExistingConflicts && proc->recoveryConflictMode > 0)
continue;
if (!OidIsValid(dbOid) ||
--- 1667,1673 ----
if (proc->pid == 0)
continue;
! if (skipExistingConflicts && proc->recoveryConflictPending)
continue;
if (!OidIsValid(dbOid) ||
***************
*** 1704,1710 **** GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid,
* Returns pid of the process signaled, or 0 if not found.
*/
pid_t
! CancelVirtualTransaction(VirtualTransactionId vxid, int cancel_mode)
{
ProcArrayStruct *arrayP = procArray;
int index;
--- 1706,1712 ----
* Returns pid of the process signaled, or 0 if not found.
*/
pid_t
! CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
{
ProcArrayStruct *arrayP = procArray;
int index;
***************
*** 1722,1749 **** CancelVirtualTransaction(VirtualTransactionId vxid, int cancel_mode)
if (procvxid.backendId == vxid.backendId &&
procvxid.localTransactionId == vxid.localTransactionId)
{
! /*
! * Issue orders for the proc to read next time it receives SIGINT
! */
! if (proc->recoveryConflictMode < cancel_mode)
! proc->recoveryConflictMode = cancel_mode;
!
pid = proc->pid;
break;
}
}
LWLockRelease(ProcArrayLock);
- if (pid != 0)
- {
- /*
- * Kill the pid if it's still here. If not, that's what we wanted
- * so ignore any errors.
- */
- kill(pid, SIGINT);
- }
-
return pid;
}
--- 1724,1745 ----
if (procvxid.backendId == vxid.backendId &&
procvxid.localTransactionId == vxid.localTransactionId)
{
! proc->recoveryConflictPending = true;
pid = proc->pid;
+ if (pid != 0)
+ {
+ /*
+ * Kill the pid if it's still here. If not, that's what we wanted
+ * so ignore any errors.
+ */
+ (void) SendProcSignal(pid, sigmode, vxid.backendId);
+ }
break;
}
}
LWLockRelease(ProcArrayLock);
return pid;
}
***************
*** 1834,1839 **** CancelDBBackends(Oid databaseid)
--- 1830,1836 ----
{
ProcArrayStruct *arrayP = procArray;
int index;
+ pid_t pid = 0;
/* tell all backends to die */
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
***************
*** 1844,1851 **** CancelDBBackends(Oid databaseid)
if (proc->databaseId == databaseid)
{
! proc->recoveryConflictMode = CONFLICT_MODE_FATAL;
! kill(proc->pid, SIGINT);
}
}
--- 1841,1861 ----
if (proc->databaseId == databaseid)
{
! VirtualTransactionId procvxid;
!
! GET_VXID_FROM_PGPROC(procvxid, *proc);
!
! proc->recoveryConflictPending = true;
! pid = proc->pid;
! if (pid != 0)
! {
! /*
! * Kill the pid if it's still here. If not, that's what we wanted
! * so ignore any errors.
! */
! (void) SendProcSignal(pid, PROCSIG_CONFLICT_FATAL_INTERRUPT,
! procvxid.backendId);
! }
}
}
*** a/src/backend/storage/ipc/procsignal.c
--- b/src/backend/storage/ipc/procsignal.c
***************
*** 24,29 ****
--- 24,31 ----
#include "storage/procsignal.h"
#include "storage/shmem.h"
#include "storage/sinval.h"
+ #include "storage/standby.h"
+ #include "tcop/tcopprot.h"
/*
***************
*** 258,262 **** procsignal_sigusr1_handler(SIGNAL_ARGS)
--- 260,270 ----
if (CheckProcSignal(PROCSIG_NOTIFY_INTERRUPT))
HandleNotifyInterrupt();
+ if (CheckProcSignal(PROCSIG_CONFLICT_ERROR_INTERRUPT))
+ RecoveryConflictInterrupt(CONFLICT_MODE_ERROR);
+
+ if (CheckProcSignal(PROCSIG_CONFLICT_FATAL_INTERRUPT))
+ RecoveryConflictInterrupt(CONFLICT_MODE_FATAL);
+
errno = save_errno;
}
*** a/src/backend/storage/ipc/standby.c
--- b/src/backend/storage/ipc/standby.c
***************
*** 159,166 **** WaitExceedsMaxStandbyDelay(void)
* recovery processing. Judgement has already been passed on it within
* a specific rmgr. Here we just issue the orders to the procs. The procs
* then throw the required error as instructed.
- *
- * We may ask for a specific cancel_mode, typically ERROR or FATAL.
*/
void
ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
--- 159,164 ----
***************
*** 218,229 **** ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
if (WaitExceedsMaxStandbyDelay())
{
pid_t pid;
/*
* Now find out who to throw out of the balloon.
*/
Assert(VirtualTransactionIdIsValid(*waitlist));
! pid = CancelVirtualTransaction(*waitlist, cancel_mode);
if (pid != 0)
{
--- 216,231 ----
if (WaitExceedsMaxStandbyDelay())
{
pid_t pid;
+ ProcSignalReason sigmode = PROCSIG_CONFLICT_ERROR_INTERRUPT;
+
+ if (cancel_mode == CONFLICT_MODE_FATAL)
+ sigmode = PROCSIG_CONFLICT_FATAL_INTERRUPT;
/*
* Now find out who to throw out of the balloon.
*/
Assert(VirtualTransactionIdIsValid(*waitlist));
! pid = CancelVirtualTransaction(*waitlist, sigmode);
if (pid != 0)
{
*** a/src/backend/storage/lmgr/proc.c
--- b/src/backend/storage/lmgr/proc.c
***************
*** 318,324 **** InitProcess(void)
MyProc->waitProcLock = NULL;
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
SHMQueueInit(&(MyProc->myProcLocks[i]));
! MyProc->recoveryConflictMode = 0;
/*
* We might be reusing a semaphore that belonged to a failed process. So
--- 318,324 ----
MyProc->waitProcLock = NULL;
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
SHMQueueInit(&(MyProc->myProcLocks[i]));
! MyProc->recoveryConflictPending = false;
/*
* We might be reusing a semaphore that belonged to a failed process. So
*** a/src/backend/tcop/postgres.c
--- b/src/backend/tcop/postgres.c
***************
*** 172,177 **** static int UseNewLine = 1; /* Use newlines query delimiters (the default) */
--- 172,179 ----
static int UseNewLine = 0; /* Use EOF as query delimiters */
#endif /* TCOP_DONTUSENEWLINE */
+ /* whether we were cancelled during recovery by conflict processing or not */
+ static bool RecoveryConflictPending = false;
/* ----------------------------------------------------------------
* decls for routines only used in this file
***************
*** 185,190 **** static List *pg_rewrite_query(Query *query);
--- 187,193 ----
static bool check_log_statement(List *stmt_list);
static int errdetail_execute(List *raw_parsetree_list);
static int errdetail_params(ParamListInfo params);
+ static int errdetail_abort(void);
static void start_xact_command(void);
static void finish_xact_command(void);
static bool IsTransactionExitStmt(Node *parsetree);
***************
*** 943,949 **** exec_simple_query(const char *query_string)
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
! "commands ignored until end of transaction block")));
/* Make sure we are in a transaction command */
start_xact_command();
--- 946,953 ----
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
! "commands ignored until end of transaction block"),
! errdetail_abort()));
/* Make sure we are in a transaction command */
start_xact_command();
***************
*** 1252,1258 **** exec_parse_message(const char *query_string, /* string to execute */
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
! "commands ignored until end of transaction block")));
/*
* Set up a snapshot if parse analysis/planning will need one.
--- 1256,1263 ----
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
! "commands ignored until end of transaction block"),
! errdetail_abort()));
/*
* Set up a snapshot if parse analysis/planning will need one.
***************
*** 1532,1538 **** exec_bind_message(StringInfo input_message)
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
! "commands ignored until end of transaction block")));
/*
* Create the portal. Allow silent replacement of an existing portal only
--- 1537,1544 ----
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
! "commands ignored until end of transaction block"),
! errdetail_abort()));
/*
* Create the portal. Allow silent replacement of an existing portal only
***************
*** 1973,1979 **** exec_execute_message(const char *portal_name, long max_rows)
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
! "commands ignored until end of transaction block")));
/* Check for cancel signal before we start execution */
CHECK_FOR_INTERRUPTS();
--- 1979,1986 ----
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
! "commands ignored until end of transaction block"),
! errdetail_abort()));
/* Check for cancel signal before we start execution */
CHECK_FOR_INTERRUPTS();
***************
*** 2234,2239 **** errdetail_params(ParamListInfo params)
--- 2241,2260 ----
}
/*
+ * errdetail_abort
+ *
+ * Add an errdetail() line showing abort reason, if any.
+ */
+ static int
+ errdetail_abort(void)
+ {
+ if (MyProc->recoveryConflictPending)
+ errdetail("abort reason: recovery conflict");
+
+ return 0;
+ }
+
+ /*
* exec_describe_statement_message
*
* Process a "Describe" message for a prepared statement
***************
*** 2290,2296 **** exec_describe_statement_message(const char *stmt_name)
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
! "commands ignored until end of transaction block")));
if (whereToSendOutput != DestRemote)
return; /* can't actually do anything... */
--- 2311,2318 ----
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
! "commands ignored until end of transaction block"),
! errdetail_abort()));
if (whereToSendOutput != DestRemote)
return; /* can't actually do anything... */
***************
*** 2370,2376 **** exec_describe_portal_message(const char *portal_name)
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
! "commands ignored until end of transaction block")));
if (whereToSendOutput != DestRemote)
return; /* can't actually do anything... */
--- 2392,2399 ----
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
! "commands ignored until end of transaction block"),
! errdetail_abort()));
if (whereToSendOutput != DestRemote)
return; /* can't actually do anything... */
***************
*** 2677,2682 **** SigHupHandler(SIGNAL_ARGS)
--- 2700,2784 ----
got_SIGHUP = true;
}
+ /*
+ * RecoveryConflictInterrupt: out-of-line portion of recovery conflict
+ * handling following receipt of SIGUSR1. Designed to be similar to die()
+ * and StatementCancelHandler(). Called only by a normal user backend
+ * that begins a transaction during recovery.
+ */
+ void
+ RecoveryConflictInterrupt(int conflict_mode)
+ {
+ int save_errno = errno;
+
+ /*
+ * Don't joggle the elbow of proc_exit
+ */
+ if (!proc_exit_inprogress)
+ {
+ switch (conflict_mode)
+ {
+ case CONFLICT_MODE_ERROR:
+ /*
+ * If we aren't in a transaction any longer then ignore.
+ */
+ if (!IsTransactionOrTransactionBlock())
+ return;
+
+ /*
+ * If we can abort just the current subtransaction then we
+ * are OK to throw an ERROR to resolve the conflict. Otherwise
+ * drop through to the FATAL case.
+ * XXX Other cases exist also, but add those later.
+ */
+ if (!IsSubTransaction())
+ {
+ /*
+ * If we already aborted then we no longer need to cancel.
+ * We do this here since we do not wish to ignore aborted
+ * subtransactions, which must cause FATAL, currently.
+ */
+ if (IsAbortedTransactionBlockState())
+ return;
+
+ RecoveryConflictPending = true;
+ QueryCancelPending = true;
+ InterruptPending = true;
+ break;
+ }
+
+ /* Intentional drop through to CONFLICT_MODE_FATAL */
+
+ case CONFLICT_MODE_FATAL:
+ RecoveryConflictPending = true;
+ ProcDiePending = true;
+ InterruptPending = true;
+ break;
+
+ default:
+ elog(FATAL, "Unknown conflict mode");
+ }
+
+ /*
+ * If it's safe to interrupt, and we're waiting for input or a lock,
+ * service the interrupt immediately
+ */
+ if (ImmediateInterruptOK && InterruptHoldoffCount == 0 &&
+ CritSectionCount == 0)
+ {
+ /* bump holdoff count to make ProcessInterrupts() a no-op */
+ /* until we are done getting ready for it */
+ InterruptHoldoffCount++;
+ LockWaitCancel(); /* prevent CheckDeadLock from running */
+ DisableNotifyInterrupt();
+ DisableCatchupInterrupt();
+ InterruptHoldoffCount--;
+ ProcessInterrupts();
+ }
+ }
+
+ errno = save_errno;
+ }
/*
* ProcessInterrupts: out-of-line portion of CHECK_FOR_INTERRUPTS() macro
***************
*** 2706,2711 **** ProcessInterrupts(void)
--- 2808,2817 ----
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating autovacuum process due to administrator command")));
+ else if (RecoveryConflictPending)
+ ereport(FATAL,
+ (errcode(ERRCODE_ADMIN_SHUTDOWN),
+ errmsg("terminating connection due to conflict with recovery")));
else
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
***************
*** 2744,2800 **** ProcessInterrupts(void)
(errcode(ERRCODE_QUERY_CANCELED),
errmsg("canceling autovacuum task")));
}
{
! int cancelMode = MyProc->recoveryConflictMode;
! /*
! * XXXHS: We don't yet have a clean way to cancel an
! * idle-in-transaction session, so make it FATAL instead.
! * This isn't as bad as it looks because we don't issue a
! * CONFLICT_MODE_ERROR for a session with proc->xmin == 0
! * on cleanup conflicts. There's a possibility that we
! * marked somebody as a conflict and then they go idle.
! */
! if (DoingCommandRead && IsTransactionBlock() &&
! cancelMode == CONFLICT_MODE_ERROR)
{
! cancelMode = CONFLICT_MODE_FATAL;
}
!
! switch (cancelMode)
{
! case CONFLICT_MODE_FATAL:
! ImmediateInterruptOK = false; /* not idle anymore */
! DisableNotifyInterrupt();
! DisableCatchupInterrupt();
! Assert(RecoveryInProgress());
! ereport(FATAL,
! (errcode(ERRCODE_QUERY_CANCELED),
! errmsg("canceling session due to conflict with recovery")));
!
! case CONFLICT_MODE_ERROR:
! /*
! * We are aborting because we need to release
! * locks. So we need to abort out of all
! * subtransactions to make sure we release
! * all locks at whatever their level.
! *
! * XXX Should we try to examine the
! * transaction tree and cancel just enough
! * subxacts to remove locks? Doubt it.
! */
! ImmediateInterruptOK = false; /* not idle anymore */
! DisableNotifyInterrupt();
! DisableCatchupInterrupt();
! Assert(RecoveryInProgress());
! AbortOutOfAnyTransaction();
! ereport(ERROR,
! (errcode(ERRCODE_QUERY_CANCELED),
! errmsg("canceling statement due to conflict with recovery")));
!
! default:
! /* No conflict pending, so fall through */
! break;
}
}
--- 2850,2875 ----
(errcode(ERRCODE_QUERY_CANCELED),
errmsg("canceling autovacuum task")));
}
+ if (RecoveryConflictPending)
{
! ImmediateInterruptOK = false; /* not idle anymore */
! DisableNotifyInterrupt();
! DisableCatchupInterrupt();
! if (DoingCommandRead)
{
! ProcDiePending = false;
! QueryCancelPending = false;
! ereport(FATAL,
! (errcode(ERRCODE_ADMIN_SHUTDOWN),
! errmsg("terminating connection due to conflict with recovery")));
}
! else
{
! QueryCancelPending = false;
! ereport(ERROR,
! (errcode(ERRCODE_QUERY_CANCELED),
! errmsg("canceling statement due to conflict with recovery")));
}
}
***************
*** 3627,3633 **** PostgresMain(int argc, char *argv[], const char *username)
*/
if (send_ready_for_query)
{
! if (IsTransactionOrTransactionBlock())
{
set_ps_display("idle in transaction", false);
pgstat_report_activity("<IDLE> in transaction");
--- 3702,3713 ----
*/
if (send_ready_for_query)
{
! if (IsAbortedTransactionBlockState())
! {
! set_ps_display("idle in transaction (aborted)", false);
! pgstat_report_activity("<IDLE> in transaction (aborted)");
! }
! else if (IsTransactionOrTransactionBlock())
{
set_ps_display("idle in transaction", false);
pgstat_report_activity("<IDLE> in transaction");
*** a/src/include/storage/proc.h
--- b/src/include/storage/proc.h
***************
*** 96,106 **** struct PGPROC
uint8 vacuumFlags; /* vacuum-related flags, see above */
/*
! * While in hot standby mode, setting recoveryConflictMode instructs
! * the backend to commit suicide. Possible values are the same as those
! * passed to ResolveRecoveryConflictWithVirtualXIDs().
*/
! int recoveryConflictMode;
/* Info about LWLock the process is currently waiting for, if any. */
bool lwWaiting; /* true if waiting for an LW lock */
--- 96,106 ----
uint8 vacuumFlags; /* vacuum-related flags, see above */
/*
! * While in hot standby mode, shows that a conflict signal has been sent
! * for the current transaction. Set/cleared while holding ProcArrayLock,
! * though not required. Accessed without lock, if needed.
*/
! bool recoveryConflictPending;
/* Info about LWLock the process is currently waiting for, if any. */
bool lwWaiting; /* true if waiting for an LW lock */
*** a/src/include/storage/procarray.h
--- b/src/include/storage/procarray.h
***************
*** 15,20 ****
--- 15,21 ----
#define PROCARRAY_H
#include "storage/lock.h"
+ #include "storage/procsignal.h"
#include "storage/standby.h"
#include "utils/snapshot.h"
***************
*** 58,65 **** extern VirtualTransactionId *GetCurrentVirtualXIDs(TransactionId limitXmin,
int *nvxids);
extern VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin,
Oid dbOid, bool skipExistingConflicts);
! extern pid_t CancelVirtualTransaction(VirtualTransactionId vxid,
! int cancel_mode);
extern int CountActiveBackends(void);
extern int CountDBBackends(Oid databaseid);
--- 59,65 ----
int *nvxids);
extern VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin,
Oid dbOid, bool skipExistingConflicts);
! extern pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode);
extern int CountActiveBackends(void);
extern int CountDBBackends(Oid databaseid);
*** a/src/include/storage/procsignal.h
--- b/src/include/storage/procsignal.h
***************
*** 31,36 **** typedef enum
--- 31,38 ----
{
PROCSIG_CATCHUP_INTERRUPT, /* sinval catchup interrupt */
PROCSIG_NOTIFY_INTERRUPT, /* listen/notify interrupt */
+ PROCSIG_CONFLICT_ERROR_INTERRUPT, /* recovery conflict error */
+ PROCSIG_CONFLICT_FATAL_INTERRUPT, /* recovery conflict fatal */
NUM_PROCSIGNALS /* Must be last! */
} ProcSignalReason;
*** a/src/include/tcop/tcopprot.h
--- b/src/include/tcop/tcopprot.h
***************
*** 64,69 **** extern void die(SIGNAL_ARGS);
--- 64,70 ----
extern void quickdie(SIGNAL_ARGS);
extern void StatementCancelHandler(SIGNAL_ARGS);
extern void FloatExceptionHandler(SIGNAL_ARGS);
+ extern void RecoveryConflictInterrupt(int conflict_mode); /* called from SIGUSR1 handler */
extern void prepare_for_client_read(void);
extern void client_read_ended(void);
extern const char *process_postgres_switches(int argc, char *argv[],
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers