Robert Haas <[email protected]> wrote:
> Nope, you're on target. Although - if I were you - I would post
> the ACCESS EXCLUSIVE lock version of the patch for feedback. I
> can't speak for anyone else, but I'll read it.
Here you go! :-)
This is the milestone of having full serializable behavior, albeit
with horrible performance, using the simplest implementation
possible. I didn't use ACCESS EXCLUSIVE locks, because on review it
seemed to me that a SHARE lock would be strong enough. It compiles
and passes the regression tests, and I've been testing some of the
scenarios previously used to show the snapshot anomalies; I now get
correct behavior through blocking.
I identified the points to insert predicate locking by looking for
places where ExecStoreTuple was called with a valid heap buffer; if
there is anywhere that obtains tuples from the heap without going
through that method, I have more work to do. If anyone knows of
such locations, I'd be grateful for a "heads up".
If I've done anything horribly wrong in organizing the code, that'd
be nice to hear about before I go too much farther, too.
I'm definitely not looking for this to be committed, but should I
add it to the CF page just for a "feedback" review? (I'm OK with
keeping it more ad hoc, especially if it's going to hold up the
beta at all.)
-Kevin
*** a/src/backend/catalog/index.c
--- b/src/backend/catalog/index.c
***************
*** 2133,2139 **** IndexCheckExclusion(Relation heapRelation,
*
* After completing validate_index(), we wait until all transactions that
* were alive at the time of the reference snapshot are gone; this is
! * necessary to be sure there are none left with a serializable snapshot
* older than the reference (and hence possibly able to see tuples we did
* not index). Then we mark the index "indisvalid" and commit.
Subsequent
* transactions will be able to use it for queries.
--- 2133,2139 ----
*
* After completing validate_index(), we wait until all transactions that
* were alive at the time of the reference snapshot are gone; this is
! * necessary to be sure there are none left with a transaction-based snapshot
* older than the reference (and hence possibly able to see tuples we did
* not index). Then we mark the index "indisvalid" and commit.
Subsequent
* transactions will be able to use it for queries.
*** a/src/backend/commands/trigger.c
--- b/src/backend/commands/trigger.c
***************
*** 2319,2325 **** ltrmark:;
case HeapTupleUpdated:
ReleaseBuffer(buffer);
! if (IsXactIsoLevelSerializable)
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not
serialize access due to concurrent update")));
--- 2319,2325 ----
case HeapTupleUpdated:
ReleaseBuffer(buffer);
! if (IsXactIsoLevelXactSnapshotBased)
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not
serialize access due to concurrent update")));
*** a/src/backend/executor/execMain.c
--- b/src/backend/executor/execMain.c
***************
*** 1538,1544 **** EvalPlanQualFetch(EState *estate, Relation relation, int
lockmode,
case HeapTupleUpdated:
ReleaseBuffer(buffer);
! if (IsXactIsoLevelSerializable)
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could
not serialize access due to concurrent update")));
--- 1538,1544 ----
case HeapTupleUpdated:
ReleaseBuffer(buffer);
! if (IsXactIsoLevelXactSnapshotBased)
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could
not serialize access due to concurrent update")));
*** a/src/backend/executor/nodeBitmapHeapscan.c
--- b/src/backend/executor/nodeBitmapHeapscan.c
***************
*** 42,47 ****
--- 42,48 ----
#include "executor/nodeBitmapHeapscan.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
+ #include "storage/predicate.h"
#include "utils/memutils.h"
#include "utils/snapmgr.h"
#include "utils/tqual.h"
***************
*** 114,119 **** BitmapHeapNext(BitmapHeapScanState *node)
--- 115,123 ----
#endif /* USE_PREFETCH */
}
+ /* TODO SSI: Lock at tuple level subject to granularity promotion. */
+ PredicateLockRelation(node->ss.ss_currentRelation);
+
for (;;)
{
Page dp;
*** a/src/backend/executor/nodeIndexscan.c
--- b/src/backend/executor/nodeIndexscan.c
***************
*** 30,35 ****
--- 30,36 ----
#include "executor/execdebug.h"
#include "executor/nodeIndexscan.h"
#include "optimizer/clauses.h"
+ #include "storage/predicate.h"
#include "utils/array.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
***************
*** 72,77 **** IndexNext(IndexScanState *node)
--- 73,81 ----
econtext = node->ss.ps.ps_ExprContext;
slot = node->ss.ss_ScanTupleSlot;
+ /* TODO SSI: Lock at tuple level subject to granularity promotion. */
+ PredicateLockRelation(node->ss.ss_currentRelation);
+
/*
* ok, now that we have what we need, fetch the next tuple.
*/
*** a/src/backend/executor/nodeLockRows.c
--- b/src/backend/executor/nodeLockRows.c
***************
*** 130,136 **** lnext:
break;
case HeapTupleUpdated:
! if (IsXactIsoLevelSerializable)
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not
serialize access due to concurrent update")));
--- 130,136 ----
break;
case HeapTupleUpdated:
! if (IsXactIsoLevelXactSnapshotBased)
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not
serialize access due to concurrent update")));
*** a/src/backend/executor/nodeModifyTable.c
--- b/src/backend/executor/nodeModifyTable.c
***************
*** 326,332 **** ldelete:;
break;
case HeapTupleUpdated:
! if (IsXactIsoLevelSerializable)
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize
access due to concurrent update")));
--- 326,332 ----
break;
case HeapTupleUpdated:
! if (IsXactIsoLevelXactSnapshotBased)
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize
access due to concurrent update")));
***************
*** 514,520 **** lreplace:;
break;
case HeapTupleUpdated:
! if (IsXactIsoLevelSerializable)
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize
access due to concurrent update")));
--- 514,520 ----
break;
case HeapTupleUpdated:
! if (IsXactIsoLevelXactSnapshotBased)
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize
access due to concurrent update")));
*** a/src/backend/executor/nodeSeqscan.c
--- b/src/backend/executor/nodeSeqscan.c
***************
*** 28,33 ****
--- 28,34 ----
#include "access/relscan.h"
#include "executor/execdebug.h"
#include "executor/nodeSeqscan.h"
+ #include "storage/predicate.h"
static void InitScanRelation(SeqScanState *node, EState *estate);
static TupleTableSlot *SeqNext(SeqScanState *node);
***************
*** 106,116 **** SeqRecheck(SeqScanState *node, TupleTableSlot *slot)
--- 107,122 ----
* tuple.
* We call the ExecScan() routine and pass it the appropriate
* access method functions.
+ * For serializable transactions, we first lock the entire
relation.
+ * TODO SSI: Would it make sense to optimize cases where the plan
+ * includes a LIMIT, such that individual tuples are
+ * locked instead, subject to granularity promotion?
* ----------------------------------------------------------------
*/
TupleTableSlot *
ExecSeqScan(SeqScanState *node)
{
+ PredicateLockRelation(node->ss_currentRelation);
return ExecScan((ScanState *) node,
(ExecScanAccessMtd) SeqNext,
(ExecScanRecheckMtd) SeqRecheck);
*** a/src/backend/executor/nodeTidscan.c
--- b/src/backend/executor/nodeTidscan.c
***************
*** 31,36 ****
--- 31,37 ----
#include "executor/nodeTidscan.h"
#include "optimizer/clauses.h"
#include "storage/bufmgr.h"
+ #include "storage/predicate.h"
#include "utils/array.h"
***************
*** 308,313 **** TidNext(TidScanState *node)
--- 309,317 ----
node->tss_TidPtr++;
}
+ /* TODO SSI: Lock at tuple level subject to granularity promotion. */
+ PredicateLockRelation(heapRelation);
+
while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids)
{
tuple->t_self = tidList[node->tss_TidPtr];
*** a/src/backend/storage/lmgr/Makefile
--- b/src/backend/storage/lmgr/Makefile
***************
*** 12,18 **** subdir = src/backend/storage/lmgr
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
! OBJS = lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o
include $(top_srcdir)/src/backend/common.mk
--- 12,18 ----
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
! OBJS = lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o predicate.o
include $(top_srcdir)/src/backend/common.mk
*** /dev/null
--- b/src/backend/storage/lmgr/predicate.c
***************
*** 0 ****
--- 1,43 ----
+ /*-------------------------------------------------------------------------
+ *
+ * predicate.c
+ * POSTGRES predicate locking
+ * to support full serializable transaction isolation
+ *
+ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL$
+ *
+ *-------------------------------------------------------------------------
+ */
+ /*
+ * INTERFACE ROUTINES
+ *
+ * PredicateLockRelation(Relation relation)
+ */
+
+ #include "postgres.h"
+
+ #include "access/xact.h"
+ #include "storage/predicate.h"
+ #include "storage/lmgr.h"
+
+
+ /* ----------------------------------------------------------------
+ * Gets a predicate lock at the relation level.
+ * Skip if not in full serializable transaction isolation level.
+ * Skip if this is a temporary table or toast table..
+ * Skip this if a write lock exists for the relation; otherwise,
+ * clear any finer-grained predicate locks this session has on the relation.
+ * TODO SSI: Some of the above. Using SIREAD locks.
+ * ----------------------------------------------------------------
+ */
+ void
+ PredicateLockRelation(Relation relation)
+ {
+ if (IsXactIsoLevelFullySerializable && !(relation->rd_istemp))
+ LockRelation(relation, ShareLock);
+ }
*** a/src/backend/utils/adt/ri_triggers.c
--- b/src/backend/utils/adt/ri_triggers.c
***************
*** 3314,3320 **** ri_PerformCheck(RI_QueryKey *qkey, SPIPlanPtr qplan,
/*
* In READ COMMITTED mode, we just need to use an up-to-date regular
* snapshot, and we will see all rows that could be interesting. But in
! * SERIALIZABLE mode, we can't change the transaction snapshot. If the
* caller passes detectNewRows == false then it's okay to do the query
* with the transaction snapshot; otherwise we use a current snapshot,
and
* tell the executor to error out if it finds any rows under the current
--- 3314,3320 ----
/*
* In READ COMMITTED mode, we just need to use an up-to-date regular
* snapshot, and we will see all rows that could be interesting. But in
! * xact-snapshot-based modes, we can't change the transaction snapshot.
If the
* caller passes detectNewRows == false then it's okay to do the query
* with the transaction snapshot; otherwise we use a current snapshot,
and
* tell the executor to error out if it finds any rows under the current
***************
*** 3322,3328 **** ri_PerformCheck(RI_QueryKey *qkey, SPIPlanPtr qplan,
* that SPI_execute_snapshot will register the snapshots, so we don't
need
* to bother here.
*/
! if (IsXactIsoLevelSerializable && detectNewRows)
{
CommandCounterIncrement(); /* be sure all my own
work is visible */
test_snapshot = GetLatestSnapshot();
--- 3322,3328 ----
* that SPI_execute_snapshot will register the snapshots, so we don't
need
* to bother here.
*/
! if (IsXactIsoLevelXactSnapshotBased && detectNewRows)
{
CommandCounterIncrement(); /* be sure all my own
work is visible */
test_snapshot = GetLatestSnapshot();
*** a/src/backend/utils/time/snapmgr.c
--- b/src/backend/utils/time/snapmgr.c
***************
*** 37,44 ****
/*
! * CurrentSnapshot points to the only snapshot taken in a serializable
! * transaction, and to the latest one taken in a read-committed transaction.
* SecondarySnapshot is a snapshot that's always up-to-date as of the current
* instant, even on a serializable transaction. It should only be used for
* special-purpose code (say, RI checking.)
--- 37,44 ----
/*
! * CurrentSnapshot points to the only snapshot taken in a xact-snapshot-based
! * transaction; otherwise to the latest one taken.
* SecondarySnapshot is a snapshot that's always up-to-date as of the current
* instant, even on a serializable transaction. It should only be used for
* special-purpose code (say, RI checking.)
***************
*** 97,107 **** static int RegisteredSnapshots = 0;
bool FirstSnapshotSet = false;
/*
! * Remembers whether this transaction registered a serializable snapshot at
* start. We cannot trust FirstSnapshotSet in combination with
! * IsXactIsoLevelSerializable, because GUC may be reset before us.
*/
! static bool registered_serializable = false;
static Snapshot CopySnapshot(Snapshot snapshot);
--- 97,107 ----
bool FirstSnapshotSet = false;
/*
! * Remembers whether this transaction registered a transaction-based snapshot
at
* start. We cannot trust FirstSnapshotSet in combination with
! * IsXactIsoLevelXactSnapshotBased, because GUC may be reset before us.
*/
! static bool registered_xact_snapshot = false;
static Snapshot CopySnapshot(Snapshot snapshot);
***************
*** 130,150 **** GetTransactionSnapshot(void)
FirstSnapshotSet = true;
/*
! * In serializable mode, the first snapshot must live until end
of
* xact regardless of what the caller does with it, so we must
* register it internally here and unregister it at end of xact.
*/
! if (IsXactIsoLevelSerializable)
{
CurrentSnapshot =
RegisterSnapshotOnOwner(CurrentSnapshot,
TopTransactionResourceOwner);
! registered_serializable = true;
}
return CurrentSnapshot;
}
! if (IsXactIsoLevelSerializable)
return CurrentSnapshot;
CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
--- 130,150 ----
FirstSnapshotSet = true;
/*
! * In xact-snapshot-based isolation levels, the first snapshot
must live until end of
* xact regardless of what the caller does with it, so we must
* register it internally here and unregister it at end of xact.
*/
! if (IsXactIsoLevelXactSnapshotBased)
{
CurrentSnapshot =
RegisterSnapshotOnOwner(CurrentSnapshot,
TopTransactionResourceOwner);
! registered_xact_snapshot = true;
}
return CurrentSnapshot;
}
! if (IsXactIsoLevelXactSnapshotBased)
return CurrentSnapshot;
CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
***************
*** 155,161 **** GetTransactionSnapshot(void)
/*
* GetLatestSnapshot
* Get a snapshot that is up-to-date as of the current instant,
! * even if we are executing in SERIALIZABLE mode.
*/
Snapshot
GetLatestSnapshot(void)
--- 155,161 ----
/*
* GetLatestSnapshot
* Get a snapshot that is up-to-date as of the current instant,
! * even if we are executing in xact-snapshot-based mode.
*/
Snapshot
GetLatestSnapshot(void)
***************
*** 515,527 **** void
AtEarlyCommit_Snapshot(void)
{
/*
! * On a serializable transaction we must unregister our private refcount
! * to the serializable snapshot.
*/
! if (registered_serializable)
UnregisterSnapshotFromOwner(CurrentSnapshot,
TopTransactionResourceOwner);
! registered_serializable = false;
}
--- 515,527 ----
AtEarlyCommit_Snapshot(void)
{
/*
! * On a xact-snapshot-based transaction we must unregister our private
refcount
! * to the xact snapshot.
*/
! if (registered_xact_snapshot)
UnregisterSnapshotFromOwner(CurrentSnapshot,
TopTransactionResourceOwner);
! registered_xact_snapshot = false;
}
***************
*** 557,561 **** AtEOXact_Snapshot(bool isCommit)
SecondarySnapshot = NULL;
FirstSnapshotSet = false;
! registered_serializable = false;
}
--- 557,561 ----
SecondarySnapshot = NULL;
FirstSnapshotSet = false;
! registered_xact_snapshot = false;
}
*** a/src/include/access/xact.h
--- b/src/include/access/xact.h
***************
*** 32,41 **** extern int DefaultXactIsoLevel;
extern int XactIsoLevel;
/*
! * We only implement two isolation levels internally. This macro should
! * be used to check which one is selected.
*/
! #define IsXactIsoLevelSerializable (XactIsoLevel >= XACT_REPEATABLE_READ)
/* Xact read-only state */
extern bool DefaultXactReadOnly;
--- 32,45 ----
extern int XactIsoLevel;
/*
! * We implement three isolation levels internally.
! * The two stronger ones use one snapshot per database transaction;
! * the others use one snapshot per statement.
! * Serializable uses predicate locks.
! * These macros should be used to check which isolation level is selected.
*/
! #define IsXactIsoLevelXactSnapshotBased (XactIsoLevel >= XACT_REPEATABLE_READ)
! #define IsXactIsoLevelFullySerializable (XactIsoLevel == XACT_SERIALIZABLE)
/* Xact read-only state */
extern bool DefaultXactReadOnly;
*** /dev/null
--- b/src/include/storage/predicate.h
***************
*** 0 ****
--- 1,22 ----
+ /*-------------------------------------------------------------------------
+ *
+ * predicate.h
+ * POSTGRES predicate locking definitions.
+ *
+ *
+ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $PostgreSQL$
+ *
+ *-------------------------------------------------------------------------
+ */
+ #ifndef PREDICATE_H
+ #define PREDICATE_H
+
+ #include "storage/lock.h"
+ #include "utils/relcache.h"
+
+ extern void PredicateLockRelation(Relation relation);
+
+ #endif /* PREDICATE_H */
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers