Hi!
I've always been a little wary of using the TRUNCATE command due to
the warning in the documentation about it not being "MVCC-safe":
queries may silently give wrong results and it's hard to tell when
they are affected.
That got me thinking, why can't we handle this like a standby server
does -- if some query has data removed from underneath it, it aborts
with a serialization failure.
Does this solution sound like a good idea?
The attached patch is a lame attempt at implementing this. I added a
new pg_class.relvalidxmin attribute which tracks the Xid of the last
TRUNCATE (maybe it should be called reltruncatexid?). Whenever
starting a relation scan with a snapshot older than relvalidxmin, an
error is thrown. This seems to work out well since TRUNCATE updates
pg_class anyway, and anyone who sees the new relfilenode automatically
knows when it was truncated.
Am I on the right track? Are there any better ways to attach this
information to a relation?
Should I also add another counter to pg_stat_database_conflicts?
Currently this table is only used on standby servers.
Since I wrote it just this afternoon, there are a few things still
wrong with the patch (it doesn't handle xid wraparound for one), so
don't be too picky about the code yet. :)
Example:
CREATE TABLE foo (i int);
Session A:
BEGIN ISOLATION LEVEL REPEATABLE READ;
SELECT txid_current(); -- Force snapshot open
Session B:
TRUNCATE TABLE foo;
Session A:
SELECT * FROM foo;
ERROR: canceling statement due to conflict with TRUNCATE TABLE on foo
DETAIL: Rows visible to this transaction have been removed.
Patch also available in my github 'truncate' branch:
https://github.com/intgr/postgres/commits/truncate
Regards,
Marti
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
new file mode 100644
index 99a431a..e909b17
*** a/src/backend/access/heap/heapam.c
--- b/src/backend/access/heap/heapam.c
*************** heap_beginscan_internal(Relation relatio
*** 1175,1180 ****
--- 1175,1196 ----
HeapScanDesc scan;
/*
+ * If the snapshot is older than relvalidxmin then that means TRUNCATE has
+ * removed data that we should still see. Abort rather than giving
+ * potentially bogus results.
+ */
+ if (relation->rd_rel->relvalidxmin != InvalidTransactionId &&
+ snapshot->xmax != InvalidTransactionId &&
+ NormalTransactionIdPrecedes(snapshot->xmax, relation->rd_rel->relvalidxmin))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("canceling statement due to conflict with TRUNCATE TABLE on %s",
+ NameStr(relation->rd_rel->relname)),
+ errdetail("Rows visible to this transaction have been removed.")));
+ }
+
+ /*
* increment relation ref count while scanning relation
*
* This is just to make really sure the relcache entry won't go away while
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
new file mode 100644
index aef410a..3f9bd5d
*** a/src/backend/catalog/heap.c
--- b/src/backend/catalog/heap.c
*************** InsertPgClassTuple(Relation pg_class_des
*** 787,792 ****
--- 787,793 ----
values[Anum_pg_class_relhastriggers - 1] = BoolGetDatum(rd_rel->relhastriggers);
values[Anum_pg_class_relhassubclass - 1] = BoolGetDatum(rd_rel->relhassubclass);
values[Anum_pg_class_relfrozenxid - 1] = TransactionIdGetDatum(rd_rel->relfrozenxid);
+ values[Anum_pg_class_relvalidxmin - 1] = TransactionIdGetDatum(rd_rel->relvalidxmin);
if (relacl != (Datum) 0)
values[Anum_pg_class_relacl - 1] = relacl;
else
*************** AddNewRelationTuple(Relation pg_class_de
*** 884,889 ****
--- 885,891 ----
new_rel_reltup->relfrozenxid = InvalidTransactionId;
}
+ new_rel_reltup->relvalidxmin = InvalidTransactionId;
new_rel_reltup->relowner = relowner;
new_rel_reltup->reltype = new_type_oid;
new_rel_reltup->reloftype = reloftype;
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
new file mode 100644
index bfbe642..0d96a6a
*** a/src/backend/catalog/index.c
--- b/src/backend/catalog/index.c
*************** reindex_index(Oid indexId, bool skip_con
*** 2854,2860 ****
}
/* We'll build a new physical relation for the index */
! RelationSetNewRelfilenode(iRel, InvalidTransactionId);
/* Initialize the index and rebuild */
/* Note: we do not need to re-establish pkey setting */
--- 2854,2860 ----
}
/* We'll build a new physical relation for the index */
! RelationSetNewRelfilenode(iRel, InvalidTransactionId, InvalidTransactionId);
/* Initialize the index and rebuild */
/* Note: we do not need to re-establish pkey setting */
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
new file mode 100644
index 1f95abc..ab4aec2
*** a/src/backend/commands/sequence.c
--- b/src/backend/commands/sequence.c
*************** ResetSequence(Oid seq_relid)
*** 287,293 ****
* Create a new storage file for the sequence. We want to keep the
* sequence's relfrozenxid at 0, since it won't contain any unfrozen XIDs.
*/
! RelationSetNewRelfilenode(seq_rel, InvalidTransactionId);
/*
* Insert the modified tuple into the new storage file.
--- 287,293 ----
* Create a new storage file for the sequence. We want to keep the
* sequence's relfrozenxid at 0, since it won't contain any unfrozen XIDs.
*/
! RelationSetNewRelfilenode(seq_rel, InvalidTransactionId, InvalidTransactionId);
/*
* Insert the modified tuple into the new storage file.
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
new file mode 100644
index 07dc326..993dc41
*** a/src/backend/commands/tablecmds.c
--- b/src/backend/commands/tablecmds.c
***************
*** 19,24 ****
--- 19,25 ----
#include "access/reloptions.h"
#include "access/relscan.h"
#include "access/sysattr.h"
+ #include "access/transam.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
*************** ExecuteTruncate(TruncateStmt *stmt)
*** 1118,1124 ****
* as the relfilenode value. The old storage file is scheduled for
* deletion at commit.
*/
! RelationSetNewRelfilenode(rel, RecentXmin);
heap_relid = RelationGetRelid(rel);
toast_relid = rel->rd_rel->reltoastrelid;
--- 1119,1125 ----
* as the relfilenode value. The old storage file is scheduled for
* deletion at commit.
*/
! RelationSetNewRelfilenode(rel, RecentXmin, GetCurrentTransactionId());
heap_relid = RelationGetRelid(rel);
toast_relid = rel->rd_rel->reltoastrelid;
*************** ExecuteTruncate(TruncateStmt *stmt)
*** 1129,1135 ****
if (OidIsValid(toast_relid))
{
rel = relation_open(toast_relid, AccessExclusiveLock);
! RelationSetNewRelfilenode(rel, RecentXmin);
heap_close(rel, NoLock);
}
--- 1130,1141 ----
if (OidIsValid(toast_relid))
{
rel = relation_open(toast_relid, AccessExclusiveLock);
! RelationSetNewRelfilenode(rel, RecentXmin, InvalidTransactionId);
!
! /*
! * We don't need a cmin here because there can't be any open
! * cursors in the same session as the TRUNCATE command.
! */
heap_close(rel, NoLock);
}
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
new file mode 100644
index a59950e..17303e8
*** a/src/backend/utils/cache/relcache.c
--- b/src/backend/utils/cache/relcache.c
*************** RelationBuildLocalRelation(const char *r
*** 2605,2611 ****
* the XIDs that will be put into the new relation contents.
*/
void
! RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid)
{
Oid newrelfilenode;
RelFileNodeBackend newrnode;
--- 2605,2612 ----
* the XIDs that will be put into the new relation contents.
*/
void
! RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid,
! TransactionId validxmin)
{
Oid newrelfilenode;
RelFileNodeBackend newrnode;
*************** RelationSetNewRelfilenode(Relation relat
*** 2674,2679 ****
--- 2675,2684 ----
}
classform->relfrozenxid = freezeXid;
+ if (validxmin != InvalidTransactionId &&
+ TransactionIdPrecedes(classform->relvalidxmin, validxmin))
+ classform->relvalidxmin = validxmin;
+
simple_heap_update(pg_class, &tuple->t_self, tuple);
CatalogUpdateIndexes(pg_class, tuple);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
new file mode 100644
index 78c3c9e..acd2558
*** a/src/include/catalog/catversion.h
--- b/src/include/catalog/catversion.h
***************
*** 53,58 ****
*/
/* yyyymmddN */
! #define CATALOG_VERSION_NO 201202083
#endif
--- 53,58 ----
*/
/* yyyymmddN */
! #define CATALOG_VERSION_NO 201202091
#endif
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
new file mode 100644
index 3b01bb4..8f1ed9b
*** a/src/include/catalog/pg_class.h
--- b/src/include/catalog/pg_class.h
*************** CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI
*** 67,72 ****
--- 67,73 ----
bool relhastriggers; /* has (or has had) any TRIGGERs */
bool relhassubclass; /* has (or has had) derived classes */
TransactionId relfrozenxid; /* all Xids < this are frozen in this rel */
+ TransactionId relvalidxmin; /* data is only valid for snapshots > this Xid */
#ifdef CATALOG_VARLEN /* variable-length fields start here */
/* NOTE: These fields are not present in a relcache entry's rd_rel field. */
*************** CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI
*** 77,83 ****
/* Size of fixed part of pg_class tuples, not counting var-length fields */
#define CLASS_TUPLE_SIZE \
! (offsetof(FormData_pg_class,relfrozenxid) + sizeof(TransactionId))
/* ----------------
* Form_pg_class corresponds to a pointer to a tuple with
--- 78,84 ----
/* Size of fixed part of pg_class tuples, not counting var-length fields */
#define CLASS_TUPLE_SIZE \
! (offsetof(FormData_pg_class,relvalidxmin) + sizeof(TransactionId))
/* ----------------
* Form_pg_class corresponds to a pointer to a tuple with
*************** typedef FormData_pg_class *Form_pg_class
*** 91,97 ****
* ----------------
*/
! #define Natts_pg_class 27
#define Anum_pg_class_relname 1
#define Anum_pg_class_relnamespace 2
#define Anum_pg_class_reltype 3
--- 92,98 ----
* ----------------
*/
! #define Natts_pg_class 28
#define Anum_pg_class_relname 1
#define Anum_pg_class_relnamespace 2
#define Anum_pg_class_reltype 3
*************** typedef FormData_pg_class *Form_pg_class
*** 117,124 ****
#define Anum_pg_class_relhastriggers 23
#define Anum_pg_class_relhassubclass 24
#define Anum_pg_class_relfrozenxid 25
! #define Anum_pg_class_relacl 26
! #define Anum_pg_class_reloptions 27
/* ----------------
* initial contents of pg_class
--- 118,126 ----
#define Anum_pg_class_relhastriggers 23
#define Anum_pg_class_relhassubclass 24
#define Anum_pg_class_relfrozenxid 25
! #define Anum_pg_class_relvalidxmin 26
! #define Anum_pg_class_relacl 27
! #define Anum_pg_class_reloptions 28
/* ----------------
* initial contents of pg_class
*************** typedef FormData_pg_class *Form_pg_class
*** 130,142 ****
*/
/* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId */
! DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f 3 _null_ _null_ ));
DESCR("");
! DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 _null_ _null_ ));
DESCR("");
! DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 26 0 t f f f f 3 _null_ _null_ ));
DESCR("");
! DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 _null_ _null_ ));
DESCR("");
--- 132,144 ----
*/
/* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId */
! DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f 3 0 _null_ _null_ ));
DESCR("");
! DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 0 _null_ _null_ ));
DESCR("");
! DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 26 0 t f f f f 3 0 _null_ _null_ ));
DESCR("");
! DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 28 0 t f f f f 3 0 _null_ _null_ ));
DESCR("");
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
new file mode 100644
index 2f39486..60c2eb6
*** a/src/include/utils/relcache.h
--- b/src/include/utils/relcache.h
*************** extern Relation RelationBuildLocalRelati
*** 76,82 ****
* Routine to manage assignment of new relfilenode to a relation
*/
extern void RelationSetNewRelfilenode(Relation relation,
! TransactionId freezeXid);
/*
* Routines for flushing/rebuilding relcache entries in various scenarios
--- 76,83 ----
* Routine to manage assignment of new relfilenode to a relation
*/
extern void RelationSetNewRelfilenode(Relation relation,
! TransactionId freezeXid,
! TransactionId validxmin);
/*
* Routines for flushing/rebuilding relcache entries in various scenarios
diff --git a/src/test/isolation/expected/truncate.out b/src/test/isolation/expected/truncate.out
new file mode 100644
index ...2b9f161
*** a/src/test/isolation/expected/truncate.out
--- b/src/test/isolation/expected/truncate.out
***************
*** 0 ****
--- 1,63 ----
+ Parsed test spec with 2 sessions
+
+ starting permutation: begin select roll trunc
+ step begin:
+ BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+ -- Force snapshot open
+ SELECT 1 AS foo FROM txid_current();
+
+ foo
+
+ 1
+ step select: SELECT * FROM foo;
+ i
+
+ 1
+ step roll: ROLLBACK;
+ step trunc: TRUNCATE TABLE foo;
+
+ starting permutation: begin select trunc roll
+ step begin:
+ BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+ -- Force snapshot open
+ SELECT 1 AS foo FROM txid_current();
+
+ foo
+
+ 1
+ step select: SELECT * FROM foo;
+ i
+
+ 1
+ step trunc: TRUNCATE TABLE foo; <waiting ...>
+ step roll: ROLLBACK;
+ step trunc: <... completed>
+
+ starting permutation: begin trunc select roll
+ step begin:
+ BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+ -- Force snapshot open
+ SELECT 1 AS foo FROM txid_current();
+
+ foo
+
+ 1
+ step trunc: TRUNCATE TABLE foo;
+ step select: SELECT * FROM foo;
+ ERROR: canceling statement due to conflict with TRUNCATE TABLE on foo
+ step roll: ROLLBACK;
+
+ starting permutation: trunc begin select roll
+ step trunc: TRUNCATE TABLE foo;
+ step begin:
+ BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+ -- Force snapshot open
+ SELECT 1 AS foo FROM txid_current();
+
+ foo
+
+ 1
+ step select: SELECT * FROM foo;
+ i
+
+ step roll: ROLLBACK;
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
new file mode 100644
index 669c0f2..bba722a
*** a/src/test/isolation/isolation_schedule
--- b/src/test/isolation/isolation_schedule
*************** test: fk-contention
*** 14,16 ****
--- 14,17 ----
test: fk-deadlock
test: fk-deadlock2
test: eval-plan-qual
+ test: truncate
diff --git a/src/test/isolation/specs/truncate.spec b/src/test/isolation/specs/truncate.spec
new file mode 100644
index ...2c7b707
*** a/src/test/isolation/specs/truncate.spec
--- b/src/test/isolation/specs/truncate.spec
***************
*** 0 ****
--- 1,23 ----
+ setup
+ {
+ CREATE TABLE foo (i int);
+ INSERT INTO foo VALUES (1);
+ }
+
+ teardown
+ {
+ DROP TABLE foo;
+ }
+
+ session "s1"
+ step "begin"
+ {
+ BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+ -- Force snapshot open
+ SELECT 1 AS foo FROM txid_current();
+ }
+ step "select" { SELECT * FROM foo; }
+ step "roll" { ROLLBACK; }
+
+ session "s2"
+ step "trunc" { TRUNCATE TABLE foo; }
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers