On Sat, Mar 3, 2012 at 1:14 PM, Simon Riggs <si...@2ndquadrant.com> wrote:
> On Fri, Mar 2, 2012 at 8:58 PM, Noah Misch <n...@leadboat.com> wrote:
>> On Fri, Mar 02, 2012 at 08:46:45AM +0000, Simon Riggs wrote:
>>> On Thu, Mar 1, 2012 at 8:49 PM, Heikki Linnakangas 
>>> <heikki.linnakan...@enterprisedb.com> wrote:
>>> > It's still broken:
>>
>> [BEGIN;TRUNCATE;SAVEPOINT;COPY;ROLLBACK TO]
>>
>>> So this approach isn't the one...
>>>
>>> The COPY FREEZE patch provides a way for the user to say explicitly
>>> that they don't really care about these MVCC corner cases and as a
>>> result allows us to avoid touching XidInMVCCSnapshot() at all. So
>>> there is still a patch on the table.
>>
>> You can salvage the optimization by tightening its prerequisite: use it when
>> the current subtransaction or a child thereof created or truncated the table.
>> A parent subtransaction having done so is acceptable for the WAL avoidance
>> optimization but not for this.
>
> I misread your earlier comment. Yes, that will make this work correctly.
>
>> Incidentally, I contend that we should write frozen tuples to new/truncated
>> tables unconditionally.  The current behavior of making old snapshots see the
>> table as empty violates atomicity at least as badly as letting those 
>> snapshots
>> see the future-snapshot contents.  But Marti has a sound proposal that would
>> interact with your efforts here to avoid violating atomicity at all:
>> http://archives.postgresql.org/message-id/cabrt9rbrmdsoz8kxgehfb4lg-ev9u67-6dlqvoiibpkkhtl...@mail.gmail.com
>
> Thank you for bringing this to my attention.
>
> This will make this optimisation work correctly without adding
> anything to the main code path of XidInMVCCSnapshot() and without the
> annoying FREEZE syntax. So this puts the patch squarely back on the
> table.
>
> I'll do another version of this later today designed to work with the
> StrictMVCC patch.

OK, so latest version of patch handling the subtransaction issues
correctly. Thanks to Noah and Heikki for identifying them and hitting
me with the clue stick.

So this version of patch has negligible effect on mainline performance
though leaves newly loaded data visible in ways that would break MVCC.
So this patch relies on the safe_truncate.v2.patch posted on separate
thread.

-- 
 Simon Riggs                   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index c910863..b891327 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2050,12 +2050,27 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
 	tup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
 	tup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
 	tup->t_data->t_infomask |= HEAP_XMAX_INVALID;
-	HeapTupleHeaderSetXmin(tup->t_data, xid);
 	HeapTupleHeaderSetCmin(tup->t_data, cid);
 	HeapTupleHeaderSetXmax(tup->t_data, 0);		/* for cleanliness */
 	tup->t_tableOid = RelationGetRelid(relation);
 
 	/*
+	 * If we are inserting into a new relation invisible as yet to other
+	 * backends and our session has no prior snapshots and no ready portals
+	 * then we can also set the hint bit for the rows we are inserting. The
+	 * last two restrictions ensure that HeapTupleSatisfiesMVCC() gives
+	 * the right answer if the current transaction inspects the data after
+	 * we load it.
+	 */
+	if (options & HEAP_INSERT_HINT_XMIN)
+	{
+		tup->t_data->t_infomask |= HEAP_XMIN_COMMITTED;
+		HeapTupleHeaderSetXmin(tup->t_data, FrozenTransactionId);
+	}
+	else
+		HeapTupleHeaderSetXmin(tup->t_data, xid);
+
+	/*
 	 * If the new tuple is too big for storage or contains already toasted
 	 * out-of-line attributes from some other relation, invoke the toaster.
 	 */
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 110480f..a34f072 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -43,6 +43,7 @@
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
+#include "utils/portal.h"
 #include "utils/rel.h"
 #include "utils/snapmgr.h"
 
@@ -1922,6 +1923,21 @@ CopyFrom(CopyState cstate)
 		hi_options |= HEAP_INSERT_SKIP_FSM;
 		if (!XLogIsNeeded())
 			hi_options |= HEAP_INSERT_SKIP_WAL;
+
+		if (ThereAreNoPriorRegisteredSnapshots() &&
+			ThereAreNoReadyPortals())
+		{
+			SubTransactionId	currxid = GetCurrentSubTransactionId();
+
+			/*
+			 * If the relfilenode has been created in this subtransaction
+			 * then we can further optimise the data load by setting hint
+			 * bits and pre-freezing tuples.
+			 */
+			if (cstate->rel->rd_createSubid == currxid ||
+				cstate->rel->rd_newRelfilenodeSubid == currxid)
+				hi_options |= HEAP_INSERT_HINT_XMIN;
+		}
 	}
 
 	/*
diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c
index cfb73c1..24075db 100644
--- a/src/backend/utils/mmgr/portalmem.c
+++ b/src/backend/utils/mmgr/portalmem.c
@@ -1055,3 +1055,22 @@ pg_cursor(PG_FUNCTION_ARGS)
 
 	return (Datum) 0;
 }
+
+bool
+ThereAreNoReadyPortals(void)
+{
+	HASH_SEQ_STATUS status;
+	PortalHashEnt *hentry;
+
+	hash_seq_init(&status, PortalHashTable);
+
+	while ((hentry = (PortalHashEnt *) hash_seq_search(&status)) != NULL)
+	{
+		Portal		portal = hentry->portal;
+
+		if (portal->status == PORTAL_READY)
+			return false;
+	}
+
+	return true;
+}
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 5aebbd1..5d9e3bf 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -1183,3 +1183,12 @@ DeleteAllExportedSnapshotFiles(void)
 
 	FreeDir(s_dir);
 }
+
+bool
+ThereAreNoPriorRegisteredSnapshots(void)
+{
+	if (RegisteredSnapshots <= 1)
+		return true;
+
+	return false;
+}
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index fa38803..0381785 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -26,6 +26,7 @@
 /* "options" flag bits for heap_insert */
 #define HEAP_INSERT_SKIP_WAL	0x0001
 #define HEAP_INSERT_SKIP_FSM	0x0002
+#define HEAP_INSERT_HINT_XMIN	0x0004
 
 typedef struct BulkInsertStateData *BulkInsertState;
 
diff --git a/src/include/utils/portal.h b/src/include/utils/portal.h
index 4833942..bd2b133 100644
--- a/src/include/utils/portal.h
+++ b/src/include/utils/portal.h
@@ -219,5 +219,6 @@ extern void PortalDefineQuery(Portal portal,
 extern Node *PortalListGetPrimaryStmt(List *stmts);
 extern void PortalCreateHoldStore(Portal portal);
 extern void PortalHashTableDeleteAll(void);
+extern bool ThereAreNoReadyPortals(void);
 
 #endif   /* PORTAL_H */
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index f195981..789b72e 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -46,5 +46,6 @@ extern Datum pg_export_snapshot(PG_FUNCTION_ARGS);
 extern void ImportSnapshot(const char *idstr);
 extern bool XactHasExportedSnapshots(void);
 extern void DeleteAllExportedSnapshotFiles(void);
+extern bool ThereAreNoPriorRegisteredSnapshots(void);
 
 #endif   /* SNAPMGR_H */
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 8e2bc0c..b9626e5 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -239,6 +239,70 @@ a\.
 \.b
 c\.d
 "\."
+CREATE TABLE vistest (LIKE testeoc);
+BEGIN;
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+SELECT * FROM vistest;
+ a 
+---
+ a
+ b
+ c
+(3 rows)
+
+SAVEPOINT s1;
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+SELECT * FROM vistest;
+ a 
+---
+ d
+ e
+ f
+(3 rows)
+
+SAVEPOINT s2;
+TRUNCATE vistest;
+SAVEPOINT s3;
+COPY vistest FROM stdin CSV;
+SELECT * FROM vistest;
+ a 
+---
+ j
+ k
+ l
+(3 rows)
+
+ROLLBACK TO SAVEPOINT s2;
+SELECT * FROM vistest;
+ a 
+---
+ d
+ e
+ f
+(3 rows)
+
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+SELECT * FROM vistest;
+ a 
+---
+ x
+ y
+ z
+(3 rows)
+
+COMMIT;
+SELECT * FROM vistest;
+ a 
+---
+ x
+ y
+ z
+(3 rows)
+
+DROP TABLE vistest;
 DROP TABLE x, y;
 DROP FUNCTION fn_x_before();
 DROP FUNCTION fn_x_after();
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 6322c8f..3d2c376 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -164,6 +164,45 @@ c\.d
 
 COPY testeoc TO stdout CSV;
 
+CREATE TABLE vistest (LIKE testeoc);
+BEGIN;
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+a
+b
+c
+\.
+SELECT * FROM vistest;
+SAVEPOINT s1;
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+d
+e
+f
+\.
+SELECT * FROM vistest;
+SAVEPOINT s2;
+TRUNCATE vistest;
+SAVEPOINT s3;
+COPY vistest FROM stdin CSV;
+j
+k
+l
+\.
+SELECT * FROM vistest;
+ROLLBACK TO SAVEPOINT s2;
+SELECT * FROM vistest;
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+x
+y
+z
+\.
+SELECT * FROM vistest;
+COMMIT;
+SELECT * FROM vistest;
+
+DROP TABLE vistest;
 DROP TABLE x, y;
 DROP FUNCTION fn_x_before();
 DROP FUNCTION fn_x_after();
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to