On Fri, Mar 13, 2020 at 6:58 AM Justin Pryzby <pry...@telsasoft.com> wrote:
> > Thanks for picking up this patch. There's a minor typo:
> >
> > + * readable outside of this sessoin. Therefore doing IO here isn't
> >
> > => session
> >
> > --
> > Justin

Thanks, please see the updated and rebased patch.
(master 17a28b03645e27d73bf69a95d7569b61e58f06eb)

--
Ibrar Ahmed
diff --git a/contrib/pg_visibility/expected/pg_visibility.out b/contrib/pg_visibility/expected/pg_visibility.out index f0dcb897c4..6ac3e525eb 100644 --- a/contrib/pg_visibility/expected/pg_visibility.out +++ b/contrib/pg_visibility/expected/pg_visibility.out @@ -131,6 +131,69 @@ select pg_truncate_visibility_map('test_partition'); (1 row) +-- test copy freeze +create table copyfreeze (a int, b char(1500)); +-- load all rows via COPY FREEZE and ensure that all pages are set all-visible +-- and all-frozen. +begin; +truncate copyfreeze; +copy copyfreeze from stdin freeze; +commit; +select * from pg_visibility_map('copyfreeze'); + blkno | all_visible | all_frozen +-------+-------------+------------ + 0 | t | t + 1 | t | t + 2 | t | t +(3 rows) + +select * from pg_check_frozen('copyfreeze'); + t_ctid +-------- +(0 rows) + +-- load half the rows via regular COPY and rest via COPY FREEZE. The pages +-- which are touched by regular COPY must not be set all-visible/all-frozen. On +-- the other hand, pages allocated by COPY FREEZE should be marked +-- all-frozen/all-visible. +begin; +truncate copyfreeze; +copy copyfreeze from stdin; +copy copyfreeze from stdin freeze; +commit; +select * from pg_visibility_map('copyfreeze'); + blkno | all_visible | all_frozen +-------+-------------+------------ + 0 | f | f + 1 | f | f + 2 | t | t +(3 rows) + +select * from pg_check_frozen('copyfreeze'); + t_ctid +-------- +(0 rows) + +-- Try a mix of regular COPY and COPY FREEZE. 
+begin; +truncate copyfreeze; +copy copyfreeze from stdin freeze; +copy copyfreeze from stdin; +copy copyfreeze from stdin freeze; +commit; +select * from pg_visibility_map('copyfreeze'); + blkno | all_visible | all_frozen +-------+-------------+------------ + 0 | t | t + 1 | f | f + 2 | t | t +(3 rows) + +select * from pg_check_frozen('copyfreeze'); + t_ctid +-------- +(0 rows) + -- cleanup drop table test_partitioned; drop view test_view; @@ -140,3 +203,4 @@ drop server dummy_server; drop foreign data wrapper dummy; drop materialized view matview_visibility_test; drop table regular_table; +drop table copyfreeze; diff --git a/contrib/pg_visibility/sql/pg_visibility.sql b/contrib/pg_visibility/sql/pg_visibility.sql index c2a7f1d9e4..01a65fdab4 100644 --- a/contrib/pg_visibility/sql/pg_visibility.sql +++ b/contrib/pg_visibility/sql/pg_visibility.sql @@ -72,6 +72,82 @@ select count(*) > 0 from pg_visibility_map_summary('test_partition'); select * from pg_check_frozen('test_partition'); -- hopefully none select pg_truncate_visibility_map('test_partition'); +-- test copy freeze +create table copyfreeze (a int, b char(1500)); + +-- load all rows via COPY FREEZE and ensure that all pages are set all-visible +-- and all-frozen. +begin; +truncate copyfreeze; +copy copyfreeze from stdin freeze; +1 '1' +2 '2' +3 '3' +4 '4' +5 '5' +6 '6' +7 '7' +8 '8' +9 '9' +10 '10' +11 '11' +12 '12' +\. +commit; +select * from pg_visibility_map('copyfreeze'); +select * from pg_check_frozen('copyfreeze'); + +-- load half the rows via regular COPY and rest via COPY FREEZE. The pages +-- which are touched by regular COPY must not be set all-visible/all-frozen. On +-- the other hand, pages allocated by COPY FREEZE should be marked +-- all-frozen/all-visible. +begin; +truncate copyfreeze; +copy copyfreeze from stdin; +1 '1' +2 '2' +3 '3' +4 '4' +5 '5' +6 '6' +\. +copy copyfreeze from stdin freeze; +7 '7' +8 '8' +9 '9' +10 '10' +11 '11' +12 '12' +\. 
+commit; +select * from pg_visibility_map('copyfreeze'); +select * from pg_check_frozen('copyfreeze'); + +-- Try a mix of regular COPY and COPY FREEZE. +begin; +truncate copyfreeze; +copy copyfreeze from stdin freeze; +1 '1' +2 '2' +3 '3' +4 '4' +5 '5' +\. +copy copyfreeze from stdin; +6 '6' +\. +copy copyfreeze from stdin freeze; +7 '7' +8 '8' +9 '9' +10 '10' +11 '11' +12 '12' +\. +commit; +select * from pg_visibility_map('copyfreeze'); +select * from pg_check_frozen('copyfreeze'); + -- cleanup drop table test_partitioned; drop view test_view; @@ -81,3 +157,4 @@ drop server dummy_server; drop foreign data wrapper dummy; drop materialized view matview_visibility_test; drop table regular_table; +drop table copyfreeze; diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 29694b8aa4..614958e5ee 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2114,6 +2114,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, int ndone; PGAlignedBlock scratch; Page page; + Buffer vmbuffer = InvalidBuffer; bool needwal; Size saveFreeSpace; bool need_tuple_data = RelationIsLogicallyLogged(relation); @@ -2168,9 +2169,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, while (ndone < ntuples) { Buffer buffer; - Buffer vmbuffer = InvalidBuffer; + bool starting_with_empty_page; bool all_visible_cleared = false; + bool all_frozen_set = false; int nthispage; + XLogRecPtr recptr; CHECK_FOR_INTERRUPTS(); @@ -2183,6 +2186,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, &vmbuffer, NULL); page = BufferGetPage(buffer); + starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0; + + if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN)) + all_frozen_set = true; + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -2216,7 +2224,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int 
ntuples, log_heap_new_cid(relation, heaptup); } - if (PageIsAllVisible(page)) + /* + * If the page is all visible, need to clear that, unless we're only + * going to add further frozen rows to it. + * + * If we're only adding already frozen rows, and the page was + * previously empty, mark it as all-visible. + */ + if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN)) { all_visible_cleared = true; PageClearAllVisible(page); @@ -2224,6 +2239,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, BufferGetBlockNumber(buffer), vmbuffer, VISIBILITYMAP_VALID_BITS); } + else if (all_frozen_set) + PageSetAllVisible(page); /* * XXX Should we set PageSetPrunable on this page ? See heap_insert() @@ -2234,7 +2251,6 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, /* XLOG stuff */ if (needwal) { - XLogRecPtr recptr; xl_heap_multi_insert *xlrec; uint8 info = XLOG_HEAP2_MULTI_INSERT; char *tupledata; @@ -2247,8 +2263,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, * If the page was previously empty, we can reinit the page * instead of restoring the whole thing. */ - init = (ItemPointerGetOffsetNumber(&(heaptuples[ndone]->t_self)) == FirstOffsetNumber && - PageGetMaxOffsetNumber(page) == FirstOffsetNumber + nthispage - 1); + init = starting_with_empty_page; /* allocate xl_heap_multi_insert struct from the scratch area */ xlrec = (xl_heap_multi_insert *) scratchptr; @@ -2266,7 +2281,17 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, /* the rest of the scratch space is used for tuple data */ tupledata = scratchptr; - xlrec->flags = all_visible_cleared ? 
XLH_INSERT_ALL_VISIBLE_CLEARED : 0; + xlrec->flags = 0; + Assert((all_visible_cleared == 0 && all_frozen_set == 0) || + all_visible_cleared != all_frozen_set); + if (all_visible_cleared) + xlrec->flags |= XLH_INSERT_ALL_VISIBLE_CLEARED; + if (all_frozen_set) + { + xlrec->flags |= XLH_INSERT_ALL_VISIBLE_SET; + xlrec->flags |= XLH_INSERT_ALL_FROZEN_SET; + } + xlrec->ntuples = nthispage; /* @@ -2340,13 +2365,46 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, END_CRIT_SECTION(); - UnlockReleaseBuffer(buffer); - if (vmbuffer != InvalidBuffer) - ReleaseBuffer(vmbuffer); + if (all_frozen_set) + { + Assert(PageIsAllVisible(page)); + + /* + * Having to potentially read the page while holding an exclusive + * lock on the page isn't great. But we only get here if + * HEAP_INSERT_FROZEN is set, and we only do so if the table isn't + * readable outside of this session. Therefore doing IO here isn't + * that bad. + */ + visibilitymap_pin(relation, BufferGetBlockNumber(buffer), &vmbuffer); + + /* + * FIXME: setting recptr here is a dirty dirty hack, to prevent + * visibilitymap_set() from WAL logging. + * + * It's fine to use InvalidTransactionId here - this is only used + * when HEAP_INSERT_FROZEN is specified, which intentionally + * violates visibility rules. + */ + visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer, + recptr, vmbuffer, + InvalidTransactionId, + VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN); + } + UnlockReleaseBuffer(buffer); ndone += nthispage; + + /* + * NB: Only release vmbuffer after inserting all tuples - it's fairly + * likely that we'll insert into subsequent heap pages that are likely + * to use the same vm page. + */ } + if (vmbuffer != InvalidBuffer) + ReleaseBuffer(vmbuffer); + /* * We're done with the actual inserts. Check for conflicts again, to * ensure that all rw-conflicts in to these inserts are detected. 
Without @@ -8224,6 +8282,7 @@ heap_xlog_multi_insert(XLogReaderState *record) BlockNumber blkno; Buffer buffer; Page page; + Page vmpage; union { HeapTupleHeaderData hdr; @@ -8248,13 +8307,54 @@ heap_xlog_multi_insert(XLogReaderState *record) * The visibility map may need to be fixed even if the heap page is * already up-to-date. */ - if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) + if (xlrec->flags & (XLH_INSERT_ALL_VISIBLE_CLEARED | + XLH_INSERT_ALL_VISIBLE_SET | + XLH_INSERT_ALL_FROZEN_SET)) { Relation reln = CreateFakeRelcacheEntry(rnode); Buffer vmbuffer = InvalidBuffer; visibilitymap_pin(reln, blkno, &vmbuffer); - visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS); + + if (xlrec->flags & (XLH_INSERT_ALL_VISIBLE_CLEARED)) + { + Assert(!(xlrec->flags & (XLH_INSERT_ALL_FROZEN_SET | + XLH_INSERT_ALL_VISIBLE_SET))); + visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS); + } + else + { + int vmbits = 0; + + if (xlrec->flags & (XLH_INSERT_ALL_VISIBLE_SET)) + vmbits |= VISIBILITYMAP_ALL_VISIBLE; + if (xlrec->flags & (XLH_INSERT_ALL_FROZEN_SET)) + vmbits |= VISIBILITYMAP_ALL_FROZEN; + + vmpage = BufferGetPage(vmbuffer); + + /* + * Don't set the bit if replay has already passed this point. + * + * It might be safe to do this unconditionally; if replay has passed + * this point, we'll replay at least as far this time as we did + * before, and if this bit needs to be cleared, the record responsible + * for doing so should be again replayed, and clear it. For right + * now, out of an abundance of conservatism, we use the same test here + * we did for the heap page. If this results in a dropped bit, no + * real harm is done; and the next VACUUM will fix it. + * + * XXX: This seems entirely unnecessary? + * + * FIXME: Theoretically we should only do this after we've + * modified the heap - but it's safe to do it here I think, + * because this means that the page previously was empty. 
+ */ + if (lsn > PageGetLSN(vmpage)) + visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer, + InvalidTransactionId, vmbits); + } + ReleaseBuffer(vmbuffer); FreeFakeRelcacheEntry(reln); } @@ -8332,6 +8432,8 @@ heap_xlog_multi_insert(XLogReaderState *record) if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); + if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_SET) + PageSetAllVisible(page); MarkBufferDirty(buffer); } diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 0a51678c40..812d839612 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -254,7 +254,6 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk); #endif - Assert(InRecovery || XLogRecPtrIsInvalid(recptr)); Assert(InRecovery || BufferIsValid(heapBuf)); Assert(flags & VISIBILITYMAP_VALID_BITS); diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h index 95d18cdb12..7426ad5e4f 100644 --- a/src/include/access/heapam_xlog.h +++ b/src/include/access/heapam_xlog.h @@ -67,6 +67,8 @@ #define XLH_INSERT_LAST_IN_MULTI (1<<1) #define XLH_INSERT_IS_SPECULATIVE (1<<2) #define XLH_INSERT_CONTAINS_NEW_TUPLE (1<<3) +#define XLH_INSERT_ALL_VISIBLE_SET (1<<4) +#define XLH_INSERT_ALL_FROZEN_SET (1<<5) /* * xl_heap_update flag values, 8 bits are available.