Alvaro Herrera escribió: > As it turns out, I have a patched slru.c that adds a new function to > verify whether a page exists on disk. I created this for the commit > timestamp module, for the BDR branch, but I think it's what we need > here.
Here's a patch that should fix the problem. Jesse, if you're able to test it, please give it a run and let me know if it works for you. I was able to upgrade an installation containing a problem that should reproduce yours. -- Álvaro Herrera http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
*** a/src/backend/access/transam/multixact.c --- b/src/backend/access/transam/multixact.c *************** *** 1719,1724 **** ZeroMultiXactMemberPage(int pageno, bool writeXlog) --- 1719,1756 ---- } /* + * After a binary upgrade from <= 9.2, the pg_multixact/offset SLRU area might + * contain files that are shorter than necessary; this would occur if the old + * installation had used multixacts beyond the first page (files cannot be + * copied, because the on-disk representation is different). pg_upgrade would + * update pg_control to set the next offset value to be at that position, so + * that tuples marked as locked by such MultiXacts would be seen as visible + * without having to consult multixact. However, trying to create or use a new + * MultiXactId would result in an error because the page on which the new value + * would reside does not exist. This routine is in charge of creating such + * pages. + */ + static void + MaybeExtendOffsetSlru(void) + { + int pageno; + + pageno = MultiXactIdToOffsetPage(MultiXactState->nextMXact); + + LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE); + + if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno)) + { + int slotno; + + slotno = ZeroMultiXactOffsetPage(pageno, false); + SimpleLruWritePage(MultiXactOffsetCtl, slotno); + } + + LWLockRelease(MultiXactOffsetControlLock); + } + + /* * This must be called ONCE during postmaster or standalone-backend startup. * * StartupXLOG has already established nextMXact/nextOffset by calling *************** *** 1738,1743 **** StartupMultiXact(void) --- 1770,1782 ---- int entryno; int flagsoff; + /* + * During a binary upgrade, make sure that the offsets SLRU is large + * enough to contain the next value that would be created. 
+ */ + if (IsBinaryUpgrade) + MaybeExtendOffsetSlru(); + /* Clean up offsets state */ LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE); *** a/src/backend/access/transam/slru.c --- b/src/backend/access/transam/slru.c *************** *** 563,568 **** SimpleLruWritePage(SlruCtl ctl, int slotno) --- 563,612 ---- SlruInternalWritePage(ctl, slotno, NULL); } + /* + * Return whether the given page exists on disk. + * + * A false return means that either the file does not exist, or that it's not + * large enough to contain the given page. + */ + bool + SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno) + { + int segno = pageno / SLRU_PAGES_PER_SEGMENT; + int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; + int offset = rpageno * BLCKSZ; + char path[MAXPGPATH]; + int fd; + bool result; + off_t endpos; + + SlruFileName(ctl, path, segno); + + fd = OpenTransientFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR); + if (fd < 0) + { + /* expected: file doesn't exist */ + if (errno == ENOENT) + return false; + + /* report error normally */ + slru_errcause = SLRU_OPEN_FAILED; + slru_errno = errno; + SlruReportIOError(ctl, pageno, 0); + } + + if ((endpos = lseek(fd, 0, SEEK_END)) < 0) + { + slru_errcause = SLRU_OPEN_FAILED; + slru_errno = errno; + SlruReportIOError(ctl, pageno, 0); + } + + result = endpos >= (off_t) (offset + BLCKSZ); + + CloseTransientFile(fd); + return result; + } /* * Physical read of a (previously existing) page into a buffer slot *** a/src/include/access/slru.h --- b/src/include/access/slru.h *************** *** 145,150 **** extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, --- 145,151 ---- extern void SimpleLruWritePage(SlruCtl ctl, int slotno); extern void SimpleLruFlush(SlruCtl ctl, bool checkpoint); extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage); + extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno); typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int segpage, void *data);
-- Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-bugs