From 5403a7851823fd2686a3f29d12208414eb6dd1b0 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Tue, 20 Jan 2009 18:59:37 +0000
Subject: [PATCH] Add a new option to RestoreBkpBlocks() to indicate if a
 cleanup lock should be used instead of the normal exclusive lock, and make
 WAL redo functions responsible for calling RestoreBkpBlocks(). They know
 better what kind of a lock they need.

At the moment, this just moves things around with no functional change, but
makes the hot standby patch that's under review cleaner.
---
 src/backend/access/gin/ginxlog.c           |  2 ++
 src/backend/access/gist/gistxlog.c         |  2 ++
 src/backend/access/heap/heapam.c           |  5 +++
 src/backend/access/nbtree/nbtxlog.c        |  2 ++
 src/backend/access/transam/clog.c          |  3 ++
 src/backend/access/transam/multixact.c     |  3 ++
 src/backend/access/transam/xact.c          |  3 ++
 src/backend/access/transam/xlog.c          | 21 ++++---
 src/backend/access/transam/xlogutils.c     | 43 +++++++++++++++--------
 src/backend/catalog/storage.c              |  3 ++
 src/backend/commands/dbcommands.c          |  3 ++
 src/backend/commands/sequence.c            |  3 ++
 src/backend/commands/tablespace.c          |  3 ++
 src/backend/storage/freespace/freespace.c  |  2 ++
 src/include/access/xlog.h                  |  2 ++
 15 files changed, 80 insertions(+), 20 deletions(-)

diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c
index d2cff392e5..8382576d3c 100644
--- a/src/backend/access/gin/ginxlog.c
+++ b/src/backend/access/gin/ginxlog.c
@@ -438,6 +438,8 @@ gin_redo(XLogRecPtr lsn, XLogRecord *record)
 {
     uint8       info = record->xl_info & ~XLR_INFO_MASK;
 
+    RestoreBkpBlocks(lsn, record, false);
+
     topCtx = MemoryContextSwitchTo(opCtx);
     switch (info)
     {
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index 116449e3c8..4a20d905d4 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -394,9 +394,11 @@
 void
 gist_redo(XLogRecPtr lsn, XLogRecord *record)
 {
     uint8       info = record->xl_info & ~XLR_INFO_MASK;
     MemoryContext oldCxt;
 
+    RestoreBkpBlocks(lsn, record, false);
+
     oldCxt = MemoryContextSwitchTo(opCtx);
     switch (info)
     {
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 4af17c8079..52115cf64e 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -4777,6 +4777,8 @@ heap_redo(XLogRecPtr lsn, XLogRecord *record)
 {
     uint8       info = record->xl_info & ~XLR_INFO_MASK;
 
+    RestoreBkpBlocks(lsn, record, false);
+
     switch (info & XLOG_HEAP_OPMASK)
     {
         case XLOG_HEAP_INSERT:
@@ -4816,12 +4818,15 @@ heap2_redo(XLogRecPtr lsn, XLogRecord *record)
     switch (info & XLOG_HEAP_OPMASK)
     {
         case XLOG_HEAP2_FREEZE:
+            RestoreBkpBlocks(lsn, record, false);
             heap_xlog_freeze(lsn, record);
             break;
         case XLOG_HEAP2_CLEAN:
+            RestoreBkpBlocks(lsn, record, true);
             heap_xlog_clean(lsn, record, false);
             break;
         case XLOG_HEAP2_CLEAN_MOVE:
+            RestoreBkpBlocks(lsn, record, true);
             heap_xlog_clean(lsn, record, true);
             break;
         default:
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index a874e491f5..517c4b90ce 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -714,6 +714,8 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record)
 {
     uint8       info = record->xl_info & ~XLR_INFO_MASK;
 
+    RestoreBkpBlocks(lsn, record, false);
+
     switch (info)
     {
         case XLOG_BTREE_INSERT_LEAF:
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index c80867c472..528a219db4 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -684,6 +684,9 @@ clog_redo(XLogRecPtr lsn, XLogRecord *record)
 {
     uint8       info = record->xl_info & ~XLR_INFO_MASK;
 
+    /* Backup blocks are not used in clog records */
+    Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+
     if (info == CLOG_ZEROPAGE)
     {
         int         pageno;
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 78d47154c9..7314341101 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1870,6 +1870,9 @@ multixact_redo(XLogRecPtr lsn, XLogRecord *record)
 {
     uint8       info = record->xl_info & ~XLR_INFO_MASK;
 
+    /* Backup blocks are not used in multixact records */
+    Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+
     if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
     {
         int         pageno;
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 23f6ea0a8b..c94e2a2251 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -4308,6 +4308,9 @@ xact_redo(XLogRecPtr lsn, XLogRecord *record)
 {
     uint8       info = record->xl_info & ~XLR_INFO_MASK;
 
+    /* Backup blocks are not used in xact records */
+    Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+
     if (info == XLOG_XACT_COMMIT)
     {
         xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7debb34072..bd6035d4a6 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2922,9 +2922,15 @@ CleanupBackupHistory(void)
  * page might not be. This will force us to replay all subsequent
  * modifications of the page that appear in XLOG, rather than possibly
  * ignoring them as already applied, but that's not a huge drawback.
+ *
+ * If 'cleanup' is true, a cleanup lock is used when restoring blocks.
+ * Otherwise, a normal exclusive lock is used. At the moment, that's just
+ * pro forma, because there can't be any regular backends in the system
+ * during recovery. The 'cleanup' argument applies to all backup blocks
+ * in the WAL record; that suffices for now.
  */
-static void
-RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn)
+void
+RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup)
 {
     Buffer      buffer;
     Page        page;
@@ -2944,6 +2950,11 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn)
         buffer = XLogReadBufferExtended(bkpb.node, bkpb.fork, bkpb.block,
                                         RBM_ZERO);
         Assert(BufferIsValid(buffer));
+        if (cleanup)
+            LockBufferForCleanup(buffer);
+        else
+            LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
         page = (Page) BufferGetPage(buffer);
 
         if (bkpb.hole_length == 0)
@@ -5199,9 +5210,6 @@ StartupXLOG(void)
                     TransactionIdAdvance(ShmemVariableCache->nextXid);
                 }
 
-                if (record->xl_info & XLR_BKP_BLOCK_MASK)
-                    RestoreBkpBlocks(record, EndRecPtr);
-
                 RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record);
 
                 /* Pop the error context stack */
@@ -6233,6 +6241,9 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
 {
     uint8       info = record->xl_info & ~XLR_INFO_MASK;
 
+    /* Backup blocks are not used in xlog records */
+    Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+
     if (info == XLOG_NEXTOID)
     {
         Oid         nextOid;
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index ef5fe4cef7..4a86271019 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -217,8 +217,19 @@ XLogCheckInvalidPages(void)
 
 /*
  * XLogReadBuffer
- *      A shorthand of XLogReadBufferExtended(), for reading from the main
- *      fork.
+ *      Read a page during XLOG replay.
+ *
+ * This is a shorthand of XLogReadBufferExtended() followed by
+ * LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE), for reading from the main
+ * fork.
+ *
+ * (Getting the lock is not really necessary, since we expect that this is
+ * only used during single-process XLOG replay, but some subroutines such
+ * as MarkBufferDirty will complain if we don't. And hopefully we'll get
+ * hot standby support in the future, where there will be backends running
+ * read-only queries during XLOG replay.)
+ *
+ * The returned buffer is exclusively-locked.
  *
  * For historical reasons, instead of a ReadBufferMode argument, this only
  * supports RBM_ZERO (init == true) and RBM_NORMAL (init == false) modes.
@@ -226,22 +237,21 @@ XLogCheckInvalidPages(void)
 Buffer
 XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
 {
-    return XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno,
-                                  init ? RBM_ZERO : RBM_NORMAL);
+    Buffer      buf;
+    buf = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno,
+                                 init ? RBM_ZERO : RBM_NORMAL);
+    if (BufferIsValid(buf))
+        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+
+    return buf;
 }
 
 /*
  * XLogReadBufferExtended
  *      Read a page during XLOG replay
  *
- * This is functionally comparable to ReadBuffer followed by
- * LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE): you get back a pinned
- * and locked buffer. (Getting the lock is not really necessary, since we
- * expect that this is only used during single-process XLOG replay, but
- * some subroutines such as MarkBufferDirty will complain if we don't.)
- *
- * There's some differences in the behavior wrt. the "mode" argument,
- * compared to ReadBufferExtended:
+ * This is functionally comparable to ReadBufferExtended. There's some
+ * differences in the behavior wrt. the "mode" argument:
  *
  * In RBM_NORMAL mode, if the page doesn't exist, or contains all-zeroes, we
  * return InvalidBuffer. In this case the caller should silently skip the
@@ -306,16 +316,19 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
         Assert(BufferGetBlockNumber(buffer) == blkno);
     }
 
-    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
-
     if (mode == RBM_NORMAL)
     {
         /* check that page has been initialized */
         Page        page = (Page) BufferGetPage(buffer);
 
+        /*
+         * We assume that PageIsNew is safe without a lock. During recovery,
+         * there should be no other backends that could modify the buffer at
+         * the same time.
+         */
         if (PageIsNew(page))
         {
-            UnlockReleaseBuffer(buffer);
+            ReleaseBuffer(buffer);
             log_invalid_page(rnode, forknum, blkno, true);
             return InvalidBuffer;
         }
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index e9008e3f2c..309fa469ad 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -401,6 +401,9 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
 {
     uint8       info = record->xl_info & ~XLR_INFO_MASK;
 
+    /* Backup blocks are not used in smgr records */
+    Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+
     if (info == XLOG_SMGR_CREATE)
     {
         xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record);
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 50cb6780d5..7e065762a8 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -1941,6 +1941,9 @@ dbase_redo(XLogRecPtr lsn, XLogRecord *record)
 {
     uint8       info = record->xl_info & ~XLR_INFO_MASK;
 
+    /* Backup blocks are not used in dbase records */
+    Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+
     if (info == XLOG_DBASE_CREATE)
     {
         xl_dbase_create_rec *xlrec = (xl_dbase_create_rec *) XLogRecGetData(record);
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index c80f2a669a..46d7683377 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -1339,6 +1339,9 @@ seq_redo(XLogRecPtr lsn, XLogRecord *record)
     xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record);
     sequence_magic *sm;
 
+    /* Backup blocks are not used in seq records */
+    Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+
     if (info != XLOG_SEQ_LOG)
         elog(PANIC, "seq_redo: unknown op code %u", info);
 
diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c
index 7fa4bfba62..ee0c6428ef 100644
--- a/src/backend/commands/tablespace.c
+++ b/src/backend/commands/tablespace.c
@@ -1276,6 +1276,9 @@ tblspc_redo(XLogRecPtr lsn, XLogRecord *record)
 {
     uint8       info = record->xl_info & ~XLR_INFO_MASK;
 
+    /* Backup blocks are not used in tblspc records */
+    Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+
     if (info == XLOG_TBLSPC_CREATE)
     {
         xl_tblspc_create_rec *xlrec = (xl_tblspc_create_rec *) XLogRecGetData(record);
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index 8ef11be652..7fcd51e638 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -212,6 +212,8 @@ XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
     /* If the page doesn't exist already, extend */
     buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, blkno,
                                  RBM_ZERO_ON_ERROR);
+    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+
     page = BufferGetPage(buf);
     if (PageIsNew(page))
         PageInit(page, BLCKSZ, 0);
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 5b7374ee66..6913f7c800 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -194,6 +194,8 @@ extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
 
 extern void XLogSetAsyncCommitLSN(XLogRecPtr record);
 
+extern void RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup);
+
 extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record);
 extern void xlog_desc(StringInfo buf, uint8 xl_info, char *rec);
 
-- 
2.39.5
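
For reviewers skimming the diff, the calling convention this patch establishes
looks roughly like the sketch below. This is illustrative only and not part of
the patch: the rmgr name "foo", its info codes, and the foo_xlog_* helpers are
hypothetical; only RestoreBkpBlocks() and its new (lsn, record, cleanup)
signature come from the patch (see the access/xlog.h hunk above).

    /*
     * Sketch of a post-patch rmgr redo routine.  "foo" and its helpers are
     * hypothetical; the pattern mirrors heap2_redo and seq_redo above.
     */
    void
    foo_redo(XLogRecPtr lsn, XLogRecord *record)
    {
        uint8       info = record->xl_info & ~XLR_INFO_MASK;

        switch (info)
        {
            case XLOG_FOO_INSERT:
                /* ordinary redo: an exclusive lock on restored pages suffices */
                RestoreBkpBlocks(lsn, record, false);
                foo_xlog_insert(lsn, record);
                break;
            case XLOG_FOO_PRUNE:
                /* VACUUM-style redo: restore backup blocks under a cleanup lock */
                RestoreBkpBlocks(lsn, record, true);
                foo_xlog_prune(lsn, record);
                break;
            default:
                elog(PANIC, "foo_redo: unknown op code %u", info);
        }
    }

In this patch the only records restored with cleanup = true are heap2's CLEAN
and CLEAN_MOVE; rmgrs whose records never carry backup blocks (clog, multixact,
xact, smgr, dbase, seq, tblspc, and xlog itself) skip the call and merely
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)), as the hunks above show.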