Add CheckBuffer() to check on-disk pages without shared buffer loading

author Michael Paquier <michael@paquier.xyz>

Wed, 28 Oct 2020 02:12:46 +0000 (11:12 +0900)

committer Michael Paquier <michael@paquier.xyz>

Wed, 28 Oct 2020 02:12:46 +0000 (11:12 +0900)
author Michael Paquier <michael@paquier.xyz>
Wed, 28 Oct 2020 02:12:46 +0000 (11:12 +0900)
committer Michael Paquier <michael@paquier.xyz>
Wed, 28 Oct 2020 02:12:46 +0000 (11:12 +0900)
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c

index 3eee86afe5c4748a3e3176cbf8e23d7a341b4b7e..2fa0b065a2871c54c6a59069407e52fc1c2ba86c 100644 (file)
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -4585,3 +4585,95 @@ TestForOldSnapshot_impl(Snapshot snapshot, Relation relation)
                 (errcode(ERRCODE_SNAPSHOT_TOO_OLD),
                  errmsg("snapshot too old")));
  }
+
+
+/*
+ * CheckBuffer
+ *
+ * Check the on-disk state of a block without loading it into the shared
+ * buffers.  To avoid torn pages and false positives, a shared LWLock is held
+ * on the target buffer mapping partition while the block is looked up; if it
+ * is found, a shared I/O lock is also held on its buffer during the read.
+ *
+ * If the page is found in the shared buffers as dirty (or without a valid
+ * tag), it is ignored: a dirty page will be flushed to disk either before
+ * the end of the next checkpoint or during recovery after an unsafe shutdown.
+ *
+ * If the page is found in the shared buffers but is not dirty, we still
+ * check the state of its data on disk, as it could be possible that the
+ * page stayed in shared buffers for a rather long time while the on-disk
+ * data got corrupted.
+ *
+ * If the page is not found in shared buffers, the block is read from disk
+ * while holding the buffer mapping partition LWLock.
+ *
+ * The block is read into a private, suitably-aligned memory area local to
+ * this function.  Returns true if the page was skipped or passed the
+ * checks done by PageIsVerifiedExtended(), and false otherwise.
+ */
+bool
+CheckBuffer(SMgrRelation smgr, ForkNumber forknum, BlockNumber blkno)
+{
+   PGAlignedBlock buffer;      /* aligned, so page fields can be accessed */
+   BufferTag   buf_tag;        /* identity of requested block */
+   uint32      buf_hash;       /* hash value for buf_tag */
+   LWLock     *partLock;       /* buffer partition lock for the buffer */
+   BufferDesc *bufdesc;
+   int         buf_id;
+
+   Assert(smgrexists(smgr, forknum));
+
+   /* create a tag so we can look up the buffer */
+   INIT_BUFFERTAG(buf_tag, smgr->smgr_rnode.node, forknum, blkno);
+
+   /* determine its hash code and partition lock ID */
+   buf_hash = BufTableHashCode(&buf_tag);
+   partLock = BufMappingPartitionLock(buf_hash);
+
+   /* see if the block is in the buffer pool or not */
+   LWLockAcquire(partLock, LW_SHARED);
+   buf_id = BufTableLookup(&buf_tag, buf_hash);
+   if (buf_id >= 0)
+   {
+       uint32      buf_state;
+
+       /*
+        * Found it.  Take a snapshot of its state flags under the buffer
+        * header lock, which is released immediately to limit overhead as
+        * much as possible; the buffer is not pinned.  We keep the shared
+        * LWLock on the target buffer mapping partition for now, so this
+        * buffer cannot be evicted, and we acquire a shared I/O lock on the
+        * buffer as we may need to read its contents from disk.
+        */
+       bufdesc = GetBufferDescriptor(buf_id);
+
+       LWLockAcquire(BufferDescriptorGetIOLock(bufdesc), LW_SHARED);
+       buf_state = LockBufHdr(bufdesc);
+       UnlockBufHdr(bufdesc, buf_state);
+
+       /* If the page is dirty or its tag is not valid, skip it */
+       if ((buf_state & BM_DIRTY) != 0 || (buf_state & BM_TAG_VALID) == 0)
+       {
+           LWLockRelease(BufferDescriptorGetIOLock(bufdesc));
+           LWLockRelease(partLock);
+           return true;
+       }
+
+       /* Read the block from disk, with the I/O lock still held */
+       smgrread(smgr, forknum, blkno, buffer.data);
+       LWLockRelease(BufferDescriptorGetIOLock(bufdesc));
+   }
+   else
+   {
+       /*
+        * Simply read the block.  There's no risk of modification on it as
+        * we are holding the buffer pool partition mapping lock.
+        */
+       smgrread(smgr, forknum, blkno, buffer.data);
+   }
+
+   /* buffer lookup done, so now check the private copy of the page */
+   LWLockRelease(partLock);
+
+   return PageIsVerifiedExtended((Page) buffer.data, blkno, PIV_REPORT_STAT);
+}
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h

index ee91b8fa26c178a35a7d220de8f32393f6753ce5..a21cab2eaf8ccf950f4b9c2029075a43c54c1439 100644 (file)
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -240,6 +240,9 @@ extern void AtProcExit_LocalBuffers(void);
  
  extern void TestForOldSnapshot_impl(Snapshot snapshot, Relation relation);
  
+extern bool CheckBuffer(struct SMgrRelationData *smgr, ForkNumber forknum,
+                       BlockNumber blkno);
+
  /* in freelist.c */
  extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype);
  extern void FreeAccessStrategy(BufferAccessStrategy strategy);
author	Michael Paquier <michael@paquier.xyz>
	Wed, 28 Oct 2020 02:12:46 +0000 (11:12 +0900)
committer	Michael Paquier <michael@paquier.xyz>
	Wed, 28 Oct 2020 02:12:46 +0000 (11:12 +0900)
src/backend/storage/buffer/bufmgr.c		patch \| blob \| blame \| history
src/include/storage/bufmgr.h		patch \| blob \| blame \| history