Assert redirect pointers are sensible after heap_page_prune().
authorAndres Freund <andres@anarazel.de>
Mon, 22 Nov 2021 17:52:45 +0000 (09:52 -0800)
committerAndres Freund <andres@anarazel.de>
Fri, 14 Jan 2022 02:14:05 +0000 (18:14 -0800)
Corruption of redirect item pointers often only becomes visible well after
being corrupted, as e.g. bug #17255 shows: In the original reproducer,
gigabyte of WAL were between the source of the corruption and the corruption
becoming visible.

To make it easier to find / prevent such bugs, verify whether redirect
pointers are sensible at the end of heap_page_prune_execute(). 5cd7eb1f1c32
introduced related assertions while modifying the page, but they can't easily
detect marking the target of an existing redirect as unused. Sometimes the
corruption will be detected later, but that's harder to diagnose.

Author: Andres Freund <andres@andres@anarazel.de>
Reviewed-By: Peter Geoghegan <pg@bowt.ie>
Discussion: https://postgr.es/m/20211122175914.ayk6gg6nvdwuhrzb@alap3.anarazel.de

src/backend/access/heap/pruneheap.c

index 95a0bbcf5ff4526201e155a96e7a6476f0d476ae..3201fcc52b097764c6e493c956dff7af0053e2a6 100644 (file)
@@ -88,6 +88,7 @@ static void heap_prune_record_redirect(PruneState *prstate,
                                       OffsetNumber offnum, OffsetNumber rdoffnum);
 static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
 static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
+static void page_verify_redirects(Page page);
 
 
 /*
@@ -959,6 +960,10 @@ heap_page_prune_execute(Buffer buffer,
         * indexes when an entire HOT chain becomes dead.  A heap-only tuple
         * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
         * tuple can.
+        *
+        * This check may miss problems, e.g. the target of a redirect could
+        * be marked as unused subsequently. The page_verify_redirects() check
+        * below will catch such problems.
         */
        tolp = PageGetItemId(page, tooff);
        Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
@@ -1028,6 +1033,58 @@ heap_page_prune_execute(Buffer buffer,
     * whether it has free pointers.
     */
    PageRepairFragmentation(page);
+
+   /*
+    * Now that the page has been modified, assert that redirect items still
+    * point to valid targets.
+    */
+   page_verify_redirects(page);
+}
+
+
+/*
+ * If built with assertions, verify that all LP_REDIRECT items point to a
+ * valid item.
+ *
+ * One way that bugs related to HOT pruning show is redirect items pointing to
+ * removed tuples. It's not trivial to reliably check that marking an item
+ * unused will not orphan a redirect item during heap_prune_chain() /
+ * heap_page_prune_execute(), so we additionally check the whole page after
+ * pruning. Without this check such bugs would typically only cause asserts
+ * later, potentially well after the corruption has been introduced.
+ *
+ * Also check comments in heap_page_prune_execute()'s redirection loop.
+ */
+static void
+page_verify_redirects(Page page)
+{
+#ifdef USE_ASSERT_CHECKING
+   OffsetNumber offnum;
+   OffsetNumber maxoff;
+
+   maxoff = PageGetMaxOffsetNumber(page);
+   for (offnum = FirstOffsetNumber;
+        offnum <= maxoff;
+        offnum = OffsetNumberNext(offnum))
+   {
+       ItemId      itemid = PageGetItemId(page, offnum);
+       OffsetNumber targoff;
+       ItemId      targitem;
+       HeapTupleHeader htup;
+
+       if (!ItemIdIsRedirected(itemid))
+           continue;
+
+       targoff = ItemIdGetRedirect(itemid);
+       targitem = PageGetItemId(page, targoff);
+
+       Assert(ItemIdIsUsed(targitem));
+       Assert(ItemIdIsNormal(targitem));
+       Assert(ItemIdHasStorage(targitem));
+       htup = (HeapTupleHeader) PageGetItem(page, targitem);
+       Assert(HeapTupleHeaderIsHeapOnly(htup));
+   }
+#endif
 }