Add hardening to catch invalid TIDs in indexes.
author: Peter Geoghegan <pg@bowt.ie>
Fri, 5 Nov 2021 02:54:05 +0000 (19:54 -0700)
committer: Peter Geoghegan <pg@bowt.ie>
Fri, 5 Nov 2021 02:54:05 +0000 (19:54 -0700)
Add hardening to the heapam index tuple deletion path to catch TIDs in
index pages that point to a heap item that index tuples should never
point to.  The corruption we're trying to catch here is particularly
tricky to detect, since it typically involves "extra" (corrupt) index
tuples, as opposed to the absence of required index tuples in the index.

For example, a heap TID from an index page that turns out to point to an
LP_UNUSED item in the heap page has a good chance of being caught by one
of the new checks.  There is a decent chance that the recently fixed
parallel VACUUM bug (see commit 9bacec15) would have been caught had
that particular check been in place for Postgres 14.  No backpatch of
this extra hardening for now, though.

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CAH2-Wzk-4_raTzawWGaiqNvkpwDXxv3y1AQhQyUeHfkU=tFCeA@mail.gmail.com

src/backend/access/heap/heapam.c
src/backend/access/index/genam.c
src/backend/access/nbtree/nbtdedup.c
src/backend/access/nbtree/nbtinsert.c
src/include/access/tableam.h

index fecb9728683b272e00f63ec3ae772b25fadd7ace..ec234a5e595272a0b82941dc62553c7f53205bb1 100644 (file)
@@ -7252,6 +7252,63 @@ index_delete_prefetch_buffer(Relation rel,
 }
 #endif
 
+/*
+ * Helper function for heap_index_delete_tuples.  Checks for index corruption
+ * involving an invalid TID in index AM caller's index page.
+ *
+ * This is an ideal place for these checks.  The index AM must hold a buffer
+ * lock on the index page containing the TIDs we examine here, so we don't
+ * have to worry about concurrent VACUUMs at all.  We can be sure that the
+ * index is corrupt when htid points directly to an LP_UNUSED item or
+ * heap-only tuple, which is not the case during standard index scans.
+ */
+static inline void
+index_delete_check_htid(TM_IndexDeleteOp *delstate,
+                       Page page, OffsetNumber maxoff,
+                       ItemPointer htid, TM_IndexStatus *istatus)
+{
+   OffsetNumber indexpagehoffnum = ItemPointerGetOffsetNumber(htid);
+   ItemId      iid;
+
+   Assert(OffsetNumberIsValid(istatus->idxoffnum));
+
+   if (unlikely(indexpagehoffnum > maxoff))
+       ereport(ERROR,
+               (errcode(ERRCODE_INDEX_CORRUPTED),
+                errmsg_internal("heap tid from index tuple (%u,%u) points past end of heap page line pointer array at offset %u of block %u in index \"%s\"",
+                                ItemPointerGetBlockNumber(htid),
+                                indexpagehoffnum,
+                                istatus->idxoffnum, delstate->iblknum,
+                                RelationGetRelationName(delstate->irel))));
+
+   iid = PageGetItemId(page, indexpagehoffnum);
+   if (unlikely(!ItemIdIsUsed(iid)))
+       ereport(ERROR,
+               (errcode(ERRCODE_INDEX_CORRUPTED),
+                errmsg_internal("heap tid from index tuple (%u,%u) points to unused heap page item at offset %u of block %u in index \"%s\"",
+                                ItemPointerGetBlockNumber(htid),
+                                indexpagehoffnum,
+                                istatus->idxoffnum, delstate->iblknum,
+                                RelationGetRelationName(delstate->irel))));
+
+   if (ItemIdHasStorage(iid))
+   {
+       HeapTupleHeader htup;
+
+       Assert(ItemIdIsNormal(iid));
+       htup = (HeapTupleHeader) PageGetItem(page, iid);
+
+       if (unlikely(HeapTupleHeaderIsHeapOnly(htup)))
+           ereport(ERROR,
+                   (errcode(ERRCODE_INDEX_CORRUPTED),
+                    errmsg_internal("heap tid from index tuple (%u,%u) points to heap-only tuple at offset %u of block %u in index \"%s\"",
+                                    ItemPointerGetBlockNumber(htid),
+                                    indexpagehoffnum,
+                                    istatus->idxoffnum, delstate->iblknum,
+                                    RelationGetRelationName(delstate->irel))));
+   }
+}
+
 /*
  * heapam implementation of tableam's index_delete_tuples interface.
  *
@@ -7446,6 +7503,14 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
            maxoff = PageGetMaxOffsetNumber(page);
        }
 
+       /*
+        * In passing, detect index corruption involving an index page with a
+        * TID that points to a location in the heap that couldn't possibly be
+        * correct.  We only do this with actual TIDs from caller's index page
+        * (not items reached by traversing through a HOT chain).
+        */
+       index_delete_check_htid(delstate, page, maxoff, htid, istatus);
+
        if (istatus->knowndeletable)
            Assert(!delstate->bottomup && !istatus->promising);
        else
index b93288a6fe61cdfd62ff26b4a280e3b883a56b22..64023eaea5dc8305344d3f241764fa857e153141 100644 (file)
@@ -303,6 +303,8 @@ index_compute_xid_horizon_for_tuples(Relation irel,
 
    Assert(nitems > 0);
 
+   delstate.irel = irel;
+   delstate.iblknum = BufferGetBlockNumber(ibuf);
    delstate.bottomup = false;
    delstate.bottomupfreespace = 0;
    delstate.ndeltids = 0;
@@ -312,16 +314,17 @@ index_compute_xid_horizon_for_tuples(Relation irel,
    /* identify what the index tuples about to be deleted point to */
    for (int i = 0; i < nitems; i++)
    {
+       OffsetNumber offnum = itemnos[i];
        ItemId      iitemid;
 
-       iitemid = PageGetItemId(ipage, itemnos[i]);
+       iitemid = PageGetItemId(ipage, offnum);
        itup = (IndexTuple) PageGetItem(ipage, iitemid);
 
        Assert(ItemIdIsDead(iitemid));
 
        ItemPointerCopy(&itup->t_tid, &delstate.deltids[i].tid);
        delstate.deltids[i].id = delstate.ndeltids;
-       delstate.status[i].idxoffnum = InvalidOffsetNumber; /* unused */
+       delstate.status[i].idxoffnum = offnum;
        delstate.status[i].knowndeletable = true;   /* LP_DEAD-marked */
        delstate.status[i].promising = false;   /* unused */
        delstate.status[i].freespace = 0;   /* unused */
index 6401fce57b91ef38fc4e09b175c831078f26d754..c88dc6eedbd659a46cb13a593001a161c3b4edd2 100644 (file)
@@ -348,6 +348,8 @@ _bt_bottomupdel_pass(Relation rel, Buffer buf, Relation heapRel,
     * concerning ourselves with avoiding work during the tableam call.  Our
     * role in costing the bottom-up deletion process is strictly advisory.
     */
+   delstate.irel = rel;
+   delstate.iblknum = BufferGetBlockNumber(buf);
    delstate.bottomup = true;
    delstate.bottomupfreespace = Max(BLCKSZ / 16, newitemsz);
    delstate.ndeltids = 0;
index ccddb037820c19348ddcaca60869625a990557e1..0fe8c709395039883f941387810a32af50e2d869 100644 (file)
@@ -2810,6 +2810,8 @@ _bt_simpledel_pass(Relation rel, Buffer buffer, Relation heapRel,
                                &ndeadblocks);
 
    /* Initialize tableam state that describes index deletion operation */
+   delstate.irel = rel;
+   delstate.iblknum = BufferGetBlockNumber(buffer);
    delstate.bottomup = false;
    delstate.bottomupfreespace = 0;
    delstate.ndeltids = 0;
index 4aff18215bab0e94bfdfcd53d0783b4e5871b789..808c144a91489a8d54c13a2a3bf99cb257db47d7 100644 (file)
@@ -220,6 +220,8 @@ typedef struct TM_IndexStatus
  */
 typedef struct TM_IndexDeleteOp
 {
+   Relation    irel;           /* Target index relation */
+   BlockNumber iblknum;        /* Index block number (for error reports) */
    bool        bottomup;       /* Bottom-up (not simple) deletion? */
    int         bottomupfreespace;  /* Bottom-up space target */