Teach verify_heapam() to validate update chains within a page.

author Robert Haas <rhaas@postgresql.org>

Wed, 22 Mar 2023 12:48:54 +0000 (08:48 -0400)

committer Robert Haas <rhaas@postgresql.org>

Wed, 22 Mar 2023 12:48:54 +0000 (08:48 -0400)
author Robert Haas <rhaas@postgresql.org>
Wed, 22 Mar 2023 12:48:54 +0000 (08:48 -0400)
committer Robert Haas <rhaas@postgresql.org>
Wed, 22 Mar 2023 12:48:54 +0000 (08:48 -0400)
diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c

index 94ddccd23a809149cce0f266da91d3cfb38a3f1f..663fb65dee6fb372bb7deb50eda103cc9be85fa7 100644 (file)
--- a/contrib/amcheck/verify_heapam.c
+++ b/contrib/amcheck/verify_heapam.c
@@ -150,7 +150,9 @@ typedef struct HeapCheckContext
  } HeapCheckContext;
  
  /* Internal implementation */
-static void check_tuple(HeapCheckContext *ctx);
+static void check_tuple(HeapCheckContext *ctx,
+                       bool *xmin_commit_status_ok,
+                       XidCommitStatus *xmin_commit_status);
  static void check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx,
                               ToastedAttribute *ta, int32 *expected_chunk_seq,
                               uint32 extsize);
@@ -160,7 +162,9 @@ static void check_toasted_attribute(HeapCheckContext *ctx,
                                     ToastedAttribute *ta);
  
  static bool check_tuple_header(HeapCheckContext *ctx);
-static bool check_tuple_visibility(HeapCheckContext *ctx);
+static bool check_tuple_visibility(HeapCheckContext *ctx,
+                                  bool *xmin_commit_status_ok,
+                                  XidCommitStatus *xmin_commit_status);
  
  static void report_corruption(HeapCheckContext *ctx, char *msg);
  static void report_toast_corruption(HeapCheckContext *ctx,
@@ -399,9 +403,16 @@ verify_heapam(PG_FUNCTION_ARGS)
     for (ctx.blkno = first_block; ctx.blkno <= last_block; ctx.blkno++)
     {
         OffsetNumber maxoff;
+       OffsetNumber predecessor[MaxOffsetNumber];
+       OffsetNumber successor[MaxOffsetNumber];
+       bool        lp_valid[MaxOffsetNumber];
+       bool        xmin_commit_status_ok[MaxOffsetNumber];
+       XidCommitStatus xmin_commit_status[MaxOffsetNumber];
  
         CHECK_FOR_INTERRUPTS();
  
+       memset(predecessor, 0, sizeof(OffsetNumber) * MaxOffsetNumber);
+
         /* Optionally skip over all-frozen or all-visible blocks */
         if (skip_option != SKIP_PAGES_NONE)
         {
@@ -433,6 +444,12 @@ verify_heapam(PG_FUNCTION_ARGS)
         for (ctx.offnum = FirstOffsetNumber; ctx.offnum <= maxoff;
              ctx.offnum = OffsetNumberNext(ctx.offnum))
         {
+           BlockNumber nextblkno;
+           OffsetNumber nextoffnum;
+
+           successor[ctx.offnum] = InvalidOffsetNumber;
+           lp_valid[ctx.offnum] = false;
+           xmin_commit_status_ok[ctx.offnum] = false;
             ctx.itemid = PageGetItemId(ctx.page, ctx.offnum);
  
             /* Skip over unused/dead line pointers */
@@ -469,6 +486,14 @@ verify_heapam(PG_FUNCTION_ARGS)
                     report_corruption(&ctx,
                                       psprintf("line pointer redirection to unused item at offset %u",
                                                (unsigned) rdoffnum));
+
+               /*
+                * Record the fact that this line pointer has passed basic
+                * sanity checking, and also the offset number to which it
+                * points.
+                */
+               lp_valid[ctx.offnum] = true;
+               successor[ctx.offnum] = rdoffnum;
                 continue;
             }
  
@@ -502,11 +527,237 @@ verify_heapam(PG_FUNCTION_ARGS)
             }
  
             /* It should be safe to examine the tuple's header, at least */
+           lp_valid[ctx.offnum] = true;
             ctx.tuphdr = (HeapTupleHeader) PageGetItem(ctx.page, ctx.itemid);
             ctx.natts = HeapTupleHeaderGetNatts(ctx.tuphdr);
  
             /* Ok, ready to check this next tuple */
-           check_tuple(&ctx);
+           check_tuple(&ctx,
+                       &xmin_commit_status_ok[ctx.offnum],
+                       &xmin_commit_status[ctx.offnum]);
+
+           /*
+            * If the CTID field of this tuple seems to point to another tuple
+            * on the same page, record that tuple as the successor of this
+            * one.
+            */
+           nextblkno = ItemPointerGetBlockNumber(&(ctx.tuphdr)->t_ctid);
+           nextoffnum = ItemPointerGetOffsetNumber(&(ctx.tuphdr)->t_ctid);
+           if (nextblkno == ctx.blkno && nextoffnum != ctx.offnum)
+               successor[ctx.offnum] = nextoffnum;
+       }
+
+       /*
+        * Update chain validation. Check each line pointer that's got a valid
+        * successor against that successor.
+        */
+       ctx.attnum = -1;
+       for (ctx.offnum = FirstOffsetNumber; ctx.offnum <= maxoff;
+            ctx.offnum = OffsetNumberNext(ctx.offnum))
+       {
+           ItemId      curr_lp;
+           ItemId      next_lp;
+           HeapTupleHeader curr_htup;
+           HeapTupleHeader next_htup;
+           TransactionId curr_xmin;
+           TransactionId curr_xmax;
+           TransactionId next_xmin;
+           OffsetNumber nextoffnum = successor[ctx.offnum];
+
+           /*
+            * The current line pointer may not have a successor, either
+            * because it's not valid or because it didn't point to anything.
+            * In either case, we have to give up.
+            *
+            * If the current line pointer does point to something, it's
+            * possible that the target line pointer isn't valid. We have to
+            * give up in that case, too.
+            */
+           if (nextoffnum == InvalidOffsetNumber || !lp_valid[nextoffnum])
+               continue;
+
+           /* We have two valid line pointers that we can examine. */
+           curr_lp = PageGetItemId(ctx.page, ctx.offnum);
+           next_lp = PageGetItemId(ctx.page, nextoffnum);
+
+           /* Handle the cases where the current line pointer is a redirect. */
+           if (ItemIdIsRedirected(curr_lp))
+           {
+               /* Can't redirect to another redirect. */
+               if (ItemIdIsRedirected(next_lp))
+               {
+                   report_corruption(&ctx,
+                                     psprintf("redirected line pointer points to another redirected line pointer at offset %u",
+                                              (unsigned) nextoffnum));
+                   continue;
+               }
+
+               /* Can only redirect to a HOT tuple. */
+               next_htup = (HeapTupleHeader) PageGetItem(ctx.page, next_lp);
+               if (!HeapTupleHeaderIsHeapOnly(next_htup))
+               {
+                   report_corruption(&ctx,
+                                     psprintf("redirected line pointer points to a non-heap-only tuple at offset %u",
+                                              (unsigned) nextoffnum));
+               }
+
+               /*
+                * Redirects are created by updates, so successor should be
+                * the result of an update.
+                */
+               if ((next_htup->t_infomask & HEAP_UPDATED) == 0)
+               {
+                   report_corruption(&ctx,
+                                     psprintf("redirected line pointer points to a non-heap-updated tuple at offset %u",
+                                              (unsigned) nextoffnum));
+               }
+
+               /* HOT chains should not intersect. */
+               if (predecessor[nextoffnum] != InvalidOffsetNumber)
+               {
+                   report_corruption(&ctx,
+                                     psprintf("redirect line pointer points to offset %u, but offset %u also points there",
+                                              (unsigned) nextoffnum, (unsigned) predecessor[nextoffnum]));
+                   continue;
+               }
+
+               /*
+                * This redirect and the tuple to which it points seem to be
+                * part of an update chain.
+                */
+               predecessor[nextoffnum] = ctx.offnum;
+               continue;
+           }
+
+           /*
+            * If the next line pointer is a redirect, or if it's a tuple
+            * but the XMAX of this tuple doesn't match the XMIN of the next
+            * tuple, then the two aren't part of the same update chain and
+            * there is nothing more to do.
+            */
+           if (ItemIdIsRedirected(next_lp))
+               continue;
+           curr_htup = (HeapTupleHeader) PageGetItem(ctx.page, curr_lp);
+           curr_xmax = HeapTupleHeaderGetUpdateXid(curr_htup);
+           next_htup = (HeapTupleHeader) PageGetItem(ctx.page, next_lp);
+           next_xmin = HeapTupleHeaderGetXmin(next_htup);
+           if (!TransactionIdIsValid(curr_xmax) ||
+               !TransactionIdEquals(curr_xmax, next_xmin))
+               continue;
+
+           /* HOT chains should not intersect. */
+           if (predecessor[nextoffnum] != InvalidOffsetNumber)
+           {
+               report_corruption(&ctx,
+                                 psprintf("tuple points to new version at offset %u, but offset %u also points there",
+                                          (unsigned) nextoffnum, (unsigned) predecessor[nextoffnum]));
+               continue;
+           }
+
+           /*
+            * This tuple and the tuple to which it points seem to be part
+            * of an update chain.
+            */
+           predecessor[nextoffnum] = ctx.offnum;
+
+           /*
+            * If the current tuple is marked as HOT-updated, then the next
+            * tuple should be marked as a heap-only tuple. Conversely, if the
+            * current tuple isn't marked as HOT-updated, then the next tuple
+            * shouldn't be marked as a heap-only tuple.
+            */
+           if (!HeapTupleHeaderIsHotUpdated(curr_htup) &&
+               HeapTupleHeaderIsHeapOnly(next_htup))
+           {
+               report_corruption(&ctx,
+                                 psprintf("non-heap-only update produced a heap-only tuple at offset %u",
+                                          (unsigned) nextoffnum));
+           }
+           if (HeapTupleHeaderIsHotUpdated(curr_htup) &&
+               !HeapTupleHeaderIsHeapOnly(next_htup))
+           {
+               report_corruption(&ctx,
+                                 psprintf("heap-only update produced a non-heap only tuple at offset %u",
+                                          (unsigned) nextoffnum));
+           }
+
+           /*
+            * If the current tuple's xmin is still in progress but the
+            * successor tuple's xmin is committed, that's corruption.
+            *
+            * NB: We recheck the commit status of the current tuple's xmin
+            * here, because it might have committed after we checked it and
+            * before we checked the commit status of the successor tuple's
+            * xmin. This should be safe because the xmin itself can't have
+            * changed, only its commit status.
+            */
+           curr_xmin = HeapTupleHeaderGetXmin(curr_htup);
+           if (xmin_commit_status_ok[ctx.offnum] &&
+               xmin_commit_status[ctx.offnum] == XID_IN_PROGRESS &&
+               xmin_commit_status_ok[nextoffnum] &&
+               xmin_commit_status[nextoffnum] == XID_COMMITTED &&
+               TransactionIdIsInProgress(curr_xmin))
+           {
+               report_corruption(&ctx,
+                                 psprintf("tuple with in-progress xmin %u was updated to produce a tuple at offset %u with committed xmin %u",
+                                          (unsigned) curr_xmin,
+                                          (unsigned) ctx.offnum,
+                                          (unsigned) next_xmin));
+           }
+
+           /*
+            * If the current tuple's xmin is aborted but the successor tuple's
+            * xmin is in-progress or committed, that's corruption.
+            */
+           if (xmin_commit_status_ok[ctx.offnum] &&
+               xmin_commit_status[ctx.offnum] == XID_ABORTED &&
+               xmin_commit_status_ok[nextoffnum])
+           {
+               if (xmin_commit_status[nextoffnum] == XID_IN_PROGRESS)
+                   report_corruption(&ctx,
+                                     psprintf("tuple with aborted xmin %u was updated to produce a tuple at offset %u with in-progress xmin %u",
+                                              (unsigned) curr_xmin,
+                                              (unsigned) ctx.offnum,
+                                              (unsigned) next_xmin));
+               else if (xmin_commit_status[nextoffnum] == XID_COMMITTED)
+                   report_corruption(&ctx,
+                                     psprintf("tuple with aborted xmin %u was updated to produce a tuple at offset %u with committed xmin %u",
+                                              (unsigned) curr_xmin,
+                                              (unsigned) ctx.offnum,
+                                              (unsigned) next_xmin));
+           }
+       }
+
+       /*
+        * An update chain can start either with a non-heap-only tuple or with
+        * a redirect line pointer, but not with a heap-only tuple.
+        *
+        * (This check is in a separate loop because we need the predecessor
+        * array to be fully populated before we can perform it.)
+        */
+       for (ctx.offnum = FirstOffsetNumber;
+            ctx.offnum <= maxoff;
+            ctx.offnum = OffsetNumberNext(ctx.offnum))
+       {
+           if (xmin_commit_status_ok[ctx.offnum] &&
+               (xmin_commit_status[ctx.offnum] == XID_COMMITTED ||
+                xmin_commit_status[ctx.offnum] == XID_IN_PROGRESS) &&
+               predecessor[ctx.offnum] == InvalidOffsetNumber)
+           {
+               ItemId      curr_lp;
+
+               curr_lp = PageGetItemId(ctx.page, ctx.offnum);
+               if (!ItemIdIsRedirected(curr_lp))
+               {
+                   HeapTupleHeader curr_htup;
+
+                   curr_htup = (HeapTupleHeader)
+                       PageGetItem(ctx.page, curr_lp);
+                   if (HeapTupleHeaderIsHeapOnly(curr_htup))
+                       report_corruption(&ctx,
+                                         psprintf("tuple is root of chain but is marked as heap-only tuple"));
+               }
+           }
         }
  
         /* clean up */
@@ -638,6 +889,7 @@ check_tuple_header(HeapCheckContext *ctx)
  {
     HeapTupleHeader tuphdr = ctx->tuphdr;
     uint16      infomask = tuphdr->t_infomask;
+   TransactionId curr_xmax = HeapTupleHeaderGetUpdateXid(tuphdr);
     bool        result = true;
     unsigned    expected_hoff;
  
@@ -663,6 +915,19 @@ check_tuple_header(HeapCheckContext *ctx)
          */
     }
  
+   if (!TransactionIdIsValid(curr_xmax) &&
+       HeapTupleHeaderIsHotUpdated(tuphdr))
+   {
+       report_corruption(ctx,
+                         psprintf("tuple has been HOT updated, but xmax is 0"));
+
+       /*
+        * As above, even though this shouldn't happen, it's not sufficient
+        * justification for skipping further checks; we should still be able
+        * to perform them sensibly.
+        */
+   }
+
     if (infomask & HEAP_HASNULL)
         expected_hoff = MAXALIGN(SizeofHeapTupleHeader + BITMAPLEN(ctx->natts));
     else
@@ -718,9 +983,14 @@ check_tuple_header(HeapCheckContext *ctx)
   * Returns true if the tuple itself should be checked, false otherwise.  Sets
   * ctx->tuple_could_be_pruned if the tuple -- and thus also any associated
   * TOAST tuples -- are eligible for pruning.
+ *
+ * Sets *xmin_commit_status_ok to true if the commit status of xmin is known
+ * and false otherwise. If it's set to true, then also set *xmin_commit_status
+ * to the actual commit status.
   */
  static bool
-check_tuple_visibility(HeapCheckContext *ctx)
+check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
+                      XidCommitStatus *xmin_commit_status)
  {
     TransactionId xmin;
     TransactionId xvac;
@@ -731,13 +1001,17 @@ check_tuple_visibility(HeapCheckContext *ctx)
     HeapTupleHeader tuphdr = ctx->tuphdr;
  
     ctx->tuple_could_be_pruned = true;  /* have not yet proven otherwise */
+   *xmin_commit_status_ok = false;     /* have not yet proven otherwise */
  
     /* If xmin is normal, it should be within valid range */
     xmin = HeapTupleHeaderGetXmin(tuphdr);
     switch (get_xid_status(xmin, ctx, &xmin_status))
     {
         case XID_INVALID:
+           break;
         case XID_BOUNDS_OK:
+           *xmin_commit_status_ok = true;
+           *xmin_commit_status = xmin_status;
             break;
         case XID_IN_FUTURE:
             report_corruption(ctx,
@@ -1515,9 +1789,13 @@ check_toasted_attribute(HeapCheckContext *ctx, ToastedAttribute *ta)
  /*
   * Check the current tuple as tracked in ctx, recording any corruption found in
   * ctx->tupstore.
+ *
+ * We return some information about the status of xmin to aid in validating
+ * update chains.
   */
  static void
-check_tuple(HeapCheckContext *ctx)
+check_tuple(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
+           XidCommitStatus *xmin_commit_status)
  {
     /*
      * Check various forms of tuple header corruption, and if the header is
@@ -1531,7 +1809,8 @@ check_tuple(HeapCheckContext *ctx)
      * cannot assume our relation description matches the tuple structure, and
      * therefore cannot check it.
      */
-   if (!check_tuple_visibility(ctx))
+   if (!check_tuple_visibility(ctx, xmin_commit_status_ok,
+                           xmin_commit_status))
         return;
  
     /*
diff --git a/src/bin/pg_amcheck/t/004_verify_heapam.pl b/src/bin/pg_amcheck/t/004_verify_heapam.pl

index e5ae7e6aadaa986a465ebba59f1608411a2615ef..92898c2a63d19b125d4c4bc745942b356e787420 100644 (file)
--- a/src/bin/pg_amcheck/t/004_verify_heapam.pl
+++ b/src/bin/pg_amcheck/t/004_verify_heapam.pl
@@ -174,12 +174,16 @@ sub write_tuple
  # Set umask so test directories and files are created with default permissions
  umask(0077);
  
+my $pred_xmax;
+my $pred_posid;
+my $aborted_xid;
  # Set up the node.  Once we create and corrupt the table,
  # autovacuum workers visiting the table could crash the backend.
  # Disable autovacuum so that won't happen.
  my $node = PostgreSQL::Test::Cluster->new('test');
  $node->init;
  $node->append_conf('postgresql.conf', 'autovacuum=off');
+$node->append_conf('postgresql.conf', 'max_prepared_transactions=10');
  
  # Start the node and load the extensions.  We depend on both
  # amcheck and pageinspect for this test.
@@ -216,8 +220,15 @@ my $rel = $node->safe_psql('postgres',
     qq(SELECT pg_relation_filepath('public.test')));
  my $relpath = "$pgdata/$rel";
  
-# Insert data and freeze public.test
-my $ROWCOUNT = 17;
+# Initial setup for the public.test table.
+# $ROWCOUNT is the total number of rows that we expect to insert into the page.
+# $ROWCOUNT_BASIC is the number of those rows that are related to basic
+# tuple validation, rather than update chain validation.
+my $ROWCOUNT = 44;
+my $ROWCOUNT_BASIC = 16;
+
+# First insert data needed for tests unrelated to update chain validation.
+# Then freeze the page. These tuples are at offset numbers 1 to 16.
  $node->safe_psql(
     'postgres', qq(
     INSERT INTO public.test (a, b, c)
@@ -225,10 +236,86 @@ $node->safe_psql(
             x'DEADF9F9DEADF9F9'::bigint,
             'abcdefg',
             repeat('w', 10000)
-        FROM generate_series(1, $ROWCOUNT);
+   FROM generate_series(1, $ROWCOUNT_BASIC);
     VACUUM FREEZE public.test;)
  );
  
+# Create some simple HOT update chains for line pointer validation. After
+# the page is HOT pruned, we'll have two redirect line pointers each pointing
+# to a tuple. We'll then change the second redirect to point to the same
+# tuple as the first one and verify that we can detect corruption.
+$node->safe_psql(
+   'postgres', qq(
+       INSERT INTO public.test (a, b, c)
+           VALUES ( x'DEADF9F9DEADF9F9'::bigint, 'abcdefg',
+                    generate_series(1,2)); -- offset numbers 17 and 18
+       UPDATE public.test SET c = 'a' WHERE c = '1'; -- offset number 19
+       UPDATE public.test SET c = 'a' WHERE c = '2'; -- offset number 20
+   ));
+
+# Create some more HOT update chains.
+$node->safe_psql(
+   'postgres', qq(
+       INSERT INTO public.test (a, b, c)
+           VALUES ( x'DEADF9F9DEADF9F9'::bigint, 'abcdefg',
+                    generate_series(3,6)); -- offset numbers 21 through 24
+       UPDATE public.test SET c = 'a' WHERE c = '3'; -- offset number 25
+       UPDATE public.test SET c = 'a' WHERE c = '4'; -- offset number 26
+   ));
+
+# Negative test case of HOT-pruning with aborted tuple.
+$node->safe_psql(
+   'postgres', qq(
+       BEGIN;
+       UPDATE public.test SET c = 'a' WHERE c = '5'; -- offset number 27
+       ABORT;
+       VACUUM FREEZE public.test;
+   ));
+
+# The next update of any tuple will reuse the slot of the tuple inserted by the
+# aborted transaction. This should not cause the table to appear corrupt.
+$node->safe_psql(
+   'postgres', qq(
+       UPDATE public.test SET c = 'a' WHERE c = '6'; -- offset number 27 again
+       VACUUM FREEZE public.test;
+   ));
+
+# Data for HOT chain validation, so not calling VACUUM FREEZE.
+$node->safe_psql(
+   'postgres', qq(
+       INSERT INTO public.test (a, b, c)
+           VALUES ( x'DEADF9F9DEADF9F9'::bigint, 'abcdefg',
+                    generate_series(7,15)); -- offset numbers 28 to 36
+       UPDATE public.test SET c = 'a' WHERE c = '7'; -- offset number 37
+       UPDATE public.test SET c = 'a' WHERE c = '10'; -- offset number 38
+       UPDATE public.test SET c = 'a' WHERE c = '11'; -- offset number 39
+       UPDATE public.test SET c = 'a' WHERE c = '12'; -- offset number 40
+       UPDATE public.test SET c = 'a' WHERE c = '13'; -- offset number 41
+       UPDATE public.test SET c = 'a' WHERE c = '14'; -- offset number 42
+       UPDATE public.test SET c = 'a' WHERE c = '15'; -- offset number 43
+   ));
+
+# Need one aborted transaction to test corruption in HOT chains.
+$node->safe_psql(
+   'postgres', qq(
+       BEGIN;
+       UPDATE public.test SET c = 'a' WHERE c = '9'; -- offset number 44
+       ABORT;
+   ));
+
+# Need one in-progress transaction to test a few corruptions in HOT chains.
+# We use PREPARE TRANSACTION here because a prepared transaction will not be
+# aborted even if we stop the node.
+$node->safe_psql(
+   'postgres', qq(
+       BEGIN;
+       PREPARE TRANSACTION 'in_progress_tx';
+   ));
+my $in_progress_xid = $node->safe_psql(
+   'postgres', qq(
+       SELECT transaction FROM pg_prepared_xacts;
+   ));
+
  my $relfrozenxid = $node->safe_psql('postgres',
     q(select relfrozenxid from pg_class where relname = 'test'));
  my $datfrozenxid = $node->safe_psql('postgres',
@@ -245,11 +332,12 @@ if ($datfrozenxid <= 3 || $datfrozenxid >= $relfrozenxid)
     exit;
  }
  
-# Find where each of the tuples is located on the page.
+# Find where each of the tuples is located on the page. If a particular
+# line pointer is a redirect rather than a tuple, we record the offset as -1.
  my @lp_off = split '\n', $node->safe_psql(
     'postgres', qq(
-       select lp_off from heap_page_items(get_raw_page('test', 'main', 0))
-       where lp <= $ROWCOUNT
+       SELECT CASE WHEN lp_flags = 2 THEN -1 ELSE lp_off END
+       FROM heap_page_items(get_raw_page('test', 'main', 0))
      )
  );
  is(scalar @lp_off, $ROWCOUNT, "acquired row offsets");
@@ -268,6 +356,7 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++)
  {
     my $offnum = $tupidx + 1;        # offnum is 1-based, not zero-based
     my $offset = $lp_off[$tupidx];
+   next if $offset == -1;           # ignore redirect line pointers
     my $tup = read_tuple($file, $offset);
  
     # Sanity-check that the data appears on the page where we expect.
@@ -280,7 +369,7 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++)
         $node->clean_node;
         plan skip_all =>
           sprintf(
-           "Page layout differs from our expectations: expected (%x, %x, \"%s\"), got (%x, %x, \"%s\")",
+           "Page layout of index %d differs from our expectations: expected (%x, %x, \"%s\"), got (%x, %x, \"%s\")", $tupidx,
             0xDEADF9F9, 0xDEADF9F9, "abcdefg", $a_1, $a_2, $b);
         exit;
     }
@@ -315,6 +404,9 @@ use constant HEAP_XMAX_INVALID   => 0x0800;
  use constant HEAP_NATTS_MASK     => 0x07FF;
  use constant HEAP_XMAX_IS_MULTI  => 0x1000;
  use constant HEAP_KEYS_UPDATED   => 0x2000;
+use constant HEAP_HOT_UPDATED    => 0x4000;
+use constant HEAP_ONLY_TUPLE     => 0x8000;
+use constant HEAP_UPDATED        => 0x2000;
  
  # Helper function to generate a regular expression matching the header we
  # expect verify_heapam() to return given which fields we expect to be non-null.
@@ -346,9 +438,11 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++)
  {
     my $offnum = $tupidx + 1;        # offnum is 1-based, not zero-based
     my $offset = $lp_off[$tupidx];
-   my $tup = read_tuple($file, $offset);
-
     my $header = header(0, $offnum, undef);
+
+   # Read tuple, if there is one.
+   my $tup = $offset == -1 ? undef : read_tuple($file, $offset);
+
     if ($offnum == 1)
     {
         # Corruptly set xmin < relfrozenxid
@@ -361,7 +455,7 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++)
         push @expected,
           qr/${header}xmin $xmin precedes relation freeze threshold 0:\d+/;
     }
-   if ($offnum == 2)
+   elsif ($offnum == 2)
     {
         # Corruptly set xmin < datfrozenxid
         my $xmin = 3;
@@ -521,7 +615,137 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++)
         push @expected,
           qr/${$header}xmin ${xmin} equals or exceeds next valid transaction ID 0:\d+/;
     }
-   write_tuple($file, $offset, $tup);
+   elsif ($offnum == 17)
+   {
+       # at offnum 19 we will unset HEAP_ONLY_TUPLE and HEAP_UPDATED flags.
+       die "offnum $offnum should be a redirect" if defined $tup;
+       push @expected,
+           qr/${header}redirected line pointer points to a non-heap-only tuple at offset \d+/;
+       push @expected,
+           qr/${header}redirected line pointer points to a non-heap-updated tuple at offset \d+/;
+   }
+   elsif ($offnum == 18)
+   {
+       # rewrite line pointer with lp_off = 17, lp_flags = 2, lp_len = 0.
+       die "offnum $offnum should be a redirect" if defined $tup;
+       sysseek($file, 92, 0) or BAIL_OUT("sysseek failed: $!");
+       syswrite($file,
+                pack("L", $ENDIANNESS eq 'little' ? 0x00010011 : 0x11000100))
+           or BAIL_OUT("syswrite failed: $!");
+       push @expected,
+         qr/${header}redirected line pointer points to another redirected line pointer at offset \d+/;
+   }
+   elsif ($offnum == 19)
+   {
+       # unset HEAP_ONLY_TUPLE and HEAP_UPDATED flag, so that update chain
+       # validation will complain about offset 17
+       $tup->{t_infomask2} &= ~HEAP_ONLY_TUPLE;
+       $tup->{t_infomask} &= ~HEAP_UPDATED;
+   }
+   elsif ($offnum == 22)
+   {
+       # rewrite line pointer with lp_off = 25, lp_flags = 2, lp_len = 0
+       sysseek($file, 108, 0) or BAIL_OUT("sysseek failed: $!");
+       syswrite($file,
+                pack("L", $ENDIANNESS eq 'little' ? 0x00010019 : 0x19000100))
+           or BAIL_OUT("syswrite failed: $!");
+       push @expected,
+         qr/${header}redirect line pointer points to offset \d+, but offset \d+ also points there/;
+   }
+   elsif ($offnum == 28)
+   {
+       $tup->{t_infomask2} &= ~HEAP_HOT_UPDATED;
+       push @expected,
+         qr/${header}non-heap-only update produced a heap-only tuple at offset \d+/;
+
+       # Save these values so we can insert them into the tuple at offnum 29.
+       $pred_xmax = $tup->{t_xmax};
+       $pred_posid = $tup->{ip_posid};
+   }
+   elsif ($offnum == 29)
+   {
+       # Copy these values from the tuple at offset 28.
+       $tup->{t_xmax} = $pred_xmax;
+       $tup->{ip_posid} = $pred_posid;
+       push @expected,
+         qr/${header}tuple points to new version at offset \d+, but offset \d+ also points there/;
+   }
+   elsif ($offnum == 30)
+   {
+       # Save xid, so we can insert it into the tuple at offset 31.
+       $aborted_xid = $tup->{t_xmax};
+   }
+   elsif ($offnum == 31)
+   {
+       # Set xmin to xmax of tuple at offset 30.
+       $tup->{t_xmin} = $aborted_xid;
+       $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
+       push @expected,
+         qr/${header}tuple with aborted xmin \d+ was updated to produce a tuple at offset \d+ with committed xmin \d+/;
+   }
+   elsif ($offnum == 32)
+   {
+       $tup->{t_infomask2} |= HEAP_ONLY_TUPLE;
+       push @expected,
+         qr/${header}tuple is root of chain but is marked as heap-only tuple/;
+   }
+   elsif ($offnum == 33)
+   {
+       # Tuple at offset 40 is the successor of this one; we'll corrupt it to
+       # be non-heap-only.
+       push @expected,
+         qr/${header}heap-only update produced a non-heap only tuple at offset \d+/;
+   }
+   elsif ($offnum == 34)
+   {
+       $tup->{t_xmax} = 0;
+       push @expected,
+         qr/${header}tuple has been HOT updated, but xmax is 0/;
+   }
+   elsif ($offnum == 35)
+   {
+       $tup->{t_xmin} = $in_progress_xid;
+       $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
+       push @expected,
+         qr/${header}tuple with in-progress xmin \d+ was updated to produce a tuple at offset \d+ with committed xmin \d+/;
+   }
+   elsif ($offnum == 36)
+   {
+       # Tuple at offset 43 is the successor of this one; we'll corrupt it to
+       # have xmin = $in_progress_xid. By setting the xmax of this tuple to
+       # the same value, we make it look like an update chain with an
+       # in-progress XID following an aborted one.
+       $tup->{t_xmin} = $aborted_xid;
+       $tup->{t_xmax} = $in_progress_xid;
+       $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
+       push @expected,
+         qr/${header}tuple with aborted xmin \d+ was updated to produce a tuple at offset \d+ with in-progress xmin \d+/;
+   }
+   elsif ($offnum == 40)
+   {
+       # Tuple at offset 33 is the predecessor of this one; the error will
+       # be reported there.
+       $tup->{t_infomask2} &= ~HEAP_ONLY_TUPLE;
+   }
+   elsif ($offnum == 43)
+   {
+       # Tuple at offset 36 is the predecessor of this one; the error will
+       # be reported there.
+       $tup->{t_xmin} = $in_progress_xid;
+       $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
+   }
+   else
+   {
+       # The tests for update chain validation end up creating a bunch of
+       # tuples that aren't corrupted in any way e.g. because only one of
+       # the two tuples in the update chain needs to be corrupted for the
+       # test, or because one update chain is being made to erroneously
+       # point into the middle of another that has nothing wrong with it.
+       # In all such cases we need not write the tuple back to the file.
+       next;
+   }
+
+   write_tuple($file, $offset, $tup) if defined $tup;
  }
  close($file)
    or BAIL_OUT("close failed: $!");
@@ -532,6 +756,10 @@ $node->start;
  $node->command_checks_all(
     [ 'pg_amcheck', '--no-dependent-indexes', '-p', $port, 'postgres' ],
     2, [@expected], [], 'Expected corruption message output');
+$node->safe_psql(
+        'postgres', qq(
+                        COMMIT PREPARED 'in_progress_tx';
+        ));
  
  $node->teardown_node;
  $node->clean_node;
author	Robert Haas <rhaas@postgresql.org>
	Wed, 22 Mar 2023 12:48:54 +0000 (08:48 -0400)
committer	Robert Haas <rhaas@postgresql.org>
	Wed, 22 Mar 2023 12:48:54 +0000 (08:48 -0400)
contrib/amcheck/verify_heapam.c		patch \| blob \| blame \| history
src/bin/pg_amcheck/t/004_verify_heapam.pl		patch \| blob \| blame \| history