pg_waldump: Add --save-fullpage=PATH to save full page images from WAL records
authorMichael Paquier <michael@paquier.xyz>
Mon, 26 Dec 2022 23:27:53 +0000 (08:27 +0900)
committerMichael Paquier <michael@paquier.xyz>
Mon, 26 Dec 2022 23:27:53 +0000 (08:27 +0900)
This option extracts (potentially decompressing) full-page images
included in WAL records into a given target directory.  These images are
subject to the same filtering rules as the normal display of the WAL
records, hence with --relation one can for example extract only the FPIs
issued on the relation defined.  By default, the records are printed or
their stats computed (--stats), using --quiet would only save the images
without any output generated.

This is a tool aimed mostly for very experienced users, useful for
fixing page-level corruption or just analyzing the past state of a page,
and there were no easy way to do that with the in-core tools up to now
when looking at WAL.

Each block is saved in a separate file, to ease their manipulation, with
the file respecting <lsn>.<ts>.<db>.<rel>.<blk>_<fork> with as format.

For instance, 00000000-010000C0.1663.1.6117.123_main refers to:
- WAL record LSN in hexa format (00000000-010000C0).
- Tablespace OID (1663).
- Database OID (1).
- Relfilenode (6117).
- Block number (123).
- Fork name of the file this block came from (_main).

Author: David Christensen
Reviewed-by: Sho Kato, Justin Pryzby, Bharath Rupireddy, Matthias van de
Meent
Discussion: https://postgr.es/m/CAOxo6XKjQb2bMSBRpePf3ZpzfNTwjQUc4Tafh21=jzjX6bX8CA@mail.gmail.com

doc/src/sgml/ref/pg_waldump.sgml
src/bin/pg_waldump/meson.build
src/bin/pg_waldump/pg_waldump.c
src/bin/pg_waldump/t/002_save_fullpage.pl [new file with mode: 0644]

index d559f091e5f954ab5a5826ff5dc3c2e1dd43c5c9..343f0482a9f6999c29f853634108ea6cac7a6c4e 100644 (file)
@@ -240,6 +240,72 @@ PostgreSQL documentation
        </listitem>
      </varlistentry>
 
+     <varlistentry>
+       <term><option>--save-fullpage=<replaceable>save_path</replaceable></option></term>
+       <listitem>
+       <para>
+        Save full page images found in the WAL records to the
+        <replaceable>save_path</replaceable> directory. The images saved
+        are subject to the same filtering and limiting criteria as the
+        records displayed.
+       </para>
+       <para>
+        The full page images are saved with the following file name format:
+        <literal><replaceable>LSN</replaceable>.<replaceable>RELTABLESPACE</replaceable>.<replaceable>DATOID</replaceable>.<replaceable>RELNODE</replaceable>.<replaceable>BLKNO</replaceable><replaceable>FORK</replaceable></literal>
+
+        The file names are composed of the following parts:
+        <informaltable>
+         <tgroup cols="2">
+          <thead>
+           <row>
+            <entry>Component</entry>
+            <entry>Description</entry>
+           </row>
+          </thead>
+
+          <tbody>
+           <row>
+            <entry>LSN</entry>
+            <entry>The <acronym>LSN</acronym> of the record with this image,
+             formatted as two 8-character hexadecimal numbers
+             <literal>%08X-%08X</literal></entry>
+           </row>
+
+           <row>
+            <entry>RELTABLESPACE</entry>
+            <entry>tablespace OID of the block</entry>
+           </row>
+
+           <row>
+            <entry>DATOID</entry>
+            <entry>database OID of the block</entry>
+           </row>
+
+           <row>
+            <entry>RELNODE</entry>
+            <entry>filenode of the block</entry>
+           </row>
+
+           <row>
+            <entry>BLKNO</entry>
+            <entry>block number of the block</entry>
+           </row>
+
+           <row>
+            <entry>FORK</entry>
+            <entry>
+             The name of the fork the full page image came from, as of
+             <literal>_main</literal>, <literal>_fsm</literal>,
+             <literal>_vm</literal>, or <literal>_init</literal>.
+            </entry>
+           </row>
+          </tbody>
+         </tgroup>
+        </informaltable>
+       </para>
+       </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-x <replaceable>xid</replaceable></option></term>
       <term><option>--xid=<replaceable>xid</replaceable></option></term>
index 3fa1b53e715a49dc0d17de266c7d98d1fdd730f6..04289983503f09af3fd53445b9cd8e16a2357380 100644 (file)
@@ -31,6 +31,7 @@ tests += {
   'tap': {
     'tests': [
       't/001_basic.pl',
+      't/002_save_fullpage.pl',
     ],
   },
 }
index 9993378ca58b07cf2f3bb8b88b23ab6a4bcbf01e..15cc500672dd074e60853921b4bb8bb586e9e4f4 100644 (file)
 #include "access/xlogrecord.h"
 #include "access/xlogstats.h"
 #include "common/fe_memutils.h"
+#include "common/file_perm.h"
+#include "common/file_utils.h"
 #include "common/logging.h"
+#include "common/relpath.h"
 #include "getopt_long.h"
 #include "rmgrdesc.h"
+#include "storage/bufpage.h"
 
 /*
  * NOTE: For any code change or issue fix here, it is highly recommended to
@@ -70,6 +74,9 @@ typedef struct XLogDumpConfig
    bool        filter_by_relation_block_enabled;
    ForkNumber  filter_by_relation_forknum;
    bool        filter_by_fpw;
+
+   /* save options */
+   char       *save_fullpage_path;
 } XLogDumpConfig;
 
 
@@ -112,6 +119,37 @@ verify_directory(const char *directory)
    return true;
 }
 
+/*
+ * Create if necessary the directory storing the full-page images extracted
+ * from the WAL records read.
+ */
+static void
+create_fullpage_directory(char *path)
+{
+   int         ret;
+
+   switch ((ret = pg_check_dir(path)))
+   {
+       case 0:
+           /* Does not exist, so create it */
+           if (pg_mkdir_p(path, pg_dir_create_mode) < 0)
+               pg_fatal("could not create directory \"%s\": %m", path);
+           break;
+       case 1:
+           /* Present and empty, so do nothing */
+           break;
+       case 2:
+       case 3:
+       case 4:
+           /* Exists and not empty */
+           pg_fatal("directory \"%s\" exists but is not empty", path);
+           break;
+       default:
+           /* Trouble accessing directory */
+           pg_fatal("could not access directory \"%s\": %m", path);
+   }
+}
+
 /*
  * Split a pathname as dirname(1) and basename(1) would.
  *
@@ -439,6 +477,62 @@ XLogRecordHasFPW(XLogReaderState *record)
    return false;
 }
 
+/*
+ * Function to externally save all FPWs stored in the given WAL record.
+ * Decompression is applied to all the blocks saved, if necessary.
+ */
+static void
+XLogRecordSaveFPWs(XLogReaderState *record, const char *savepath)
+{
+   int         block_id;
+
+   for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
+   {
+       PGAlignedBlock buf;
+       Page        page;
+       char        filename[MAXPGPATH];
+       char        forkname[FORKNAMECHARS + 2];    /* _ + terminating zero */
+       FILE       *file;
+       BlockNumber blk;
+       RelFileLocator rnode;
+       ForkNumber  fork;
+
+       if (!XLogRecHasBlockRef(record, block_id))
+           continue;
+
+       if (!XLogRecHasBlockImage(record, block_id))
+           continue;
+
+       page = (Page) buf.data;
+
+       /* Full page exists, so let's save it */
+       if (!RestoreBlockImage(record, block_id, page))
+           pg_fatal("%s", record->errormsg_buf);
+
+       (void) XLogRecGetBlockTagExtended(record, block_id,
+                                         &rnode, &fork, &blk, NULL);
+
+       if (fork >= 0 && fork <= MAX_FORKNUM)
+           sprintf(forkname, "_%s", forkNames[fork]);
+       else
+           pg_fatal("invalid fork number: %u", fork);
+
+       snprintf(filename, MAXPGPATH, "%s/%08X-%08X.%u.%u.%u.%u%s", savepath,
+                LSN_FORMAT_ARGS(record->ReadRecPtr),
+                rnode.spcOid, rnode.dbOid, rnode.relNumber, blk, forkname);
+
+       file = fopen(filename, PG_BINARY_W);
+       if (!file)
+           pg_fatal("could not open file \"%s\": %m", filename);
+
+       if (fwrite(page, BLCKSZ, 1, file) != 1)
+           pg_fatal("could not write file \"%s\": %m", filename);
+
+       if (fclose(file) != 0)
+           pg_fatal("could not write file \"%s\": %m", filename);
+   }
+}
+
 /*
  * Print a record to stdout
  */
@@ -679,6 +773,8 @@ usage(void)
             "                         (default: 1 or the value used in STARTSEG)\n"));
    printf(_("  -V, --version          output version information, then exit\n"));
    printf(_("  -w, --fullpage         only show records with a full page write\n"));
+   printf(_("      --save-fullpage=PATH\n"
+            "                         save full page images\n"));
    printf(_("  -x, --xid=XID          only show records with transaction ID XID\n"));
    printf(_("  -z, --stats[=record]   show statistics instead of records\n"
             "                         (optionally, show per-record statistics)\n"));
@@ -719,6 +815,7 @@ main(int argc, char **argv)
        {"xid", required_argument, NULL, 'x'},
        {"version", no_argument, NULL, 'V'},
        {"stats", optional_argument, NULL, 'z'},
+       {"save-fullpage", required_argument, NULL, 1},
        {NULL, 0, NULL, 0}
    };
 
@@ -770,6 +867,7 @@ main(int argc, char **argv)
    config.filter_by_relation_block_enabled = false;
    config.filter_by_relation_forknum = InvalidForkNumber;
    config.filter_by_fpw = false;
+   config.save_fullpage_path = NULL;
    config.stats = false;
    config.stats_per_record = false;
 
@@ -942,6 +1040,9 @@ main(int argc, char **argv)
                    }
                }
                break;
+           case 1:
+               config.save_fullpage_path = pg_strdup(optarg);
+               break;
            default:
                goto bad_argument;
        }
@@ -972,6 +1073,9 @@ main(int argc, char **argv)
        }
    }
 
+   if (config.save_fullpage_path != NULL)
+       create_fullpage_directory(config.save_fullpage_path);
+
    /* parse files as start/end boundaries, extract path if not specified */
    if (optind < argc)
    {
@@ -1154,6 +1258,10 @@ main(int argc, char **argv)
                XLogDumpDisplayRecord(&config, xlogreader_state);
        }
 
+       /* save full pages if requested */
+       if (config.save_fullpage_path != NULL)
+           XLogRecordSaveFPWs(xlogreader_state, config.save_fullpage_path);
+
        /* check whether we printed enough */
        config.already_displayed_records++;
        if (config.stop_after_records > 0 &&
diff --git a/src/bin/pg_waldump/t/002_save_fullpage.pl b/src/bin/pg_waldump/t/002_save_fullpage.pl
new file mode 100644 (file)
index 0000000..15a53aa
--- /dev/null
@@ -0,0 +1,111 @@
+
+# Copyright (c) 2022, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+use File::Basename;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::RecursiveCopy;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+my ($blocksize, $walfile_name);
+
+# Function to extract the LSN from the given block structure
+sub get_block_lsn
+{
+   my $path      = shift;
+   my $blocksize = shift;
+   my $block;
+
+   open my $fh, '<', $path or die "couldn't open file: $path\n";
+   die "could not read block\n"
+     if $blocksize != read($fh, $block, $blocksize);
+   my ($lsn_hi, $lsn_lo) = unpack('LL', $block);
+
+   $lsn_hi = sprintf('%08X', $lsn_hi);
+   $lsn_lo = sprintf('%08X', $lsn_lo);
+
+   return ($lsn_hi, $lsn_lo);
+}
+
+my $node = PostgreSQL::Test::Cluster->new('main');
+$node->init;
+$node->append_conf(
+   'postgresql.conf', q{
+wal_level = 'replica'
+max_wal_senders = 4
+});
+$node->start;
+
+# Generate data/WAL to examine that will have full pages in them.
+$node->safe_psql(
+   'postgres',
+   "SELECT 'init' FROM pg_create_physical_replication_slot('regress_pg_waldump_slot', true, false);
+CREATE TABLE test_table AS SELECT generate_series(1,100) a;
+-- Force FPWs on the next writes.
+CHECKPOINT;
+UPDATE test_table SET a = a + 1;
+");
+
+($walfile_name, $blocksize) = split '\|' => $node->safe_psql('postgres',
+   "SELECT pg_walfile_name(pg_switch_wal()), current_setting('block_size')");
+
+# Get the relation node, etc for the new table
+my $relation = $node->safe_psql(
+   'postgres',
+   q{SELECT format(
+        '%s/%s/%s',
+        CASE WHEN reltablespace = 0 THEN dattablespace ELSE reltablespace END,
+        pg_database.oid,
+        pg_relation_filenode(pg_class.oid))
+    FROM pg_class, pg_database
+    WHERE relname = 'test_table' AND
+        datname = current_database()}
+);
+
+my $walfile    = $node->data_dir . '/pg_wal/' . $walfile_name;
+my $tmp_folder = PostgreSQL::Test::Utils::tempdir;
+
+ok(-f $walfile, "Got a WAL file");
+
+$node->command_ok(
+   [
+       'pg_waldump',      '--quiet',
+       '--save-fullpage', "$tmp_folder/raw",
+       '--relation',      $relation,
+       $walfile
+   ]);
+
+# This regexp will match filenames formatted as:
+# XXXXXXXX-XXXXXXXX.DBOID.TLOID.NODEOID.dd_fork with the components being:
+# - WAL LSN in hex format,
+# - Tablespace OID (0 for global)
+# - Database OID.
+# - Relfilenode.
+# - Block number.
+# - Fork this block came from (vm, init, fsm, or main).
+my $file_re =
+  qr/^([0-9A-F]{8})-([0-9A-F]{8})[.][0-9]+[.][0-9]+[.][0-9]+[.][0-9]+(?:_vm|_init|_fsm|_main)?$/;
+
+my $file_count = 0;
+
+# Verify filename format matches --save-fullpage.
+for my $fullpath (glob "$tmp_folder/raw/*")
+{
+   my $file = File::Basename::basename($fullpath);
+
+   like($file, $file_re, "verify filename format for file $file");
+   $file_count++;
+
+   my ($hi_lsn_fn, $lo_lsn_fn) = ($file =~ $file_re);
+   my ($hi_lsn_bk, $lo_lsn_bk) = get_block_lsn($fullpath, $blocksize);
+
+   # The LSN on the block comes before the file's LSN.
+   ok( $hi_lsn_fn . $lo_lsn_fn gt $hi_lsn_bk . $lo_lsn_bk,
+       'LSN stored in the file precedes the one stored in the block');
+}
+
+ok($file_count > 0, 'verify that at least one block has been saved');
+
+done_testing();