Add some checkpoint and redo LSNs to a couple of recovery errors
author Michael Paquier <michael@paquier.xyz>
Mon, 11 Mar 2024 00:08:05 +0000 (09:08 +0900)
committer Michael Paquier <michael@paquier.xyz>
Mon, 11 Mar 2024 00:08:05 +0000 (09:08 +0900)
Two FATALs and one PANIC gain details about the LSNs they fail at:
- When restoring from a backup_label, the FATAL log generated when not
finding the checkpoint record now reports its LSN.
- When restoring from a backup_label, the FATAL log generated when not
finding the redo record referenced by a checkpoint record now shows both
the redo and checkpoint record LSNs.
- When not restoring from a backup_label, the PANIC error generated when
not finding the checkpoint record now reports its LSN.

This information is useful when debugging corruption issues, and these
LSNs may not show up in the logs depending on the level of logging
configured in the backend.

Author: David Steele
Discussion: https://postgr.es/m/0e90da89-77ca-4ccf-872c-9626d755e288@pgmasters.net

src/backend/access/transam/xlogrecovery.c

index 853b540945bf662ecc2a07c662dc11fa6604090e..29c5bec084771d9b233056f7887eef7d10b799b0 100644 (file)
@@ -646,7 +646,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
                if (!ReadRecord(xlogprefetcher, LOG, false,
                                checkPoint.ThisTimeLineID))
                    ereport(FATAL,
-                           (errmsg("could not find redo location referenced by checkpoint record"),
+                           (errmsg("could not find redo location %X/%X referenced by checkpoint record at %X/%X",
+                                   LSN_FORMAT_ARGS(checkPoint.redo), LSN_FORMAT_ARGS(CheckPointLoc)),
                             errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n"
                                     "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
                                     "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
@@ -656,7 +657,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
        else
        {
            ereport(FATAL,
-                   (errmsg("could not locate required checkpoint record"),
+                   (errmsg("could not locate required checkpoint record at %X/%X",
+                           LSN_FORMAT_ARGS(CheckPointLoc)),
                     errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n"
                             "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
                             "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
@@ -790,7 +792,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
             * simplify processing around checkpoints.
             */
            ereport(PANIC,
-                   (errmsg("could not locate a valid checkpoint record")));
+                   (errmsg("could not locate a valid checkpoint record at %X/%X",
+                           LSN_FORMAT_ARGS(CheckPointLoc))));
        }
        memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
        wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);