Add GUC ignore_invalid_pages.
author: Fujii Masao <fujii@postgresql.org>
Wed, 22 Jan 2020 02:56:34 +0000 (11:56 +0900)
committer: Fujii Masao <fujii@postgresql.org>
Wed, 22 Jan 2020 02:56:34 +0000 (11:56 +0900)
Detection of WAL records having references to invalid pages
during recovery causes PostgreSQL to raise a PANIC-level error,
aborting the recovery. Setting ignore_invalid_pages to on causes
the system to ignore those WAL records (but still report a warning),
and continue recovery. This behavior may cause crashes or data loss,
propagate or hide corruption, or lead to other serious problems.
However, it may allow you to get past the PANIC-level error,
to finish the recovery, and to cause the server to start up.

Author: Fujii Masao
Reviewed-by: Michael Paquier
Discussion: https://www.postgresql.org/message-id/CAHGQGwHCK6f77yeZD4MHOnN+PaTf6XiJfEB+Ce7SksSHjeAWtg@mail.gmail.com

doc/src/sgml/config.sgml
src/backend/access/transam/xlogutils.c
src/backend/utils/misc/guc.c

index 3ccacd528b461929942ff4641037e56f8d7a9616..e07dc01e8024cb006b93a09d1f21b9f69cecd331 100644 (file)
@@ -9950,6 +9950,31 @@ LOG:  CleanUpLock: deleting: lock(0xb7acd844) id(24688,24696,0,0,0,1)
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-ignore-invalid-pages" xreflabel="ignore_invalid_pages">
+      <term><varname>ignore_invalid_pages</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>ignore_invalid_pages</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        If set to <literal>off</literal> (the default), detection of
+        WAL records having references to invalid pages during
+        recovery causes <productname>PostgreSQL</productname> to
+        raise a PANIC-level error, aborting the recovery. Setting
+        <varname>ignore_invalid_pages</varname> to <literal>on</literal>
+        causes the system to ignore invalid page references in WAL records
+        (but still report a warning), and continue the recovery.
+        This behavior may <emphasis>cause crashes, data loss,
+        propagate or hide corruption, or other serious problems</emphasis>.
+        However, it may allow you to get past the PANIC-level error,
+        to finish the recovery, and to cause the server to start up.
+        This parameter can only be set at server start. It only has an effect
+        during recovery or in standby mode.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-jit-debugging-support" xreflabel="jit_debugging_support">
       <term><varname>jit_debugging_support</varname> (<type>boolean</type>)
       <indexterm>
index b55c3833703b50432502d4558c1cb33776bc927a..b217ffa52ffb16d72f6596b93487d5999d724dbc 100644 (file)
@@ -31,6 +31,9 @@
 #include "utils/rel.h"
 
 
+/* GUC variable */
+bool       ignore_invalid_pages = false;
+
 /*
  * During XLOG replay, we may see XLOG records for incremental updates of
  * pages that no longer exist, because their relation was later dropped or
@@ -93,7 +96,8 @@ log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno,
    if (reachedConsistency)
    {
        report_invalid_page(WARNING, node, forkno, blkno, present);
-       elog(PANIC, "WAL contains references to invalid pages");
+       elog(ignore_invalid_pages ? WARNING : PANIC,
+            "WAL contains references to invalid pages");
    }
 
    /*
@@ -240,7 +244,8 @@ XLogCheckInvalidPages(void)
    }
 
    if (foundone)
-       elog(PANIC, "WAL contains references to invalid pages");
+       elog(ignore_invalid_pages ? WARNING : PANIC,
+            "WAL contains references to invalid pages");
 
    hash_destroy(invalid_page_tab);
    invalid_page_tab = NULL;
index e44f71e99109be44c37db7428f59f9d6b7277923..9f179a91295770c5db1633b7ca0b229d2e057bbe 100644 (file)
@@ -123,6 +123,7 @@ extern int  CommitSiblings;
 extern char *default_tablespace;
 extern char *temp_tablespaces;
 extern bool ignore_checksum_failure;
+extern bool ignore_invalid_pages;
 extern bool synchronize_seqscans;
 
 #ifdef TRACE_SYNCSCAN
@@ -1172,6 +1173,25 @@ static struct config_bool ConfigureNamesBool[] =
        false,
        NULL, NULL, NULL
    },
+   {
+       {"ignore_invalid_pages", PGC_POSTMASTER, DEVELOPER_OPTIONS,
+           gettext_noop("Continues recovery after an invalid pages failure."),
+           gettext_noop("Detection of WAL records having references to "
+                        "invalid pages during recovery causes PostgreSQL to "
+                        "raise a PANIC-level error, aborting the recovery. "
+                        "Setting ignore_invalid_pages to true causes "
+                        "the system to ignore invalid page references "
+                        "in WAL records (but still report a warning), "
+                        "and continue recovery. This behavior may cause "
+                        "crashes, data loss, propagate or hide corruption, "
+                        "or other serious problems. Only has an effect "
+                        "during recovery or in standby mode."),
+           GUC_NOT_IN_SAMPLE
+       },
+       &ignore_invalid_pages,
+       false,
+       NULL, NULL, NULL
+   },
    {
        {"full_page_writes", PGC_SIGHUP, WAL_SETTINGS,
            gettext_noop("Writes full pages to WAL when first modified after a checkpoint."),