Adjust max_slot_wal_keep_size behavior per review

author Alvaro Herrera <alvherre@alvh.no-ip.org>

Wed, 24 Jun 2020 18:23:39 +0000 (14:23 -0400)

committer Alvaro Herrera <alvherre@alvh.no-ip.org>

Wed, 24 Jun 2020 18:23:39 +0000 (14:23 -0400)
author Alvaro Herrera <alvherre@alvh.no-ip.org>
Wed, 24 Jun 2020 18:23:39 +0000 (14:23 -0400)
committer Alvaro Herrera <alvherre@alvh.no-ip.org>
Wed, 24 Jun 2020 18:23:39 +0000 (14:23 -0400)
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml

index 5a66115df1ee979fd630aa38923fa1af7ee175a7..49a881b262104b16869f591a2a48b42f5c50cace 100644 (file)
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -11239,19 +11239,29 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
         Possible values are:
         <itemizedlist>
          <listitem>
-         <para><literal>normal</literal> means that the claimed files
+         <para><literal>reserved</literal> means that the claimed files
            are within <varname>max_wal_size</varname>.</para>
          </listitem>
          <listitem>
-         <para><literal>reserved</literal> means
+         <para><literal>extended</literal> means
            that <varname>max_wal_size</varname> is exceeded but the files are
-          still held, either by some replication slot or
-          by <varname>wal_keep_segments</varname>.</para>
+          still retained, either by the replication slot or
+          by <varname>wal_keep_segments</varname>.
+         </para>
          </listitem>
          <listitem>
-         <para><literal>lost</literal> means that some WAL files are
-          definitely lost and this slot cannot be used to resume replication
-          anymore.</para>
+         <para>
+          <literal>unreserved</literal> means that the slot no longer
+          retains the required WAL files and some of them are to be removed at
+          the next checkpoint.  This state can return
+          to <literal>reserved</literal> or <literal>extended</literal>.
+         </para>
+        </listitem>
+        <listitem>
+         <para>
+          <literal>lost</literal> means that some required WAL files have
+          been removed and this slot is no longer usable.
+         </para>
          </listitem>
         </itemizedlist>
         The last two states are seen only when
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c

index 34ede80c44f4db98e82a1961256904c5db73f379..e455384b5b081f47fe7247525c5b92d6a7f53856 100644 (file)
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -9488,15 +9488,20 @@ CreateRestartPoint(int flags)
   *     (typically a slot's restart_lsn)
   *
   * Returns one of the following enum values:
- * * WALAVAIL_NORMAL means targetLSN is available because it is in the range
- *   of max_wal_size.
   *
- * * WALAVAIL_PRESERVED means it is still available by preserving extra
+ * * WALAVAIL_RESERVED means targetLSN is available and it is in the range of
+ *   max_wal_size.
+ *
+ * * WALAVAIL_EXTENDED means it is still available by preserving extra
   *   segments beyond max_wal_size. If max_slot_wal_keep_size is smaller
   *   than max_wal_size, this state is not returned.
   *
- * * WALAVAIL_REMOVED means it is definitely lost. A replication stream on
- *   a slot with this LSN cannot continue.
+ * * WALAVAIL_UNRESERVED means it is being lost and the next checkpoint will
+ *   remove reserved segments. The walsender using this slot may return to the
+ *   above.
+ *
+ * * WALAVAIL_REMOVED means it has been removed. A replication stream on
+ *   a slot with this LSN cannot continue after a restart.
   *
   * * WALAVAIL_INVALID_LSN means the slot hasn't been set to reserve WAL.
   */
@@ -9512,13 +9517,18 @@ GetWALAvailability(XLogRecPtr targetLSN)
                                                      * slot */
     uint64      keepSegs;
  
-   /* slot does not reserve WAL. Either deactivated, or has never been active */
+   /*
+    * slot does not reserve WAL. Either deactivated, or has never been active
+    */
     if (XLogRecPtrIsInvalid(targetLSN))
         return WALAVAIL_INVALID_LSN;
  
     currpos = GetXLogWriteRecPtr();
  
-   /* calculate oldest segment currently needed by slots */
+   /*
+    * calculate the oldest segment currently reserved by all slots,
+    * considering wal_keep_segments and max_slot_wal_keep_size
+    */
     XLByteToSeg(targetLSN, targetSeg, wal_segment_size);
     KeepLogSeg(currpos, &oldestSlotSeg);
  
@@ -9529,10 +9539,9 @@ GetWALAvailability(XLogRecPtr targetLSN)
      */
     oldestSeg = XLogGetLastRemovedSegno() + 1;
  
-   /* calculate oldest segment by max_wal_size and wal_keep_segments */
+   /* calculate oldest segment by max_wal_size */
     XLByteToSeg(currpos, currSeg, wal_segment_size);
-   keepSegs = ConvertToXSegs(Max(max_wal_size_mb, wal_keep_segments),
-                             wal_segment_size) + 1;
+   keepSegs = ConvertToXSegs(max_wal_size_mb, wal_segment_size) + 1;
  
     if (currSeg > keepSegs)
         oldestSegMaxWalSize = currSeg - keepSegs;
@@ -9540,27 +9549,23 @@ GetWALAvailability(XLogRecPtr targetLSN)
         oldestSegMaxWalSize = 1;
  
     /*
-    * If max_slot_wal_keep_size has changed after the last call, the segment
-    * that would been kept by the current setting might have been lost by the
-    * previous setting. No point in showing normal or keeping status values
-    * if the targetSeg is known to be lost.
+    * No point in returning reserved or extended status values if the
+    * targetSeg is known to be lost.
      */
-   if (targetSeg >= oldestSeg)
+   if (targetSeg >= oldestSlotSeg)
     {
-       /*
-        * show "normal" when targetSeg is within max_wal_size, even if
-        * max_slot_wal_keep_size is smaller than max_wal_size.
-        */
-       if ((max_slot_wal_keep_size_mb <= 0 ||
-            max_slot_wal_keep_size_mb >= max_wal_size_mb) &&
-           oldestSegMaxWalSize <= targetSeg)
-           return WALAVAIL_NORMAL;
-
-       /* being retained by slots */
-       if (oldestSlotSeg <= targetSeg)
+       /* show "reserved" when targetSeg is within max_wal_size */
+       if (targetSeg >= oldestSegMaxWalSize)
             return WALAVAIL_RESERVED;
+
+       /* being retained by slots exceeding max_wal_size */
+       return WALAVAIL_EXTENDED;
     }
  
+   /* WAL segments are no longer retained but haven't been removed yet */
+   if (targetSeg >= oldestSeg)
+       return WALAVAIL_UNRESERVED;
+
     /* Definitely lost */
     return WALAVAIL_REMOVED;
  }
diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c

index 3fc54cb9bab1288d0e23e67276ae06604b5b312e..df854bc6e3f784306b756a1b840e2ed68443f414 100644 (file)
--- a/src/backend/replication/slotfuncs.c
+++ b/src/backend/replication/slotfuncs.c
@@ -359,24 +359,47 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
                 nulls[i++] = true;
                 break;
  
-           case WALAVAIL_NORMAL:
-               values[i++] = CStringGetTextDatum("normal");
-               break;
-
             case WALAVAIL_RESERVED:
                 values[i++] = CStringGetTextDatum("reserved");
                 break;
  
+           case WALAVAIL_EXTENDED:
+               values[i++] = CStringGetTextDatum("extended");
+               break;
+
+           case WALAVAIL_UNRESERVED:
+               values[i++] = CStringGetTextDatum("unreserved");
+               break;
+
             case WALAVAIL_REMOVED:
+
+               /*
+                * If we read the restart_lsn long enough ago, maybe that file
+                * has been removed by now.  However, the walsender could have
+                * moved forward enough that it jumped to another file after
+                * we looked.  If checkpointer signalled the process to
+                * termination, then it's definitely lost; but if a process is
+                * still alive, then "unreserved" seems more appropriate.
+                */
+               if (!XLogRecPtrIsInvalid(slot_contents.data.restart_lsn))
+               {
+                   int         pid;
+
+                   SpinLockAcquire(&slot->mutex);
+                   pid = slot->active_pid;
+                   SpinLockRelease(&slot->mutex);
+                   if (pid != 0)
+                   {
+                       values[i++] = CStringGetTextDatum("unreserved");
+                       break;
+                   }
+               }
                 values[i++] = CStringGetTextDatum("lost");
                 break;
-
-           default:
-               elog(ERROR, "invalid walstate: %d", (int) walstate);
         }
  
         if (max_slot_wal_keep_size_mb >= 0 &&
-           (walstate == WALAVAIL_NORMAL || walstate == WALAVAIL_RESERVED) &&
+           (walstate == WALAVAIL_RESERVED || walstate == WALAVAIL_EXTENDED) &&
             ((last_removed_seg = XLogGetLastRemovedSegno()) != 0))
         {
             XLogRecPtr  min_safe_lsn;
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h

index 9702dce98aa2afa98525fa1f1753a9cd0cf5d01a..77ac4e785fced1b12efb0bb2059ef8e600ebd704 100644 (file)
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -270,8 +270,10 @@ extern CheckpointStatsData CheckpointStats;
  typedef enum WALAvailability
  {
     WALAVAIL_INVALID_LSN,       /* parameter error */
-   WALAVAIL_NORMAL,            /* WAL segment is within max_wal_size */
-   WALAVAIL_RESERVED,          /* WAL segment is reserved by a slot */
+   WALAVAIL_RESERVED,          /* WAL segment is within max_wal_size */
+   WALAVAIL_EXTENDED,          /* WAL segment is reserved by a slot or
+                                * wal_keep_segments */
+   WALAVAIL_UNRESERVED,        /* no longer reserved, but not removed yet */
     WALAVAIL_REMOVED            /* WAL segment has been removed */
  } WALAvailability;
  
diff --git a/src/test/recovery/t/019_replslot_limit.pl b/src/test/recovery/t/019_replslot_limit.pl

index f1be984cc9a6cb058b09a5cb8165213f51e1ac99..7d22ae57201a31a6e9eeb3872626975ee6f3f6f4 100644 (file)
--- a/src/test/recovery/t/019_replslot_limit.pl
+++ b/src/test/recovery/t/019_replslot_limit.pl
@@ -56,7 +56,7 @@ $node_standby->stop;
  $result = $node_master->safe_psql('postgres',
     "SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
  );
-is($result, "normal|t", 'check the catching-up state');
+is($result, "reserved|t", 'check the catching-up state');
  
  # Advance WAL by five segments (= 5MB) on master
  advance_wal($node_master, 1);
@@ -66,7 +66,8 @@ $node_master->safe_psql('postgres', "CHECKPOINT;");
  $result = $node_master->safe_psql('postgres',
     "SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
  );
-is($result, "normal|t", 'check that it is safe if WAL fits in max_wal_size');
+is($result, "reserved|t",
+   'check that it is safe if WAL fits in max_wal_size');
  
  advance_wal($node_master, 4);
  $node_master->safe_psql('postgres', "CHECKPOINT;");
@@ -75,7 +76,7 @@ $node_master->safe_psql('postgres', "CHECKPOINT;");
  $result = $node_master->safe_psql('postgres',
     "SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
  );
-is($result, "normal|t", 'check that slot is working');
+is($result, "reserved|t", 'check that slot is working');
  
  # The standby can reconnect to master
  $node_standby->start;
@@ -99,7 +100,7 @@ $node_master->reload;
  
  $result = $node_master->safe_psql('postgres',
     "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'");
-is($result, "normal", 'check that max_slot_wal_keep_size is working');
+is($result, "reserved", 'check that max_slot_wal_keep_size is working');
  
  # Advance WAL again then checkpoint, reducing remain by 2 MB.
  advance_wal($node_master, 2);
@@ -108,7 +109,7 @@ $node_master->safe_psql('postgres', "CHECKPOINT;");
  # The slot is still working
  $result = $node_master->safe_psql('postgres',
     "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'");
-is($result, "normal",
+is($result, "reserved",
     'check that min_safe_lsn gets close to the current LSN');
  
  # The standby can reconnect to master
@@ -125,7 +126,7 @@ advance_wal($node_master, 6);
  $result = $node_master->safe_psql('postgres',
     "SELECT wal_status as remain FROM pg_replication_slots WHERE slot_name = 'rep1'"
  );
-is($result, "normal",
+is($result, "extended",
     'check that wal_keep_segments overrides max_slot_wal_keep_size');
  # restore wal_keep_segments
  $result = $node_master->safe_psql('postgres',
@@ -143,7 +144,7 @@ advance_wal($node_master, 6);
  # Slot gets into 'reserved' state
  $result = $node_master->safe_psql('postgres',
     "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'");
-is($result, "reserved", 'check that the slot state changes to "reserved"');
+is($result, "extended", 'check that the slot state changes to "extended"');
  
  # do checkpoint so that the next checkpoint runs too early
  $node_master->safe_psql('postgres', "CHECKPOINT;");
@@ -151,11 +152,12 @@ $node_master->safe_psql('postgres', "CHECKPOINT;");
  # Advance WAL again without checkpoint; remain goes to 0.
  advance_wal($node_master, 1);
  
-# Slot gets into 'lost' state
+# Slot gets into 'unreserved' state
  $result = $node_master->safe_psql('postgres',
     "SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
  );
-is($result, "lost|t", 'check that the slot state changes to "lost"');
+is($result, "unreserved|t",
+   'check that the slot state changes to "unreserved"');
  
  # The standby still can connect to master before a checkpoint
  $node_standby->start;
@@ -186,7 +188,8 @@ ok( find_in_log(
  $result = $node_master->safe_psql('postgres',
     "SELECT slot_name, active, restart_lsn IS NULL, wal_status, min_safe_lsn FROM pg_replication_slots WHERE slot_name = 'rep1'"
  );
-is($result, "rep1|f|t|lost|", 'check that the slot became inactive');
+is($result, "rep1|f|t|lost|",
+   'check that the slot became inactive and the state "lost" persists');
  
  # The standby no longer can connect to the master
  $logstart = get_log_size($node_standby);
author	Alvaro Herrera <alvherre@alvh.no-ip.org>
	Wed, 24 Jun 2020 18:23:39 +0000 (14:23 -0400)
committer	Alvaro Herrera <alvherre@alvh.no-ip.org>
	Wed, 24 Jun 2020 18:23:39 +0000 (14:23 -0400)
doc/src/sgml/catalogs.sgml		patch \| blob \| blame \| history
src/backend/access/transam/xlog.c		patch \| blob \| blame \| history
src/backend/replication/slotfuncs.c		patch \| blob \| blame \| history
src/include/access/xlog.h		patch \| blob \| blame \| history
src/test/recovery/t/019_replslot_limit.pl		patch \| blob \| blame \| history