Adjust max_slot_wal_keep_size behavior per review
authorAlvaro Herrera <alvherre@alvh.no-ip.org>
Wed, 24 Jun 2020 18:23:39 +0000 (14:23 -0400)
committerAlvaro Herrera <alvherre@alvh.no-ip.org>
Wed, 24 Jun 2020 18:23:39 +0000 (14:23 -0400)
In pg_replication_slot, change output from normal/reserved/lost to
reserved/extended/unreserved/ lost, which better expresses the possible
states particularly near the time where segments are no longer safe but
checkpoint has not run yet.

Under the new definition, reserved means the slot is consuming WAL
that's still under the normal WAL size constraints; extended means it's
consuming WAL that's being protected by wal_keep_segments or the slot
itself, whose size is below max_slot_wal_keep_size; unreserved means the
WAL is no longer safe, but checkpoint has not yet removed those files.
Such as slot is in imminent danger, but can still continue for a little
while and may catch up to the reserved WAL space.

Also, there were some bugs in the calculations used to report the
status; fixed those.

Backpatch to 13.

Reported-by: Fujii Masao <masao.fujii@oss.nttdata.com>
Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Reviewed-by: Fujii Masao <masao.fujii@oss.nttdata.com>
Reviewed-by: Álvaro Herrera <alvherre@alvh.no-ip.org>
Discussion: https://postgr.es/m/20200616.120236.1809496990963386593.horikyota.ntt@gmail.com

doc/src/sgml/catalogs.sgml
src/backend/access/transam/xlog.c
src/backend/replication/slotfuncs.c
src/include/access/xlog.h
src/test/recovery/t/019_replslot_limit.pl

index 5a66115df1ee979fd630aa38923fa1af7ee175a7..49a881b262104b16869f591a2a48b42f5c50cace 100644 (file)
@@ -11239,19 +11239,29 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
        Possible values are:
        <itemizedlist>
         <listitem>
-         <para><literal>normal</literal> means that the claimed files
+         <para><literal>reserved</literal> means that the claimed files
           are within <varname>max_wal_size</varname>.</para>
         </listitem>
         <listitem>
-         <para><literal>reserved</literal> means
+         <para><literal>extended</literal> means
           that <varname>max_wal_size</varname> is exceeded but the files are
-          still held, either by some replication slot or
-          by <varname>wal_keep_segments</varname>.</para>
+          still retained, either by the replication slot or
+          by <varname>wal_keep_segments</varname>.
+         </para>
         </listitem>
         <listitem>
-         <para><literal>lost</literal> means that some WAL files are
-          definitely lost and this slot cannot be used to resume replication
-          anymore.</para>
+         <para>
+          <literal>unreserved</literal> means that the slot no longer
+          retains the required WAL files and some of them are to be removed at
+          the next checkpoint.  This state can return
+          to <literal>reserved</literal> or <literal>extended</literal>.
+         </para>
+        </listitem>
+        <listitem>
+         <para>
+          <literal>lost</literal> means that some required WAL files have
+          been removed and this slot is no longer usable.
+         </para>
         </listitem>
        </itemizedlist>
        The last two states are seen only when
index 34ede80c44f4db98e82a1961256904c5db73f379..e455384b5b081f47fe7247525c5b92d6a7f53856 100644 (file)
@@ -9488,15 +9488,20 @@ CreateRestartPoint(int flags)
  *     (typically a slot's restart_lsn)
  *
  * Returns one of the following enum values:
- * * WALAVAIL_NORMAL means targetLSN is available because it is in the range
- *   of max_wal_size.
  *
- * * WALAVAIL_PRESERVED means it is still available by preserving extra
+ * * WALAVAIL_RESERVED means targetLSN is available and it is in the range of
+ *   max_wal_size.
+ *
+ * * WALAVAIL_EXTENDED means it is still available by preserving extra
  *   segments beyond max_wal_size. If max_slot_wal_keep_size is smaller
  *   than max_wal_size, this state is not returned.
  *
- * * WALAVAIL_REMOVED means it is definitely lost. A replication stream on
- *   a slot with this LSN cannot continue.
+ * * WALAVAIL_UNRESERVED means it is being lost and the next checkpoint will
+ *   remove reserved segments. The walsender using this slot may return to the
+ *   above.
+ *
+ * * WALAVAIL_REMOVED means it has been removed. A replication stream on
+ *   a slot with this LSN cannot continue after a restart.
  *
  * * WALAVAIL_INVALID_LSN means the slot hasn't been set to reserve WAL.
  */
@@ -9512,13 +9517,18 @@ GetWALAvailability(XLogRecPtr targetLSN)
                                                     * slot */
    uint64      keepSegs;
 
-   /* slot does not reserve WAL. Either deactivated, or has never been active */
+   /*
+    * slot does not reserve WAL. Either deactivated, or has never been active
+    */
    if (XLogRecPtrIsInvalid(targetLSN))
        return WALAVAIL_INVALID_LSN;
 
    currpos = GetXLogWriteRecPtr();
 
-   /* calculate oldest segment currently needed by slots */
+   /*
+    * calculate the oldest segment currently reserved by all slots,
+    * considering wal_keep_segments and max_slot_wal_keep_size
+    */
    XLByteToSeg(targetLSN, targetSeg, wal_segment_size);
    KeepLogSeg(currpos, &oldestSlotSeg);
 
@@ -9529,10 +9539,9 @@ GetWALAvailability(XLogRecPtr targetLSN)
     */
    oldestSeg = XLogGetLastRemovedSegno() + 1;
 
-   /* calculate oldest segment by max_wal_size and wal_keep_segments */
+   /* calculate oldest segment by max_wal_size */
    XLByteToSeg(currpos, currSeg, wal_segment_size);
-   keepSegs = ConvertToXSegs(Max(max_wal_size_mb, wal_keep_segments),
-                             wal_segment_size) + 1;
+   keepSegs = ConvertToXSegs(max_wal_size_mb, wal_segment_size) + 1;
 
    if (currSeg > keepSegs)
        oldestSegMaxWalSize = currSeg - keepSegs;
@@ -9540,27 +9549,23 @@ GetWALAvailability(XLogRecPtr targetLSN)
        oldestSegMaxWalSize = 1;
 
    /*
-    * If max_slot_wal_keep_size has changed after the last call, the segment
-    * that would been kept by the current setting might have been lost by the
-    * previous setting. No point in showing normal or keeping status values
-    * if the targetSeg is known to be lost.
+    * No point in returning reserved or extended status values if the
+    * targetSeg is known to be lost.
     */
-   if (targetSeg >= oldestSeg)
+   if (targetSeg >= oldestSlotSeg)
    {
-       /*
-        * show "normal" when targetSeg is within max_wal_size, even if
-        * max_slot_wal_keep_size is smaller than max_wal_size.
-        */
-       if ((max_slot_wal_keep_size_mb <= 0 ||
-            max_slot_wal_keep_size_mb >= max_wal_size_mb) &&
-           oldestSegMaxWalSize <= targetSeg)
-           return WALAVAIL_NORMAL;
-
-       /* being retained by slots */
-       if (oldestSlotSeg <= targetSeg)
+       /* show "reserved" when targetSeg is within max_wal_size */
+       if (targetSeg >= oldestSegMaxWalSize)
            return WALAVAIL_RESERVED;
+
+       /* being retained by slots exceeding max_wal_size */
+       return WALAVAIL_EXTENDED;
    }
 
+   /* WAL segments are no longer retained but haven't been removed yet */
+   if (targetSeg >= oldestSeg)
+       return WALAVAIL_UNRESERVED;
+
    /* Definitely lost */
    return WALAVAIL_REMOVED;
 }
index 3fc54cb9bab1288d0e23e67276ae06604b5b312e..df854bc6e3f784306b756a1b840e2ed68443f414 100644 (file)
@@ -359,24 +359,47 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
                nulls[i++] = true;
                break;
 
-           case WALAVAIL_NORMAL:
-               values[i++] = CStringGetTextDatum("normal");
-               break;
-
            case WALAVAIL_RESERVED:
                values[i++] = CStringGetTextDatum("reserved");
                break;
 
+           case WALAVAIL_EXTENDED:
+               values[i++] = CStringGetTextDatum("extended");
+               break;
+
+           case WALAVAIL_UNRESERVED:
+               values[i++] = CStringGetTextDatum("unreserved");
+               break;
+
            case WALAVAIL_REMOVED:
+
+               /*
+                * If we read the restart_lsn long enough ago, maybe that file
+                * has been removed by now.  However, the walsender could have
+                * moved forward enough that it jumped to another file after
+                * we looked.  If checkpointer signalled the process to
+                * termination, then it's definitely lost; but if a process is
+                * still alive, then "unreserved" seems more appropriate.
+                */
+               if (!XLogRecPtrIsInvalid(slot_contents.data.restart_lsn))
+               {
+                   int         pid;
+
+                   SpinLockAcquire(&slot->mutex);
+                   pid = slot->active_pid;
+                   SpinLockRelease(&slot->mutex);
+                   if (pid != 0)
+                   {
+                       values[i++] = CStringGetTextDatum("unreserved");
+                       break;
+                   }
+               }
                values[i++] = CStringGetTextDatum("lost");
                break;
-
-           default:
-               elog(ERROR, "invalid walstate: %d", (int) walstate);
        }
 
        if (max_slot_wal_keep_size_mb >= 0 &&
-           (walstate == WALAVAIL_NORMAL || walstate == WALAVAIL_RESERVED) &&
+           (walstate == WALAVAIL_RESERVED || walstate == WALAVAIL_EXTENDED) &&
            ((last_removed_seg = XLogGetLastRemovedSegno()) != 0))
        {
            XLogRecPtr  min_safe_lsn;
index 9702dce98aa2afa98525fa1f1753a9cd0cf5d01a..77ac4e785fced1b12efb0bb2059ef8e600ebd704 100644 (file)
@@ -270,8 +270,10 @@ extern CheckpointStatsData CheckpointStats;
 typedef enum WALAvailability
 {
    WALAVAIL_INVALID_LSN,       /* parameter error */
-   WALAVAIL_NORMAL,            /* WAL segment is within max_wal_size */
-   WALAVAIL_RESERVED,          /* WAL segment is reserved by a slot */
+   WALAVAIL_RESERVED,          /* WAL segment is within max_wal_size */
+   WALAVAIL_EXTENDED,          /* WAL segment is reserved by a slot or
+                                * wal_keep_segments */
+   WALAVAIL_UNRESERVED,        /* no longer reserved, but not removed yet */
    WALAVAIL_REMOVED            /* WAL segment has been removed */
 } WALAvailability;
 
index f1be984cc9a6cb058b09a5cb8165213f51e1ac99..7d22ae57201a31a6e9eeb3872626975ee6f3f6f4 100644 (file)
@@ -56,7 +56,7 @@ $node_standby->stop;
 $result = $node_master->safe_psql('postgres',
    "SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
 );
-is($result, "normal|t", 'check the catching-up state');
+is($result, "reserved|t", 'check the catching-up state');
 
 # Advance WAL by five segments (= 5MB) on master
 advance_wal($node_master, 1);
@@ -66,7 +66,8 @@ $node_master->safe_psql('postgres', "CHECKPOINT;");
 $result = $node_master->safe_psql('postgres',
    "SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
 );
-is($result, "normal|t", 'check that it is safe if WAL fits in max_wal_size');
+is($result, "reserved|t",
+   'check that it is safe if WAL fits in max_wal_size');
 
 advance_wal($node_master, 4);
 $node_master->safe_psql('postgres', "CHECKPOINT;");
@@ -75,7 +76,7 @@ $node_master->safe_psql('postgres', "CHECKPOINT;");
 $result = $node_master->safe_psql('postgres',
    "SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
 );
-is($result, "normal|t", 'check that slot is working');
+is($result, "reserved|t", 'check that slot is working');
 
 # The standby can reconnect to master
 $node_standby->start;
@@ -99,7 +100,7 @@ $node_master->reload;
 
 $result = $node_master->safe_psql('postgres',
    "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'");
-is($result, "normal", 'check that max_slot_wal_keep_size is working');
+is($result, "reserved", 'check that max_slot_wal_keep_size is working');
 
 # Advance WAL again then checkpoint, reducing remain by 2 MB.
 advance_wal($node_master, 2);
@@ -108,7 +109,7 @@ $node_master->safe_psql('postgres', "CHECKPOINT;");
 # The slot is still working
 $result = $node_master->safe_psql('postgres',
    "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'");
-is($result, "normal",
+is($result, "reserved",
    'check that min_safe_lsn gets close to the current LSN');
 
 # The standby can reconnect to master
@@ -125,7 +126,7 @@ advance_wal($node_master, 6);
 $result = $node_master->safe_psql('postgres',
    "SELECT wal_status as remain FROM pg_replication_slots WHERE slot_name = 'rep1'"
 );
-is($result, "normal",
+is($result, "extended",
    'check that wal_keep_segments overrides max_slot_wal_keep_size');
 # restore wal_keep_segments
 $result = $node_master->safe_psql('postgres',
@@ -143,7 +144,7 @@ advance_wal($node_master, 6);
 # Slot gets into 'reserved' state
 $result = $node_master->safe_psql('postgres',
    "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'");
-is($result, "reserved", 'check that the slot state changes to "reserved"');
+is($result, "extended", 'check that the slot state changes to "extended"');
 
 # do checkpoint so that the next checkpoint runs too early
 $node_master->safe_psql('postgres', "CHECKPOINT;");
@@ -151,11 +152,12 @@ $node_master->safe_psql('postgres', "CHECKPOINT;");
 # Advance WAL again without checkpoint; remain goes to 0.
 advance_wal($node_master, 1);
 
-# Slot gets into 'lost' state
+# Slot gets into 'unreserved' state
 $result = $node_master->safe_psql('postgres',
    "SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
 );
-is($result, "lost|t", 'check that the slot state changes to "lost"');
+is($result, "unreserved|t",
+   'check that the slot state changes to "unreserved"');
 
 # The standby still can connect to master before a checkpoint
 $node_standby->start;
@@ -186,7 +188,8 @@ ok( find_in_log(
 $result = $node_master->safe_psql('postgres',
    "SELECT slot_name, active, restart_lsn IS NULL, wal_status, min_safe_lsn FROM pg_replication_slots WHERE slot_name = 'rep1'"
 );
-is($result, "rep1|f|t|lost|", 'check that the slot became inactive');
+is($result, "rep1|f|t|lost|",
+   'check that the slot became inactive and the state "lost" persists');
 
 # The standby no longer can connect to the master
 $logstart = get_log_size($node_standby);