Explain pruning pgstats accounting subtleties.
authorPeter Geoghegan <pg@bowt.ie>
Sat, 13 Nov 2021 03:45:58 +0000 (19:45 -0800)
committerPeter Geoghegan <pg@bowt.ie>
Sat, 13 Nov 2021 03:45:58 +0000 (19:45 -0800)
Add a comment explaining why the pgstats accounting used during
opportunistic heap pruning operations (to maintain the current number of
dead tuples in the relation) needs to compensate by subtracting away the
number of new LP_DEAD items.  This is needed so it can avoid completely
forgetting about tuples that become LP_DEAD items during pruning -- they
should still count.

It seems more natural to discuss this issue at the only relevant call
site (opportunistic pruning), since the same issue does not apply to the
only other caller (the VACUUM call site).  Move everything there too.

Author: Peter Geoghegan <pg@bowt.ie>
Discussion: https://postgr.es/m/CAH2-Wzm7f+A6ej650gi_ifTgbhsadVW5cujAL3punpupHff5Yg@mail.gmail.com

src/backend/access/heap/pruneheap.c
src/backend/access/heap/vacuumlazy.c
src/include/access/heapam.h

index 50ed76198c9ccde996061837652d78e242b459ea..5c0b60319d8b5a5e8d0556382d47d68ace41d479 100644 (file)
@@ -182,10 +182,29 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
         */
        if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
        {
-           /* OK to prune */
-           (void) heap_page_prune(relation, buffer, vistest,
-                                  limited_xmin, limited_ts,
-                                  true, NULL);
+           int     ndeleted,
+                   nnewlpdead;
+
+           ndeleted = heap_page_prune(relation, buffer, vistest, limited_xmin,
+                                      limited_ts, &nnewlpdead, NULL);
+
+           /*
+            * Report the number of tuples reclaimed to pgstats.  This is
+            * ndeleted minus the number of newly-LP_DEAD-set items.
+            *
+            * We derive the number of dead tuples like this to avoid totally
+            * forgetting about items that were set to LP_DEAD, since they
+            * still need to be cleaned up by VACUUM.  We only want to count
+            * heap-only tuples that just became LP_UNUSED in our report,
+            * which don't.
+            *
+            * VACUUM doesn't have to compensate in the same way when it
+            * tracks ndeleted, since it will set the same LP_DEAD items to
+            * LP_UNUSED separately.
+            */
+           if (ndeleted > nnewlpdead)
+               pgstat_update_heap_dead_tuples(relation,
+                                              ndeleted - nnewlpdead);
        }
 
        /* And release buffer lock */
@@ -212,10 +231,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
  * either have been set by TransactionIdLimitedForOldSnapshots, or
  * InvalidTransactionId/0 respectively.
  *
- * If report_stats is true then we send the number of reclaimed heap-only
- * tuples to pgstats.  (This must be false during vacuum, since vacuum will
- * send its own new total to pgstats, and we don't want this delta applied
- * on top of that.)
+ * Sets *nnewlpdead for caller, indicating the number of items that were
+ * newly set LP_DEAD during prune operation.
  *
  * off_loc is the offset location required by the caller to use in error
  * callback.
@@ -227,7 +244,7 @@ heap_page_prune(Relation relation, Buffer buffer,
                GlobalVisState *vistest,
                TransactionId old_snap_xmin,
                TimestampTz old_snap_ts,
-               bool report_stats,
+               int *nnewlpdead,
                OffsetNumber *off_loc)
 {
    int         ndeleted = 0;
@@ -381,13 +398,8 @@ heap_page_prune(Relation relation, Buffer buffer,
 
    END_CRIT_SECTION();
 
-   /*
-    * If requested, report the number of tuples reclaimed to pgstats. This is
-    * ndeleted minus ndead, because we don't want to count a now-DEAD root
-    * item as a deletion for this purpose.
-    */
-   if (report_stats && ndeleted > prstate.ndead)
-       pgstat_update_heap_dead_tuples(relation, ndeleted - prstate.ndead);
+   /* Record number of newly-set-LP_DEAD items for caller */
+   *nnewlpdead = prstate.ndead;
 
    return ndeleted;
 }
index 1dd162ab9166fce8e2a87644aeea744b26fc676b..4ee1f1485468662d9b0ed1c40b102b932ad82a24 100644 (file)
@@ -1712,6 +1712,7 @@ lazy_scan_prune(LVRelState *vacrel,
                new_dead_tuples,
                num_tuples,
                live_tuples;
+   int         nnewlpdead;
    int         nfrozen;
    OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
    xl_heap_freeze_tuple frozen[MaxHeapTuplesPerPage];
@@ -1737,7 +1738,7 @@ retry:
     * that were deleted from indexes.
     */
    tuples_deleted = heap_page_prune(rel, buf, vistest,
-                                    InvalidTransactionId, 0, false,
+                                    InvalidTransactionId, 0, &nnewlpdead,
                                     &vacrel->offnum);
 
    /*
index e63b49fc385cecb3a60f730f09832265107d453d..417dd288e5171b6c022d030e9cded30342747a17 100644 (file)
@@ -186,7 +186,7 @@ extern int  heap_page_prune(Relation relation, Buffer buffer,
                            struct GlobalVisState *vistest,
                            TransactionId old_snap_xmin,
                            TimestampTz old_snap_ts_ts,
-                           bool report_stats,
+                           int *nnewlpdead,
                            OffsetNumber *off_loc);
 extern void heap_page_prune_execute(Buffer buffer,
                                    OffsetNumber *redirected, int nredirected,