Add cost-based vacuum delay time to progress views.
authorNathan Bossart <nathan@postgresql.org>
Tue, 11 Feb 2025 22:38:14 +0000 (16:38 -0600)
committerNathan Bossart <nathan@postgresql.org>
Tue, 11 Feb 2025 22:38:14 +0000 (16:38 -0600)
This commit adds the amount of time spent sleeping due to
cost-based delay to the pg_stat_progress_vacuum and
pg_stat_progress_analyze system views.  A new configuration
parameter named track_cost_delay_timing, which is off by default,
controls whether this information is gathered.  For vacuum, the
reported value includes the sleep time of any associated parallel
workers.  However, parallel workers only report their sleep time
once per second to avoid overloading the leader process.

Bumps catversion.

Author: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Co-authored-by: Nathan Bossart <nathandbossart@gmail.com>
Reviewed-by: Sami Imseih <samimseih@gmail.com>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Masahiro Ikeda <ikedamsh@oss.nttdata.com>
Reviewed-by: Dilip Kumar <dilipbalaut@gmail.com>
Reviewed-by: Sergei Kornilov <sk@zsrv.org>
Discussion: https://postgr.es/m/ZmaXmWDL829fzAVX%40ip-10-97-1-34.eu-west-3.compute.internal

doc/src/sgml/config.sgml
doc/src/sgml/monitoring.sgml
src/backend/catalog/system_views.sql
src/backend/commands/vacuum.c
src/backend/commands/vacuumparallel.c
src/backend/utils/misc/guc_tables.c
src/backend/utils/misc/postgresql.conf.sample
src/include/catalog/catversion.h
src/include/commands/progress.h
src/include/commands/vacuum.h
src/test/regress/expected/rules.out

index fc186657a532779163ca58bb6521b0bee6e7a5d8..3b557ecabfba2546f95b6e9fa4ee327b435997ce 100644 (file)
@@ -8252,6 +8252,30 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-track-cost-delay-timing" xreflabel="track_cost_delay_timing">
+      <term><varname>track_cost_delay_timing</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>track_cost_delay_timing</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Enables timing of cost-based vacuum delay (see
+        <xref linkend="runtime-config-resource-vacuum-cost"/>).  This parameter
+        is off by default, as it will repeatedly query the operating system for
+        the current time, which may cause significant overhead on some
+        platforms.  You can use the <xref linkend="pgtesttiming"/> tool to
+        measure the overhead of timing on your system.  Cost-based vacuum delay
+        timing information is displayed in
+        <link linkend="vacuum-progress-reporting"><structname>pg_stat_progress_vacuum</structname></link>
+        and
+        <link linkend="analyze-progress-reporting"><structname>pg_stat_progress_analyze</structname></link>.
+        Only superusers and users with the appropriate <literal>SET</literal>
+        privilege can change this setting.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-track-io-timing" xreflabel="track_io_timing">
       <term><varname>track_io_timing</varname> (<type>boolean</type>)
       <indexterm>
index edc2470bcf985b56ac2cd381e0bb77cda014517a..928a6eb64b07599ed549ce41062cb40e562eb359 100644 (file)
@@ -5606,6 +5606,18 @@ FROM pg_stat_get_backend_idset() AS backendid;
        <literal>acquiring inherited sample rows</literal>.
       </para></entry>
      </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>delay_time</structfield> <type>double precision</type>
+      </para>
+      <para>
+       Total time spent sleeping due to cost-based delay (see
+       <xref linkend="runtime-config-resource-vacuum-cost"/>, in milliseconds
+       (if <xref linkend="guc-track-cost-delay-timing"/> is enabled, otherwise
+       zero).
+      </para></entry>
+     </row>
     </tbody>
    </tgroup>
   </table>
@@ -6531,6 +6543,21 @@ FROM pg_stat_get_backend_idset() AS backendid;
        <literal>cleaning up indexes</literal>.
       </para></entry>
      </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>delay_time</structfield> <type>double precision</type>
+      </para>
+      <para>
+       Total time spent sleeping due to cost-based delay (see
+       <xref linkend="runtime-config-resource-vacuum-cost"/>), in milliseconds
+       (if <xref linkend="guc-track-cost-delay-timing"/> is enabled, otherwise
+       zero).  This includes the time that any associated parallel workers have
+       slept.  However, parallel workers report their sleep time no more
+       frequently than once per second, so the reported value may be slightly
+       stale.
+      </para></entry>
+     </row>
     </tbody>
    </tgroup>
   </table>
index cddc3ea9b530e0a826fb06d10586926505add413..eff0990957ebb01e9f0a1df37f87ae712b645fb1 100644 (file)
@@ -1213,7 +1213,8 @@ CREATE VIEW pg_stat_progress_analyze AS
         S.param5 AS ext_stats_computed,
         S.param6 AS child_tables_total,
         S.param7 AS child_tables_done,
-        CAST(S.param8 AS oid) AS current_child_table_relid
+        CAST(S.param8 AS oid) AS current_child_table_relid,
+        S.param9 / 1000000::double precision AS delay_time
     FROM pg_stat_get_progress_info('ANALYZE') AS S
         LEFT JOIN pg_database D ON S.datid = D.oid;
 
@@ -1233,7 +1234,8 @@ CREATE VIEW pg_stat_progress_vacuum AS
         S.param4 AS heap_blks_vacuumed, S.param5 AS index_vacuum_count,
         S.param6 AS max_dead_tuple_bytes, S.param7 AS dead_tuple_bytes,
         S.param8 AS num_dead_item_ids, S.param9 AS indexes_total,
-        S.param10 AS indexes_processed
+        S.param10 AS indexes_processed,
+        S.param11 / 1000000::double precision AS delay_time
     FROM pg_stat_get_progress_info('VACUUM') AS S
         LEFT JOIN pg_database D ON S.datid = D.oid;
 
index cddddb0f9ea1f7a2056ceb8c4e79658c56a30e0a..0239d9bae6526f8ff7a7d82aca1e9a1d8f803b75 100644 (file)
@@ -39,6 +39,7 @@
 #include "catalog/pg_inherits.h"
 #include "commands/cluster.h"
 #include "commands/defrem.h"
+#include "commands/progress.h"
 #include "commands/vacuum.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "utils/snapmgr.h"
 #include "utils/syscache.h"
 
+/*
+ * Minimum interval for cost-based vacuum delay reports from a parallel worker.
+ * This aims to avoid sending too many messages and waking up the leader too
+ * frequently.
+ */
+#define PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS   (NS_PER_S)
 
 /*
  * GUC parameters
@@ -70,6 +77,7 @@ int           vacuum_multixact_freeze_table_age;
 int            vacuum_failsafe_age;
 int            vacuum_multixact_failsafe_age;
 double     vacuum_max_eager_freeze_failure_rate;
+bool       track_cost_delay_timing;
 
 /*
  * Variables for cost-based vacuum delay. The defaults differ between
@@ -80,6 +88,9 @@ double        vacuum_max_eager_freeze_failure_rate;
 double     vacuum_cost_delay = 0;
 int            vacuum_cost_limit = 200;
 
+/* Variable for reporting cost-based vacuum delay from parallel workers. */
+int64      parallel_vacuum_worker_delay_ns = 0;
+
 /*
  * VacuumFailsafeActive is a defined as a global so that we can determine
  * whether or not to re-enable cost-based vacuum delay when vacuuming a table.
@@ -2416,13 +2427,66 @@ vacuum_delay_point(bool is_analyze)
    /* Nap if appropriate */
    if (msec > 0)
    {
+       instr_time  delay_start;
+
        if (msec > vacuum_cost_delay * 4)
            msec = vacuum_cost_delay * 4;
 
+       if (track_cost_delay_timing)
+           INSTR_TIME_SET_CURRENT(delay_start);
+
        pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
        pg_usleep(msec * 1000);
        pgstat_report_wait_end();
 
+       if (track_cost_delay_timing)
+       {
+           instr_time  delay_end;
+           instr_time  delay;
+
+           INSTR_TIME_SET_CURRENT(delay_end);
+           INSTR_TIME_SET_ZERO(delay);
+           INSTR_TIME_ACCUM_DIFF(delay, delay_end, delay_start);
+
+           /*
+            * For parallel workers, we only report the delay time every once
+            * in a while to avoid overloading the leader with messages and
+            * interrupts.
+            */
+           if (IsParallelWorker())
+           {
+               static instr_time last_report_time;
+               instr_time  time_since_last_report;
+
+               Assert(!is_analyze);
+
+               /* Accumulate the delay time */
+               parallel_vacuum_worker_delay_ns += INSTR_TIME_GET_NANOSEC(delay);
+
+               /* Calculate interval since last report */
+               INSTR_TIME_SET_ZERO(time_since_last_report);
+               INSTR_TIME_ACCUM_DIFF(time_since_last_report, delay_end, last_report_time);
+
+               /* If we haven't reported in a while, do so now */
+               if (INSTR_TIME_GET_NANOSEC(time_since_last_report) >=
+                   PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS)
+               {
+                   pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME,
+                                                       parallel_vacuum_worker_delay_ns);
+
+                   /* Reset variables */
+                   last_report_time = delay_end;
+                   parallel_vacuum_worker_delay_ns = 0;
+               }
+           }
+           else if (is_analyze)
+               pgstat_progress_incr_param(PROGRESS_ANALYZE_DELAY_TIME,
+                                          INSTR_TIME_GET_NANOSEC(delay));
+           else
+               pgstat_progress_incr_param(PROGRESS_VACUUM_DELAY_TIME,
+                                          INSTR_TIME_GET_NANOSEC(delay));
+       }
+
        /*
         * We don't want to ignore postmaster death during very long vacuums
         * with vacuum_cost_delay configured.  We can't use the usual
index dc3322c256b7db2e5e6535084f5d7e6ecc1e695a..2b9d548cdeb1066e2002dab724686fabff394a02 100644 (file)
@@ -1094,6 +1094,11 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
    InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber],
                          &wal_usage[ParallelWorkerNumber]);
 
+   /* Report any remaining cost-based vacuum delay time */
+   if (track_cost_delay_timing)
+       pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME,
+                                           parallel_vacuum_worker_delay_ns);
+
    TidStoreDetach(dead_items);
 
    /* Pop the error context stack */
index 382c774b245e5014a899c840b71c36ed4d25d496..226af43fe23f77eafd30bd7176938e4fb9e6685b 100644 (file)
@@ -1471,6 +1471,15 @@ struct config_bool ConfigureNamesBool[] =
        true,
        NULL, NULL, NULL
    },
+   {
+       {"track_cost_delay_timing", PGC_SUSET, STATS_CUMULATIVE,
+           gettext_noop("Collects timing statistics for cost-based vacuum delay."),
+           NULL
+       },
+       &track_cost_delay_timing,
+       false,
+       NULL, NULL, NULL
+   },
    {
        {"track_io_timing", PGC_SUSET, STATS_CUMULATIVE,
            gettext_noop("Collects timing statistics for database I/O activity."),
index f039eaa0c62f6baa79ea2c4f2a11bd8eab66e9e8..d472987ed46a9332f00bd4a89f97102474777677 100644 (file)
 #track_activities = on
 #track_activity_query_size = 1024  # (change requires restart)
 #track_counts = on
+#track_cost_delay_timing = off
 #track_io_timing = off
 #track_wal_io_timing = off
 #track_functions = none            # none, pl, all
index f7226b8e439cbfd349d0e635faf2e914c76bf5e8..ff1253db0e9e639cdc4aa50404a734dc145b81ee 100644 (file)
@@ -57,6 +57,6 @@
  */
 
 /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 202502111
+#define CATALOG_VERSION_NO 202502112
 
 #endif
index 18e3179ef63806f5b5cf5872eebba15810bcf110..7c736e7b03bcf251fea929ff2edfa3ed0b27004f 100644 (file)
@@ -28,6 +28,7 @@
 #define PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS      7
 #define PROGRESS_VACUUM_INDEXES_TOTAL          8
 #define PROGRESS_VACUUM_INDEXES_PROCESSED      9
+#define PROGRESS_VACUUM_DELAY_TIME             10
 
 /* Phases of vacuum (as advertised via PROGRESS_VACUUM_PHASE) */
 #define PROGRESS_VACUUM_PHASE_SCAN_HEAP            1
@@ -46,6 +47,7 @@
 #define PROGRESS_ANALYZE_CHILD_TABLES_TOTAL            5
 #define PROGRESS_ANALYZE_CHILD_TABLES_DONE         6
 #define PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID 7
+#define PROGRESS_ANALYZE_DELAY_TIME                    8
 
 /* Phases of analyze (as advertised via PROGRESS_ANALYZE_PHASE) */
 #define PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS     1
index 7fbb738eb8f877e0fecfe9fc1c0a584a084a6c57..1571a66c6bf038d8250327157e875d1a02b0c1f0 100644 (file)
@@ -303,6 +303,7 @@ extern PGDLLIMPORT int vacuum_multixact_freeze_min_age;
 extern PGDLLIMPORT int vacuum_multixact_freeze_table_age;
 extern PGDLLIMPORT int vacuum_failsafe_age;
 extern PGDLLIMPORT int vacuum_multixact_failsafe_age;
+extern PGDLLIMPORT bool track_cost_delay_timing;
 
 /*
  * Relevant for vacuums implementing eager scanning. Normal vacuums may
@@ -330,6 +331,8 @@ extern PGDLLIMPORT bool VacuumFailsafeActive;
 extern PGDLLIMPORT double vacuum_cost_delay;
 extern PGDLLIMPORT int vacuum_cost_limit;
 
+extern PGDLLIMPORT int64 parallel_vacuum_worker_delay_ns;
+
 /* in commands/vacuum.c */
 extern void ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel);
 extern void vacuum(List *relations, VacuumParams *params,
index 3361f6a69c92797eb1f59f2a03fcfbc0afa17301..5baba8d39ff65eda80b32f4a118cb3a1cc5e9dcf 100644 (file)
@@ -1932,7 +1932,8 @@ pg_stat_progress_analyze| SELECT s.pid,
     s.param5 AS ext_stats_computed,
     s.param6 AS child_tables_total,
     s.param7 AS child_tables_done,
-    (s.param8)::oid AS current_child_table_relid
+    (s.param8)::oid AS current_child_table_relid,
+    ((s.param9)::double precision / (1000000)::double precision) AS delay_time
    FROM (pg_stat_get_progress_info('ANALYZE'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
      LEFT JOIN pg_database d ON ((s.datid = d.oid)));
 pg_stat_progress_basebackup| SELECT pid,
@@ -2062,7 +2063,8 @@ pg_stat_progress_vacuum| SELECT s.pid,
     s.param7 AS dead_tuple_bytes,
     s.param8 AS num_dead_item_ids,
     s.param9 AS indexes_total,
-    s.param10 AS indexes_processed
+    s.param10 AS indexes_processed,
+    ((s.param11)::double precision / (1000000)::double precision) AS delay_time
    FROM (pg_stat_get_progress_info('VACUUM'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
      LEFT JOIN pg_database d ON ((s.datid = d.oid)));
 pg_stat_recovery_prefetch| SELECT stats_reset,