Add wait event for pg_usleep() in perform_spin_delay()
author	Andres Freund <andres@anarazel.de>
Tue, 22 Nov 2022 04:34:17 +0000 (20:34 -0800)
committer	Andres Freund <andres@anarazel.de>
Tue, 22 Nov 2022 04:34:17 +0000 (20:34 -0800)
The lwlock wait queue scalability issue fixed in a4adc31f690 was quite hard to
find because of the exponential backoff and because we adjust spins_per_delay
over time within a backend.

To make it easier to find similar issues in the future, add a wait event for
the pg_usleep() in perform_spin_delay(). Showing a wait event while spinning
without sleeping would increase the overhead of spinlocks, which we do not
want.

We may at some later point want to have more granular wait events, but that'd
be a substantial amount of work. This provides at least some insights into
something currently hard to observe.

Reviewed-by: Michael Paquier <michael@paquier.xyz>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
Discussion: https://postgr.es/m/20221120204310.xywrhyxyytsajuuq@awork3.anarazel.de
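
For context, a simplified sketch (not the literal upstream source) of the kind of
spin-wait loop that reaches perform_spin_delay(), modelled on s_lock() in
src/backend/storage/lmgr/s_lock.c; TAS_SPIN, SpinDelayStatus, init_spin_delay and
finish_spin_delay are taken from storage/s_lock.h and shown here only for
illustration:

    /*
     * Sketch of a spinlock acquisition loop.  Each failed test-and-set goes
     * through perform_spin_delay(); once the spin budget is exhausted, that
     * function calls pg_usleep(), which is the sleep reported as the new
     * SpinDelay wait event.  Pure spinning remains unreported, so the fast
     * path is unchanged.
     */
    #include "postgres.h"

    #include "storage/s_lock.h"

    int
    s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
    {
        SpinDelayStatus delayStatus;

        init_spin_delay(&delayStatus, file, line, func);

        while (TAS_SPIN(lock))
        {
            /* spins, then sleeps with exponential backoff */
            perform_spin_delay(&delayStatus);
        }

        finish_spin_delay(&delayStatus);

        return delayStatus.delays;
    }

Because the event is registered in the Timeout wait event class (see the
wait_event.c and wait_event.h hunks below), a sleeping backend shows up in
pg_stat_activity with wait_event_type "Timeout" and wait_event "SpinDelay".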

doc/src/sgml/monitoring.sgml
src/backend/storage/lmgr/s_lock.c
src/backend/utils/activity/wait_event.c
src/include/utils/wait_event.h

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index e5d622d5147e570f1b4bad9abbc3cb0a42324c20..e2426f7210cbbb7e607e75d010243e2bbc8fda00 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -2289,6 +2289,10 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       <entry>Waiting while sending synchronization requests to the
        checkpointer, because the request queue is full.</entry>
      </row>
+     <row>
+      <entry><literal>SpinDelay</literal></entry>
+      <entry>Waiting while acquiring a contended spinlock.</entry>
+     </row>
      <row>
       <entry><literal>VacuumDelay</literal></entry>
       <entry>Waiting in a cost-based vacuum delay point.</entry>
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index 4e473ec27ec75708dccb8cb2b4ea908392f637b1..c3227b11abc8673494424e6f1e62fe73deca3717 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -53,6 +53,7 @@
 #include "common/pg_prng.h"
 #include "port/atomics.h"
 #include "storage/s_lock.h"
+#include "utils/wait_event.h"
 
 #define MIN_SPINS_PER_DELAY 10
 #define MAX_SPINS_PER_DELAY 1000
@@ -136,7 +137,17 @@ perform_spin_delay(SpinDelayStatus *status)
        if (status->cur_delay == 0) /* first time to delay? */
            status->cur_delay = MIN_DELAY_USEC;
 
+       /*
+        * Once we start sleeping, the overhead of reporting a wait event is
+        * justified. Actively spinning easily stands out in profilers, but
+        * sleeping with an exponential backoff is harder to spot...
+        *
+        * We might want to report something more granular at some point, but
+        * this is better than nothing.
+        */
+       pgstat_report_wait_start(WAIT_EVENT_SPIN_DELAY);
        pg_usleep(status->cur_delay);
+       pgstat_report_wait_end();
 
 #if defined(S_LOCK_TEST)
        fprintf(stdout, "*");
diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c
index 92f24a6c9bc418d71e44c8b620246fa75267241a..b2abd75ddbd61435972f98f527c139ea6a1a8919 100644
--- a/src/backend/utils/activity/wait_event.c
+++ b/src/backend/utils/activity/wait_event.c
@@ -497,6 +497,9 @@ pgstat_get_wait_timeout(WaitEventTimeout w)
        case WAIT_EVENT_REGISTER_SYNC_REQUEST:
            event_name = "RegisterSyncRequest";
            break;
+       case WAIT_EVENT_SPIN_DELAY:
+           event_name = "SpinDelay";
+           break;
        case WAIT_EVENT_VACUUM_DELAY:
            event_name = "VacuumDelay";
            break;
diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h
index 6f2d5612e062e43fbb9b2d2afd1a4da5b929be7f..0b2100be4a270a24a3a1c8e8782696dc8ac7a4a7 100644
--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -145,6 +145,7 @@ typedef enum
    WAIT_EVENT_RECOVERY_APPLY_DELAY,
    WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL,
    WAIT_EVENT_REGISTER_SYNC_REQUEST,
+   WAIT_EVENT_SPIN_DELAY,
    WAIT_EVENT_VACUUM_DELAY,
    WAIT_EVENT_VACUUM_TRUNCATE
 } WaitEventTimeout;