From 4feba03d8b925c4cdda460927611d6b17fb01803 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 21 Jan 2025 11:30:42 +0900 Subject: [PATCH] Rework handling of pending data for backend statistics 9aea73fc61d4 has added support for backend statistics, relying on PgStat_EntryRef->pending for its data pending for flush. This design lacks in flexibility, because the pending list does some memory allocation, making it unsuitable if incrementing counters in critical sections. Pending data of backend statistics is reworked so the implementation does not depend on PgStat_EntryRef->pending anymore, relying on a static area of memory to store the counters that are flushed when stats are reported to the pgstats dshash. An advantage of this approach is to allow the pending data to be manipulated in critical sections; some patches are under discussion and require that. The pending data is tracked by PendingBackendStats, local to pgstat_backend.c. Two routines are introduced to allow IO statistics to update the backend-side counters. have_static_pending_cb and flush_static_cb are used for the flush, instead of flush_pending_cb. Author: Bertrand Drouvot, Michael Paquier Discussion: https://postgr.es/m/66efowskppsns35v5u2m7k4sdnl7yoz5bo64tdjwq7r5lhplrz@y7dme5xwh2r5 --- src/backend/utils/activity/pgstat.c | 4 +- src/backend/utils/activity/pgstat_backend.c | 140 ++++++++++++------- src/backend/utils/activity/pgstat_io.c | 23 +-- src/backend/utils/activity/pgstat_relation.c | 4 +- src/include/pgstat.h | 9 ++ src/include/utils/pgstat_internal.h | 9 +- 6 files changed, 116 insertions(+), 73 deletions(-) diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 402e8202bfa..3168b825e25 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -370,9 +370,9 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] .shared_size = sizeof(PgStatShared_Backend), .shared_data_off = offsetof(PgStatShared_Backend, stats), .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats), - .pending_size = sizeof(PgStat_BackendPending), - .flush_pending_cb = pgstat_backend_flush_cb, + .have_static_pending_cb = pgstat_backend_have_pending_cb, + .flush_static_cb = pgstat_backend_flush_cb, .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb, }, diff --git a/src/backend/utils/activity/pgstat_backend.c b/src/backend/utils/activity/pgstat_backend.c index 79e4d0a3053..bcf9e4b1487 100644 --- a/src/backend/utils/activity/pgstat_backend.c +++ b/src/backend/utils/activity/pgstat_backend.c @@ -11,7 +11,9 @@ * This statistics kind uses a proc number as object ID for the hash table * of pgstats. Entries are created each time a process is spawned, and are * dropped when the process exits. These are not written to the pgstats file - * on disk. + * on disk. Pending statistics are managed without direct interactions with + * PgStat_EntryRef->pending, relying on PendingBackendStats instead so as it + * is possible to report data within critical sections. * * Copyright (c) 2001-2025, PostgreSQL Global Development Group * @@ -22,8 +24,49 @@ #include "postgres.h" +#include "storage/bufmgr.h" +#include "utils/memutils.h" #include "utils/pgstat_internal.h" +/* + * Backend statistics counts waiting to be flushed out. These counters may be + * reported within critical sections so we use static memory in order to avoid + * memory allocation. + */ +static PgStat_BackendPending PendingBackendStats; + +/* + * Utility routines to report I/O stats for backends, kept here to avoid + * exposing PendingBackendStats to the outside world. + */ +void +pgstat_count_backend_io_op_time(IOObject io_object, IOContext io_context, + IOOp io_op, instr_time io_time) +{ + Assert(track_io_timing); + + if (!pgstat_tracks_backend_bktype(MyBackendType)) + return; + + Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op)); + + INSTR_TIME_ADD(PendingBackendStats.pending_io.pending_times[io_object][io_context][io_op], + io_time); +} + +void +pgstat_count_backend_io_op(IOObject io_object, IOContext io_context, + IOOp io_op, uint32 cnt, uint64 bytes) +{ + if (!pgstat_tracks_backend_bktype(MyBackendType)) + return; + + Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op)); + + PendingBackendStats.pending_io.counts[io_object][io_context][io_op] += cnt; + PendingBackendStats.pending_io.bytes[io_object][io_context][io_op] += bytes; +} + /* * Returns statistics of a backend by proc number. */ @@ -46,14 +89,21 @@ static void pgstat_flush_backend_entry_io(PgStat_EntryRef *entry_ref) { PgStatShared_Backend *shbackendent; - PgStat_BackendPending *pendingent; PgStat_BktypeIO *bktype_shstats; - PgStat_PendingIO *pending_io; + PgStat_PendingIO pending_io; + + /* + * This function can be called even if nothing at all has happened for IO + * statistics. In this case, avoid unnecessarily modifying the stats + * entry. + */ + if (pg_memory_is_all_zeros(&PendingBackendStats.pending_io, + sizeof(struct PgStat_PendingIO))) + return; shbackendent = (PgStatShared_Backend *) entry_ref->shared_stats; - pendingent = (PgStat_BackendPending *) entry_ref->pending; bktype_shstats = &shbackendent->stats.io_stats; - pending_io = &pendingent->pending_io; + pending_io = PendingBackendStats.pending_io; for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++) { @@ -64,68 +114,74 @@ pgstat_flush_backend_entry_io(PgStat_EntryRef *entry_ref) instr_time time; bktype_shstats->counts[io_object][io_context][io_op] += - pending_io->counts[io_object][io_context][io_op]; + pending_io.counts[io_object][io_context][io_op]; bktype_shstats->bytes[io_object][io_context][io_op] += - pending_io->bytes[io_object][io_context][io_op]; - - time = pending_io->pending_times[io_object][io_context][io_op]; + pending_io.bytes[io_object][io_context][io_op]; + time = pending_io.pending_times[io_object][io_context][io_op]; bktype_shstats->times[io_object][io_context][io_op] += INSTR_TIME_GET_MICROSEC(time); } } } + + /* + * Clear out the statistics buffer, so it can be re-used. + */ + MemSet(&PendingBackendStats.pending_io, 0, sizeof(PgStat_PendingIO)); } /* - * Wrapper routine to flush backend statistics. + * Flush out locally pending backend statistics + * + * "flags" parameter controls which statistics to flush. Returns true + * if some statistics could not be flushed due to lock contention. */ -static bool -pgstat_flush_backend_entry(PgStat_EntryRef *entry_ref, bool nowait, - bits32 flags) +bool +pgstat_flush_backend(bool nowait, bits32 flags) { + PgStat_EntryRef *entry_ref; + if (!pgstat_tracks_backend_bktype(MyBackendType)) return false; - if (!pgstat_lock_entry(entry_ref, nowait)) + if (pg_memory_is_all_zeros(&PendingBackendStats, + sizeof(struct PgStat_BackendPending))) return false; + entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_BACKEND, InvalidOid, + MyProcNumber, nowait); + if (!entry_ref) + return true; + /* Flush requested statistics */ if (flags & PGSTAT_BACKEND_FLUSH_IO) pgstat_flush_backend_entry_io(entry_ref); pgstat_unlock_entry(entry_ref); - return true; + return false; } /* - * Callback to flush out locally pending backend statistics. - * - * If no stats have been recorded, this function returns false. + * Check if there are any backend stats waiting for flush. */ bool -pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) +pgstat_backend_have_pending_cb(void) { - return pgstat_flush_backend_entry(entry_ref, nowait, PGSTAT_BACKEND_FLUSH_ALL); + return (!pg_memory_is_all_zeros(&PendingBackendStats, + sizeof(struct PgStat_BackendPending))); } /* - * Flush out locally pending backend statistics + * Callback to flush out locally pending backend statistics. * - * "flags" parameter controls which statistics to flush. + * If some stats could not be flushed due to lock contention, return true. */ -void -pgstat_flush_backend(bool nowait, bits32 flags) +bool +pgstat_backend_flush_cb(bool nowait) { - PgStat_EntryRef *entry_ref; - - if (!pgstat_tracks_backend_bktype(MyBackendType)) - return; - - entry_ref = pgstat_get_entry_ref(PGSTAT_KIND_BACKEND, InvalidOid, - MyProcNumber, false, NULL); - (void) pgstat_flush_backend_entry(entry_ref, nowait, flags); + return pgstat_flush_backend(nowait, PGSTAT_BACKEND_FLUSH_ALL); } /* @@ -137,9 +193,8 @@ pgstat_create_backend(ProcNumber procnum) PgStat_EntryRef *entry_ref; PgStatShared_Backend *shstatent; - entry_ref = pgstat_prep_pending_entry(PGSTAT_KIND_BACKEND, InvalidOid, - procnum, NULL); - + entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_BACKEND, InvalidOid, + MyProcNumber, false); shstatent = (PgStatShared_Backend *) entry_ref->shared_stats; /* @@ -147,20 +202,9 @@ pgstat_create_backend(ProcNumber procnum) * e.g. if we previously used this proc number. */ memset(&shstatent->stats, 0, sizeof(shstatent->stats)); -} - -/* - * Find or create a local PgStat_BackendPending entry for proc number. - */ -PgStat_BackendPending * -pgstat_prep_backend_pending(ProcNumber procnum) -{ - PgStat_EntryRef *entry_ref; - - entry_ref = pgstat_prep_pending_entry(PGSTAT_KIND_BACKEND, InvalidOid, - procnum, NULL); + pgstat_unlock_entry(entry_ref); - return entry_ref->pending; + MemSet(&PendingBackendStats, 0, sizeof(PgStat_BackendPending)); } /* diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c index 027aad8b24e..6ff5d9e96a1 100644 --- a/src/backend/utils/activity/pgstat_io.c +++ b/src/backend/utils/activity/pgstat_io.c @@ -73,18 +73,12 @@ pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op, Assert(pgstat_is_ioop_tracked_in_bytes(io_op) || bytes == 0); Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op)); - if (pgstat_tracks_backend_bktype(MyBackendType)) - { - PgStat_BackendPending *entry_ref; - - entry_ref = pgstat_prep_backend_pending(MyProcNumber); - entry_ref->pending_io.counts[io_object][io_context][io_op] += cnt; - entry_ref->pending_io.bytes[io_object][io_context][io_op] += bytes; - } - PendingIOStats.counts[io_object][io_context][io_op] += cnt; PendingIOStats.bytes[io_object][io_context][io_op] += bytes; + /* Add the per-backend counts */ + pgstat_count_backend_io_op(io_object, io_context, io_op, cnt, bytes); + have_iostats = true; } @@ -145,14 +139,9 @@ pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op], io_time); - if (pgstat_tracks_backend_bktype(MyBackendType)) - { - PgStat_BackendPending *entry_ref; - - entry_ref = pgstat_prep_backend_pending(MyProcNumber); - INSTR_TIME_ADD(entry_ref->pending_io.pending_times[io_object][io_context][io_op], - io_time); - } + /* Add the per-backend count */ + pgstat_count_backend_io_op_time(io_object, io_context, io_op, + io_time); } pgstat_count_io_op(io_object, io_context, io_op, cnt, bytes); diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index 09247ba0971..965a7fe2c64 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -264,7 +264,7 @@ pgstat_report_vacuum(Oid tableoid, bool shared, * VACUUM command has processed all tables and committed. */ pgstat_flush_io(false); - pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO); + (void) pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO); } /* @@ -351,7 +351,7 @@ pgstat_report_analyze(Relation rel, /* see pgstat_report_vacuum() */ pgstat_flush_io(false); - pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO); + (void) pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO); } /* diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 2d40fe3e70f..d0d45150977 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -540,6 +540,15 @@ extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void); * Functions in pgstat_backend.c */ +/* used by pgstat_io.c for I/O stats tracked in backends */ +extern void pgstat_count_backend_io_op_time(IOObject io_object, + IOContext io_context, + IOOp io_op, + instr_time io_time); +extern void pgstat_count_backend_io_op(IOObject io_object, + IOContext io_context, + IOOp io_op, uint32 cnt, + uint64 bytes); extern PgStat_Backend *pgstat_fetch_stat_backend(ProcNumber procNumber); extern bool pgstat_tracks_backend_bktype(BackendType bktype); extern void pgstat_create_backend(ProcNumber procnum); diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index 8914aaca9ab..a3d39d2b725 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -624,10 +624,11 @@ extern void pgstat_archiver_snapshot_cb(void); #define PGSTAT_BACKEND_FLUSH_IO (1 << 0) /* Flush I/O statistics */ #define PGSTAT_BACKEND_FLUSH_ALL (PGSTAT_BACKEND_FLUSH_IO) -extern void pgstat_flush_backend(bool nowait, bits32 flags); -extern PgStat_BackendPending *pgstat_prep_backend_pending(ProcNumber procnum); -extern bool pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait); -extern void pgstat_backend_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts); +extern bool pgstat_flush_backend(bool nowait, bits32 flags); +extern bool pgstat_backend_flush_cb(bool nowait); +extern bool pgstat_backend_have_pending_cb(void); +extern void pgstat_backend_reset_timestamp_cb(PgStatShared_Common *header, + TimestampTz ts); /* * Functions in pgstat_bgwriter.c -- 2.30.2