From 2fd2effc50824a8775a088435a13f47b7a6f3b94 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Wed, 17 Jun 2020 11:39:17 -0400 Subject: [PATCH] Improve server code to read files as part of a base backup. Don't use fread(), since that doesn't necessarily set errno. We could use read() instead, but it's even better to use pg_pread(), which allows us to avoid some extra calls to seek to the desired location in the file. Also, advertise a wait event while reading from a file, as we do for most other places where we're reading data from files. Patch by me, reviewed by Hamid Akhtar. Discussion: http://postgr.es/m/CA+TgmobBw-3573vMosGj06r72ajHsYeKtksT_oTxH8XvTL7DxA@mail.gmail.com --- doc/src/sgml/monitoring.sgml | 4 + src/backend/postmaster/pgstat.c | 3 + src/backend/replication/basebackup.c | 143 ++++++++++++++------------- src/include/pgstat.h | 3 +- 4 files changed, 86 insertions(+), 67 deletions(-) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 89662cc0a36..dfa9d0d6410 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -1193,6 +1193,10 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser + + BaseBackupRead + Waiting for base backup to read from a file. + BufFileRead Waiting for a read from a buffered file. diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index e96134dac8a..c022597bc09 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -3931,6 +3931,9 @@ pgstat_get_wait_io(WaitEventIO w) switch (w) { + case WAIT_EVENT_BASEBACKUP_READ: + event_name = "BaseBackupRead"; + break; case WAIT_EVENT_BUFFILE_READ: event_name = "BufFileRead"; break; diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index efcf1e6eb56..096b0fcef0d 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -81,6 +81,8 @@ static int compareWalFileNames(const ListCell *a, const ListCell *b); static void throttle(size_t increment); static void update_basebackup_progress(int64 delta); static bool is_checksummed_file(const char *fullpath, const char *filename); +static int basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset, + const char *filename, bool partial_read_ok); /* Was the backup currently in-progress initiated in recovery mode? */ static bool backup_started_in_recovery = false; @@ -98,18 +100,6 @@ static char *statrelpath = NULL; */ #define THROTTLING_FREQUENCY 8 -/* - * Checks whether we encountered any error in fread(). fread() doesn't give - * any clue what has happened, so we check with ferror(). Also, neither - * fread() nor ferror() set errno, so we just throw a generic error. - */ -#define CHECK_FREAD_ERROR(fp, filename) \ -do { \ - if (ferror(fp)) \ - ereport(ERROR, \ - (errmsg("could not read from file \"%s\"", filename))); \ -} while (0) - /* The actual number of bytes, transfer of which may cause sleep. */ static uint64 throttling_sample; @@ -600,7 +590,7 @@ perform_base_backup(basebackup_options *opt) foreach(lc, walFileList) { char *walFileName = (char *) lfirst(lc); - FILE *fp; + int fd; char buf[TAR_SEND_SIZE]; size_t cnt; pgoff_t len = 0; @@ -608,8 +598,8 @@ perform_base_backup(basebackup_options *opt) snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFileName); XLogFromFileName(walFileName, &tli, &segno, wal_segment_size); - fp = AllocateFile(pathbuf, "rb"); - if (fp == NULL) + fd = OpenTransientFile(pathbuf, O_RDONLY | PG_BINARY); + if (fd < 0) { int save_errno = errno; @@ -626,7 +616,7 @@ perform_base_backup(basebackup_options *opt) errmsg("could not open file \"%s\": %m", pathbuf))); } - if (fstat(fileno(fp), &statbuf) != 0) + if (fstat(fd, &statbuf) != 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", @@ -642,9 +632,10 @@ perform_base_backup(basebackup_options *opt) /* send the WAL file itself */ _tarWriteHeader(pathbuf, NULL, &statbuf, false); - while ((cnt = fread(buf, 1, - Min(sizeof(buf), wal_segment_size - len), - fp)) > 0) + while ((cnt = basebackup_read_file(fd, buf, + Min(sizeof(buf), + wal_segment_size - len), + len, pathbuf, true)) > 0) { CheckXLogRemoved(segno, tli); /* Send the chunk as a CopyData message */ @@ -660,8 +651,6 @@ perform_base_backup(basebackup_options *opt) break; } - CHECK_FREAD_ERROR(fp, pathbuf); - if (len != wal_segment_size) { CheckXLogRemoved(segno, tli); @@ -676,7 +665,7 @@ perform_base_backup(basebackup_options *opt) */ Assert(wal_segment_size % TAR_BLOCK_SIZE == 0); - FreeFile(fp); + CloseTransientFile(fd); /* * Mark file as archived, otherwise files can get archived again @@ -1575,7 +1564,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf, bool missing_ok, Oid dboid, backup_manifest_info *manifest, const char *spcoid) { - FILE *fp; + int fd; BlockNumber blkno = 0; bool block_retry = false; char buf[TAR_SEND_SIZE]; @@ -1594,8 +1583,8 @@ sendFile(const char *readfilename, const char *tarfilename, pg_checksum_init(&checksum_ctx, manifest->checksum_type); - fp = AllocateFile(readfilename, "rb"); - if (fp == NULL) + fd = OpenTransientFile(readfilename, O_RDONLY | PG_BINARY); + if (fd < 0) { if (errno == ENOENT && missing_ok) return false; @@ -1637,8 +1626,27 @@ sendFile(const char *readfilename, const char *tarfilename, } } - while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0) + /* + * Loop until we read the amount of data the caller told us to expect. The + * file could be longer, if it was extended while we were sending it, but + * for a base backup we can ignore such extended data. It will be restored + * from WAL. + */ + while (len < statbuf->st_size) { + /* Try to read some more data. */ + cnt = basebackup_read_file(fd, buf, + Min(sizeof(buf), statbuf->st_size - len), + len, readfilename, true); + + /* + * If we hit end-of-file, a concurrent truncation must have occurred. + * That's not an error condition, because WAL replay will fix things + * up. + */ + if (cnt == 0) + break; + /* * The checksums are verified at block level, so we iterate over the * buffer in chunks of BLCKSZ, after making sure that @@ -1689,16 +1697,15 @@ sendFile(const char *readfilename, const char *tarfilename, */ if (block_retry == false) { - /* Reread the failed block */ - if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1) - { - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not fseek in file \"%s\": %m", - readfilename))); - } + int reread_cnt; - if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ) + /* Reread the failed block */ + reread_cnt = + basebackup_read_file(fd, buf + BLCKSZ * i, + BLCKSZ, len + BLCKSZ * i, + readfilename, + false); + if (reread_cnt == 0) { /* * If we hit end-of-file, a concurrent @@ -1708,24 +1715,8 @@ sendFile(const char *readfilename, const char *tarfilename, * code that handles that case. (We must fix * up cnt first, though.) */ - if (feof(fp)) - { - cnt = BLCKSZ * i; - break; - } - - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not reread block %d of file \"%s\": %m", - blkno, readfilename))); - } - - if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1) - { - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not fseek in file \"%s\": %m", - readfilename))); + cnt = BLCKSZ * i; + break; } /* Set flag so we know a retry was attempted */ @@ -1768,20 +1759,8 @@ sendFile(const char *readfilename, const char *tarfilename, len += cnt; throttle(cnt); - - if (feof(fp) || len >= statbuf->st_size) - { - /* - * Reached end of file. The file could be longer, if it was - * extended while we were sending it, but for a base backup we can - * ignore such extended data. It will be restored from WAL. - */ - break; - } } - CHECK_FREAD_ERROR(fp, readfilename); - /* If the file was truncated while we were sending it, pad it with zeros */ if (len < statbuf->st_size) { @@ -1810,7 +1789,7 @@ sendFile(const char *readfilename, const char *tarfilename, update_basebackup_progress(pad); } - FreeFile(fp); + CloseTransientFile(fd); if (checksum_failures > 1) { @@ -1996,3 +1975,35 @@ update_basebackup_progress(int64 delta) pgstat_progress_update_multi_param(nparam, index, val); } + +/* + * Read some data from a file, setting a wait event and reporting any error + * encountered. + * + * If partial_read_ok is false, also report an error if the number of bytes + * read is not equal to the number of bytes requested. + * + * Returns the number of bytes read. + */ +static int +basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset, + const char *filename, bool partial_read_ok) +{ + int rc; + + pgstat_report_wait_start(WAIT_EVENT_BASEBACKUP_READ); + rc = pg_pread(fd, buf, nbytes, offset); + pgstat_report_wait_end(); + + if (rc < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", filename))); + if (!partial_read_ok && rc > 0 && rc != nbytes) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": read %d of %zu", + filename, rc, nbytes))); + + return rc; +} diff --git a/src/include/pgstat.h b/src/include/pgstat.h index c55dc1481ca..13872013823 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -913,7 +913,8 @@ typedef enum */ typedef enum { - WAIT_EVENT_BUFFILE_READ = PG_WAIT_IO, + WAIT_EVENT_BASEBACKUP_READ = PG_WAIT_IO, + WAIT_EVENT_BUFFILE_READ, WAIT_EVENT_BUFFILE_WRITE, WAIT_EVENT_CONTROL_FILE_READ, WAIT_EVENT_CONTROL_FILE_SYNC, -- 2.39.5