From 8c16ad3b43299695f203f9157a2b27c22b9ed634 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 6 Sep 2023 16:27:16 -0700 Subject: [PATCH] Allow using syncfs() in frontend utilities. This commit allows specifying a --sync-method in several frontend utilities that must synchronize many files to disk (initdb, pg_basebackup, pg_checksums, pg_dump, pg_rewind, and pg_upgrade). On Linux, users can specify "syncfs" to synchronize the relevant file systems instead of calling fsync() for every single file. In many cases, using syncfs() is much faster. As with recovery_init_sync_method, this new option comes with some caveats. The descriptions of these caveats have been moved to a new appendix section in the documentation. Co-authored-by: Justin Pryzby Reviewed-by: Michael Paquier, Thomas Munro, Robert Haas, Justin Pryzby Discussion: https://postgr.es/m/20210930004340.GM831%40telsasoft.com --- doc/src/sgml/config.sgml | 12 +++------ doc/src/sgml/filelist.sgml | 1 + doc/src/sgml/postgres.sgml | 1 + doc/src/sgml/ref/initdb.sgml | 22 ++++++++++++++++ doc/src/sgml/ref/pg_basebackup.sgml | 25 +++++++++++++++++++ doc/src/sgml/ref/pg_checksums.sgml | 22 ++++++++++++++++ doc/src/sgml/ref/pg_dump.sgml | 21 ++++++++++++++++ doc/src/sgml/ref/pg_rewind.sgml | 22 ++++++++++++++++ doc/src/sgml/ref/pgupgrade.sgml | 23 +++++++++++++++++ doc/src/sgml/syncfs.sgml | 36 +++++++++++++++++++++++++++ src/bin/initdb/initdb.c | 6 +++++ src/bin/initdb/t/001_initdb.pl | 12 +++++++++ src/bin/pg_basebackup/pg_basebackup.c | 7 ++++++ src/bin/pg_checksums/pg_checksums.c | 6 +++++ src/bin/pg_dump/pg_dump.c | 7 ++++++ src/bin/pg_rewind/pg_rewind.c | 8 ++++++ src/bin/pg_upgrade/option.c | 13 ++++++++++ src/bin/pg_upgrade/pg_upgrade.c | 6 +++-- src/bin/pg_upgrade/pg_upgrade.h | 1 + src/fe_utils/option_utils.c | 27 ++++++++++++++++++++ src/include/fe_utils/option_utils.h | 4 +++ 21 files changed, 271 insertions(+), 11 deletions(-) create mode 100644 doc/src/sgml/syncfs.sgml diff --git 
a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index f0a50a5f9ad..6bc1b215db9 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -10511,15 +10511,9 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' On Linux, syncfs may be used instead, to ask the operating system to synchronize the whole file systems that contain the data directory, the WAL files and each tablespace (but not any other - file systems that may be reachable through symbolic links). This may - be a lot faster than the fsync setting, because it - doesn't need to open each file one by one. On the other hand, it may - be slower if a file system is shared by other applications that - modify a lot of files, since those files will also be written to disk. - Furthermore, on versions of Linux before 5.8, I/O errors encountered - while writing data to disk may not be reported to - PostgreSQL, and relevant error messages may - appear only in kernel logs. + file systems that may be reachable through symbolic links). See + <xref linkend="syncfs"/> for more information about using + syncfs(). This parameter can only be set in the diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index e3d94a62b3f..4c63a7e7689 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -183,6 +183,7 @@ +<!ENTITY syncfs SYSTEM "syncfs.sgml"> diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml index 2e271862fc1..f629524be07 100644 --- a/doc/src/sgml/postgres.sgml +++ b/doc/src/sgml/postgres.sgml @@ -294,6 +294,7 @@ break is not needed in a wider output rendering. &acronyms; &glossary; &color; + &syncfs; &obsolete; diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml index 22f1011781f..8a09c5c4388 100644 --- a/doc/src/sgml/ref/initdb.sgml +++ b/doc/src/sgml/ref/initdb.sgml @@ -365,6 +365,28 @@ PostgreSQL documentation + + <option>--sync-method</option> + + + When set to fsync, which is the default, + initdb will recursively open and synchronize all + files in the data directory.
The search for files will follow symbolic + links for the WAL directory and each configured tablespace. + + + On Linux, syncfs may be used instead to ask the + operating system to synchronize the whole file systems that contain the + data directory, the WAL files, and each tablespace. See + <xref linkend="syncfs"/> for more information about using + syncfs(). + + + This option has no effect when <option>--no-sync</option> is used. + + + + diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml index 79d3e657c32..d2b8ddd200c 100644 --- a/doc/src/sgml/ref/pg_basebackup.sgml +++ b/doc/src/sgml/ref/pg_basebackup.sgml @@ -594,6 +594,31 @@ PostgreSQL documentation + + <option>--sync-method</option> + + + When set to fsync, which is the default, + pg_basebackup will recursively open and synchronize + all files in the backup directory. When the plain format is used, the + search for files will follow symbolic links for the WAL directory and + each configured tablespace. + + + On Linux, syncfs may be used instead to ask the + operating system to synchronize the whole file system that contains the + backup directory. When the plain format is used, + pg_basebackup will also synchronize the file systems + that contain the WAL files and each tablespace. See + <xref linkend="syncfs"/> for more information about using + syncfs(). + + + This option has no effect when <option>--no-sync</option> is used. + + + + diff --git a/doc/src/sgml/ref/pg_checksums.sgml b/doc/src/sgml/ref/pg_checksums.sgml index a3d0b0f0a3d..7b44ba71cf9 100644 --- a/doc/src/sgml/ref/pg_checksums.sgml +++ b/doc/src/sgml/ref/pg_checksums.sgml @@ -139,6 +139,28 @@ PostgreSQL documentation + + <option>--sync-method</option> + + + When set to fsync, which is the default, + pg_checksums will recursively open and synchronize + all files in the data directory. The search for files will follow + symbolic links for the WAL directory and each configured tablespace. + + + On Linux, syncfs may be used instead to ask the + operating system to synchronize the whole file systems that contain the + data directory, the WAL files, and each tablespace.
See + <xref linkend="syncfs"/> for more information about using + syncfs(). + + + This option has no effect when <option>--no-sync</option> is used. + + + + diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index a3cf0608f5b..c1e2220b3cb 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -1179,6 +1179,27 @@ PostgreSQL documentation + + <option>--sync-method</option> + + + When set to fsync, which is the default, + pg_dump --format=directory will recursively open and + synchronize all files in the archive directory. + + + On Linux, syncfs may be used instead to ask the + operating system to synchronize the whole file system that contains the + archive directory. See <xref linkend="syncfs"/> for more information + about using syncfs(). + + + This option has no effect when <option>--no-sync</option> is used or + <option>--format</option> is not set to directory. + + + + diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml index 15cddd086b7..80dff161682 100644 --- a/doc/src/sgml/ref/pg_rewind.sgml +++ b/doc/src/sgml/ref/pg_rewind.sgml @@ -284,6 +284,28 @@ PostgreSQL documentation + + <option>--sync-method</option> + + + When set to fsync, which is the default, + pg_rewind will recursively open and synchronize all + files in the data directory. The search for files will follow symbolic + links for the WAL directory and each configured tablespace. + + + On Linux, syncfs may be used instead to ask the + operating system to synchronize the whole file systems that contain the + data directory, the WAL files, and each tablespace. See + <xref linkend="syncfs"/> for more information about using + syncfs(). + + + This option has no effect when <option>--no-sync</option> is used. + + + + diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml index 7816b4c6859..bea0d1b93f9 100644 --- a/doc/src/sgml/ref/pgupgrade.sgml +++ b/doc/src/sgml/ref/pgupgrade.sgml @@ -190,6 +190,29 @@ PostgreSQL documentation variable PGSOCKETDIR + + <option>--sync-method</option> + + + When set to fsync, which is the default, + pg_upgrade will recursively open and synchronize all + files in the upgraded cluster's data directory.
The search for files + will follow symbolic links for the WAL directory and each configured + tablespace. + + + On Linux, syncfs may be used instead to ask the + operating system to synchronize the whole file systems that contain the + upgraded cluster's data directory, its WAL files, and each tablespace. + See <xref linkend="syncfs"/> for more information about using + syncfs(). + + + This option has no effect when <option>--no-sync</option> is used. + + + + username username diff --git a/doc/src/sgml/syncfs.sgml b/doc/src/sgml/syncfs.sgml new file mode 100644 index 00000000000..00457d24579 --- /dev/null +++ b/doc/src/sgml/syncfs.sgml @@ -0,0 +1,36 @@ + + + + <function>syncfs()</function> Caveats + + + syncfs + + + + On Linux syncfs() may be specified for some + configuration parameters (e.g., + <xref linkend="guc-recovery-init-sync-method"/>), server applications (e.g., + pg_upgrade), and client applications (e.g., + pg_basebackup) that involve synchronizing many + files to disk. syncfs() is advantageous in many cases, + but there are some trade-offs to keep in mind. + + + + Since syncfs() instructs the operating system to + synchronize a whole file system, it typically requires many fewer system + calls than using fsync() to synchronize each file one by + one. Therefore, using syncfs() may be a lot faster than + using fsync(). However, it may be slower if a file + system is shared by other applications that modify a lot of files, since + those files will also be written to disk. + + + + Furthermore, on versions of Linux before 5.8, I/O errors encountered while + writing data to disk may not be reported to the calling program, and relevant + error messages may appear only in kernel logs.
+ + + diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 51198e66655..bddb30d766c 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -2467,6 +2467,7 @@ usage(const char *progname) printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n")); printf(_(" --no-instructions do not print instructions for next steps\n")); printf(_(" -s, --show show internal settings\n")); + printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); printf(_(" -S, --sync-only only sync database files to disk, then exit\n")); printf(_("\nOther options:\n")); printf(_(" -V, --version output version information, then exit\n")); @@ -3107,6 +3108,7 @@ main(int argc, char *argv[]) {"locale-provider", required_argument, NULL, 15}, {"icu-locale", required_argument, NULL, 16}, {"icu-rules", required_argument, NULL, 17}, + {"sync-method", required_argument, NULL, 18}, {NULL, 0, NULL, 0} }; @@ -3287,6 +3289,10 @@ main(int argc, char *argv[]) case 17: icu_rules = pg_strdup(optarg); break; + case 18: + if (!parse_sync_method(optarg, &sync_method)) + exit(1); + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl index 2d7469d2fc3..45f96cd8bbf 100644 --- a/src/bin/initdb/t/001_initdb.pl +++ b/src/bin/initdb/t/001_initdb.pl @@ -16,6 +16,7 @@ use Test::More; my $tempdir = PostgreSQL::Test::Utils::tempdir; my $xlogdir = "$tempdir/pgxlog"; my $datadir = "$tempdir/data"; +my $supports_syncfs = check_pg_config("#define HAVE_SYNCFS 1"); program_help_ok('initdb'); program_version_ok('initdb'); @@ -82,6 +83,17 @@ command_fails([ 'pg_checksums', '-D', $datadir ], command_ok([ 'initdb', '-S', $datadir ], 'sync only'); command_fails([ 'initdb', $datadir ], 'existing data directory'); +if ($supports_syncfs) +{ + command_ok([ 'initdb', '-S', $datadir, '--sync-method', 'syncfs' ], + 'sync 
method syncfs'); +} +else +{ + command_fails([ 'initdb', '-S', $datadir, '--sync-method', 'syncfs' ], + 'sync method syncfs'); +} + # Check group access on PGDATA SKIP: { diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index e9033af5c03..74f5332e956 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -425,6 +425,8 @@ usage(void) printf(_(" --no-slot prevent creation of temporary replication slot\n")); printf(_(" --no-verify-checksums\n" " do not verify checksums\n")); + printf(_(" --sync-method=METHOD\n" + " set method for syncing files to disk\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\nConnection options:\n")); printf(_(" -d, --dbname=CONNSTR connection string\n")); @@ -2282,6 +2284,7 @@ main(int argc, char **argv) {"no-manifest", no_argument, NULL, 5}, {"manifest-force-encode", no_argument, NULL, 6}, {"manifest-checksums", required_argument, NULL, 7}, + {"sync-method", required_argument, NULL, 8}, {NULL, 0, NULL, 0} }; int c; @@ -2453,6 +2456,10 @@ main(int argc, char **argv) case 7: manifest_checksums = pg_strdup(optarg); break; + case 8: + if (!parse_sync_method(optarg, &sync_method)) + exit(1); + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c index 836ee654059..e009ba5e0bc 100644 --- a/src/bin/pg_checksums/pg_checksums.c +++ b/src/bin/pg_checksums/pg_checksums.c @@ -78,6 +78,7 @@ usage(void) printf(_(" -f, --filenode=FILENODE check only relation with specified filenode\n")); printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n")); printf(_(" -P, --progress show progress information\n")); + printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); printf(_(" -v, --verbose output verbose messages\n")); printf(_(" -V, --version 
output version information, then exit\n")); printf(_(" -?, --help show this help, then exit\n")); @@ -436,6 +437,7 @@ main(int argc, char *argv[]) {"no-sync", no_argument, NULL, 'N'}, {"progress", no_argument, NULL, 'P'}, {"verbose", no_argument, NULL, 'v'}, + {"sync-method", required_argument, NULL, 1}, {NULL, 0, NULL, 0} }; @@ -494,6 +496,10 @@ main(int argc, char *argv[]) case 'v': verbose = true; break; + case 1: + if (!parse_sync_method(optarg, &sync_method)) + exit(1); + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 280662bc332..f7b61766921 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -432,6 +432,7 @@ main(int argc, char **argv) {"table-and-children", required_argument, NULL, 12}, {"exclude-table-and-children", required_argument, NULL, 13}, {"exclude-table-data-and-children", required_argument, NULL, 14}, + {"sync-method", required_argument, NULL, 15}, {NULL, 0, NULL, 0} }; @@ -658,6 +659,11 @@ main(int argc, char **argv) optarg); break; + case 15: + if (!parse_sync_method(optarg, &sync_method)) + exit_nicely(1); + break; + default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -1069,6 +1075,7 @@ help(const char *progname) " compress as specified\n")); printf(_(" --lock-wait-timeout=TIMEOUT fail after waiting TIMEOUT for a table lock\n")); printf(_(" --no-sync do not wait for changes to be written safely to disk\n")); + printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\nOptions controlling the output content:\n")); diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index bdfacf32632..bfd44a284e2 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -22,6 +22,7 
@@ #include "common/file_perm.h" #include "common/restricted_token.h" #include "common/string.h" +#include "fe_utils/option_utils.h" #include "fe_utils/recovery_gen.h" #include "fe_utils/string_utils.h" #include "file_ops.h" @@ -108,6 +109,7 @@ usage(const char *progname) " file when running target cluster\n")); printf(_(" --debug write a lot of debug messages\n")); printf(_(" --no-ensure-shutdown do not automatically fix unclean shutdown\n")); + printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); printf(_(" -V, --version output version information, then exit\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); @@ -132,6 +134,7 @@ main(int argc, char **argv) {"no-sync", no_argument, NULL, 'N'}, {"progress", no_argument, NULL, 'P'}, {"debug", no_argument, NULL, 3}, + {"sync-method", required_argument, NULL, 6}, {NULL, 0, NULL, 0} }; int option_index; @@ -219,6 +222,11 @@ main(int argc, char **argv) config_file = pg_strdup(optarg); break; + case 6: + if (!parse_sync_method(optarg, &sync_method)) + exit(1); + break; + default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c index 640361009e3..b9d900d0db4 100644 --- a/src/bin/pg_upgrade/option.c +++ b/src/bin/pg_upgrade/option.c @@ -14,6 +14,7 @@ #endif #include "common/string.h" +#include "fe_utils/option_utils.h" #include "getopt_long.h" #include "pg_upgrade.h" #include "utils/pidfile.h" @@ -57,12 +58,14 @@ parseCommandLine(int argc, char *argv[]) {"verbose", no_argument, NULL, 'v'}, {"clone", no_argument, NULL, 1}, {"copy", no_argument, NULL, 2}, + {"sync-method", required_argument, NULL, 3}, {NULL, 0, NULL, 0} }; int option; /* Command line option */ int optindex = 0; /* used by getopt_long */ int os_user_effective_id; + DataDirSyncMethod unused; user_opts.do_sync = true; 
user_opts.transfer_mode = TRANSFER_MODE_COPY; @@ -199,6 +202,12 @@ parseCommandLine(int argc, char *argv[]) user_opts.transfer_mode = TRANSFER_MODE_COPY; break; + case 3: + if (!parse_sync_method(optarg, &unused)) + exit(1); + user_opts.sync_method = pg_strdup(optarg); + break; + default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), os_info.progname); @@ -209,6 +218,9 @@ parseCommandLine(int argc, char *argv[]) if (optind < argc) pg_fatal("too many command-line arguments (first is \"%s\")", argv[optind]); + if (!user_opts.sync_method) + user_opts.sync_method = pg_strdup("fsync"); + if (log_opts.verbose) pg_log(PG_REPORT, "Running in verbose mode"); @@ -289,6 +301,7 @@ usage(void) printf(_(" -V, --version display version information, then exit\n")); printf(_(" --clone clone instead of copying files to new cluster\n")); printf(_(" --copy copy files to new cluster (default)\n")); + printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\n" "Before running pg_upgrade you must:\n" diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index 4562dafcff5..96bfb67167f 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -192,8 +192,10 @@ main(int argc, char **argv) { prep_status("Sync data directory to disk"); exec_prog(UTILITY_LOG_FILE, NULL, true, true, - "\"%s/initdb\" --sync-only \"%s\"", new_cluster.bindir, - new_cluster.pgdata); + "\"%s/initdb\" --sync-only \"%s\" --sync-method %s", + new_cluster.bindir, + new_cluster.pgdata, + user_opts.sync_method); check_ok(); } diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 7afa96716ec..842f3b6cd37 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -304,6 +304,7 @@ typedef struct transferMode transfer_mode; /* copy files or link them? 
*/ int jobs; /* number of processes/threads to use */ char *socketdir; /* directory to use for Unix sockets */ + char *sync_method; /* sync method name passed to initdb --sync-only */ } UserOpts; typedef struct diff --git a/src/fe_utils/option_utils.c b/src/fe_utils/option_utils.c index 763c991015b..d2a3adeb4ba 100644 --- a/src/fe_utils/option_utils.c +++ b/src/fe_utils/option_utils.c @@ -82,3 +82,30 @@ option_parse_int(const char *optarg, const char *optname, *result = val; return true; } + +/* + * Provide strictly harmonized handling of the --sync-method option: on fsync, or on syncfs in a build that defines HAVE_SYNCFS, store the matching DataDirSyncMethod in *sync_method and return true; for any other value, or syncfs without HAVE_SYNCFS, log an error and return false without modifying *sync_method. + */ +bool +parse_sync_method(const char *optarg, DataDirSyncMethod *sync_method) +{ + if (strcmp(optarg, "fsync") == 0) + *sync_method = DATA_DIR_SYNC_METHOD_FSYNC; + else if (strcmp(optarg, "syncfs") == 0) + { +#ifdef HAVE_SYNCFS + *sync_method = DATA_DIR_SYNC_METHOD_SYNCFS; +#else /* HAVE_SYNCFS undefined: syncfs is recognized but rejected */ + pg_log_error("this build does not support sync method \"%s\"", + "syncfs"); + return false; +#endif /* HAVE_SYNCFS */ + } + else + { + pg_log_error("unrecognized sync method: %s", optarg); + return false; + } + + return true; +} diff --git a/src/include/fe_utils/option_utils.h b/src/include/fe_utils/option_utils.h index b7b0654cee7..6f3a965916a 100644 --- a/src/include/fe_utils/option_utils.h +++ b/src/include/fe_utils/option_utils.h @@ -14,6 +14,8 @@ #include "postgres_fe.h" +#include "common/file_utils.h" + typedef void (*help_handler) (const char *progname); extern void handle_help_version_opts(int argc, char *argv[], @@ -22,5 +24,7 @@ extern void handle_help_version_opts(int argc, char *argv[], extern bool option_parse_int(const char *optarg, const char *optname, int min_range, int max_range, int *result); +extern bool parse_sync_method(const char *optarg, + DataDirSyncMethod *sync_method); #endif /* OPTION_UTILS_H */ -- 2.39.5