From: Jeff Davis Date: Thu, 20 Feb 2025 09:29:06 +0000 (-0800) Subject: Transfer statistics during pg_upgrade. X-Git-Tag: REL_18_BETA1~821 X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=1fd1bd871012732e3c6c482667d2f2c56f1a9395;p=postgresql.git Transfer statistics during pg_upgrade. Add support to pg_dump for dumping stats, and use that during pg_upgrade so that statistics are transferred during upgrade. In most cases this removes the need for a costly re-analyze after upgrade. Some statistics are not transferred, such as extended statistics or statistics with a custom stakind. Now pg_dump accepts the options --schema-only, --no-schema, --data-only, --no-data, --statistics-only, and --no-statistics; which allow all combinations of schema, data, and/or stats. The options are named this way to preserve compatibility with the previous --schema-only and --data-only options. Statistics are in SECTION_DATA, unless the object itself is in SECTION_POST_DATA. The stats are represented as calls to pg_restore_relation_stats() and pg_restore_attribute_stats(). Author: Corey Huinker, Jeff Davis Reviewed-by: Jian He Discussion: https://postgr.es/m/CADkLM=fzX7QX6r78fShWDjNN3Vcr4PVAnvXxQ4DiGy6V=0bCUA@mail.gmail.com Discussion: https://postgr.es/m/CADkLM%3DcB0rF3p_FuWRTMSV0983ihTRpsH%2BOCpNyiqE7Wk0vUWA%40mail.gmail.com --- diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index 24fcc76d72c..c7a22022fa6 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -123,7 +123,7 @@ PostgreSQL documentation - Dump only the data, not the schema (data definitions). + Dump only the data, not the schema (data definitions) or statistics. Table data, large objects, and sequence values are dumped. @@ -141,13 +141,15 @@ PostgreSQL documentation Include large objects in the dump. This is the default behavior - except when , , or - is specified. 
The - switch is therefore only useful to add large objects to dumps - where a specific schema or table has been requested. Note that - large objects are considered data and therefore will be included when - is used, but not - when is. + except when , , + , , or + is specified. The + switch is therefore only useful to add large objects to dumps where a + specific schema or table has been requested. Note that large objects + are considered data and therefore will be included when + is used, but not when + or + is. @@ -516,10 +518,11 @@ PostgreSQL documentation - Dump only the object definitions (schema), not data. + Dump only the object definitions (schema), not data or statistics. - This option is the inverse of . + This option is mutually exclusive to + and . It is similar to, but for historical reasons not identical to, specifying . @@ -652,6 +655,17 @@ PostgreSQL documentation + + + + + Dump only the statistics, not the schema (data definitions) or data. + Statistics for tables, materialized views, and indexes are dumped. + + + + + [:detail] @@ -741,7 +755,8 @@ PostgreSQL documentation - This option is relevant only when creating a data-only dump. + This option is relevant only when creating a dump that includes data + but does not include schema. It instructs pg_dump to include commands to temporarily disable triggers on the target tables while the data is restored. Use this if you have referential @@ -833,7 +848,8 @@ PostgreSQL documentation though you do not need the data in it. - To exclude data for all tables in the database, see . + To exclude data for all tables in the database, see + or . @@ -1080,6 +1096,15 @@ PostgreSQL documentation + + + + + Do not dump data. + + + + @@ -1098,6 +1123,24 @@ PostgreSQL documentation + + + + + Do not dump schema (data definitions). + + + + + + + + + Do not dump statistics. + + + + @@ -1236,9 +1279,11 @@ PostgreSQL documentation The data section contains actual table data, large-object - contents, and sequence values. 
+ contents, statistics for tables and materialized views, and + sequence values. Post-data items include definitions of indexes, triggers, rules, - and constraints other than validated check constraints. + statistics for indexes, and constraints other than validated check + constraints. Pre-data items include all other data definition items. @@ -1581,7 +1626,7 @@ CREATE DATABASE foo WITH TEMPLATE template0; - When a data-only dump is chosen and the option + When a dump without schema is chosen and the option is used, pg_dump emits commands to disable triggers on user tables before inserting the data, and then commands to re-enable them after the data has been diff --git a/doc/src/sgml/ref/pg_dumpall.sgml b/doc/src/sgml/ref/pg_dumpall.sgml index 39d93c2c0e3..f0823765c4e 100644 --- a/doc/src/sgml/ref/pg_dumpall.sgml +++ b/doc/src/sgml/ref/pg_dumpall.sgml @@ -81,7 +81,7 @@ PostgreSQL documentation - Dump only the data, not the schema (data definitions). + Dump only the data, not the schema (data definitions) or statistics. @@ -265,6 +265,16 @@ exclude database PATTERN + + + + + Dump only the statistics, not the schema (data definitions) or data. + Statistics for tables, materialized views, and indexes are dumped. + + + + @@ -307,7 +317,7 @@ exclude database PATTERN - This option is relevant only when creating a data-only dump. + This option is relevant only when creating a dump with data and without schema. It instructs pg_dumpall to include commands to temporarily disable triggers on the target tables while the data is restored. Use this if you have referential @@ -422,6 +432,15 @@ exclude database PATTERN + + + + + Do not dump data. + + + + @@ -447,6 +466,15 @@ exclude database PATTERN + + + + + Do not dump schema (data definitions). + + + + @@ -456,6 +484,15 @@ exclude database PATTERN + + + + + Do not dump statistics.
+ + + + diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml index b8b27e1719e..b4031708430 100644 --- a/doc/src/sgml/ref/pg_restore.sgml +++ b/doc/src/sgml/ref/pg_restore.sgml @@ -94,7 +94,7 @@ PostgreSQL documentation - Restore only the data, not the schema (data definitions). + Restore only the data, not the schema (data definitions) or statistics. Table data, large objects, and sequence values are restored, if present in the archive. @@ -483,10 +483,11 @@ PostgreSQL documentation to the extent that schema entries are present in the archive. - This option is the inverse of . + This option is mutually exclusive of + and . It is similar to, but for historical reasons not identical to, specifying - . + . (Do not confuse this with the option, which @@ -599,6 +600,15 @@ PostgreSQL documentation + + + + + Restore only the statistics, not schema (data definitions) or data. + + + + @@ -617,7 +627,7 @@ PostgreSQL documentation - This option is relevant only when performing a data-only restore. + This option is relevant only when performing a restore without schema. It instructs pg_restore to execute commands to temporarily disable triggers on the target tables while the data is restored. Use this if you have referential @@ -681,6 +691,16 @@ PostgreSQL documentation + + + + + Do not output commands to restore data, even if the archive + contains them. + + + + @@ -713,6 +733,16 @@ PostgreSQL documentation + + + + + Do not output commands to restore schema (data definitions), even if + the archive contains them. + + + + @@ -723,6 +753,16 @@ PostgreSQL documentation + + + + + Do not output commands to restore statistics, even if the archive + contains them. 
+ + + + diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml index 4777381dac2..4d9ca2a5616 100644 --- a/doc/src/sgml/ref/pgupgrade.sgml +++ b/doc/src/sgml/ref/pgupgrade.sgml @@ -145,6 +145,15 @@ PostgreSQL documentation + + + + + Do not restore statistics from the old cluster into the new cluster. + + + + options options diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index f0f19bb0b29..350cf659c41 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -160,6 +160,7 @@ typedef struct _restoreOptions /* flags derived from the user-settable flags */ bool dumpSchema; bool dumpData; + bool dumpStatistics; } RestoreOptions; typedef struct _dumpOptions @@ -208,6 +209,7 @@ typedef struct _dumpOptions /* flags derived from the user-settable flags */ bool dumpSchema; bool dumpData; + bool dumpStatistics; } DumpOptions; /* diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index b9d7ab98c3e..632077113a4 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -46,6 +46,9 @@ #define TEXT_DUMP_HEADER "--\n-- PostgreSQL database dump\n--\n\n" #define TEXT_DUMPALL_HEADER "--\n-- PostgreSQL database cluster dump\n--\n\n" +#define TOC_PREFIX_NONE "" +#define TOC_PREFIX_DATA "Data for " +#define TOC_PREFIX_STATS "Statistics for " static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt, const pg_compress_specification compression_spec, @@ -53,7 +56,7 @@ static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt, SetupWorkerPtrType setupWorkerPtr, DataDirSyncMethod sync_method); static void _getObjectDescription(PQExpBuffer buf, const TocEntry *te); -static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData); +static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx); static char *sanitize_line(const char *str, bool want_hyphen); static void 
_doSetFixedOutputState(ArchiveHandle *AH); static void _doSetSessionAuth(ArchiveHandle *AH, const char *user); @@ -149,6 +152,7 @@ InitDumpOptions(DumpOptions *opts) opts->dumpSections = DUMP_UNSECTIONED; opts->dumpSchema = true; opts->dumpData = true; + opts->dumpStatistics = true; } /* @@ -169,9 +173,10 @@ dumpOptionsFromRestoreOptions(RestoreOptions *ropt) dopt->outputClean = ropt->dropSchema; dopt->dumpData = ropt->dumpData; dopt->dumpSchema = ropt->dumpSchema; + dopt->dumpSections = ropt->dumpSections; + dopt->dumpStatistics = ropt->dumpStatistics; dopt->if_exists = ropt->if_exists; dopt->column_inserts = ropt->column_inserts; - dopt->dumpSections = ropt->dumpSections; dopt->aclsSkip = ropt->aclsSkip; dopt->outputSuperuser = ropt->superuser; dopt->outputCreateDB = ropt->createDB; @@ -418,8 +423,8 @@ RestoreArchive(Archive *AHX) } /* - * Work out if we have an implied data-only restore. This can happen if - * the dump was data only or if the user has used a toc list to exclude + * Work out if we have an implied schema-less restore. This can happen if + * the dump excluded the schema or the user has used a toc list to exclude * all of the schema data. All we do is look for schema entries - if none * are found then we unset the dumpSchema flag. 
* @@ -428,20 +433,20 @@ RestoreArchive(Archive *AHX) */ if (ropt->dumpSchema) { - int impliedDataOnly = 1; + bool no_schema_found = true; for (te = AH->toc->next; te != AH->toc; te = te->next) { if ((te->reqs & REQ_SCHEMA) != 0) - { /* It's schema, and it's wanted */ - impliedDataOnly = 0; + { + no_schema_found = false; break; } } - if (impliedDataOnly) + if (no_schema_found) { ropt->dumpSchema = false; - pg_log_info("implied data-only restore"); + pg_log_info("implied no-schema restore"); } } @@ -739,7 +744,7 @@ RestoreArchive(Archive *AHX) for (te = AH->toc->next; te != AH->toc; te = te->next) { - if ((te->reqs & (REQ_SCHEMA | REQ_DATA)) == 0) + if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) == 0) continue; /* ignore if not to be dumped at all */ switch (_tocEntryRestorePass(te)) @@ -760,7 +765,7 @@ RestoreArchive(Archive *AHX) { for (te = AH->toc->next; te != AH->toc; te = te->next) { - if ((te->reqs & (REQ_SCHEMA | REQ_DATA)) != 0 && + if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0 && _tocEntryRestorePass(te) == RESTORE_PASS_ACL) (void) restore_toc_entry(AH, te, false); } @@ -770,7 +775,7 @@ RestoreArchive(Archive *AHX) { for (te = AH->toc->next; te != AH->toc; te = te->next) { - if ((te->reqs & (REQ_SCHEMA | REQ_DATA)) != 0 && + if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0 && _tocEntryRestorePass(te) == RESTORE_PASS_POST_ACL) (void) restore_toc_entry(AH, te, false); } @@ -869,7 +874,7 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel) pg_log_info("creating %s \"%s\"", te->desc, te->tag); - _printTocEntry(AH, te, false); + _printTocEntry(AH, te, TOC_PREFIX_NONE); defnDumped = true; if (strcmp(te->desc, "TABLE") == 0) @@ -938,7 +943,7 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel) */ if (AH->PrintTocDataPtr != NULL) { - _printTocEntry(AH, te, true); + _printTocEntry(AH, te, TOC_PREFIX_DATA); if (strcmp(te->desc, "BLOBS") == 0 || strcmp(te->desc, "BLOB COMMENTS") == 0) @@ -1036,15 
+1041,21 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel) { /* If we haven't already dumped the defn part, do so now */ pg_log_info("executing %s %s", te->desc, te->tag); - _printTocEntry(AH, te, false); + _printTocEntry(AH, te, TOC_PREFIX_NONE); } } + /* + * If it has a statistics component that we want, then process that + */ + if ((reqs & REQ_STATS) != 0) + _printTocEntry(AH, te, TOC_PREFIX_STATS); + /* * If we emitted anything for this TOC entry, that counts as one action * against the transaction-size limit. Commit if it's time to. */ - if ((reqs & (REQ_SCHEMA | REQ_DATA)) != 0 && ropt->txn_size > 0) + if ((reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0 && ropt->txn_size > 0) { if (++AH->txnCount >= ropt->txn_size) { @@ -1084,6 +1095,7 @@ NewRestoreOptions(void) opts->compression_spec.level = 0; opts->dumpSchema = true; opts->dumpData = true; + opts->dumpStatistics = true; return opts; } @@ -1329,7 +1341,7 @@ PrintTOCSummary(Archive *AHX) te->reqs = _tocEntryRequired(te, curSection, AH); /* Now, should we print it? */ if (ropt->verbose || - (te->reqs & (REQ_SCHEMA | REQ_DATA)) != 0) + (te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0) { char *sanitized_name; char *sanitized_schema; @@ -2582,7 +2594,7 @@ WriteToc(ArchiveHandle *AH) tocCount = 0; for (te = AH->toc->next; te != AH->toc; te = te->next) { - if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_SPECIAL)) != 0) + if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS | REQ_SPECIAL)) != 0) tocCount++; } @@ -2592,7 +2604,7 @@ WriteToc(ArchiveHandle *AH) for (te = AH->toc->next; te != AH->toc; te = te->next) { - if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_SPECIAL)) == 0) + if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS | REQ_SPECIAL)) == 0) continue; WriteInt(AH, te->dumpId); @@ -2904,8 +2916,9 @@ StrictNamesCheck(RestoreOptions *ropt) * Determine whether we want to restore this TOC entry. 
* * Returns 0 if entry should be skipped, or some combination of the - * REQ_SCHEMA and REQ_DATA bits if we want to restore schema and/or data - * portions of this TOC entry, or REQ_SPECIAL if it's a special entry. + * REQ_SCHEMA, REQ_DATA, and REQ_STATS bits if we want to restore schema, data + * and/or statistics portions of this TOC entry, or REQ_SPECIAL if it's a + * special entry. */ static int _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) @@ -2919,6 +2932,14 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) strcmp(te->desc, "SEARCHPATH") == 0) return REQ_SPECIAL; + if (strcmp(te->desc, "STATISTICS DATA") == 0) + { + if (!ropt->dumpStatistics) + return 0; + else + res = REQ_STATS; + } + /* * DATABASE and DATABASE PROPERTIES also have a special rule: they are * restored in createDB mode, and not restored otherwise, independently of @@ -2963,6 +2984,10 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) if (ropt->no_subscriptions && strcmp(te->desc, "SUBSCRIPTION") == 0) return 0; + /* If it's statistics and we don't want statistics, maybe ignore it */ + if (!ropt->dumpStatistics && strcmp(te->desc, "STATISTICS DATA") == 0) + return 0; + /* Ignore it if section is not to be dumped/restored */ switch (curSection) { @@ -2992,6 +3017,7 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) */ if (strcmp(te->desc, "ACL") == 0 || strcmp(te->desc, "COMMENT") == 0 || + strcmp(te->desc, "STATISTICS DATA") == 0 || strcmp(te->desc, "SECURITY LABEL") == 0) { /* Database properties react to createDB, not selectivity options. */ @@ -3108,6 +3134,7 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) } } + /* * Determine whether the TOC entry contains schema and/or data components, * and mask off inapplicable REQ bits. 
If it had a dataDumper, assume @@ -3173,12 +3200,12 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) strncmp(te->tag, "LARGE OBJECT", 12) == 0) || (strcmp(te->desc, "SECURITY LABEL") == 0 && strncmp(te->tag, "LARGE OBJECT", 12) == 0)))) - res = res & REQ_SCHEMA; + res = res & (REQ_SCHEMA | REQ_STATS); } /* Mask it if we don't want schema */ if (!ropt->dumpSchema) - res = res & REQ_DATA; + res = res & (REQ_DATA | REQ_STATS); return res; } @@ -3730,7 +3757,7 @@ _getObjectDescription(PQExpBuffer buf, const TocEntry *te) * will remain at default, until the matching ACL TOC entry is restored. */ static void -_printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData) +_printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx) { RestoreOptions *ropt = AH->public.ropt; @@ -3749,16 +3776,10 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData) /* Emit header comment for item */ if (!AH->noTocComments) { - const char *pfx; char *sanitized_name; char *sanitized_schema; char *sanitized_owner; - if (isData) - pfx = "Data for "; - else - pfx = ""; - ahprintf(AH, "--\n"); if (AH->public.verbose) { @@ -4325,7 +4346,7 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate, if (next_work_item != NULL) { /* If not to be restored, don't waste time launching a worker */ - if ((next_work_item->reqs & (REQ_SCHEMA | REQ_DATA)) == 0) + if ((next_work_item->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) == 0) { pg_log_info("skipping item %d %s %s", next_work_item->dumpId, diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h index ce5ed1dd395..a2064f471ed 100644 --- a/src/bin/pg_dump/pg_backup_archiver.h +++ b/src/bin/pg_dump/pg_backup_archiver.h @@ -209,7 +209,8 @@ typedef enum #define REQ_SCHEMA 0x01 /* want schema */ #define REQ_DATA 0x02 /* want data */ -#define REQ_SPECIAL 0x04 /* for special TOC entries */ +#define REQ_STATS 0x04 +#define REQ_SPECIAL 0x08 /* for special TOC entries */ struct 
_archiveHandle { diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c index 240a1d41062..b2a841bb0ff 100644 --- a/src/bin/pg_dump/pg_backup_directory.c +++ b/src/bin/pg_dump/pg_backup_directory.c @@ -780,7 +780,7 @@ _PrepParallelRestore(ArchiveHandle *AH) continue; /* We may ignore items not due to be restored */ - if ((te->reqs & REQ_DATA) == 0) + if ((te->reqs & (REQ_DATA | REQ_STATS)) == 0) continue; /* diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 6370bb711c0..afd79287177 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -431,6 +431,10 @@ main(int argc, char **argv) DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC; bool data_only = false; bool schema_only = false; + bool statistics_only = false; + bool no_data = false; + bool no_schema = false; + bool no_statistics = false; static DumpOptions dopt; @@ -490,11 +494,15 @@ main(int argc, char **argv) {"section", required_argument, NULL, 5}, {"serializable-deferrable", no_argument, &dopt.serializable_deferrable, 1}, {"snapshot", required_argument, NULL, 6}, + {"statistics-only", no_argument, NULL, 18}, {"strict-names", no_argument, &strict_names, 1}, {"use-set-session-authorization", no_argument, &dopt.use_setsessauth, 1}, {"no-comments", no_argument, &dopt.no_comments, 1}, + {"no-data", no_argument, NULL, 19}, {"no-publications", no_argument, &dopt.no_publications, 1}, + {"no-schema", no_argument, NULL, 20}, {"no-security-labels", no_argument, &dopt.no_security_labels, 1}, + {"no-statistics", no_argument, NULL, 21}, {"no-subscriptions", no_argument, &dopt.no_subscriptions, 1}, {"no-toast-compression", no_argument, &dopt.no_toast_compression, 1}, {"no-unlogged-table-data", no_argument, &dopt.no_unlogged_table_data, 1}, @@ -540,7 +548,7 @@ main(int argc, char **argv) InitDumpOptions(&dopt); - while ((c = getopt_long(argc, argv, "abBcCd:e:E:f:F:h:j:n:N:Op:RsS:t:T:U:vwWxZ:", + while ((c = getopt_long(argc, argv, 
"abBcCd:e:E:f:F:h:j:n:N:Op:RsS:t:T:U:vwWxXZ:", long_options, &optindex)) != -1) { switch (c) @@ -748,6 +756,22 @@ main(int argc, char **argv) optarg); break; + case 18: + statistics_only = true; + break; + + case 19: + no_data = true; + break; + + case 20: + no_schema = true; + break; + + case 21: + no_statistics = true; + break; + default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -785,6 +809,17 @@ main(int argc, char **argv) if (data_only && schema_only) pg_fatal("options -s/--schema-only and -a/--data-only cannot be used together"); + if (schema_only && statistics_only) + pg_fatal("options -s/--schema-only and --statistics-only cannot be used together"); + if (data_only && statistics_only) + pg_fatal("options -a/--data-only and --statistics-only cannot be used together"); + + if (data_only && no_data) + pg_fatal("options -a/--data-only and --no-data cannot be used together"); + if (schema_only && no_schema) + pg_fatal("options -s/--schema-only and --no-schema cannot be used together"); + if (statistics_only && no_statistics) + pg_fatal("options --statistics-only and --no-statistics cannot be used together"); if (schema_only && foreign_servers_include_patterns.head != NULL) pg_fatal("options -s/--schema-only and --include-foreign-data cannot be used together"); @@ -799,8 +834,9 @@ main(int argc, char **argv) pg_fatal("option --if-exists requires option -c/--clean"); /* set derivative flags */ - dopt.dumpSchema = (!data_only); - dopt.dumpData = (!schema_only); + dopt.dumpData = data_only || (!schema_only && !statistics_only && !no_data); + dopt.dumpSchema = schema_only || (!data_only && !statistics_only && !no_schema); + dopt.dumpStatistics = statistics_only || (!data_only && !schema_only && !no_statistics); /* * --inserts are already implied above if --column-inserts or @@ -1100,6 +1136,7 @@ main(int argc, char **argv) ropt->dropSchema = dopt.outputClean; ropt->dumpData = 
dopt.dumpData; ropt->dumpSchema = dopt.dumpSchema; + ropt->dumpStatistics = dopt.dumpStatistics; ropt->if_exists = dopt.if_exists; ropt->column_inserts = dopt.column_inserts; ropt->dumpSections = dopt.dumpSections; @@ -1178,7 +1215,7 @@ help(const char *progname) printf(_(" -?, --help show this help, then exit\n")); printf(_("\nOptions controlling the output content:\n")); - printf(_(" -a, --data-only dump only the data, not the schema\n")); + printf(_(" -a, --data-only dump only the data, not the schema or statistics\n")); printf(_(" -b, --large-objects include large objects in dump\n")); printf(_(" --blobs (same as --large-objects, deprecated)\n")); printf(_(" -B, --no-large-objects exclude large objects in dump\n")); @@ -1191,7 +1228,7 @@ help(const char *progname) printf(_(" -N, --exclude-schema=PATTERN do NOT dump the specified schema(s)\n")); printf(_(" -O, --no-owner skip restoration of object ownership in\n" " plain-text format\n")); - printf(_(" -s, --schema-only dump only the schema, no data\n")); + printf(_(" -s, --schema-only dump only the schema, no data or statistics\n")); printf(_(" -S, --superuser=NAME superuser user name to use in plain-text format\n")); printf(_(" -t, --table=PATTERN dump only the specified table(s)\n")); printf(_(" -T, --exclude-table=PATTERN do NOT dump the specified table(s)\n")); @@ -1220,8 +1257,11 @@ help(const char *progname) printf(_(" --inserts dump data as INSERT commands, rather than COPY\n")); printf(_(" --load-via-partition-root load partitions via the root table\n")); printf(_(" --no-comments do not dump comment commands\n")); + printf(_(" --no-data do not dump data\n")); printf(_(" --no-publications do not dump publications\n")); + printf(_(" --no-schema do not dump schema\n")); printf(_(" --no-security-labels do not dump security label assignments\n")); + printf(_(" --no-statistics do not dump statistics\n")); printf(_(" --no-subscriptions do not dump subscriptions\n")); printf(_(" --no-table-access-method do not 
dump table access methods\n")); printf(_(" --no-tablespaces do not dump tablespace assignments\n")); @@ -1233,6 +1273,7 @@ help(const char *progname) printf(_(" --section=SECTION dump named section (pre-data, data, or post-data)\n")); printf(_(" --serializable-deferrable wait until the dump can run without anomalies\n")); printf(_(" --snapshot=SNAPSHOT use given snapshot for the dump\n")); + printf(_(" --statistics-only dump only the statistics, not schema or data\n")); printf(_(" --strict-names require table and/or schema include patterns to\n" " match at least one entity each\n")); printf(_(" --table-and-children=PATTERN dump only the specified table(s), including\n" @@ -6767,6 +6808,45 @@ getFuncs(Archive *fout) destroyPQExpBuffer(query); } +/* + * getRelationStatistics + * register the statistics object as a dependent of the relation. + * + */ +static RelStatsInfo * +getRelationStatistics(Archive *fout, DumpableObject *rel, char relkind) +{ + if (!fout->dopt->dumpStatistics) + return NULL; + + if ((relkind == RELKIND_RELATION) || + (relkind == RELKIND_PARTITIONED_TABLE) || + (relkind == RELKIND_INDEX) || + (relkind == RELKIND_PARTITIONED_INDEX) || + (relkind == RELKIND_MATVIEW)) + { + RelStatsInfo *info = pg_malloc0(sizeof(RelStatsInfo)); + DumpableObject *dobj = &info->dobj; + + dobj->objType = DO_REL_STATS; + dobj->catId.tableoid = 0; + dobj->catId.oid = 0; + AssignDumpId(dobj); + dobj->dependencies = (DumpId *) pg_malloc(sizeof(DumpId)); + dobj->dependencies[0] = rel->dumpId; + dobj->nDeps = 1; + dobj->allocDeps = 1; + dobj->components |= DUMP_COMPONENT_STATISTICS; + dobj->name = pg_strdup(rel->name); + dobj->namespace = rel->namespace; + info->relkind = relkind; + info->postponed_def = false; + + return info; + } + return NULL; +} + /* * getTables * read all the tables (no indexes) in the system catalogs, @@ -7126,8 +7206,8 @@ getTables(Archive *fout, int *numTables) /* * Now, consider the table "interesting" if we need to dump its - * definition or its 
data. Later on, we'll skip a lot of data - * collection for uninteresting tables. + * definition, data or its statistics. Later on, we'll skip a lot of + * data collection for uninteresting tables. * * Note: the "interesting" flag will also be set by flagInhTables for * parents of interesting tables, so that we collect necessary @@ -7137,7 +7217,8 @@ getTables(Archive *fout, int *numTables) */ tblinfo[i].interesting = (tblinfo[i].dobj.dump & (DUMP_COMPONENT_DEFINITION | - DUMP_COMPONENT_DATA)) != 0; + DUMP_COMPONENT_DATA | + DUMP_COMPONENT_STATISTICS)) != 0; tblinfo[i].dummy_view = false; /* might get set during sort */ tblinfo[i].postponed_def = false; /* might get set during sort */ @@ -7150,6 +7231,10 @@ getTables(Archive *fout, int *numTables) tblinfo[i].dobj.components |= DUMP_COMPONENT_ACL; tblinfo[i].hascolumnACLs = false; /* may get set later */ + /* Add statistics */ + if (tblinfo[i].interesting) + getRelationStatistics(fout, &tblinfo[i].dobj, tblinfo[i].relkind); + /* * Read-lock target tables to make sure they aren't DROPPED or altered * in schema before we get around to dumping them. 
@@ -7638,6 +7723,8 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) for (int c = 0; c < numinds; c++, j++) { char contype; + char indexkind; + RelStatsInfo *relstats; indxinfo[j].dobj.objType = DO_INDEX; indxinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_tableoid)); @@ -7665,7 +7752,14 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) { NULL, NULL }; + + if (indxinfo[j].parentidx == 0) + indexkind = RELKIND_INDEX; + else + indexkind = RELKIND_PARTITIONED_INDEX; + contype = *(PQgetvalue(res, j, i_contype)); + relstats = getRelationStatistics(fout, &indxinfo[j].dobj, indexkind); if (contype == 'p' || contype == 'u' || contype == 'x') { @@ -7699,6 +7793,8 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) constrinfo->separate = true; indxinfo[j].indexconstraint = constrinfo->dobj.dumpId; + if (relstats != NULL) + addObjectDependency(&relstats->dobj, constrinfo->dobj.dumpId); } else { @@ -10287,6 +10383,296 @@ dumpComment(Archive *fout, const char *type, catalogId, subid, dumpId, NULL); } +/* + * Tabular description of the parameters to pg_restore_relation_stats() + * param_name, param_type + */ +static const char *rel_stats_arginfo[][2] = { + {"relation", "regclass"}, + {"version", "integer"}, + {"relpages", "integer"}, + {"reltuples", "real"}, + {"relallvisible", "integer"}, +}; + +/* + * Tabular description of the parameters to pg_restore_attribute_stats() + * param_name, param_type + */ +static const char *att_stats_arginfo[][2] = { + {"relation", "regclass"}, + {"attname", "name"}, + {"inherited", "boolean"}, + {"version", "integer"}, + {"null_frac", "float4"}, + {"avg_width", "integer"}, + {"n_distinct", "float4"}, + {"most_common_vals", "text"}, + {"most_common_freqs", "float4[]"}, + {"histogram_bounds", "text"}, + {"correlation", "float4"}, + {"most_common_elems", "text"}, + {"most_common_elem_freqs", "float4[]"}, + {"elem_count_histogram", "float4[]"}, + {"range_length_histogram", "text"}, + 
{"range_empty_frac", "float4"}, + {"range_bounds_histogram", "text"}, +}; + +/* + * getRelStatsExportQuery -- + * + * Generate a query that will fetch all relation (e.g. pg_class) + * stats for a given relation. + */ +static void +getRelStatsExportQuery(PQExpBuffer query, Archive *fout, + const char *schemaname, const char *relname) +{ + resetPQExpBuffer(query); + appendPQExpBufferStr(query, + "SELECT c.oid::regclass AS relation, " + "current_setting('server_version_num') AS version, " + "c.relpages, c.reltuples, c.relallvisible " + "FROM pg_class c " + "JOIN pg_namespace n " + "ON n.oid = c.relnamespace " + "WHERE n.nspname = "); + appendStringLiteralAH(query, schemaname, fout); + appendPQExpBufferStr(query, " AND c.relname = "); + appendStringLiteralAH(query, relname, fout); +} + +/* + * getAttStatsExportQuery -- + * + * Generate a query that will fetch all attribute (e.g. pg_statistic) + * stats for a given relation. + */ +static void +getAttStatsExportQuery(PQExpBuffer query, Archive *fout, + const char *schemaname, const char *relname) +{ + resetPQExpBuffer(query); + appendPQExpBufferStr(query, + "SELECT c.oid::regclass AS relation, " + "s.attname," + "s.inherited," + "current_setting('server_version_num') AS version, " + "s.null_frac," + "s.avg_width," + "s.n_distinct," + "s.most_common_vals," + "s.most_common_freqs," + "s.histogram_bounds," + "s.correlation," + "s.most_common_elems," + "s.most_common_elem_freqs," + "s.elem_count_histogram,"); + + if (fout->remoteVersion >= 170000) + appendPQExpBufferStr(query, + "s.range_length_histogram," + "s.range_empty_frac," + "s.range_bounds_histogram "); + else + appendPQExpBufferStr(query, + "NULL AS range_length_histogram," + "NULL AS range_empty_frac," + "NULL AS range_bounds_histogram "); + + appendPQExpBufferStr(query, + "FROM pg_stats s " + "JOIN pg_namespace n " + "ON n.nspname = s.schemaname " + "JOIN pg_class c " + "ON c.relname = s.tablename " + "AND c.relnamespace = n.oid " + "WHERE s.schemaname = "); + 
appendStringLiteralAH(query, schemaname, fout); + appendPQExpBufferStr(query, " AND s.tablename = "); + appendStringLiteralAH(query, relname, fout); + appendPQExpBufferStr(query, " ORDER BY s.attname, s.inherited"); +} + + +/* + * appendNamedArgument -- + * + * Convenience routine for constructing parameters of the form: + * 'paramname', 'value'::type + */ +static void +appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname, + const char *argval, const char *argtype) +{ + appendPQExpBufferStr(out, "\t"); + + appendStringLiteralAH(out, argname, fout); + appendPQExpBufferStr(out, ", "); + + appendStringLiteralAH(out, argval, fout); + appendPQExpBuffer(out, "::%s", argtype); +} + +/* + * appendRelStatsImport -- + * + * Append a formatted pg_restore_relation_stats statement. + */ +static void +appendRelStatsImport(PQExpBuffer out, Archive *fout, PGresult *res) +{ + const char *sep = ""; + + if (PQntuples(res) == 0) + return; + + appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_relation_stats(\n"); + + for (int argno = 0; argno < lengthof(rel_stats_arginfo); argno++) + { + const char *argname = rel_stats_arginfo[argno][0]; + const char *argtype = rel_stats_arginfo[argno][1]; + int fieldno = PQfnumber(res, argname); + + if (fieldno < 0) + pg_fatal("relation stats export query missing field '%s'", + argname); + + if (PQgetisnull(res, 0, fieldno)) + continue; + + appendPQExpBufferStr(out, sep); + appendNamedArgument(out, fout, argname, PQgetvalue(res, 0, fieldno), argtype); + + sep = ",\n"; + } + appendPQExpBufferStr(out, "\n);\n"); +} + +/* + * appendAttStatsImport -- + * + * Append a series of formatted pg_restore_attribute_stats statements. 
+ */ +static void +appendAttStatsImport(PQExpBuffer out, Archive *fout, PGresult *res) +{ + for (int rownum = 0; rownum < PQntuples(res); rownum++) + { + const char *sep = ""; + + appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_attribute_stats(\n"); + for (int argno = 0; argno < lengthof(att_stats_arginfo); argno++) + { + const char *argname = att_stats_arginfo[argno][0]; + const char *argtype = att_stats_arginfo[argno][1]; + int fieldno = PQfnumber(res, argname); + + if (fieldno < 0) + pg_fatal("attribute stats export query missing field '%s'", + argname); + + if (PQgetisnull(res, rownum, fieldno)) + continue; + + appendPQExpBufferStr(out, sep); + appendNamedArgument(out, fout, argname, PQgetvalue(res, rownum, fieldno), argtype); + sep = ",\n"; + } + appendPQExpBufferStr(out, "\n);\n"); + } +} + +/* + * Decide which section to use based on the relkind of the parent object. + * + * NB: materialized views may be postponed from SECTION_PRE_DATA to + * SECTION_POST_DATA to resolve some kinds of dependency problems. If so, the + * matview stats will also be postponed to SECTION_POST_DATA. See + * repairMatViewBoundaryMultiLoop(). + */ +static teSection +statisticsDumpSection(const RelStatsInfo *rsinfo) +{ + switch (rsinfo->relkind) + { + case RELKIND_RELATION: + case RELKIND_PARTITIONED_TABLE: + case RELKIND_MATVIEW: + return SECTION_DATA; + case RELKIND_INDEX: + case RELKIND_PARTITIONED_INDEX: + return SECTION_POST_DATA; + default: + pg_fatal("cannot dump statistics for relation kind '%c'", + rsinfo->relkind); + } + + return 0; /* keep compiler quiet */ +} + +/* + * dumpRelationStats -- + * + * Dump command to import stats into the relation on the new database. 
+ */ +static void +dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) +{ + PGresult *res; + PQExpBuffer query; + PQExpBuffer out; + PQExpBuffer tag; + DumpableObject *dobj = (DumpableObject *) &rsinfo->dobj; + DumpId *deps = NULL; + int ndeps = 0; + + /* nothing to do if we are not dumping statistics */ + if (!fout->dopt->dumpStatistics) + return; + + /* dependent on the relation definition, if doing schema */ + if (fout->dopt->dumpSchema) + { + deps = dobj->dependencies; + ndeps = dobj->nDeps; + } + + tag = createPQExpBuffer(); + appendPQExpBufferStr(tag, fmtId(dobj->name)); + + query = createPQExpBuffer(); + out = createPQExpBuffer(); + + getRelStatsExportQuery(query, fout, dobj->namespace->dobj.name, + dobj->name); + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + appendRelStatsImport(out, fout, res); + PQclear(res); + + getAttStatsExportQuery(query, fout, dobj->namespace->dobj.name, + dobj->name); + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + appendAttStatsImport(out, fout, res); + PQclear(res); + + ArchiveEntry(fout, nilCatalogId, createDumpId(), + ARCHIVE_OPTS(.tag = tag->data, + .namespace = dobj->namespace->dobj.name, + .description = "STATISTICS DATA", + .section = rsinfo->postponed_def ? 
+ SECTION_POST_DATA : statisticsDumpSection(rsinfo), + .createStmt = out->data, + .deps = deps, + .nDeps = ndeps)); + + destroyPQExpBuffer(query); + destroyPQExpBuffer(out); + destroyPQExpBuffer(tag); +} + /* * dumpTableComment -- * @@ -10735,6 +11121,9 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj) case DO_SUBSCRIPTION_REL: dumpSubscriptionTable(fout, (const SubRelInfo *) dobj); break; + case DO_REL_STATS: + dumpRelationStats(fout, (const RelStatsInfo *) dobj); + break; case DO_PRE_DATA_BOUNDARY: case DO_POST_DATA_BOUNDARY: /* never dumped, nothing to do */ @@ -18964,6 +19353,16 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs, /* must come after the pre-data boundary */ addObjectDependency(dobj, preDataBound->dumpId); break; + case DO_REL_STATS: + /* stats section varies by parent object type, DATA or POST */ + if (statisticsDumpSection((RelStatsInfo *) dobj) == SECTION_DATA) + { + addObjectDependency(dobj, preDataBound->dumpId); + addObjectDependency(postDataBound, dobj->dumpId); + } + else + addObjectDependency(dobj, postDataBound->dumpId); + break; } } } diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 7139c88a69a..f08f5905aa3 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -83,10 +83,13 @@ typedef enum DO_PUBLICATION, DO_PUBLICATION_REL, DO_PUBLICATION_TABLE_IN_SCHEMA, + DO_REL_STATS, DO_SUBSCRIPTION, DO_SUBSCRIPTION_REL, /* see note for SubRelInfo */ } DumpableObjectType; +#define NUM_DUMPABLE_OBJECT_TYPES (DO_SUBSCRIPTION_REL + 1) + /* * DumpComponents is a bitmask of the potentially dumpable components of * a database object: its core definition, plus optional attributes such @@ -110,6 +113,7 @@ typedef uint32 DumpComponents; #define DUMP_COMPONENT_ACL (1 << 4) #define DUMP_COMPONENT_POLICY (1 << 5) #define DUMP_COMPONENT_USERMAP (1 << 6) +#define DUMP_COMPONENT_STATISTICS (1 << 7) #define DUMP_COMPONENT_ALL (0xFFFF) /* @@ -137,6 +141,7 @@ typedef uint32 DumpComponents; #define 
DUMP_COMPONENTS_REQUIRING_LOCK (\ DUMP_COMPONENT_DEFINITION |\ DUMP_COMPONENT_DATA |\ + DUMP_COMPONENT_STATISTICS |\ DUMP_COMPONENT_POLICY) typedef struct _dumpableObject @@ -430,6 +435,13 @@ typedef struct _indexAttachInfo IndxInfo *partitionIdx; /* link to index on partition */ } IndexAttachInfo; +typedef struct _relStatsInfo +{ + DumpableObject dobj; + char relkind; /* 'r', 'm', 'i', etc */ + bool postponed_def; /* stats must be postponed into post-data */ +} RelStatsInfo; + typedef struct _statsExtInfo { DumpableObject dobj; diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c index dc9a28924bd..f75e9928bad 100644 --- a/src/bin/pg_dump/pg_dump_sort.c +++ b/src/bin/pg_dump/pg_dump_sort.c @@ -81,6 +81,7 @@ enum dbObjectTypePriorities PRIO_TABLE_DATA, PRIO_SEQUENCE_SET, PRIO_LARGE_OBJECT_DATA, + PRIO_STATISTICS_DATA_DATA, PRIO_POST_DATA_BOUNDARY, /* boundary! */ PRIO_CONSTRAINT, PRIO_INDEX, @@ -148,11 +149,12 @@ static const int dbObjectTypePriority[] = [DO_PUBLICATION] = PRIO_PUBLICATION, [DO_PUBLICATION_REL] = PRIO_PUBLICATION_REL, [DO_PUBLICATION_TABLE_IN_SCHEMA] = PRIO_PUBLICATION_TABLE_IN_SCHEMA, + [DO_REL_STATS] = PRIO_STATISTICS_DATA_DATA, [DO_SUBSCRIPTION] = PRIO_SUBSCRIPTION, [DO_SUBSCRIPTION_REL] = PRIO_SUBSCRIPTION_REL, }; -StaticAssertDecl(lengthof(dbObjectTypePriority) == (DO_SUBSCRIPTION_REL + 1), +StaticAssertDecl(lengthof(dbObjectTypePriority) == NUM_DUMPABLE_OBJECT_TYPES, "array length mismatch"); static DumpId preDataBoundId; @@ -801,11 +803,22 @@ repairMatViewBoundaryMultiLoop(DumpableObject *boundaryobj, { /* remove boundary's dependency on object after it in loop */ removeObjectDependency(boundaryobj, nextobj->dumpId); - /* if that object is a matview, mark it as postponed into post-data */ + + /* + * If that object is a matview or matview stats, mark it as postponed into + * post-data. 
+ */ if (nextobj->objType == DO_TABLE) { TableInfo *nextinfo = (TableInfo *) nextobj; + if (nextinfo->relkind == RELKIND_MATVIEW) + nextinfo->postponed_def = true; + } + else if (nextobj->objType == DO_REL_STATS) + { + RelStatsInfo *nextinfo = (RelStatsInfo *) nextobj; + if (nextinfo->relkind == RELKIND_MATVIEW) nextinfo->postponed_def = true; } @@ -1018,6 +1031,21 @@ repairDependencyLoop(DumpableObject **loop, { DumpableObject *nextobj; + nextobj = (j < nLoop - 1) ? loop[j + 1] : loop[0]; + repairMatViewBoundaryMultiLoop(loop[j], nextobj); + return; + } + } + } + else if (loop[i]->objType == DO_REL_STATS && + ((RelStatsInfo *) loop[i])->relkind == RELKIND_MATVIEW) + { + for (j = 0; j < nLoop; j++) + { + if (loop[j]->objType == DO_POST_DATA_BOUNDARY) + { + DumpableObject *nextobj; + nextobj = (j < nLoop - 1) ? loop[j + 1] : loop[0]; repairMatViewBoundaryMultiLoop(loop[j], nextobj); return; @@ -1500,6 +1528,11 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize) "POST-DATA BOUNDARY (ID %d)", obj->dumpId); return; + case DO_REL_STATS: + snprintf(buf, bufsize, + "RELATION STATISTICS FOR %s (ID %d OID %u)", + obj->name, obj->dumpId, obj->catId.oid); + return; } /* shouldn't get here */ snprintf(buf, bufsize, diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 64a60a26092..b993b05cc22 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -103,6 +103,9 @@ static int use_setsessauth = 0; static int no_comments = 0; static int no_publications = 0; static int no_security_labels = 0; +static int no_data = 0; +static int no_schema = 0; +static int no_statistics = 0; static int no_subscriptions = 0; static int no_toast_compression = 0; static int no_unlogged_table_data = 0; @@ -110,6 +113,7 @@ static int no_role_passwords = 0; static int server_version; static int load_via_partition_root = 0; static int on_conflict_do_nothing = 0; +static int statistics_only = 0; static char role_catalog[10]; #define 
PG_AUTHID "pg_authid" @@ -168,15 +172,19 @@ main(int argc, char *argv[]) {"role", required_argument, NULL, 3}, {"use-set-session-authorization", no_argument, &use_setsessauth, 1}, {"no-comments", no_argument, &no_comments, 1}, + {"no-data", no_argument, &no_data, 1}, {"no-publications", no_argument, &no_publications, 1}, {"no-role-passwords", no_argument, &no_role_passwords, 1}, + {"no-schema", no_argument, &no_schema, 1}, {"no-security-labels", no_argument, &no_security_labels, 1}, {"no-subscriptions", no_argument, &no_subscriptions, 1}, + {"no-statistics", no_argument, &no_statistics, 1}, {"no-sync", no_argument, NULL, 4}, {"no-toast-compression", no_argument, &no_toast_compression, 1}, {"no-unlogged-table-data", no_argument, &no_unlogged_table_data, 1}, {"on-conflict-do-nothing", no_argument, &on_conflict_do_nothing, 1}, {"rows-per-insert", required_argument, NULL, 7}, + {"statistics-only", no_argument, &statistics_only, 1}, {"filter", required_argument, NULL, 8}, {NULL, 0, NULL, 0} @@ -447,10 +455,16 @@ main(int argc, char *argv[]) appendPQExpBufferStr(pgdumpopts, " --use-set-session-authorization"); if (no_comments) appendPQExpBufferStr(pgdumpopts, " --no-comments"); + if (no_data) + appendPQExpBufferStr(pgdumpopts, " --no-data"); if (no_publications) appendPQExpBufferStr(pgdumpopts, " --no-publications"); if (no_security_labels) appendPQExpBufferStr(pgdumpopts, " --no-security-labels"); + if (no_schema) + appendPQExpBufferStr(pgdumpopts, " --no-schema"); + if (no_statistics) + appendPQExpBufferStr(pgdumpopts, " --no-statistics"); if (no_subscriptions) appendPQExpBufferStr(pgdumpopts, " --no-subscriptions"); if (no_toast_compression) @@ -459,6 +473,8 @@ main(int argc, char *argv[]) appendPQExpBufferStr(pgdumpopts, " --no-unlogged-table-data"); if (on_conflict_do_nothing) appendPQExpBufferStr(pgdumpopts, " --on-conflict-do-nothing"); + if (statistics_only) + appendPQExpBufferStr(pgdumpopts, " --statistics-only"); /* * If there was a database specified on the 
command line, use that, @@ -643,13 +659,13 @@ help(void) printf(_(" --lock-wait-timeout=TIMEOUT fail after waiting TIMEOUT for a table lock\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\nOptions controlling the output content:\n")); - printf(_(" -a, --data-only dump only the data, not the schema\n")); + printf(_(" -a, --data-only dump only the data, not the schema or statistics\n")); printf(_(" -c, --clean clean (drop) databases before recreating\n")); printf(_(" -E, --encoding=ENCODING dump the data in encoding ENCODING\n")); printf(_(" -g, --globals-only dump only global objects, no databases\n")); printf(_(" -O, --no-owner skip restoration of object ownership\n")); printf(_(" -r, --roles-only dump only roles, no databases or tablespaces\n")); - printf(_(" -s, --schema-only dump only the schema, no data\n")); + printf(_(" -s, --schema-only dump only the schema, no data or statistics\n")); printf(_(" -S, --superuser=NAME superuser user name to use in the dump\n")); printf(_(" -t, --tablespaces-only dump only tablespaces, no databases or roles\n")); printf(_(" -x, --no-privileges do not dump privileges (grant/revoke)\n")); @@ -664,9 +680,12 @@ help(void) printf(_(" --inserts dump data as INSERT commands, rather than COPY\n")); printf(_(" --load-via-partition-root load partitions via the root table\n")); printf(_(" --no-comments do not dump comment commands\n")); + printf(_(" --no-data do not dump data\n")); printf(_(" --no-publications do not dump publications\n")); printf(_(" --no-role-passwords do not dump passwords for roles\n")); + printf(_(" --no-schema do not dump schema\n")); printf(_(" --no-security-labels do not dump security label assignments\n")); + printf(_(" --no-statistics do not dump statistics\n")); printf(_(" --no-subscriptions do not dump subscriptions\n")); printf(_(" --no-sync do not wait for changes to be written safely to disk\n")); printf(_(" --no-table-access-method do not dump table access methods\n")); @@ -676,6 
+695,7 @@ help(void) printf(_(" --on-conflict-do-nothing add ON CONFLICT DO NOTHING to INSERT commands\n")); printf(_(" --quote-all-identifiers quote all identifiers, even if not key words\n")); printf(_(" --rows-per-insert=NROWS number of rows per INSERT; implies --inserts\n")); + printf(_(" --statistics-only dump only the statistics, not schema or data\n")); printf(_(" --use-set-session-authorization\n" " use SET SESSION AUTHORIZATION commands instead of\n" " ALTER OWNER commands to set ownership\n")); diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c index c602272d7db..13e4dc507e0 100644 --- a/src/bin/pg_dump/pg_restore.c +++ b/src/bin/pg_dump/pg_restore.c @@ -63,6 +63,8 @@ main(int argc, char **argv) int numWorkers = 1; Archive *AH; char *inputFileSpec; + bool data_only = false; + bool schema_only = false; static int disable_triggers = 0; static int enable_row_security = 0; static int if_exists = 0; @@ -71,12 +73,14 @@ main(int argc, char **argv) static int outputNoTablespaces = 0; static int use_setsessauth = 0; static int no_comments = 0; + static int no_data = 0; static int no_publications = 0; + static int no_schema = 0; static int no_security_labels = 0; + static int no_statistics = 0; static int no_subscriptions = 0; static int strict_names = 0; - bool data_only = false; - bool schema_only = false; + static int statistics_only = 0; struct option cmdopts[] = { {"clean", 0, NULL, 'c'}, @@ -124,9 +128,13 @@ main(int argc, char **argv) {"transaction-size", required_argument, NULL, 5}, {"use-set-session-authorization", no_argument, &use_setsessauth, 1}, {"no-comments", no_argument, &no_comments, 1}, + {"no-data", no_argument, &no_data, 1}, {"no-publications", no_argument, &no_publications, 1}, + {"no-schema", no_argument, &no_schema, 1}, {"no-security-labels", no_argument, &no_security_labels, 1}, {"no-subscriptions", no_argument, &no_subscriptions, 1}, + {"no-statistics", no_argument, &no_statistics, 1}, + {"statistics-only", 
no_argument, &statistics_only, 1}, {"filter", required_argument, NULL, 4}, {NULL, 0, NULL, 0} @@ -343,6 +351,10 @@ main(int argc, char **argv) if (data_only && schema_only) pg_fatal("options -s/--schema-only and -a/--data-only cannot be used together"); + if (data_only && statistics_only) + pg_fatal("options -a/--data-only and --statistics-only cannot be used together"); + if (schema_only && statistics_only) + pg_fatal("options -s/--schema-only and --statistics-only cannot be used together"); if (data_only && opts->dropSchema) pg_fatal("options -c/--clean and -a/--data-only cannot be used together"); @@ -362,8 +374,9 @@ main(int argc, char **argv) pg_fatal("cannot specify both --single-transaction and multiple jobs"); /* set derivative flags */ - opts->dumpSchema = (!data_only); - opts->dumpData = (!schema_only); + opts->dumpData = data_only || (!no_data && !schema_only && !statistics_only); + opts->dumpSchema = schema_only || (!no_schema && !data_only && !statistics_only); + opts->dumpStatistics = statistics_only || (!no_statistics && !data_only && !schema_only); opts->disable_triggers = disable_triggers; opts->enable_row_security = enable_row_security; @@ -489,14 +502,18 @@ usage(const char *progname) " in FILENAME\n")); printf(_(" --if-exists use IF EXISTS when dropping objects\n")); printf(_(" --no-comments do not restore comment commands\n")); + printf(_(" --no-data do not restore data\n")); printf(_(" --no-data-for-failed-tables do not restore data of tables that could not be\n" " created\n")); printf(_(" --no-publications do not restore publications\n")); + printf(_(" --no-schema do not restore schema\n")); printf(_(" --no-security-labels do not restore security labels\n")); + printf(_(" --no-statistics do not restore statistics\n")); printf(_(" --no-subscriptions do not restore subscriptions\n")); printf(_(" --no-table-access-method do not restore table access methods\n")); printf(_(" --no-tablespaces do not restore tablespace assignments\n")); printf(_(" 
--section=SECTION restore named section (pre-data, data, or post-data)\n")); + printf(_(" --statistics-only restore only the statistics, not schema or data\n")); printf(_(" --strict-names require table and/or schema include patterns to\n" " match at least one entity each\n")); printf(_(" --transaction-size=N commit after every N objects\n")); diff --git a/src/bin/pg_dump/t/001_basic.pl b/src/bin/pg_dump/t/001_basic.pl index 214240f1ae5..37d893d5e6a 100644 --- a/src/bin/pg_dump/t/001_basic.pl +++ b/src/bin/pg_dump/t/001_basic.pl @@ -50,12 +50,30 @@ command_fails_like( 'pg_dump: options -s/--schema-only and -a/--data-only cannot be used together' ); +command_fails_like( + [ 'pg_dump', '-s', '--statistics-only' ], + qr/\Qpg_dump: error: options -s\/--schema-only and --statistics-only cannot be used together\E/, + 'pg_dump: error: options -s/--schema-only and --statistics-only cannot be used together' +); + +command_fails_like( + [ 'pg_dump', '-a', '--statistics-only' ], + qr/\Qpg_dump: error: options -a\/--data-only and --statistics-only cannot be used together\E/, + 'pg_dump: error: options -a/--data-only and --statistics-only cannot be used together' +); + command_fails_like( [ 'pg_dump', '-s', '--include-foreign-data=xxx' ], qr/\Qpg_dump: error: options -s\/--schema-only and --include-foreign-data cannot be used together\E/, 'pg_dump: options -s/--schema-only and --include-foreign-data cannot be used together' ); +command_fails_like( + [ 'pg_dump', '--statistics-only', '--no-statistics' ], + qr/\Qpg_dump: error: options --statistics-only and --no-statistics cannot be used together\E/, + 'pg_dump: options --statistics-only and --no-statistics cannot be used together' +); + command_fails_like( [ 'pg_dump', '-j2', '--include-foreign-data=xxx' ], qr/\Qpg_dump: error: option --include-foreign-data is not supported with parallel backup\E/, diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 86df03ddd9d..3945e4f0e2a 100644 --- 
a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -65,7 +65,7 @@ my %pgdump_runs = ( '--format' => 'custom', '--file' => "$tempdir/binary_upgrade.dump", '--no-password', - '--schema-only', + '--no-data', '--binary-upgrade', '--dbname' => 'postgres', # alternative way to specify database ], @@ -710,6 +710,34 @@ my %pgdump_runs = ( '--no-large-objects', 'postgres', ], + }, + no_statistics => { + dump_cmd => [ + 'pg_dump', '--no-sync', + "--file=$tempdir/no_statistics.sql", '--no-statistics', + 'postgres', + ], + }, + no_data_no_schema => { + dump_cmd => [ + 'pg_dump', '--no-sync', + "--file=$tempdir/no_data_no_schema.sql", '--no-data', + '--no-schema', 'postgres', + ], + }, + statistics_only => { + dump_cmd => [ + 'pg_dump', '--no-sync', + "--file=$tempdir/statistics_only.sql", '--statistics-only', + 'postgres', + ], + }, + no_schema => { + dump_cmd => [ + 'pg_dump', '--no-sync', + "--file=$tempdir/no_schema.sql", '--no-schema', + 'postgres', + ], },); ############################################################### @@ -776,6 +804,7 @@ my %full_runs = ( no_large_objects => 1, no_owner => 1, no_privs => 1, + no_statistics => 1, no_table_access_method => 1, pg_dumpall_dbprivs => 1, pg_dumpall_exclude => 1, @@ -977,6 +1006,7 @@ my %tests = ( column_inserts => 1, data_only => 1, inserts => 1, + no_schema => 1, section_data => 1, test_schema_plus_large_objects => 1, }, @@ -1390,6 +1420,7 @@ my %tests = ( column_inserts => 1, data_only => 1, inserts => 1, + no_schema => 1, section_data => 1, test_schema_plus_large_objects => 1, }, @@ -1411,6 +1442,7 @@ my %tests = ( column_inserts => 1, data_only => 1, inserts => 1, + no_schema => 1, section_data => 1, test_schema_plus_large_objects => 1, }, @@ -1432,6 +1464,7 @@ my %tests = ( column_inserts => 1, data_only => 1, inserts => 1, + no_schema => 1, section_data => 1, test_schema_plus_large_objects => 1, }, @@ -1598,6 +1631,7 @@ my %tests = ( column_inserts => 1, data_only => 1, inserts => 1, + 
no_schema => 1, section_data => 1, test_schema_plus_large_objects => 1, }, @@ -1751,6 +1785,7 @@ my %tests = ( %full_runs, %dump_test_schema_runs, data_only => 1, + no_schema => 1, only_dump_test_table => 1, section_data => 1, }, @@ -1778,6 +1813,7 @@ my %tests = ( data_only => 1, exclude_test_table => 1, exclude_test_table_data => 1, + no_schema => 1, section_data => 1, }, unlike => { @@ -1798,7 +1834,10 @@ my %tests = ( \QCOPY dump_test.fk_reference_test_table (col1) FROM stdin;\E \n(?:\d\n){5}\\\.\n /xms, - like => { data_only => 1, }, + like => { + data_only => 1, + no_schema => 1, + }, }, 'COPY test_second_table' => { @@ -1814,6 +1853,7 @@ my %tests = ( %full_runs, %dump_test_schema_runs, data_only => 1, + no_schema => 1, section_data => 1, }, unlike => { @@ -1836,6 +1876,7 @@ my %tests = ( %full_runs, %dump_test_schema_runs, data_only => 1, + no_schema => 1, section_data => 1, }, unlike => { @@ -1859,6 +1900,7 @@ my %tests = ( %full_runs, %dump_test_schema_runs, data_only => 1, + no_schema => 1, section_data => 1, }, unlike => { @@ -1881,6 +1923,7 @@ my %tests = ( %full_runs, %dump_test_schema_runs, data_only => 1, + no_schema => 1, section_data => 1, }, unlike => { @@ -1903,6 +1946,7 @@ my %tests = ( %full_runs, %dump_test_schema_runs, data_only => 1, + no_schema => 1, section_data => 1, }, unlike => { @@ -3310,6 +3354,7 @@ my %tests = ( like => { %full_runs, data_only => 1, + no_schema => 1, section_data => 1, only_dump_test_schema => 1, test_schema_plus_large_objects => 1, @@ -3480,6 +3525,7 @@ my %tests = ( %full_runs, %dump_test_schema_runs, data_only => 1, + no_schema => 1, only_dump_measurement => 1, section_data => 1, only_dump_test_schema => 1, @@ -4364,6 +4410,7 @@ my %tests = ( column_inserts => 1, data_only => 1, inserts => 1, + no_schema => 1, section_data => 1, test_schema_plus_large_objects => 1, binary_upgrade => 1, @@ -4664,6 +4711,61 @@ my %tests = ( }, }, + # + # TABLE and MATVIEW stats will end up in SECTION_DATA. 
+ # INDEX stats (expression columns only) will end up in SECTION_POST_DATA. + # + 'statistics_import' => { + create_sql => ' + CREATE TABLE dump_test.has_stats + AS SELECT g.g AS x, g.g / 2 AS y FROM generate_series(1,100) AS g(g); + CREATE MATERIALIZED VIEW dump_test.has_stats_mv AS SELECT * FROM dump_test.has_stats; + CREATE INDEX dup_test_post_data_ix ON dump_test.has_stats((x - 1)); + ANALYZE dump_test.has_stats, dump_test.has_stats_mv;', + regexp => qr/pg_catalog.pg_restore_attribute_stats/, + like => { + %full_runs, + %dump_test_schema_runs, + no_data_no_schema => 1, + no_schema => 1, + section_data => 1, + section_post_data => 1, + statistics_only => 1, + }, + unlike => { + exclude_dump_test_schema => 1, + no_statistics => 1, + only_dump_measurement => 1, + schema_only => 1, + }, + }, + + # + # While attribute stats (aka pg_statistic stats) only appear for tables + # that have been analyzed, all tables will have relation stats because + # those come from pg_class. + # + 'relstats_on_unanalyzed_tables' => { + regexp => qr/pg_catalog.pg_restore_relation_stats/, + + like => { + %full_runs, + %dump_test_schema_runs, + no_data_no_schema => 1, + no_schema => 1, + only_dump_test_table => 1, + role => 1, + role_parallel => 1, + section_data => 1, + section_post_data => 1, + statistics_only => 1, + }, + unlike => { + no_statistics => 1, + schema_only => 1, + }, + }, + # CREATE TABLE with partitioned table and various AMs. One # partition uses the same default as the parent, and a second # uses its own AM. 
diff --git a/src/bin/pg_upgrade/dump.c b/src/bin/pg_upgrade/dump.c index 8ce0fa3020e..23fe7280a16 100644 --- a/src/bin/pg_upgrade/dump.c +++ b/src/bin/pg_upgrade/dump.c @@ -52,10 +52,11 @@ generate_old_dump(void) snprintf(log_file_name, sizeof(log_file_name), DB_DUMP_LOG_FILE_MASK, old_db->db_oid); parallel_exec_prog(log_file_name, NULL, - "\"%s/pg_dump\" %s --schema-only --quote-all-identifiers " + "\"%s/pg_dump\" %s --no-data %s --quote-all-identifiers " "--binary-upgrade --format=custom %s --no-sync --file=\"%s/%s\" %s", new_cluster.bindir, cluster_conn_opts(&old_cluster), log_opts.verbose ? "--verbose" : "", + user_opts.do_statistics ? "" : "--no-statistics", log_opts.dumpdir, sql_file_name, escaped_connstr.data); diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c index 108eb7a1ba4..3fd487086a5 100644 --- a/src/bin/pg_upgrade/option.c +++ b/src/bin/pg_upgrade/option.c @@ -60,6 +60,7 @@ parseCommandLine(int argc, char *argv[]) {"copy", no_argument, NULL, 2}, {"copy-file-range", no_argument, NULL, 3}, {"sync-method", required_argument, NULL, 4}, + {"no-statistics", no_argument, NULL, 5}, {NULL, 0, NULL, 0} }; @@ -70,6 +71,7 @@ parseCommandLine(int argc, char *argv[]) user_opts.do_sync = true; user_opts.transfer_mode = TRANSFER_MODE_COPY; + user_opts.do_statistics = true; os_info.progname = get_progname(argv[0]); @@ -212,6 +214,10 @@ parseCommandLine(int argc, char *argv[]) user_opts.sync_method = pg_strdup(optarg); break; + case 5: + user_opts.do_statistics = false; + break; + default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), os_info.progname); @@ -306,6 +312,7 @@ usage(void) printf(_(" --clone clone instead of copying files to new cluster\n")); printf(_(" --copy copy files to new cluster (default)\n")); printf(_(" --copy-file-range copy files to new cluster with copy_file_range\n")); + printf(_(" --no-statistics do not import statistics from old cluster\n")); printf(_(" --sync-method=METHOD set method for syncing files 
to disk\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\n" diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 0cdd675e4f1..3fe111fbde5 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -327,6 +327,7 @@ typedef struct int jobs; /* number of processes/threads to use */ char *socketdir; /* directory to use for Unix sockets */ char *sync_method; + bool do_statistics; /* carry over statistics from old cluster */ } UserOpts; typedef struct diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl index 68516fa486a..45ea94c84bb 100644 --- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl +++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl @@ -278,6 +278,9 @@ push @initdb_params, ('--locale-provider', 'libc'); $node_params{extra} = \@initdb_params; $newnode->init(%node_params); +# Stabilize stats for comparison. +$newnode->append_conf('postgresql.conf', 'autovacuum = off'); + my $newbindir = $newnode->config_data('--bindir'); my $oldbindir = $oldnode->config_data('--bindir'); @@ -314,6 +317,10 @@ if (defined($ENV{oldinstall})) } } +# Stabilize stats before pg_dumpall. +$oldnode->append_conf('postgresql.conf', 'autovacuum = off'); +$oldnode->restart; + # Take a dump before performing the upgrade as a base comparison. Note # that we need to use pg_dumpall from the new node here. my @dump_command = ( diff --git a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm index 38c96f76fbf..264c33b45d0 100644 --- a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm +++ b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm @@ -294,6 +294,11 @@ sub adjust_old_dumpfile # Version comments will certainly not match. $dump =~ s/^-- Dumped from database version.*\n//mg; + # Same with version argument to pg_restore_relation_stats() or + # pg_restore_attribute_stats(). 
+ $dump =~ s ['version', '${old_version}\d{4}'::integer,] + ['version', '000000'::integer,]mg; + if ($old_version < 16) { # Fix up some view queries that no longer require table-qualification. @@ -626,6 +631,11 @@ sub adjust_new_dumpfile # Version comments will certainly not match. $dump =~ s/^-- Dumped from database version.*\n//mg; + # Same with version argument to pg_restore_relation_stats() or + # pg_restore_attribute_stats(). + $dump =~ s ['version', '\d{6}'::integer,] + ['version', '000000'::integer,]mg; + if ($old_version < 14) { # Suppress noise-word uses of IN in CREATE/ALTER PROCEDURE. diff --git a/src/test/recovery/t/027_stream_regress.pl b/src/test/recovery/t/027_stream_regress.pl index 0eac8f66a9c..83def062d11 100644 --- a/src/test/recovery/t/027_stream_regress.pl +++ b/src/test/recovery/t/027_stream_regress.pl @@ -107,7 +107,7 @@ command_ok( [ 'pg_dumpall', '--file' => $outputdir . '/primary.dump', - '--no-sync', + '--no-sync', '--no-statistics', '--port' => $node_primary->port, '--no-unlogged-table-data', # if unlogged, standby has schema only ], @@ -116,7 +116,7 @@ command_ok( [ 'pg_dumpall', '--file' => $outputdir . '/standby.dump', - '--no-sync', + '--no-sync', '--no-statistics', '--port' => $node_standby_1->port, ], 'dump standby server'); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index fb39c915d76..98ab45adfa3 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2403,6 +2403,7 @@ RelMapFile RelMapping RelOptInfo RelOptKind +RelStatsInfo RelToCheck RelToCluster RelabelType