Use "template" data directory in tests
authorAndres Freund <andres@anarazel.de>
Thu, 24 Aug 2023 21:17:03 +0000 (14:17 -0700)
committerAndres Freund <andres@anarazel.de>
Thu, 24 Aug 2023 21:38:02 +0000 (14:38 -0700)
When running all (or just many) of our tests, a significant portion of both
CPU time and IO is spent running initdb. Most of those initdb runs don't
specify any options influencing properties of the created data directory.

Avoid most of that overhead by creating a "template" data directory, alongside
the temporary installation. Instead of running initdb, pg_regress and tap
tests can copy that data directory. When a tap test specifies options to
initdb, the template data directory is not used. That could be relaxed for
some options, but it's not clear it's worth the effort.

There unfortunately is some duplication between pg_regress.c and Cluster.pm,
but there are no easy ways of sharing that code without introducing additional
complexity.

Reviewed-by: Daniel Gustafsson <daniel@yesql.se>
Discussion: https://postgr.es/m/20220120021859.3zpsfqn4z7ob7afz@alap3.anarazel.de

.cirrus.tasks.yml
meson.build
src/Makefile.global.in
src/test/perl/PostgreSQL/Test/Cluster.pm
src/test/regress/pg_regress.c

index 0cf7ba779964e206c99a1143492e18b3734e3a27..e137769850dfb77106492ee7b5849fe686b617c9 100644 (file)
@@ -109,8 +109,9 @@ task:
   test_minimal_script: |
     su postgres <<-EOF
       ulimit -c unlimited
+      meson test $MTEST_ARGS --suite setup
       meson test $MTEST_ARGS --num-processes ${TEST_JOBS} \
-        tmp_install cube/regress pg_ctl/001_start_stop
+        cube/regress pg_ctl/001_start_stop
     EOF
 
   on_failure:
index f5ec442f9a97972157038765d090cc5d530f2e28..8b2b521a013003b012ece9fc90186f7e88376396 100644 (file)
@@ -3070,8 +3070,10 @@ testport = 40000
 test_env = environment()
 
 temp_install_bindir = test_install_location / get_option('bindir')
+test_initdb_template = meson.build_root() / 'tmp_install' / 'initdb-template'
 test_env.set('PG_REGRESS', pg_regress.full_path())
 test_env.set('REGRESS_SHLIB', regress_module.full_path())
+test_env.set('INITDB_TEMPLATE', test_initdb_template)
 
 # Test suites that are not safe by default but can be run if selected
 # by the user via the whitespace-separated list in variable PG_TEST_EXTRA.
@@ -3086,6 +3088,34 @@ if library_path_var != ''
 endif
 
 
+# Create (and remove old) initdb template directory. Tests use that, where
+# possible, to make it cheaper to run tests.
+#
+# Use python to remove the old cached initdb, as we cannot rely on a working
+# 'rm' binary on windows.
+test('initdb_cache',
+     python,
+     args: [
+       '-c', '''
+import shutil
+import sys
+import subprocess
+
+shutil.rmtree(sys.argv[1], ignore_errors=True)
+sp = subprocess.run(sys.argv[2:] + [sys.argv[1]])
+sys.exit(sp.returncode)
+''',
+       test_initdb_template,
+       temp_install_bindir / 'initdb',
+       '-A', 'trust', '-N', '--no-instructions'
+     ],
+     priority: setup_tests_priority - 1,
+     timeout: 300,
+     is_parallel: false,
+     env: test_env,
+     suite: ['setup'])
+
+
 
 ###############################################################
 # Test Generation
index df9f721a41a32249e06570e19133e88e67f06fac..0b4ca0eb6ae2c2b7782861b57d86d3702576f2df 100644 (file)
@@ -397,6 +397,25 @@ check: temp-install
 
 .PHONY: temp-install
 
+
+# prepend to path if already set, else just set it
+define add_to_path
+$(1)="$(if $($(1)),$(2):$$$(1),$(2))"
+endef
+
+# platform-specific environment variable to set shared library path
+# individual ports can override this later, this is the default name
+ld_library_path_var = LD_LIBRARY_PATH
+
+# with_temp_install_extra is for individual ports to define if they
+# need something more here. If not defined then the expansion does
+# nothing.
+with_temp_install = \
+   PATH="$(abs_top_builddir)/tmp_install$(bindir):$(CURDIR):$$PATH" \
+   $(call add_to_path,$(strip $(ld_library_path_var)),$(abs_top_builddir)/tmp_install$(libdir)) \
+   INITDB_TEMPLATE='$(abs_top_builddir)'/tmp_install/initdb-template \
+   $(with_temp_install_extra)
+
 temp-install: | submake-generated-headers
 ifndef NO_TEMP_INSTALL
 ifneq ($(abs_top_builddir),)
@@ -405,6 +424,8 @@ ifeq ($(MAKELEVEL),0)
    $(MKDIR_P) '$(abs_top_builddir)'/tmp_install/log
    $(MAKE) -C '$(top_builddir)' DESTDIR='$(abs_top_builddir)'/tmp_install install >'$(abs_top_builddir)'/tmp_install/log/install.log 2>&1
    $(MAKE) -j1 $(if $(CHECKPREP_TOP),-C $(CHECKPREP_TOP),) checkprep >>'$(abs_top_builddir)'/tmp_install/log/install.log 2>&1
+
+   $(with_temp_install) initdb -A trust -N --no-instructions '$(abs_top_builddir)'/tmp_install/initdb-template >>'$(abs_top_builddir)'/tmp_install/log/initdb-template.log 2>&1
 endif
 endif
 endif
@@ -422,23 +443,6 @@ PG_PROVE_FLAGS = -I $(top_srcdir)/src/test/perl/ -I $(srcdir)
 # User-supplied prove flags such as --verbose can be provided in PROVE_FLAGS.
 PROVE_FLAGS =
 
-# prepend to path if already set, else just set it
-define add_to_path
-$(1)="$(if $($(1)),$(2):$$$(1),$(2))"
-endef
-
-# platform-specific environment variable to set shared library path
-# individual ports can override this later, this is the default name
-ld_library_path_var = LD_LIBRARY_PATH
-
-# with_temp_install_extra is for individual ports to define if they
-# need something more here. If not defined then the expansion does
-# nothing.
-with_temp_install = \
-   PATH="$(abs_top_builddir)/tmp_install$(bindir):$(CURDIR):$$PATH" \
-   $(call add_to_path,$(strip $(ld_library_path_var)),$(abs_top_builddir)/tmp_install$(libdir)) \
-   $(with_temp_install_extra)
-
 ifeq ($(enable_tap_tests),yes)
 
 ifndef PGXS
index 41409ba1bed5802188fd39bbebaf90bdb7d6cba5..426f94ff09c5adf6c8d77f14268dc1bcaecd8008 100644 (file)
@@ -522,8 +522,50 @@ sub init
    mkdir $self->backup_dir;
    mkdir $self->archive_dir;
 
-   PostgreSQL::Test::Utils::system_or_bail('initdb', '-D', $pgdata, '-A',
-       'trust', '-N', @{ $params{extra} });
+   # If available and if there aren't any parameters, use a previously
+   # initdb'd cluster as a template by copying it. For a lot of tests, that's
+   # substantially cheaper. Do so only if there aren't parameters, it doesn't
+   # seem worth figuring out whether they affect compatibility.
+   #
+   # There's very similar code in pg_regress.c, but we can't easily
+   # deduplicate it until we require perl at build time.
+   if (defined $params{extra} or !defined $ENV{INITDB_TEMPLATE})
+   {
+       note("initializing database system by running initdb");
+       PostgreSQL::Test::Utils::system_or_bail('initdb', '-D', $pgdata, '-A',
+           'trust', '-N', @{ $params{extra} });
+   }
+   else
+   {
+       my @copycmd;
+       my $expected_exitcode;
+
+       note("initializing database system by copying initdb template");
+
+       if ($PostgreSQL::Test::Utils::windows_os)
+       {
+           @copycmd = qw(robocopy /E /NJS /NJH /NFL /NDL /NP);
+           $expected_exitcode = 1;    # 1 denotes files were copied
+       }
+       else
+       {
+           @copycmd = qw(cp -a);
+           $expected_exitcode = 0;
+       }
+
+       @copycmd = (@copycmd, $ENV{INITDB_TEMPLATE}, $pgdata);
+
+       my $ret = PostgreSQL::Test::Utils::system_log(@copycmd);
+
+       # See http://perldoc.perl.org/perlvar.html#%24CHILD_ERROR
+       if ($ret & 127 or $ret >> 8 != $expected_exitcode)
+       {
+           BAIL_OUT(
+               sprintf("failed to execute command \"%s\": $ret",
+                   join(" ", @copycmd)));
+       }
+   }
+
    PostgreSQL::Test::Utils::system_or_bail($ENV{PG_REGRESS},
        '--config-auth', $pgdata, @{ $params{auth_extra} });
 
index b68632320a72597ae4996b820b756e742c1b9708..06674141a31cd22f84979ced8f928bd64f0bb1ef 100644 (file)
@@ -2295,6 +2295,7 @@ regression_main(int argc, char *argv[],
        FILE       *pg_conf;
        const char *env_wait;
        int         wait_seconds;
+       const char *initdb_template_dir;
 
        /*
         * Prepare the temp instance
@@ -2316,25 +2317,66 @@ regression_main(int argc, char *argv[],
        if (!directory_exists(buf))
            make_directory(buf);
 
-       /* initdb */
        initStringInfo(&cmd);
-       appendStringInfo(&cmd,
-                        "\"%s%sinitdb\" -D \"%s/data\" --no-clean --no-sync",
-                        bindir ? bindir : "",
-                        bindir ? "/" : "",
-                        temp_instance);
-       if (debug)
-           appendStringInfo(&cmd, " --debug");
-       if (nolocale)
-           appendStringInfo(&cmd, " --no-locale");
-       appendStringInfo(&cmd, " > \"%s/log/initdb.log\" 2>&1", outputdir);
-       fflush(NULL);
-       if (system(cmd.data))
+
+       /*
+        * Create data directory.
+        *
+        * If available, use a previously initdb'd cluster as a template by
+        * copying it. For a lot of tests, that's substantially cheaper.
+        *
+        * There's very similar code in Cluster.pm, but we can't easily de
+        * duplicate it until we require perl at build time.
+        */
+       initdb_template_dir = getenv("INITDB_TEMPLATE");
+       if (initdb_template_dir == NULL || nolocale || debug)
+       {
+           note("initializing database system by running initdb");
+
+           appendStringInfo(&cmd,
+                            "\"%s%sinitdb\" -D \"%s/data\" --no-clean --no-sync",
+                            bindir ? bindir : "",
+                            bindir ? "/" : "",
+                            temp_instance);
+           if (debug)
+               appendStringInfo(&cmd, " --debug");
+           if (nolocale)
+               appendStringInfo(&cmd, " --no-locale");
+           appendStringInfo(&cmd, " > \"%s/log/initdb.log\" 2>&1", outputdir);
+           fflush(NULL);
+           if (system(cmd.data))
+           {
+               bail("initdb failed\n"
+                    "# Examine \"%s/log/initdb.log\" for the reason.\n"
+                    "# Command was: %s",
+                    outputdir, cmd.data);
+           }
+       }
+       else
        {
-           bail("initdb failed\n"
-                "# Examine \"%s/log/initdb.log\" for the reason.\n"
-                "# Command was: %s",
-                outputdir, cmd.data);
+#ifndef WIN32
+           const char *copycmd = "cp -a \"%s\" \"%s/data\"";
+           int         expected_exitcode = 0;
+#else
+           const char *copycmd = "robocopy /E /NJS /NJH /NFL /NDL /NP \"%s\" \"%s/data\"";
+           int         expected_exitcode = 1;  /* 1 denotes files were copied */
+#endif
+
+           note("initializing database system by copying initdb template");
+
+           appendStringInfo(&cmd,
+                            copycmd,
+                            initdb_template_dir,
+                            temp_instance);
+           appendStringInfo(&cmd, " > \"%s/log/initdb.log\" 2>&1", outputdir);
+           fflush(NULL);
+           if (system(cmd.data) != expected_exitcode)
+           {
+               bail("copying of initdb template failed\n"
+                    "# Examine \"%s/log/initdb.log\" for the reason.\n"
+                    "# Command was: %s",
+                    outputdir, cmd.data);
+           }
        }
 
        pfree(cmd.data);