vacuumdb: Teach vacuum_one_database() to reuse query results.

author Nathan Bossart <nathan@postgresql.org>

Tue, 18 Mar 2025 21:32:55 +0000 (16:32 -0500)

committer Nathan Bossart <nathan@postgresql.org>

Tue, 18 Mar 2025 21:32:55 +0000 (16:32 -0500)
author Nathan Bossart <nathan@postgresql.org>
Tue, 18 Mar 2025 21:32:55 +0000 (16:32 -0500)
committer Nathan Bossart <nathan@postgresql.org>
Tue, 18 Mar 2025 21:32:55 +0000 (16:32 -0500)
diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c

index 982bf070be6f2cc543f654a66d8bacda4c258a82..e28f82a0ebaf449cbf5f4014a3b6c40f0af3224f 100644 (file)
--- a/src/bin/scripts/vacuumdb.c
+++ b/src/bin/scripts/vacuumdb.c
@@ -62,10 +62,16 @@ typedef enum
  
  static VacObjFilter objfilter = OBJFILTER_NONE;
  
+static SimpleStringList *retrieve_objects(PGconn *conn,
+                                         vacuumingOptions *vacopts,
+                                         SimpleStringList *objects,
+                                         bool echo);
+
  static void vacuum_one_database(ConnParams *cparams,
                                 vacuumingOptions *vacopts,
                                 int stage,
                                 SimpleStringList *objects,
+                               SimpleStringList **found_objs,
                                 int concurrentCons,
                                 const char *progname, bool echo, bool quiet);
  
@@ -405,7 +411,7 @@ main(int argc, char *argv[])
             {
                 vacuum_one_database(&cparams, &vacopts,
                                     stage,
-                                   &objects,
+                                   &objects, NULL,
                                     concurrentCons,
                                     progname, echo, quiet);
             }
@@ -413,7 +419,7 @@ main(int argc, char *argv[])
         else
             vacuum_one_database(&cparams, &vacopts,
                                 ANALYZE_NO_STAGE,
-                               &objects,
+                               &objects, NULL,
                                 concurrentCons,
                                 progname, echo, quiet);
     }
@@ -461,8 +467,36 @@ escape_quotes(const char *src)
  /*
   * vacuum_one_database
   *
- * Process tables in the given database.  If the 'objects' list is empty,
- * process all tables in the database.
+ * Process tables in the given database.
+ *
+ * There are two ways to specify the list of objects to process:
+ *
+ * 1) The "found_objs" parameter is a double pointer to a fully qualified list
+ *    of objects to process, as returned by a previous call to
+ *    vacuum_one_database().
+ *
+ *     a) If both "found_objs" (the double pointer) and "*found_objs" (the
+ *        once-dereferenced double pointer) are not NULL, this list takes
+ *        priority, and anything specified in "objects" is ignored.
+ *
+ *     b) If "found_objs" (the double pointer) is not NULL but "*found_objs"
+ *        (the once-dereferenced double pointer) _is_ NULL, the "objects"
+ *        parameter takes priority, and the results of the catalog query
+ *        described in (2) are stored in "*found_objs".
+ *
+ *     c) If "found_objs" (the double pointer) is NULL, the "objects"
+ *        parameter again takes priority, and the results of the catalog query
+ *        are not saved.
+ *
+ * 2) The "objects" parameter is a user-specified list of objects to process.
+ *    When (1b) or (1c) applies, this function performs a catalog query to
+ *    retrieve a fully qualified list of objects to process, as described
+ *    below.
+ *
+ *     a) If "objects" is a non-empty list, the catalog query gathers only
+ *        the objects listed in "objects".
+ *
+ *     b) If "objects" is NULL or empty, all database tables are gathered.
   *
   * Note that this function is only concerned with running exactly one stage
   * when in analyze-in-stages mode; caller must iterate on us if necessary.
@@ -475,22 +509,18 @@ vacuum_one_database(ConnParams *cparams,
                     vacuumingOptions *vacopts,
                     int stage,
                     SimpleStringList *objects,
+                   SimpleStringList **found_objs,
                     int concurrentCons,
                     const char *progname, bool echo, bool quiet)
  {
     PQExpBufferData sql;
-   PQExpBufferData buf;
-   PQExpBufferData catalog_query;
-   PGresult   *res;
     PGconn     *conn;
     SimpleStringListCell *cell;
     ParallelSlotArray *sa;
-   SimpleStringList dbtables = {NULL, NULL};
-   int         i;
-   int         ntups;
+   int         ntups = 0;
     bool        failed = false;
-   bool        objects_listed = false;
     const char *initcmd;
+   SimpleStringList *ret = NULL;
     const char *stage_commands[] = {
         "SET default_statistics_target=1; SET vacuum_cost_delay=0;",
         "SET default_statistics_target=10; RESET vacuum_cost_delay;",
@@ -599,19 +629,155 @@ vacuum_one_database(ConnParams *cparams,
     }
  
     /*
-    * Prepare the list of tables to process by querying the catalogs.
-    *
-    * Since we execute the constructed query with the default search_path
-    * (which could be unsafe), everything in this query MUST be fully
-    * qualified.
-    *
-    * First, build a WITH clause for the catalog query if any tables were
-    * specified, with a set of values made of relation names and their
-    * optional set of columns.  This is used to match any provided column
-    * lists with the generated qualified identifiers and to filter for the
-    * tables provided via --table.  If a listed table does not exist, the
-    * catalog query will fail.
+    * If the caller provided the results of a previous catalog query, just
+    * use that.  Otherwise, run the catalog query ourselves and set the
+    * return variable if provided.
+    */
+   if (found_objs && *found_objs)
+       ret = *found_objs;
+   else
+   {
+       ret = retrieve_objects(conn, vacopts, objects, echo);
+       if (found_objs)
+           *found_objs = ret;
+   }
+
+   /*
+    * Count the number of objects in the catalog query result.  If there are
+    * none, we are done.
+    */
+   for (cell = ret ? ret->head : NULL; cell; cell = cell->next)
+       ntups++;
+
+   if (ntups == 0)
+   {
+       PQfinish(conn);
+       return;
+   }
+
+   /*
+    * Ensure concurrentCons is sane.  If there are more connections than
+    * vacuumable relations, we don't need to use them all.
      */
+   if (concurrentCons > ntups)
+       concurrentCons = ntups;
+   if (concurrentCons <= 0)
+       concurrentCons = 1;
+
+   /*
+    * All slots need to be prepared to run the appropriate analyze stage, if
+    * caller requested that mode.  We have to prepare the initial connection
+    * ourselves before setting up the slots.
+    */
+   if (stage == ANALYZE_NO_STAGE)
+       initcmd = NULL;
+   else
+   {
+       initcmd = stage_commands[stage];
+       executeCommand(conn, initcmd, echo);
+   }
+
+   /*
+    * Setup the database connections. We reuse the connection we already have
+    * for the first slot.  If not in parallel mode, the first slot in the
+    * array contains the connection.
+    */
+   sa = ParallelSlotsSetup(concurrentCons, cparams, progname, echo, initcmd);
+   ParallelSlotsAdoptConn(sa, conn);
+
+   initPQExpBuffer(&sql);
+
+   cell = ret->head;
+   do
+   {
+       const char *tabname = cell->val;
+       ParallelSlot *free_slot;
+
+       if (CancelRequested)
+       {
+           failed = true;
+           goto finish;
+       }
+
+       free_slot = ParallelSlotsGetIdle(sa, NULL);
+       if (!free_slot)
+       {
+           failed = true;
+           goto finish;
+       }
+
+       prepare_vacuum_command(&sql, PQserverVersion(free_slot->connection),
+                              vacopts, tabname);
+
+       /*
+        * Execute the vacuum.  All errors are handled in processQueryResult
+        * through ParallelSlotsGetIdle.
+        */
+       ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
+       run_vacuum_command(free_slot->connection, sql.data,
+                          echo, tabname);
+
+       cell = cell->next;
+   } while (cell != NULL);
+
+   if (!ParallelSlotsWaitCompletion(sa))
+   {
+       failed = true;
+       goto finish;
+   }
+
+   /* If we used SKIP_DATABASE_STATS, mop up with ONLY_DATABASE_STATS */
+   if (vacopts->skip_database_stats && stage == ANALYZE_NO_STAGE)
+   {
+       const char *cmd = "VACUUM (ONLY_DATABASE_STATS);";
+       ParallelSlot *free_slot = ParallelSlotsGetIdle(sa, NULL);
+
+       if (!free_slot)
+       {
+           failed = true;
+           goto finish;
+       }
+
+       ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
+       run_vacuum_command(free_slot->connection, cmd, echo, NULL);
+
+       if (!ParallelSlotsWaitCompletion(sa))
+           failed = true;
+   }
+
+finish:
+   ParallelSlotsTerminate(sa);
+   pg_free(sa);
+
+   termPQExpBuffer(&sql);
+
+   if (failed)
+       exit(1);
+}
+
+/*
+ * Prepare the list of tables to process by querying the catalogs.
+ *
+ * Since we execute the constructed query with the default search_path (which
+ * could be unsafe), everything in this query MUST be fully qualified.
+ *
+ * First, build a WITH clause for the catalog query if any tables were
+ * specified, with a set of values made of relation names and their optional
+ * set of columns.  This is used to match any provided column lists with the
+ * generated qualified identifiers and to filter for the tables provided via
+ * --table.  If a listed table does not exist, the catalog query will fail.
+ */
+static SimpleStringList *
+retrieve_objects(PGconn *conn, vacuumingOptions *vacopts,
+                SimpleStringList *objects, bool echo)
+{
+   PQExpBufferData buf;
+   PQExpBufferData catalog_query;
+   PGresult   *res;
+   SimpleStringListCell *cell;
+   SimpleStringList *found_objs = palloc0(sizeof(SimpleStringList));
+   bool        objects_listed = false;
+
     initPQExpBuffer(&catalog_query);
     for (cell = objects ? objects->head : NULL; cell; cell = cell->next)
     {
@@ -765,23 +931,12 @@ vacuum_one_database(ConnParams *cparams,
     termPQExpBuffer(&catalog_query);
     PQclear(executeQuery(conn, ALWAYS_SECURE_SEARCH_PATH_SQL, echo));
  
-   /*
-    * If no rows are returned, there are no matching tables, so we are done.
-    */
-   ntups = PQntuples(res);
-   if (ntups == 0)
-   {
-       PQclear(res);
-       PQfinish(conn);
-       return;
-   }
-
     /*
      * Build qualified identifiers for each table, including the column list
      * if given.
      */
     initPQExpBuffer(&buf);
-   for (i = 0; i < ntups; i++)
+   for (int i = 0; i < PQntuples(res); i++)
     {
         appendPQExpBufferStr(&buf,
                              fmtQualifiedIdEnc(PQgetvalue(res, i, 1),
@@ -791,110 +946,13 @@ vacuum_one_database(ConnParams *cparams,
         if (objects_listed && !PQgetisnull(res, i, 2))
             appendPQExpBufferStr(&buf, PQgetvalue(res, i, 2));
  
-       simple_string_list_append(&dbtables, buf.data);
+       simple_string_list_append(found_objs, buf.data);
         resetPQExpBuffer(&buf);
     }
     termPQExpBuffer(&buf);
     PQclear(res);
  
-   /*
-    * Ensure concurrentCons is sane.  If there are more connections than
-    * vacuumable relations, we don't need to use them all.
-    */
-   if (concurrentCons > ntups)
-       concurrentCons = ntups;
-   if (concurrentCons <= 0)
-       concurrentCons = 1;
-
-   /*
-    * All slots need to be prepared to run the appropriate analyze stage, if
-    * caller requested that mode.  We have to prepare the initial connection
-    * ourselves before setting up the slots.
-    */
-   if (stage == ANALYZE_NO_STAGE)
-       initcmd = NULL;
-   else
-   {
-       initcmd = stage_commands[stage];
-       executeCommand(conn, initcmd, echo);
-   }
-
-   /*
-    * Setup the database connections. We reuse the connection we already have
-    * for the first slot.  If not in parallel mode, the first slot in the
-    * array contains the connection.
-    */
-   sa = ParallelSlotsSetup(concurrentCons, cparams, progname, echo, initcmd);
-   ParallelSlotsAdoptConn(sa, conn);
-
-   initPQExpBuffer(&sql);
-
-   cell = dbtables.head;
-   do
-   {
-       const char *tabname = cell->val;
-       ParallelSlot *free_slot;
-
-       if (CancelRequested)
-       {
-           failed = true;
-           goto finish;
-       }
-
-       free_slot = ParallelSlotsGetIdle(sa, NULL);
-       if (!free_slot)
-       {
-           failed = true;
-           goto finish;
-       }
-
-       prepare_vacuum_command(&sql, PQserverVersion(free_slot->connection),
-                              vacopts, tabname);
-
-       /*
-        * Execute the vacuum.  All errors are handled in processQueryResult
-        * through ParallelSlotsGetIdle.
-        */
-       ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
-       run_vacuum_command(free_slot->connection, sql.data,
-                          echo, tabname);
-
-       cell = cell->next;
-   } while (cell != NULL);
-
-   if (!ParallelSlotsWaitCompletion(sa))
-   {
-       failed = true;
-       goto finish;
-   }
-
-   /* If we used SKIP_DATABASE_STATS, mop up with ONLY_DATABASE_STATS */
-   if (vacopts->skip_database_stats && stage == ANALYZE_NO_STAGE)
-   {
-       const char *cmd = "VACUUM (ONLY_DATABASE_STATS);";
-       ParallelSlot *free_slot = ParallelSlotsGetIdle(sa, NULL);
-
-       if (!free_slot)
-       {
-           failed = true;
-           goto finish;
-       }
-
-       ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
-       run_vacuum_command(free_slot->connection, cmd, echo, NULL);
-
-       if (!ParallelSlotsWaitCompletion(sa))
-           failed = true;
-   }
-
-finish:
-   ParallelSlotsTerminate(sa);
-   pg_free(sa);
-
-   termPQExpBuffer(&sql);
-
-   if (failed)
-       exit(1);
+   return found_objs;
  }
  
  /*
@@ -941,7 +999,7 @@ vacuum_all_databases(ConnParams *cparams,
  
                 vacuum_one_database(cparams, vacopts,
                                     stage,
-                                   objects,
+                                   objects, NULL,
                                     concurrentCons,
                                     progname, echo, quiet);
             }
@@ -955,7 +1013,7 @@ vacuum_all_databases(ConnParams *cparams,
  
             vacuum_one_database(cparams, vacopts,
                                 ANALYZE_NO_STAGE,
-                               objects,
+                               objects, NULL,
                                 concurrentCons,
                                 progname, echo, quiet);
         }
author	Nathan Bossart <nathan@postgresql.org>
	Tue, 18 Mar 2025 21:32:55 +0000 (16:32 -0500)
committer	Nathan Bossart <nathan@postgresql.org>
	Tue, 18 Mar 2025 21:32:55 +0000 (16:32 -0500)