pg_prewarm, a contrib module for prewarming relation data.
author Robert Haas <rhaas@postgresql.org>
Fri, 20 Dec 2013 13:08:01 +0000 (08:08 -0500)
committer Robert Haas <rhaas@postgresql.org>
Fri, 20 Dec 2013 13:14:13 +0000 (08:14 -0500)
Patch by me.  Review by Álvaro Herrera, Amit Kapila, Jeff Janes,
Gurjeet Singh, and others.

contrib/Makefile
contrib/pg_prewarm/Makefile [new file with mode: 0644]
contrib/pg_prewarm/pg_prewarm--1.0.sql [new file with mode: 0644]
contrib/pg_prewarm/pg_prewarm.c [new file with mode: 0644]
contrib/pg_prewarm/pg_prewarm.control [new file with mode: 0644]
doc/src/sgml/contrib.sgml
doc/src/sgml/filelist.sgml
doc/src/sgml/pgprewarm.sgml [new file with mode: 0644]
src/tools/pgindent/typedefs.list

index 8a2a9377e6de897370c0f03f95d83c8d844652c0..dd2683b99a0267f18a56b84dbd474225f9e47f2d 100644 (file)
@@ -32,6 +32,7 @@ SUBDIRS = \
        pg_archivecleanup \
        pg_buffercache  \
        pg_freespacemap \
+       pg_prewarm  \
        pg_standby  \
        pg_stat_statements \
        pg_test_fsync   \
diff --git a/contrib/pg_prewarm/Makefile b/contrib/pg_prewarm/Makefile
new file mode 100644 (file)
index 0000000..176a29a
--- /dev/null
@@ -0,0 +1,18 @@
+# contrib/pg_prewarm/Makefile
+
+MODULE_big = pg_prewarm
+OBJS = pg_prewarm.o
+
+EXTENSION = pg_prewarm
+DATA = pg_prewarm--1.0.sql
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pg_prewarm
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/pg_prewarm/pg_prewarm--1.0.sql b/contrib/pg_prewarm/pg_prewarm--1.0.sql
new file mode 100644 (file)
index 0000000..2bec776
--- /dev/null
@@ -0,0 +1,14 @@
+/* contrib/pg_prewarm/pg_prewarm--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION pg_prewarm" to load this file. \quit
+
+-- Register the function; every argument except the relation has a default.
+CREATE FUNCTION pg_prewarm(regclass,
+                          mode text default 'buffer',
+                          fork text default 'main',
+                          first_block int8 default null,
+                          last_block int8 default null)
+RETURNS int8
+AS 'MODULE_PATHNAME', 'pg_prewarm'
+LANGUAGE C;
diff --git a/contrib/pg_prewarm/pg_prewarm.c b/contrib/pg_prewarm/pg_prewarm.c
new file mode 100644 (file)
index 0000000..649471d
--- /dev/null
@@ -0,0 +1,205 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_prewarm.c
+ *       prewarming utilities
+ *
+ * Copyright (c) 2010-2013, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *       contrib/pg_prewarm/pg_prewarm.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "access/heapam.h"
+#include "catalog/catalog.h"
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/smgr.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+
+PG_MODULE_MAGIC;
+
+extern Datum pg_prewarm(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(pg_prewarm);
+
+typedef enum                   /* prewarming method chosen by the "mode" argument */
+{
+   PREWARM_PREFETCH,           /* 'prefetch': hint the OS, don't wait for it */
+   PREWARM_READ,               /* 'read': read blocks, but not into shared buffers */
+   PREWARM_BUFFER              /* 'buffer': pull blocks into shared_buffers */
+} PrewarmType;
+
+static char blockbuffer[BLCKSZ];   /* target of smgrread() in 'read' mode */
+
+/*
+ * pg_prewarm(regclass, mode text, fork text,
+ *           first_block int8, last_block int8)
+ *
+ * The first argument is the relation to be prewarmed; the second controls
+ * how prewarming is done; legal options are 'prefetch', 'read', and 'buffer'.
+ * The third is the name of the relation fork to be prewarmed. The fourth
+ * and fifth arguments specify the first and last block to be prewarmed.
+ * If the fourth argument is NULL, it will be taken as 0; if the fifth argument
+ * is NULL, it will be taken as the number of blocks in the relation.  The
+ * return value is the number of blocks successfully prewarmed.
+ */
+Datum
+pg_prewarm(PG_FUNCTION_ARGS)
+{
+   Oid         relOid;
+   text       *forkName;
+   text       *type;
+   int64       first_block;
+   int64       last_block;
+   int64       nblocks;
+   int64       blocks_done = 0;
+   int64       block;
+   Relation    rel;
+   ForkNumber  forkNumber;
+   char       *forkString;
+   char       *ttype;
+   PrewarmType ptype;
+   AclResult   aclresult;
+
+   /* Basic sanity checking. */
+   if (PG_ARGISNULL(0))
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("relation cannot be null")));
+   relOid = PG_GETARG_OID(0);
+   if (PG_ARGISNULL(1))
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                (errmsg("prewarm type cannot be null"))));
+   type = PG_GETARG_TEXT_P(1);
+   ttype = text_to_cstring(type);
+   if (strcmp(ttype, "prefetch") == 0)
+       ptype = PREWARM_PREFETCH;
+   else if (strcmp(ttype, "read") == 0)
+       ptype = PREWARM_READ;
+   else if (strcmp(ttype, "buffer") == 0)
+       ptype = PREWARM_BUFFER;
+   else
+   {
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("invalid prewarm type"),
+                errhint("Valid prewarm types are \"prefetch\", \"read\", and \"buffer\".")));
+       PG_RETURN_INT64(0);     /* Placate compiler. */
+   }
+   if (PG_ARGISNULL(2))
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                (errmsg("relation fork cannot be null"))));
+   forkName = PG_GETARG_TEXT_P(2);
+   forkString = text_to_cstring(forkName);
+   forkNumber = forkname_to_number(forkString);
+
+   /* Open relation and check privileges. */
+   rel = relation_open(relOid, AccessShareLock);
+   aclresult = pg_class_aclcheck(relOid, GetUserId(), ACL_SELECT);
+   if (aclresult != ACLCHECK_OK)
+       aclcheck_error(aclresult, ACL_KIND_CLASS, get_rel_name(relOid));
+
+   /* Check that the fork exists. */
+   RelationOpenSmgr(rel);
+   if (!smgrexists(rel->rd_smgr, forkNumber))
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("fork \"%s\" does not exist for this relation",
+                       forkString)));
+
+   /* Validate block numbers, or handle nulls. */
+   nblocks = RelationGetNumberOfBlocksInFork(rel, forkNumber);
+   if (PG_ARGISNULL(3))
+       first_block = 0;
+   else
+   {
+       first_block = PG_GETARG_INT64(3);
+       if (first_block < 0 || first_block >= nblocks)
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                    errmsg("starting block number must be between 0 and " INT64_FORMAT,
+                           nblocks - 1)));
+   }
+   if (PG_ARGISNULL(4))
+       last_block = nblocks - 1;
+   else
+   {
+       last_block = PG_GETARG_INT64(4);
+       if (last_block < 0 || last_block >= nblocks)
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+           errmsg("ending block number must be between 0 and " INT64_FORMAT,
+                  nblocks - 1)));
+   }
+
+   /* Now we're ready to do the real work. */
+   if (ptype == PREWARM_PREFETCH)
+   {
+#ifdef USE_PREFETCH
+
+       /*
+        * In prefetch mode, we just hint the OS to read the blocks, but we
+        * don't know whether it really does it, and we don't wait for it to
+        * finish.
+        *
+        * It would probably be better to pass our prefetch requests in chunks
+        * of a megabyte or maybe even a whole segment at a time, but there's
+        * no practical way to do that at present without a gross modularity
+        * violation, so we just do this.
+        */
+       for (block = first_block; block <= last_block; ++block)
+       {
+           PrefetchBuffer(rel, forkNumber, block);
+           ++blocks_done;
+       }
+#else
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("prefetch is not supported by this build")));
+#endif
+   }
+   else if (ptype == PREWARM_READ)
+   {
+       /*
+        * In read mode, we actually read the blocks, but not into shared
+        * buffers.  This is more portable than prefetch mode (it works
+        * everywhere) and is synchronous.
+        */
+       for (block = first_block; block <= last_block; ++block)
+       {
+           smgrread(rel->rd_smgr, forkNumber, block, blockbuffer);
+           ++blocks_done;
+       }
+   }
+   else if (ptype == PREWARM_BUFFER)
+   {
+       /*
+        * In buffer mode, we actually pull the data into shared_buffers.
+        */
+       for (block = first_block; block <= last_block; ++block)
+       {
+           Buffer      buf;
+
+           buf = ReadBufferExtended(rel, forkNumber, block, RBM_NORMAL, NULL);
+           ReleaseBuffer(buf);
+           ++blocks_done;
+       }
+   }
+
+   /* Close relation, release lock. */
+   relation_close(rel, AccessShareLock);
+
+   PG_RETURN_INT64(blocks_done);
+}
diff --git a/contrib/pg_prewarm/pg_prewarm.control b/contrib/pg_prewarm/pg_prewarm.control
new file mode 100644 (file)
index 0000000..8e5dee5
--- /dev/null
@@ -0,0 +1,5 @@
+# pg_prewarm extension
+comment = 'prewarm relation data'
+default_version = '1.0'
+module_pathname = '$libdir/pg_prewarm'
+relocatable = true
index dd8e09ed291c38d9acfd13a6dd31be250103305b..2892fa11670791d5f83408405cd284159d4e8297 100644 (file)
@@ -128,6 +128,7 @@ CREATE EXTENSION <replaceable>module_name</> FROM unpackaged;
  &pgbuffercache;
  &pgcrypto;
  &pgfreespacemap;
+ &pgprewarm;
  &pgrowlocks;
  &pgstatstatements;
  &pgstattuple;
index d1b7dc67811047e0cdc650ea1ac1d97f10cb6f5f..552c3aab2d74eee20e4748f41ad9bc1fab9734cb 100644 (file)
 <!ENTITY pgbuffercache   SYSTEM "pgbuffercache.sgml">
 <!ENTITY pgcrypto        SYSTEM "pgcrypto.sgml">
 <!ENTITY pgfreespacemap  SYSTEM "pgfreespacemap.sgml">
+<!ENTITY pgprewarm       SYSTEM "pgprewarm.sgml">
 <!ENTITY pgrowlocks      SYSTEM "pgrowlocks.sgml">
 <!ENTITY pgstandby       SYSTEM "pgstandby.sgml">
 <!ENTITY pgstatstatements SYSTEM "pgstatstatements.sgml">
diff --git a/doc/src/sgml/pgprewarm.sgml b/doc/src/sgml/pgprewarm.sgml
new file mode 100644 (file)
index 0000000..2200d3d
--- /dev/null
@@ -0,0 +1,68 @@
+<!-- doc/src/sgml/pgprewarm.sgml -->
+
+<sect1 id="pgprewarm" xreflabel="pg_prewarm">
+ <title>pg_prewarm</title>
+
+ <indexterm zone="pgprewarm">
+  <primary>pg_prewarm</primary>
+ </indexterm>
+
+ <para>
+  The <filename>pg_prewarm</filename> module provides a convenient way
+  to load relation data into either the operating system buffer cache
+  or the <productname>PostgreSQL</productname> buffer cache.
+ </para>
+
+ <sect2>
+  <title>Functions</title>
+
+<synopsis>
+pg_prewarm(regclass, mode text default 'buffer', fork text default 'main',
+           first_block int8 default null,
+           last_block int8 default null) RETURNS int8
+</synopsis>
+
+  <para>
+   The first argument is the relation to be prewarmed.  The second argument
+   is the prewarming method to be used, as further discussed below; the third
+   is the relation fork to be prewarmed, usually <literal>main</literal>.  
+   The fourth argument is the first block number to prewarm
+   (<literal>NULL</literal> is accepted as a synonym for zero).  The fifth
+   argument is the last block number to prewarm (<literal>NULL</literal>
+   means prewarm through the last block in the relation).  The return value
+   is the number of blocks prewarmed.
+  </para>
+
+  <para>
+   There are three available prewarming methods.  <literal>prefetch</literal>
+   issues asynchronous prefetch requests to the operating system, if this is
+   supported, or throws an error otherwise.  <literal>read</literal> reads
+   the requested range of blocks; unlike <literal>prefetch</literal>, this is
+   synchronous and supported on all platforms and builds, but may be slower.
+   <literal>buffer</literal> reads the requested range of blocks into the
+   database buffer cache.
+  </para>
+
+  <para>
+   Note that with any of these methods, attempting to prewarm more blocks than
+   can be cached &mdash; by the OS when using <literal>prefetch</literal> or
+   <literal>read</literal>, or by <productname>PostgreSQL</productname> when
+   using <literal>buffer</literal> &mdash; will likely result in lower-numbered
+   blocks being evicted as higher-numbered blocks are read in.  Prewarmed data
+   also enjoys no special protection from cache evictions, so it is possible
+   that other system activity may evict the newly prewarmed blocks shortly after
+   they are read; conversely, prewarming may also evict other data from cache.
+   For these reasons, prewarming is typically most useful at startup, when
+   caches are largely empty.
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>Author</title>
+
+  <para>
+   Robert Haas <email>rhaas@postgresql.org</email>
+  </para>
+ </sect2>
+
+</sect1>
index c5200372aec8825e033ee795ff0139039fb5dda4..e3058be6a5a6923b1d51fe58e0817be71e237e64 100644 (file)
@@ -1334,6 +1334,7 @@ PostParseColumnRefHook
 PostgresPollingStatusType
 PostingItem
 PreParseColumnRefHook
+PrewarmType
 PredClass
 PredIterInfo
 PredIterInfoData