errmsg("index \"%s\" meta page is corrupt",
RelationGetRelationName(state->rel))));
- if (metad->btm_version != BTREE_VERSION)
+ if (metad->btm_version < BTREE_MIN_VERSION ||
+ metad->btm_version > BTREE_VERSION)
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("version mismatch in index \"%s\": file version %d, code version %d",
+ errmsg("version mismatch in index \"%s\": file version %d, "
+ "current version %d, minimal supported version %d",
RelationGetRelationName(state->rel),
- metad->btm_version, BTREE_VERSION)));
+ metad->btm_version, BTREE_VERSION, BTREE_MIN_VERSION)));
}
/*
brinfuncs.o ginfuncs.o hashfuncs.o $(WIN32RES)
EXTENSION = pageinspect
-DATA = pageinspect--1.5.sql pageinspect--1.5--1.6.sql \
+DATA = pageinspect--1.6--1.7.sql \
+ pageinspect--1.5.sql pageinspect--1.5--1.6.sql \
pageinspect--1.4--1.5.sql pageinspect--1.3--1.4.sql \
pageinspect--1.2--1.3.sql pageinspect--1.1--1.2.sql \
pageinspect--1.0--1.1.sql pageinspect--unpackaged--1.0.sql
BTMetaPageData *metad;
TupleDesc tupleDesc;
int j;
- char *values[6];
+ char *values[8];
Buffer buffer;
Page page;
HeapTuple tuple;
values[j++] = psprintf("%d", metad->btm_level);
values[j++] = psprintf("%d", metad->btm_fastroot);
values[j++] = psprintf("%d", metad->btm_fastlevel);
+ values[j++] = psprintf("%u", metad->btm_oldest_btpo_xact);
+ values[j++] = psprintf("%lf", metad->btm_last_cleanup_num_heap_tuples);
tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
values);
CREATE INDEX test1_a_idx ON test1 USING btree (a);
\x
SELECT * FROM bt_metap('test1_a_idx');
--[ RECORD 1 ]-----
-magic | 340322
-version | 2
-root | 1
-level | 0
-fastroot | 1
-fastlevel | 0
+-[ RECORD 1 ]-----------+-------
+magic | 340322
+version | 3
+root | 1
+level | 0
+fastroot | 1
+fastlevel | 0
+oldest_xact | 0
+last_cleanup_num_tuples | -1
SELECT * FROM bt_page_stats('test1_a_idx', 0);
ERROR: block 0 is a meta page
--- /dev/null
+/* contrib/pageinspect/pageinspect--1.6--1.7.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pageinspect UPDATE TO '1.7'" to load this file. \quit
+
+--
+-- bt_metap()
+--
+-- Re-create bt_metap() with the two cleanup-related metapage columns.
+-- oldest_xact is a transaction ID, which the C function prints as an
+-- unsigned 32-bit number; declaring it as xid (rather than int4) keeps
+-- values above 2^31-1 from failing integer input conversion.
+-- last_cleanup_num_tuples is declared float8 so that large tuple counts
+-- are not rounded on output.
+--
+DROP FUNCTION bt_metap(IN relname text,
+    OUT magic int4,
+    OUT version int4,
+    OUT root int4,
+    OUT level int4,
+    OUT fastroot int4,
+    OUT fastlevel int4);
+CREATE FUNCTION bt_metap(IN relname text,
+    OUT magic int4,
+    OUT version int4,
+    OUT root int4,
+    OUT level int4,
+    OUT fastroot int4,
+    OUT fastlevel int4,
+    OUT oldest_xact xid,
+    OUT last_cleanup_num_tuples float8)
+AS 'MODULE_PATHNAME', 'bt_metap'
+LANGUAGE C STRICT PARALLEL SAFE;
# pageinspect extension
comment = 'inspect the contents of database pages at a low level'
-default_version = '1.6'
+default_version = '1.7'
module_pathname = '$libdir/pageinspect'
relocatable = true
from pgstatindex('test_pkey');
version | tree_level | index_size | root_block_no | internal_pages | leaf_pages | empty_pages | deleted_pages | avg_leaf_density | leaf_fragmentation
---------+------------+------------+---------------+----------------+------------+-------------+---------------+------------------+--------------------
- 2 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN
+ 3 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN
(1 row)
select version, tree_level,
from pgstatindex('test_pkey'::text);
version | tree_level | index_size | root_block_no | internal_pages | leaf_pages | empty_pages | deleted_pages | avg_leaf_density | leaf_fragmentation
---------+------------+------------+---------------+----------------+------------+-------------+---------------+------------------+--------------------
- 2 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN
+ 3 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN
(1 row)
select version, tree_level,
from pgstatindex('test_pkey'::name);
version | tree_level | index_size | root_block_no | internal_pages | leaf_pages | empty_pages | deleted_pages | avg_leaf_density | leaf_fragmentation
---------+------------+------------+---------------+----------------+------------+-------------+---------------+------------------+--------------------
- 2 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN
+ 3 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN
(1 row)
select version, tree_level,
from pgstatindex('test_pkey'::regclass);
version | tree_level | index_size | root_block_no | internal_pages | leaf_pages | empty_pages | deleted_pages | avg_leaf_density | leaf_fragmentation
---------+------------+------------+---------------+----------------+------------+-------------+---------------+------------------+--------------------
- 2 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN
+ 3 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN
(1 row)
select pg_relpages('test');
select pgstatindex('test_partition_idx');
pgstatindex
------------------------------
- (2,0,8192,0,0,0,0,0,NaN,NaN)
+ (3,0,8192,0,0,0,0,0,NaN,NaN)
(1 row)
select pgstathashindex('test_partition_hash_idx');
</note>
</sect2>
+ <sect2 id="runtime-config-index-vacuum">
+ <title>Index Vacuum</title>
+ <variablelist>
+ <varlistentry id="guc-vacuum-cleanup-index-scale-factor" xreflabel="vacuum_cleanup_index_scale_factor">
+ <term><varname>vacuum_cleanup_index_scale_factor</varname> (<type>floating point</type>)
+ <indexterm>
+ <primary><varname>vacuum_cleanup_index_scale_factor</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+        When no tuples were deleted from the heap, B-tree indexes might still
+        be scanned during the <command>VACUUM</command> cleanup stage for two
+        reasons.  The first reason is that the B-tree index contains deleted
+        pages which can be recycled during cleanup.  The second reason is that
+        the B-tree index statistics are stale.  Index statistics are considered
+        stale when the number of tuples inserted since the previous statistics
+        collection is greater than the
+        <varname>vacuum_cleanup_index_scale_factor</varname>
+        fraction of the total number of heap tuples.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </sect2>
+
<sect2 id="runtime-config-resource-background-writer">
<title>Background Writer</title>
index's metapage. For example:
<screen>
test=# SELECT * FROM bt_metap('pg_cast_oid_index');
--[ RECORD 1 ]-----
-magic | 340322
-version | 2
-root | 1
-level | 0
-fastroot | 1
-fastlevel | 0
+-[ RECORD 1 ]-----------+-------
+magic | 340322
+version | 3
+root | 1
+level | 0
+fastroot | 1
+fastlevel | 0
+oldest_xact | 582
+last_cleanup_num_tuples | 1000
</screen>
</para>
</listitem>
</varlistentry>
</variablelist>
+ <para>
+ B-tree indexes additionally accept this parameter:
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term><literal>vacuum_cleanup_index_scale_factor</literal></term>
+ <listitem>
+ <para>
+      Per-index value for <xref linkend="guc-vacuum-cleanup-index-scale-factor"/>.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
<para>
GiST indexes additionally accept this parameter:
</para>
},
0, -1.0, DBL_MAX
},
+ {
+ {
+ "vacuum_cleanup_index_scale_factor",
+ "Number of tuple inserts prior to index cleanup as a fraction of reltuples.",
+ RELOPT_KIND_BTREE,
+ ShareUpdateExclusiveLock
+ },
+ -1, 0.0, 100.0
+ },
/* list terminator */
{{NULL}}
};
{"user_catalog_table", RELOPT_TYPE_BOOL,
offsetof(StdRdOptions, user_catalog_table)},
{"parallel_workers", RELOPT_TYPE_INT,
- offsetof(StdRdOptions, parallel_workers)}
+ offsetof(StdRdOptions, parallel_workers)},
+ {"vacuum_cleanup_index_scale_factor", RELOPT_TYPE_REAL,
+ offsetof(StdRdOptions, vacuum_cleanup_index_scale_factor)}
};
options = parseRelOptions(reloptions, validate, kind, &numoptions);
if (BufferIsValid(metabuf))
{
+ /* upgrade meta-page if needed */
+ if (metad->btm_version < BTREE_VERSION)
+ _bt_upgrademetapage(metapg);
metad->btm_fastroot = itup_blkno;
metad->btm_fastlevel = lpageop->btpo.level;
MarkBufferDirty(metabuf);
xlmeta.level = metad->btm_level;
xlmeta.fastroot = metad->btm_fastroot;
xlmeta.fastlevel = metad->btm_fastlevel;
+ xlmeta.oldest_btpo_xact = metad->btm_oldest_btpo_xact;
+ xlmeta.last_cleanup_num_heap_tuples =
+ metad->btm_last_cleanup_num_heap_tuples;
XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT | REGBUF_STANDARD);
XLogRegisterBufData(2, (char *) &xlmeta, sizeof(xl_btree_metadata));
metapg = BufferGetPage(metabuf);
metad = BTPageGetMeta(metapg);
+ /* upgrade metapage if needed */
+ if (metad->btm_version < BTREE_VERSION)
+ _bt_upgrademetapage(metapg);
+
/*
* Create downlink item for left page (old root). Since this will be the
* first item in a non-leaf page, it implicitly has minus-infinity key
md.level = metad->btm_level;
md.fastroot = rootblknum;
md.fastlevel = metad->btm_level;
+ md.oldest_btpo_xact = metad->btm_oldest_btpo_xact;
+ md.last_cleanup_num_heap_tuples = metad->btm_last_cleanup_num_heap_tuples;
XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
metad->btm_level = level;
metad->btm_fastroot = rootbknum;
metad->btm_fastlevel = level;
+ metad->btm_oldest_btpo_xact = InvalidTransactionId;
+ metad->btm_last_cleanup_num_heap_tuples = -1.0;
metaopaque = (BTPageOpaque) PageGetSpecialPointer(page);
metaopaque->btpo_flags = BTP_META;
((char *) metad + sizeof(BTMetaPageData)) - (char *) page;
}
+/*
+ *	_bt_upgrademetapage() -- Upgrade a meta-page from an old format to the new.
+ *
+ *		This routine does a purely in-memory image upgrade: it bumps the
+ *		version number, initializes the fields added in version 3 and
+ *		adjusts pd_lower.  The caller is responsible for locking,
+ *		marking the buffer dirty, WAL-logging etc.
+ *
+ *		'page' must be a btree metapage whose version is at least
+ *		BTREE_MIN_VERSION and lower than BTREE_VERSION (asserted).
+ */
+void
+_bt_upgrademetapage(Page page)
+{
+	BTMetaPageData *metad;
+	/* only inspected by Assert(); avoid a warning in non-assert builds */
+	BTPageOpaque metaopaque PG_USED_FOR_ASSERTS_ONLY;
+
+	metad = BTPageGetMeta(page);
+	metaopaque = (BTPageOpaque) PageGetSpecialPointer(page);
+
+	/* It must be really a meta page of upgradable version */
+	Assert(metaopaque->btpo_flags & BTP_META);
+	Assert(metad->btm_version < BTREE_VERSION);
+	Assert(metad->btm_version >= BTREE_MIN_VERSION);
+
+	/* Set version number and fill extra fields added into version 3 */
+	metad->btm_version = BTREE_VERSION;
+	metad->btm_oldest_btpo_xact = InvalidTransactionId;
+	metad->btm_last_cleanup_num_heap_tuples = -1.0;
+
+	/* Adjust pd_lower (see _bt_initmetapage() for details) */
+	((PageHeader) page)->pd_lower =
+		((char *) metad + sizeof(BTMetaPageData)) - (char *) page;
+}
+
+
+/*
+ *	_bt_update_meta_cleanup_info() -- Update cleanup-related information in
+ *									  the metapage.
+ *
+ *		This routine checks whether the provided cleanup-related information
+ *		matches what is stored in the metapage.  On mismatch, or when the
+ *		metapage is of an older version, the metapage is overwritten (and
+ *		upgraded if needed) and the change is WAL-logged.
+ *
+ *		rel				- the btree index relation
+ *		oldestBtpoXact	- value to store in btm_oldest_btpo_xact
+ *		numHeapTuples	- value to store in btm_last_cleanup_num_heap_tuples
+ */
+void
+_bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact,
+							 float8 numHeapTuples)
+{
+	Buffer		metabuf;
+	Page		metapg;
+	BTMetaPageData *metad;
+	bool		needsRewrite = false;
+
+	/* read the metapage and check if it needs rewrite */
+	metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
+	metapg = BufferGetPage(metabuf);
+	metad = BTPageGetMeta(metapg);
+
+	/* outdated version of metapage always needs rewrite */
+	if (metad->btm_version < BTREE_VERSION)
+		needsRewrite = true;
+	else if (metad->btm_oldest_btpo_xact != oldestBtpoXact ||
+			 metad->btm_last_cleanup_num_heap_tuples != numHeapTuples)
+		needsRewrite = true;
+
+	/* nothing to do: stored values already match, just drop the buffer */
+	if (!needsRewrite)
+	{
+		_bt_relbuf(rel, metabuf);
+		return;
+	}
+
+	/*
+	 * Trade in our read lock for a write lock.  The pin is kept, so metapg
+	 * and metad remain valid pointers into the buffer.
+	 */
+	LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
+	LockBuffer(metabuf, BT_WRITE);
+
+	START_CRIT_SECTION();
+
+	/* upgrade meta-page if needed (re-checked now that we hold the write lock) */
+	if (metad->btm_version < BTREE_VERSION)
+		_bt_upgrademetapage(metapg);
+
+	/* update cleanup-related information */
+	metad->btm_oldest_btpo_xact = oldestBtpoXact;
+	metad->btm_last_cleanup_num_heap_tuples = numHeapTuples;
+	MarkBufferDirty(metabuf);
+
+	/* write WAL record if needed */
+	if (RelationNeedsWAL(rel))
+	{
+		xl_btree_metadata md;
+		XLogRecPtr	recptr;
+
+		XLogBeginInsert();
+		XLogRegisterBuffer(0, metabuf, REGBUF_WILL_INIT | REGBUF_STANDARD);
+
+		/* the record carries the complete metapage contents */
+		md.root = metad->btm_root;
+		md.level = metad->btm_level;
+		md.fastroot = metad->btm_fastroot;
+		md.fastlevel = metad->btm_fastlevel;
+		md.oldest_btpo_xact = oldestBtpoXact;
+		md.last_cleanup_num_heap_tuples = numHeapTuples;
+
+		XLogRegisterBufData(0, (char *) &md, sizeof(xl_btree_metadata));
+
+		recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_META_CLEANUP);
+
+		PageSetLSN(metapg, recptr);
+	}
+
+	END_CRIT_SECTION();
+	_bt_relbuf(rel, metabuf);
+}
+
+
/*
* _bt_getroot() -- Get the root page of the btree.
*
metad = (BTMetaPageData *) rel->rd_amcache;
/* We shouldn't have cached it if any of these fail */
Assert(metad->btm_magic == BTREE_MAGIC);
- Assert(metad->btm_version == BTREE_VERSION);
+ Assert(metad->btm_version >= BTREE_MIN_VERSION);
+ Assert(metad->btm_version <= BTREE_VERSION);
Assert(metad->btm_root != P_NONE);
rootblkno = metad->btm_fastroot;
errmsg("index \"%s\" is not a btree",
RelationGetRelationName(rel))));
- if (metad->btm_version != BTREE_VERSION)
+ if (metad->btm_version < BTREE_MIN_VERSION ||
+ metad->btm_version > BTREE_VERSION)
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("version mismatch in index \"%s\": file version %d, code version %d",
+ errmsg("version mismatch in index \"%s\": file version %d, "
+ "current version %d, minimal supported version %d",
RelationGetRelationName(rel),
- metad->btm_version, BTREE_VERSION)));
+ metad->btm_version, BTREE_VERSION, BTREE_MIN_VERSION)));
/* if no root page initialized yet, do it */
if (metad->btm_root == P_NONE)
LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
LockBuffer(metabuf, BT_WRITE);
+ /* upgrade metapage if needed */
+ if (metad->btm_version < BTREE_VERSION)
+ _bt_upgrademetapage(metapg);
+
/*
* Race condition: if someone else initialized the metadata between
* the time we released the read lock and acquired the write lock, we
metad->btm_level = 0;
metad->btm_fastroot = rootblkno;
metad->btm_fastlevel = 0;
+ metad->btm_oldest_btpo_xact = InvalidTransactionId;
+ metad->btm_last_cleanup_num_heap_tuples = -1.0;
MarkBufferDirty(rootbuf);
MarkBufferDirty(metabuf);
md.level = 0;
md.fastroot = rootblkno;
md.fastlevel = 0;
+ md.oldest_btpo_xact = InvalidTransactionId;
+ md.last_cleanup_num_heap_tuples = -1.0;
XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
errmsg("index \"%s\" is not a btree",
RelationGetRelationName(rel))));
- if (metad->btm_version != BTREE_VERSION)
+ if (metad->btm_version < BTREE_MIN_VERSION ||
+ metad->btm_version > BTREE_VERSION)
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("version mismatch in index \"%s\": file version %d, code version %d",
+ errmsg("version mismatch in index \"%s\": file version %d, "
+ "current version %d, minimal supported version %d",
RelationGetRelationName(rel),
- metad->btm_version, BTREE_VERSION)));
+ metad->btm_version, BTREE_VERSION, BTREE_MIN_VERSION)));
/* if no root page initialized yet, fail */
if (metad->btm_root == P_NONE)
errmsg("index \"%s\" is not a btree",
RelationGetRelationName(rel))));
- if (metad->btm_version != BTREE_VERSION)
+ if (metad->btm_version < BTREE_MIN_VERSION ||
+ metad->btm_version > BTREE_VERSION)
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("version mismatch in index \"%s\": file version %d, code version %d",
+ errmsg("version mismatch in index \"%s\": file version %d, "
+ "current version %d, minimal supported version %d",
RelationGetRelationName(rel),
- metad->btm_version, BTREE_VERSION)));
+ metad->btm_version, BTREE_VERSION, BTREE_MIN_VERSION)));
/*
* If there's no root page yet, _bt_getroot() doesn't expect a cache
/* And update the metapage, if needed */
if (BufferIsValid(metabuf))
{
+ /* upgrade metapage if needed */
+ if (metad->btm_version < BTREE_VERSION)
+ _bt_upgrademetapage(metapg);
metad->btm_fastroot = rightsib;
metad->btm_fastlevel = targetlevel;
MarkBufferDirty(metabuf);
xlmeta.level = metad->btm_level;
xlmeta.fastroot = metad->btm_fastroot;
xlmeta.fastlevel = metad->btm_fastlevel;
+ xlmeta.oldest_btpo_xact = metad->btm_oldest_btpo_xact;
+ xlmeta.last_cleanup_num_heap_tuples = metad->btm_last_cleanup_num_heap_tuples;
XLogRegisterBufData(4, (char *) &xlmeta, sizeof(xl_btree_metadata));
xlinfo = XLOG_BTREE_UNLINK_PAGE_META;
#include "postgres.h"
#include "access/nbtree.h"
+#include "access/nbtxlog.h"
#include "access/relscan.h"
#include "access/xlog.h"
#include "commands/vacuum.h"
+#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
+#include "postmaster/autovacuum.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
BlockNumber lastBlockVacuumed; /* highest blkno actually vacuumed */
BlockNumber lastBlockLocked; /* highest blkno we've cleanup-locked */
BlockNumber totFreePages; /* true total # of free pages */
+ TransactionId oldestBtpoXact;
MemoryContext pagedelcontext;
} BTVacState;
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
IndexBulkDeleteCallback callback, void *callback_state,
- BTCycleId cycleid);
+ BTCycleId cycleid, TransactionId *oldestBtpoXact);
static void btvacuumpage(BTVacState *vstate, BlockNumber blkno,
BlockNumber orig_blkno);
SpinLockRelease(&btscan->btps_mutex);
}
+/*
+ * _bt_vacuum_needs_cleanup() -- Checks if index needs cleanup assuming that
+ *			btbulkdelete() wasn't called.
+ *
+ * Reads the metapage under a read lock and returns true when any of the
+ * following holds:
+ *	- the metapage is of a pre-current version, so it carries no
+ *	  cleanup-related information yet;
+ *	- the stored oldest btpo.xact is valid and precedes RecentGlobalXmin;
+ *	- no heap tuple count was recorded by a previous cleanup, or the heap
+ *	  tuple count grew by more than the effective
+ *	  vacuum_cleanup_index_scale_factor fraction since then.
+ * The per-index reloption, when set (>= 0), overrides the GUC.
+ */
+static bool
+_bt_vacuum_needs_cleanup(IndexVacuumInfo *info)
+{
+	Buffer		metabuf;
+	Page		metapg;
+	BTMetaPageData *metad;
+	bool		result = false;
+
+	metabuf = _bt_getbuf(info->index, BTREE_METAPAGE, BT_READ);
+	metapg = BufferGetPage(metabuf);
+	metad = BTPageGetMeta(metapg);
+
+	if (metad->btm_version < BTREE_VERSION)
+	{
+		/*
+		 * Do cleanup if metapage needs upgrade, because we don't have
+		 * cleanup-related meta-information yet.
+		 */
+		result = true;
+	}
+	else if (TransactionIdIsValid(metad->btm_oldest_btpo_xact) &&
+			 TransactionIdPrecedes(metad->btm_oldest_btpo_xact,
+								   RecentGlobalXmin))
+	{
+		/*
+		 * If oldest btpo.xact in the deleted pages is older than
+		 * RecentGlobalXmin, then at least one deleted page can be recycled.
+		 */
+		result = true;
+	}
+	else
+	{
+		StdRdOptions *relopts;
+		float8		cleanup_scale_factor;
+
+		/*
+		 * If the table receives a large enough number of insertions and no
+		 * cleanup was performed, then the index might end up with stale
+		 * statistics.  To avoid that, we perform cleanup once the table has
+		 * grown by a vacuum_cleanup_index_scale_factor fraction since the
+		 * last cleanup.
+		 */
+		relopts = (StdRdOptions *) info->index->rd_options;
+
+		/* a non-negative reloption takes precedence over the GUC */
+		cleanup_scale_factor = (relopts &&
+								relopts->vacuum_cleanup_index_scale_factor >= 0)
+			? relopts->vacuum_cleanup_index_scale_factor
+			: vacuum_cleanup_index_scale_factor;
+
+		/*
+		 * A negative stored tuple count means no cleanup has recorded one
+		 * yet, so there is nothing to compare against.
+		 */
+		if (cleanup_scale_factor < 0 ||
+			metad->btm_last_cleanup_num_heap_tuples < 0 ||
+			info->num_heap_tuples > (1.0 + cleanup_scale_factor) *
+			metad->btm_last_cleanup_num_heap_tuples)
+			result = true;
+	}
+
+	_bt_relbuf(info->index, metabuf);
+	return result;
+}
+
+
/*
* Bulk deletion of all index entries pointing to a set of heap tuples.
* The set of target tuples is specified via a callback routine that tells
/* The ENSURE stuff ensures we clean up shared memory on failure */
PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
{
+ TransactionId oldestBtpoXact;
+
cycleid = _bt_start_vacuum(rel);
- btvacuumscan(info, stats, callback, callback_state, cycleid);
+ btvacuumscan(info, stats, callback, callback_state, cycleid,
+ &oldestBtpoXact);
+
+		/*
+		 * Update cleanup-related information in metapage.  This information
+		 * is used only for cleanup, but keeping it up to date can avoid
+		 * unnecessary cleanup even after bulkdelete.
+		 */
+ _bt_update_meta_cleanup_info(info->index, oldestBtpoXact,
+ info->num_heap_tuples);
}
PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
_bt_end_vacuum(rel);
/*
* If btbulkdelete was called, we need not do anything, just return the
- * stats from the latest btbulkdelete call. If it wasn't called, we must
- * still do a pass over the index, to recycle any newly-recyclable pages
- * and to obtain index statistics.
+	 * stats from the latest btbulkdelete call.  If it wasn't called, we might
+	 * still need to do a pass over the index, to recycle any newly-recyclable
+	 * pages and to obtain index statistics.  _bt_vacuum_needs_cleanup checks
+	 * if there are newly-recyclable pages or stale index statistics.
*
* Since we aren't going to actually delete any leaf items, there's no
* need to go through all the vacuum-cycle-ID pushups.
*/
if (stats == NULL)
{
+ TransactionId oldestBtpoXact;
+
+ /* Check if we need a cleanup */
+ if (!_bt_vacuum_needs_cleanup(info))
+ return NULL;
+
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
- btvacuumscan(info, stats, NULL, NULL, 0);
+ btvacuumscan(info, stats, NULL, NULL, 0, &oldestBtpoXact);
+
+ /* Update cleanup-related information in the metapage */
+ _bt_update_meta_cleanup_info(info->index, oldestBtpoXact,
+ info->num_heap_tuples);
}
/*
static void
btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
IndexBulkDeleteCallback callback, void *callback_state,
- BTCycleId cycleid)
+ BTCycleId cycleid, TransactionId *oldestBtpoXact)
{
Relation rel = info->index;
BTVacState vstate;
vstate.lastBlockVacuumed = BTREE_METAPAGE; /* Initialise at first block */
vstate.lastBlockLocked = BTREE_METAPAGE;
vstate.totFreePages = 0;
+ vstate.oldestBtpoXact = InvalidTransactionId;
/* Create a temporary memory context to run _bt_pagedel in */
vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
/* update statistics */
stats->num_pages = num_pages;
stats->pages_free = vstate.totFreePages;
+
+ if (oldestBtpoXact)
+ *oldestBtpoXact = vstate.oldestBtpoXact;
}
/*
{
/* Already deleted, but can't recycle yet */
stats->pages_deleted++;
+
+ /* Update the oldest btpo.xact */
+ if (!TransactionIdIsValid(vstate->oldestBtpoXact) ||
+ TransactionIdPrecedes(opaque->btpo.xact, vstate->oldestBtpoXact))
+ vstate->oldestBtpoXact = opaque->btpo.xact;
}
else if (P_ISHALFDEAD(opaque))
{
/* count only this page, else may double-count parent */
if (ndel)
+ {
stats->pages_deleted++;
+ if (!TransactionIdIsValid(vstate->oldestBtpoXact) ||
+ TransactionIdPrecedes(opaque->btpo.xact, vstate->oldestBtpoXact))
+ vstate->oldestBtpoXact = opaque->btpo.xact;
+ }
MemoryContextSwitchTo(oldcontext);
/* pagedel released buffer, so we shouldn't */
md->btm_level = xlrec->level;
md->btm_fastroot = xlrec->fastroot;
md->btm_fastlevel = xlrec->fastlevel;
+ md->btm_oldest_btpo_xact = xlrec->oldest_btpo_xact;
+ md->btm_last_cleanup_num_heap_tuples = xlrec->last_cleanup_num_heap_tuples;
pageop = (BTPageOpaque) PageGetSpecialPointer(metapg);
pageop->btpo_flags = BTP_META;
}
}
-
void
btree_redo(XLogReaderState *record)
{
case XLOG_BTREE_REUSE_PAGE:
btree_xlog_reuse_page(record);
break;
+ case XLOG_BTREE_META_CLEANUP:
+ _bt_restore_meta(record, 0);
+ break;
default:
elog(PANIC, "btree_redo: unknown op code %u", info);
}
int VacuumCostBalance = 0; /* working state for vacuum */
bool VacuumCostActive = false;
+
+double vacuum_cleanup_index_scale_factor;
NULL, NULL, NULL
},
+ {
+ {"vacuum_cleanup_index_scale_factor", PGC_SIGHUP, AUTOVACUUM,
+ gettext_noop("Number of tuple inserts prior to index cleanup as a fraction of reltuples."),
+ NULL
+ },
+ &vacuum_cleanup_index_scale_factor,
+ 0.1, 0.0, 100.0,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0.0, 0.0, 0.0, NULL, NULL, NULL
uint32 btm_level; /* tree level of the root page */
BlockNumber btm_fastroot; /* current "fast" root location */
uint32 btm_fastlevel; /* tree level of the "fast" root page */
+	/* following fields are available since page version 3 */
+	TransactionId btm_oldest_btpo_xact; /* oldest btpo_xact among all deleted
+										 * pages */
+
+	/*
+	 * Stored as float8 to match the float8 value handed to
+	 * _bt_update_meta_cleanup_info() and the WAL record field; a narrower
+	 * type would make the stored value compare unequal to the caller's
+	 * count once it exceeds single-precision accuracy.
+	 */
+	float8		btm_last_cleanup_num_heap_tuples;	/* number of heap tuples
+													 * during last cleanup */
} BTMetaPageData;
#define BTPageGetMeta(p) \
#define BTREE_METAPAGE 0 /* first page is meta */
#define BTREE_MAGIC 0x053162 /* magic number of btree pages */
-#define BTREE_VERSION 2 /* current version number */
+#define BTREE_VERSION 3 /* current version number */
+#define BTREE_MIN_VERSION 2 /* minimal supported version number */
/*
* Maximum size of a btree index entry, including its tuple header.
* prototypes for functions in nbtpage.c
*/
extern void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level);
+extern void _bt_update_meta_cleanup_info(Relation rel,
+ TransactionId oldestBtpoXact, float8 numHeapTuples);
+extern void _bt_upgrademetapage(Page page);
extern Buffer _bt_getroot(Relation rel, int access);
extern Buffer _bt_gettrueroot(Relation rel);
extern int _bt_getrootheight(Relation rel);
* vacuum */
#define XLOG_BTREE_REUSE_PAGE 0xD0 /* old page is about to be reused from
* FSM */
+#define XLOG_BTREE_META_CLEANUP 0xE0 /* update cleanup-related data in the
+ * metapage */
/*
* All that we need to regenerate the meta-data page
uint32 level;
BlockNumber fastroot;
uint32 fastlevel;
+ TransactionId oldest_btpo_xact;
+ double last_cleanup_num_heap_tuples;
} xl_btree_metadata;
/*
extern int VacuumCostBalance;
extern bool VacuumCostActive;
+extern double vacuum_cleanup_index_scale_factor;
+
/* in tcop/postgres.c */
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int fillfactor; /* page fill factor in percent (0..100) */
+	/* fraction of newly inserted tuples that triggers index cleanup */
+ float8 vacuum_cleanup_index_scale_factor;
int toast_tuple_target; /* target for tuple toasting */
AutoVacOpts autovacuum; /* autovacuum-related options */
bool user_catalog_table; /* use as an additional catalog relation */
-- need to insert some rows to cause the fast root page to split.
insert into btree_tall_tbl (id, t)
select g, repeat('x', 100) from generate_series(1, 500) g;
+--
+-- Test vacuum_cleanup_index_scale_factor
+--
+-- Simple create
+create table btree_test(a int);
+create index btree_idx1 on btree_test(a) with (vacuum_cleanup_index_scale_factor = 40.0);
+select reloptions from pg_class WHERE oid = 'btree_idx1'::regclass;
+ reloptions
+------------------------------------------
+ {vacuum_cleanup_index_scale_factor=40.0}
+(1 row)
+
+-- Fail while setting improper values
+create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = -10.0);
+ERROR: value -10.0 out of bounds for option "vacuum_cleanup_index_scale_factor"
+DETAIL: Valid values are between "0.000000" and "100.000000".
+create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = 100.0);
+create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = 'string');
+ERROR: invalid value for floating point option "vacuum_cleanup_index_scale_factor": string
+create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = true);
+ERROR: invalid value for floating point option "vacuum_cleanup_index_scale_factor": true
+-- Simple ALTER INDEX
+alter index btree_idx1 set (vacuum_cleanup_index_scale_factor = 70.0);
+select reloptions from pg_class WHERE oid = 'btree_idx1'::regclass;
+ reloptions
+------------------------------------------
+ {vacuum_cleanup_index_scale_factor=70.0}
+(1 row)
+
-- need to insert some rows to cause the fast root page to split.
insert into btree_tall_tbl (id, t)
select g, repeat('x', 100) from generate_series(1, 500) g;
+
+--
+-- Test vacuum_cleanup_index_scale_factor
+--
+
+-- Simple create
+create table btree_test(a int);
+create index btree_idx1 on btree_test(a) with (vacuum_cleanup_index_scale_factor = 40.0);
+select reloptions from pg_class WHERE oid = 'btree_idx1'::regclass;
+
+-- Fail while setting improper values
+create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = -10.0);
+create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = 100.0);
+create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = 'string');
+create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = true);
+
+-- Simple ALTER INDEX
+alter index btree_idx1 set (vacuum_cleanup_index_scale_factor = 70.0);
+select reloptions from pg_class WHERE oid = 'btree_idx1'::regclass;