OffsetNumber redirected[MaxHeapTuplesPerPage * 2];
OffsetNumber nowdead[MaxHeapTuplesPerPage];
OffsetNumber nowunused[MaxHeapTuplesPerPage];
- /* marked[i] is true if item i is entered in one of the above arrays */
+
+ /*
+ * marked[i] is true if item i is entered in one of the above arrays.
+ *
+ * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
+ * 1. Otherwise every access would need to subtract 1.
+ */
bool marked[MaxHeapTuplesPerPage + 1];
+
+ /*
+ * Tuple visibility is only computed once for each tuple, for correctness
+ * and efficiency reasons; see comment in heap_page_prune() for
+ * details. This is of type int8[,] intead of HTSV_Result[], so we can use
+ * -1 to indicate no visibility has been computed, e.g. for LP_DEAD items.
+ *
+ * Same indexing as ->marked.
+ */
+ int8 htsv[MaxHeapTuplesPerPage + 1];
} PruneState;
/* Local functions */
+static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
+ HeapTuple tup,
+ Buffer buffer);
static int heap_prune_chain(Buffer buffer,
OffsetNumber rootoffnum,
PruneState *prstate);
OffsetNumber offnum,
maxoff;
PruneState prstate;
+ HeapTupleData tup;
/*
* Our strategy is to scan the page and make lists of items to change,
prstate.nredirected = prstate.ndead = prstate.nunused = 0;
memset(prstate.marked, 0, sizeof(prstate.marked));
- /* Scan the page */
maxoff = PageGetMaxOffsetNumber(page);
+ tup.t_tableOid = RelationGetRelid(prstate.rel);
+
+ /*
+ * Determine HTSV for all tuples.
+ *
+ * This is required for correctness to deal with cases where running HTSV
+ * twice could result in different results (e.g. RECENTLY_DEAD can turn to
+ * DEAD if another checked item causes GlobalVisTestIsRemovableFullXid()
+ * to update the horizon, INSERT_IN_PROGRESS can change to DEAD if the
+ * inserting transaction aborts, ...). That in turn could cause
+ * heap_prune_chain() to behave incorrectly if a tuple is reached twice,
+ * once directly via a heap_prune_chain() and once following a HOT chain.
+ *
+ * It's also good for performance. Most commonly tuples within a page are
+ * stored at decreasing offsets (while the items are stored at increasing
+ * offsets). When processing all tuples on a page this leads to reading
+ * memory at decreasing offsets within a page, with a variable stride.
+ * That's hard for CPU prefetchers to deal with. Processing the items in
+ * reverse order (and thus the tuples in increasing order) increases
+ * prefetching efficiency significantly / decreases the number of cache
+ * misses.
+ */
+ for (offnum = maxoff;
+ offnum >= FirstOffsetNumber;
+ offnum = OffsetNumberPrev(offnum))
+ {
+ ItemId itemid = PageGetItemId(page, offnum);
+ HeapTupleHeader htup;
+
+ /* Nothing to do if slot doesn't contain a tuple */
+ if (!ItemIdIsNormal(itemid))
+ {
+ prstate.htsv[offnum] = -1;
+ continue;
+ }
+
+ htup = (HeapTupleHeader) PageGetItem(page, itemid);
+ tup.t_data = htup;
+ tup.t_len = ItemIdGetLength(itemid);
+ ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum);
+
+ /*
+ * Set the offset number so that we can display it along with any
+ * error that occurred while processing this tuple.
+ */
+ if (off_loc)
+ *off_loc = offnum;
+
+ prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
+ buffer);
+ }
+
+ /* Scan the page */
for (offnum = FirstOffsetNumber;
offnum <= maxoff;
offnum = OffsetNumberNext(offnum))
if (prstate.marked[offnum])
continue;
- /*
- * Set the offset number so that we can display it along with any
- * error that occurred while processing this tuple.
- */
+ /* see preceding loop */
if (off_loc)
*off_loc = offnum;
* the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
* chain. We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
* This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
- * DEAD, the heap_prune_satisfies_vacuum test is just too coarse to detect it.
+ * DEAD, our visibility test is just too coarse to detect it.
*
* In general, pruning must never leave behind a DEAD tuple that still has
* tuple storage. VACUUM isn't prepared to deal with that case. That's why
* pointer is marked LP_DEAD. (This includes the case of a DEAD simple
* tuple, which we treat as a chain of length 1.)
*
- * prstate->vistest is used to distinguish whether tuples are DEAD or
- * RECENTLY_DEAD.
- *
- * We don't actually change the page here, except perhaps for hint-bit updates
- * caused by heap_prune_satisfies_vacuum. We just add entries to the arrays in
+ * We don't actually change the page here. We just add entries to the arrays in
* prstate showing the changes to be made. Items to be redirected are added
* to the redirected[] array (two entries per redirection); items to be set to
* LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
OffsetNumber chainitems[MaxHeapTuplesPerPage];
int nchain = 0,
i;
- HeapTupleData tup;
-
- tup.t_tableOid = RelationGetRelid(prstate->rel);
rootlp = PageGetItemId(dp, rootoffnum);
*/
if (ItemIdIsNormal(rootlp))
{
+ Assert(prstate->htsv[rootoffnum] != -1);
htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
- tup.t_data = htup;
- tup.t_len = ItemIdGetLength(rootlp);
- ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), rootoffnum);
-
if (HeapTupleHeaderIsHeapOnly(htup))
{
/*
* either here or while following a chain below. Whichever path
* gets there first will mark the tuple unused.
*/
- if (heap_prune_satisfies_vacuum(prstate, &tup, buffer)
- == HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup))
+ if (prstate->htsv[rootoffnum] == HEAPTUPLE_DEAD &&
+ !HeapTupleHeaderIsHotUpdated(htup))
{
heap_prune_record_unused(prstate, rootoffnum);
HeapTupleHeaderAdvanceLatestRemovedXid(htup,
break;
Assert(ItemIdIsNormal(lp));
+ Assert(prstate->htsv[offnum] != -1);
htup = (HeapTupleHeader) PageGetItem(dp, lp);
- tup.t_data = htup;
- tup.t_len = ItemIdGetLength(lp);
- ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum);
-
/*
* Check the tuple XMIN against prior XMAX, if any
*/
*/
tupdead = recent_dead = false;
- switch (heap_prune_satisfies_vacuum(prstate, &tup, buffer))
+ switch ((HTSV_Result) prstate->htsv[offnum])
{
case HEAPTUPLE_DEAD:
tupdead = true;