* MultiXactId.
*
* "flags" is an output value; it's used to tell caller what to do on return.
- *
- * "mxid_oldest_xid_out" is an output value; it's used to track the oldest
- * extant Xid within any Multixact that will remain after freezing executes.
+ * "pagefrz" is an input/output value, used to manage page level freezing.
*
* Possible values that we can set in "flags":
* FRM_NOOP
* don't do anything -- keep existing Xmax
* FRM_INVALIDATE_XMAX
* mark Xmax as InvalidTransactionId and set XMAX_INVALID flag.
* FRM_RETURN_IS_XID
* The Xid return value is a single update Xid to set as xmax.
* FRM_MARK_COMMITTED
* Xmax can be marked as HEAP_XMAX_COMMITTED.
* FRM_RETURN_IS_MULTI
* The return value is a new MultiXactId to set as new Xmax.
* (caller must obtain proper infomask bits using GetMultiXactIdHintBits)
*
- * "mxid_oldest_xid_out" is only set when "flags" contains either FRM_NOOP or
- * FRM_RETURN_IS_MULTI, since we only leave behind a MultiXactId for these.
- *
- * NB: Creates a _new_ MultiXactId when FRM_RETURN_IS_MULTI is set in "flags".
+ * Caller delegates control of page freezing to us. In practice we always
+ * force freezing of caller's page unless FRM_NOOP processing is indicated.
+ * We help caller ensure that XIDs < FreezeLimit and MXIDs < MultiXactCutoff
+ * can never be left behind. We freely choose when and how to process each
+ * Multi, without ever violating the cutoff postconditions for freezing.
+ *
+ * It's useful to remove Multis on a proactive timeline (relative to freezing
+ * XIDs) to keep MultiXact member SLRU buffer misses to a minimum. It can
+ * also be cheaper for us in the short run, since eager processing lets us
+ * avoid SLRU buffer misses of our own.
+ *
+ * NB: Creates a _new_ MultiXactId when FRM_RETURN_IS_MULTI is set, though only
+ * when FreezeLimit and/or MultiXactCutoff cutoffs leave us with no choice.
+ * This can usually be put off, which is often enough to avoid it altogether.
+ * Allocating new multis during VACUUM should be avoided on general principle;
+ * only VACUUM can advance relminmxid, so allocating new Multis here comes with
+ * its own special risks.
+ *
+ * NB: Caller must maintain "no freeze" NewRelfrozenXid/NewRelminMxid trackers
+ * using heap_tuple_should_freeze when we haven't forced page-level freezing.
+ *
+ * NB: Caller should avoid needlessly calling heap_tuple_should_freeze when we
+ * have already forced page-level freezing, since that might incur the same
+ * SLRU buffer misses that we specifically intended to avoid by freezing.
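+ *
+ * For reference, the HeapPageFreeze struct that "pagefrz" points to (defined
+ * in access/heapam.h) has roughly this shape; its fields are the ones used
+ * throughout this patch:
+ *
+ * typedef struct HeapPageFreeze
+ * {
+ * bool freeze_required;
+ * TransactionId FreezePageRelfrozenXid;
+ * MultiXactId FreezePageRelminMxid;
+ * TransactionId NoFreezePageRelfrozenXid;
+ * MultiXactId NoFreezePageRelminMxid;
+ * } HeapPageFreeze;
+ *
+ * The "Freeze" trackers are the values VACUUM uses if caller goes on to
+ * freeze the page; the "NoFreeze" trackers are used if it doesn't.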
*/
static TransactionId
FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
const struct VacuumCutoffs *cutoffs, uint16 *flags,
- TransactionId *mxid_oldest_xid_out)
+ HeapPageFreeze *pagefrz)
{
- TransactionId newxmax = InvalidTransactionId;
+ TransactionId newxmax;
MultiXactMember *members;
int nmembers;
bool need_replace;
bool has_lockers;
TransactionId update_xid;
bool update_committed;
- TransactionId temp_xid_out;
+ TransactionId FreezePageRelfrozenXid;
*flags = 0;
if (!MultiXactIdIsValid(multi) ||
HEAP_LOCKED_UPGRADED(t_infomask))
{
- /* Ensure infomask bits are appropriately set/reset */
*flags |= FRM_INVALIDATE_XMAX;
+ pagefrz->freeze_required = true;
return InvalidTransactionId;
}
else if (MultiXactIdPrecedes(multi, cutoffs->relminmxid))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("found multixact %u from before relminmxid %u",
multi, cutoffs->relminmxid)));
- else if (MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff))
+ else if (MultiXactIdPrecedes(multi, cutoffs->OldestMxact))
{
+ TransactionId update_xact;
+
/*
* This old multi cannot possibly have members still running, but
* verify just in case. If it was a locker only, it can be removed
* without any further consideration; but if it contained an update, we
* might need to preserve it.
*/
if (MultiXactIdIsRunning(multi,
HEAP_XMAX_IS_LOCKED_ONLY(t_infomask)))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
- errmsg_internal("multixact %u from before cutoff %u found to be still running",
- multi, cutoffs->MultiXactCutoff)));
+ errmsg_internal("multixact %u from before multi freeze cutoff %u found to be still running",
+ multi, cutoffs->OldestMxact)));
if (HEAP_XMAX_IS_LOCKED_ONLY(t_infomask))
{
*flags |= FRM_INVALIDATE_XMAX;
- newxmax = InvalidTransactionId;
+ pagefrz->freeze_required = true;
+ return InvalidTransactionId;
}
- else
- {
- /* replace multi with single XID for its updater */
- newxmax = MultiXactIdGetUpdateXid(multi, t_infomask);
-
- /* wasn't only a lock, xid needs to be valid */
- Assert(TransactionIdIsValid(newxmax));
-
- if (TransactionIdPrecedes(newxmax, cutoffs->relfrozenxid))
- ereport(ERROR,
- (errcode(ERRCODE_DATA_CORRUPTED),
- errmsg_internal("found update xid %u from before relfrozenxid %u",
- newxmax, cutoffs->relfrozenxid)));
+ /* replace multi with single XID for its updater? */
+ update_xact = MultiXactIdGetUpdateXid(multi, t_infomask);
+ if (TransactionIdPrecedes(update_xact, cutoffs->relfrozenxid))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg_internal("multixact %u contains update XID %u from before relfrozenxid %u",
+ multi, update_xact,
+ cutoffs->relfrozenxid)));
+ else if (TransactionIdPrecedes(update_xact, cutoffs->OldestXmin))
+ {
/*
- * If the new xmax xid is older than OldestXmin, it has to have
- * aborted, otherwise the tuple would have been pruned away
+ * Updater XID has to have aborted (otherwise the tuple would have
+ * been pruned away instead, since updater XID is < OldestXmin).
+ * Just remove xmax.
*/
- if (TransactionIdPrecedes(newxmax, cutoffs->OldestXmin))
- {
- if (TransactionIdDidCommit(newxmax))
- ereport(ERROR,
- (errcode(ERRCODE_DATA_CORRUPTED),
- errmsg_internal("cannot freeze committed update xid %u", newxmax)));
- *flags |= FRM_INVALIDATE_XMAX;
- newxmax = InvalidTransactionId;
- }
- else
- {
- *flags |= FRM_RETURN_IS_XID;
- }
+ if (TransactionIdDidCommit(update_xact))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg_internal("multixact %u contains committed update XID %u from before removable cutoff %u",
+ multi, update_xact,
+ cutoffs->OldestXmin)));
+ *flags |= FRM_INVALIDATE_XMAX;
+ pagefrz->freeze_required = true;
+ return InvalidTransactionId;
}
- /*
- * Don't push back mxid_oldest_xid_out using FRM_RETURN_IS_XID Xid, or
- * when no Xids will remain
- */
- return newxmax;
+ /* Have to keep updater XID as new xmax */
+ *flags |= FRM_RETURN_IS_XID;
+ pagefrz->freeze_required = true;
+ return update_xact;
}
/*
* Some member(s) of this Multi may be below FreezeLimit xid cutoff, so
* we need to walk the whole members array to figure out what to do, if
* anything.
*/
nmembers = GetMultiXactIdMembers(multi, &members, false,
HEAP_XMAX_IS_LOCKED_ONLY(t_infomask));
if (nmembers <= 0)
{
/* Nothing worth keeping */
*flags |= FRM_INVALIDATE_XMAX;
+ pagefrz->freeze_required = true;
return InvalidTransactionId;
}
+ /*
+ * The FRM_NOOP case is the only case where we might need to ratchet back
+ * FreezePageRelfrozenXid or FreezePageRelminMxid. It is also the only
+ * case where our caller might ratchet back its NoFreezePageRelfrozenXid
+ * or NoFreezePageRelminMxid "no freeze" trackers to deal with a multi.
+ * FRM_NOOP handling should result in the NewRelfrozenXid/NewRelminMxid
+ * trackers managed by VACUUM being ratcheted back by xmax to the degree
+ * required to make it safe to leave xmax undisturbed, independent of
+ * whether or not page freezing is triggered somewhere else.
+ *
+ * Our policy is to force freezing in every case other than FRM_NOOP,
+ * which obviates the need to maintain either set of trackers, anywhere.
+ * Every other case will reliably execute a freeze plan for xmax that
+ * either replaces xmax with an XID/MXID >= OldestXmin/OldestMxact, or
+ * removes xmax entirely (setting it to InvalidTransactionId), rendering
+ * xmax fully frozen.
+ * (VACUUM's NewRelfrozenXid/NewRelminMxid trackers are initialized with
+ * OldestXmin/OldestMxact, so later values never need to be tracked here.)
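+ *
+ * As a purely illustrative example: with OldestXmin = 1000 and
+ * FreezeLimit = 900, a multi whose member XIDs are 950 and 980 can get
+ * FRM_NOOP treatment, ratcheting FreezePageRelfrozenXid back from 1000
+ * to 950. A member XID of 890 would instead force the second pass,
+ * since leaving an XID < FreezeLimit behind is never an option.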
+ */
need_replace = false;
- temp_xid_out = *mxid_oldest_xid_out; /* init for FRM_NOOP */
+ FreezePageRelfrozenXid = pagefrz->FreezePageRelfrozenXid;
for (int i = 0; i < nmembers; i++)
{
TransactionId xid = members[i].xid;
if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
{
+ /* Can't violate the FreezeLimit postcondition */
need_replace = true;
break;
}
- if (TransactionIdPrecedes(members[i].xid, temp_xid_out))
- temp_xid_out = members[i].xid;
+ if (TransactionIdPrecedes(xid, FreezePageRelfrozenXid))
+ FreezePageRelfrozenXid = xid;
}
- /*
- * In the simplest case, there is no member older than FreezeLimit; we can
- * keep the existing MultiXactId as-is, avoiding a more expensive second
- * pass over the multi
- */
+ /* Can't violate the MultiXactCutoff postcondition, either */
+ if (!need_replace)
+ need_replace = MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff);
+
if (!need_replace)
{
/*
- * When mxid_oldest_xid_out gets pushed back here it's likely that the
- * update Xid was the oldest member, but we don't rely on that
+ * vacuumlazy.c might ratchet back NewRelminMxid, NewRelfrozenXid, or
+ * both together to make it safe to retain this particular multi after
+ * freezing its page
*/
*flags |= FRM_NOOP;
- *mxid_oldest_xid_out = temp_xid_out;
+ pagefrz->FreezePageRelfrozenXid = FreezePageRelfrozenXid;
+ if (MultiXactIdPrecedes(multi, pagefrz->FreezePageRelminMxid))
+ pagefrz->FreezePageRelminMxid = multi;
pfree(members);
return multi;
}
/*
* Do a more thorough second pass over the multi to figure out which
* member XIDs actually need to be kept. Checking the precise status of
* individual members might even show that we don't need to keep anything.
+ * That is quite possible even though the Multi must be >= OldestMxact,
+ * since our second pass only keeps member XIDs when it's truly necessary;
+ * even member XIDs >= OldestXmin often won't be kept by the second pass.
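+ * (For example, a locker XID >= OldestXmin whose transaction has since
+ * committed or aborted won't be kept, since an ended transaction no
+ * longer holds the lock.)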
*/
nnewmembers = 0;
newmembers = palloc(sizeof(MultiXactMember) * nmembers);
has_lockers = false;
update_xid = InvalidTransactionId;
update_committed = false;
- temp_xid_out = *mxid_oldest_xid_out; /* init for FRM_RETURN_IS_MULTI */
/*
* Determine whether to keep each member xid, or to ignore it instead
if (TransactionIdIsCurrentTransactionId(xid) ||
TransactionIdIsInProgress(xid))
{
+ if (TransactionIdPrecedes(xid, cutoffs->OldestXmin))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg_internal("multixact %u contains running locker XID %u from before removable cutoff %u",
+ multi, xid,
+ cutoffs->OldestXmin)));
newmembers[nnewmembers++] = members[i];
has_lockers = true;
-
- /*
- * Cannot possibly be older than VACUUM's OldestXmin, so we
- * don't need a NewRelfrozenXid step here
- */
- Assert(TransactionIdPrecedesOrEquals(cutoffs->OldestXmin, xid));
}
continue;
* Updater XID (not locker XID). Should we keep it?
*
* Since the tuple wasn't totally removed when vacuum pruned, the
- * update Xid cannot possibly be older than OldestXmin cutoff. The
- * presence of such a tuple would cause corruption, so be paranoid and
- * check.
+ * update Xid cannot possibly be older than the OldestXmin cutoff unless
+ * the updater XID aborted. If the updater transaction is known aborted
+ * or crashed then it's okay to ignore it, otherwise not.
+ *
+ * In any case the Multi should never contain two updaters, whatever
+ * their individual commit status. Check for that first, in passing.
*/
- if (TransactionIdPrecedes(xid, cutoffs->OldestXmin))
- ereport(ERROR,
- (errcode(ERRCODE_DATA_CORRUPTED),
- errmsg_internal("found update xid %u from before removable cutoff %u",
- xid, cutoffs->OldestXmin)));
if (TransactionIdIsValid(update_xid))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
update_xid, xid)));
/*
- * If the transaction is known aborted or crashed then it's okay to
- * ignore it, otherwise not.
- *
* As with all tuple visibility routines, it's critical to test
* TransactionIdIsInProgress before TransactionIdDidCommit, because of
* race conditions explained in detail in heapam_visibility.c.
}
/*
- * We determined that this is an Xid corresponding to an update that
- * must be retained -- add it to new members list for later. Also
- * consider pushing back mxid_oldest_xid_out.
+ * We determined that the updater must be kept -- add it to the
+ * pending new members list
*/
+ if (TransactionIdPrecedes(xid, cutoffs->OldestXmin))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg_internal("multixact %u contains committed update XID %u from before removable cutoff %u",
+ multi, xid, cutoffs->OldestXmin)));
newmembers[nnewmembers++] = members[i];
- if (TransactionIdPrecedes(xid, temp_xid_out))
- temp_xid_out = xid;
}
pfree(members);
/*
* Determine what to do with caller's multi based on information gathered
* during our second pass
*/
if (nnewmembers == 0)
{
- /* nothing worth keeping!? Tell caller to remove the whole thing */
+ /* Nothing worth keeping */
*flags |= FRM_INVALIDATE_XMAX;
newxmax = InvalidTransactionId;
- /* Don't push back mxid_oldest_xid_out -- no Xids will remain */
}
else if (TransactionIdIsValid(update_xid) && !has_lockers)
{
if (update_committed)
*flags |= FRM_MARK_COMMITTED;
newxmax = update_xid;
- /* Don't push back mxid_oldest_xid_out using FRM_RETURN_IS_XID Xid */
}
else
{
/*
* Create a new multixact with the surviving members of the previous
- * one, to set as new Xmax in the tuple. The oldest surviving member
- * might push back mxid_oldest_xid_out.
+ * one, to set as new Xmax in the tuple
*/
newxmax = MultiXactIdCreateFromMembers(nnewmembers, newmembers);
*flags |= FRM_RETURN_IS_MULTI;
- *mxid_oldest_xid_out = temp_xid_out;
}
pfree(newmembers);
+ pagefrz->freeze_required = true;
return newxmax;
}
/*
* heap_prepare_freeze_tuple
*
* Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac)
- * are older than the FreezeLimit and/or MultiXactCutoff freeze cutoffs. If so,
- * setup enough state (in the *frz output argument) to later execute and
- * WAL-log what caller needs to do for the tuple, and return true. Return
+ * are older than the OldestXmin and/or OldestMxact freeze cutoffs. If so,
+ * set up enough state (in the *frz output argument) to enable caller to
+ * process this tuple as part of freezing its page, and return true. Return
* false if nothing can be changed about the tuple right now.
*
* Also sets *totally_frozen to true if the tuple will be totally frozen once
* caller executes returned freeze plan (or if tuple was already totally
* frozen by an earlier VACUUM). This indicates that there are no remaining
* XIDs or MultiXactIds that will need to be processed by a future VACUUM.
*
- * VACUUM caller must assemble HeapTupleFreeze entries for every tuple that we
- * returned true for when called. A later heap_freeze_execute_prepared call
- * will execute freezing for caller's page as a whole.
+ * VACUUM caller must assemble HeapTupleFreeze freeze plan entries for every
+ * tuple that we returned true for, and call heap_freeze_execute_prepared to
+ * execute freezing. Caller must initialize pagefrz fields for page as a
+ * whole before first call here for each heap page.
+ *
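+ * In rough outline, the expected calling convention looks like this (a
+ * simplified sketch of the lazy_scan_prune caller, not exact code):
+ *
+ * pagefrz.freeze_required = false;
+ * pagefrz.FreezePageRelfrozenXid = NewRelfrozenXid;
+ * pagefrz.FreezePageRelminMxid = NewRelminMxid;
+ * pagefrz.NoFreezePageRelfrozenXid = NewRelfrozenXid;
+ * pagefrz.NoFreezePageRelminMxid = NewRelminMxid;
+ * for (each tuple with storage on the page)
+ * if (heap_prepare_freeze_tuple(tuple, cutoffs, &pagefrz,
+ * &frozen[nfrozen], &totally_frozen))
+ * frozen[nfrozen++].offset = offnum;
+ * ... then freeze the page (mandatory if pagefrz.freeze_required was
+ * set), or discard the accumulated freeze plans ...
+ *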
+ * VACUUM caller decides on whether or not to freeze the page as a whole.
+ * We'll often prepare freeze plans for a page that caller just discards.
+ * However, VACUUM doesn't always get to make a choice; it must freeze when
+ * pagefrz.freeze_required is set, to ensure that any XIDs < FreezeLimit (and
+ * MXIDs < MultiXactCutoff) can never be left behind. We help to make sure
+ * that VACUUM always follows that rule.
+ *
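+ * (The cutoffs used here relate to each other as follows: relfrozenxid <=
+ * FreezeLimit <= OldestXmin, and relminmxid <= MultiXactCutoff <=
+ * OldestMxact. XIDs/MXIDs >= OldestXmin/OldestMxact can never be frozen,
+ * while those < FreezeLimit/MultiXactCutoff must always be.)
+ *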
+ * We sometimes force freezing of xmax MultiXactId values long before it is
+ * strictly necessary to do so just to ensure the FreezeLimit postcondition.
+ * It's worth processing MultiXactIds proactively when it is cheap to do so,
+ * and it's convenient to make that happen by piggy-backing it on the "force
+ * freezing" mechanism. Conversely, we sometimes delay freezing MultiXactIds
+ * because it is expensive right now (though only when it's still possible to
+ * do so without violating the FreezeLimit/MultiXactCutoff postcondition).
*
* It is assumed that the caller has checked the tuple with
* HeapTupleSatisfiesVacuum() and determined that it is not HEAPTUPLE_DEAD
* (else we should be removing the tuple, not freezing it).
*
- * The *relfrozenxid_out and *relminmxid_out arguments are the current target
- * relfrozenxid and relminmxid for VACUUM caller's heap rel. Any and all
- * unfrozen XIDs or MXIDs that remain in caller's rel after VACUUM finishes
- * _must_ have values >= the final relfrozenxid/relminmxid values in pg_class.
- * This includes XIDs that remain as MultiXact members from any tuple's xmax.
- * Each call here pushes back *relfrozenxid_out and/or *relminmxid_out as
- * needed to avoid unsafe final values in rel's authoritative pg_class tuple.
- *
* NB: This function has side effects: it might allocate a new MultiXactId.
* It will be set as tuple's new xmax when our *frz output is processed within
* heap_execute_freeze_tuple later on. If the tuple is in a shared buffer
* then caller had better have an exclusive lock on it already.
*/
bool
heap_prepare_freeze_tuple(HeapTupleHeader tuple,
const struct VacuumCutoffs *cutoffs,
- HeapTupleFreeze *frz, bool *totally_frozen,
- TransactionId *relfrozenxid_out,
- MultiXactId *relminmxid_out)
+ HeapPageFreeze *pagefrz,
+ HeapTupleFreeze *frz, bool *totally_frozen)
{
bool xmin_already_frozen = false,
xmax_already_frozen = false;
/*
* Process xmin, while keeping track of whether it's already frozen, or
- * will become frozen when our freeze plan is executed by caller (could be
+ * will become frozen iff our freeze plan is executed by caller (could be
* neither).
*/
xid = HeapTupleHeaderGetXmin(tuple);
errmsg_internal("found xmin %u from before relfrozenxid %u",
xid, cutoffs->relfrozenxid)));
- freeze_xmin = TransactionIdPrecedes(xid, cutoffs->FreezeLimit);
- if (freeze_xmin)
- {
- if (!TransactionIdDidCommit(xid))
- ereport(ERROR,
- (errcode(ERRCODE_DATA_CORRUPTED),
- errmsg_internal("uncommitted xmin %u from before xid cutoff %u needs to be frozen",
- xid, cutoffs->FreezeLimit)));
- }
- else
- {
- /* xmin to remain unfrozen. Could push back relfrozenxid_out. */
- if (TransactionIdPrecedes(xid, *relfrozenxid_out))
- *relfrozenxid_out = xid;
- }
+ freeze_xmin = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
+ if (freeze_xmin && !TransactionIdDidCommit(xid))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg_internal("uncommitted xmin %u from before xid cutoff %u needs to be frozen",
+ xid, cutoffs->OldestXmin)));
+
+ /* Will set freeze_xmin flags in freeze plan below */
}
/*
* For Xvac, we always freeze proactively. This allows totally_frozen
* tracking to ignore xvac.
*/
- replace_xvac = true;
+ replace_xvac = pagefrz->freeze_required = true;
+
+ /* Will set replace_xvac flags in freeze plan below */
}
- /*
- * Process xmax. To thoroughly examine the current Xmax value we need to
- * resolve a MultiXactId to its member Xids, in case some of them are
- * below the given FreezeLimit. In that case, those values might need
- * freezing, too. Also, if a multi needs freezing, we cannot simply take
- * it out --- if there's a live updater Xid, it needs to be kept.
- *
- * Make sure to keep heap_tuple_would_freeze in sync with this.
- */
+ /* Now process xmax */
xid = HeapTupleHeaderGetRawXmax(tuple);
-
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
{
/* Raw xmax is a MultiXactId */
TransactionId newxmax;
uint16 flags;
- TransactionId mxid_oldest_xid_out = *relfrozenxid_out;
+ /*
+ * We will either remove xmax completely (in the "freeze_xmax" path),
+ * process xmax by replacing it (in the "replace_xmax" path), or
+ * perform no-op xmax processing. The only constraint is that the
+ * FreezeLimit/MultiXactCutoff postcondition must never be violated.
+ */
newxmax = FreezeMultiXactId(xid, tuple->t_infomask, cutoffs,
- &flags, &mxid_oldest_xid_out);
+ &flags, pagefrz);
- if (flags & FRM_RETURN_IS_XID)
+ if (flags & FRM_NOOP)
+ {
+ /*
+ * xmax is a MultiXactId, and nothing about it changes for now.
+ * This is the only case where 'freeze_required' won't have been
+ * set for us by FreezeMultiXactId, as well as the only case where
+ * neither freeze_xmax nor replace_xmax are set (given a multi).
+ *
+ * This is a no-op, but the call to FreezeMultiXactId might have
+ * ratcheted back NewRelfrozenXid and/or NewRelminMxid trackers
+ * for us (the "freeze page" variants, specifically). That'll
+ * make it safe for our caller to freeze the page later on, while
+ * leaving this particular xmax undisturbed.
+ *
+ * FreezeMultiXactId is _not_ responsible for the "no freeze"
+ * NewRelfrozenXid/NewRelminMxid trackers, though -- that's our
+ * job. A call to heap_tuple_should_freeze for this same tuple
+ * will take place below if 'freeze_required' isn't set already.
+ * (This repeats work from FreezeMultiXactId, but allows "no
+ * freeze" tracker maintenance to happen in only one place.)
+ */
+ Assert(!MultiXactIdPrecedes(newxmax, cutoffs->MultiXactCutoff));
+ Assert(MultiXactIdIsValid(newxmax) && xid == newxmax);
+ }
+ else if (flags & FRM_RETURN_IS_XID)
{
/*
* xmax will become an updater Xid (original MultiXact's updater
* member Xid will be carried forward as a simple Xid in Xmax).
- * Might have to ratchet back relfrozenxid_out here, though never
- * relminmxid_out.
*/
Assert(!TransactionIdPrecedes(newxmax, cutoffs->OldestXmin));
- if (TransactionIdPrecedes(newxmax, *relfrozenxid_out))
- *relfrozenxid_out = newxmax;
/*
* NB -- some of these transformations are only valid because we
/*
* xmax is an old MultiXactId that we have to replace with a new
* MultiXactId, to carry forward two or more original member XIDs.
- * Might have to ratchet back relfrozenxid_out here, though never
- * relminmxid_out.
*/
Assert(!MultiXactIdPrecedes(newxmax, cutoffs->OldestMxact));
- Assert(TransactionIdPrecedesOrEquals(mxid_oldest_xid_out,
- *relfrozenxid_out));
- *relfrozenxid_out = mxid_oldest_xid_out;
/*
* We can't use GetMultiXactIdHintBits directly on the new multi
frz->xmax = newxmax;
replace_xmax = true;
}
- else if (flags & FRM_NOOP)
- {
- /*
- * xmax is a MultiXactId, and nothing about it changes for now.
- * Might have to ratchet back relminmxid_out, relfrozenxid_out, or
- * both together.
- */
- Assert(MultiXactIdIsValid(newxmax) && xid == newxmax);
- Assert(TransactionIdPrecedesOrEquals(mxid_oldest_xid_out,
- *relfrozenxid_out));
- if (MultiXactIdPrecedes(xid, *relminmxid_out))
- *relminmxid_out = xid;
- *relfrozenxid_out = mxid_oldest_xid_out;
- }
else
{
/*
Assert(flags & FRM_INVALIDATE_XMAX);
Assert(!TransactionIdIsValid(newxmax));
- /* Will set t_infomask/t_infomask2 flags in freeze plan below */
+ /* Will set freeze_xmax flags in freeze plan below */
freeze_xmax = true;
}
+
+ /* MultiXactId processing forces freezing (barring FRM_NOOP case) */
+ Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax));
}
else if (TransactionIdIsNormal(xid))
{
errmsg_internal("found xmax %u from before relfrozenxid %u",
xid, cutoffs->relfrozenxid)));
- if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
- {
- /*
- * If we freeze xmax, make absolutely sure that it's not an XID
- * that is important. (Note, a lock-only xmax can be removed
- * independent of committedness, since a committed lock holder has
- * released the lock).
- */
- if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) &&
- TransactionIdDidCommit(xid))
- ereport(ERROR,
- (errcode(ERRCODE_DATA_CORRUPTED),
- errmsg_internal("cannot freeze committed xmax %u",
- xid)));
+ if (TransactionIdPrecedes(xid, cutoffs->OldestXmin))
freeze_xmax = true;
- /* No need for relfrozenxid_out handling, since we'll freeze xmax */
- }
- else
- {
- if (TransactionIdPrecedes(xid, *relfrozenxid_out))
- *relfrozenxid_out = xid;
- }
+
+ /*
+ * If we freeze xmax, make absolutely sure that it's not an XID that
+ * is important. (Note, a lock-only xmax can be removed independent
+ * of committedness, since a committed lock holder has released the
+ * lock).
+ */
+ if (freeze_xmax && !HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) &&
+ TransactionIdDidCommit(xid))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg_internal("cannot freeze committed xmax %u",
+ xid)));
}
else if (!TransactionIdIsValid(xid))
{
* failed; whereas a non-dead MOVED_IN tuple must mean the xvac
* transaction succeeded.
*/
+ Assert(pagefrz->freeze_required);
if (tuple->t_infomask & HEAP_MOVED_OFF)
frz->frzflags |= XLH_INVALID_XVAC;
else
if (replace_xmax)
{
Assert(!xmax_already_frozen && !freeze_xmax);
+ Assert(pagefrz->freeze_required);
- /* Already set t_infomask/t_infomask2 flags in freeze plan */
+ /* Already set replace_xmax flags in freeze plan earlier */
}
if (freeze_xmax)
{
/*
* Determine if this tuple is already totally frozen, or will become
- * totally frozen
+ * totally frozen (provided caller executes freeze plans for the page)
*/
*totally_frozen = ((freeze_xmin || xmin_already_frozen) &&
(freeze_xmax || xmax_already_frozen));
- /* A "totally_frozen" tuple must not leave anything behind in xmax */
- Assert(!*totally_frozen || !replace_xmax);
+ if (!pagefrz->freeze_required && !(xmin_already_frozen &&
+ xmax_already_frozen))
+ {
+ /*
+ * So far no previous tuple from the page made freezing mandatory.
+ * Does this tuple force caller to freeze the entire page?
+ */
+ pagefrz->freeze_required =
+ heap_tuple_should_freeze(tuple, cutoffs,
+ &pagefrz->NoFreezePageRelfrozenXid,
+ &pagefrz->NoFreezePageRelminMxid);
+ }
/* Tell caller if this tuple has a usable freeze plan set in *frz */
return freeze_xmin || replace_xvac || replace_xmax || freeze_xmax;
*/
void
heap_freeze_execute_prepared(Relation rel, Buffer buffer,
- TransactionId FreezeLimit,
+ TransactionId snapshotConflictHorizon,
HeapTupleFreeze *tuples, int ntuples)
{
Page page = BufferGetPage(buffer);
Assert(ntuples > 0);
- Assert(TransactionIdIsNormal(FreezeLimit));
START_CRIT_SECTION();
int nplans;
xl_heap_freeze_page xlrec;
XLogRecPtr recptr;
- TransactionId snapshotConflictHorizon;
/* Prepare deduplicated representation for use in WAL record */
nplans = heap_xlog_freeze_plan(tuples, ntuples, plans, offsets);
- /*
- * FreezeLimit is (approximately) the first XID not frozen by VACUUM.
- * Back up caller's FreezeLimit to avoid false conflicts when
- * FreezeLimit is precisely equal to VACUUM's OldestXmin cutoff.
- */
- snapshotConflictHorizon = FreezeLimit;
- TransactionIdRetreat(snapshotConflictHorizon);
-
xlrec.snapshotConflictHorizon = snapshotConflictHorizon;
xlrec.nplans = nplans;
bool do_freeze;
bool totally_frozen;
struct VacuumCutoffs cutoffs;
- TransactionId NewRelfrozenXid = FreezeLimit;
- MultiXactId NewRelminMxid = MultiXactCutoff;
+ HeapPageFreeze pagefrz;
cutoffs.relfrozenxid = relfrozenxid;
cutoffs.relminmxid = relminmxid;
cutoffs.FreezeLimit = FreezeLimit;
cutoffs.MultiXactCutoff = MultiXactCutoff;
+ pagefrz.freeze_required = true;
+ pagefrz.FreezePageRelfrozenXid = FreezeLimit;
+ pagefrz.FreezePageRelminMxid = MultiXactCutoff;
+ pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
+ pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
+
do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
- &frz, &totally_frozen,
- &NewRelfrozenXid, &NewRelminMxid);
+ &pagefrz, &frz, &totally_frozen);
/*
* Note that because this is not a WAL-logged operation, we don't need to
}
/*
- * heap_tuple_would_freeze
+ * heap_tuple_should_freeze
*
* Return value indicates if heap_prepare_freeze_tuple sibling function would
- * freeze any of the XID/MXID fields from the tuple, given the same cutoffs.
- * We must also deal with dead tuples here, since (xmin, xmax, xvac) fields
- * could be processed by pruning away the whole tuple instead of freezing.
- *
- * The *relfrozenxid_out and *relminmxid_out input/output arguments work just
- * like the heap_prepare_freeze_tuple arguments that they're based on. We
- * never freeze here, which makes tracking the oldest extant XID/MXID simple.
+ * (or should) force freezing of the heap page that contains caller's tuple.
+ * Tuple header XIDs/MXIDs < FreezeLimit/MultiXactCutoff trigger freezing.
+ * This includes (xmin, xmax, xvac) fields, as well as MultiXact member XIDs.
+ *
+ * The *NoFreezePageRelfrozenXid and *NoFreezePageRelminMxid input/output
+ * arguments help VACUUM track the oldest extant XID/MXID remaining in rel.
+ * Our working assumption is that caller won't decide to freeze this tuple.
+ * It's up to caller to only ratchet back its own top-level trackers after the
+ * point that it fully commits to not freezing the tuple/page in question.
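+ *
+ * A sketch of expected usage (mirroring the lazy_scan_noprune caller):
+ *
+ * NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
+ * NoFreezePageRelminMxid = vacrel->NewRelminMxid;
+ * for (each tuple with storage on the page)
+ * heap_tuple_should_freeze(tupleheader, cutoffs,
+ * &NoFreezePageRelfrozenXid,
+ * &NoFreezePageRelminMxid);
+ * ... only once the page is definitely not being frozen ...
+ * vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
+ * vacrel->NewRelminMxid = NoFreezePageRelminMxid;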
*/
bool
-heap_tuple_would_freeze(HeapTupleHeader tuple,
- const struct VacuumCutoffs *cutoffs,
- TransactionId *relfrozenxid_out,
- MultiXactId *relminmxid_out)
+heap_tuple_should_freeze(HeapTupleHeader tuple,
+ const struct VacuumCutoffs *cutoffs,
+ TransactionId *NoFreezePageRelfrozenXid,
+ MultiXactId *NoFreezePageRelminMxid)
{
TransactionId xid;
MultiXactId multi;
if (TransactionIdIsNormal(xid))
{
Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
- if (TransactionIdPrecedes(xid, *relfrozenxid_out))
- *relfrozenxid_out = xid;
+ if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
+ *NoFreezePageRelfrozenXid = xid;
if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
freeze = true;
}
{
Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
/* xmax is a non-permanent XID */
- if (TransactionIdPrecedes(xid, *relfrozenxid_out))
- *relfrozenxid_out = xid;
+ if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
+ *NoFreezePageRelfrozenXid = xid;
if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
freeze = true;
}
else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
{
/* xmax is a pg_upgrade'd MultiXact, which can't have updater XID */
- if (MultiXactIdPrecedes(multi, *relminmxid_out))
- *relminmxid_out = multi;
+ if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
+ *NoFreezePageRelminMxid = multi;
/* heap_prepare_freeze_tuple always freezes pg_upgrade'd xmax */
freeze = true;
}
int nmembers;
Assert(MultiXactIdPrecedesOrEquals(cutoffs->relminmxid, multi));
- if (MultiXactIdPrecedes(multi, *relminmxid_out))
- *relminmxid_out = multi;
+ if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
+ *NoFreezePageRelminMxid = multi;
if (MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff))
freeze = true;
{
xid = members[i].xid;
Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
- if (TransactionIdPrecedes(xid, *relfrozenxid_out))
- *relfrozenxid_out = xid;
+ if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
+ *NoFreezePageRelfrozenXid = xid;
if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
freeze = true;
}
if (TransactionIdIsNormal(xid))
{
Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
- if (TransactionIdPrecedes(xid, *relfrozenxid_out))
- *relfrozenxid_out = xid;
- /* heap_prepare_freeze_tuple always freezes xvac */
+ if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
+ *NoFreezePageRelfrozenXid = xid;
+ /* heap_prepare_freeze_tuple forces xvac freezing */
freeze = true;
}
}
live_tuples,
recently_dead_tuples;
int nnewlpdead;
- TransactionId NewRelfrozenXid;
- MultiXactId NewRelminMxid;
+ HeapPageFreeze pagefrz;
+ int64 fpi_before = pgWalUsage.wal_fpi;
OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
retry:
/* Initialize (or reset) page-level state */
- NewRelfrozenXid = vacrel->NewRelfrozenXid;
- NewRelminMxid = vacrel->NewRelminMxid;
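+ /*
+ * Both the "freeze" and "no freeze" trackers start out as copies of
+ * VACUUM's top-level trackers; we commit to exactly one of the two
+ * sets below, once the freeze-or-not decision for this page is made.
+ */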
+ pagefrz.freeze_required = false;
+ pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
+ pagefrz.FreezePageRelminMxid = vacrel->NewRelminMxid;
+ pagefrz.NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
+ pagefrz.NoFreezePageRelminMxid = vacrel->NewRelminMxid;
tuples_deleted = 0;
tuples_frozen = 0;
lpdead_items = 0;
continue;
}
- /*
- * LP_DEAD items are processed outside of the loop.
- *
- * Note that we deliberately don't set hastup=true in the case of an
- * LP_DEAD item here, which is not how count_nondeletable_pages() does
- * it -- it only considers pages empty/truncatable when they have no
- * items at all (except LP_UNUSED items).
- *
- * Our assumption is that any LP_DEAD items we encounter here will
- * become LP_UNUSED inside lazy_vacuum_heap_page() before we actually
- * call count_nondeletable_pages(). In any case our opinion of
- * whether or not a page 'hastup' (which is how our caller sets its
- * vacrel->nonempty_pages value) is inherently race-prone. It must be
- * treated as advisory/unreliable, so we might as well be slightly
- * optimistic.
- */
if (ItemIdIsDead(itemid))
{
+ /*
+ * Deliberately don't set hastup for LP_DEAD items. We make the
+ * soft assumption that any LP_DEAD items encountered here will
+ * become LP_UNUSED later on, before count_nondeletable_pages is
+ * reached. If we don't make this assumption then rel truncation
+ * will only happen every other VACUUM, at most. Besides, VACUUM
+ * must treat hastup/nonempty_pages as provisional no matter how
+ * LP_DEAD items are handled (handled here, or handled later on).
+ *
+ * Also deliberately delay unsetting all_visible until just before
+ * we return to lazy_scan_heap caller, as explained in full below.
+ * (This is another case where it's useful to anticipate that any
+ * LP_DEAD items will become LP_UNUSED during the ongoing VACUUM.)
+ */
deadoffsets[lpdead_items++] = offnum;
- prunestate->all_visible = false;
- prunestate->has_lpdead_items = true;
continue;
}
prunestate->hastup = true; /* page makes rel truncation unsafe */
/* Tuple with storage -- consider need to freeze */
- if (heap_prepare_freeze_tuple(tuple.t_data, &vacrel->cutoffs,
- &frozen[tuples_frozen], &totally_frozen,
- &NewRelfrozenXid, &NewRelminMxid))
+ if (heap_prepare_freeze_tuple(tuple.t_data, &vacrel->cutoffs, &pagefrz,
+ &frozen[tuples_frozen], &totally_frozen))
{
/* Save prepared freeze plan for later */
frozen[tuples_frozen++].offset = offnum;
}
/*
- * If tuple is not frozen (and not about to become frozen) then caller
- * had better not go on to set this page's VM bit
+ * If any tuple isn't either totally frozen already or eligible to
+ * become totally frozen (according to its freeze plan), then the page
+ * definitely cannot be set all-frozen in the visibility map later on
*/
if (!totally_frozen)
prunestate->all_frozen = false;
}
- vacrel->offnum = InvalidOffsetNumber;
-
/*
* We have now divided every item on the page into either an LP_DEAD item
* that will need to be vacuumed in indexes later, or a LP_NORMAL tuple
* that remains and needs to be considered for freezing now (LP_UNUSED and
* LP_REDIRECT items also remain, but are of no further interest to us).
*/
- vacrel->NewRelfrozenXid = NewRelfrozenXid;
- vacrel->NewRelminMxid = NewRelminMxid;
+ vacrel->offnum = InvalidOffsetNumber;
/*
- * Consider the need to freeze any items with tuple storage from the page
- * first (arbitrary)
+ * Freeze the page when heap_prepare_freeze_tuple indicates that at least
+ * one XID/MXID from before FreezeLimit/MultiXactCutoff is present. Also
+ * freeze when pruning generated an FPI, if doing so means that we set the
+ * page all-frozen afterwards (might not happen until final heap pass).
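+ *
+ * (fpi_before was sampled from pgWalUsage.wal_fpi before pruning; if it
+ * has changed, pruning wrote a full-page image. Freezing the page at
+ * that point has a much lower marginal WAL cost, and can spare us a
+ * second FPI when the page would otherwise get frozen later anyway.)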
*/
- if (tuples_frozen > 0)
+ if (pagefrz.freeze_required || tuples_frozen == 0 ||
+ (prunestate->all_visible && prunestate->all_frozen &&
+ fpi_before != pgWalUsage.wal_fpi))
{
- Assert(prunestate->hastup);
+ /*
+ * We're freezing the page. Our final NewRelfrozenXid doesn't need to
+ * be affected by the XIDs that are just about to be frozen anyway.
+ */
+ vacrel->NewRelfrozenXid = pagefrz.FreezePageRelfrozenXid;
+ vacrel->NewRelminMxid = pagefrz.FreezePageRelminMxid;
+
+ if (tuples_frozen == 0)
+ {
+ /*
+ * We're freezing all eligible tuples on the page, but have no
+ * freeze plans to execute. Treating the page as nominally frozen
+ * costs nothing, and lets us set the page all-frozen in the VM
+ * whenever its remaining tuples are all already fully frozen. If
+ * this case were handled via "no freeze" processing instead, then
+ * VACUUM would senselessly waste certain opportunities to set
+ * pages all-frozen (not just all-visible) at no added cost.
+ *
+ * We never increment the frozen_pages instrumentation counter
+ * here, since it only counts pages with newly frozen tuples
+ * (don't confuse that with pages newly set all-frozen in VM).
+ */
+ }
+ else
+ {
+ TransactionId snapshotConflictHorizon;
+
+ Assert(prunestate->hastup);
- vacrel->frozen_pages++;
+ vacrel->frozen_pages++;
- /* Execute all freeze plans for page as a single atomic action */
- heap_freeze_execute_prepared(vacrel->rel, buf,
- vacrel->cutoffs.FreezeLimit,
- frozen, tuples_frozen);
+ /*
+ * We can use visibility_cutoff_xid as our cutoff for conflicts
+ * when the whole page is eligible to become all-frozen in the VM
+ * once we're done with it. Otherwise we generate a conservative
+ * cutoff by stepping back from OldestXmin.
+ */
+ if (prunestate->all_visible && prunestate->all_frozen)
+ snapshotConflictHorizon = prunestate->visibility_cutoff_xid;
+ else
+ {
+ /* Avoids false conflicts when hot_standby_feedback in use */
+ snapshotConflictHorizon = vacrel->cutoffs.OldestXmin;
+ TransactionIdRetreat(snapshotConflictHorizon);
+ }
+
+ /* Execute all freeze plans for page as a single atomic action */
+ heap_freeze_execute_prepared(vacrel->rel, buf,
+ snapshotConflictHorizon,
+ frozen, tuples_frozen);
+ }
+ }
+ else
+ {
+ /*
+ * Page requires "no freeze" processing. It might be set all-visible
+ * in the visibility map, but it can never be set all-frozen.
+ */
+ vacrel->NewRelfrozenXid = pagefrz.NoFreezePageRelfrozenXid;
+ vacrel->NewRelminMxid = pagefrz.NoFreezePageRelminMxid;
+ prunestate->all_frozen = false;
+ tuples_frozen = 0; /* avoid miscounts in instrumentation */
}
/*
- * The second pass over the heap can also set visibility map bits, using
- * the same approach. This is important when the table frequently has a
- * few old LP_DEAD items on each page by the time we get to it (typically
- * because past opportunistic pruning operations freed some non-HOT
- * tuples).
- *
* VACUUM will call heap_page_is_all_visible() during the second pass over
* the heap to determine all_visible and all_frozen for the page -- this
* is a specialized version of the logic from this function. Now that
* we've finished pruning and freezing, make sure that we're in total
* agreement with heap_page_is_all_visible() using an assertion.
*/
#ifdef USE_ASSERT_CHECKING
/* Note that all_frozen value does not matter when !all_visible */
- if (prunestate->all_visible)
+ if (prunestate->all_visible && lpdead_items == 0)
{
TransactionId cutoff;
bool all_frozen;
if (!heap_page_is_all_visible(vacrel, buf, &cutoff, &all_frozen))
Assert(false);
- Assert(lpdead_items == 0);
- Assert(prunestate->all_frozen == all_frozen);
-
/*
* It's possible that we froze tuples and made the page's XID cutoff
* (for recovery conflict purposes) FrozenTransactionId. This is okay
VacDeadItems *dead_items = vacrel->dead_items;
ItemPointerData tmp;
- Assert(!prunestate->all_visible);
- Assert(prunestate->has_lpdead_items);
-
vacrel->lpdead_item_pages++;
+ prunestate->has_lpdead_items = true;
ItemPointerSetBlockNumber(&tmp, blkno);
Assert(dead_items->num_items <= dead_items->max_items);
pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
dead_items->num_items);
+
+ /*
+ * It was convenient to ignore LP_DEAD items in all_visible earlier on
+ * to make the choice of whether or not to freeze the page unaffected
+ * by the short-term presence of LP_DEAD items. These LP_DEAD items
+ * were effectively assumed to be LP_UNUSED items in the making. It
+ * doesn't matter which heap pass (initial pass or final pass) ends up
+ * setting the page all-frozen, as long as the ongoing VACUUM does it.
+ *
+ * Now that freezing has been finalized, unset all_visible. It needs
+ * to reflect the present state of things, as expected by our caller.
+ */
+ prunestate->all_visible = false;
}
/* Finally, add page-local counts to whole-VACUUM counts */
recently_dead_tuples,
missed_dead_tuples;
HeapTupleHeader tupleheader;
- TransactionId NewRelfrozenXid = vacrel->NewRelfrozenXid;
- MultiXactId NewRelminMxid = vacrel->NewRelminMxid;
+ TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
+ MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
Assert(BufferGetBlockNumber(buf) == blkno);
*hastup = true; /* page prevents rel truncation */
tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
- if (heap_tuple_would_freeze(tupleheader, &vacrel->cutoffs,
- &NewRelfrozenXid, &NewRelminMxid))
+ if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
+ &NoFreezePageRelfrozenXid,
+ &NoFreezePageRelminMxid))
{
/* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
if (vacrel->aggressive)
* this particular page until the next VACUUM. Remember its details now.
* (lazy_scan_prune expects a clean slate, so we have to do this last.)
*/
- vacrel->NewRelfrozenXid = NewRelfrozenXid;
- vacrel->NewRelminMxid = NewRelminMxid;
+ vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
+ vacrel->NewRelminMxid = NoFreezePageRelminMxid;
/* Save any LP_DEAD items found on the page in dead_items array */
if (vacrel->nindexes == 0)