WAL Log invalidations at command end with wal_level=logical.
author: Amit Kapila <akapila@postgresql.org>
Thu, 23 Jul 2020 02:49:07 +0000 (08:19 +0530)
committer: Amit Kapila <akapila@postgresql.org>
Thu, 23 Jul 2020 03:04:48 +0000 (08:34 +0530)
When wal_level=logical, write invalidations at command end into WAL so
that decoding can use this information.

This patch is required to allow the streaming of in-progress transactions
in logical decoding.  The actual work to allow streaming will be committed
as a separate patch.

We still add the invalidations to the cache and write them to WAL at
commit time in RecordTransactionCommit(). This uses the existing
XLOG_INVALIDATIONS xlog record type, from the RM_STANDBY_ID resource
manager (see LogStandbyInvalidations for details).

So existing code relying on those invalidations (e.g. redo) does not need
to be changed.

The invalidations written at command end use a new xlog record type,
XLOG_XACT_INVALIDATIONS, from the RM_XACT_ID resource manager. See
LogLogicalInvalidations for details.

These new xlog records are ignored by existing redo procedures, which
still rely on the invalidations written to commit records.

The invalidations are decoded and accumulated in the top-level transaction,
and then executed during replay.  This obviates the need to decode the
invalidations as part of a commit record.

Bump XLOG_PAGE_MAGIC, since this introduces XLOG_XACT_INVALIDATIONS.

Author: Dilip Kumar, Tomas Vondra, Amit Kapila
Reviewed-by: Amit Kapila
Tested-by: Neha Sharma and Mahendra Singh Thalor
Discussion: https://postgr.es/m/688b0b7f-2f6c-d827-c27b-216a8e3ea700@2ndquadrant.com

src/backend/access/rmgrdesc/xactdesc.c
src/backend/access/transam/xact.c
src/backend/replication/logical/decode.c
src/backend/replication/logical/reorderbuffer.c
src/backend/utils/cache/inval.c
src/include/access/xact.h
src/include/access/xlog_internal.h
src/include/replication/reorderbuffer.h
src/include/utils/inval.h

index 9fce75565f4b37e7538650e590a8782dd0f04964..addd95faec1403998ae77b8efe3b92c4cfad0251 100644 (file)
@@ -396,6 +396,13 @@ xact_desc(StringInfo buf, XLogReaderState *record)
        appendStringInfo(buf, "xtop %u: ", xlrec->xtop);
        xact_desc_assignment(buf, xlrec);
    }
+   else if (info == XLOG_XACT_INVALIDATIONS)
+   {
+       xl_xact_invals *xlrec = (xl_xact_invals *) rec;
+
+       standby_desc_invalidations(buf, xlrec->nmsgs, xlrec->msgs, InvalidOid,
+                                  InvalidOid, false);
+   }
 }
 
 const char *
@@ -423,6 +430,9 @@ xact_identify(uint8 info)
        case XLOG_XACT_ASSIGNMENT:
            id = "ASSIGNMENT";
            break;
+       case XLOG_XACT_INVALIDATIONS:
+           id = "INVALIDATION";
+           break;
    }
 
    return id;
index bd4c3cf32585fb46ab3792f0e550ba97e0691876..d4f7c29847f4384fde31380a4a90e220b12e616c 100644 (file)
@@ -1224,6 +1224,16 @@ RecordTransactionCommit(void)
    bool        RelcacheInitFileInval = false;
    bool        wrote_xlog;
 
+   /*
+    * Log pending invalidations for logical decoding of in-progress
+    * transactions.  Normally for DDLs, we log this at each command end,
+    * however, for certain cases where we directly update the system table
+    * without a transaction block, the invalidations are not logged till this
+    * time.
+    */
+   if (XLogLogicalInfoActive())
+       LogLogicalInvalidations();
+
    /* Get data needed for commit record */
    nrels = smgrGetPendingDeletes(true, &rels);
    nchildren = xactGetCommittedChildren(&children);
@@ -6022,6 +6032,13 @@ xact_redo(XLogReaderState *record)
            ProcArrayApplyXidAssignment(xlrec->xtop,
                                        xlrec->nsubxacts, xlrec->xsub);
    }
+   else if (info == XLOG_XACT_INVALIDATIONS)
+   {
+       /*
+        * XXX we do ignore this for now, what matters are invalidations
+        * written into the commit record.
+        */
+   }
    else
        elog(PANIC, "xact_redo: unknown op code %u", info);
 }
index 0c0c3717391994d9eeb6e68f9cb88caa803b1d3b..f3a1c31a2921c76bccd4eaecc3de1108b3d0f19e 100644 (file)
@@ -278,10 +278,39 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 
            /*
             * We assign subxact to the toplevel xact while processing each
-            * record if required.  So, we don't need to do anything here.
-            * See LogicalDecodingProcessRecord.
+            * record if required.  So, we don't need to do anything here. See
+            * LogicalDecodingProcessRecord.
             */
            break;
+       case XLOG_XACT_INVALIDATIONS:
+           {
+               TransactionId xid;
+               xl_xact_invals *invals;
+
+               xid = XLogRecGetXid(r);
+               invals = (xl_xact_invals *) XLogRecGetData(r);
+
+               /*
+                * Execute the invalidations for xid-less transactions,
+                * otherwise, accumulate them so that they can be processed at
+                * the commit time.
+                */
+               if (TransactionIdIsValid(xid))
+               {
+                   if (!ctx->fast_forward)
+                       ReorderBufferAddInvalidations(reorder, xid,
+                                                     buf->origptr,
+                                                     invals->nmsgs,
+                                                     invals->msgs);
+                   ReorderBufferXidSetCatalogChanges(ctx->reorder, xid,
+                                                     buf->origptr);
+               }
+               else if ((!ctx->fast_forward))
+                   ReorderBufferImmediateInvalidation(ctx->reorder,
+                                                      invals->nmsgs,
+                                                      invals->msgs);
+           }
+           break;
        case XLOG_XACT_PREPARE:
 
            /*
@@ -334,15 +363,11 @@ DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
        case XLOG_STANDBY_LOCK:
            break;
        case XLOG_INVALIDATIONS:
-           {
-               xl_invalidations *invalidations =
-               (xl_invalidations *) XLogRecGetData(r);
 
-               if (!ctx->fast_forward)
-                   ReorderBufferImmediateInvalidation(ctx->reorder,
-                                                      invalidations->nmsgs,
-                                                      invalidations->msgs);
-           }
+           /*
+            * We are processing the invalidations at the command level via
+            * XLOG_XACT_INVALIDATIONS.  So we don't need to do anything here.
+            */
            break;
        default:
            elog(ERROR, "unexpected RM_STANDBY_ID record type: %u", info);
@@ -573,19 +598,6 @@ DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
        commit_time = parsed->origin_timestamp;
    }
 
-   /*
-    * Process invalidation messages, even if we're not interested in the
-    * transaction's contents, since the various caches need to always be
-    * consistent.
-    */
-   if (parsed->nmsgs > 0)
-   {
-       if (!ctx->fast_forward)
-           ReorderBufferAddInvalidations(ctx->reorder, xid, buf->origptr,
-                                         parsed->nmsgs, parsed->msgs);
-       ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
-   }
-
    SnapBuildCommitTxn(ctx->snapshot_builder, buf->origptr, xid,
                       parsed->nsubxacts, parsed->subxacts);
 
index 449327a147f920916dd560af40ebb22802f7a05f..ce6e62152f037306544ee13785d5c7fe827e43a0 100644 (file)
@@ -856,6 +856,9 @@ ReorderBufferAssignChild(ReorderBuffer *rb, TransactionId xid,
    subtxn->toplevel_xid = xid;
    Assert(subtxn->nsubtxns == 0);
 
+   /* set the reference to top-level transaction */
+   subtxn->toptxn = txn;
+
    /* add to subtransaction list */
    dlist_push_tail(&txn->subtxns, &subtxn->node);
    txn->nsubtxns++;
@@ -2201,7 +2204,11 @@ ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid,
 /*
  * Setup the invalidation of the toplevel transaction.
  *
- * This needs to be done before ReorderBufferCommit is called!
+ * This needs to be called for each XLOG_XACT_INVALIDATIONS message and
+ * accumulates all the invalidation messages in the toplevel transaction.
+ * This is required because in some cases where we skip processing the
+ * transaction (see ReorderBufferForget), we need to execute all the
+ * invalidations together.
  */
 void
 ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid,
@@ -2212,17 +2219,35 @@ ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid,
 
    txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
 
-   if (txn->ninvalidations != 0)
-       elog(ERROR, "only ever add one set of invalidations");
+   /*
+    * We collect all the invalidations under the top transaction so that we
+    * can execute them all together.
+    */
+   if (txn->toptxn)
+       txn = txn->toptxn;
 
    Assert(nmsgs > 0);
 
-   txn->ninvalidations = nmsgs;
-   txn->invalidations = (SharedInvalidationMessage *)
-       MemoryContextAlloc(rb->context,
-                          sizeof(SharedInvalidationMessage) * nmsgs);
-   memcpy(txn->invalidations, msgs,
-          sizeof(SharedInvalidationMessage) * nmsgs);
+   /* Accumulate invalidations. */
+   if (txn->ninvalidations == 0)
+   {
+       txn->ninvalidations = nmsgs;
+       txn->invalidations = (SharedInvalidationMessage *)
+           MemoryContextAlloc(rb->context,
+                              sizeof(SharedInvalidationMessage) * nmsgs);
+       memcpy(txn->invalidations, msgs,
+              sizeof(SharedInvalidationMessage) * nmsgs);
+   }
+   else
+   {
+       txn->invalidations = (SharedInvalidationMessage *)
+           repalloc(txn->invalidations, sizeof(SharedInvalidationMessage) *
+                    (txn->ninvalidations + nmsgs));
+
+       memcpy(txn->invalidations + txn->ninvalidations, msgs,
+              nmsgs * sizeof(SharedInvalidationMessage));
+       txn->ninvalidations += nmsgs;
+   }
 }
 
 /*
@@ -2250,6 +2275,15 @@ ReorderBufferXidSetCatalogChanges(ReorderBuffer *rb, TransactionId xid,
    txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
 
    txn->txn_flags |= RBTXN_HAS_CATALOG_CHANGES;
+
+   /*
+    * Mark top-level transaction as having catalog changes too if one of its
+    * children has so that the ReorderBufferBuildTupleCidHash can
+    * conveniently check just top-level transaction and decide whether to
+    * build the hash table or not.
+    */
+   if (txn->toptxn != NULL)
+       txn->toptxn->txn_flags |= RBTXN_HAS_CATALOG_CHANGES;
 }
 
 /*
index 591dd33be6786a85946cf651364189dee9e72acf..628d6f5d0cceb592a1ac3038a4c0359c1072214b 100644 (file)
@@ -85,6 +85,9 @@
  * worth trying to avoid sending such inval traffic in the future, if those
  * problems can be overcome cheaply.
  *
+ * When wal_level=logical, write invalidations into WAL at each command end to
+ * support the decoding of the in-progress transactions.  See
+ * CommandEndInvalidationMessages.
  *
  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -1094,6 +1097,11 @@ CommandEndInvalidationMessages(void)
 
    ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs,
                                LocalExecuteInvalidationMessage);
+
+   /* WAL Log per-command invalidation messages for wal_level=logical */
+   if (XLogLogicalInfoActive())
+       LogLogicalInvalidations();
+
    AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
                               &transInvalInfo->CurrentCmdInvalidMsgs);
 }
@@ -1501,3 +1509,49 @@ CallSyscacheCallbacks(int cacheid, uint32 hashvalue)
        i = ccitem->link - 1;
    }
 }
+
+/*
+ * LogLogicalInvalidations
+ *
+ * Emit an XLOG_XACT_INVALIDATIONS record carrying the current command's
+ * pending invalidation messages, if there are any.  Used at command end and
+ * at commit time; callers are expected to test XLogLogicalInfoActive() first.
+ */
+void
+LogLogicalInvalidations(void)
+{
+   xl_xact_invals xlrec;
+   SharedInvalidationMessage *invalMessages;
+   int         nmsgs = 0;
+
+   /* Quick exit if we haven't done anything with invalidation messages. */
+   if (transInvalInfo == NULL)
+       return;
+
+   ProcessInvalidationMessagesMulti(&transInvalInfo->CurrentCmdInvalidMsgs,
+                                    MakeSharedInvalidMessagesArray);
+
+   Assert(!(numSharedInvalidMessagesArray > 0 &&
+            SharedInvalidMessagesArray == NULL));
+
+   /* Take over the collected array and reset the shared collection state. */
+   invalMessages = SharedInvalidMessagesArray;
+   nmsgs = numSharedInvalidMessagesArray;
+   SharedInvalidMessagesArray = NULL;
+   numSharedInvalidMessagesArray = 0;
+
+   if (nmsgs > 0)
+   {
+       /* prepare record: zeroed fixed-size header plus the message count */
+       memset(&xlrec, 0, MinSizeOfXactInvals);
+       xlrec.nmsgs = nmsgs;
+
+       /* perform insertion: header first, then the message array itself */
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&xlrec), MinSizeOfXactInvals);
+       XLogRegisterData((char *) invalMessages,
+                        nmsgs * sizeof(SharedInvalidationMessage));
+       XLogInsert(RM_XACT_ID, XLOG_XACT_INVALIDATIONS);
+
+       pfree(invalMessages);
+   }
+}
index aef8555367449745d5ea73f53e20b1c73a6d8ca8..53480116a4622f3b36484c6112ecd652ce370db8 100644 (file)
@@ -146,7 +146,7 @@ typedef void (*SubXactCallback) (SubXactEvent event, SubTransactionId mySubid,
 #define XLOG_XACT_COMMIT_PREPARED  0x30
 #define XLOG_XACT_ABORT_PREPARED   0x40
 #define XLOG_XACT_ASSIGNMENT       0x50
-/* free opcode 0x60 */
+#define XLOG_XACT_INVALIDATIONS        0x60
 /* free opcode 0x70 */
 
 /* mask for filtering opcodes out of xl_info */
index b9490a3afeff26d62e8bdcc66bb2e3bf260c9848..9b2da56379e1507a15a6afdfd2231c9af7284126 100644 (file)
@@ -31,7 +31,7 @@
 /*
  * Each page of XLOG file has a header like this:
  */
-#define XLOG_PAGE_MAGIC 0xD107 /* can be used as WAL version indicator */
+#define XLOG_PAGE_MAGIC 0xD108 /* can be used as WAL version indicator */
 
 typedef struct XLogPageHeaderData
 {
index 019bd382de9b9cecfbd3ccfa2e6378874cd9925f..1055e99e2e140efeb241956b81c25c284f5f06fb 100644 (file)
@@ -220,6 +220,9 @@ typedef struct ReorderBufferTXN
     */
    XLogRecPtr  end_lsn;
 
+   /* Toplevel transaction for this subxact (NULL for top-level). */
+   struct ReorderBufferTXN *toptxn;
+
    /*
     * LSN of the last lsn at which snapshot information reside, so we can
     * restart decoding from there and fully recover this transaction from
index bc5081cf7210b7b3c36a77c27cafc747bf6d77dd..463888c3894f996c78026c3facbb1e71cd8983f2 100644 (file)
@@ -61,4 +61,6 @@ extern void CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
 extern void CallSyscacheCallbacks(int cacheid, uint32 hashvalue);
 
 extern void InvalidateSystemCaches(void);
+
+extern void LogLogicalInvalidations(void);
 #endif                         /* INVAL_H */