Don't be so trusting that shm_toc_lookup() will always succeed.
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 5 Jun 2017 16:05:42 +0000 (12:05 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 5 Jun 2017 16:05:42 +0000 (12:05 -0400)
Given the possibility of race conditions and so on, it seems entirely
unsafe to just assume that shm_toc_lookup() always finds the key it's
looking for --- but that was exactly what all but one call site were
doing.  To fix, add a "bool noError" argument, similarly to what we
have in many other functions, and throw an error on an unexpected
lookup failure.  Remove now-redundant Asserts that a rather random
subset of call sites had.
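
To illustrate the revised convention, here is a short sketch drawn from
the call sites changed below (PARALLEL_KEY_FIXED is one of the existing
parallel.c TOC keys):

    /* Before: the caller had to check the result, and most didn't. */
    fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED);
    Assert(fps != NULL);

    /* After: with noError = false, a missing key throws elog(ERROR). */
    fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED, false);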

I doubt this will throw any light on buildfarm member lorikeet's
recent failures, because if an unnoticed lookup failure were involved,
you'd kind of expect a null-pointer-dereference crash rather than the
observed symptom.  But you never know ... and this is better coding
practice even if it never catches anything.

Discussion: https://postgr.es/m/9697.1496675981@sss.pgh.pa.us

src/backend/access/transam/parallel.c
src/backend/executor/execParallel.c
src/backend/executor/nodeBitmapHeapscan.c
src/backend/executor/nodeCustom.c
src/backend/executor/nodeForeignscan.c
src/backend/executor/nodeIndexonlyscan.c
src/backend/executor/nodeIndexscan.c
src/backend/executor/nodeSeqscan.c
src/backend/storage/ipc/shm_toc.c
src/include/storage/shm_toc.h
src/test/modules/test_shm_mq/worker.c

diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c
index 2dad3e8a655e1e15cb1d6826d419e369a7d7029e..cb22174270677ce2339bfcfef15d013ffbacf3ce 100644
@@ -392,12 +392,12 @@ ReinitializeParallelDSM(ParallelContext *pcxt)
    }
 
    /* Reset a few bits of fixed parallel state to a clean state. */
-   fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED);
+   fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED, false);
    fps->last_xlog_end = 0;
 
    /* Recreate error queues. */
    error_queue_space =
-       shm_toc_lookup(pcxt->toc, PARALLEL_KEY_ERROR_QUEUE);
+       shm_toc_lookup(pcxt->toc, PARALLEL_KEY_ERROR_QUEUE, false);
    for (i = 0; i < pcxt->nworkers; ++i)
    {
        char       *start;
@@ -536,7 +536,7 @@ WaitForParallelWorkersToFinish(ParallelContext *pcxt)
    {
        FixedParallelState *fps;
 
-       fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED);
+       fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED, false);
        if (fps->last_xlog_end > XactLastRecEnd)
            XactLastRecEnd = fps->last_xlog_end;
    }
@@ -973,8 +973,7 @@ ParallelWorkerMain(Datum main_arg)
           errmsg("invalid magic number in dynamic shared memory segment")));
 
    /* Look up fixed parallel state. */
-   fps = shm_toc_lookup(toc, PARALLEL_KEY_FIXED);
-   Assert(fps != NULL);
+   fps = shm_toc_lookup(toc, PARALLEL_KEY_FIXED, false);
    MyFixedParallelState = fps;
 
    /*
@@ -983,7 +982,7 @@ ParallelWorkerMain(Datum main_arg)
     * errors that happen here will not be reported back to the process that
     * requested that this worker be launched.
     */
-   error_queue_space = shm_toc_lookup(toc, PARALLEL_KEY_ERROR_QUEUE);
+   error_queue_space = shm_toc_lookup(toc, PARALLEL_KEY_ERROR_QUEUE, false);
    mq = (shm_mq *) (error_queue_space +
                     ParallelWorkerNumber * PARALLEL_ERROR_QUEUE_SIZE);
    shm_mq_set_sender(mq, MyProc);
@@ -1027,8 +1026,7 @@ ParallelWorkerMain(Datum main_arg)
     * this before restoring GUCs, because the libraries might define custom
     * variables.
     */
-   libraryspace = shm_toc_lookup(toc, PARALLEL_KEY_LIBRARY);
-   Assert(libraryspace != NULL);
+   libraryspace = shm_toc_lookup(toc, PARALLEL_KEY_LIBRARY, false);
    RestoreLibraryState(libraryspace);
 
    /*
@@ -1036,8 +1034,7 @@ ParallelWorkerMain(Datum main_arg)
     * loading an additional library, though most likely the entry point is in
     * the core backend or in a library we just loaded.
     */
-   entrypointstate = shm_toc_lookup(toc, PARALLEL_KEY_ENTRYPOINT);
-   Assert(entrypointstate != NULL);
+   entrypointstate = shm_toc_lookup(toc, PARALLEL_KEY_ENTRYPOINT, false);
    library_name = entrypointstate;
    function_name = entrypointstate + strlen(library_name) + 1;
 
@@ -1054,30 +1051,26 @@ ParallelWorkerMain(Datum main_arg)
    SetClientEncoding(GetDatabaseEncoding());
 
    /* Restore GUC values from launching backend. */
-   gucspace = shm_toc_lookup(toc, PARALLEL_KEY_GUC);
-   Assert(gucspace != NULL);
+   gucspace = shm_toc_lookup(toc, PARALLEL_KEY_GUC, false);
    StartTransactionCommand();
    RestoreGUCState(gucspace);
    CommitTransactionCommand();
 
    /* Crank up a transaction state appropriate to a parallel worker. */
-   tstatespace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_STATE);
+   tstatespace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_STATE, false);
    StartParallelWorkerTransaction(tstatespace);
 
    /* Restore combo CID state. */
-   combocidspace = shm_toc_lookup(toc, PARALLEL_KEY_COMBO_CID);
-   Assert(combocidspace != NULL);
+   combocidspace = shm_toc_lookup(toc, PARALLEL_KEY_COMBO_CID, false);
    RestoreComboCIDState(combocidspace);
 
    /* Restore transaction snapshot. */
-   tsnapspace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_SNAPSHOT);
-   Assert(tsnapspace != NULL);
+   tsnapspace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_SNAPSHOT, false);
    RestoreTransactionSnapshot(RestoreSnapshot(tsnapspace),
                               fps->parallel_master_pgproc);
 
    /* Restore active snapshot. */
-   asnapspace = shm_toc_lookup(toc, PARALLEL_KEY_ACTIVE_SNAPSHOT);
-   Assert(asnapspace != NULL);
+   asnapspace = shm_toc_lookup(toc, PARALLEL_KEY_ACTIVE_SNAPSHOT, false);
    PushActiveSnapshot(RestoreSnapshot(asnapspace));
 
    /*
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index 061018001602413b74e8267a3e5c6cf3ac971701..1c02fa140b0cddc655761543ec1c65860ddd94d2 100644
@@ -341,7 +341,7 @@ ExecParallelSetupTupleQueues(ParallelContext *pcxt, bool reinitialize)
                             mul_size(PARALLEL_TUPLE_QUEUE_SIZE,
                                      pcxt->nworkers));
    else
-       tqueuespace = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_TUPLE_QUEUE);
+       tqueuespace = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_TUPLE_QUEUE, false);
 
    /* Create the queues, and become the receiver for each. */
    for (i = 0; i < pcxt->nworkers; ++i)
@@ -684,7 +684,7 @@ ExecParallelGetReceiver(dsm_segment *seg, shm_toc *toc)
    char       *mqspace;
    shm_mq     *mq;
 
-   mqspace = shm_toc_lookup(toc, PARALLEL_KEY_TUPLE_QUEUE);
+   mqspace = shm_toc_lookup(toc, PARALLEL_KEY_TUPLE_QUEUE, false);
    mqspace += ParallelWorkerNumber * PARALLEL_TUPLE_QUEUE_SIZE;
    mq = (shm_mq *) mqspace;
    shm_mq_set_sender(mq, MyProc);
@@ -705,14 +705,14 @@ ExecParallelGetQueryDesc(shm_toc *toc, DestReceiver *receiver,
    char       *queryString;
 
    /* Get the query string from shared memory */
-   queryString = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT);
+   queryString = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, false);
 
    /* Reconstruct leader-supplied PlannedStmt. */
-   pstmtspace = shm_toc_lookup(toc, PARALLEL_KEY_PLANNEDSTMT);
+   pstmtspace = shm_toc_lookup(toc, PARALLEL_KEY_PLANNEDSTMT, false);
    pstmt = (PlannedStmt *) stringToNode(pstmtspace);
 
    /* Reconstruct ParamListInfo. */
-   paramspace = shm_toc_lookup(toc, PARALLEL_KEY_PARAMS);
+   paramspace = shm_toc_lookup(toc, PARALLEL_KEY_PARAMS, false);
    paramLI = RestoreParamList(&paramspace);
 
    /*
@@ -843,7 +843,7 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
 
    /* Set up DestReceiver, SharedExecutorInstrumentation, and QueryDesc. */
    receiver = ExecParallelGetReceiver(seg, toc);
-   instrumentation = shm_toc_lookup(toc, PARALLEL_KEY_INSTRUMENTATION);
+   instrumentation = shm_toc_lookup(toc, PARALLEL_KEY_INSTRUMENTATION, true);
    if (instrumentation != NULL)
        instrument_options = instrumentation->instrument_options;
    queryDesc = ExecParallelGetQueryDesc(toc, receiver, instrument_options);
@@ -858,7 +858,7 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
    InstrStartParallelQuery();
 
    /* Attach to the dynamic shared memory area. */
-   area_space = shm_toc_lookup(toc, PARALLEL_KEY_DSA);
+   area_space = shm_toc_lookup(toc, PARALLEL_KEY_DSA, false);
    area = dsa_attach_in_place(area_space, seg);
 
    /* Start up the executor */
@@ -875,7 +875,7 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
    ExecutorFinish(queryDesc);
 
    /* Report buffer usage during parallel execution. */
-   buffer_usage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE);
+   buffer_usage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
    InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber]);
 
    /* Report instrumentation data if any instrumentation options are set. */
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index c453362230856a6aa2a547557d141edd7eb9f770..77f65db0ca0bf2cb151466e6d730a1cb4ff5c546 100644
@@ -1005,7 +1005,7 @@ ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, shm_toc *toc)
    ParallelBitmapHeapState *pstate;
    Snapshot    snapshot;
 
-   pstate = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id);
+   pstate = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false);
    node->pstate = pstate;
 
    snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
diff --git a/src/backend/executor/nodeCustom.c b/src/backend/executor/nodeCustom.c
index 5d309828ef185616b8d8c3afdbac6c954f9ad61d..69e27047f1c667182e079347a73ff4ffa1957ec4 100644
@@ -194,7 +194,7 @@ ExecCustomScanInitializeWorker(CustomScanState *node, shm_toc *toc)
        int         plan_node_id = node->ss.ps.plan->plan_node_id;
        void       *coordinate;
 
-       coordinate = shm_toc_lookup(toc, plan_node_id);
+       coordinate = shm_toc_lookup(toc, plan_node_id, false);
        methods->InitializeWorkerCustomScan(node, toc, coordinate);
    }
 }
diff --git a/src/backend/executor/nodeForeignscan.c b/src/backend/executor/nodeForeignscan.c
index 9ae1561404b4ed84db2b26b2bef65d59760a5101..9cde112554b2e1801328182b8802d19fb9b3fd27 100644
@@ -344,7 +344,7 @@ ExecForeignScanInitializeWorker(ForeignScanState *node, shm_toc *toc)
        int         plan_node_id = node->ss.ps.plan->plan_node_id;
        void       *coordinate;
 
-       coordinate = shm_toc_lookup(toc, plan_node_id);
+       coordinate = shm_toc_lookup(toc, plan_node_id, false);
        fdwroutine->InitializeWorkerForeignScan(node, toc, coordinate);
    }
 }
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 5550f6c0a4be70bbb3735c2c7a83ef4f5beed2a3..fb3d3bb1218fcf772ccb3449c1171705678d7cd2 100644
@@ -676,7 +676,7 @@ ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node, shm_toc *toc)
 {
    ParallelIndexScanDesc piscan;
 
-   piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id);
+   piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false);
    node->ioss_ScanDesc =
        index_beginscan_parallel(node->ss.ss_currentRelation,
                                 node->ioss_RelationDesc,
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index 5afd02e09ddfad4417af5324e5986e7807ee37a5..0fb3fb5e7ece008bda33b305d556fb1d048b7b98 100644
@@ -1714,7 +1714,7 @@ ExecIndexScanInitializeWorker(IndexScanState *node, shm_toc *toc)
 {
    ParallelIndexScanDesc piscan;
 
-   piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id);
+   piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false);
    node->iss_ScanDesc =
        index_beginscan_parallel(node->ss.ss_currentRelation,
                                 node->iss_RelationDesc,
diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c
index 5680464fa273d554e0266572811e1ae5941e5677..c0e37dcd834c8459aa30a3377995f56b00f1f458 100644
@@ -332,7 +332,7 @@ ExecSeqScanInitializeWorker(SeqScanState *node, shm_toc *toc)
 {
    ParallelHeapScanDesc pscan;
 
-   pscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id);
+   pscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false);
    node->ss.ss_currentScanDesc =
        heap_beginscan_parallel(node->ss.ss_currentRelation, pscan);
 }
diff --git a/src/backend/storage/ipc/shm_toc.c b/src/backend/storage/ipc/shm_toc.c
index 9110ffa4a0bff1582351c21b4a1d07f70f278991..5e290df3366b4083924d4f97450d1a8d20013dde 100644
@@ -208,6 +208,9 @@ shm_toc_insert(shm_toc *toc, uint64 key, void *address)
 /*
  * Look up a TOC entry.
  *
+ * If the key is not found, returns NULL if noError is true, otherwise
+ * throws elog(ERROR).
+ *
  * Unlike the other functions in this file, this operation acquires no lock;
  * it uses only barriers.  It probably wouldn't hurt concurrency very much even
  * if it did get a lock, but since it's reasonably likely that a group of
@@ -215,7 +218,7 @@ shm_toc_insert(shm_toc *toc, uint64 key, void *address)
  * right around the same time, there seems to be some value in avoiding it.
  */
 void *
-shm_toc_lookup(shm_toc *toc, uint64 key)
+shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
 {
    uint64      nentry;
    uint64      i;
@@ -226,10 +229,15 @@ shm_toc_lookup(shm_toc *toc, uint64 key)
 
    /* Now search for a matching entry. */
    for (i = 0; i < nentry; ++i)
+   {
        if (toc->toc_entry[i].key == key)
            return ((char *) toc) + toc->toc_entry[i].offset;
+   }
 
    /* No matching entry was found. */
+   if (!noError)
+       elog(ERROR, "could not find key " UINT64_FORMAT " in shm TOC at %p",
+            key, toc);
    return NULL;
 }
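
A usage sketch covering both modes of the revised function (taken from
the execParallel.c call sites updated in this commit; the instrumentation
entry is the one genuinely optional TOC entry here):

    /* Required entry: pass noError = false; a missing key is elog(ERROR). */
    area_space = shm_toc_lookup(toc, PARALLEL_KEY_DSA, false);

    /* Optional entry: pass noError = true and test for NULL. */
    instrumentation = shm_toc_lookup(toc, PARALLEL_KEY_INSTRUMENTATION, true);
    if (instrumentation != NULL)
        instrument_options = instrumentation->instrument_options;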
 
diff --git a/src/include/storage/shm_toc.h b/src/include/storage/shm_toc.h
index ae0a3878feb9266f974cdb3b3238dfda78a2bde3..0548e309bd0b5f455148c30ada81da1e56552729 100644
@@ -32,7 +32,7 @@ extern shm_toc *shm_toc_attach(uint64 magic, void *address);
 extern void *shm_toc_allocate(shm_toc *toc, Size nbytes);
 extern Size shm_toc_freespace(shm_toc *toc);
 extern void shm_toc_insert(shm_toc *toc, uint64 key, void *address);
-extern void *shm_toc_lookup(shm_toc *toc, uint64 key);
+extern void *shm_toc_lookup(shm_toc *toc, uint64 key, bool noError);
 
 /*
  * Tools for estimating how large a chunk of shared memory will be needed
diff --git a/src/test/modules/test_shm_mq/worker.c b/src/test/modules/test_shm_mq/worker.c
index 3e45c75dc0594313b5368f5bdb04f792c1a2e334..f8aef263f72ac886578442a76b441024366dbb69 100644
@@ -95,7 +95,7 @@ test_shm_mq_main(Datum main_arg)
     * find it.  Our worker number gives our identity: there may be just one
     * worker involved in this parallel operation, or there may be many.
     */
-   hdr = shm_toc_lookup(toc, 0);
+   hdr = shm_toc_lookup(toc, 0, false);
    SpinLockAcquire(&hdr->mutex);
    myworkernumber = ++hdr->workers_attached;
    SpinLockRelease(&hdr->mutex);
@@ -158,10 +158,10 @@ attach_to_queues(dsm_segment *seg, shm_toc *toc, int myworkernumber,
    shm_mq     *inq;
    shm_mq     *outq;
 
-   inq = shm_toc_lookup(toc, myworkernumber);
+   inq = shm_toc_lookup(toc, myworkernumber, false);
    shm_mq_set_receiver(inq, MyProc);
    *inqhp = shm_mq_attach(inq, seg, NULL);
-   outq = shm_toc_lookup(toc, myworkernumber + 1);
+   outq = shm_toc_lookup(toc, myworkernumber + 1, false);
    shm_mq_set_sender(outq, MyProc);
    *outqhp = shm_mq_attach(outq, seg, NULL);
 }