Fix LookupTupleHashEntryHash() pipeline-stall issue.
authorJeff Davis <jdavis@postgresql.org>
Sun, 26 Jul 2020 21:55:52 +0000 (14:55 -0700)
committerJeff Davis <jdavis@postgresql.org>
Sun, 26 Jul 2020 22:09:46 +0000 (15:09 -0700)
Refactor hash lookups in nodeAgg.c to improve performance.

Author: Andres Freund and Jeff Davis
Discussion: https://postgr.es/m/20200612213715.op4ye4q7gktqvpuo%40alap3.anarazel.de
Backpatch-through: 13

src/backend/executor/execGrouping.c
src/backend/executor/nodeAgg.c
src/backend/executor/nodeRecursiveunion.c
src/backend/executor/nodeSetOp.c
src/backend/executor/nodeSubplan.c
src/include/executor/executor.h

index 019b87df21ecb67130cfac76ada78ad738550e1f..321f427e478fcf3748f910b4655882c8302080c9 100644 (file)
 #include "utils/memutils.h"
 
 static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
-static uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
-                                         const MinimalTuple tuple);
-static TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
-                                                   TupleTableSlot *slot,
-                                                   bool *isnew, uint32 hash);
+static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
+                                                const MinimalTuple tuple);
+static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
+                                                          TupleTableSlot *slot,
+                                                          bool *isnew, uint32 hash);
 
 /*
  * Define parameters for tuple hash table code generation. The interface is
@@ -291,6 +291,9 @@ ResetTupleHashTable(TupleHashTable hashtable)
  * If isnew is NULL, we do not create new entries; we return NULL if no
  * match is found.
  *
+ * If hash is not NULL, we set it to the calculated hash value. This allows
+ * callers access to the hash value even if no entry is returned.
+ *
  * If isnew isn't NULL, then a new entry is created if no existing entry
  * matches.  On return, *isnew is true if the entry is newly created,
  * false if it existed already.  ->additional_data in the new entry has
@@ -298,11 +301,11 @@ ResetTupleHashTable(TupleHashTable hashtable)
  */
 TupleHashEntry
 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
-                    bool *isnew)
+                    bool *isnew, uint32 *hash)
 {
    TupleHashEntry entry;
    MemoryContext oldContext;
-   uint32      hash;
+   uint32      local_hash;
 
    /* Need to run the hash functions in short-lived context */
    oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
@@ -312,8 +315,13 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
    hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
    hashtable->cur_eq_func = hashtable->tab_eq_func;
 
-   hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
-   entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
+   local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
+   entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
+
+   if (hash != NULL)
+       *hash = local_hash;
+
+   Assert(entry == NULL || entry->hash == local_hash);
 
    MemoryContextSwitchTo(oldContext);
 
@@ -362,6 +370,7 @@ LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
    hashtable->cur_eq_func = hashtable->tab_eq_func;
 
    entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
+   Assert(entry == NULL || entry->hash == hash);
 
    MemoryContextSwitchTo(oldContext);
 
@@ -480,7 +489,7 @@ TupleHashTableHash_internal(struct tuplehash_hash *tb,
  * NB: This function may or may not change the memory context. Caller is
  * expected to change it back.
  */
-static TupleHashEntry
+static inline TupleHashEntry
 LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
                              bool *isnew, uint32 hash)
 {
index b79c845a6b739d606c63d1bc9b24a39c8155949f..bbfc4af1ec9cb1c581a1a49da4eeee6b24d662f7 100644 (file)
@@ -391,7 +391,9 @@ static void finalize_partialaggregate(AggState *aggstate,
                                      AggStatePerAgg peragg,
                                      AggStatePerGroup pergroupstate,
                                      Datum *resultVal, bool *resultIsNull);
-static void prepare_hash_slot(AggState *aggstate);
+static inline void prepare_hash_slot(AggStatePerHash perhash,
+                                    TupleTableSlot *inputslot,
+                                    TupleTableSlot *hashslot);
 static void prepare_projection_slot(AggState *aggstate,
                                    TupleTableSlot *slot,
                                    int currentSet);
@@ -413,8 +415,9 @@ static int  hash_choose_num_partitions(uint64 input_groups,
                                       double hashentrysize,
                                       int used_bits,
                                       int *log2_npartittions);
-static AggStatePerGroup lookup_hash_entry(AggState *aggstate, uint32 hash,
-                                         bool *in_hash_table);
+static void initialize_hash_entry(AggState *aggstate,
+                                 TupleHashTable hashtable,
+                                 TupleHashEntry entry);
 static void lookup_hash_entries(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
 static void agg_fill_hash_table(AggState *aggstate);
@@ -1207,12 +1210,11 @@ finalize_partialaggregate(AggState *aggstate,
  * Extract the attributes that make up the grouping key into the
  * hashslot. This is necessary to compute the hash or perform a lookup.
  */
-static void
-prepare_hash_slot(AggState *aggstate)
+static inline void
+prepare_hash_slot(AggStatePerHash perhash,
+                 TupleTableSlot *inputslot,
+                 TupleTableSlot *hashslot)
 {
-   TupleTableSlot *inputslot = aggstate->tmpcontext->ecxt_outertuple;
-   AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
-   TupleTableSlot *hashslot = perhash->hashslot;
    int         i;
 
    /* transfer just the needed columns into hashslot */
@@ -2013,75 +2015,39 @@ hash_choose_num_partitions(uint64 input_groups, double hashentrysize,
 }
 
 /*
- * Find or create a hashtable entry for the tuple group containing the current
- * tuple (already set in tmpcontext's outertuple slot), in the current grouping
- * set (which the caller must have selected - note that initialize_aggregate
- * depends on this).
- *
- * When called, CurrentMemoryContext should be the per-query context. The
- * already-calculated hash value for the tuple must be specified.
- *
- * If in "spill mode", then only find existing hashtable entries; don't create
- * new ones. If a tuple's group is not already present in the hash table for
- * the current grouping set, assign *in_hash_table=false and the caller will
- * spill it to disk.
+ * Initialize a freshly-created TupleHashEntry.
  */
-static AggStatePerGroup
-lookup_hash_entry(AggState *aggstate, uint32 hash, bool *in_hash_table)
+static void
+initialize_hash_entry(AggState *aggstate, TupleHashTable hashtable,
+                     TupleHashEntry entry)
 {
-   AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
-   TupleTableSlot *hashslot = perhash->hashslot;
-   TupleHashEntryData *entry;
-   bool        isnew = false;
-   bool       *p_isnew;
-
-   /* if hash table already spilled, don't create new entries */
-   p_isnew = aggstate->hash_spill_mode ? NULL : &isnew;
-
-   /* find or create the hashtable entry using the filtered tuple */
-   entry = LookupTupleHashEntryHash(perhash->hashtable, hashslot, p_isnew,
-                                    hash);
-
-   if (entry == NULL)
-   {
-       *in_hash_table = false;
-       return NULL;
-   }
-   else
-       *in_hash_table = true;
-
-   if (isnew)
-   {
-       AggStatePerGroup pergroup;
-       int         transno;
+   AggStatePerGroup pergroup;
+   int         transno;
 
-       aggstate->hash_ngroups_current++;
-       hash_agg_check_limits(aggstate);
+   aggstate->hash_ngroups_current++;
+   hash_agg_check_limits(aggstate);
 
-       /* no need to allocate or initialize per-group state */
-       if (aggstate->numtrans == 0)
-           return NULL;
+   /* no need to allocate or initialize per-group state */
+   if (aggstate->numtrans == 0)
+       return;
 
-       pergroup = (AggStatePerGroup)
-           MemoryContextAlloc(perhash->hashtable->tablecxt,
-                              sizeof(AggStatePerGroupData) * aggstate->numtrans);
+   pergroup = (AggStatePerGroup)
+       MemoryContextAlloc(hashtable->tablecxt,
+                          sizeof(AggStatePerGroupData) * aggstate->numtrans);
 
-       entry->additional = pergroup;
+   entry->additional = pergroup;
 
-       /*
-        * Initialize aggregates for new tuple group, lookup_hash_entries()
-        * already has selected the relevant grouping set.
-        */
-       for (transno = 0; transno < aggstate->numtrans; transno++)
-       {
-           AggStatePerTrans pertrans = &aggstate->pertrans[transno];
-           AggStatePerGroup pergroupstate = &pergroup[transno];
+   /*
+    * Initialize aggregates for new tuple group, lookup_hash_entries()
+    * already has selected the relevant grouping set.
+    */
+   for (transno = 0; transno < aggstate->numtrans; transno++)
+   {
+       AggStatePerTrans pertrans = &aggstate->pertrans[transno];
+       AggStatePerGroup pergroupstate = &pergroup[transno];
 
-           initialize_aggregate(aggstate, pertrans, pergroupstate);
-       }
+       initialize_aggregate(aggstate, pertrans, pergroupstate);
    }
-
-   return entry->additional;
 }
 
 /*
@@ -2106,21 +2072,37 @@ static void
 lookup_hash_entries(AggState *aggstate)
 {
    AggStatePerGroup *pergroup = aggstate->hash_pergroup;
+   TupleTableSlot *outerslot = aggstate->tmpcontext->ecxt_outertuple;
    int         setno;
 
    for (setno = 0; setno < aggstate->num_hashes; setno++)
    {
        AggStatePerHash perhash = &aggstate->perhash[setno];
+       TupleHashTable hashtable = perhash->hashtable;
+       TupleTableSlot *hashslot = perhash->hashslot;
+       TupleHashEntry entry;
        uint32      hash;
-       bool        in_hash_table;
+       bool        isnew = false;
+       bool       *p_isnew;
+
+       /* if hash table already spilled, don't create new entries */
+       p_isnew = aggstate->hash_spill_mode ? NULL : &isnew;
 
        select_current_set(aggstate, setno, true);
-       prepare_hash_slot(aggstate);
-       hash = TupleHashTableHash(perhash->hashtable, perhash->hashslot);
-       pergroup[setno] = lookup_hash_entry(aggstate, hash, &in_hash_table);
+       prepare_hash_slot(perhash,
+                         outerslot,
+                         hashslot);
+
+       entry = LookupTupleHashEntry(hashtable, hashslot,
+                                    p_isnew, &hash);
 
-       /* check to see if we need to spill the tuple for this grouping set */
-       if (!in_hash_table)
+       if (entry != NULL)
+       {
+           if (isnew)
+               initialize_hash_entry(aggstate, hashtable, entry);
+           pergroup[setno] = entry->additional;
+       }
+       else
        {
            HashAggSpill *spill = &aggstate->hash_spills[setno];
            TupleTableSlot *slot = aggstate->tmpcontext->ecxt_outertuple;
@@ -2131,6 +2113,7 @@ lookup_hash_entries(AggState *aggstate)
                                   aggstate->hashentrysize);
 
            hashagg_spill_tuple(aggstate, spill, slot, hash);
+           pergroup[setno] = NULL;
        }
    }
 }
@@ -2588,6 +2571,7 @@ static bool
 agg_refill_hash_table(AggState *aggstate)
 {
    HashAggBatch *batch;
+   AggStatePerHash perhash;
    HashAggSpill spill;
    HashTapeInfo *tapeinfo = aggstate->hash_tapeinfo;
    uint64      ngroups_estimate;
@@ -2639,6 +2623,8 @@ agg_refill_hash_table(AggState *aggstate)
 
    select_current_set(aggstate, batch->setno, true);
 
+   perhash = &aggstate->perhash[aggstate->current_set];
+
    /*
     * Spilled tuples are always read back as MinimalTuples, which may be
     * different from the outer plan, so recompile the aggregate expressions.
@@ -2652,10 +2638,13 @@ agg_refill_hash_table(AggState *aggstate)
                             HASHAGG_READ_BUFFER_SIZE);
    for (;;)
    {
-       TupleTableSlot *slot = aggstate->hash_spill_rslot;
+       TupleTableSlot *spillslot = aggstate->hash_spill_rslot;
+       TupleTableSlot *hashslot = perhash->hashslot;
+       TupleHashEntry entry;
        MinimalTuple tuple;
        uint32      hash;
-       bool        in_hash_table;
+       bool        isnew = false;
+       bool       *p_isnew = aggstate->hash_spill_mode ? NULL : &isnew;
 
        CHECK_FOR_INTERRUPTS();
 
@@ -2663,16 +2652,20 @@ agg_refill_hash_table(AggState *aggstate)
        if (tuple == NULL)
            break;
 
-       ExecStoreMinimalTuple(tuple, slot, true);
-       aggstate->tmpcontext->ecxt_outertuple = slot;
+       ExecStoreMinimalTuple(tuple, spillslot, true);
+       aggstate->tmpcontext->ecxt_outertuple = spillslot;
 
-       prepare_hash_slot(aggstate);
-       aggstate->hash_pergroup[batch->setno] =
-           lookup_hash_entry(aggstate, hash, &in_hash_table);
+       prepare_hash_slot(perhash,
+                         aggstate->tmpcontext->ecxt_outertuple,
+                         hashslot);
+       entry = LookupTupleHashEntryHash(
+                                        perhash->hashtable, hashslot, p_isnew, hash);
 
-       if (in_hash_table)
+       if (entry != NULL)
        {
-           /* Advance the aggregates (or combine functions) */
+           if (isnew)
+               initialize_hash_entry(aggstate, perhash->hashtable, entry);
+           aggstate->hash_pergroup[batch->setno] = entry->additional;
            advance_aggregates(aggstate);
        }
        else
@@ -2688,7 +2681,9 @@ agg_refill_hash_table(AggState *aggstate)
                                   ngroups_estimate, aggstate->hashentrysize);
            }
            /* no memory for a new group, spill */
-           hashagg_spill_tuple(aggstate, &spill, slot, hash);
+           hashagg_spill_tuple(aggstate, &spill, spillslot, hash);
+
+           aggstate->hash_pergroup[batch->setno] = NULL;
        }
 
        /*
index 620414a1edcf9afb5a6edbc44df0dbea4312188a..046242682f017f48a4571a78e74205e86d6a9ecd 100644 (file)
@@ -94,7 +94,7 @@ ExecRecursiveUnion(PlanState *pstate)
            if (plan->numCols > 0)
            {
                /* Find or build hashtable entry for this tuple's group */
-               LookupTupleHashEntry(node->hashtable, slot, &isnew);
+               LookupTupleHashEntry(node->hashtable, slot, &isnew, NULL);
                /* Must reset temp context after each hashtable lookup */
                MemoryContextReset(node->tempContext);
                /* Ignore tuple if already seen */
@@ -141,7 +141,7 @@ ExecRecursiveUnion(PlanState *pstate)
        if (plan->numCols > 0)
        {
            /* Find or build hashtable entry for this tuple's group */
-           LookupTupleHashEntry(node->hashtable, slot, &isnew);
+           LookupTupleHashEntry(node->hashtable, slot, &isnew, NULL);
            /* Must reset temp context after each hashtable lookup */
            MemoryContextReset(node->tempContext);
            /* Ignore tuple if already seen */
index bfd148a41a2416cea719da8157eed0e6b98b4ed9..8d4ccff19cc6222403bd3699f238c193af8d13d8 100644 (file)
@@ -381,7 +381,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 
            /* Find or build hashtable entry for this tuple's group */
            entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
-                                        &isnew);
+                                        &isnew, NULL);
 
            /* If new tuple group, initialize counts */
            if (isnew)
@@ -402,7 +402,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 
            /* For tuples not seen previously, do not make hashtable entry */
            entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
-                                        NULL);
+                                        NULL, NULL);
 
            /* Advance the counts if entry is already present */
            if (entry)
index 298b7757f57ccf1c4ec3645223e9dab28e0b87ee..38c2fc0b50b66e31619aafa37d05c44caa4d3b0a 100644 (file)
@@ -595,12 +595,12 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
         */
        if (slotNoNulls(slot))
        {
-           (void) LookupTupleHashEntry(node->hashtable, slot, &isnew);
+           (void) LookupTupleHashEntry(node->hashtable, slot, &isnew, NULL);
            node->havehashrows = true;
        }
        else if (node->hashnulls)
        {
-           (void) LookupTupleHashEntry(node->hashnulls, slot, &isnew);
+           (void) LookupTupleHashEntry(node->hashnulls, slot, &isnew, NULL);
            node->havenullrows = true;
        }
 
index c7deeac662f6aa17c7cf42bccb605e01d11df412..415e117407c9688e9ca9422efff9aaae221255ca 100644 (file)
@@ -139,7 +139,7 @@ extern TupleHashTable BuildTupleHashTableExt(PlanState *parent,
                                             MemoryContext tempcxt, bool use_variable_hash_iv);
 extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
                                           TupleTableSlot *slot,
-                                          bool *isnew);
+                                          bool *isnew, uint32 *hash);
 extern uint32 TupleHashTableHash(TupleHashTable hashtable,
                                 TupleTableSlot *slot);
 extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable,