TupleHashTable: store additional data along with tuple.
authorJeff Davis <jdavis@postgresql.org>
Sat, 11 Jan 2025 01:14:37 +0000 (17:14 -0800)
committerJeff Davis <jdavis@postgresql.org>
Sat, 11 Jan 2025 01:14:37 +0000 (17:14 -0800)
Previously, the caller needed to allocate the memory and the
TupleHashTable would store a pointer to it. That wastes space for the
palloc overhead as well as the size of the pointer itself.

Now, the TupleHashTable relies on the caller to correctly specify the
additionalsize, and allocates that amount of space. The caller can
then request a pointer into that space.

Discussion: https://postgr.es/m/b9cbf0219a9859dc8d240311643ff4362fd9602c.camel@j-davis.com
Reviewed-by: Heikki Linnakangas
src/backend/executor/execGrouping.c
src/backend/executor/nodeAgg.c
src/backend/executor/nodeSetOp.c
src/backend/executor/nodeSubplan.c
src/include/executor/executor.h
src/include/nodes/execnodes.h

index 33b124fbb0a5014c451a83ef27325775d18c0b9d..40f1776a7bea0db15de663744176761426224352 100644 (file)
 #include "miscadmin.h"
 #include "utils/lsyscache.h"
 
+typedef struct TupleHashEntryData
+{
+   MinimalTuple firstTuple;    /* copy of first tuple in this group */
+   uint32      status;         /* hash status */
+   uint32      hash;           /* hash value (cached) */
+} TupleHashEntryData;
+
 static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
 static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
                                                 const MinimalTuple tuple);
@@ -196,6 +203,7 @@ BuildTupleHashTable(PlanState *parent,
    hashtable->tab_collations = collations;
    hashtable->tablecxt = tablecxt;
    hashtable->tempcxt = tempcxt;
+   hashtable->additionalsize = additionalsize;
    hashtable->tableslot = NULL;    /* will be made on first lookup */
    hashtable->inputslot = NULL;
    hashtable->in_hash_expr = NULL;
@@ -273,6 +281,15 @@ ResetTupleHashTable(TupleHashTable hashtable)
    tuplehash_reset(hashtable->hashtab);
 }
 
+/*
+ * Return size of the hash bucket. Useful for estimating memory usage.
+ */
+size_t
+TupleHashEntrySize(void)
+{
+   return sizeof(TupleHashEntryData);
+}
+
 /*
  * Find or create a hashtable entry for the tuple group containing the
  * given tuple.  The tuple must be the same type as the hashtable entries.
@@ -339,6 +356,24 @@ TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
    return hash;
 }
 
+MinimalTuple
+TupleHashEntryGetTuple(TupleHashEntry entry)
+{
+   return entry->firstTuple;
+}
+
+/*
+ * Get a pointer into the additional space allocated for this entry. The
+ * amount of space available is the additionalsize specified to
+ * BuildTupleHashTable(). If additionalsize was specified as zero, no
+ * additional space is available and this function should not be called.
+ */
+void *
+TupleHashEntryGetAdditional(TupleHashEntry entry)
+{
+   return (char *) entry->firstTuple + MAXALIGN(entry->firstTuple->t_len);
+}
+
 /*
  * A variant of LookupTupleHashEntry for callers that have already computed
  * the hash value.
@@ -477,13 +512,31 @@ LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
        }
        else
        {
+           MinimalTuple firstTuple;
+           size_t totalsize; /* including alignment and additionalsize */
+
            /* created new entry */
            *isnew = true;
            /* zero caller data */
-           entry->additional = NULL;
            MemoryContextSwitchTo(hashtable->tablecxt);
+
            /* Copy the first tuple into the table context */
-           entry->firstTuple = ExecCopySlotMinimalTuple(slot);
+           firstTuple = ExecCopySlotMinimalTuple(slot);
+
+           /*
+            * Allocate additional space right after the MinimalTuple of size
+            * additionalsize. The caller can get a pointer to this data with
+            * TupleHashEntryGetAdditional(), and store arbitrary data there.
+            *
+            * This avoids the need to store an extra pointer or allocate an
+            * additional chunk, which would waste memory.
+            */
+           totalsize = MAXALIGN(firstTuple->t_len) + hashtable->additionalsize;
+           firstTuple = repalloc(firstTuple, totalsize);
+           memset((char *) firstTuple + firstTuple->t_len, 0,
+                  totalsize - firstTuple->t_len);
+
+           entry->firstTuple = firstTuple;
        }
    }
    else
index 3005b5c0e3b89ee38a4ba2288d61c60676232df6..19640752967667410a98959105c2ce2e51e9eb62 100644 (file)
@@ -1713,7 +1713,7 @@ hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace)
        transitionChunkSize = 0;
 
    return
-       sizeof(TupleHashEntryData) +
+       TupleHashEntrySize() +
        tupleChunkSize +
        pergroupChunkSize +
        transitionChunkSize;
@@ -1954,7 +1954,7 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
    if (aggstate->hash_ngroups_current > 0)
    {
        aggstate->hashentrysize =
-           sizeof(TupleHashEntryData) +
+           TupleHashEntrySize() +
            (hashkey_mem / (double) aggstate->hash_ngroups_current);
    }
 }
@@ -2055,11 +2055,7 @@ initialize_hash_entry(AggState *aggstate, TupleHashTable hashtable,
    if (aggstate->numtrans == 0)
        return;
 
-   pergroup = (AggStatePerGroup)
-       MemoryContextAlloc(hashtable->tablecxt,
-                          sizeof(AggStatePerGroupData) * aggstate->numtrans);
-
-   entry->additional = pergroup;
+   pergroup = (AggStatePerGroup) TupleHashEntryGetAdditional(entry);
 
    /*
     * Initialize aggregates for new tuple group, lookup_hash_entries()
@@ -2123,7 +2119,7 @@ lookup_hash_entries(AggState *aggstate)
        {
            if (isnew)
                initialize_hash_entry(aggstate, hashtable, entry);
-           pergroup[setno] = entry->additional;
+           pergroup[setno] = TupleHashEntryGetAdditional(entry);
        }
        else
        {
@@ -2681,7 +2677,7 @@ agg_refill_hash_table(AggState *aggstate)
        {
            if (isnew)
                initialize_hash_entry(aggstate, perhash->hashtable, entry);
-           aggstate->hash_pergroup[batch->setno] = entry->additional;
+           aggstate->hash_pergroup[batch->setno] = TupleHashEntryGetAdditional(entry);
            advance_aggregates(aggstate);
        }
        else
@@ -2773,7 +2769,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
    ExprContext *econtext;
    AggStatePerAgg peragg;
    AggStatePerGroup pergroup;
-   TupleHashEntryData *entry;
+   TupleHashEntry entry;
    TupleTableSlot *firstSlot;
    TupleTableSlot *result;
    AggStatePerHash perhash;
@@ -2845,7 +2841,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
         * Transform representative tuple back into one with the right
         * columns.
         */
-       ExecStoreMinimalTuple(entry->firstTuple, hashslot, false);
+       ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry), hashslot, false);
        slot_getallattrs(hashslot);
 
        ExecClearTuple(firstSlot);
@@ -2861,7 +2857,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
        }
        ExecStoreVirtualTuple(firstSlot);
 
-       pergroup = (AggStatePerGroup) entry->additional;
+       pergroup = (AggStatePerGroup) TupleHashEntryGetAdditional(entry);
 
        /*
         * Use the representative input tuple for any references to
index 5b7ff9c374850db6ab451252bbada724a39d39bc..2a50d56fc8798c4ff048e64398d120792b24ce68 100644 (file)
@@ -425,6 +425,7 @@ setop_fill_hash_table(SetOpState *setopstate)
    {
        TupleTableSlot *outerslot;
        TupleHashEntryData *entry;
+       SetOpStatePerGroup pergroup;
        bool        isnew;
 
        outerslot = ExecProcNode(outerPlan);
@@ -437,16 +438,16 @@ setop_fill_hash_table(SetOpState *setopstate)
                                     outerslot,
                                     &isnew, NULL);
 
+       pergroup = TupleHashEntryGetAdditional(entry);
        /* If new tuple group, initialize counts to zero */
        if (isnew)
        {
-           entry->additional = (SetOpStatePerGroup)
-               MemoryContextAllocZero(setopstate->hashtable->tablecxt,
-                                      sizeof(SetOpStatePerGroupData));
+           pergroup->numLeft = 0;
+           pergroup->numRight = 0;
        }
 
        /* Advance the counts */
-       ((SetOpStatePerGroup) entry->additional)->numLeft++;
+       pergroup->numLeft++;
 
        /* Must reset expression context after each hashtable lookup */
        ResetExprContext(econtext);
@@ -478,7 +479,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 
            /* Advance the counts if entry is already present */
            if (entry)
-               ((SetOpStatePerGroup) entry->additional)->numRight++;
+               ((SetOpStatePerGroup) TupleHashEntryGetAdditional(entry))->numRight++;
 
            /* Must reset expression context after each hashtable lookup */
            ResetExprContext(econtext);
@@ -496,7 +497,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 static TupleTableSlot *
 setop_retrieve_hash_table(SetOpState *setopstate)
 {
-   TupleHashEntryData *entry;
+   TupleHashEntry entry;
    TupleTableSlot *resultTupleSlot;
 
    /*
@@ -526,12 +527,12 @@ setop_retrieve_hash_table(SetOpState *setopstate)
         * See if we should emit any copies of this tuple, and if so return
         * the first copy.
         */
-       set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);
+       set_output_count(setopstate, (SetOpStatePerGroup) TupleHashEntryGetAdditional(entry));
 
        if (setopstate->numOutput > 0)
        {
            setopstate->numOutput--;
-           return ExecStoreMinimalTuple(entry->firstTuple,
+           return ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry),
                                         resultTupleSlot,
                                         false);
        }
index 49767ed6a52af813cb4efb72f65ad33a4f842d29..f7f6fc2da0b95b48f7159fb976505dd00d6d76d0 100644 (file)
@@ -753,7 +753,7 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot,
    {
        CHECK_FOR_INTERRUPTS();
 
-       ExecStoreMinimalTuple(entry->firstTuple, hashtable->tableslot, false);
+       ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry), hashtable->tableslot, false);
        if (!execTuplesUnequal(slot, hashtable->tableslot,
                               numCols, keyColIdx,
                               eqfunctions,
index f8a8d03e5337599eedc0c815f1c202004411b427..a49c32072e3ab37fd2a9252cb571b263bfcbdc44 100644 (file)
@@ -148,9 +148,12 @@ extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
                                           bool *isnew, uint32 *hash);
 extern uint32 TupleHashTableHash(TupleHashTable hashtable,
                                 TupleTableSlot *slot);
+extern size_t TupleHashEntrySize(void);
 extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable,
                                               TupleTableSlot *slot,
                                               bool *isnew, uint32 hash);
+extern MinimalTuple TupleHashEntryGetTuple(TupleHashEntry entry);
+extern void *TupleHashEntryGetAdditional(TupleHashEntry entry);
 extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable,
                                         TupleTableSlot *slot,
                                         ExprState *eqcomp,
index b3f7aa299f57ab6f238bbebb52b1d8b416685a65..7d5871d6fac7a4a0a21098b9279ed10b7039c013 100644 (file)
@@ -806,17 +806,10 @@ typedef struct ExecAuxRowMark
  * point to tab_hash_expr and tab_eq_func respectively.
  * ----------------------------------------------------------------
  */
+typedef struct TupleHashEntryData TupleHashEntryData;
 typedef struct TupleHashEntryData *TupleHashEntry;
 typedef struct TupleHashTableData *TupleHashTable;
 
-typedef struct TupleHashEntryData
-{
-   MinimalTuple firstTuple;    /* copy of first tuple in this group */
-   void       *additional;     /* user data */
-   uint32      status;         /* hash status */
-   uint32      hash;           /* hash value (cached) */
-} TupleHashEntryData;
-
 /* define parameters necessary to generate the tuple hash table interface */
 #define SH_PREFIX tuplehash
 #define SH_ELEMENT_TYPE TupleHashEntryData
@@ -835,6 +828,7 @@ typedef struct TupleHashTableData
    Oid        *tab_collations; /* collations for hash and comparison */
    MemoryContext tablecxt;     /* memory context containing table */
    MemoryContext tempcxt;      /* context for function evaluations */
+   Size        additionalsize; /* size of additional data */
    TupleTableSlot *tableslot;  /* slot for referencing table entries */
    /* The following fields are set transiently for each table search: */
    TupleTableSlot *inputslot;  /* current input tuple's slot */