TupleHashTable: store additional data along with tuple.

author Jeff Davis <jdavis@postgresql.org>

Sat, 11 Jan 2025 01:14:37 +0000 (17:14 -0800)

committer Jeff Davis <jdavis@postgresql.org>

Sat, 11 Jan 2025 01:14:37 +0000 (17:14 -0800)
author Jeff Davis <jdavis@postgresql.org>
Sat, 11 Jan 2025 01:14:37 +0000 (17:14 -0800)
committer Jeff Davis <jdavis@postgresql.org>
Sat, 11 Jan 2025 01:14:37 +0000 (17:14 -0800)
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c

index 33b124fbb0a5014c451a83ef27325775d18c0b9d..40f1776a7bea0db15de663744176761426224352 100644 (file)
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -20,6 +20,13 @@
  #include "miscadmin.h"
  #include "utils/lsyscache.h"
  
+typedef struct TupleHashEntryData
+{
+   MinimalTuple firstTuple;    /* copy of first tuple in this group */
+   uint32      status;         /* hash status */
+   uint32      hash;           /* hash value (cached) */
+} TupleHashEntryData;
+
  static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
  static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
                                                  const MinimalTuple tuple);
@@ -196,6 +203,7 @@ BuildTupleHashTable(PlanState *parent,
     hashtable->tab_collations = collations;
     hashtable->tablecxt = tablecxt;
     hashtable->tempcxt = tempcxt;
+   hashtable->additionalsize = additionalsize;
     hashtable->tableslot = NULL;    /* will be made on first lookup */
     hashtable->inputslot = NULL;
     hashtable->in_hash_expr = NULL;
@@ -273,6 +281,15 @@ ResetTupleHashTable(TupleHashTable hashtable)
     tuplehash_reset(hashtable->hashtab);
  }
  
+/*
+ * Return size of the hash bucket. Useful for estimating memory usage.
+ */
+size_t
+TupleHashEntrySize(void)
+{
+   return sizeof(TupleHashEntryData);
+}
+
  /*
   * Find or create a hashtable entry for the tuple group containing the
   * given tuple.  The tuple must be the same type as the hashtable entries.
@@ -339,6 +356,24 @@ TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
     return hash;
  }
  
+MinimalTuple
+TupleHashEntryGetTuple(TupleHashEntry entry)
+{
+   return entry->firstTuple;
+}
+
+/*
+ * Get a pointer into the additional space allocated for this entry. The
+ * amount of space available is the additionalsize specified to
+ * BuildTupleHashTable(). If additionalsize was specified as zero, no
+ * additional space is available and this function should not be called.
+ */
+void *
+TupleHashEntryGetAdditional(TupleHashEntry entry)
+{
+   return (char *) entry->firstTuple + MAXALIGN(entry->firstTuple->t_len);
+}
+
  /*
   * A variant of LookupTupleHashEntry for callers that have already computed
   * the hash value.
@@ -477,13 +512,31 @@ LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
         }
         else
         {
+           MinimalTuple firstTuple;
+           size_t totalsize; /* including alignment and additionalsize */
+
             /* created new entry */
             *isnew = true;
             /* zero caller data */
-           entry->additional = NULL;
             MemoryContextSwitchTo(hashtable->tablecxt);
+
             /* Copy the first tuple into the table context */
-           entry->firstTuple = ExecCopySlotMinimalTuple(slot);
+           firstTuple = ExecCopySlotMinimalTuple(slot);
+
+           /*
+            * Allocate additional space right after the MinimalTuple of size
+            * additionalsize. The caller can get a pointer to this data with
+            * TupleHashEntryGetAdditional(), and store arbitrary data there.
+            *
+            * This avoids the need to store an extra pointer or allocate an
+            * additional chunk, which would waste memory.
+            */
+           totalsize = MAXALIGN(firstTuple->t_len) + hashtable->additionalsize;
+           firstTuple = repalloc(firstTuple, totalsize);
+           memset((char *) firstTuple + firstTuple->t_len, 0,
+                  totalsize - firstTuple->t_len);
+
+           entry->firstTuple = firstTuple;
         }
     }
     else
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c

index 3005b5c0e3b89ee38a4ba2288d61c60676232df6..19640752967667410a98959105c2ce2e51e9eb62 100644 (file)
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1713,7 +1713,7 @@ hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace)
         transitionChunkSize = 0;
  
     return
-       sizeof(TupleHashEntryData) +
+       TupleHashEntrySize() +
         tupleChunkSize +
         pergroupChunkSize +
         transitionChunkSize;
@@ -1954,7 +1954,7 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
     if (aggstate->hash_ngroups_current > 0)
     {
         aggstate->hashentrysize =
-           sizeof(TupleHashEntryData) +
+           TupleHashEntrySize() +
             (hashkey_mem / (double) aggstate->hash_ngroups_current);
     }
  }
@@ -2055,11 +2055,7 @@ initialize_hash_entry(AggState *aggstate, TupleHashTable hashtable,
     if (aggstate->numtrans == 0)
         return;
  
-   pergroup = (AggStatePerGroup)
-       MemoryContextAlloc(hashtable->tablecxt,
-                          sizeof(AggStatePerGroupData) * aggstate->numtrans);
-
-   entry->additional = pergroup;
+   pergroup = (AggStatePerGroup) TupleHashEntryGetAdditional(entry);
  
     /*
      * Initialize aggregates for new tuple group, lookup_hash_entries()
@@ -2123,7 +2119,7 @@ lookup_hash_entries(AggState *aggstate)
         {
             if (isnew)
                 initialize_hash_entry(aggstate, hashtable, entry);
-           pergroup[setno] = entry->additional;
+           pergroup[setno] = TupleHashEntryGetAdditional(entry);
         }
         else
         {
@@ -2681,7 +2677,7 @@ agg_refill_hash_table(AggState *aggstate)
         {
             if (isnew)
                 initialize_hash_entry(aggstate, perhash->hashtable, entry);
-           aggstate->hash_pergroup[batch->setno] = entry->additional;
+           aggstate->hash_pergroup[batch->setno] = TupleHashEntryGetAdditional(entry);
             advance_aggregates(aggstate);
         }
         else
@@ -2773,7 +2769,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
     ExprContext *econtext;
     AggStatePerAgg peragg;
     AggStatePerGroup pergroup;
-   TupleHashEntryData *entry;
+   TupleHashEntry entry;
     TupleTableSlot *firstSlot;
     TupleTableSlot *result;
     AggStatePerHash perhash;
@@ -2845,7 +2841,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
          * Transform representative tuple back into one with the right
          * columns.
          */
-       ExecStoreMinimalTuple(entry->firstTuple, hashslot, false);
+       ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry), hashslot, false);
         slot_getallattrs(hashslot);
  
         ExecClearTuple(firstSlot);
@@ -2861,7 +2857,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
         }
         ExecStoreVirtualTuple(firstSlot);
  
-       pergroup = (AggStatePerGroup) entry->additional;
+       pergroup = (AggStatePerGroup) TupleHashEntryGetAdditional(entry);
  
         /*
          * Use the representative input tuple for any references to
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c

index 5b7ff9c374850db6ab451252bbada724a39d39bc..2a50d56fc8798c4ff048e64398d120792b24ce68 100644 (file)
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -425,6 +425,7 @@ setop_fill_hash_table(SetOpState *setopstate)
     {
         TupleTableSlot *outerslot;
         TupleHashEntryData *entry;
+       SetOpStatePerGroup pergroup;
         bool        isnew;
  
         outerslot = ExecProcNode(outerPlan);
@@ -437,16 +438,16 @@ setop_fill_hash_table(SetOpState *setopstate)
                                      outerslot,
                                      &isnew, NULL);
  
+       pergroup = TupleHashEntryGetAdditional(entry);
         /* If new tuple group, initialize counts to zero */
         if (isnew)
         {
-           entry->additional = (SetOpStatePerGroup)
-               MemoryContextAllocZero(setopstate->hashtable->tablecxt,
-                                      sizeof(SetOpStatePerGroupData));
+           pergroup->numLeft = 0;
+           pergroup->numRight = 0;
         }
  
         /* Advance the counts */
-       ((SetOpStatePerGroup) entry->additional)->numLeft++;
+       pergroup->numLeft++;
  
         /* Must reset expression context after each hashtable lookup */
         ResetExprContext(econtext);
@@ -478,7 +479,7 @@ setop_fill_hash_table(SetOpState *setopstate)
  
             /* Advance the counts if entry is already present */
             if (entry)
-               ((SetOpStatePerGroup) entry->additional)->numRight++;
+               ((SetOpStatePerGroup) TupleHashEntryGetAdditional(entry))->numRight++;
  
             /* Must reset expression context after each hashtable lookup */
             ResetExprContext(econtext);
@@ -496,7 +497,7 @@ setop_fill_hash_table(SetOpState *setopstate)
  static TupleTableSlot *
  setop_retrieve_hash_table(SetOpState *setopstate)
  {
-   TupleHashEntryData *entry;
+   TupleHashEntry entry;
     TupleTableSlot *resultTupleSlot;
  
     /*
@@ -526,12 +527,12 @@ setop_retrieve_hash_table(SetOpState *setopstate)
          * See if we should emit any copies of this tuple, and if so return
          * the first copy.
          */
-       set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);
+       set_output_count(setopstate, (SetOpStatePerGroup) TupleHashEntryGetAdditional(entry));
  
         if (setopstate->numOutput > 0)
         {
             setopstate->numOutput--;
-           return ExecStoreMinimalTuple(entry->firstTuple,
+           return ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry),
                                          resultTupleSlot,
                                          false);
         }
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c

index 49767ed6a52af813cb4efb72f65ad33a4f842d29..f7f6fc2da0b95b48f7159fb976505dd00d6d76d0 100644 (file)
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -753,7 +753,7 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot,
     {
         CHECK_FOR_INTERRUPTS();
  
-       ExecStoreMinimalTuple(entry->firstTuple, hashtable->tableslot, false);
+       ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry), hashtable->tableslot, false);
         if (!execTuplesUnequal(slot, hashtable->tableslot,
                                numCols, keyColIdx,
                                eqfunctions,
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h

index f8a8d03e5337599eedc0c815f1c202004411b427..a49c32072e3ab37fd2a9252cb571b263bfcbdc44 100644 (file)
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -148,9 +148,12 @@ extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
                                            bool *isnew, uint32 *hash);
  extern uint32 TupleHashTableHash(TupleHashTable hashtable,
                                  TupleTableSlot *slot);
+extern size_t TupleHashEntrySize(void);
  extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable,
                                                TupleTableSlot *slot,
                                                bool *isnew, uint32 hash);
+extern MinimalTuple TupleHashEntryGetTuple(TupleHashEntry entry);
+extern void *TupleHashEntryGetAdditional(TupleHashEntry entry);
  extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable,
                                          TupleTableSlot *slot,
                                          ExprState *eqcomp,
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index b3f7aa299f57ab6f238bbebb52b1d8b416685a65..7d5871d6fac7a4a0a21098b9279ed10b7039c013 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -806,17 +806,10 @@ typedef struct ExecAuxRowMark
   * point to tab_hash_expr and tab_eq_func respectively.
   * ----------------------------------------------------------------
   */
+typedef struct TupleHashEntryData TupleHashEntryData;
  typedef struct TupleHashEntryData *TupleHashEntry;
  typedef struct TupleHashTableData *TupleHashTable;
  
-typedef struct TupleHashEntryData
-{
-   MinimalTuple firstTuple;    /* copy of first tuple in this group */
-   void       *additional;     /* user data */
-   uint32      status;         /* hash status */
-   uint32      hash;           /* hash value (cached) */
-} TupleHashEntryData;
-
  /* define parameters necessary to generate the tuple hash table interface */
  #define SH_PREFIX tuplehash
  #define SH_ELEMENT_TYPE TupleHashEntryData
@@ -835,6 +828,7 @@ typedef struct TupleHashTableData
     Oid        *tab_collations; /* collations for hash and comparison */
     MemoryContext tablecxt;     /* memory context containing table */
     MemoryContext tempcxt;      /* context for function evaluations */
+   Size        additionalsize; /* size of additional data */
     TupleTableSlot *tableslot;  /* slot for referencing table entries */
     /* The following fields are set transiently for each table search: */
     TupleTableSlot *inputslot;  /* current input tuple's slot */
author	Jeff Davis <jdavis@postgresql.org>
	Sat, 11 Jan 2025 01:14:37 +0000 (17:14 -0800)
committer	Jeff Davis <jdavis@postgresql.org>
	Sat, 11 Jan 2025 01:14:37 +0000 (17:14 -0800)
src/backend/executor/execGrouping.c		patch \| blob \| blame \| history
src/backend/executor/nodeAgg.c		patch \| blob \| blame \| history
src/backend/executor/nodeSetOp.c		patch \| blob \| blame \| history
src/backend/executor/nodeSubplan.c		patch \| blob \| blame \| history
src/include/executor/executor.h		patch \| blob \| blame \| history
src/include/nodes/execnodes.h		patch \| blob \| blame \| history