Fix incorrect slot type in BuildTupleHashTableExt
authorDavid Rowley <drowley@postgresql.org>
Tue, 17 Dec 2024 23:05:55 +0000 (12:05 +1300)
committerDavid Rowley <drowley@postgresql.org>
Tue, 17 Dec 2024 23:05:55 +0000 (12:05 +1300)
0f5738202 adjusted the execGrouping.c code so it made use of ExprStates to
generate hash values.  That commit made a wrong assumption that the slot
type to pass to ExecBuildHash32FromAttrs() is always &TTSOpsMinimalTuple.
That's not the case as the slot type depends on the slot type passed to
LookupTupleHashEntry(), which for nodeRecursiveunion.c, could be any of
the current slot types.

Here we fix this by adding a new parameter to BuildTupleHashTableExt()
to allow the slot type to be passed in.  In the case of nodeSubplan.c
and nodeAgg.c the slot type is always &TTSOpsVirtual, so for both of
those cases, it's beneficial to pass the known slot type as that allows
ExecBuildHash32FromAttrs() to skip adding the tuple deform step to the
resulting ExprState.  Another possible fix would have been to have
ExecBuildHash32FromAttrs() set "fetch.kind" to NULL so that
ExecComputeSlotInfo() always determines the EEOP_INNER_FETCHSOME is
required, however, that option isn't favorable as slows down aggregation
and hashed subplan evaluation due to the extra (needless) deform step.

Thanks to Nathan Bossart for bisecting to find the offending commit
based on Paul's report.

Reported-by: Paul Ramsey <pramsey@cleverelephant.ca>
Discussion: https://postgr.es/m/99F064C1-B3EB-4BE7-97D2-D2A0AA487A71@cleverelephant.ca

src/backend/executor/execGrouping.c
src/backend/executor/nodeAgg.c
src/backend/executor/nodeRecursiveunion.c
src/backend/executor/nodeSetOp.c
src/backend/executor/nodeSubplan.c
src/include/executor/executor.h
src/test/regress/expected/with.out
src/test/regress/sql/with.sql

index 9a88fc652494ede8b7a4d0ef8ee704f16e70707e..4a8f72305ce19dc602c587f1c35aaecc71dd50cf 100644 (file)
@@ -135,6 +135,7 @@ execTuplesHashPrepare(int numCols,
 /*
  * Construct an empty TupleHashTable
  *
+ *  inputOps: slot ops for input hash values, or NULL if unknown or not fixed
  *     numCols, keyColIdx: identify the tuple fields to use as lookup key
  *     eqfunctions: equality comparison functions to use
  *     hashfunctions: datatype-specific hashing functions to use
@@ -154,6 +155,7 @@ execTuplesHashPrepare(int numCols,
 TupleHashTable
 BuildTupleHashTableExt(PlanState *parent,
                                           TupleDesc inputDesc,
+                                          const TupleTableSlotOps *inputOps,
                                           int numCols, AttrNumber *keyColIdx,
                                           const Oid *eqfuncoids,
                                           FmgrInfo *hashfunctions,
@@ -225,7 +227,7 @@ BuildTupleHashTableExt(PlanState *parent,
 
        /* build hash ExprState for all columns */
        hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc,
-                                                                                                               &TTSOpsMinimalTuple,
+                                                                                                               inputOps,
                                                                                                                hashfunctions,
                                                                                                                collations,
                                                                                                                numCols,
@@ -274,6 +276,7 @@ BuildTupleHashTable(PlanState *parent,
 {
        return BuildTupleHashTableExt(parent,
                                                                  inputDesc,
+                                                                 NULL,
                                                                  numCols, keyColIdx,
                                                                  eqfuncoids,
                                                                  hashfunctions,
index 84d33fdebc64216b05c63ee1644d58d825f010ba..53931c8285319c3a33e330e92182c69870a56c14 100644 (file)
@@ -1520,6 +1520,7 @@ build_hash_table(AggState *aggstate, int setno, long nbuckets)
 
        perhash->hashtable = BuildTupleHashTableExt(&aggstate->ss.ps,
                                                                                                perhash->hashslot->tts_tupleDescriptor,
+                                                                                               perhash->hashslot->tts_ops,
                                                                                                perhash->numCols,
                                                                                                perhash->hashGrpColIdxHash,
                                                                                                perhash->eqfuncoids,
index 22e7b83b2e6e9a11972a1a12b2e9b7c375b4d85e..39be4cdc3b1094c7a44a66db8df4d6c8a541c3b5 100644 (file)
@@ -37,8 +37,10 @@ build_hash_table(RecursiveUnionState *rustate)
        Assert(node->numCols > 0);
        Assert(node->numGroups > 0);
 
+       /* XXX is it worth working a bit harder to determine the inputOps here? */
        rustate->hashtable = BuildTupleHashTableExt(&rustate->ps,
                                                                                                desc,
+                                                                                               NULL,
                                                                                                node->numCols,
                                                                                                node->dupColIdx,
                                                                                                rustate->eqfuncoids,
index a8ac68b4826012dc9965b17ded06bda7bc4d8f52..b40d81f3ffaef6d983a0b707dc7d295d8288d47f 100644 (file)
@@ -128,6 +128,7 @@ build_hash_table(SetOpState *setopstate)
 
        setopstate->hashtable = BuildTupleHashTableExt(&setopstate->ps,
                                                                                                   desc,
+                                                                                                  NULL,
                                                                                                   node->numCols,
                                                                                                   node->dupColIdx,
                                                                                                   setopstate->eqfuncoids,
index f26c883c67dbc5b65b423bea98e36d75542cc753..bb4a0219194a93cfb3f107c3e00183ba4c694477 100644 (file)
@@ -519,6 +519,11 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
         *
         * If it's not necessary to distinguish FALSE and UNKNOWN, then we don't
         * need to store subplan output rows that contain NULL.
+        *
+        * Because the input slot for each hash table is always the slot resulting
+        * from an ExecProject(), we can use TTSOpsVirtual for the input ops. This
+        * saves a needless fetch inner op step for the hashing ExprState created
+        * in BuildTupleHashTableExt().
         */
        MemoryContextReset(node->hashtablecxt);
        node->havehashrows = false;
@@ -533,6 +538,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
        else
                node->hashtable = BuildTupleHashTableExt(node->parent,
                                                                                                 node->descRight,
+                                                                                                &TTSOpsVirtual,
                                                                                                 ncols,
                                                                                                 node->keyColIdx,
                                                                                                 node->tab_eq_funcoids,
@@ -561,6 +567,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
                else
                        node->hashnulls = BuildTupleHashTableExt(node->parent,
                                                                                                         node->descRight,
+                                                                                                        &TTSOpsVirtual,
                                                                                                         ncols,
                                                                                                         node->keyColIdx,
                                                                                                         node->tab_eq_funcoids,
index 494ec4f2e5d2c7dd34abfb2da801381145bc569a..c8e6befca88aefbb275c759d27eb2f14a23cc4ef 100644 (file)
@@ -140,6 +140,7 @@ extern TupleHashTable BuildTupleHashTable(PlanState *parent,
                                                                                  MemoryContext tempcxt, bool use_variable_hash_iv);
 extern TupleHashTable BuildTupleHashTableExt(PlanState *parent,
                                                                                         TupleDesc inputDesc,
+                                                                                        const TupleTableSlotOps *inputOps,
                                                                                         int numCols, AttrNumber *keyColIdx,
                                                                                         const Oid *eqfuncoids,
                                                                                         FmgrInfo *hashfunctions,
index 08cfa5463fbcc6158a3c1c8b94610466112073dc..ff9754603bdd1c6bf037ba70408b5df0dca5ac68 100644 (file)
@@ -329,6 +329,26 @@ SELECT * FROM subdepartment ORDER BY name;
   1 |                 0 | A
 (1 row)
 
+-- exercise the deduplication code of a UNION with mixed input slot types
+WITH RECURSIVE subdepartment AS
+(
+       -- select all columns to prevent projection
+       SELECT id, parent_department, name FROM department WHERE name = 'A'
+       UNION
+       -- joins do projection
+       SELECT d.id, d.parent_department, d.name FROM department AS d
+       INNER JOIN subdepartment AS sd ON d.parent_department = sd.id
+)
+SELECT * FROM subdepartment ORDER BY name;
+ id | parent_department | name 
+----+-------------------+------
+  1 |                 0 | A
+  2 |                 1 | B
+  3 |                 2 | C
+  4 |                 2 | D
+  6 |                 4 | F
+(5 rows)
+
 -- inside subqueries
 SELECT count(*) FROM (
     WITH RECURSIVE t(n) AS (
index 8f6e6c0b40567100d1d131d496cc2dfde5aea74a..aca7bae6dddd4b91950214b5e1b5b634993fa513 100644 (file)
@@ -216,6 +216,20 @@ WITH RECURSIVE subdepartment AS
 )
 SELECT * FROM subdepartment ORDER BY name;
 
+-- exercise the deduplication code of a UNION with mixed input slot types
+WITH RECURSIVE subdepartment AS
+(
+       -- select all columns to prevent projection
+       SELECT id, parent_department, name FROM department WHERE name = 'A'
+
+       UNION
+
+       -- joins do projection
+       SELECT d.id, d.parent_department, d.name FROM department AS d
+       INNER JOIN subdepartment AS sd ON d.parent_department = sd.id
+)
+SELECT * FROM subdepartment ORDER BY name;
+
 -- inside subqueries
 SELECT count(*) FROM (
     WITH RECURSIVE t(n) AS (