0f5738202 adjusted the execGrouping.c code so it made use of ExprStates to
generate hash values. That commit made a wrong assumption that the slot
type to pass to ExecBuildHash32FromAttrs() is always &TTSOpsMinimalTuple.
That's not the case, as the slot type depends on the type of the slot
passed to LookupTupleHashEntry(), which, for nodeRecursiveunion.c, could be
any of the current slot types.
Here we fix this by adding a new parameter to BuildTupleHashTableExt()
to allow the slot type to be passed in. In the case of nodeSubplan.c
and nodeAgg.c the slot type is always &TTSOpsVirtual, so for both of
those cases, it's beneficial to pass the known slot type as that allows
ExecBuildHash32FromAttrs() to skip adding the tuple deform step to the
resulting ExprState. Another possible fix would have been to have
ExecBuildHash32FromAttrs() set "fetch.kind" to NULL so that
ExecComputeSlotInfo() always determines that the EEOP_INNER_FETCHSOME step
is required; however, that option isn't favorable as it slows down
aggregation and hashed subplan evaluation due to the extra (needless)
deform step.
Thanks to Nathan Bossart for bisecting to find the offending commit
based on Paul's report.
Reported-by: Paul Ramsey <pramsey@cleverelephant.ca>
Discussion: https://postgr.es/m/99F064C1-B3EB-4BE7-97D2-D2A0AA487A71@cleverelephant.ca
/*
* Construct an empty TupleHashTable
*
+ * inputOps: slot ops for input hash values, or NULL if unknown or not fixed
* numCols, keyColIdx: identify the tuple fields to use as lookup key
* eqfunctions: equality comparison functions to use
* hashfunctions: datatype-specific hashing functions to use
TupleHashTable
BuildTupleHashTableExt(PlanState *parent,
TupleDesc inputDesc,
+ const TupleTableSlotOps *inputOps,
int numCols, AttrNumber *keyColIdx,
const Oid *eqfuncoids,
FmgrInfo *hashfunctions,
/* build hash ExprState for all columns */
hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc,
- &TTSOpsMinimalTuple,
+ inputOps,
hashfunctions,
collations,
numCols,
{
return BuildTupleHashTableExt(parent,
inputDesc,
+ NULL,
numCols, keyColIdx,
eqfuncoids,
hashfunctions,
perhash->hashtable = BuildTupleHashTableExt(&aggstate->ss.ps,
perhash->hashslot->tts_tupleDescriptor,
+ perhash->hashslot->tts_ops,
perhash->numCols,
perhash->hashGrpColIdxHash,
perhash->eqfuncoids,
Assert(node->numCols > 0);
Assert(node->numGroups > 0);
+ /* XXX is it worth working a bit harder to determine the inputOps here? */
rustate->hashtable = BuildTupleHashTableExt(&rustate->ps,
desc,
+ NULL,
node->numCols,
node->dupColIdx,
rustate->eqfuncoids,
setopstate->hashtable = BuildTupleHashTableExt(&setopstate->ps,
desc,
+ NULL,
node->numCols,
node->dupColIdx,
setopstate->eqfuncoids,
*
* If it's not necessary to distinguish FALSE and UNKNOWN, then we don't
* need to store subplan output rows that contain NULL.
+ *
+ * Because the input slot for each hash table is always the slot resulting
+ * from an ExecProject(), we can use TTSOpsVirtual for the input ops. This
+ * saves a needless fetch inner op step for the hashing ExprState created
+ * in BuildTupleHashTableExt().
*/
MemoryContextReset(node->hashtablecxt);
node->havehashrows = false;
else
node->hashtable = BuildTupleHashTableExt(node->parent,
node->descRight,
+ &TTSOpsVirtual,
ncols,
node->keyColIdx,
node->tab_eq_funcoids,
else
node->hashnulls = BuildTupleHashTableExt(node->parent,
node->descRight,
+ &TTSOpsVirtual,
ncols,
node->keyColIdx,
node->tab_eq_funcoids,
MemoryContext tempcxt, bool use_variable_hash_iv);
extern TupleHashTable BuildTupleHashTableExt(PlanState *parent,
TupleDesc inputDesc,
+ const TupleTableSlotOps *inputOps,
int numCols, AttrNumber *keyColIdx,
const Oid *eqfuncoids,
FmgrInfo *hashfunctions,
1 | 0 | A
(1 row)
+-- exercise the deduplication code of a UNION with mixed input slot types
+WITH RECURSIVE subdepartment AS
+(
+ -- select all columns to prevent projection
+ SELECT id, parent_department, name FROM department WHERE name = 'A'
+ UNION
+ -- joins do projection
+ SELECT d.id, d.parent_department, d.name FROM department AS d
+ INNER JOIN subdepartment AS sd ON d.parent_department = sd.id
+)
+SELECT * FROM subdepartment ORDER BY name;
+ id | parent_department | name
+----+-------------------+------
+ 1 | 0 | A
+ 2 | 1 | B
+ 3 | 2 | C
+ 4 | 2 | D
+ 6 | 4 | F
+(5 rows)
+
-- inside subqueries
SELECT count(*) FROM (
WITH RECURSIVE t(n) AS (
)
SELECT * FROM subdepartment ORDER BY name;
+-- exercise the deduplication code of a UNION with mixed input slot types
+WITH RECURSIVE subdepartment AS
+(
+ -- select all columns to prevent projection
+ SELECT id, parent_department, name FROM department WHERE name = 'A'
+
+ UNION
+
+ -- joins do projection
+ SELECT d.id, d.parent_department, d.name FROM department AS d
+ INNER JOIN subdepartment AS sd ON d.parent_department = sd.id
+)
+SELECT * FROM subdepartment ORDER BY name;
+
-- inside subqueries
SELECT count(*) FROM (
WITH RECURSIVE t(n) AS (