Convert SetOp to read its inputs as outerPlan and innerPlan.

author Tom Lane <tgl@sss.pgh.pa.us>

Thu, 19 Dec 2024 21:23:45 +0000 (16:23 -0500)

committer Tom Lane <tgl@sss.pgh.pa.us>

Thu, 19 Dec 2024 21:23:45 +0000 (16:23 -0500)
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 19 Dec 2024 21:23:45 +0000 (16:23 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 19 Dec 2024 21:23:45 +0000 (16:23 -0500)
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c

index 740e8fb14866d697a7c49736027217c1eca7ae0d..df0223129c46bcc55e85ffb2657e64392913bd8f 100644 (file)
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -526,6 +526,49 @@ ExecGetResultSlotOps(PlanState *planstate, bool *isfixed)
     return planstate->ps_ResultTupleSlot->tts_ops;
  }
  
+/*
+ * ExecGetCommonSlotOps - identify common result slot type, if any
+ *
+ * If all the given PlanState nodes return the same fixed tuple slot type,
+ * return the slot ops struct for that slot type.  Else, return NULL.
+ */
+const TupleTableSlotOps *
+ExecGetCommonSlotOps(PlanState **planstates, int nplans)
+{
+   const TupleTableSlotOps *result;
+   bool        isfixed;
+
+   if (nplans <= 0)
+       return NULL;
+   result = ExecGetResultSlotOps(planstates[0], &isfixed);
+   if (!isfixed)
+       return NULL;
+   for (int i = 1; i < nplans; i++)
+   {
+       const TupleTableSlotOps *thisops;
+
+       thisops = ExecGetResultSlotOps(planstates[i], &isfixed);
+       if (!isfixed)
+           return NULL;
+       if (result != thisops)
+           return NULL;
+   }
+   return result;
+}
+
+/*
+ * ExecGetCommonChildSlotOps - as above, for the PlanState's standard children
+ */
+const TupleTableSlotOps *
+ExecGetCommonChildSlotOps(PlanState *ps)
+{
+   PlanState  *planstates[2];
+
+   planstates[0] = outerPlanState(ps);
+   planstates[1] = innerPlanState(ps);
+   return ExecGetCommonSlotOps(planstates, 2);
+}
+
  
  /* ----------------
   *     ExecAssignProjectionInfo
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c

index b40d81f3ffaef6d983a0b707dc7d295d8288d47f..09089204e8bcb2edd9f1c94e0729ecb7aad1b14e 100644 (file)
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -3,29 +3,30 @@
   * nodeSetOp.c
   *   Routines to handle INTERSECT and EXCEPT selection
   *
- * The input of a SetOp node consists of tuples from two relations,
- * which have been combined into one dataset, with a junk attribute added
- * that shows which relation each tuple came from.  In SETOP_SORTED mode,
- * the input has furthermore been sorted according to all the grouping
- * columns (ie, all the non-junk attributes).  The SetOp node scans each
- * group of identical tuples to determine how many came from each input
- * relation.  Then it is a simple matter to emit the output demanded by the
- * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
+ * The input of a SetOp node consists of two relations (outer and inner)
+ * with identical column sets.  In EXCEPT queries the outer relation is
+ * always the left side, while in INTERSECT cases the planner tries to
+ * make the outer relation be the smaller of the two inputs.
   *
- * In SETOP_HASHED mode, the input is delivered in no particular order,
- * except that we know all the tuples from one input relation will come before
- * all the tuples of the other.  The planner guarantees that the first input
- * relation is the left-hand one for EXCEPT, and tries to make the smaller
- * input relation come first for INTERSECT.  We build a hash table in memory
- * with one entry for each group of identical tuples, and count the number of
- * tuples in the group from each relation.  After seeing all the input, we
- * scan the hashtable and generate the correct output using those counts.
- * We can avoid making hashtable entries for any tuples appearing only in the
- * second input relation, since they cannot result in any output.
+ * In SETOP_SORTED mode, each input has been sorted according to all the
+ * grouping columns.  The SetOp node essentially performs a merge join on
+ * the grouping columns, except that it is only interested in counting how
+ * many tuples from each input match.  Then it is a simple matter to emit
+ * the output demanded by the SQL spec for INTERSECT, INTERSECT ALL, EXCEPT,
+ * or EXCEPT ALL.
+ *
+ * In SETOP_HASHED mode, the inputs are delivered in no particular order.
+ * We read the outer relation and build a hash table in memory with one entry
+ * for each group of identical tuples, counting the number of tuples in the
+ * group.  Then we read the inner relation and count the number of tuples
+ * matching each outer group.  (We can disregard any tuples appearing only
+ * in the inner relation, since they cannot result in any output.)  After
+ * seeing all the input, we scan the hashtable and generate the correct
+ * output using those counts.
   *
   * This node type is not used for UNION or UNION ALL, since those can be
- * implemented more cheaply (there's no need for the junk attribute to
- * identify the source relation).
+ * implemented more cheaply (there's no need to count the number of
+ * matching tuples).
   *
   * Note that SetOp does no qual checking nor projection.  The delivered
   * output tuples are just copies of the first-to-arrive tuple in each
@@ -54,65 +55,28 @@
  /*
   * SetOpStatePerGroupData - per-group working state
   *
- * These values are working state that is initialized at the start of
- * an input tuple group and updated for each input tuple.
- *
- * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
- * the plan state node.  In SETOP_HASHED mode, the hash table contains one
- * of these for each tuple group.
+ * In SETOP_SORTED mode, we need only one of these structs, and it's just a
+ * local in setop_retrieve_sorted.  In SETOP_HASHED mode, the hash table
+ * contains one of these for each tuple group.
   */
  typedef struct SetOpStatePerGroupData
  {
-   long        numLeft;        /* number of left-input dups in group */
-   long        numRight;       /* number of right-input dups in group */
-}          SetOpStatePerGroupData;
+   int64       numLeft;        /* number of left-input dups in group */
+   int64       numRight;       /* number of right-input dups in group */
+} SetOpStatePerGroupData;
+
+typedef SetOpStatePerGroupData *SetOpStatePerGroup;
  
  
-static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
+static TupleTableSlot *setop_retrieve_sorted(SetOpState *setopstate);
+static void setop_load_group(SetOpStatePerInput *input, PlanState *inputPlan,
+                            SetOpState *setopstate);
+static int setop_compare_slots(TupleTableSlot *s1, TupleTableSlot *s2,
+                               SetOpState *setopstate);
  static void setop_fill_hash_table(SetOpState *setopstate);
  static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);
  
  
-/*
- * Initialize state for a new group of input values.
- */
-static inline void
-initialize_counts(SetOpStatePerGroup pergroup)
-{
-   pergroup->numLeft = pergroup->numRight = 0;
-}
-
-/*
- * Advance the appropriate counter for one input tuple.
- */
-static inline void
-advance_counts(SetOpStatePerGroup pergroup, int flag)
-{
-   if (flag)
-       pergroup->numRight++;
-   else
-       pergroup->numLeft++;
-}
-
-/*
- * Fetch the "flag" column from an input tuple.
- * This is an integer column with value 0 for left side, 1 for right side.
- */
-static int
-fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
-{
-   SetOp      *node = (SetOp *) setopstate->ps.plan;
-   int         flag;
-   bool        isNull;
-
-   flag = DatumGetInt32(slot_getattr(inputslot,
-                                     node->flagColIdx,
-                                     &isNull));
-   Assert(!isNull);
-   Assert(flag == 0 || flag == 1);
-   return flag;
-}
-
  /*
   * Initialize the hash table to empty.
   */
@@ -126,14 +90,19 @@ build_hash_table(SetOpState *setopstate)
     Assert(node->strategy == SETOP_HASHED);
     Assert(node->numGroups > 0);
  
+   /*
+    * If both child plans deliver the same fixed tuple slot type, we can tell
+    * BuildTupleHashTableExt to expect that slot type as input.  Otherwise,
+    * we'll pass NULL denoting that any slot type is possible.
+    */
     setopstate->hashtable = BuildTupleHashTableExt(&setopstate->ps,
                                                    desc,
-                                                  NULL,
+                                                  ExecGetCommonChildSlotOps(&setopstate->ps),
                                                    node->numCols,
-                                                  node->dupColIdx,
+                                                  node->cmpColIdx,
                                                    setopstate->eqfuncoids,
                                                    setopstate->hashfunctions,
-                                                  node->dupCollations,
+                                                  node->cmpCollations,
                                                    node->numGroups,
                                                    0,
                                                    setopstate->ps.state->es_query_cxt,
@@ -218,108 +187,126 @@ ExecSetOp(PlanState *pstate)
         return setop_retrieve_hash_table(node);
     }
     else
-       return setop_retrieve_direct(node);
+       return setop_retrieve_sorted(node);
  }
  
  /*
   * ExecSetOp for non-hashed case
   */
  static TupleTableSlot *
-setop_retrieve_direct(SetOpState *setopstate)
+setop_retrieve_sorted(SetOpState *setopstate)
  {
     PlanState  *outerPlan;
-   SetOpStatePerGroup pergroup;
-   TupleTableSlot *outerslot;
+   PlanState  *innerPlan;
     TupleTableSlot *resultTupleSlot;
-   ExprContext *econtext = setopstate->ps.ps_ExprContext;
  
     /*
      * get state info from node
      */
     outerPlan = outerPlanState(setopstate);
-   pergroup = (SetOpStatePerGroup) setopstate->pergroup;
+   innerPlan = innerPlanState(setopstate);
     resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
  
     /*
-    * We loop retrieving groups until we find one we should return
+    * If first time through, establish the invariant that setop_load_group
+    * expects: each side's nextTupleSlot is the next output from the child
+    * plan, or empty if there is no more output from it.
      */
-   while (!setopstate->setop_done)
+   if (setopstate->need_init)
     {
+       setopstate->need_init = false;
+
+       setopstate->leftInput.nextTupleSlot = ExecProcNode(outerPlan);
+
         /*
-        * If we don't already have the first tuple of the new group, fetch it
-        * from the outer plan.
+        * If the outer relation is empty, then we will emit nothing, and we
+        * don't need to read the inner relation at all.
          */
-       if (setopstate->grp_firstTuple == NULL)
+       if (TupIsNull(setopstate->leftInput.nextTupleSlot))
         {
-           outerslot = ExecProcNode(outerPlan);
-           if (!TupIsNull(outerslot))
-           {
-               /* Make a copy of the first input tuple */
-               setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
-           }
-           else
-           {
-               /* outer plan produced no tuples at all */
-               setopstate->setop_done = true;
-               return NULL;
-           }
+           setopstate->setop_done = true;
+           return NULL;
         }
  
+       setopstate->rightInput.nextTupleSlot = ExecProcNode(innerPlan);
+
+       /* Set flags that we've not completed either side's group */
+       setopstate->leftInput.needGroup = true;
+       setopstate->rightInput.needGroup = true;
+   }
+
+   /*
+    * We loop retrieving groups until we find one we should return
+    */
+   while (!setopstate->setop_done)
+   {
+       int         cmpresult;
+       SetOpStatePerGroupData pergroup;
+
         /*
-        * Store the copied first input tuple in the tuple table slot reserved
-        * for it.  The tuple will be deleted when it is cleared from the
-        * slot.
+        * Fetch the rest of the current outer group, if we didn't already.
          */
-       ExecStoreHeapTuple(setopstate->grp_firstTuple,
-                          resultTupleSlot,
-                          true);
-       setopstate->grp_firstTuple = NULL;  /* don't keep two pointers */
+       if (setopstate->leftInput.needGroup)
+           setop_load_group(&setopstate->leftInput, outerPlan, setopstate);
  
-       /* Initialize working state for a new input tuple group */
-       initialize_counts(pergroup);
+       /*
+        * If no more outer groups, we're done, and don't need to look at any
+        * more of the inner relation.
+        */
+       if (setopstate->leftInput.numTuples == 0)
+       {
+           setopstate->setop_done = true;
+           break;
+       }
  
-       /* Count the first input tuple */
-       advance_counts(pergroup,
-                      fetch_tuple_flag(setopstate, resultTupleSlot));
+       /*
+        * Fetch the rest of the current inner group, if we didn't already.
+        */
+       if (setopstate->rightInput.needGroup)
+           setop_load_group(&setopstate->rightInput, innerPlan, setopstate);
  
         /*
-        * Scan the outer plan until we exhaust it or cross a group boundary.
+        * Determine whether we have matching groups on both sides (this is
+        * basically like the core logic of a merge join).
          */
-       for (;;)
-       {
-           outerslot = ExecProcNode(outerPlan);
-           if (TupIsNull(outerslot))
-           {
-               /* no more outer-plan tuples available */
-               setopstate->setop_done = true;
-               break;
-           }
-
-           /*
-            * Check whether we've crossed a group boundary.
-            */
-           econtext->ecxt_outertuple = resultTupleSlot;
-           econtext->ecxt_innertuple = outerslot;
-
-           if (!ExecQualAndReset(setopstate->eqfunction, econtext))
-           {
-               /*
-                * Save the first input tuple of the next group.
-                */
-               setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
-               break;
-           }
+       if (setopstate->rightInput.numTuples == 0)
+           cmpresult = -1;     /* as though left input is lesser */
+       else
+           cmpresult = setop_compare_slots(setopstate->leftInput.firstTupleSlot,
+                                           setopstate->rightInput.firstTupleSlot,
+                                           setopstate);
  
-           /* Still in same group, so count this tuple */
-           advance_counts(pergroup,
-                          fetch_tuple_flag(setopstate, outerslot));
+       if (cmpresult < 0)
+       {
+           /* Left group is first, and has no right matches */
+           pergroup.numLeft = setopstate->leftInput.numTuples;
+           pergroup.numRight = 0;
+           /* We'll need another left group next time */
+           setopstate->leftInput.needGroup = true;
+       }
+       else if (cmpresult == 0)
+       {
+           /* We have matching groups */
+           pergroup.numLeft = setopstate->leftInput.numTuples;
+           pergroup.numRight = setopstate->rightInput.numTuples;
+           /* We'll need to read from both sides next time */
+           setopstate->leftInput.needGroup = true;
+           setopstate->rightInput.needGroup = true;
+       }
+       else
+       {
+           /* Right group has no left matches, so we can ignore it */
+           setopstate->rightInput.needGroup = true;
+           continue;
         }
  
         /*
-        * Done scanning input tuple group.  See if we should emit any copies
-        * of result tuple, and if so return the first copy.
+        * Done scanning these input tuple groups.  See if we should emit any
+        * copies of result tuple, and if so return the first copy.  (Note
+        * that the result tuple is the same as the left input's firstTuple
+        * slot.)
          */
-       set_output_count(setopstate, pergroup);
+       set_output_count(setopstate, &pergroup);
  
         if (setopstate->numOutput > 0)
         {
@@ -334,84 +321,168 @@ setop_retrieve_direct(SetOpState *setopstate)
  }
  
  /*
- * ExecSetOp for hashed case: phase 1, read input and build hash table
+ * Load next group of tuples from one child plan or the other.
+ *
+ * On entry, we've already read the first tuple of the next group
+ * (if there is one) into input->nextTupleSlot.  This invariant
+ * is maintained on exit.
+ */
+static void
+setop_load_group(SetOpStatePerInput *input, PlanState *inputPlan,
+                SetOpState *setopstate)
+{
+   input->needGroup = false;
+
+   /* If we've exhausted this child plan, report an empty group */
+   if (TupIsNull(input->nextTupleSlot))
+   {
+       ExecClearTuple(input->firstTupleSlot);
+       input->numTuples = 0;
+       return;
+   }
+
+   /* Make a local copy of the first tuple for comparisons */
+   ExecStoreMinimalTuple(ExecCopySlotMinimalTuple(input->nextTupleSlot),
+                         input->firstTupleSlot,
+                         true);
+   /* and count it */
+   input->numTuples = 1;
+
+   /* Scan till we find the end-of-group */
+   for (;;)
+   {
+       int         cmpresult;
+
+       /* Get next input tuple, if there is one */
+       input->nextTupleSlot = ExecProcNode(inputPlan);
+       if (TupIsNull(input->nextTupleSlot))
+           break;
+
+       /* There is; does it belong to same group as firstTuple? */
+       cmpresult = setop_compare_slots(input->firstTupleSlot,
+                                       input->nextTupleSlot,
+                                       setopstate);
+       Assert(cmpresult <= 0); /* else input is mis-sorted */
+       if (cmpresult != 0)
+           break;
+
+       /* Still in same group, so count this tuple */
+       input->numTuples++;
+   }
+}
+
+/*
+ * Compare the tuples in the two given slots.
+ */
+static int
+setop_compare_slots(TupleTableSlot *s1, TupleTableSlot *s2,
+                   SetOpState *setopstate)
+{
+   /* We'll often need to fetch all the columns, so just do it */
+   slot_getallattrs(s1);
+   slot_getallattrs(s2);
+   for (int nkey = 0; nkey < setopstate->numCols; nkey++)
+   {
+       SortSupport sortKey = setopstate->sortKeys + nkey;
+       AttrNumber  attno = sortKey->ssup_attno;
+       Datum       datum1 = s1->tts_values[attno - 1],
+                   datum2 = s2->tts_values[attno - 1];
+       bool        isNull1 = s1->tts_isnull[attno - 1],
+                   isNull2 = s2->tts_isnull[attno - 1];
+       int         compare;
+
+       compare = ApplySortComparator(datum1, isNull1,
+                                     datum2, isNull2,
+                                     sortKey);
+       if (compare != 0)
+           return compare;
+   }
+   return 0;
+}
+
+/*
+ * ExecSetOp for hashed case: phase 1, read inputs and build hash table
   */
  static void
  setop_fill_hash_table(SetOpState *setopstate)
  {
-   SetOp      *node = (SetOp *) setopstate->ps.plan;
     PlanState  *outerPlan;
-   int         firstFlag;
-   bool        in_first_rel PG_USED_FOR_ASSERTS_ONLY;
+   PlanState  *innerPlan;
     ExprContext *econtext = setopstate->ps.ps_ExprContext;
+   bool        have_tuples = false;
  
     /*
      * get state info from node
      */
     outerPlan = outerPlanState(setopstate);
-   firstFlag = node->firstFlag;
-   /* verify planner didn't mess up */
-   Assert(firstFlag == 0 ||
-          (firstFlag == 1 &&
-           (node->cmd == SETOPCMD_INTERSECT ||
-            node->cmd == SETOPCMD_INTERSECT_ALL)));
+   innerPlan = innerPlanState(setopstate);
  
     /*
      * Process each outer-plan tuple, and then fetch the next one, until we
      * exhaust the outer plan.
      */
-   in_first_rel = true;
     for (;;)
     {
         TupleTableSlot *outerslot;
-       int         flag;
         TupleHashEntryData *entry;
         bool        isnew;
  
         outerslot = ExecProcNode(outerPlan);
         if (TupIsNull(outerslot))
             break;
+       have_tuples = true;
  
-       /* Identify whether it's left or right input */
-       flag = fetch_tuple_flag(setopstate, outerslot);
+       /* Find or build hashtable entry for this tuple's group */
+       entry = LookupTupleHashEntry(setopstate->hashtable,
+                                    outerslot,
+                                    &isnew, NULL);
  
-       if (flag == firstFlag)
+       /* If new tuple group, initialize counts to zero */
+       if (isnew)
         {
-           /* (still) in first input relation */
-           Assert(in_first_rel);
-
-           /* Find or build hashtable entry for this tuple's group */
-           entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
-                                        &isnew, NULL);
-
-           /* If new tuple group, initialize counts */
-           if (isnew)
-           {
-               entry->additional = (SetOpStatePerGroup)
-                   MemoryContextAlloc(setopstate->hashtable->tablecxt,
+           entry->additional = (SetOpStatePerGroup)
+               MemoryContextAllocZero(setopstate->hashtable->tablecxt,
                                        sizeof(SetOpStatePerGroupData));
-               initialize_counts((SetOpStatePerGroup) entry->additional);
-           }
-
-           /* Advance the counts */
-           advance_counts((SetOpStatePerGroup) entry->additional, flag);
         }
-       else
+
+       /* Advance the counts */
+       ((SetOpStatePerGroup) entry->additional)->numLeft++;
+
+       /* Must reset expression context after each hashtable lookup */
+       ResetExprContext(econtext);
+   }
+
+   /*
+    * If the outer relation is empty, then we will emit nothing, and we don't
+    * need to read the inner relation at all.
+    */
+   if (have_tuples)
+   {
+       /*
+        * Process each inner-plan tuple, and then fetch the next one, until
+        * we exhaust the inner plan.
+        */
+       for (;;)
         {
-           /* reached second relation */
-           in_first_rel = false;
+           TupleTableSlot *innerslot;
+           TupleHashEntryData *entry;
+
+           innerslot = ExecProcNode(innerPlan);
+           if (TupIsNull(innerslot))
+               break;
  
             /* For tuples not seen previously, do not make hashtable entry */
-           entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
+           entry = LookupTupleHashEntry(setopstate->hashtable,
+                                        innerslot,
                                          NULL, NULL);
  
             /* Advance the counts if entry is already present */
             if (entry)
-               advance_counts((SetOpStatePerGroup) entry->additional, flag);
-       }
+               ((SetOpStatePerGroup) entry->additional)->numRight++;
  
-       /* Must reset expression context after each hashtable lookup */
-       ResetExprContext(econtext);
+           /* Must reset expression context after each hashtable lookup */
+           ResetExprContext(econtext);
+       }
     }
  
     setopstate->table_filled = true;
@@ -482,7 +553,6 @@ SetOpState *
  ExecInitSetOp(SetOp *node, EState *estate, int eflags)
  {
     SetOpState *setopstate;
-   TupleDesc   outerDesc;
  
     /* check for unsupported flags */
     Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -495,14 +565,10 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
     setopstate->ps.state = estate;
     setopstate->ps.ExecProcNode = ExecSetOp;
  
-   setopstate->eqfuncoids = NULL;
-   setopstate->hashfunctions = NULL;
     setopstate->setop_done = false;
     setopstate->numOutput = 0;
-   setopstate->pergroup = NULL;
-   setopstate->grp_firstTuple = NULL;
-   setopstate->hashtable = NULL;
-   setopstate->tableContext = NULL;
+   setopstate->numCols = node->numCols;
+   setopstate->need_init = true;
  
     /*
      * create expression context
@@ -523,52 +589,72 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
     /*
      * initialize child nodes
      *
-    * If we are hashing then the child plan does not need to handle REWIND
+    * If we are hashing then the child plans do not need to handle REWIND
      * efficiently; see ExecReScanSetOp.
      */
     if (node->strategy == SETOP_HASHED)
         eflags &= ~EXEC_FLAG_REWIND;
     outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
-   outerDesc = ExecGetResultType(outerPlanState(setopstate));
+   innerPlanState(setopstate) = ExecInitNode(innerPlan(node), estate, eflags);
  
     /*
-    * Initialize result slot and type. Setop nodes do no projections, so
-    * initialize projection info for this node appropriately.
+    * Initialize locally-allocated slots.  In hashed mode, we just need a
+    * result slot.  In sorted mode, we need one first-tuple-of-group slot for
+    * each input; we use the result slot for the left input's slot and create
+    * another for the right input.  (Note: the nextTupleSlot slots are not
+    * ours, but just point to the last slot returned by the input plan node.)
      */
-   ExecInitResultTupleSlotTL(&setopstate->ps,
-                             node->strategy == SETOP_HASHED ?
-                             &TTSOpsMinimalTuple : &TTSOpsHeapTuple);
+   ExecInitResultTupleSlotTL(&setopstate->ps, &TTSOpsMinimalTuple);
+   if (node->strategy != SETOP_HASHED)
+   {
+       setopstate->leftInput.firstTupleSlot =
+           setopstate->ps.ps_ResultTupleSlot;
+       setopstate->rightInput.firstTupleSlot =
+           ExecInitExtraTupleSlot(estate,
+                                  setopstate->ps.ps_ResultTupleDesc,
+                                  &TTSOpsMinimalTuple);
+   }
+
+   /* Setop nodes do no projections. */
     setopstate->ps.ps_ProjInfo = NULL;
  
     /*
-    * Precompute fmgr lookup data for inner loop. We need both equality and
-    * hashing functions to do it by hashing, but only equality if not
-    * hashing.
+    * Precompute fmgr lookup data for inner loop.  We need equality and
+    * hashing functions to do it by hashing, while for sorting we need
+    * SortSupport data.
      */
     if (node->strategy == SETOP_HASHED)
         execTuplesHashPrepare(node->numCols,
-                             node->dupOperators,
+                             node->cmpOperators,
                               &setopstate->eqfuncoids,
                               &setopstate->hashfunctions);
     else
-       setopstate->eqfunction =
-           execTuplesMatchPrepare(outerDesc,
-                                  node->numCols,
-                                  node->dupColIdx,
-                                  node->dupOperators,
-                                  node->dupCollations,
-                                  &setopstate->ps);
+   {
+       int         nkeys = node->numCols;
+
+       setopstate->sortKeys = (SortSupport)
+           palloc0(nkeys * sizeof(SortSupportData));
+       for (int i = 0; i < nkeys; i++)
+       {
+           SortSupport sortKey = setopstate->sortKeys + i;
+
+           sortKey->ssup_cxt = CurrentMemoryContext;
+           sortKey->ssup_collation = node->cmpCollations[i];
+           sortKey->ssup_nulls_first = node->cmpNullsFirst[i];
+           sortKey->ssup_attno = node->cmpColIdx[i];
+           /* abbreviated key conversion is not useful here */
+           sortKey->abbreviate = false;
  
+           PrepareSortSupportFromOrderingOp(node->cmpOperators[i], sortKey);
+       }
+   }
+
+   /* Create a hash table if needed */
     if (node->strategy == SETOP_HASHED)
     {
         build_hash_table(setopstate);
         setopstate->table_filled = false;
     }
-   else
-   {
-       setopstate->pergroup =
-           (SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
-   }
  
     return setopstate;
  }
@@ -576,7 +662,7 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
  /* ----------------------------------------------------------------
   *     ExecEndSetOp
   *
- *     This shuts down the subplan and frees resources allocated
+ *     This shuts down the subplans and frees resources allocated
   *     to this node.
   * ----------------------------------------------------------------
   */
@@ -588,6 +674,7 @@ ExecEndSetOp(SetOpState *node)
         MemoryContextDelete(node->tableContext);
  
     ExecEndNode(outerPlanState(node));
+   ExecEndNode(innerPlanState(node));
  }
  
  
@@ -595,6 +682,7 @@ void
  ExecReScanSetOp(SetOpState *node)
  {
     PlanState  *outerPlan = outerPlanState(node);
+   PlanState  *innerPlan = innerPlanState(node);
  
     ExecClearTuple(node->ps.ps_ResultTupleSlot);
     node->setop_done = false;
@@ -612,34 +700,29 @@ ExecReScanSetOp(SetOpState *node)
             return;
  
         /*
-        * If we do have the hash table and the subplan does not have any
+        * If we do have the hash table and the subplans do not have any
          * parameter changes, then we can just rescan the existing hash table;
          * no need to build it again.
          */
-       if (outerPlan->chgParam == NULL)
+       if (outerPlan->chgParam == NULL && innerPlan->chgParam == NULL)
         {
             ResetTupleHashIterator(node->hashtable, &node->hashiter);
             return;
         }
-   }
-
-   /* Release first tuple of group, if we have made a copy */
-   if (node->grp_firstTuple != NULL)
-   {
-       heap_freetuple(node->grp_firstTuple);
-       node->grp_firstTuple = NULL;
-   }
  
-   /* Release any hashtable storage */
-   if (node->tableContext)
-       MemoryContextReset(node->tableContext);
+       /* Release any hashtable storage */
+       if (node->tableContext)
+           MemoryContextReset(node->tableContext);
  
-   /* And rebuild empty hashtable if needed */
-   if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
-   {
+       /* And rebuild an empty hashtable */
         ResetTupleHashTable(node->hashtable);
         node->table_filled = false;
     }
+   else
+   {
+       /* Need to re-read first input from each side */
+       node->need_init = true;
+   }
  
     /*
      * if chgParam of subnode is not null then plan will be re-scanned by
@@ -647,4 +730,6 @@ ExecReScanSetOp(SetOpState *node)
      */
     if (outerPlan->chgParam == NULL)
         ExecReScan(outerPlan);
+   if (innerPlan->chgParam == NULL)
+       ExecReScan(innerPlan);
  }
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README

index 2ab4f3dbf3793855d1646a424cc4048e97915787..f341d9f303c73f75fbab9542f91bb46300100f6a 100644 (file)
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -649,7 +649,7 @@ RelOptInfo      - a relation or joined relations
    GroupingSetsPath - an Agg plan node used to implement GROUPING SETS
    MinMaxAggPath - a Result plan node with subplans performing MIN/MAX
    WindowAggPath - a WindowAgg plan node applied to some sub-path
-  SetOpPath     - a SetOp plan node applied to some sub-path
+  SetOpPath     - a SetOp plan node applied to two sub-paths
    RecursiveUnionPath - a RecursiveUnion plan node applied to two sub-paths
    LockRowsPath  - a LockRows plan node applied to some sub-path
    ModifyTablePath - a ModifyTable plan node applied to some sub-path(s)
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index 178c572b021ba64edc45f90934e89d64f857e5fe..b3e2294e84f87e3ea8c72f6acbf1638322efefee 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -301,9 +301,9 @@ static Unique *make_unique_from_pathkeys(Plan *lefttree,
                                          List *pathkeys, int numCols);
  static Gather *make_gather(List *qptlist, List *qpqual,
                            int nworkers, int rescan_param, bool single_copy, Plan *subplan);
-static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
-                        List *distinctList, AttrNumber flagColIdx, int firstFlag,
-                        long numGroups);
+static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy,
+                        List *tlist, Plan *lefttree, Plan *righttree,
+                        List *groupList, long numGroups);
  static LockRows *make_lockrows(Plan *lefttree, List *rowMarks, int epqParam);
  static Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan);
  static ProjectSet *make_project_set(List *tlist, Plan *subplan);
@@ -2719,25 +2719,29 @@ static SetOp *
  create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags)
  {
     SetOp      *plan;
-   Plan       *subplan;
+   List       *tlist = build_path_tlist(root, &best_path->path);
+   Plan       *leftplan;
+   Plan       *rightplan;
     long        numGroups;
  
     /*
      * SetOp doesn't project, so tlist requirements pass through; moreover we
      * need grouping columns to be labeled.
      */
-   subplan = create_plan_recurse(root, best_path->subpath,
-                                 flags | CP_LABEL_TLIST);
+   leftplan = create_plan_recurse(root, best_path->leftpath,
+                                  flags | CP_LABEL_TLIST);
+   rightplan = create_plan_recurse(root, best_path->rightpath,
+                                   flags | CP_LABEL_TLIST);
  
     /* Convert numGroups to long int --- but 'ware overflow! */
     numGroups = clamp_cardinality_to_long(best_path->numGroups);
  
     plan = make_setop(best_path->cmd,
                       best_path->strategy,
-                     subplan,
-                     best_path->distinctList,
-                     best_path->flagColIdx,
-                     best_path->firstFlag,
+                     tlist,
+                     leftplan,
+                     rightplan,
+                     best_path->groupList,
                       numGroups);
  
     copy_generic_path_info(&plan->plan, (Path *) best_path);
@@ -6950,57 +6954,62 @@ make_gather(List *qptlist,
  }
  
  /*
- * distinctList is a list of SortGroupClauses, identifying the targetlist
- * items that should be considered by the SetOp filter.  The input path must
- * already be sorted accordingly.
+ * groupList is a list of SortGroupClauses, identifying the targetlist
+ * items that should be considered by the SetOp filter.  The input plans must
+ * already be sorted accordingly, if we're doing SETOP_SORTED mode.
   */
  static SetOp *
-make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
-          List *distinctList, AttrNumber flagColIdx, int firstFlag,
-          long numGroups)
+make_setop(SetOpCmd cmd, SetOpStrategy strategy,
+          List *tlist, Plan *lefttree, Plan *righttree,
+          List *groupList, long numGroups)
  {
     SetOp      *node = makeNode(SetOp);
     Plan       *plan = &node->plan;
-   int         numCols = list_length(distinctList);
+   int         numCols = list_length(groupList);
     int         keyno = 0;
-   AttrNumber *dupColIdx;
-   Oid        *dupOperators;
-   Oid        *dupCollations;
+   AttrNumber *cmpColIdx;
+   Oid        *cmpOperators;
+   Oid        *cmpCollations;
+   bool       *cmpNullsFirst;
     ListCell   *slitem;
  
-   plan->targetlist = lefttree->targetlist;
+   plan->targetlist = tlist;
     plan->qual = NIL;
     plan->lefttree = lefttree;
-   plan->righttree = NULL;
+   plan->righttree = righttree;
  
     /*
-    * convert SortGroupClause list into arrays of attr indexes and equality
+    * convert SortGroupClause list into arrays of attr indexes and comparison
      * operators, as wanted by executor
      */
-   dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
-   dupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
-   dupCollations = (Oid *) palloc(sizeof(Oid) * numCols);
+   cmpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+   cmpOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+   cmpCollations = (Oid *) palloc(sizeof(Oid) * numCols);
+   cmpNullsFirst = (bool *) palloc(sizeof(bool) * numCols);
  
-   foreach(slitem, distinctList)
+   foreach(slitem, groupList)
     {
         SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
         TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);
  
-       dupColIdx[keyno] = tle->resno;
-       dupOperators[keyno] = sortcl->eqop;
-       dupCollations[keyno] = exprCollation((Node *) tle->expr);
-       Assert(OidIsValid(dupOperators[keyno]));
+       cmpColIdx[keyno] = tle->resno;
+       if (strategy == SETOP_HASHED)
+           cmpOperators[keyno] = sortcl->eqop;
+       else
+           cmpOperators[keyno] = sortcl->sortop;
+       Assert(OidIsValid(cmpOperators[keyno]));
+       cmpCollations[keyno] = exprCollation((Node *) tle->expr);
+       cmpNullsFirst[keyno] = sortcl->nulls_first;
         keyno++;
     }
  
     node->cmd = cmd;
     node->strategy = strategy;
     node->numCols = numCols;
-   node->dupColIdx = dupColIdx;
-   node->dupOperators = dupOperators;
-   node->dupCollations = dupCollations;
-   node->flagColIdx = flagColIdx;
-   node->firstFlag = firstFlag;
+   node->cmpColIdx = cmpColIdx;
+   node->cmpOperators = cmpOperators;
+   node->cmpCollations = cmpCollations;
+   node->cmpNullsFirst = cmpNullsFirst;
     node->numGroups = numGroups;
  
     return node;
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c

index 9c3822f19add5ed73cc2f6cc9ccba48d73d6e380..1c43370005e48ecfd00b63a721a6a79600a75009 100644 (file)
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -65,7 +65,7 @@ static List *plan_union_children(PlannerInfo *root,
                                  List **istrivial_tlist);
  static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
  static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
-                               Path *input_path,
+                               Path *lpath, Path *rpath,
                                 double dNumGroups, double dNumOutputRows,
                                 const char *construct);
  static List *generate_setop_tlist(List *colTypes, List *colCollations,
@@ -315,8 +315,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
          * to the corresponding tlist entries of the subplan. However, since
          * the subplan was generated by generate_union_paths() or
          * generate_nonunion_paths(), and hence its tlist was generated by
-        * generate_append_tlist(), this will work.  We just tell
-        * generate_setop_tlist() to use varno 0.
+        * generate_append_tlist() or generate_setop_tlist(), this will work.
+        * We just tell generate_setop_tlist() to use varno 0.
          */
         if (flag >= 0 ||
             !tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
@@ -1028,29 +1028,27 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
                *path;
     List       *lpath_tlist,
                *rpath_tlist,
-              *tlist_list,
                *tlist,
-              *groupList,
-              *pathlist;
+              *groupList;
     bool        lpath_trivial_tlist,
-               rpath_trivial_tlist;
+               rpath_trivial_tlist,
+               result_trivial_tlist;
     double      dLeftGroups,
                 dRightGroups,
                 dNumGroups,
                 dNumOutputRows;
     bool        use_hash;
     SetOpCmd    cmd;
-   int         firstFlag;
  
     /*
      * Tell children to fetch all tuples.
      */
     root->tuple_fraction = 0.0;
  
-   /* Recurse on children, ensuring their outputs are marked */
+   /* Recurse on children */
     lrel = recurse_set_operations(op->larg, root,
                                   op->colTypes, op->colCollations,
-                                 false, 0,
+                                 false, -1,
                                   refnames_tlist,
                                   &lpath_tlist,
                                   &lpath_trivial_tlist);
@@ -1060,10 +1058,9 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
     else
         dLeftGroups = lrel->rows;
  
-   lpath = lrel->cheapest_total_path;
     rrel = recurse_set_operations(op->rarg, root,
                                   op->colTypes, op->colCollations,
-                                 false, 1,
+                                 false, -1,
                                   refnames_tlist,
                                   &rpath_tlist,
                                   &rpath_trivial_tlist);
@@ -1073,41 +1070,51 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
     else
         dRightGroups = rrel->rows;
  
-   rpath = rrel->cheapest_total_path;
-
     /* Undo effects of forcing tuple_fraction to 0 */
     root->tuple_fraction = save_fraction;
  
     /*
      * For EXCEPT, we must put the left input first.  For INTERSECT, either
      * order should give the same results, and we prefer to put the smaller
-    * input first in order to minimize the size of the hash table in the
-    * hashing case.  "Smaller" means the one with the fewer groups.
+    * input first in order to (a) minimize the size of the hash table in the
+    * hashing case, and (b) improve our chances of exploiting the executor's
+    * fast path for empty left-hand input.  "Smaller" means the one with the
+    * fewer groups.
      */
-   if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
-   {
-       pathlist = list_make2(lpath, rpath);
-       tlist_list = list_make2(lpath_tlist, rpath_tlist);
-       firstFlag = 0;
-   }
-   else
+   if (op->op != SETOP_EXCEPT && dLeftGroups > dRightGroups)
     {
-       pathlist = list_make2(rpath, lpath);
-       tlist_list = list_make2(rpath_tlist, lpath_tlist);
-       firstFlag = 1;
+       /* need to swap the two inputs */
+       RelOptInfo *tmprel;
+       List       *tmplist;
+       double      tmpd;
+
+       tmprel = lrel;
+       lrel = rrel;
+       rrel = tmprel;
+       tmplist = lpath_tlist;
+       lpath_tlist = rpath_tlist;
+       rpath_tlist = tmplist;
+       tmpd = dLeftGroups;
+       dLeftGroups = dRightGroups;
+       dRightGroups = tmpd;
     }
  
+   lpath = lrel->cheapest_total_path;
+   rpath = rrel->cheapest_total_path;
+
     /*
-    * Generate tlist for Append plan node.
+    * Generate tlist for SetOp plan node.
      *
-    * The tlist for an Append plan isn't important as far as the Append is
+    * The tlist for a SetOp plan isn't important so far as the SetOp is
      * concerned, but we must make it look real anyway for the benefit of the
-    * next plan level up.  In fact, it has to be real enough that the flag
-    * column is shown as a variable not a constant, else setrefs.c will get
-    * confused.
+    * next plan level up.
      */
-   tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
-                                 tlist_list, refnames_tlist);
+   tlist = generate_setop_tlist(op->colTypes, op->colCollations, -1,
+                                0, false, lpath_tlist, refnames_tlist,
+                                &result_trivial_tlist);
+
+   /* We should not have needed any type coercions in the tlist */
+   Assert(result_trivial_tlist);
  
     *pTargetList = tlist;
  
@@ -1116,12 +1123,6 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
                                  bms_union(lrel->relids, rrel->relids));
     result_rel->reltarget = create_pathtarget(root, tlist);
  
-   /*
-    * Append the child results together.
-    */
-   path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
-                                      NIL, NULL, 0, false, -1);
-
     /* Identify the grouping semantics */
     groupList = generate_setop_grouplist(op, tlist);
  
@@ -1140,25 +1141,40 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
     }
     else
     {
-       dNumGroups = Min(dLeftGroups, dRightGroups);
+       dNumGroups = dLeftGroups;
         dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
     }
  
     /*
-    * Decide whether to hash or sort, and add a sort node if needed.
+    * Decide whether to hash or sort, and add sort nodes if needed.
      */
-   use_hash = choose_hashed_setop(root, groupList, path,
+   use_hash = choose_hashed_setop(root, groupList, lpath, rpath,
                                    dNumGroups, dNumOutputRows,
                                    (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
  
     if (groupList && !use_hash)
-       path = (Path *) create_sort_path(root,
-                                        result_rel,
-                                        path,
-                                        make_pathkeys_for_sortclauses(root,
-                                                                      groupList,
-                                                                      tlist),
-                                        -1.0);
+   {
+       List       *pathkeys;
+
+       pathkeys = make_pathkeys_for_sortclauses(root,
+                                                groupList,
+                                                lpath_tlist);
+       if (!pathkeys_contained_in(pathkeys, lpath->pathkeys))
+           lpath = (Path *) create_sort_path(root,
+                                             lpath->parent,
+                                             lpath,
+                                             pathkeys,
+                                             -1.0);
+       pathkeys = make_pathkeys_for_sortclauses(root,
+                                                groupList,
+                                                rpath_tlist);
+       if (!pathkeys_contained_in(pathkeys, rpath->pathkeys))
+           rpath = (Path *) create_sort_path(root,
+                                             rpath->parent,
+                                             rpath,
+                                             pathkeys,
+                                             -1.0);
+   }
  
     /*
      * Finally, add a SetOp path node to generate the correct output.
@@ -1178,12 +1194,11 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
     }
     path = (Path *) create_setop_path(root,
                                       result_rel,
-                                     path,
+                                     lpath,
+                                     rpath,
                                       cmd,
                                       use_hash ? SETOP_HASHED : SETOP_SORTED,
                                       groupList,
-                                     list_length(op->colTypes) + 1,
-                                     use_hash ? firstFlag : -1,
                                       dNumGroups,
                                       dNumOutputRows);
  
@@ -1285,10 +1300,13 @@ postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
  
  /*
   * choose_hashed_setop - should we use hashing for a set operation?
+ *
+ * XXX probably this should go away: just make both paths and let
+ * add_path sort it out.
   */
  static bool
  choose_hashed_setop(PlannerInfo *root, List *groupClauses,
-                   Path *input_path,
+                   Path *lpath, Path *rpath,
                     double dNumGroups, double dNumOutputRows,
                     const char *construct)
  {
@@ -1327,7 +1345,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
      * Don't do it if it doesn't look like the hashtable will fit into
      * hash_mem.
      */
-   hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
+   hashentrysize = MAXALIGN(lpath->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
  
     if (hashentrysize * dNumGroups > hash_mem_limit)
         return false;
@@ -1336,9 +1354,9 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
      * See if the estimated cost is no more than doing it the other way.
      *
      * We need to consider input_plan + hashagg versus input_plan + sort +
-    * group.  Note that the actual result plan might involve a SetOp or
-    * Unique node, not Agg or Group, but the cost estimates for Agg and Group
-    * should be close enough for our purposes here.
+    * group. XXX NOT TRUE: Note that the actual result plan might involve a
+    * SetOp or Unique node, not Agg or Group, but the cost estimates for Agg
+    * and Group should be close enough for our purposes here.
      *
      * These path variables are dummies that just hold cost fields; we don't
      * make actual Paths for these steps.
@@ -1346,27 +1364,31 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
     cost_agg(&hashed_p, root, AGG_HASHED, NULL,
              numGroupCols, dNumGroups,
              NIL,
-            input_path->disabled_nodes,
-            input_path->startup_cost, input_path->total_cost,
-            input_path->rows, input_path->pathtarget->width);
+            lpath->disabled_nodes + rpath->disabled_nodes,
+            lpath->startup_cost + rpath->startup_cost,
+            lpath->total_cost + rpath->total_cost,
+            lpath->rows + rpath->rows,
+            lpath->pathtarget->width);
  
     /*
-    * Now for the sorted case.  Note that the input is *always* unsorted,
-    * since it was made by appending unrelated sub-relations together.
+    * Now for the sorted case.  XXX NOT TRUE: Note that the input is *always*
+    * unsorted, since it was made by appending unrelated sub-relations
+    * together.
      */
-   sorted_p.disabled_nodes = input_path->disabled_nodes;
-   sorted_p.startup_cost = input_path->startup_cost;
-   sorted_p.total_cost = input_path->total_cost;
+   sorted_p.disabled_nodes = lpath->disabled_nodes + rpath->disabled_nodes;
+   sorted_p.startup_cost = lpath->startup_cost + rpath->startup_cost;
+   sorted_p.total_cost = lpath->total_cost + rpath->total_cost;
     /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
     cost_sort(&sorted_p, root, NIL, sorted_p.disabled_nodes,
               sorted_p.total_cost,
-             input_path->rows, input_path->pathtarget->width,
+             lpath->rows + rpath->rows,
+             lpath->pathtarget->width,
               0.0, work_mem, -1.0);
     cost_group(&sorted_p, root, numGroupCols, dNumGroups,
                NIL,
                sorted_p.disabled_nodes,
                sorted_p.startup_cost, sorted_p.total_cost,
-              input_path->rows);
+              lpath->rows + rpath->rows);
  
     /*
      * Now make the decision using the top-level tuple fraction.  First we
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index fc97bf6ee266b28e6090337dcf5434697843a013..e52e4b1d6774da6041bb50bc2ec01999f427f2aa 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -3634,25 +3634,26 @@ create_windowagg_path(PlannerInfo *root,
   *   Creates a pathnode that represents computation of INTERSECT or EXCEPT
   *
   * 'rel' is the parent relation associated with the result
- * 'subpath' is the path representing the source of data
+ * 'leftpath' is the path representing the left-hand source of data
+ * 'rightpath' is the path representing the right-hand source of data
   * 'cmd' is the specific semantics (INTERSECT or EXCEPT, with/without ALL)
   * 'strategy' is the implementation strategy (sorted or hashed)
- * 'distinctList' is a list of SortGroupClause's representing the grouping
- * 'flagColIdx' is the column number where the flag column will be, if any
- * 'firstFlag' is the flag value for the first input relation when hashing;
- *     or -1 when sorting
- * 'numGroups' is the estimated number of distinct groups
+ * 'groupList' is a list of SortGroupClause's representing the grouping
+ * 'numGroups' is the estimated number of distinct groups in left-hand input
   * 'outputRows' is the estimated number of output rows
+ *
+ * leftpath and rightpath must produce the same columns.  Moreover, if
+ * strategy is SETOP_SORTED, leftpath and rightpath must both be sorted
+ * by all the grouping columns.
   */
  SetOpPath *
  create_setop_path(PlannerInfo *root,
                   RelOptInfo *rel,
-                 Path *subpath,
+                 Path *leftpath,
+                 Path *rightpath,
                   SetOpCmd cmd,
                   SetOpStrategy strategy,
-                 List *distinctList,
-                 AttrNumber flagColIdx,
-                 int firstFlag,
+                 List *groupList,
                   double numGroups,
                   double outputRows)
  {
@@ -3660,34 +3661,37 @@ create_setop_path(PlannerInfo *root,
  
     pathnode->path.pathtype = T_SetOp;
     pathnode->path.parent = rel;
-   /* SetOp doesn't project, so use source path's pathtarget */
-   pathnode->path.pathtarget = subpath->pathtarget;
+   pathnode->path.pathtarget = rel->reltarget;
     /* For now, assume we are above any joins, so no parameterization */
     pathnode->path.param_info = NULL;
     pathnode->path.parallel_aware = false;
     pathnode->path.parallel_safe = rel->consider_parallel &&
-       subpath->parallel_safe;
-   pathnode->path.parallel_workers = subpath->parallel_workers;
+       leftpath->parallel_safe && rightpath->parallel_safe;
+   pathnode->path.parallel_workers =
+       leftpath->parallel_workers + rightpath->parallel_workers;
     /* SetOp preserves the input sort order if in sort mode */
     pathnode->path.pathkeys =
-       (strategy == SETOP_SORTED) ? subpath->pathkeys : NIL;
+       (strategy == SETOP_SORTED) ? leftpath->pathkeys : NIL;
  
-   pathnode->subpath = subpath;
+   pathnode->leftpath = leftpath;
+   pathnode->rightpath = rightpath;
     pathnode->cmd = cmd;
     pathnode->strategy = strategy;
-   pathnode->distinctList = distinctList;
-   pathnode->flagColIdx = flagColIdx;
-   pathnode->firstFlag = firstFlag;
+   pathnode->groupList = groupList;
     pathnode->numGroups = numGroups;
  
     /*
      * Charge one cpu_operator_cost per comparison per input tuple. We assume
      * all columns get compared at most of the tuples.
+    *
+    * XXX all wrong for hashing
      */
-   pathnode->path.disabled_nodes = subpath->disabled_nodes;
-   pathnode->path.startup_cost = subpath->startup_cost;
-   pathnode->path.total_cost = subpath->total_cost +
-       cpu_operator_cost * subpath->rows * list_length(distinctList);
+   pathnode->path.disabled_nodes =
+       leftpath->disabled_nodes + rightpath->disabled_nodes;
+   pathnode->path.startup_cost =
+       leftpath->startup_cost + rightpath->startup_cost;
+   pathnode->path.total_cost = leftpath->total_cost + rightpath->total_cost +
+       cpu_operator_cost * (leftpath->rows + rightpath->rows) * list_length(groupList);
     pathnode->path.rows = outputRows;
  
     return pathnode;
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h

index c8e6befca88aefbb275c759d27eb2f14a23cc4ef..dee866e96bb115d692a1e1e35049f4ca7384f4eb 100644 (file)
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -585,6 +585,9 @@ extern void ExecAssignExprContext(EState *estate, PlanState *planstate);
  extern TupleDesc ExecGetResultType(PlanState *planstate);
  extern const TupleTableSlotOps *ExecGetResultSlotOps(PlanState *planstate,
                                                      bool *isfixed);
+extern const TupleTableSlotOps *ExecGetCommonSlotOps(PlanState **planstates,
+                                                    int nplans);
+extern const TupleTableSlotOps *ExecGetCommonChildSlotOps(PlanState *ps);
  extern void ExecAssignProjectionInfo(PlanState *planstate,
                                      TupleDesc inputDesc);
  extern void ExecConditionalAssignProjectionInfo(PlanState *planstate,
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index 79d5e96021eb6a0b6bf2ddfdb52fd7f95fdf29a5..1590b643920521183fd4486fa91af9b3c4e9609e 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2803,27 +2803,34 @@ typedef struct HashState
  /* ----------------
   *  SetOpState information
   *
- *     Even in "sorted" mode, SetOp nodes are more complex than a simple
- *     Unique, since we have to count how many duplicates to return.  But
- *     we also support hashing, so this is really more like a cut-down
- *     form of Agg.
+ *     SetOp nodes support either sorted or hashed de-duplication.
+ *     The sorted mode is a bit like MergeJoin, the hashed mode like Agg.
   * ----------------
   */
-/* this struct is private in nodeSetOp.c: */
-typedef struct SetOpStatePerGroupData *SetOpStatePerGroup;
+typedef struct SetOpStatePerInput
+{
+   TupleTableSlot *firstTupleSlot; /* first tuple of current group */
+   int64       numTuples;      /* number of tuples in current group */
+   TupleTableSlot *nextTupleSlot;  /* next input tuple, if already read */
+   bool        needGroup;      /* do we need to load a new group? */
+} SetOpStatePerInput;
  
  typedef struct SetOpState
  {
     PlanState   ps;             /* its first field is NodeTag */
-   ExprState  *eqfunction;     /* equality comparator */
-   Oid        *eqfuncoids;     /* per-grouping-field equality fns */
-   FmgrInfo   *hashfunctions;  /* per-grouping-field hash fns */
     bool        setop_done;     /* indicates completion of output scan */
-   long        numOutput;      /* number of dups left to output */
+   int64       numOutput;      /* number of dups left to output */
+   int         numCols;        /* number of grouping columns */
+
     /* these fields are used in SETOP_SORTED mode: */
-   SetOpStatePerGroup pergroup;    /* per-group working state */
-   HeapTuple   grp_firstTuple; /* copy of first tuple of current group */
+   SortSupport sortKeys;       /* per-grouping-field sort data */
+   SetOpStatePerInput leftInput;   /* current outer-relation input state */
+   SetOpStatePerInput rightInput;  /* current inner-relation input state */
+   bool        need_init;      /* have we read the first tuples yet? */
+
     /* these fields are used in SETOP_HASHED mode: */
+   Oid        *eqfuncoids;     /* per-grouping-field equality fns */
+   FmgrInfo   *hashfunctions;  /* per-grouping-field hash fns */
     TupleHashTable hashtable;   /* hash table with one entry per group */
     MemoryContext tableContext; /* memory context containing hash table */
     bool        table_filled;   /* hash table filled yet? */
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h

index 0759e00e96ddb908ade91cae43730a30f66282da..58748d2ca6fd0a0418f0d6186a44c1f0db3996c1 100644 (file)
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -2342,13 +2342,12 @@ typedef struct WindowAggPath
  typedef struct SetOpPath
  {
     Path        path;
-   Path       *subpath;        /* path representing input source */
+   Path       *leftpath;       /* paths representing input sources */
+   Path       *rightpath;
     SetOpCmd    cmd;            /* what to do, see nodes.h */
     SetOpStrategy strategy;     /* how to do it, see nodes.h */
-   List       *distinctList;   /* SortGroupClauses identifying target cols */
-   AttrNumber  flagColIdx;     /* where is the flag column, if any */
-   int         firstFlag;      /* flag value for first input relation */
-   Cardinality numGroups;      /* estimated number of groups in input */
+   List       *groupList;      /* SortGroupClauses identifying target cols */
+   Cardinality numGroups;      /* estimated number of groups in left input */
  } SetOpPath;
  
  /*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h

index 52f29bcdb69c0e5c9ce9f342fb36d4c6c8c3724e..4633121689c9899845df10e693f530833dc24037 100644 (file)
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1225,23 +1225,20 @@ typedef struct SetOp
     /* how to do it, see nodes.h */
     SetOpStrategy strategy;
  
-   /* number of columns to check for duplicate-ness */
+   /* number of columns to compare */
     int         numCols;
  
     /* their indexes in the target list */
-   AttrNumber *dupColIdx pg_node_attr(array_size(numCols));
+   AttrNumber *cmpColIdx pg_node_attr(array_size(numCols));
  
-   /* equality operators to compare with */
-   Oid        *dupOperators pg_node_attr(array_size(numCols));
-   Oid        *dupCollations pg_node_attr(array_size(numCols));
-
-   /* where is the flag column, if any */
-   AttrNumber  flagColIdx;
+   /* comparison operators (either equality operators or sort operators) */
+   Oid        *cmpOperators pg_node_attr(array_size(numCols));
+   Oid        *cmpCollations pg_node_attr(array_size(numCols));
  
-   /* flag value for first input relation */
-   int         firstFlag;
+   /* nulls-first flags if sorting, otherwise not interesting */
+   bool       *cmpNullsFirst pg_node_attr(array_size(numCols));
  
-   /* estimated number of groups in input */
+   /* estimated number of groups in left input */
     long        numGroups;
  } SetOp;
  
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h

index 1035e6560c1fe7a8b792adc7e04780fb88e691c5..5a6d0350c1291ed3e35fce62ffee51ff8e89f330 100644 (file)
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -260,12 +260,11 @@ extern WindowAggPath *create_windowagg_path(PlannerInfo *root,
                                             bool topwindow);
  extern SetOpPath *create_setop_path(PlannerInfo *root,
                                     RelOptInfo *rel,
-                                   Path *subpath,
+                                   Path *leftpath,
+                                   Path *rightpath,
                                     SetOpCmd cmd,
                                     SetOpStrategy strategy,
-                                   List *distinctList,
-                                   AttrNumber flagColIdx,
-                                   int firstFlag,
+                                   List *groupList,
                                     double numGroups,
                                     double outputRows);
  extern RecursiveUnionPath *create_recursiveunion_path(PlannerInfo *root,
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out

index 857ba1c40c01a724484fba4f92d47a4e7434ca16..b4cd204de779cf9342ef7aefd380c5d4d7591083 100644 (file)
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1231,22 +1231,18 @@ select count(*) from
      select * from onek i2 where i2.unique1 = o.unique2
    ) ss
  where o.ten = 1;
-                                  QUERY PLAN                                  
-------------------------------------------------------------------------------
+                         QUERY PLAN                         
+------------------------------------------------------------
   Aggregate
     ->  Nested Loop
           ->  Seq Scan on onek o
                 Filter: (ten = 1)
-         ->  Subquery Scan on ss
-               ->  HashSetOp Except
-                     ->  Append
-                           ->  Subquery Scan on "*SELECT* 1"
-                                 ->  Index Scan using onek_unique1 on onek i1
-                                       Index Cond: (unique1 = o.unique1)
-                           ->  Subquery Scan on "*SELECT* 2"
-                                 ->  Index Scan using onek_unique1 on onek i2
-                                       Index Cond: (unique1 = o.unique2)
-(13 rows)
+         ->  HashSetOp Except
+               ->  Index Scan using onek_unique1 on onek i1
+                     Index Cond: (unique1 = o.unique1)
+               ->  Index Scan using onek_unique1 on onek i2
+                     Index Cond: (unique1 = o.unique2)
+(9 rows)
  
  select count(*) from
    onek o cross join lateral (
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out

index c73631a9a1d8281a4785154e4c527725f95a66e6..17a0dea719563d2a2fbae21fa7b4a55586a786ec 100644 (file)
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -370,17 +370,13 @@ select count(*) from
  explain (costs off)
  select count(*) from
    ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
-                                     QUERY PLAN                                     
-------------------------------------------------------------------------------------
+                            QUERY PLAN                            
+------------------------------------------------------------------
   Aggregate
-   ->  Subquery Scan on ss
-         ->  HashSetOp Intersect
-               ->  Append
-                     ->  Subquery Scan on "*SELECT* 2"
-                           ->  Seq Scan on tenk1
-                     ->  Subquery Scan on "*SELECT* 1"
-                           ->  Index Only Scan using tenk1_unique1 on tenk1 tenk1_1
-(8 rows)
+   ->  HashSetOp Intersect
+         ->  Seq Scan on tenk1
+         ->  Index Only Scan using tenk1_unique1 on tenk1 tenk1_1
+(4 rows)
  
  select count(*) from
    ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
@@ -391,16 +387,13 @@ select count(*) from
  
  explain (costs off)
  select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
-                               QUERY PLAN                               
-------------------------------------------------------------------------
+                         QUERY PLAN                         
+------------------------------------------------------------
   HashSetOp Except
-   ->  Append
-         ->  Subquery Scan on "*SELECT* 1"
-               ->  Index Only Scan using tenk1_unique1 on tenk1
-         ->  Subquery Scan on "*SELECT* 2"
-               ->  Index Only Scan using tenk1_unique2 on tenk1 tenk1_1
-                     Filter: (unique2 <> 10)
-(7 rows)
+   ->  Index Only Scan using tenk1_unique1 on tenk1
+   ->  Index Only Scan using tenk1_unique2 on tenk1 tenk1_1
+         Filter: (unique2 <> 10)
+(4 rows)
  
  select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
   unique1 
@@ -408,6 +401,34 @@ select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
        10
  (1 row)
  
+-- the hashed implementation is sensitive to child plans' tuple slot types
+explain (costs off)
+select * from int8_tbl intersect select q2, q1 from int8_tbl order by 1, 2;
+                 QUERY PLAN                  
+---------------------------------------------
+ Sort
+   Sort Key: int8_tbl.q1, int8_tbl.q2
+   ->  HashSetOp Intersect
+         ->  Seq Scan on int8_tbl
+         ->  Seq Scan on int8_tbl int8_tbl_1
+(5 rows)
+
+select * from int8_tbl intersect select q2, q1 from int8_tbl order by 1, 2;
+        q1        |        q2        
+------------------+------------------
+              123 | 4567890123456789
+ 4567890123456789 |              123
+ 4567890123456789 | 4567890123456789
+(3 rows)
+
+select q2, q1 from int8_tbl intersect select * from int8_tbl order by 1, 2;
+        q2        |        q1        
+------------------+------------------
+              123 | 4567890123456789
+ 4567890123456789 |              123
+ 4567890123456789 | 4567890123456789
+(3 rows)
+
  set enable_hashagg to off;
  explain (costs off)
  select count(*) from
@@ -434,19 +455,17 @@ select count(*) from
  explain (costs off)
  select count(*) from
    ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
-                                        QUERY PLAN                                        
-------------------------------------------------------------------------------------------
+                               QUERY PLAN                               
+------------------------------------------------------------------------
   Aggregate
-   ->  Subquery Scan on ss
-         ->  SetOp Intersect
-               ->  Sort
-                     Sort Key: "*SELECT* 2".fivethous
-                     ->  Append
-                           ->  Subquery Scan on "*SELECT* 2"
-                                 ->  Seq Scan on tenk1
-                           ->  Subquery Scan on "*SELECT* 1"
-                                 ->  Index Only Scan using tenk1_unique1 on tenk1 tenk1_1
-(10 rows)
+   ->  SetOp Intersect
+         ->  Sort
+               Sort Key: tenk1.fivethous
+               ->  Seq Scan on tenk1
+         ->  Sort
+               Sort Key: tenk1_1.unique1
+               ->  Index Only Scan using tenk1_unique1 on tenk1 tenk1_1
+(8 rows)
  
  select count(*) from
    ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
@@ -457,18 +476,17 @@ select count(*) from
  
  explain (costs off)
  select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
-                                  QUERY PLAN                                  
-------------------------------------------------------------------------------
+                            QUERY PLAN                            
+------------------------------------------------------------------
   SetOp Except
     ->  Sort
-         Sort Key: "*SELECT* 1".unique1
-         ->  Append
-               ->  Subquery Scan on "*SELECT* 1"
-                     ->  Index Only Scan using tenk1_unique1 on tenk1
-               ->  Subquery Scan on "*SELECT* 2"
-                     ->  Index Only Scan using tenk1_unique2 on tenk1 tenk1_1
-                           Filter: (unique2 <> 10)
-(9 rows)
+         Sort Key: tenk1.unique1
+         ->  Index Only Scan using tenk1_unique1 on tenk1
+   ->  Sort
+         Sort Key: tenk1_1.unique2
+         ->  Index Only Scan using tenk1_unique2 on tenk1 tenk1_1
+               Filter: (unique2 <> 10)
+(8 rows)
  
  select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
   unique1 
@@ -528,15 +546,12 @@ select x from (values (array[1, 2]), (array[1, 3])) _(x) union select x from (va
  
  explain (costs off)
  select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4])) _(x);
-                  QUERY PLAN                   
------------------------------------------------
+            QUERY PLAN             
+-----------------------------------
   HashSetOp Intersect
-   ->  Append
-         ->  Subquery Scan on "*SELECT* 1"
-               ->  Values Scan on "*VALUES*"
-         ->  Subquery Scan on "*SELECT* 2"
-               ->  Values Scan on "*VALUES*_1"
-(6 rows)
+   ->  Values Scan on "*VALUES*"
+   ->  Values Scan on "*VALUES*_1"
+(3 rows)
  
  select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4])) _(x);
     x   
@@ -546,15 +561,12 @@ select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from
  
  explain (costs off)
  select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4])) _(x);
-                  QUERY PLAN                   
------------------------------------------------
+            QUERY PLAN             
+-----------------------------------
   HashSetOp Except
-   ->  Append
-         ->  Subquery Scan on "*SELECT* 1"
-               ->  Values Scan on "*VALUES*"
-         ->  Subquery Scan on "*SELECT* 2"
-               ->  Values Scan on "*VALUES*_1"
-(6 rows)
+   ->  Values Scan on "*VALUES*"
+   ->  Values Scan on "*VALUES*_1"
+(3 rows)
  
  select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4])) _(x);
     x   
@@ -606,17 +618,16 @@ select x from (values (array[1, 2]), (array[1, 3])) _(x) union select x from (va
  
  explain (costs off)
  select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4])) _(x);
-                     QUERY PLAN                      
------------------------------------------------------
+               QUERY PLAN                
+-----------------------------------------
   SetOp Intersect
     ->  Sort
-         Sort Key: "*SELECT* 1".x
-         ->  Append
-               ->  Subquery Scan on "*SELECT* 1"
-                     ->  Values Scan on "*VALUES*"
-               ->  Subquery Scan on "*SELECT* 2"
-                     ->  Values Scan on "*VALUES*_1"
-(8 rows)
+         Sort Key: "*VALUES*".column1
+         ->  Values Scan on "*VALUES*"
+   ->  Sort
+         Sort Key: "*VALUES*_1".column1
+         ->  Values Scan on "*VALUES*_1"
+(7 rows)
  
  select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4])) _(x);
     x   
@@ -626,17 +637,16 @@ select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from
  
  explain (costs off)
  select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4])) _(x);
-                     QUERY PLAN                      
------------------------------------------------------
+               QUERY PLAN                
+-----------------------------------------
   SetOp Except
     ->  Sort
-         Sort Key: "*SELECT* 1".x
-         ->  Append
-               ->  Subquery Scan on "*SELECT* 1"
-                     ->  Values Scan on "*VALUES*"
-               ->  Subquery Scan on "*SELECT* 2"
-                     ->  Values Scan on "*VALUES*_1"
-(8 rows)
+         Sort Key: "*VALUES*".column1
+         ->  Values Scan on "*VALUES*"
+   ->  Sort
+         Sort Key: "*VALUES*_1".column1
+         ->  Values Scan on "*VALUES*_1"
+(7 rows)
  
  select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4])) _(x);
     x   
@@ -669,17 +679,16 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values
  
  explain (costs off)
  select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
-                     QUERY PLAN                      
------------------------------------------------------
+               QUERY PLAN                
+-----------------------------------------
   SetOp Intersect
     ->  Sort
-         Sort Key: "*SELECT* 1".x
-         ->  Append
-               ->  Subquery Scan on "*SELECT* 1"
-                     ->  Values Scan on "*VALUES*"
-               ->  Subquery Scan on "*SELECT* 2"
-                     ->  Values Scan on "*VALUES*_1"
-(8 rows)
+         Sort Key: "*VALUES*".column1
+         ->  Values Scan on "*VALUES*"
+   ->  Sort
+         Sort Key: "*VALUES*_1".column1
+         ->  Values Scan on "*VALUES*_1"
+(7 rows)
  
  select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
     x   
@@ -689,17 +698,16 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (va
  
  explain (costs off)
  select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
-                     QUERY PLAN                      
------------------------------------------------------
+               QUERY PLAN                
+-----------------------------------------
   SetOp Except
     ->  Sort
-         Sort Key: "*SELECT* 1".x
-         ->  Append
-               ->  Subquery Scan on "*SELECT* 1"
-                     ->  Values Scan on "*VALUES*"
-               ->  Subquery Scan on "*SELECT* 2"
-                     ->  Values Scan on "*VALUES*_1"
-(8 rows)
+         Sort Key: "*VALUES*".column1
+         ->  Values Scan on "*VALUES*"
+   ->  Sort
+         Sort Key: "*VALUES*_1".column1
+         ->  Values Scan on "*VALUES*_1"
+(7 rows)
  
  select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
     x   
@@ -777,17 +785,16 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values
  
  explain (costs off)
  select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
-                     QUERY PLAN                      
------------------------------------------------------
+               QUERY PLAN                
+-----------------------------------------
   SetOp Intersect
     ->  Sort
-         Sort Key: "*SELECT* 1".x
-         ->  Append
-               ->  Subquery Scan on "*SELECT* 1"
-                     ->  Values Scan on "*VALUES*"
-               ->  Subquery Scan on "*SELECT* 2"
-                     ->  Values Scan on "*VALUES*_1"
-(8 rows)
+         Sort Key: "*VALUES*".column1
+         ->  Values Scan on "*VALUES*"
+   ->  Sort
+         Sort Key: "*VALUES*_1".column1
+         ->  Values Scan on "*VALUES*_1"
+(7 rows)
  
  select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
     x   
@@ -797,17 +804,16 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (va
  
  explain (costs off)
  select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
-                     QUERY PLAN                      
------------------------------------------------------
+               QUERY PLAN                
+-----------------------------------------
   SetOp Except
     ->  Sort
-         Sort Key: "*SELECT* 1".x
-         ->  Append
-               ->  Subquery Scan on "*SELECT* 1"
-                     ->  Values Scan on "*VALUES*"
-               ->  Subquery Scan on "*SELECT* 2"
-                     ->  Values Scan on "*VALUES*_1"
-(8 rows)
+         Sort Key: "*VALUES*".column1
+         ->  Values Scan on "*VALUES*"
+   ->  Sort
+         Sort Key: "*VALUES*_1".column1
+         ->  Values Scan on "*VALUES*_1"
+(7 rows)
  
  select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
     x   
@@ -970,15 +976,12 @@ set enable_sort = false;
  -- no means to disable Unique.
  explain (costs off)
  select from generate_series(1,5) intersect select from generate_series(1,3);
-                              QUERY PLAN                              
-----------------------------------------------------------------------
+                        QUERY PLAN                        
+----------------------------------------------------------
   HashSetOp Intersect
-   ->  Append
-         ->  Subquery Scan on "*SELECT* 1"
-               ->  Function Scan on generate_series
-         ->  Subquery Scan on "*SELECT* 2"
-               ->  Function Scan on generate_series generate_series_1
-(6 rows)
+   ->  Function Scan on generate_series
+   ->  Function Scan on generate_series generate_series_1
+(3 rows)
  
  select from generate_series(1,5) union all select from generate_series(1,3);
  --
@@ -1015,15 +1018,12 @@ select from generate_series(1,5) union select from generate_series(1,3);
  
  explain (costs off)
  select from generate_series(1,5) intersect select from generate_series(1,3);
-                              QUERY PLAN                              
-----------------------------------------------------------------------
+                        QUERY PLAN                        
+----------------------------------------------------------
   SetOp Intersect
-   ->  Append
-         ->  Subquery Scan on "*SELECT* 1"
-               ->  Function Scan on generate_series
-         ->  Subquery Scan on "*SELECT* 2"
-               ->  Function Scan on generate_series generate_series_1
-(6 rows)
+   ->  Function Scan on generate_series
+   ->  Function Scan on generate_series generate_series_1
+(3 rows)
  
  select from generate_series(1,5) union select from generate_series(1,3);
  --
diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql

index f8826514e42a6bda17700193512ce0cb690d1072..e34e8a2d7e60deb9d18c72da5ce2fd9d5a97eb15 100644 (file)
--- a/src/test/regress/sql/union.sql
+++ b/src/test/regress/sql/union.sql
@@ -138,6 +138,12 @@ explain (costs off)
  select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
  select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
  
+-- the hashed implementation is sensitive to child plans' tuple slot types
+explain (costs off)
+select * from int8_tbl intersect select q2, q1 from int8_tbl order by 1, 2;
+select * from int8_tbl intersect select q2, q1 from int8_tbl order by 1, 2;
+select q2, q1 from int8_tbl intersect select * from int8_tbl order by 1, 2;
+
  set enable_hashagg to off;
  
  explain (costs off)
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list

index 971a150dbb214ce4db0ddc3f7083e83b3a0ad8d3..fbdb932e6b64298d5b70041dd9e5941ef7c5029e 100644 (file)
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2615,6 +2615,8 @@ SetOpCmd
  SetOpPath
  SetOpState
  SetOpStatePerGroup
+SetOpStatePerGroupData
+SetOpStatePerInput
  SetOpStrategy
  SetOperation
  SetOperationStmt
author	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 19 Dec 2024 21:23:45 +0000 (16:23 -0500)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 19 Dec 2024 21:23:45 +0000 (16:23 -0500)
src/backend/executor/execUtils.c		patch \| blob \| blame \| history
src/backend/executor/nodeSetOp.c		patch \| blob \| blame \| history
src/backend/optimizer/README		patch \| blob \| blame \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| blame \| history
src/backend/optimizer/prep/prepunion.c		patch \| blob \| blame \| history
src/backend/optimizer/util/pathnode.c		patch \| blob \| blame \| history
src/include/executor/executor.h		patch \| blob \| blame \| history
src/include/nodes/execnodes.h		patch \| blob \| blame \| history
src/include/nodes/pathnodes.h		patch \| blob \| blame \| history
src/include/nodes/plannodes.h		patch \| blob \| blame \| history
src/include/optimizer/pathnode.h		patch \| blob \| blame \| history
src/test/regress/expected/subselect.out		patch \| blob \| blame \| history
src/test/regress/expected/union.out		patch \| blob \| blame \| history
src/test/regress/sql/union.sql		patch \| blob \| blame \| history
src/tools/pgindent/typedefs.list		patch \| blob \| blame \| history