Improve planner's handling of SetOp plans.
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 19 Dec 2024 22:02:25 +0000 (17:02 -0500)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 19 Dec 2024 22:02:25 +0000 (17:02 -0500)
Remove the code for inserting flag columns in the inputs of a SetOp.
That was the only reason why there would be resjunk columns in a
set-operations plan tree, so we can get rid of some code that
supported that, too.

Get rid of choose_hashed_setop() in favor of building Paths for
the hashed and sorted alternatives, and letting them fight it out
within add_path().

Remove set_operation_ordered_results_useful(), which was giving wrong
answers due to examining the wrong ancestor node: we need to examine
the immediate SetOperationStmt parent not the topmost node.  Instead
make each caller of recurse_set_operations() pass down the relevant
parent node.  (This thinko seems to have led only to wasted planning
cycles and possibly-inferior plans, not wrong query answers.  Perhaps
we should back-patch it, but I'm not doing so right now.)

Teach generate_nonunion_paths() to consider pre-sorted inputs for
sorted SetOps, rather than always generating a Sort node.

Patch by me; thanks to Richard Guo and David Rowley for review.

Discussion: https://postgr.es/m/1850138.1731549611@sss.pgh.pa.us

src/backend/optimizer/plan/planner.c
src/backend/optimizer/prep/prepunion.c
src/backend/optimizer/util/pathnode.c
src/include/optimizer/prep.h
src/test/regress/expected/subselect.out
src/test/regress/expected/union.out
src/test/regress/sql/subselect.sql
src/test/regress/sql/union.sql

index f3856c519f604f2c8c09e34fdaa1995564d4dc68..7468961b0174707adbc5c3f3833bc429dd49fdef 100644 (file)
@@ -616,7 +616,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
  * setops is used for set operation subqueries to provide the subquery with
  * the context in which it's being used so that Paths correctly sorted for the
  * set operation can be generated.  NULL when not planning a set operation
- * child.
+ * child, or when a child of a set op that isn't interested in sorted input.
  *
  * Basically, this routine does the stuff that should only be done once
  * per Query object.  It then calls grouping_planner.  At one time,
@@ -1350,7 +1350,7 @@ preprocess_phv_expression(PlannerInfo *root, Expr *expr)
  * setops is used for set operation subqueries to provide the subquery with
  * the context in which it's being used so that Paths correctly sorted for the
  * set operation can be generated.  NULL when not planning a set operation
- * child.
+ * child, or when a child of a set op that isn't interested in sorted input.
  *
  * Returns nothing; the useful output is in the Paths we attach to the
  * (UPPERREL_FINAL, NULL) upperrel in *root.  In addition,
@@ -3467,8 +3467,7 @@ standard_qp_callback(PlannerInfo *root, void *extra)
                                                                          tlist);
 
        /* setting setop_pathkeys might be useful to the union planner */
-       if (qp_extra->setop != NULL &&
-               set_operation_ordered_results_useful(qp_extra->setop))
+       if (qp_extra->setop != NULL)
        {
                List       *groupClauses;
                bool            sortable;
index 1c43370005e48ecfd00b63a721a6a79600a75009..698ef601be1841affcb08867d205704294b08c1c 100644 (file)
@@ -39,9 +39,9 @@
 
 
 static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
+                                                                                 SetOperationStmt *parentOp,
                                                                                  List *colTypes, List *colCollations,
-                                                                                 bool junkOK,
-                                                                                 int flag, List *refnames_tlist,
+                                                                                 List *refnames_tlist,
                                                                                  List **pTargetList,
                                                                                  bool *istrivial_tlist);
 static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
@@ -64,19 +64,13 @@ static List *plan_union_children(PlannerInfo *root,
                                                                 List **tlist_list,
                                                                 List **istrivial_tlist);
 static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
-static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
-                                                               Path *lpath, Path *rpath,
-                                                               double dNumGroups, double dNumOutputRows,
-                                                               const char *construct);
 static List *generate_setop_tlist(List *colTypes, List *colCollations,
-                                                                 int flag,
                                                                  Index varno,
                                                                  bool hack_constants,
                                                                  List *input_tlist,
                                                                  List *refnames_tlist,
                                                                  bool *trivial_tlist);
 static List *generate_append_tlist(List *colTypes, List *colCollations,
-                                                                  bool flag,
                                                                   List *input_tlists,
                                                                   List *refnames_tlist);
 static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
@@ -160,12 +154,11 @@ plan_set_operations(PlannerInfo *root)
                /*
                 * Recurse on setOperations tree to generate paths for set ops. The
                 * final output paths should have just the column types shown as the
-                * output from the top-level node, plus possibly resjunk working
-                * columns (we can rely on upper-level nodes to deal with that).
+                * output from the top-level node.
                 */
                setop_rel = recurse_set_operations((Node *) topop, root,
+                                                                                  NULL,        /* no parent */
                                                                                   topop->colTypes, topop->colCollations,
-                                                                                  true, -1,
                                                                                   leftmostQuery->targetList,
                                                                                   &top_tlist,
                                                                                   &trivial_tlist);
@@ -177,50 +170,36 @@ plan_set_operations(PlannerInfo *root)
        return setop_rel;
 }
 
-/*
- * set_operation_ordered_results_useful
- *             Return true if the given SetOperationStmt can be executed by utilizing
- *             paths that provide sorted input according to the setop's targetlist.
- *             Returns false when sorted paths are not any more useful than unsorted
- *             ones.
- */
-bool
-set_operation_ordered_results_useful(SetOperationStmt *setop)
-{
-       /*
-        * Paths sorted by the targetlist are useful for UNION as we can opt to
-        * MergeAppend the sorted paths then Unique them.  Ordered paths are no
-        * more useful than unordered ones for UNION ALL.
-        */
-       if (!setop->all && setop->op == SETOP_UNION)
-               return true;
-
-       /*
-        * EXCEPT / EXCEPT ALL / INTERSECT / INTERSECT ALL cannot yet utilize
-        * correctly sorted input paths.
-        */
-       return false;
-}
-
 /*
  * recurse_set_operations
  *       Recursively handle one step in a tree of set operations
  *
+ * setOp: current step (could be a SetOperationStmt or a leaf RangeTblRef)
+ * parentOp: parent step, or NULL if none (but see below)
  * colTypes: OID list of set-op's result column datatypes
  * colCollations: OID list of set-op's result column collations
- * junkOK: if true, child resjunk columns may be left in the result
- * flag: if >= 0, add a resjunk output column indicating value of flag
  * refnames_tlist: targetlist to take column names from
  *
+ * parentOp should be passed as NULL unless that step is interested in
+ * getting sorted output from this step.  ("Sorted" means "sorted according
+ * to the default btree opclasses of the result column datatypes".)
+ *
  * Returns a RelOptInfo for the subtree, as well as these output parameters:
  * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
  * *istrivial_tlist: true if, and only if, datatypes between parent and child
  * match.
  *
+ * If setOp is a leaf node, this function plans the sub-query but does
+ * not populate the pathlist of the returned RelOptInfo.  The caller will
+ * generate SubqueryScan paths using useful path(s) of the subquery (see
+ * build_setop_child_paths).  But this function does build the paths for
+ * set-operation nodes.
+ *
  * The pTargetList output parameter is mostly redundant with the pathtarget
  * of the returned RelOptInfo, but for the moment we need it because much of
  * the logic in this file depends on flag columns being marked resjunk.
- * Pending a redesign of how that works, this is the easy way out.
+ * XXX Now that there are no flag columns and hence no resjunk columns, we
+ * could probably refactor this file to deal only in pathtargets.
  *
  * We don't have to care about typmods here: the only allowed difference
  * between set-op input and output typmods is input is a specific typmod
@@ -228,9 +207,9 @@ set_operation_ordered_results_useful(SetOperationStmt *setop)
  */
 static RelOptInfo *
 recurse_set_operations(Node *setOp, PlannerInfo *root,
+                                          SetOperationStmt *parentOp,
                                           List *colTypes, List *colCollations,
-                                          bool junkOK,
-                                          int flag, List *refnames_tlist,
+                                          List *refnames_tlist,
                                           List **pTargetList,
                                           bool *istrivial_tlist)
 {
@@ -245,7 +224,6 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
        {
                RangeTblRef *rtr = (RangeTblRef *) setOp;
                RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
-               SetOperationStmt *setops;
                Query      *subquery = rte->subquery;
                PlannerInfo *subroot;
                List       *tlist;
@@ -260,15 +238,13 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
                Assert(root->plan_params == NIL);
 
                /*
-                * Pass the set operation details to the subquery_planner to have it
-                * consider generating Paths correctly ordered for the set operation.
+                * Generate a subroot and Paths for the subquery.  If we have a
+                * parentOp, pass that down to encourage subquery_planner to consider
+                * suitably-sorted Paths.
                 */
-               setops = castNode(SetOperationStmt, root->parse->setOperations);
-
-               /* Generate a subroot and Paths for the subquery */
                subroot = rel->subroot = subquery_planner(root->glob, subquery, root,
                                                                                                  false, root->tuple_fraction,
-                                                                                                 setops);
+                                                                                                 parentOp);
 
                /*
                 * It should not be possible for the primitive query to contain any
@@ -279,7 +255,6 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 
                /* Figure out the appropriate target list for this subquery. */
                tlist = generate_setop_tlist(colTypes, colCollations,
-                                                                        flag,
                                                                         rtr->rtindex,
                                                                         true,
                                                                         subroot->processed_tlist,
@@ -318,16 +293,14 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
                 * generate_append_tlist() or generate_setop_tlist(), this will work.
                 * We just tell generate_setop_tlist() to use varno 0.
                 */
-               if (flag >= 0 ||
-                       !tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
-                       !tlist_same_collations(*pTargetList, colCollations, junkOK))
+               if (!tlist_same_datatypes(*pTargetList, colTypes, false) ||
+                       !tlist_same_collations(*pTargetList, colCollations, false))
                {
                        PathTarget *target;
                        bool            trivial_tlist;
                        ListCell   *lc;
 
                        *pTargetList = generate_setop_tlist(colTypes, colCollations,
-                                                                                               flag,
                                                                                                0,
                                                                                                false,
                                                                                                *pTargetList,
@@ -410,8 +383,8 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
         * separately without any intention of combining them into one Append.
         */
        lrel = recurse_set_operations(setOp->larg, root,
+                                                                 NULL, /* no value in sorted results */
                                                                  setOp->colTypes, setOp->colCollations,
-                                                                 false, -1,
                                                                  refnames_tlist,
                                                                  &lpath_tlist,
                                                                  &lpath_trivial_tlist);
@@ -422,8 +395,8 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
        /* The right path will want to look at the left one ... */
        root->non_recursive_path = lpath;
        rrel = recurse_set_operations(setOp->rarg, root,
+                                                                 NULL, /* no value in sorted results */
                                                                  setOp->colTypes, setOp->colCollations,
-                                                                 false, -1,
                                                                  refnames_tlist,
                                                                  &rpath_tlist,
                                                                  &rpath_trivial_tlist);
@@ -436,7 +409,7 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
        /*
         * Generate tlist for RecursiveUnion path node --- same as in Append cases
         */
-       tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
+       tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations,
                                                                  list_make2(lpath_tlist, rpath_tlist),
                                                                  refnames_tlist);
 
@@ -495,6 +468,10 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
  * build_setop_child_paths
  *             Build paths for the set op child relation denoted by 'rel'.
  *
+ * 'rel' is an RTE_SUBQUERY relation.  We have already generated paths within
+ * the subquery's subroot; the task here is to create SubqueryScan paths for
+ * 'rel', representing scans of the useful subquery paths.
+ *
  * interesting_pathkeys: if not NIL, also include paths that suit these
  * pathkeys, sorting any unsorted paths as required.
  * *pNumGroups: if not NULL, we estimate the number of distinct groups
@@ -736,7 +713,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
         * concerned, but we must make it look real anyway for the benefit of the
         * next plan level up.
         */
-       tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
+       tlist = generate_append_tlist(op->colTypes, op->colCollations,
                                                                  tlist_list, refnames_tlist);
        *pTargetList = tlist;
 
@@ -1033,11 +1010,13 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
        bool            lpath_trivial_tlist,
                                rpath_trivial_tlist,
                                result_trivial_tlist;
+       List       *nonunion_pathkeys = NIL;
        double          dLeftGroups,
                                dRightGroups,
                                dNumGroups,
                                dNumOutputRows;
-       bool            use_hash;
+       bool            can_sort;
+       bool            can_hash;
        SetOpCmd        cmd;
 
        /*
@@ -1047,26 +1026,69 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 
        /* Recurse on children */
        lrel = recurse_set_operations(op->larg, root,
+                                                                 op,
                                                                  op->colTypes, op->colCollations,
-                                                                 false, -1,
                                                                  refnames_tlist,
                                                                  &lpath_tlist,
                                                                  &lpath_trivial_tlist);
-       if (lrel->rtekind == RTE_SUBQUERY)
-               build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
-                                                               NIL, &dLeftGroups);
-       else
-               dLeftGroups = lrel->rows;
 
        rrel = recurse_set_operations(op->rarg, root,
+                                                                 op,
                                                                  op->colTypes, op->colCollations,
-                                                                 false, -1,
                                                                  refnames_tlist,
                                                                  &rpath_tlist,
                                                                  &rpath_trivial_tlist);
+
+       /*
+        * Generate tlist for SetOp plan node.
+        *
+        * The tlist for a SetOp plan isn't important so far as the SetOp is
+        * concerned, but we must make it look real anyway for the benefit of the
+        * next plan level up.
+        */
+       tlist = generate_setop_tlist(op->colTypes, op->colCollations,
+                                                                0, false, lpath_tlist, refnames_tlist,
+                                                                &result_trivial_tlist);
+
+       /* We should not have needed any type coercions in the tlist */
+       Assert(result_trivial_tlist);
+
+       *pTargetList = tlist;
+
+       /* Identify the grouping semantics */
+       groupList = generate_setop_grouplist(op, tlist);
+
+       /* Check whether the operators support sorting or hashing */
+       can_sort = grouping_is_sortable(groupList);
+       can_hash = grouping_is_hashable(groupList);
+       if (!can_sort && !can_hash)
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+               /* translator: %s is INTERSECT or EXCEPT */
+                                errmsg("could not implement %s",
+                                               (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT"),
+                                errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
+
+       if (can_sort)
+       {
+               /* Determine the pathkeys for sorting by the whole target list */
+               nonunion_pathkeys = make_pathkeys_for_sortclauses(root, groupList,
+                                                                                                                 tlist);
+
+               root->query_pathkeys = nonunion_pathkeys;
+       }
+
+       /*
+        * Now that we've got all that info, we can build the child paths.
+        */
+       if (lrel->rtekind == RTE_SUBQUERY)
+               build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
+                                                               nonunion_pathkeys, &dLeftGroups);
+       else
+               dLeftGroups = lrel->rows;
        if (rrel->rtekind == RTE_SUBQUERY)
                build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
-                                                               NIL, &dRightGroups);
+                                                               nonunion_pathkeys, &dRightGroups);
        else
                dRightGroups = rrel->rows;
 
@@ -1102,30 +1124,11 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
        lpath = lrel->cheapest_total_path;
        rpath = rrel->cheapest_total_path;
 
-       /*
-        * Generate tlist for SetOp plan node.
-        *
-        * The tlist for a SetOp plan isn't important so far as the SetOp is
-        * concerned, but we must make it look real anyway for the benefit of the
-        * next plan level up.
-        */
-       tlist = generate_setop_tlist(op->colTypes, op->colCollations, -1,
-                                                                0, false, lpath_tlist, refnames_tlist,
-                                                                &result_trivial_tlist);
-
-       /* We should not have needed any type coercions in the tlist */
-       Assert(result_trivial_tlist);
-
-       *pTargetList = tlist;
-
        /* Build result relation. */
        result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
                                                                 bms_union(lrel->relids, rrel->relids));
        result_rel->reltarget = create_pathtarget(root, tlist);
 
-       /* Identify the grouping semantics */
-       groupList = generate_setop_grouplist(op, tlist);
-
        /*
         * Estimate number of distinct groups that we'll need hashtable entries
         * for; this is the size of the left-hand input for EXCEPT, or the smaller
@@ -1144,66 +1147,106 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
                dNumGroups = dLeftGroups;
                dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
        }
+       result_rel->rows = dNumOutputRows;
+
+       /* Select the SetOpCmd type */
+       switch (op->op)
+       {
+               case SETOP_INTERSECT:
+                       cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
+                       break;
+               case SETOP_EXCEPT:
+                       cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
+                       break;
+               default:
+                       elog(ERROR, "unrecognized set op: %d", (int) op->op);
+                       cmd = SETOPCMD_INTERSECT;       /* keep compiler quiet */
+                       break;
+       }
 
        /*
-        * Decide whether to hash or sort, and add sort nodes if needed.
+        * If we can hash, that just requires a SetOp atop the cheapest inputs.
         */
-       use_hash = choose_hashed_setop(root, groupList, lpath, rpath,
-                                                                  dNumGroups, dNumOutputRows,
-                                                                  (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
+       if (can_hash)
+       {
+               path = (Path *) create_setop_path(root,
+                                                                                 result_rel,
+                                                                                 lpath,
+                                                                                 rpath,
+                                                                                 cmd,
+                                                                                 SETOP_HASHED,
+                                                                                 groupList,
+                                                                                 dNumGroups,
+                                                                                 dNumOutputRows);
+               add_path(result_rel, path);
+       }
 
-       if (groupList && !use_hash)
+       /*
+        * If we can sort, generate the cheapest sorted input paths, and add a
+        * SetOp atop those.
+        */
+       if (can_sort)
        {
                List       *pathkeys;
+               Path       *slpath,
+                                  *srpath;
 
+               /* First the left input ... */
                pathkeys = make_pathkeys_for_sortclauses(root,
                                                                                                 groupList,
                                                                                                 lpath_tlist);
-               if (!pathkeys_contained_in(pathkeys, lpath->pathkeys))
-                       lpath = (Path *) create_sort_path(root,
-                                                                                         lpath->parent,
-                                                                                         lpath,
-                                                                                         pathkeys,
-                                                                                         -1.0);
+               if (pathkeys_contained_in(pathkeys, lpath->pathkeys))
+                       slpath = lpath;         /* cheapest path is already sorted */
+               else
+               {
+                       slpath = get_cheapest_path_for_pathkeys(lrel->pathlist,
+                                                                                                       nonunion_pathkeys,
+                                                                                                       NULL,
+                                                                                                       TOTAL_COST,
+                                                                                                       false);
+                       /* Subquery failed to produce any presorted paths? */
+                       if (slpath == NULL)
+                               slpath = (Path *) create_sort_path(root,
+                                                                                                  lpath->parent,
+                                                                                                  lpath,
+                                                                                                  pathkeys,
+                                                                                                  -1.0);
+               }
+
+               /* and now the same for the right. */
                pathkeys = make_pathkeys_for_sortclauses(root,
                                                                                                 groupList,
                                                                                                 rpath_tlist);
-               if (!pathkeys_contained_in(pathkeys, rpath->pathkeys))
-                       rpath = (Path *) create_sort_path(root,
-                                                                                         rpath->parent,
-                                                                                         rpath,
-                                                                                         pathkeys,
-                                                                                         -1.0);
-       }
+               if (pathkeys_contained_in(pathkeys, rpath->pathkeys))
+                       srpath = rpath;         /* cheapest path is already sorted */
+               else
+               {
+                       srpath = get_cheapest_path_for_pathkeys(rrel->pathlist,
+                                                                                                       nonunion_pathkeys,
+                                                                                                       NULL,
+                                                                                                       TOTAL_COST,
+                                                                                                       false);
+                       /* Subquery failed to produce any presorted paths? */
+                       if (srpath == NULL)
+                               srpath = (Path *) create_sort_path(root,
+                                                                                                  rpath->parent,
+                                                                                                  rpath,
+                                                                                                  pathkeys,
+                                                                                                  -1.0);
+               }
 
-       /*
-        * Finally, add a SetOp path node to generate the correct output.
-        */
-       switch (op->op)
-       {
-               case SETOP_INTERSECT:
-                       cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
-                       break;
-               case SETOP_EXCEPT:
-                       cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
-                       break;
-               default:
-                       elog(ERROR, "unrecognized set op: %d", (int) op->op);
-                       cmd = SETOPCMD_INTERSECT;       /* keep compiler quiet */
-                       break;
+               path = (Path *) create_setop_path(root,
+                                                                                 result_rel,
+                                                                                 slpath,
+                                                                                 srpath,
+                                                                                 cmd,
+                                                                                 SETOP_SORTED,
+                                                                                 groupList,
+                                                                                 dNumGroups,
+                                                                                 dNumOutputRows);
+               add_path(result_rel, path);
        }
-       path = (Path *) create_setop_path(root,
-                                                                         result_rel,
-                                                                         lpath,
-                                                                         rpath,
-                                                                         cmd,
-                                                                         use_hash ? SETOP_HASHED : SETOP_SORTED,
-                                                                         groupList,
-                                                                         dNumGroups,
-                                                                         dNumOutputRows);
-
-       result_rel->rows = path->rows;
-       add_path(result_rel, path);
+
        return result_rel;
 }
 
@@ -1259,17 +1302,15 @@ plan_union_children(PlannerInfo *root,
                /*
                 * Not same, so plan this child separately.
                 *
-                * Note we disallow any resjunk columns in child results.  This is
-                * necessary since the Append node that implements the union won't do
-                * any projection, and upper levels will get confused if some of our
-                * output tuples have junk and some don't.  This case only arises when
-                * we have an EXCEPT or INTERSECT as child, else there won't be
-                * resjunk anyway.
+                * If top_union isn't a UNION ALL, then we are interested in sorted
+                * output from the child, so pass top_union as parentOp.  Note that
+                * this isn't necessarily the child node's immediate SetOperationStmt
+                * parent, but that's fine: it's the effective parent.
                 */
                result = lappend(result, recurse_set_operations(setOp, root,
+                                                                                                               top_union->all ? NULL : top_union,
                                                                                                                top_union->colTypes,
                                                                                                                top_union->colCollations,
-                                                                                                               false, -1,
                                                                                                                refnames_tlist,
                                                                                                                &child_tlist,
                                                                                                                &trivial_tlist));
@@ -1298,121 +1339,11 @@ postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
        set_cheapest(rel);
 }
 
-/*
- * choose_hashed_setop - should we use hashing for a set operation?
- *
- * XXX probably this should go away: just make both paths and let
- * add_path sort it out.
- */
-static bool
-choose_hashed_setop(PlannerInfo *root, List *groupClauses,
-                                       Path *lpath, Path *rpath,
-                                       double dNumGroups, double dNumOutputRows,
-                                       const char *construct)
-{
-       int                     numGroupCols = list_length(groupClauses);
-       Size            hash_mem_limit = get_hash_memory_limit();
-       bool            can_sort;
-       bool            can_hash;
-       Size            hashentrysize;
-       Path            hashed_p;
-       Path            sorted_p;
-       double          tuple_fraction;
-
-       /* Check whether the operators support sorting or hashing */
-       can_sort = grouping_is_sortable(groupClauses);
-       can_hash = grouping_is_hashable(groupClauses);
-       if (can_hash && can_sort)
-       {
-               /* we have a meaningful choice to make, continue ... */
-       }
-       else if (can_hash)
-               return true;
-       else if (can_sort)
-               return false;
-       else
-               ereport(ERROR,
-                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-               /* translator: %s is UNION, INTERSECT, or EXCEPT */
-                                errmsg("could not implement %s", construct),
-                                errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
-
-       /* Prefer sorting when enable_hashagg is off */
-       if (!enable_hashagg)
-               return false;
-
-       /*
-        * Don't do it if it doesn't look like the hashtable will fit into
-        * hash_mem.
-        */
-       hashentrysize = MAXALIGN(lpath->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
-
-       if (hashentrysize * dNumGroups > hash_mem_limit)
-               return false;
-
-       /*
-        * See if the estimated cost is no more than doing it the other way.
-        *
-        * We need to consider input_plan + hashagg versus input_plan + sort +
-        * group. XXX NOT TRUE: Note that the actual result plan might involve a
-        * SetOp or Unique node, not Agg or Group, but the cost estimates for Agg
-        * and Group should be close enough for our purposes here.
-        *
-        * These path variables are dummies that just hold cost fields; we don't
-        * make actual Paths for these steps.
-        */
-       cost_agg(&hashed_p, root, AGG_HASHED, NULL,
-                        numGroupCols, dNumGroups,
-                        NIL,
-                        lpath->disabled_nodes + rpath->disabled_nodes,
-                        lpath->startup_cost + rpath->startup_cost,
-                        lpath->total_cost + rpath->total_cost,
-                        lpath->rows + rpath->rows,
-                        lpath->pathtarget->width);
-
-       /*
-        * Now for the sorted case.  XXX NOT TRUE: Note that the input is *always*
-        * unsorted, since it was made by appending unrelated sub-relations
-        * together.
-        */
-       sorted_p.disabled_nodes = lpath->disabled_nodes + rpath->disabled_nodes;
-       sorted_p.startup_cost = lpath->startup_cost + rpath->startup_cost;
-       sorted_p.total_cost = lpath->total_cost + rpath->total_cost;
-       /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
-       cost_sort(&sorted_p, root, NIL, sorted_p.disabled_nodes,
-                         sorted_p.total_cost,
-                         lpath->rows + rpath->rows,
-                         lpath->pathtarget->width,
-                         0.0, work_mem, -1.0);
-       cost_group(&sorted_p, root, numGroupCols, dNumGroups,
-                          NIL,
-                          sorted_p.disabled_nodes,
-                          sorted_p.startup_cost, sorted_p.total_cost,
-                          lpath->rows + rpath->rows);
-
-       /*
-        * Now make the decision using the top-level tuple fraction.  First we
-        * have to convert an absolute count (LIMIT) into fractional form.
-        */
-       tuple_fraction = root->tuple_fraction;
-       if (tuple_fraction >= 1.0)
-               tuple_fraction /= dNumOutputRows;
-
-       if (compare_fractional_path_costs(&hashed_p, &sorted_p,
-                                                                         tuple_fraction) < 0)
-       {
-               /* Hashed is cheaper, so use it */
-               return true;
-       }
-       return false;
-}
-
 /*
  * Generate targetlist for a set-operation plan node
  *
  * colTypes: OID list of set-op's result column datatypes
  * colCollations: OID list of set-op's result column collations
- * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
  * varno: varno to use in generated Vars
  * hack_constants: true to copy up constants (see comments in code)
  * input_tlist: targetlist of this node's input node
@@ -1421,7 +1352,6 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
  */
 static List *
 generate_setop_tlist(List *colTypes, List *colCollations,
-                                        int flag,
                                         Index varno,
                                         bool hack_constants,
                                         List *input_tlist,
@@ -1520,7 +1450,7 @@ generate_setop_tlist(List *colTypes, List *colCollations,
                                                          false);
 
                /*
-                * By convention, all non-resjunk columns in a setop tree have
+                * By convention, all output columns in a setop tree have
                 * ressortgroupref equal to their resno.  In some cases the ref isn't
                 * needed, but this is a cleaner way than modifying the tlist later.
                 */
@@ -1529,25 +1459,6 @@ generate_setop_tlist(List *colTypes, List *colCollations,
                tlist = lappend(tlist, tle);
        }
 
-       if (flag >= 0)
-       {
-               /* Add a resjunk flag column */
-               /* flag value is the given constant */
-               expr = (Node *) makeConst(INT4OID,
-                                                                 -1,
-                                                                 InvalidOid,
-                                                                 sizeof(int32),
-                                                                 Int32GetDatum(flag),
-                                                                 false,
-                                                                 true);
-               tle = makeTargetEntry((Expr *) expr,
-                                                         (AttrNumber) resno++,
-                                                         pstrdup("flag"),
-                                                         true);
-               tlist = lappend(tlist, tle);
-               *trivial_tlist = false; /* the extra entry makes it not trivial */
-       }
-
        return tlist;
 }
 
@@ -1556,7 +1467,6 @@ generate_setop_tlist(List *colTypes, List *colCollations,
  *
  * colTypes: OID list of set-op's result column datatypes
  * colCollations: OID list of set-op's result column collations
- * flag: true to create a flag column copied up from subplans
  * input_tlists: list of tlists for sub-plans of the Append
  * refnames_tlist: targetlist to take column names from
  *
@@ -1570,7 +1480,6 @@ generate_setop_tlist(List *colTypes, List *colCollations,
  */
 static List *
 generate_append_tlist(List *colTypes, List *colCollations,
-                                         bool flag,
                                          List *input_tlists,
                                          List *refnames_tlist)
 {
@@ -1604,8 +1513,7 @@ generate_append_tlist(List *colTypes, List *colCollations,
                {
                        TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
 
-                       if (subtle->resjunk)
-                               continue;
+                       Assert(!subtle->resjunk);
                        Assert(curColType != NULL);
                        if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
                        {
@@ -1654,7 +1562,7 @@ generate_append_tlist(List *colTypes, List *colCollations,
                                                          false);
 
                /*
-                * By convention, all non-resjunk columns in a setop tree have
+                * By convention, all output columns in a setop tree have
                 * ressortgroupref equal to their resno.  In some cases the ref isn't
                 * needed, but this is a cleaner way than modifying the tlist later.
                 */
@@ -1663,23 +1571,6 @@ generate_append_tlist(List *colTypes, List *colCollations,
                tlist = lappend(tlist, tle);
        }
 
-       if (flag)
-       {
-               /* Add a resjunk flag column */
-               /* flag value is shown as copied up from subplan */
-               expr = (Node *) makeVar(0,
-                                                               resno,
-                                                               INT4OID,
-                                                               -1,
-                                                               InvalidOid,
-                                                               0);
-               tle = makeTargetEntry((Expr *) expr,
-                                                         (AttrNumber) resno++,
-                                                         pstrdup("flag"),
-                                                         true);
-               tlist = lappend(tlist, tle);
-       }
-
        pfree(colTypmods);
 
        return tlist;
@@ -1709,12 +1600,7 @@ generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
                TargetEntry *tle = (TargetEntry *) lfirst(lt);
                SortGroupClause *sgc;
 
-               if (tle->resjunk)
-               {
-                       /* resjunk columns should not have sortgrouprefs */
-                       Assert(tle->ressortgroupref == 0);
-                       continue;                       /* ignore resjunk columns */
-               }
+               Assert(!tle->resjunk);
 
                /* non-resjunk columns should have sortgroupref = resno */
                Assert(tle->ressortgroupref == tle->resno);
index e52e4b1d6774da6041bb50bc2ec01999f427f2aa..4f74cafa259c50f43a678841fb288bb21cfdfec4 100644 (file)
@@ -3681,17 +3681,70 @@ create_setop_path(PlannerInfo *root,
        pathnode->numGroups = numGroups;
 
        /*
-        * Charge one cpu_operator_cost per comparison per input tuple. We assume
-        * all columns get compared at most of the tuples.
-        *
-        * XXX all wrong for hashing
+        * Compute cost estimates.  As things stand, we end up with the same total
+        * cost in this node for sort and hash methods, but different startup
+        * costs.  This could be refined perhaps, but it'll do for now.
         */
        pathnode->path.disabled_nodes =
                leftpath->disabled_nodes + rightpath->disabled_nodes;
-       pathnode->path.startup_cost =
-               leftpath->startup_cost + rightpath->startup_cost;
-       pathnode->path.total_cost = leftpath->total_cost + rightpath->total_cost +
-               cpu_operator_cost * (leftpath->rows + rightpath->rows) * list_length(groupList);
+       if (strategy == SETOP_SORTED)
+       {
+               /*
+                * In sorted mode, we can emit output incrementally.  Charge one
+                * cpu_operator_cost per comparison per input tuple.  Like cost_group,
+                * we assume all columns get compared at most of the tuples.
+                */
+               pathnode->path.startup_cost =
+                       leftpath->startup_cost + rightpath->startup_cost;
+               pathnode->path.total_cost =
+                       leftpath->total_cost + rightpath->total_cost +
+                       cpu_operator_cost * (leftpath->rows + rightpath->rows) * list_length(groupList);
+
+               /*
+                * Also charge a small amount per extracted tuple.  Like cost_sort,
+                * charge only operator cost not cpu_tuple_cost, since SetOp does no
+                * qual-checking or projection.
+                */
+               pathnode->path.total_cost += cpu_operator_cost * outputRows;
+       }
+       else
+       {
+               Size            hashentrysize;
+
+               /*
+                * In hashed mode, we must read all the input before we can emit
+                * anything.  Also charge comparison costs to represent the cost of
+                * hash table lookups.
+                */
+               pathnode->path.startup_cost =
+                       leftpath->total_cost + rightpath->total_cost +
+                       cpu_operator_cost * (leftpath->rows + rightpath->rows) * list_length(groupList);
+               pathnode->path.total_cost = pathnode->path.startup_cost;
+
+               /*
+                * Also charge a small amount per extracted tuple.  Like cost_sort,
+                * charge only operator cost not cpu_tuple_cost, since SetOp does no
+                * qual-checking or projection.
+                */
+               pathnode->path.total_cost += cpu_operator_cost * outputRows;
+
+               /*
+                * Mark the path as disabled if enable_hashagg is off.  While this
+                * isn't exactly a HashAgg node, it seems close enough to justify
+                * letting that switch control it.
+                */
+               if (!enable_hashagg)
+                       pathnode->path.disabled_nodes++;
+
+               /*
+                * Also disable if it doesn't look like the hashtable will fit into
+                * hash_mem.
+                */
+               hashentrysize = MAXALIGN(leftpath->pathtarget->width) +
+                       MAXALIGN(SizeofMinimalTupleHeader);
+               if (hashentrysize * numGroups > get_hash_memory_limit())
+                       pathnode->path.disabled_nodes++;
+       }
        pathnode->path.rows = outputRows;
 
        return pathnode;
index a52dec285d569970c49e702f8c0d41aa2505d279..74bb6c1d3b1928272ef4bf569eb38bf6f136dac1 100644 (file)
@@ -53,6 +53,5 @@ extern void preprocess_aggrefs(PlannerInfo *root, Node *clause);
  * prototypes for prepunion.c
  */
 extern RelOptInfo *plan_set_operations(PlannerInfo *root);
-extern bool set_operation_ordered_results_useful(SetOperationStmt *setop);
 
 #endif                                                 /* PREP_H */
index b4cd204de779cf9342ef7aefd380c5d4d7591083..ebc545e24615cad97435d650d26ae83768625781 100644 (file)
@@ -1221,8 +1221,10 @@ where o.ten = 0;
 (1 row)
 
 --
--- Test rescan of a SetOp node
+-- Test rescan of a hashed SetOp node
 --
+begin;
+set local enable_sort = off;
 explain (costs off)
 select count(*) from
   onek o cross join lateral (
@@ -1256,6 +1258,50 @@ where o.ten = 1;
    100
 (1 row)
 
+rollback;
+--
+-- Test rescan of a sorted SetOp node
+--
+begin;
+set local enable_hashagg = off;
+explain (costs off)
+select count(*) from
+  onek o cross join lateral (
+    select * from onek i1 where i1.unique1 = o.unique1
+    except
+    select * from onek i2 where i2.unique1 = o.unique2
+  ) ss
+where o.ten = 1;
+                                                                                                     QUERY PLAN                                                                                                      
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Aggregate
+   ->  Nested Loop
+         ->  Seq Scan on onek o
+               Filter: (ten = 1)
+         ->  SetOp Except
+               ->  Sort
+                     Sort Key: i1.unique1, i1.unique2, i1.two, i1.four, i1.ten, i1.twenty, i1.hundred, i1.thousand, i1.twothousand, i1.fivethous, i1.tenthous, i1.odd, i1.even, i1.stringu1, i1.stringu2, i1.string4
+                     ->  Index Scan using onek_unique1 on onek i1
+                           Index Cond: (unique1 = o.unique1)
+               ->  Sort
+                     Sort Key: i2.unique1, i2.unique2, i2.two, i2.four, i2.ten, i2.twenty, i2.hundred, i2.thousand, i2.twothousand, i2.fivethous, i2.tenthous, i2.odd, i2.even, i2.stringu1, i2.stringu2, i2.string4
+                     ->  Index Scan using onek_unique1 on onek i2
+                           Index Cond: (unique1 = o.unique2)
+(13 rows)
+
+select count(*) from
+  onek o cross join lateral (
+    select * from onek i1 where i1.unique1 = o.unique1
+    except
+    select * from onek i2 where i2.unique1 = o.unique2
+  ) ss
+where o.ten = 1;
+ count 
+-------
+   100
+(1 row)
+
+rollback;
 --
 -- Test rescan of a RecursiveUnion node
 --
index 17a0dea719563d2a2fbae21fa7b4a55586a786ec..caa8fe70a0586d1d41cc86092793ee3bb55cbc1a 100644 (file)
@@ -385,13 +385,15 @@ select count(*) from
   5000
 (1 row)
 
+-- this query will prefer a sorted setop unless we force it.
+set enable_indexscan to off;
 explain (costs off)
 select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
-                         QUERY PLAN                         
-------------------------------------------------------------
+           QUERY PLAN            
+---------------------------------
  HashSetOp Except
-   ->  Index Only Scan using tenk1_unique1 on tenk1
-   ->  Index Only Scan using tenk1_unique2 on tenk1 tenk1_1
+   ->  Seq Scan on tenk1
+   ->  Seq Scan on tenk1 tenk1_1
          Filter: (unique2 <> 10)
 (4 rows)
 
@@ -401,6 +403,7 @@ select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
       10
 (1 row)
 
+reset enable_indexscan;
 -- the hashed implementation is sensitive to child plans' tuple slot types
 explain (costs off)
 select * from int8_tbl intersect select q2, q1 from int8_tbl order by 1, 2;
@@ -455,17 +458,15 @@ select count(*) from
 explain (costs off)
 select count(*) from
   ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
-                               QUERY PLAN                               
-------------------------------------------------------------------------
+                            QUERY PLAN                            
+------------------------------------------------------------------
  Aggregate
    ->  SetOp Intersect
          ->  Sort
                Sort Key: tenk1.fivethous
                ->  Seq Scan on tenk1
-         ->  Sort
-               Sort Key: tenk1_1.unique1
-               ->  Index Only Scan using tenk1_unique1 on tenk1 tenk1_1
-(8 rows)
+         ->  Index Only Scan using tenk1_unique1 on tenk1 tenk1_1
+(6 rows)
 
 select count(*) from
   ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
@@ -476,17 +477,13 @@ select count(*) from
 
 explain (costs off)
 select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
-                            QUERY PLAN                            
-------------------------------------------------------------------
+                         QUERY PLAN                         
+------------------------------------------------------------
  SetOp Except
-   ->  Sort
-         Sort Key: tenk1.unique1
-         ->  Index Only Scan using tenk1_unique1 on tenk1
-   ->  Sort
-         Sort Key: tenk1_1.unique2
-         ->  Index Only Scan using tenk1_unique2 on tenk1 tenk1_1
-               Filter: (unique2 <> 10)
-(8 rows)
+   ->  Index Only Scan using tenk1_unique1 on tenk1
+   ->  Index Only Scan using tenk1_unique2 on tenk1 tenk1_1
+         Filter: (unique2 <> 10)
+(4 rows)
 
 select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
  unique1 
@@ -494,6 +491,20 @@ select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
       10
 (1 row)
 
+explain (costs off)
+select f1 from int4_tbl union all
+  (select unique1 from tenk1 union select unique2 from tenk1);
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Append
+   ->  Seq Scan on int4_tbl
+   ->  Unique
+         ->  Merge Append
+               Sort Key: tenk1.unique1
+               ->  Index Only Scan using tenk1_unique1 on tenk1
+               ->  Index Only Scan using tenk1_unique2 on tenk1 tenk1_1
+(7 rows)
+
 reset enable_hashagg;
 -- non-hashable type
 set enable_hashagg to on;
@@ -978,7 +989,7 @@ explain (costs off)
 select from generate_series(1,5) intersect select from generate_series(1,3);
                         QUERY PLAN                        
 ----------------------------------------------------------
HashSetOp Intersect
+ SetOp Intersect
    ->  Function Scan on generate_series
    ->  Function Scan on generate_series generate_series_1
 (3 rows)
index a6ab42454f09c498eb5438bd2880421a8e644f06..6ed3636a9e46da621f41496a709b633ab09f7095 100644 (file)
@@ -638,8 +638,11 @@ select sum(ss.tst::int) from
 where o.ten = 0;
 
 --
--- Test rescan of a SetOp node
+-- Test rescan of a hashed SetOp node
 --
+begin;
+set local enable_sort = off;
+
 explain (costs off)
 select count(*) from
   onek o cross join lateral (
@@ -657,6 +660,33 @@ select count(*) from
   ) ss
 where o.ten = 1;
 
+rollback;
+
+--
+-- Test rescan of a sorted SetOp node
+--
+begin;
+set local enable_hashagg = off;
+
+explain (costs off)
+select count(*) from
+  onek o cross join lateral (
+    select * from onek i1 where i1.unique1 = o.unique1
+    except
+    select * from onek i2 where i2.unique1 = o.unique2
+  ) ss
+where o.ten = 1;
+
+select count(*) from
+  onek o cross join lateral (
+    select * from onek i1 where i1.unique1 = o.unique1
+    except
+    select * from onek i2 where i2.unique1 = o.unique2
+  ) ss
+where o.ten = 1;
+
+rollback;
+
 --
 -- Test rescan of a RecursiveUnion node
 --
index e34e8a2d7e60deb9d18c72da5ce2fd9d5a97eb15..13700a6bfc4e918b951d6829e36b970146e62dca 100644 (file)
@@ -134,10 +134,15 @@ select count(*) from
 select count(*) from
   ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
 
+-- this query will prefer a sorted setop unless we force it.
+set enable_indexscan to off;
+
 explain (costs off)
 select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
 select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
 
+reset enable_indexscan;
+
 -- the hashed implementation is sensitive to child plans' tuple slot types
 explain (costs off)
 select * from int8_tbl intersect select q2, q1 from int8_tbl order by 1, 2;
@@ -162,6 +167,10 @@ explain (costs off)
 select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
 select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
 
+explain (costs off)
+select f1 from int4_tbl union all
+  (select unique1 from tenk1 union select unique2 from tenk1);
+
 reset enable_hashagg;
 
 -- non-hashable type