Preparatory refactoring for parallel merge join support.
authorRobert Haas <rhaas@postgresql.org>
Tue, 7 Mar 2017 15:33:29 +0000 (10:33 -0500)
committerRobert Haas <rhaas@postgresql.org>
Tue, 7 Mar 2017 15:33:29 +0000 (10:33 -0500)
Extract the logic used by hash_inner_and_outer into a separate
function, get_cheapest_parallel_safe_total_inner, so that it can
also be used to plan parallel merge joins.

Also, add a require_parallel_safe argument to the existing function
get_cheapest_path_for_pathkeys, because parallel merge join needs
to find the cheapest path for a given set of pathkeys that is
parallel-safe, not just the cheapest one overall.

Patch by me, reviewed by Dilip Kumar.

Discussion: http://postgr.es/m/CA+TgmoYOv+dFK0MWW6366dFj_xTnohQfoBDrHyB7d1oZhrgPjA@mail.gmail.com

src/backend/optimizer/path/allpaths.c
src/backend/optimizer/path/joinpath.c
src/backend/optimizer/path/pathkeys.c
src/include/optimizer/paths.h

index 633b5c1608d0d649431d4bb18150b6a3610f9764..87a3faff09db2ff0537ed30728677fdc0338161a 100644 (file)
@@ -1447,12 +1447,14 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
                get_cheapest_path_for_pathkeys(childrel->pathlist,
                                               pathkeys,
                                               NULL,
-                                              STARTUP_COST);
+                                              STARTUP_COST,
+                                              false);
            cheapest_total =
                get_cheapest_path_for_pathkeys(childrel->pathlist,
                                               pathkeys,
                                               NULL,
-                                              TOTAL_COST);
+                                              TOTAL_COST,
+                                              false);
 
            /*
             * If we can't find any paths with the right order just use the
@@ -1517,7 +1519,8 @@ get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
    cheapest = get_cheapest_path_for_pathkeys(rel->pathlist,
                                              NIL,
                                              required_outer,
-                                             TOTAL_COST);
+                                             TOTAL_COST,
+                                             false);
    Assert(cheapest != NULL);
    if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer))
        return cheapest;
index 99ec5834bfbf001780ad1d002d507c246a3d1cab..8bb5473330398015074714f0e84577752d51b4e5 100644 (file)
@@ -936,7 +936,8 @@ generate_mergejoin_paths(PlannerInfo *root,
        innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
                                                   trialsortkeys,
                                                   NULL,
-                                                  TOTAL_COST);
+                                                  TOTAL_COST,
+                                                  false);
        if (innerpath != NULL &&
            (cheapest_total_inner == NULL ||
             compare_path_costs(innerpath, cheapest_total_inner,
@@ -971,7 +972,8 @@ generate_mergejoin_paths(PlannerInfo *root,
        innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
                                                   trialsortkeys,
                                                   NULL,
-                                                  STARTUP_COST);
+                                                  STARTUP_COST,
+                                                  false);
        if (innerpath != NULL &&
            (cheapest_startup_inner == NULL ||
             compare_path_costs(innerpath, cheapest_startup_inner,
@@ -1517,21 +1519,8 @@ hash_inner_and_outer(PlannerInfo *root,
            if (cheapest_total_inner->parallel_safe)
                cheapest_safe_inner = cheapest_total_inner;
            else if (save_jointype != JOIN_UNIQUE_INNER)
-           {
-               ListCell   *lc;
-
-               foreach(lc, innerrel->pathlist)
-               {
-                   Path       *innerpath = (Path *) lfirst(lc);
-
-                   if (innerpath->parallel_safe &&
-                       bms_is_empty(PATH_REQ_OUTER(innerpath)))
-                   {
-                       cheapest_safe_inner = innerpath;
-                       break;
-                   }
-               }
-           }
+               cheapest_safe_inner =
+                   get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
 
            if (cheapest_safe_inner != NULL)
                try_partial_hashjoin_path(root, joinrel,
index 1065b31ad107fcb431c9aec64f8ef4417e0a9d85..2c269062ecd0e8cea3d96fa0d516770a2736b19d 100644 (file)
@@ -337,11 +337,13 @@ pathkeys_contained_in(List *keys1, List *keys2)
  * 'pathkeys' represents a required ordering (in canonical form!)
  * 'required_outer' denotes allowable outer relations for parameterized paths
  * 'cost_criterion' is STARTUP_COST or TOTAL_COST
+ * 'require_parallel_safe' causes us to consider only parallel-safe paths
  */
 Path *
 get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
                               Relids required_outer,
-                              CostSelector cost_criterion)
+                              CostSelector cost_criterion,
+                              bool require_parallel_safe)
 {
    Path       *matched_path = NULL;
    ListCell   *l;
@@ -358,6 +360,9 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
            compare_path_costs(matched_path, path, cost_criterion) <= 0)
            continue;
 
+       if (require_parallel_safe && !path->parallel_safe)
+           continue;
+
        if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
            bms_is_subset(PATH_REQ_OUTER(path), required_outer))
            matched_path = path;
@@ -407,6 +412,28 @@ get_cheapest_fractional_path_for_pathkeys(List *paths,
    return matched_path;
 }
 
+
+/*
+ * get_cheapest_parallel_safe_total_inner
+ *   Find the unparameterized parallel-safe path with the least total cost.
+ */
+Path *
+get_cheapest_parallel_safe_total_inner(List *paths)
+{
+   ListCell   *l;
+
+   foreach(l, paths)
+   {
+       Path       *innerpath = (Path *) lfirst(l);
+
+       if (innerpath->parallel_safe &&
+           bms_is_empty(PATH_REQ_OUTER(innerpath)))
+           return innerpath;
+   }
+
+   return NULL;
+}
+
 /****************************************************************************
  *     NEW PATHKEY FORMATION
  ****************************************************************************/
index ebda308c4166da190976ed881a00676cc386279d..bc0dcf4468f2eb1d780fb35d5a826e6bbda2ecd3 100644 (file)
@@ -182,11 +182,13 @@ extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
                               Relids required_outer,
-                              CostSelector cost_criterion);
+                              CostSelector cost_criterion,
+                              bool require_parallel_safe);
 extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths,
                                          List *pathkeys,
                                          Relids required_outer,
                                          double fraction);
+extern Path *get_cheapest_parallel_safe_total_inner(List *paths);
 extern List *build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index,
                     ScanDirection scandir);
 extern List *build_expression_pathkey(PlannerInfo *root, Expr *expr,