Consider explicit incremental sort for mergejoins

author Richard Guo <rguo@postgresql.org>
Wed, 9 Oct 2024 08:14:42 +0000 (17:14 +0900)
committer Richard Guo <rguo@postgresql.org>
Wed, 9 Oct 2024 08:14:42 +0000 (17:14 +0900)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index e1523d15df1c7450ae6f6eb4b468a8eac698832e..c6e66e46f4a0dfe83aa813a4dc3e359639c5b8f3 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3532,7 +3532,8 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
   * join quals here, except for obtaining the scan selectivity estimate which
   * is really essential (but fortunately, use of caching keeps the cost of
   * getting that down to something reasonable).
- * We also assume that cost_sort is cheap enough to use here.
+ * We also assume that cost_sort/cost_incremental_sort is cheap enough to use
+ * here.
   *
   * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
   *     other data to be used by final_cost_mergejoin
@@ -3569,7 +3570,8 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
                 outerendsel,
                 innerstartsel,
                 innerendsel;
-   Path        sort_path;      /* dummy for result of cost_sort */
+   Path        sort_path;      /* dummy for result of
+                                * cost_sort/cost_incremental_sort */
  
     /* Protect some assumptions below that rowcounts aren't zero */
     if (outer_path_rows <= 0)
@@ -3682,16 +3684,54 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
  
     if (outersortkeys)          /* do we need to sort outer? */
     {
-       cost_sort(&sort_path,
-                 root,
-                 outersortkeys,
-                 outer_path->disabled_nodes,
-                 outer_path->total_cost,
-                 outer_path_rows,
-                 outer_path->pathtarget->width,
-                 0.0,
-                 work_mem,
-                 -1.0);
+       bool        use_incremental_sort = false;
+       int         presorted_keys;
+
+       /*
+        * We choose to use incremental sort if it is enabled and there are
+        * presorted keys; otherwise we use full sort.
+        */
+       if (enable_incremental_sort)
+       {
+           bool        is_sorted PG_USED_FOR_ASSERTS_ONLY;
+
+           is_sorted = pathkeys_count_contained_in(outersortkeys,
+                                                   outer_path->pathkeys,
+                                                   &presorted_keys);
+           Assert(!is_sorted);
+
+           if (presorted_keys > 0)
+               use_incremental_sort = true;
+       }
+
+       if (!use_incremental_sort)
+       {
+           cost_sort(&sort_path,
+                     root,
+                     outersortkeys,
+                     outer_path->disabled_nodes,
+                     outer_path->total_cost,
+                     outer_path_rows,
+                     outer_path->pathtarget->width,
+                     0.0,
+                     work_mem,
+                     -1.0);
+       }
+       else
+       {
+           cost_incremental_sort(&sort_path,
+                                 root,
+                                 outersortkeys,
+                                 presorted_keys,
+                                 outer_path->disabled_nodes,
+                                 outer_path->startup_cost,
+                                 outer_path->total_cost,
+                                 outer_path_rows,
+                                 outer_path->pathtarget->width,
+                                 0.0,
+                                 work_mem,
+                                 -1.0);
+       }
         disabled_nodes += sort_path.disabled_nodes;
         startup_cost += sort_path.startup_cost;
         startup_cost += (sort_path.total_cost - sort_path.startup_cost)
@@ -3711,6 +3751,11 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
  
     if (innersortkeys)          /* do we need to sort inner? */
     {
+       /*
+        * We do not consider incremental sort for inner path, because
+        * incremental sort does not support mark/restore.
+        */
+
         cost_sort(&sort_path,
                   root,
                   innersortkeys,
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index bb45ef318fb45d9e2223da48de4433c4ba1d2896..0d195a07ffc0cffd459923cc0ddfc06b4f4b3041 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -179,6 +179,8 @@ static void copy_generic_path_info(Plan *dest, Path *src);
  static void copy_plan_costsize(Plan *dest, Plan *src);
  static void label_sort_with_costsize(PlannerInfo *root, Sort *plan,
                                      double limit_tuples);
+static void label_incrementalsort_with_costsize(PlannerInfo *root, IncrementalSort *plan,
+                                               List *pathkeys, double limit_tuples);
  static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid);
  static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid,
                                    TableSampleClause *tsc);
@@ -4523,12 +4525,51 @@ create_mergejoin_plan(PlannerInfo *root,
     if (best_path->outersortkeys)
     {
         Relids      outer_relids = outer_path->parent->relids;
-       Sort       *sort = make_sort_from_pathkeys(outer_plan,
+       Plan       *sort_plan;
+       bool        use_incremental_sort = false;
+       int         presorted_keys;
+
+       /*
+        * We choose to use incremental sort if it is enabled and there are
+        * presorted keys; otherwise we use full sort.
+        */
+       if (enable_incremental_sort)
+       {
+           bool        is_sorted PG_USED_FOR_ASSERTS_ONLY;
+
+           is_sorted = pathkeys_count_contained_in(best_path->outersortkeys,
+                                                   outer_path->pathkeys,
+                                                   &presorted_keys);
+           Assert(!is_sorted);
+
+           if (presorted_keys > 0)
+               use_incremental_sort = true;
+       }
+
+       if (!use_incremental_sort)
+       {
+           sort_plan = (Plan *)
+               make_sort_from_pathkeys(outer_plan,
+                                       best_path->outersortkeys,
+                                       outer_relids);
+
+           label_sort_with_costsize(root, (Sort *) sort_plan, -1.0);
+       }
+       else
+       {
+           sort_plan = (Plan *)
+               make_incrementalsort_from_pathkeys(outer_plan,
                                                    best_path->outersortkeys,
-                                                  outer_relids);
+                                                  outer_relids,
+                                                  presorted_keys);
  
-       label_sort_with_costsize(root, sort, -1.0);
-       outer_plan = (Plan *) sort;
+           label_incrementalsort_with_costsize(root,
+                                               (IncrementalSort *) sort_plan,
+                                               best_path->outersortkeys,
+                                               -1.0);
+       }
+
+       outer_plan = sort_plan;
         outerpathkeys = best_path->outersortkeys;
     }
     else
@@ -4536,6 +4577,11 @@ create_mergejoin_plan(PlannerInfo *root,
  
     if (best_path->innersortkeys)
     {
+       /*
+        * We do not consider incremental sort for inner path, because
+        * incremental sort does not support mark/restore.
+        */
+
         Relids      inner_relids = inner_path->parent->relids;
         Sort       *sort = make_sort_from_pathkeys(inner_plan,
                                                    best_path->innersortkeys,
@@ -5447,10 +5493,6 @@ label_sort_with_costsize(PlannerInfo *root, Sort *plan, double limit_tuples)
     Plan       *lefttree = plan->plan.lefttree;
     Path        sort_path;      /* dummy for result of cost_sort */
  
-   /*
-    * This function shouldn't have to deal with IncrementalSort plans because
-    * they are only created from corresponding Path nodes.
-    */
     Assert(IsA(plan, Sort));
  
     cost_sort(&sort_path, root, NIL,
@@ -5469,6 +5511,37 @@ label_sort_with_costsize(PlannerInfo *root, Sort *plan, double limit_tuples)
     plan->plan.parallel_safe = lefttree->parallel_safe;
  }
  
+/*
+ * Same as label_sort_with_costsize, but labels the IncrementalSort node
+ * instead.
+ */
+static void
+label_incrementalsort_with_costsize(PlannerInfo *root, IncrementalSort *plan,
+                                   List *pathkeys, double limit_tuples)
+{
+   Plan       *lefttree = plan->sort.plan.lefttree;
+   Path        sort_path;      /* dummy for result of cost_incremental_sort */
+
+   Assert(IsA(plan, IncrementalSort));
+
+   cost_incremental_sort(&sort_path, root, pathkeys,
+                         plan->nPresortedCols,
+                         plan->sort.plan.disabled_nodes,
+                         lefttree->startup_cost,
+                         lefttree->total_cost,
+                         lefttree->plan_rows,
+                         lefttree->plan_width,
+                         0.0,
+                         work_mem,
+                         limit_tuples);
+   plan->sort.plan.startup_cost = sort_path.startup_cost;
+   plan->sort.plan.total_cost = sort_path.total_cost;
+   plan->sort.plan.plan_rows = lefttree->plan_rows;
+   plan->sort.plan.plan_width = lefttree->plan_width;
+   plan->sort.plan.parallel_aware = false;
+   plan->sort.plan.parallel_safe = lefttree->parallel_safe;
+}
+
  /*
   * bitmap_subplan_mark_shared
   *  Set isshared flag in bitmap subplan so that it will be created in
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out

index e14e73565675564a799248f265175af0d650c18e..495deb606e25aa8aa296927140676fb180688b6a 100644 (file)
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2858,29 +2858,27 @@ GROUP BY w, x, z, y;
           ->  Index Scan using btg_x_y_idx on btg
  (6 rows)
  
--- Utilize the ordering of merge join to avoid a full Sort operation
+-- Utilize the ordering of merge join to avoid a Sort operation
  SET enable_hashjoin = off;
  SET enable_nestloop = off;
  EXPLAIN (COSTS OFF)
  SELECT count(*)
-  FROM btg t1 JOIN btg t2 ON t1.z = t2.z AND t1.w = t2.w AND t1.x = t2.x
-  GROUP BY t1.x, t1.y, t1.z, t1.w;
-                                  QUERY PLAN                                   
--------------------------------------------------------------------------------
+  FROM btg t1 JOIN btg t2 ON t1.w = t2.w AND t1.x = t2.x AND t1.z = t2.z
+  GROUP BY t1.w, t1.z, t1.x;
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
   GroupAggregate
-   Group Key: t1.z, t1.w, t1.x, t1.y
-   ->  Incremental Sort
-         Sort Key: t1.z, t1.w, t1.x, t1.y
-         Presorted Key: t1.z, t1.w, t1.x
-         ->  Merge Join
-               Merge Cond: ((t1.z = t2.z) AND (t1.w = t2.w) AND (t1.x = t2.x))
-               ->  Sort
-                     Sort Key: t1.z, t1.w, t1.x
-                     ->  Index Scan using btg_x_y_idx on btg t1
-               ->  Sort
-                     Sort Key: t2.z, t2.w, t2.x
-                     ->  Index Scan using btg_x_y_idx on btg t2
-(13 rows)
+   Group Key: t1.x, t1.w, t1.z
+   ->  Merge Join
+         Merge Cond: ((t1.x = t2.x) AND (t1.w = t2.w) AND (t1.z = t2.z))
+         ->  Incremental Sort
+               Sort Key: t1.x, t1.w, t1.z
+               Presorted Key: t1.x
+               ->  Index Scan using btg_x_y_idx on btg t1
+         ->  Sort
+               Sort Key: t2.x, t2.w, t2.z
+               ->  Index Scan using btg_x_y_idx on btg t2
+(11 rows)
  
  RESET enable_nestloop;
  RESET enable_hashjoin;
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out

index 79f0d37a87ef677da4cddbdb5959258cc85ad320..c561b62b2db2a1323ad91aee86b964a851453358 100644 (file)
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1701,3 +1701,24 @@ explain (costs off) select a, b, a <-> point(5, 5) dist from point_table order b
                 Order By: (a <-> '(5,5)'::point)
  (6 rows)
  
+-- Ensure we get an incremental sort on the outer side of the mergejoin
+explain (costs off)
+select * from
+  (select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two
+order by t1.four, t1.two limit 1;
+                              QUERY PLAN                               
+-----------------------------------------------------------------------
+ Limit
+   ->  Merge Join
+         Merge Cond: ((tenk1.four = t2.four) AND (tenk1.two = t2.two))
+         ->  Incremental Sort
+               Sort Key: tenk1.four, tenk1.two
+               Presorted Key: tenk1.four
+               ->  Sort
+                     Sort Key: tenk1.four
+                     ->  Seq Scan on tenk1
+         ->  Sort
+               Sort Key: t2.four, t2.two
+               ->  Seq Scan on tenk1 t2
+(12 rows)
+
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql

index ddf38bafb4280d8ac8e5b97011d4983a8b5e7fce..4885daffe633e5f364a5347f25134268c4389334 100644 (file)
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1232,13 +1232,13 @@ EXPLAIN (COSTS OFF) SELECT count(*)
  FROM (SELECT * FROM btg ORDER BY x, y, w, z) AS q1
  GROUP BY w, x, z, y;
  
--- Utilize the ordering of merge join to avoid a full Sort operation
+-- Utilize the ordering of merge join to avoid a Sort operation
  SET enable_hashjoin = off;
  SET enable_nestloop = off;
  EXPLAIN (COSTS OFF)
  SELECT count(*)
-  FROM btg t1 JOIN btg t2 ON t1.z = t2.z AND t1.w = t2.w AND t1.x = t2.x
-  GROUP BY t1.x, t1.y, t1.z, t1.w;
+  FROM btg t1 JOIN btg t2 ON t1.w = t2.w AND t1.x = t2.x AND t1.z = t2.z
+  GROUP BY t1.w, t1.z, t1.x;
  RESET enable_nestloop;
  RESET enable_hashjoin;
  
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql

index ab471bdfffc12b3217608d8ae6a3f141626b29b3..98b20e17e180edd84a080142e8a7a361854ab264 100644 (file)
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -292,3 +292,9 @@ create index point_table_a_idx on point_table using gist(a);
  -- Ensure we get an incremental sort plan for both of the following queries
  explain (costs off) select a, b, a <-> point(5, 5) dist from point_table order by dist, b limit 1;
  explain (costs off) select a, b, a <-> point(5, 5) dist from point_table order by dist, b desc limit 1;
+
+-- Ensure we get an incremental sort on the outer side of the mergejoin
+explain (costs off)
+select * from
+  (select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two
+order by t1.four, t1.two limit 1;
author	Richard Guo <rguo@postgresql.org>
	Wed, 9 Oct 2024 08:14:42 +0000 (17:14 +0900)
committer	Richard Guo <rguo@postgresql.org>
	Wed, 9 Oct 2024 08:14:42 +0000 (17:14 +0900)
Files changed:
src/backend/optimizer/path/costsize.c
src/backend/optimizer/plan/createplan.c
src/test/regress/expected/aggregates.out
src/test/regress/expected/incremental_sort.out
src/test/regress/sql/aggregates.sql
src/test/regress/sql/incremental_sort.sql