Revert "Optimize order of GROUP BY keys".

author Tom Lane <tgl@sss.pgh.pa.us>

Mon, 3 Oct 2022 14:56:16 +0000 (10:56 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Mon, 3 Oct 2022 14:56:16 +0000 (10:56 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 3 Oct 2022 14:56:16 +0000 (10:56 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 3 Oct 2022 14:56:16 +0000 (10:56 -0400)
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out

index 2e4e82a94f1df82ee5278bfcc272203e148d972c..cc9e39c4a5f3897f3cc974db76c0f853989fa8ef 100644 (file)
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2862,13 +2862,16 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
  -- GROUP BY clause in various forms, cardinal, alias and constant expression
  explain (verbose, costs off)
  select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
-                                                 QUERY PLAN                                                 
-------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                      QUERY PLAN                                       
+---------------------------------------------------------------------------------------
+ Sort
     Output: (count(c2)), c2, 5, 7.0, 9
-   Relations: Aggregate on (public.ft1)
-   Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
-(4 rows)
+   Sort Key: ft1.c2
+   ->  Foreign Scan
+         Output: (count(c2)), c2, 5, 7.0, 9
+         Relations: Aggregate on (public.ft1)
+         Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5
+(7 rows)
  
  select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
    w  | x | y |  z  
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml

index d8848bc774f179ea5327b37051c66aaf43512c0c..d750290f1346da575382a824675f7cf5df6cea7d 100644 (file)
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -5062,20 +5062,6 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
        </listitem>
       </varlistentry>
  
-     <varlistentry id="guc-enable-groupby-reordering" xreflabel="enable_group_by_reordering">
-      <term><varname>enable_group_by_reordering</varname> (<type>boolean</type>)
-      <indexterm>
-       <primary><varname>enable_group_by_reordering</varname> configuration parameter</primary>
-      </indexterm>
-      </term>
-      <listitem>
-       <para>
-        Enables or disables reordering of keys in a <literal>GROUP BY</literal>
-        clause. The default is <literal>on</literal>.
-       </para>
-      </listitem>
-     </varlistentry>
-
       <varlistentry id="guc-enable-hashagg" xreflabel="enable_hashagg">
        <term><varname>enable_hashagg</varname> (<type>boolean</type>)
        <indexterm>
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index f486d42441b960f7f48928b772ccb381c7034f9f..5ef29eea69e0486656492a43236a49836a8cc361 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1814,327 +1814,6 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
                                     rterm->pathtarget->width);
  }
  
-/*
- * is_fake_var
- *     Workaround for generate_append_tlist() which generates fake Vars with
- *     varno == 0, that will cause a fail of estimate_num_group() call
- *
- * XXX Ummm, why would estimate_num_group fail with this?
- */
-static bool
-is_fake_var(Expr *expr)
-{
-   if (IsA(expr, RelabelType))
-       expr = (Expr *) ((RelabelType *) expr)->arg;
-
-   return (IsA(expr, Var) && ((Var *) expr)->varno == 0);
-}
-
-/*
- * get_width_cost_multiplier
- *     Returns relative complexity of comparing two values based on its width.
- * The idea behind is that the comparison becomes more expensive the longer the
- * value is. Return value is in cpu_operator_cost units.
- */
-static double
-get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
-{
-   double      width = -1.0;   /* fake value */
-
-   if (IsA(expr, RelabelType))
-       expr = (Expr *) ((RelabelType *) expr)->arg;
-
-   /* Try to find actual stat in corresponding relation */
-   if (IsA(expr, Var))
-   {
-       Var        *var = (Var *) expr;
-
-       if (var->varno > 0 && var->varno < root->simple_rel_array_size)
-       {
-           RelOptInfo *rel = root->simple_rel_array[var->varno];
-
-           if (rel != NULL &&
-               var->varattno >= rel->min_attr &&
-               var->varattno <= rel->max_attr)
-           {
-               int         ndx = var->varattno - rel->min_attr;
-
-               if (rel->attr_widths[ndx] > 0)
-                   width = rel->attr_widths[ndx];
-           }
-       }
-   }
-
-   /* Didn't find any actual stats, try using type width instead. */
-   if (width < 0.0)
-   {
-       Node       *node = (Node *) expr;
-
-       width = get_typavgwidth(exprType(node), exprTypmod(node));
-   }
-
-   /*
-    * Values are passed as Datum type, so comparisons can't be cheaper than
-    * comparing a Datum value.
-    *
-    * FIXME I find this reasoning questionable. We may pass int2, and
-    * comparing it is probably a bit cheaper than comparing a bigint.
-    */
-   if (width <= sizeof(Datum))
-       return 1.0;
-
-   /*
-    * We consider the cost of a comparison not to be directly proportional to
-    * width of the argument, because widths of the arguments could be
-    * slightly different (we only know the average width for the whole
-    * column). So we use log16(width) as an estimate.
-    */
-   return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
-}
-
-/*
- * compute_cpu_sort_cost
- *     compute CPU cost of sort (i.e. in-memory)
- *
- * The main thing we need to calculate to estimate sort CPU costs is the number
- * of calls to the comparator functions. The difficulty is that for multi-column
- * sorts there may be different data types involved (for some of which the calls
- * may be much more expensive). Furthermore, columns may have a very different
- * number of distinct values - the higher the number, the fewer comparisons will
- * be needed for the following columns.
- *
- * The algorithm is incremental - we add pathkeys one by one, and at each step we
- * estimate the number of necessary comparisons (based on the number of distinct
- * groups in the current pathkey prefix and the new pathkey), and the comparison
- * costs (which is data type specific).
- *
- * Estimation of the number of comparisons is based on ideas from:
- *
- * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002
- * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf]
- *
- * In term of that paper, let N - number of tuples, Xi - number of identical
- * tuples with value Ki, then the estimate of number of comparisons is:
- *
- * log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
- *
- * We assume all Xi the same because now we don't have any estimation of
- * group sizes, we have only know the estimate of number of groups (distinct
- * values). In that case, formula becomes:
- *
- * N * log(NumberOfGroups)
- *
- * For multi-column sorts we need to estimate the number of comparisons for
- * each individual column - for example with columns (c1, c2, ..., ck) we
- * can estimate that number of comparisons on ck is roughly
- *
- * ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
- *
- * Let k be a column number, Gk - number of groups defined by k columns, and Fk
- * the cost of the comparison is
- *
- * N * sum( Fk * log(Gk) )
- *
- * Note: We also consider column width, not just the comparator cost.
- *
- * NOTE: some callers currently pass NIL for pathkeys because they
- * can't conveniently supply the sort keys. In this case, it will fallback to
- * simple comparison cost estimate.
- */
-static Cost
-compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
-                     Cost comparison_cost, double tuples, double output_tuples,
-                     bool heapSort)
-{
-   Cost        per_tuple_cost = 0.0;
-   ListCell   *lc;
-   List       *pathkeyExprs = NIL;
-   double      tuplesPerPrevGroup = tuples;
-   double      totalFuncCost = 1.0;
-   bool        has_fake_var = false;
-   int         i = 0;
-   Oid         prev_datatype = InvalidOid;
-   List       *cache_varinfos = NIL;
-
-   /* fallback if pathkeys is unknown */
-   if (pathkeys == NIL)
-   {
-       /*
-        * If we'll use a bounded heap-sort keeping just K tuples in memory,
-        * for a total number of tuple comparisons of N log2 K; but the
-        * constant factor is a bit higher than for quicksort. Tweak it so
-        * that the cost curve is continuous at the crossover point.
-        */
-       output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
-       per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
-
-       /* add cost provided by caller */
-       per_tuple_cost += comparison_cost;
-
-       return per_tuple_cost * tuples;
-   }
-
-   /*
-    * Computing total cost of sorting takes into account the per-column
-    * comparison function cost.  We try to compute the needed number of
-    * comparisons per column.
-    */
-   foreach(lc, pathkeys)
-   {
-       PathKey    *pathkey = (PathKey *) lfirst(lc);
-       EquivalenceMember *em;
-       double      nGroups,
-                   correctedNGroups;
-       Cost        funcCost = 1.0;
-
-       /*
-        * We believe that equivalence members aren't very different, so, to
-        * estimate cost we consider just the first member.
-        */
-       em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
-
-       if (em->em_datatype != InvalidOid)
-       {
-           /* do not lookup funcCost if the data type is the same */
-           if (prev_datatype != em->em_datatype)
-           {
-               Oid         sortop;
-               QualCost    cost;
-
-               sortop = get_opfamily_member(pathkey->pk_opfamily,
-                                            em->em_datatype, em->em_datatype,
-                                            pathkey->pk_strategy);
-
-               cost.startup = 0;
-               cost.per_tuple = 0;
-               add_function_cost(root, get_opcode(sortop), NULL, &cost);
-
-               /*
-                * add_function_cost returns the product of cpu_operator_cost
-                * and procost, but we need just procost, co undo that.
-                */
-               funcCost = cost.per_tuple / cpu_operator_cost;
-
-               prev_datatype = em->em_datatype;
-           }
-       }
-
-       /* factor in the width of the values in this column */
-       funcCost *= get_width_cost_multiplier(root, em->em_expr);
-
-       /* now we have per-key cost, so add to the running total */
-       totalFuncCost += funcCost;
-
-       /* remember if we have found a fake Var in pathkeys */
-       has_fake_var |= is_fake_var(em->em_expr);
-       pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
-
-       /*
-        * We need to calculate the number of comparisons for this column,
-        * which requires knowing the group size. So we estimate the number of
-        * groups by calling estimate_num_groups_incremental(), which
-        * estimates the group size for "new" pathkeys.
-        *
-        * Note: estimate_num_groups_incremental does not handle fake Vars, so
-        * use a default estimate otherwise.
-        */
-       if (!has_fake_var)
-           nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
-                                                     tuplesPerPrevGroup, NULL, NULL,
-                                                     &cache_varinfos,
-                                                     list_length(pathkeyExprs) - 1);
-       else if (tuples > 4.0)
-
-           /*
-            * Use geometric mean as estimation if there are no stats.
-            *
-            * We don't use DEFAULT_NUM_DISTINCT here, because that's used for
-            * a single column, but here we're dealing with multiple columns.
-            */
-           nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
-       else
-           nGroups = tuples;
-
-       /*
-        * Presorted keys are not considered in the cost above, but we still
-        * do have to compare them in the qsort comparator. So make sure to
-        * factor in the cost in that case.
-        */
-       if (i >= nPresortedKeys)
-       {
-           if (heapSort)
-           {
-               /*
-                * have to keep at least one group, and a multiple of group
-                * size
-                */
-               correctedNGroups = ceil(output_tuples / tuplesPerPrevGroup);
-           }
-           else
-               /* all groups in the input */
-               correctedNGroups = nGroups;
-
-           correctedNGroups = Max(1.0, ceil(correctedNGroups));
-
-           per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
-       }
-
-       i++;
-
-       /*
-        * Uniform distributions with all groups being of the same size are
-        * the best case, with nice smooth behavior. Real-world distributions
-        * tend not to be uniform, though, and we don't have any reliable
-        * easy-to-use information. As a basic defense against skewed
-        * distributions, we use a 1.5 factor to make the expected group a bit
-        * larger, but we need to be careful not to make the group larger than
-        * in the preceding step.
-        */
-       tuplesPerPrevGroup = Min(tuplesPerPrevGroup,
-                                ceil(1.5 * tuplesPerPrevGroup / nGroups));
-
-       /*
-        * Once we get single-row group, it means tuples in the group are
-        * unique and we can skip all remaining columns.
-        */
-       if (tuplesPerPrevGroup <= 1.0)
-           break;
-   }
-
-   list_free(pathkeyExprs);
-
-   /* per_tuple_cost is in cpu_operator_cost units */
-   per_tuple_cost *= cpu_operator_cost;
-
-   /*
-    * Accordingly to "Introduction to algorithms", Thomas H. Cormen, Charles
-    * E. Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0, quicksort
-    * estimation formula has additional term proportional to number of tuples
-    * (see Chapter 8.2 and Theorem 4.1). That affects cases with a low number
-    * of tuples, approximately less than 1e4. We could implement it as an
-    * additional multiplier under the logarithm, but we use a bit more
-    * complex formula which takes into account the number of unique tuples
-    * and it's not clear how to combine the multiplier with the number of
-    * groups. Estimate it as 10 cpu_operator_cost units.
-    */
-   per_tuple_cost += 10 * cpu_operator_cost;
-
-   per_tuple_cost += comparison_cost;
-
-   return tuples * per_tuple_cost;
-}
-
-/*
- * simple wrapper just to estimate best sort path
- */
-Cost
-cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
-                  double tuples)
-{
-   return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
-                                0, tuples, tuples, false);
-}
-
  /*
   * cost_tuplesort
   *   Determines and returns the cost of sorting a relation using tuplesort,
@@ -2151,7 +1830,7 @@ cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
   * number of initial runs formed and M is the merge order used by tuplesort.c.
   * Since the average initial run should be about sort_mem, we have
   *     disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- *         and cpu cost (computed by compute_cpu_sort_cost()).
+ *     cpu = comparison_cost * t * log2(t)
   *
   * If the sort is bounded (i.e., only the first k result tuples are needed)
   * and k tuples can fit into sort_mem, we use a heap method that keeps only
@@ -2170,11 +1849,9 @@ cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
   * 'comparison_cost' is the extra cost per comparison, if any
   * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
   * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
- * 'startup_cost' is expected to be 0 at input. If there is "input cost" it should
- * be added by caller later
   */
  static void
-cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(Cost *startup_cost, Cost *run_cost,
                double tuples, int width,
                Cost comparison_cost, int sort_mem,
                double limit_tuples)
@@ -2191,6 +1868,9 @@ cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_
     if (tuples < 2.0)
         tuples = 2.0;
  
+   /* Include the default cost-per-comparison */
+   comparison_cost += 2.0 * cpu_operator_cost;
+
     /* Do we have a useful LIMIT? */
     if (limit_tuples > 0 && limit_tuples < tuples)
     {
@@ -2214,10 +1894,12 @@ cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_
         double      log_runs;
         double      npageaccesses;
  
-       /* CPU costs */
-       *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
-                                             comparison_cost, tuples,
-                                             tuples, false);
+       /*
+        * CPU costs
+        *
+        * Assume about N log2 N comparisons
+        */
+       *startup_cost = comparison_cost * tuples * LOG2(tuples);
  
         /* Disk costs */
  
@@ -2233,17 +1915,18 @@ cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_
     }
     else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
     {
-       /* We'll use a bounded heap-sort keeping just K tuples in memory. */
-       *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
-                                             comparison_cost, tuples,
-                                             output_tuples, true);
+       /*
+        * We'll use a bounded heap-sort keeping just K tuples in memory, for
+        * a total number of tuple comparisons of N log2 K; but the constant
+        * factor is a bit higher than for quicksort.  Tweak it so that the
+        * cost curve is continuous at the crossover point.
+        */
+       *startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
     }
     else
     {
         /* We'll use plain quicksort on all the input tuples */
-       *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
-                                             comparison_cost, tuples,
-                                             tuples, false);
+       *startup_cost = comparison_cost * tuples * LOG2(tuples);
     }
  
     /*
@@ -2276,8 +1959,8 @@ cost_incremental_sort(Path *path,
                       double input_tuples, int width, Cost comparison_cost, int sort_mem,
                       double limit_tuples)
  {
-   Cost        startup_cost,
-               run_cost,
+   Cost        startup_cost = 0,
+               run_cost = 0,
                 input_run_cost = input_total_cost - input_startup_cost;
     double      group_tuples,
                 input_groups;
@@ -2362,7 +2045,7 @@ cost_incremental_sort(Path *path,
      * pessimistic about incremental sort performance and increase its average
      * group size by half.
      */
-   cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
+   cost_tuplesort(&group_startup_cost, &group_run_cost,
                    1.5 * group_tuples, width, comparison_cost, sort_mem,
                    limit_tuples);
  
@@ -2370,7 +2053,7 @@ cost_incremental_sort(Path *path,
      * Startup cost of incremental sort is the startup cost of its first group
      * plus the cost of its input.
      */
-   startup_cost = group_startup_cost
+   startup_cost += group_startup_cost
         + input_startup_cost + group_input_run_cost;
  
     /*
@@ -2379,7 +2062,7 @@ cost_incremental_sort(Path *path,
      * group, plus the total cost to process the remaining groups, plus the
      * remaining cost of input.
      */
-   run_cost = group_run_cost
+   run_cost += group_run_cost
         + (group_run_cost + group_startup_cost) * (input_groups - 1)
         + group_input_run_cost * (input_groups - 1);
  
@@ -2419,7 +2102,7 @@ cost_sort(Path *path, PlannerInfo *root,
     Cost        startup_cost;
     Cost        run_cost;
  
-   cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
+   cost_tuplesort(&startup_cost, &run_cost,
                    tuples, width,
                    comparison_cost, sort_mem,
                    limit_tuples);
@@ -2517,7 +2200,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
   *   Determines and returns the cost of an Append node.
   */
  void
-cost_append(AppendPath *apath, PlannerInfo *root)
+cost_append(AppendPath *apath)
  {
     ListCell   *l;
  
@@ -2585,7 +2268,7 @@ cost_append(AppendPath *apath, PlannerInfo *root)
                      * any child.
                      */
                     cost_sort(&sort_path,
-                             root,
+                             NULL, /* doesn't currently need root */
                               pathkeys,
                               subpath->total_cost,
                               subpath->rows,
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c

index 799bdc91d095707f02d5e7004bbf39a2ad3ed93b..f962ff82ad2664f253caaf969507a60295b9d510 100644 (file)
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -681,18 +681,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
  
             if (opcintype == cur_em->em_datatype &&
                 equal(expr, cur_em->em_expr))
-           {
-               /*
-                * Match!
-                *
-                * Copy the sortref if it wasn't set yet. That may happen if
-                * the ec was constructed from WHERE clause, i.e. it doesn't
-                * have a target reference at all.
-                */
-               if (cur_ec->ec_sortref == 0 && sortref > 0)
-                   cur_ec->ec_sortref = sortref;
-               return cur_ec;
-           }
+               return cur_ec;  /* Match! */
         }
     }
  
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c

index e2fdcd3163c2e1e99568ea65942ba5056f994d64..a9943cd6e019a31d33f89610a31848a8eac8b7ed 100644 (file)
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,24 +17,17 @@
   */
  #include "postgres.h"
  
-#include <float.h>
-
-#include "miscadmin.h"
  #include "access/stratnum.h"
  #include "catalog/pg_opfamily.h"
  #include "nodes/makefuncs.h"
  #include "nodes/nodeFuncs.h"
  #include "nodes/plannodes.h"
-#include "optimizer/cost.h"
  #include "optimizer/optimizer.h"
  #include "optimizer/pathnode.h"
  #include "optimizer/paths.h"
  #include "partitioning/partbounds.h"
  #include "utils/lsyscache.h"
-#include "utils/selfuncs.h"
  
-/* Consider reordering of GROUP BY keys? */
-bool       enable_group_by_reordering = true;
  
  static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
  static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@@ -363,540 +356,6 @@ pathkeys_contained_in(List *keys1, List *keys2)
     return false;
  }
  
-/*
- * group_keys_reorder_by_pathkeys
- *     Reorder GROUP BY keys to match pathkeys of input path.
- *
- * Function returns new lists (pathkeys and clauses), original GROUP BY lists
- * stay untouched.
- *
- * Returns the number of GROUP BY keys with a matching pathkey.
- */
-int
-group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
-                              List **group_clauses)
-{
-   List       *new_group_pathkeys = NIL,
-              *new_group_clauses = NIL;
-   ListCell   *lc;
-   int         n;
-
-   if (pathkeys == NIL || *group_pathkeys == NIL)
-       return 0;
-
-   /*
-    * Walk the pathkeys (determining ordering of the input path) and see if
-    * there's a matching GROUP BY key. If we find one, we append it to the
-    * list, and do the same for the clauses.
-    *
-    * Once we find the first pathkey without a matching GROUP BY key, the
-    * rest of the pathkeys are useless and can't be used to evaluate the
-    * grouping, so we abort the loop and ignore the remaining pathkeys.
-    *
-    * XXX Pathkeys are built in a way to allow simply comparing pointers.
-    */
-   foreach(lc, pathkeys)
-   {
-       PathKey    *pathkey = (PathKey *) lfirst(lc);
-       SortGroupClause *sgc;
-
-       /* abort on first mismatch */
-       if (!list_member_ptr(*group_pathkeys, pathkey))
-           break;
-
-       new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
-
-       sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
-                                     *group_clauses);
-
-       new_group_clauses = lappend(new_group_clauses, sgc);
-   }
-
-   /* remember the number of pathkeys with a matching GROUP BY key */
-   n = list_length(new_group_pathkeys);
-
-   /* append the remaining group pathkeys (will be treated as not sorted) */
-   *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
-                                            *group_pathkeys);
-   *group_clauses = list_concat_unique_ptr(new_group_clauses,
-                                           *group_clauses);
-
-   return n;
-}
-
-/*
- * Used to generate all permutations of a pathkey list.
- */
-typedef struct PathkeyMutatorState
-{
-   List       *elemsList;
-   ListCell  **elemCells;
-   void      **elems;
-   int        *positions;
-   int         mutatorNColumns;
-   int         count;
-} PathkeyMutatorState;
-
-
-/*
- * PathkeyMutatorInit
- *     Initialize state of the permutation generator.
- *
- * We want to generate permutations of elements in the "elems" list. We may want
- * to skip some number of elements at the beginning (when treating as presorted)
- * or at the end (we only permute a limited number of group keys).
- *
- * The list is decomposed into elements, and we also keep pointers to individual
- * cells. This allows us to build the permuted list quickly and cheaply, without
- * creating any copies.
- */
-static void
-PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end)
-{
-   int         i;
-   int         n = end - start;
-   ListCell   *lc;
-
-   memset(state, 0, sizeof(*state));
-
-   state->mutatorNColumns = n;
-
-   state->elemsList = list_copy(elems);
-
-   state->elems = palloc(sizeof(void *) * n);
-   state->elemCells = palloc(sizeof(ListCell *) * n);
-   state->positions = palloc(sizeof(int) * n);
-
-   i = 0;
-   for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
-   {
-       state->elemCells[i] = lc;
-       state->elems[i] = lfirst(lc);
-       state->positions[i] = i + 1;
-       i++;
-
-       if (i >= n)
-           break;
-   }
-}
-
-/* Swap two elements of an array. */
-static void
-PathkeyMutatorSwap(int *a, int i, int j)
-{
-   int         s = a[i];
-
-   a[i] = a[j];
-   a[j] = s;
-}
-
-/*
- * Generate the next permutation of elements.
- */
-static bool
-PathkeyMutatorNextSet(int *a, int n)
-{
-   int         j,
-               k,
-               l,
-               r;
-
-   j = n - 2;
-
-   while (j >= 0 && a[j] >= a[j + 1])
-       j--;
-
-   if (j < 0)
-       return false;
-
-   k = n - 1;
-
-   while (k >= 0 && a[j] >= a[k])
-       k--;
-
-   PathkeyMutatorSwap(a, j, k);
-
-   l = j + 1;
-   r = n - 1;
-
-   while (l < r)
-       PathkeyMutatorSwap(a, l++, r--);
-
-   return true;
-}
-
-/*
- * PathkeyMutatorNext
- *     Generate the next permutation of list of elements.
- *
- * Returns the next permutation (as a list of elements) or NIL if there are no
- * more permutations.
- */
-static List *
-PathkeyMutatorNext(PathkeyMutatorState *state)
-{
-   int         i;
-
-   state->count++;
-
-   /* first permutation is original list */
-   if (state->count == 1)
-       return state->elemsList;
-
-   /* when there are no more permutations, return NIL */
-   if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns))
-   {
-       pfree(state->elems);
-       pfree(state->elemCells);
-       pfree(state->positions);
-
-       list_free(state->elemsList);
-
-       return NIL;
-   }
-
-   /* update the list cells to point to the right elements */
-   for (i = 0; i < state->mutatorNColumns; i++)
-       lfirst(state->elemCells[i]) =
-           (void *) state->elems[state->positions[i] - 1];
-
-   return state->elemsList;
-}
-
-/*
- * Cost of comparing pathkeys.
- */
-typedef struct PathkeySortCost
-{
-   Cost        cost;
-   PathKey    *pathkey;
-} PathkeySortCost;
-
-static int
-pathkey_sort_cost_comparator(const void *_a, const void *_b)
-{
-   const PathkeySortCost *a = (PathkeySortCost *) _a;
-   const PathkeySortCost *b = (PathkeySortCost *) _b;
-
-   if (a->cost < b->cost)
-       return -1;
-   else if (a->cost == b->cost)
-       return 0;
-   return 1;
-}
-
-/*
- * get_cheapest_group_keys_order
- *     Reorders the group pathkeys / clauses to minimize the comparison cost.
- *
- * Given the list of pathkeys in '*group_pathkeys', we try to arrange these
- * in an order that minimizes the sort costs that will be incurred by the
- * GROUP BY.  The costs mainly depend on the cost of the sort comparator
- * function(s) and the number of distinct values in each column of the GROUP
- * BY clause (*group_clauses).  Sorting on subsequent columns is only required
- * for tiebreak situations where two values sort equally.
- *
- * In case the input is partially sorted, only the remaining pathkeys are
- * considered.  'n_preordered' denotes how many of the leading *group_pathkeys
- * the input is presorted by.
- *
- * Returns true and sets *group_pathkeys and *group_clauses to the newly
- * ordered versions of the lists that were passed in via these parameters.
- * If no reordering was deemed necessary then we return false, in which case
- * the *group_pathkeys and *group_clauses lists are left untouched. The
- * original *group_pathkeys and *group_clauses parameter values are never
- * destructively modified in place.
- */
-static bool
-get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
-                             List **group_pathkeys, List **group_clauses,
-                             int n_preordered)
-{
-   List       *new_group_pathkeys = NIL,
-              *new_group_clauses = NIL,
-              *var_group_pathkeys;
-
-   ListCell   *cell;
-   PathkeyMutatorState mstate;
-   double      cheapest_sort_cost = DBL_MAX;
-
-   int         nFreeKeys;
-   int         nToPermute;
-
-   /* If there are less than 2 unsorted pathkeys, we're done. */
-   if (list_length(*group_pathkeys) - n_preordered < 2)
-       return false;
-
-   /*
-    * We could exhaustively cost all possible orderings of the pathkeys, but
-    * for a large number of pathkeys it might be prohibitively expensive. So
-    * we try to apply simple cheap heuristics first - we sort the pathkeys by
-    * sort cost (as if the pathkey was sorted independently) and then check
-    * only the four cheapest pathkeys. The remaining pathkeys are kept
-    * ordered by cost.
-    *
-    * XXX This is a very simple heuristics, but likely to work fine for most
-    * cases (because the number of GROUP BY clauses tends to be lower than
-    * 4). But it ignores how the number of distinct values in each pathkey
-    * affects the following steps. It might be better to use "more expensive"
-    * pathkey first if it has many distinct values, because it then limits
-    * the number of comparisons for the remaining pathkeys. But evaluating
-    * that is likely quite the expensive.
-    */
-   nFreeKeys = list_length(*group_pathkeys) - n_preordered;
-   nToPermute = 4;
-   if (nFreeKeys > nToPermute)
-   {
-       PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys);
-       PathkeySortCost *cost = costs;
-
-       /*
-        * Estimate cost for sorting individual pathkeys skipping the
-        * pre-ordered pathkeys.
-        */
-       for_each_from(cell, *group_pathkeys, n_preordered)
-       {
-           PathKey    *pathkey = (PathKey *) lfirst(cell);
-           List       *to_cost = list_make1(pathkey);
-
-           cost->pathkey = pathkey;
-           cost->cost = cost_sort_estimate(root, to_cost, 0, nrows);
-           cost++;
-
-           list_free(to_cost);
-       }
-
-       /* sort the pathkeys by sort cost in ascending order */
-       qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
-
-       /*
-        * Rebuild the list of pathkeys - first the preordered ones, then the
-        * rest ordered by cost.
-        */
-       new_group_pathkeys = list_copy_head(*group_pathkeys, n_preordered);
-
-       for (int i = 0; i < nFreeKeys; i++)
-           new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
-
-       pfree(costs);
-   }
-   else
-   {
-       /* Copy the list, so that we can free the new list by list_free. */
-       new_group_pathkeys = list_copy(*group_pathkeys);
-       nToPermute = nFreeKeys;
-   }
-
-   Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys));
-
-   /*
-    * Generate pathkey lists with permutations of the first nToPermute
-    * pathkeys.
-    *
-    * XXX We simply calculate sort cost for each individual pathkey list, but
-    * there's room for two dynamic programming optimizations here. Firstly,
-    * we may pass the current "best" cost to cost_sort_estimate so that it
-    * can "abort" if the estimated pathkeys list exceeds it. Secondly, it
-    * could pass the return information about the position when it exceeded
-    * the cost, and we could skip all permutations with the same prefix.
-    *
-    * Imagine we've already found ordering with cost C1, and we're evaluating
-    * another ordering - cost_sort_estimate() calculates cost by adding the
-    * pathkeys one by one (more or less), and the cost only grows. If at any
-    * point it exceeds C1, it can't possibly be "better" so we can discard
-    * it. But we also know that we can discard all ordering with the same
-    * prefix, because if we're estimating (a,b,c,d) and we exceed C1 at (a,b)
-    * then the same thing will happen for any ordering with this prefix.
-    */
-   PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute);
-
-   while ((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL)
-   {
-       Cost        cost;
-
-       cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
-
-       if (cost < cheapest_sort_cost)
-       {
-           list_free(new_group_pathkeys);
-           new_group_pathkeys = list_copy(var_group_pathkeys);
-           cheapest_sort_cost = cost;
-       }
-   }
-
-   /* Reorder the group clauses according to the reordered pathkeys. */
-   foreach(cell, new_group_pathkeys)
-   {
-       PathKey    *pathkey = (PathKey *) lfirst(cell);
-
-       new_group_clauses = lappend(new_group_clauses,
-                                   get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
-                                                           *group_clauses));
-   }
-
-   /* Just append the rest GROUP BY clauses */
-   new_group_clauses = list_concat_unique_ptr(new_group_clauses,
-                                              *group_clauses);
-
-   *group_pathkeys = new_group_pathkeys;
-   *group_clauses = new_group_clauses;
-
-   return true;
-}
-
-/*
- * get_useful_group_keys_orderings
- *     Determine which orderings of GROUP BY keys are potentially interesting.
- *
- * Returns list of PathKeyInfo items, each representing an interesting ordering
- * of GROUP BY keys. Each item stores pathkeys and clauses in matching order.
- *
- * The function considers (and keeps) multiple group by orderings:
- *
- * - the original ordering, as specified by the GROUP BY clause
- *
- * - GROUP BY keys reordered to minimize the sort cost
- *
- * - GROUP BY keys reordered to match path ordering (as much as possible), with
- *   the tail reordered to minimize the sort cost
- *
- * - GROUP BY keys to match target ORDER BY clause (as much as possible), with
- *   the tail reordered to minimize the sort cost
- *
- * There are other potentially interesting orderings (e.g. it might be best to
- * match the first ORDER BY key, order the remaining keys differently and then
- * rely on the incremental sort to fix this), but we ignore those for now. To
- * make this work we'd have to pretty much generate all possible permutations.
- */
-List *
-get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
-                               List *path_pathkeys,
-                               List *group_pathkeys, List *group_clauses,
-                               List *aggregate_pathkeys)
-{
-   Query      *parse = root->parse;
-   List       *infos = NIL;
-   PathKeyInfo *info;
-   int         n_preordered = 0;
-
-   List       *pathkeys = group_pathkeys;
-   List       *clauses = group_clauses;
-
-   /* always return at least the original pathkeys/clauses */
-   info = makeNode(PathKeyInfo);
-   if (aggregate_pathkeys != NIL)
-       info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
-   else
-       info->pathkeys = pathkeys;
-   info->clauses = clauses;
-
-   infos = lappend(infos, info);
-
-   /*
-    * Should we try generating alternative orderings of the group keys? If
-    * not, we produce only the order specified in the query, i.e. the
-    * optimization is effectively disabled.
-    */
-   if (!enable_group_by_reordering)
-       return infos;
-
-   /* for grouping sets we can't do any reordering */
-   if (parse->groupingSets)
-       return infos;
-
-   /*
-    * Try reordering pathkeys to minimize the sort cost, ignoring both the
-    * target ordering (ORDER BY) and ordering of the input path.
-    */
-   if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
-                                     n_preordered))
-   {
-       info = makeNode(PathKeyInfo);
-       if (aggregate_pathkeys != NIL)
-           info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
-       else
-           info->pathkeys = pathkeys;
-       info->clauses = clauses;
-
-       infos = lappend(infos, info);
-   }
-
-   /*
-    * If the path is sorted in some way, try reordering the group keys to
-    * match as much of the ordering as possible - we get this sort for free
-    * (mostly).
-    *
-    * We must not do this when there are no grouping sets, because those use
-    * more complex logic to decide the ordering.
-    *
-    * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
-    * more a complement, because it allows benefiting from incremental sort
-    * as much as possible.
-    *
-    * XXX This does nothing if (n_preordered == 0). We shouldn't create the
-    * info in this case.
-    */
-   if (path_pathkeys)
-   {
-       n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys,
-                                                     &pathkeys,
-                                                     &clauses);
-
-       /* reorder the tail to minimize sort cost */
-       get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
-                                     n_preordered);
-
-       /*
-        * reorder the tail to minimize sort cost
-        *
-        * XXX Ignore the return value - there may be nothing to reorder, in
-        * which case get_cheapest_group_keys_order returns false. But we
-        * still want to keep the keys reordered to path_pathkeys.
-        */
-       info = makeNode(PathKeyInfo);
-       if (aggregate_pathkeys != NIL)
-           info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
-       else
-           info->pathkeys = pathkeys;
-       info->clauses = clauses;
-
-       infos = lappend(infos, info);
-   }
-
-   /*
-    * Try reordering pathkeys to minimize the sort cost (this time consider
-    * the ORDER BY clause, but only if set debug_group_by_match_order_by).
-    */
-   if (root->sort_pathkeys)
-   {
-       n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
-                                                     &pathkeys,
-                                                     &clauses);
-
-       /*
-        * reorder the tail to minimize sort cost
-        *
-        * XXX Ignore the return value - there may be nothing to reorder, in
-        * which case get_cheapest_group_keys_order returns false. But we
-        * still want to keep the keys reordered to sort_pathkeys.
-        */
-       get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
-                                     n_preordered);
-
-       /* keep the group keys reordered to match ordering of input path */
-       info = makeNode(PathKeyInfo);
-       if (aggregate_pathkeys != NIL)
-           info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
-       else
-           info->pathkeys = pathkeys;
-       info->clauses = clauses;
-
-       infos = lappend(infos, info);
-   }
-
-   return infos;
-}
-
  /*
   * pathkeys_count_contained_in
   *    Same as pathkeys_contained_in, but also sets length of longest
@@ -2456,54 +1915,6 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
     return n_common_pathkeys;
  }
  
-/*
- * pathkeys_useful_for_grouping
- *     Count the number of pathkeys that are useful for grouping (instead of
- *     explicit sort)
- *
- * Group pathkeys could be reordered to benefit from the ordering. The
- * ordering may not be "complete" and may require incremental sort, but that's
- * fine. So we simply count prefix pathkeys with a matching group key, and
- * stop once we find the first pathkey without a match.
- *
- * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
- * pathkeys are useful for grouping, and we might do incremental sort to get
- * path ordered by (a,b,e).
- *
- * This logic is necessary to retain paths with ordering not matching grouping
- * keys directly, without the reordering.
- *
- * Returns the length of pathkey prefix with matching group keys.
- */
-static int
-pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
-{
-   ListCell   *key;
-   int         n = 0;
-
-   /* no special ordering requested for grouping */
-   if (root->group_pathkeys == NIL)
-       return 0;
-
-   /* unordered path */
-   if (pathkeys == NIL)
-       return 0;
-
-   /* walk the pathkeys and search for matching group key */
-   foreach(key, pathkeys)
-   {
-       PathKey    *pathkey = (PathKey *) lfirst(key);
-
-       /* no matching group key, we're done */
-       if (!list_member_ptr(root->group_pathkeys, pathkey))
-           break;
-
-       n++;
-   }
-
-   return n;
-}
-
  /*
   * truncate_useless_pathkeys
   *     Shorten the given pathkey list to just the useful pathkeys.
@@ -2518,9 +1929,6 @@ truncate_useless_pathkeys(PlannerInfo *root,
  
     nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
     nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
-   if (nuseful2 > nuseful)
-       nuseful = nuseful2;
-   nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
     if (nuseful2 > nuseful)
         nuseful = nuseful2;
  
@@ -2556,8 +1964,6 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
  {
     if (rel->joininfo != NIL || rel->has_eclass_joins)
         return true;            /* might be able to use pathkeys for merging */
-   if (root->group_pathkeys != NIL)
-       return true;            /* might be able to use pathkeys for grouping */
     if (root->query_pathkeys != NIL)
         return true;            /* might be able to use them for ordering */
     return false;               /* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index 8014d1fd2509f17d64b5d99fc8ca8f076f1efbb9..9c5836683c69b9cc5a60b7f3356255070820564b 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2758,9 +2758,8 @@ remove_useless_groupby_columns(PlannerInfo *root)
   *
   * In principle it might be interesting to consider other orderings of the
   * GROUP BY elements, which could match the sort ordering of other
- * possible plans (eg an indexscan) and thereby reduce cost.  However, we
- * don't yet have sufficient information to do that here, so that's left until
- * later in planning.  See get_useful_group_keys_orderings().
+ * possible plans (eg an indexscan) and thereby reduce cost.  We don't
+ * bother with that, though.  Hashed grouping will frequently win anyway.
   *
   * Note: we need no comparable processing of the distinctClause because
   * the parser already enforced that that matches ORDER BY.
@@ -6433,148 +6432,30 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
  
     if (can_sort)
     {
-       List       *group_pathkeys;
-       List       *orderAggPathkeys;
-       int         numAggPathkeys;
-
-       numAggPathkeys = list_length(root->group_pathkeys) -
-           root->num_groupby_pathkeys;
-
-       if (numAggPathkeys > 0)
-       {
-           group_pathkeys = list_copy_head(root->group_pathkeys,
-                                           root->num_groupby_pathkeys);
-           orderAggPathkeys = list_copy_tail(root->group_pathkeys,
-                                             root->num_groupby_pathkeys);
-       }
-       else
-       {
-           group_pathkeys = root->group_pathkeys;
-           orderAggPathkeys = NIL;
-       }
-
         /*
          * Use any available suitably-sorted path as input, and also consider
          * sorting the cheapest-total path.
          */
         foreach(lc, input_rel->pathlist)
         {
-           ListCell   *lc2;
             Path       *path = (Path *) lfirst(lc);
             Path       *path_original = path;
+           bool        is_sorted;
+           int         presorted_keys;
  
-           List       *pathkey_orderings = NIL;
-
-           List       *group_clauses = parse->groupClause;
-
-           /* generate alternative group orderings that might be useful */
-           pathkey_orderings = get_useful_group_keys_orderings(root,
-                                                               path->rows,
-                                                               path->pathkeys,
-                                                               group_pathkeys,
-                                                               group_clauses,
-                                                               orderAggPathkeys);
-
-           Assert(pathkey_orderings != NIL);
+           is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+                                                   path->pathkeys,
+                                                   &presorted_keys);
  
-           /* process all potentially interesting grouping reorderings */
-           foreach(lc2, pathkey_orderings)
+           if (path == cheapest_path || is_sorted)
             {
-               bool        is_sorted;
-               int         presorted_keys = 0;
-               PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
-
-               /* restore the path (we replace it in the loop) */
-               path = path_original;
-
-               is_sorted = pathkeys_count_contained_in(info->pathkeys,
-                                                       path->pathkeys,
-                                                       &presorted_keys);
-
-               if (path == cheapest_path || is_sorted)
-               {
-                   /* Sort the cheapest-total path if it isn't already sorted */
-                   if (!is_sorted)
-                       path = (Path *) create_sort_path(root,
-                                                        grouped_rel,
-                                                        path,
-                                                        info->pathkeys,
-                                                        -1.0);
-
-                   /* Now decide what to stick atop it */
-                   if (parse->groupingSets)
-                   {
-                       consider_groupingsets_paths(root, grouped_rel,
-                                                   path, true, can_hash,
-                                                   gd, agg_costs, dNumGroups);
-                   }
-                   else if (parse->hasAggs)
-                   {
-                       /*
-                        * We have aggregation, possibly with plain GROUP BY.
-                        * Make an AggPath.
-                        */
-                       add_path(grouped_rel, (Path *)
-                                create_agg_path(root,
-                                                grouped_rel,
-                                                path,
-                                                grouped_rel->reltarget,
-                                                info->clauses ? AGG_SORTED : AGG_PLAIN,
-                                                AGGSPLIT_SIMPLE,
-                                                info->clauses,
-                                                havingQual,
-                                                agg_costs,
-                                                dNumGroups));
-                   }
-                   else if (group_clauses)
-                   {
-                       /*
-                        * We have GROUP BY without aggregation or grouping
-                        * sets. Make a GroupPath.
-                        */
-                       add_path(grouped_rel, (Path *)
-                                create_group_path(root,
-                                                  grouped_rel,
-                                                  path,
-                                                  info->clauses,
-                                                  havingQual,
-                                                  dNumGroups));
-                   }
-                   else
-                   {
-                       /* Other cases should have been handled above */
-                       Assert(false);
-                   }
-               }
-
-               /*
-                * Now we may consider incremental sort on this path, but only
-                * when the path is not already sorted and when incremental
-                * sort is enabled.
-                */
-               if (is_sorted || !enable_incremental_sort)
-                   continue;
-
-               /* Restore the input path (we might have added Sort on top). */
-               path = path_original;
-
-               /* no shared prefix, no point in building incremental sort */
-               if (presorted_keys == 0)
-                   continue;
-
-               /*
-                * We should have already excluded pathkeys of length 1
-                * because then presorted_keys > 0 would imply is_sorted was
-                * true.
-                */
-               Assert(list_length(root->group_pathkeys) != 1);
-
-               path = (Path *) create_incremental_sort_path(root,
-                                                            grouped_rel,
-                                                            path,
-                                                            info->pathkeys,
-                                                            presorted_keys,
-                                                            -1.0);
+               /* Sort the cheapest-total path if it isn't already sorted */
+               if (!is_sorted)
+                   path = (Path *) create_sort_path(root,
+                                                    grouped_rel,
+                                                    path,
+                                                    root->group_pathkeys,
+                                                    -1.0);
  
                 /* Now decide what to stick atop it */
                 if (parse->groupingSets)
@@ -6594,9 +6475,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
                                              grouped_rel,
                                              path,
                                              grouped_rel->reltarget,
-                                            info->clauses ? AGG_SORTED : AGG_PLAIN,
+                                            parse->groupClause ? AGG_SORTED : AGG_PLAIN,
                                              AGGSPLIT_SIMPLE,
-                                            info->clauses,
+                                            parse->groupClause,
                                              havingQual,
                                              agg_costs,
                                              dNumGroups));
@@ -6611,7 +6492,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
                              create_group_path(root,
                                                grouped_rel,
                                                path,
-                                              info->clauses,
+                                              parse->groupClause,
                                                havingQual,
                                                dNumGroups));
                 }
@@ -6621,6 +6502,79 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
                     Assert(false);
                 }
             }
+
+           /*
+            * Now we may consider incremental sort on this path, but only
+            * when the path is not already sorted and when incremental sort
+            * is enabled.
+            */
+           if (is_sorted || !enable_incremental_sort)
+               continue;
+
+           /* Restore the input path (we might have added Sort on top). */
+           path = path_original;
+
+           /* no shared prefix, no point in building incremental sort */
+           if (presorted_keys == 0)
+               continue;
+
+           /*
+            * We should have already excluded pathkeys of length 1 because
+            * then presorted_keys > 0 would imply is_sorted was true.
+            */
+           Assert(list_length(root->group_pathkeys) != 1);
+
+           path = (Path *) create_incremental_sort_path(root,
+                                                        grouped_rel,
+                                                        path,
+                                                        root->group_pathkeys,
+                                                        presorted_keys,
+                                                        -1.0);
+
+           /* Now decide what to stick atop it */
+           if (parse->groupingSets)
+           {
+               consider_groupingsets_paths(root, grouped_rel,
+                                           path, true, can_hash,
+                                           gd, agg_costs, dNumGroups);
+           }
+           else if (parse->hasAggs)
+           {
+               /*
+                * We have aggregation, possibly with plain GROUP BY. Make an
+                * AggPath.
+                */
+               add_path(grouped_rel, (Path *)
+                        create_agg_path(root,
+                                        grouped_rel,
+                                        path,
+                                        grouped_rel->reltarget,
+                                        parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+                                        AGGSPLIT_SIMPLE,
+                                        parse->groupClause,
+                                        havingQual,
+                                        agg_costs,
+                                        dNumGroups));
+           }
+           else if (parse->groupClause)
+           {
+               /*
+                * We have GROUP BY without aggregation or grouping sets. Make
+                * a GroupPath.
+                */
+               add_path(grouped_rel, (Path *)
+                        create_group_path(root,
+                                          grouped_rel,
+                                          path,
+                                          parse->groupClause,
+                                          havingQual,
+                                          dNumGroups));
+           }
+           else
+           {
+               /* Other cases should have been handled above */
+               Assert(false);
+           }
         }
  
         /*
@@ -6631,128 +6585,100 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
         {
             foreach(lc, partially_grouped_rel->pathlist)
             {
-               ListCell   *lc2;
                 Path       *path = (Path *) lfirst(lc);
                 Path       *path_original = path;
-               List       *pathkey_orderings = NIL;
-               List       *group_clauses = parse->groupClause;
-
-               /* generate alternative group orderings that might be useful */
-               pathkey_orderings = get_useful_group_keys_orderings(root,
-                                                                   path->rows,
-                                                                   path->pathkeys,
-                                                                   group_pathkeys,
-                                                                   group_clauses,
-                                                                   orderAggPathkeys);
+               bool        is_sorted;
+               int         presorted_keys;
  
-               Assert(pathkey_orderings != NIL);
+               is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+                                                       path->pathkeys,
+                                                       &presorted_keys);
  
-               /* process all potentially interesting grouping reorderings */
-               foreach(lc2, pathkey_orderings)
+               /*
+                * Insert a Sort node, if required.  But there's no point in
+                * sorting anything but the cheapest path.
+                */
+               if (!is_sorted)
                 {
-                   bool        is_sorted;
-                   int         presorted_keys = 0;
-                   PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
-
-                   /* restore the path (we replace it in the loop) */
-                   path = path_original;
-
-                   is_sorted = pathkeys_count_contained_in(info->pathkeys,
-                                                           path->pathkeys,
-                                                           &presorted_keys);
-
-                   /*
-                    * Insert a Sort node, if required.  But there's no point
-                    * in sorting anything but the cheapest path.
-                    */
-                   if (!is_sorted)
-                   {
-                       if (path != partially_grouped_rel->cheapest_total_path)
-                           continue;
-                       path = (Path *) create_sort_path(root,
-                                                        grouped_rel,
-                                                        path,
-                                                        info->pathkeys,
-                                                        -1.0);
-                   }
+                   if (path != partially_grouped_rel->cheapest_total_path)
+                       continue;
+                   path = (Path *) create_sort_path(root,
+                                                    grouped_rel,
+                                                    path,
+                                                    root->group_pathkeys,
+                                                    -1.0);
+               }
  
-                   if (parse->hasAggs)
-                       add_path(grouped_rel, (Path *)
-                                create_agg_path(root,
-                                                grouped_rel,
-                                                path,
-                                                grouped_rel->reltarget,
-                                                info->clauses ? AGG_SORTED : AGG_PLAIN,
-                                                AGGSPLIT_FINAL_DESERIAL,
-                                                info->clauses,
-                                                havingQual,
-                                                agg_final_costs,
-                                                dNumGroups));
-                   else
-                       add_path(grouped_rel, (Path *)
-                                create_group_path(root,
-                                                  grouped_rel,
-                                                  path,
-                                                  info->clauses,
-                                                  havingQual,
-                                                  dNumGroups));
+               if (parse->hasAggs)
+                   add_path(grouped_rel, (Path *)
+                            create_agg_path(root,
+                                            grouped_rel,
+                                            path,
+                                            grouped_rel->reltarget,
+                                            parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+                                            AGGSPLIT_FINAL_DESERIAL,
+                                            parse->groupClause,
+                                            havingQual,
+                                            agg_final_costs,
+                                            dNumGroups));
+               else
+                   add_path(grouped_rel, (Path *)
+                            create_group_path(root,
+                                              grouped_rel,
+                                              path,
+                                              parse->groupClause,
+                                              havingQual,
+                                              dNumGroups));
  
-                   /*
-                    * Now we may consider incremental sort on this path, but
-                    * only when the path is not already sorted and when
-                    * incremental sort is enabled.
-                    */
-                   if (is_sorted || !enable_incremental_sort)
-                       continue;
+               /*
+                * Now we may consider incremental sort on this path, but only
+                * when the path is not already sorted and when incremental
+                * sort is enabled.
+                */
+               if (is_sorted || !enable_incremental_sort)
+                   continue;
  
-                   /*
-                    * Restore the input path (we might have added Sort on
-                    * top).
-                    */
-                   path = path_original;
+               /* Restore the input path (we might have added Sort on top). */
+               path = path_original;
  
-                   /*
-                    * no shared prefix, not point in building incremental
-                    * sort
-                    */
-                   if (presorted_keys == 0)
-                       continue;
+               /* no shared prefix, not point in building incremental sort */
+               if (presorted_keys == 0)
+                   continue;
  
-                   /*
-                    * We should have already excluded pathkeys of length 1
-                    * because then presorted_keys > 0 would imply is_sorted
-                    * was true.
-                    */
-                   Assert(list_length(root->group_pathkeys) != 1);
+               /*
+                * We should have already excluded pathkeys of length 1
+                * because then presorted_keys > 0 would imply is_sorted was
+                * true.
+                */
+               Assert(list_length(root->group_pathkeys) != 1);
  
-                   path = (Path *) create_incremental_sort_path(root,
-                                                                grouped_rel,
-                                                                path,
-                                                                info->pathkeys,
-                                                                presorted_keys,
-                                                                -1.0);
+               path = (Path *) create_incremental_sort_path(root,
+                                                            grouped_rel,
+                                                            path,
+                                                            root->group_pathkeys,
+                                                            presorted_keys,
+                                                            -1.0);
  
-                   if (parse->hasAggs)
-                       add_path(grouped_rel, (Path *)
-                                create_agg_path(root,
-                                                grouped_rel,
-                                                path,
-                                                grouped_rel->reltarget,
-                                                info->clauses ? AGG_SORTED : AGG_PLAIN,
-                                                AGGSPLIT_FINAL_DESERIAL,
-                                                info->clauses,
-                                                havingQual,
-                                                agg_final_costs,
-                                                dNumGroups));
-                   else
-                       add_path(grouped_rel, (Path *)
-                                create_group_path(root,
-                                                  grouped_rel,
-                                                  path,
-                                                  info->clauses,
-                                                  havingQual,
-                                                  dNumGroups));
-               }
+               if (parse->hasAggs)
+                   add_path(grouped_rel, (Path *)
+                            create_agg_path(root,
+                                            grouped_rel,
+                                            path,
+                                            grouped_rel->reltarget,
+                                            parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+                                            AGGSPLIT_FINAL_DESERIAL,
+                                            parse->groupClause,
+                                            havingQual,
+                                            agg_final_costs,
+                                            dNumGroups));
+               else
+                   add_path(grouped_rel, (Path *)
+                            create_group_path(root,
+                                              grouped_rel,
+                                              path,
+                                              parse->groupClause,
+                                              havingQual,
+                                              dNumGroups));
             }
         }
     }
@@ -6946,26 +6872,6 @@ create_partial_grouping_paths(PlannerInfo *root,
  
     if (can_sort && cheapest_total_path != NULL)
     {
-       List       *group_pathkeys;
-       List       *orderAggPathkeys;
-       int         numAggPathkeys;
-
-       numAggPathkeys = list_length(root->group_pathkeys) -
-           root->num_groupby_pathkeys;
-
-       if (numAggPathkeys > 0)
-       {
-           group_pathkeys = list_copy_head(root->group_pathkeys,
-                                           root->num_groupby_pathkeys);
-           orderAggPathkeys = list_copy_tail(root->group_pathkeys,
-                                             root->num_groupby_pathkeys);
-       }
-       else
-       {
-           group_pathkeys = root->group_pathkeys;
-           orderAggPathkeys = NIL;
-       }
-
         /* This should have been checked previously */
         Assert(parse->hasAggs || parse->groupClause);
  
@@ -6975,69 +6881,41 @@ create_partial_grouping_paths(PlannerInfo *root,
          */
         foreach(lc, input_rel->pathlist)
         {
-           ListCell   *lc2;
             Path       *path = (Path *) lfirst(lc);
-           Path       *path_save = path;
-           List       *pathkey_orderings = NIL;
-           List       *group_clauses = parse->groupClause;
-
-           /* generate alternative group orderings that might be useful */
-           pathkey_orderings = get_useful_group_keys_orderings(root,
-                                                               path->rows,
-                                                               path->pathkeys,
-                                                               group_pathkeys,
-                                                               group_clauses,
-                                                               orderAggPathkeys);
-
-           Assert(pathkey_orderings != NIL);
-
-           /* process all potentially interesting grouping reorderings */
-           foreach(lc2, pathkey_orderings)
-           {
-               bool        is_sorted;
-               int         presorted_keys = 0;
-               PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
-
-               /* restore the path (we replace it in the loop) */
-               path = path_save;
-
-               is_sorted = pathkeys_count_contained_in(info->pathkeys,
-                                                       path->pathkeys,
-                                                       &presorted_keys);
+           bool        is_sorted;
  
-               if (path == cheapest_total_path || is_sorted)
-               {
-                   /* Sort the cheapest partial path, if it isn't already */
-                   if (!is_sorted)
-                   {
-                       path = (Path *) create_sort_path(root,
-                                                        partially_grouped_rel,
-                                                        path,
-                                                        info->pathkeys,
-                                                        -1.0);
-                   }
+           is_sorted = pathkeys_contained_in(root->group_pathkeys,
+                                             path->pathkeys);
+           if (path == cheapest_total_path || is_sorted)
+           {
+               /* Sort the cheapest partial path, if it isn't already */
+               if (!is_sorted)
+                   path = (Path *) create_sort_path(root,
+                                                    partially_grouped_rel,
+                                                    path,
+                                                    root->group_pathkeys,
+                                                    -1.0);
  
-                   if (parse->hasAggs)
-                       add_path(partially_grouped_rel, (Path *)
-                                create_agg_path(root,
-                                                partially_grouped_rel,
-                                                path,
-                                                partially_grouped_rel->reltarget,
-                                                info->clauses ? AGG_SORTED : AGG_PLAIN,
-                                                AGGSPLIT_INITIAL_SERIAL,
-                                                info->clauses,
-                                                NIL,
-                                                agg_partial_costs,
-                                                dNumPartialGroups));
-                   else
-                       add_path(partially_grouped_rel, (Path *)
-                                create_group_path(root,
-                                                  partially_grouped_rel,
-                                                  path,
-                                                  info->clauses,
-                                                  NIL,
-                                                  dNumPartialGroups));
-               }
+               if (parse->hasAggs)
+                   add_path(partially_grouped_rel, (Path *)
+                            create_agg_path(root,
+                                            partially_grouped_rel,
+                                            path,
+                                            partially_grouped_rel->reltarget,
+                                            parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+                                            AGGSPLIT_INITIAL_SERIAL,
+                                            parse->groupClause,
+                                            NIL,
+                                            agg_partial_costs,
+                                            dNumPartialGroups));
+               else
+                   add_path(partially_grouped_rel, (Path *)
+                            create_group_path(root,
+                                              partially_grouped_rel,
+                                              path,
+                                              parse->groupClause,
+                                              NIL,
+                                              dNumPartialGroups));
             }
         }
  
@@ -7047,8 +6925,6 @@ create_partial_grouping_paths(PlannerInfo *root,
          * We can also skip the entire loop when we only have a single-item
          * group_pathkeys because then we can't possibly have a presorted
          * prefix of the list without having the list be fully sorted.
-        *
-        * XXX Shouldn't this also consider the group-key-reordering?
          */
         if (enable_incremental_sort && list_length(root->group_pathkeys) > 1)
         {
@@ -7103,122 +6979,27 @@ create_partial_grouping_paths(PlannerInfo *root,
  
     if (can_sort && cheapest_partial_path != NULL)
     {
-       List       *group_pathkeys;
-       List       *orderAggPathkeys;
-       int         numAggPathkeys;
-
-       numAggPathkeys = list_length(root->group_pathkeys) -
-           root->num_groupby_pathkeys;
-
-       if (numAggPathkeys > 0)
-       {
-           group_pathkeys = list_copy_head(root->group_pathkeys,
-                                           root->num_groupby_pathkeys);
-           orderAggPathkeys = list_copy_tail(root->group_pathkeys,
-                                             root->num_groupby_pathkeys);
-       }
-       else
-       {
-           group_pathkeys = root->group_pathkeys;
-           orderAggPathkeys = NIL;
-       }
-
         /* Similar to above logic, but for partial paths. */
         foreach(lc, input_rel->partial_pathlist)
         {
-           ListCell   *lc2;
             Path       *path = (Path *) lfirst(lc);
             Path       *path_original = path;
-           List       *pathkey_orderings = NIL;
-           List       *group_clauses = parse->groupClause;
-
-           /* generate alternative group orderings that might be useful */
-           pathkey_orderings = get_useful_group_keys_orderings(root,
-                                                               path->rows,
-                                                               path->pathkeys,
-                                                               group_pathkeys,
-                                                               group_clauses,
-                                                               orderAggPathkeys);
+           bool        is_sorted;
+           int         presorted_keys;
  
-           Assert(pathkey_orderings != NIL);
+           is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+                                                   path->pathkeys,
+                                                   &presorted_keys);
  
-           /* process all potentially interesting grouping reorderings */
-           foreach(lc2, pathkey_orderings)
+           if (path == cheapest_partial_path || is_sorted)
             {
-               bool        is_sorted;
-               int         presorted_keys = 0;
-               PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
-
-               /* restore the path (we replace it in the loop) */
-               path = path_original;
-
-               is_sorted = pathkeys_count_contained_in(info->pathkeys,
-                                                       path->pathkeys,
-                                                       &presorted_keys);
-
-               if (path == cheapest_partial_path || is_sorted)
-               {
-
-                   /* Sort the cheapest partial path, if it isn't already */
-                   if (!is_sorted)
-                   {
-                       path = (Path *) create_sort_path(root,
-                                                        partially_grouped_rel,
-                                                        path,
-                                                        info->pathkeys,
-                                                        -1.0);
-                   }
-
-                   if (parse->hasAggs)
-                       add_partial_path(partially_grouped_rel, (Path *)
-                                        create_agg_path(root,
-                                                        partially_grouped_rel,
-                                                        path,
-                                                        partially_grouped_rel->reltarget,
-                                                        info->clauses ? AGG_SORTED : AGG_PLAIN,
-                                                        AGGSPLIT_INITIAL_SERIAL,
-                                                        info->clauses,
-                                                        NIL,
-                                                        agg_partial_costs,
-                                                        dNumPartialPartialGroups));
-                   else
-                       add_partial_path(partially_grouped_rel, (Path *)
-                                        create_group_path(root,
-                                                          partially_grouped_rel,
-                                                          path,
-                                                          info->clauses,
-                                                          NIL,
-                                                          dNumPartialPartialGroups));
-               }
-
-               /*
-                * Now we may consider incremental sort on this path, but only
-                * when the path is not already sorted and when incremental
-                * sort is enabled.
-                */
-               if (is_sorted || !enable_incremental_sort)
-                   continue;
-
-               /* Restore the input path (we might have added Sort on top). */
-               path = path_original;
-
-               /* no shared prefix, not point in building incremental sort */
-               if (presorted_keys == 0)
-                   continue;
-
-               /*
-                * We should have already excluded pathkeys of length 1
-                * because then presorted_keys > 0 would imply is_sorted was
-                * true.
-                */
-               Assert(list_length(root->group_pathkeys) != 1);
-
-               path = (Path *) create_incremental_sort_path(root,
-                                                            partially_grouped_rel,
-                                                            path,
-                                                            info->pathkeys,
-                                                            presorted_keys,
-                                                            -1.0);
+               /* Sort the cheapest partial path, if it isn't already */
+               if (!is_sorted)
+                   path = (Path *) create_sort_path(root,
+                                                    partially_grouped_rel,
+                                                    path,
+                                                    root->group_pathkeys,
+                                                    -1.0);
  
                 if (parse->hasAggs)
                     add_partial_path(partially_grouped_rel, (Path *)
@@ -7226,9 +7007,9 @@ create_partial_grouping_paths(PlannerInfo *root,
                                                      partially_grouped_rel,
                                                      path,
                                                      partially_grouped_rel->reltarget,
-                                                    info->clauses ? AGG_SORTED : AGG_PLAIN,
+                                                    parse->groupClause ? AGG_SORTED : AGG_PLAIN,
                                                      AGGSPLIT_INITIAL_SERIAL,
-                                                    info->clauses,
+                                                    parse->groupClause,
                                                      NIL,
                                                      agg_partial_costs,
                                                      dNumPartialPartialGroups));
@@ -7237,10 +7018,59 @@ create_partial_grouping_paths(PlannerInfo *root,
                                      create_group_path(root,
                                                        partially_grouped_rel,
                                                        path,
-                                                      info->clauses,
+                                                      parse->groupClause,
                                                        NIL,
                                                        dNumPartialPartialGroups));
             }
+
+           /*
+            * Now we may consider incremental sort on this path, but only
+            * when the path is not already sorted and when incremental sort
+            * is enabled.
+            */
+           if (is_sorted || !enable_incremental_sort)
+               continue;
+
+           /* Restore the input path (we might have added Sort on top). */
+           path = path_original;
+
+           /* no shared prefix, not point in building incremental sort */
+           if (presorted_keys == 0)
+               continue;
+
+           /*
+            * We should have already excluded pathkeys of length 1 because
+            * then presorted_keys > 0 would imply is_sorted was true.
+            */
+           Assert(list_length(root->group_pathkeys) != 1);
+
+           path = (Path *) create_incremental_sort_path(root,
+                                                        partially_grouped_rel,
+                                                        path,
+                                                        root->group_pathkeys,
+                                                        presorted_keys,
+                                                        -1.0);
+
+           if (parse->hasAggs)
+               add_partial_path(partially_grouped_rel, (Path *)
+                                create_agg_path(root,
+                                                partially_grouped_rel,
+                                                path,
+                                                partially_grouped_rel->reltarget,
+                                                parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+                                                AGGSPLIT_INITIAL_SERIAL,
+                                                parse->groupClause,
+                                                NIL,
+                                                agg_partial_costs,
+                                                dNumPartialPartialGroups));
+           else
+               add_partial_path(partially_grouped_rel, (Path *)
+                                create_group_path(root,
+                                                  partially_grouped_rel,
+                                                  path,
+                                                  parse->groupClause,
+                                                  NIL,
+                                                  dNumPartialPartialGroups));
         }
     }
  
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index e10561d843ae8a7ee92ff60d253b51dbac6246a3..70f61ae7b1c276f2144b4631b58479a88d2076d3 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1346,12 +1346,12 @@ create_append_path(PlannerInfo *root,
             pathnode->path.total_cost = child->total_cost;
         }
         else
-           cost_append(pathnode, root);
+           cost_append(pathnode);
         /* Must do this last, else cost_append complains */
         pathnode->path.pathkeys = child->pathkeys;
     }
     else
-       cost_append(pathnode, root);
+       cost_append(pathnode);
  
     /* If the caller provided a row estimate, override the computed value. */
     if (rows >= 0)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index 1808388397a8be4e50652ec97fc9d299bb341b3f..234fb665806292a30cc451743a6fca7d34af18fd 100644 (file)
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3369,28 +3369,11 @@ double
  estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
                     List **pgset, EstimationInfo *estinfo)
  {
-   return estimate_num_groups_incremental(root, groupExprs,
-                                          input_rows, pgset, estinfo,
-                                          NULL, 0);
-}
-
-/*
- * estimate_num_groups_incremental
- *     An estimate_num_groups variant, optimized for cases that are adding the
- *     expressions incrementally (e.g. one by one).
- */
-double
-estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
-                               double input_rows,
-                               List **pgset, EstimationInfo *estinfo,
-                               List **cache_varinfos, int prevNExprs)
-{
-   List       *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
+   List       *varinfos = NIL;
     double      srf_multiplier = 1.0;
     double      numdistinct;
     ListCell   *l;
-   int         i,
-               j;
+   int         i;
  
     /* Zero the estinfo output parameter, if non-NULL */
     if (estinfo != NULL)
@@ -3421,7 +3404,7 @@ estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
      */
     numdistinct = 1.0;
  
-   i = j = 0;
+   i = 0;
     foreach(l, groupExprs)
     {
         Node       *groupexpr = (Node *) lfirst(l);
@@ -3430,14 +3413,6 @@ estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
         List       *varshere;
         ListCell   *l2;
  
-       /* was done on previous call */
-       if (cache_varinfos && j++ < prevNExprs)
-       {
-           if (pgset)
-               i++;            /* to keep in sync with lines below */
-           continue;
-       }
-
         /* is expression in this grouping set? */
         if (pgset && !list_member_int(*pgset, i++))
             continue;
@@ -3507,11 +3482,7 @@ estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
         if (varshere == NIL)
         {
             if (contain_volatile_functions(groupexpr))
-           {
-               if (cache_varinfos)
-                   *cache_varinfos = varinfos;
                 return input_rows;
-           }
             continue;
         }
  
@@ -3528,9 +3499,6 @@ estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
         }
     }
  
-   if (cache_varinfos)
-       *cache_varinfos = varinfos;
-
     /*
      * If now no Vars, we must have an all-constant or all-boolean GROUP BY
      * list.
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c

index fda3f9befb06706643627957ec714818b0f44958..7ff653b51723e9d6bc6b85786da844b5d5549522 100644 (file)
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -967,16 +967,6 @@ struct config_bool ConfigureNamesBool[] =
         true,
         NULL, NULL, NULL
     },
-   {
-       {"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
-           gettext_noop("Enables reordering of GROUP BY keys."),
-           NULL,
-           GUC_EXPLAIN
-       },
-       &enable_group_by_reordering,
-       true,
-       NULL, NULL, NULL
-   },
     {
         {"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
             gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample

index 2ae76e5cfb7b89769f5ad9756b39e724ac38643c..868d21c351e819a3a6f8b3904a67e5819f2d587b 100644 (file)
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -388,7 +388,6 @@
  #enable_seqscan = on
  #enable_sort = on
  #enable_tidscan = on
-#enable_group_by_reordering = on
  
  # - Planner Cost Constants -
  
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h

index 294cfe9c47c96159fe2a42dfb0dd31b6fe9463b1..6bda383bead10a15a33fc59d362102fa643e0b52 100644 (file)
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1372,18 +1372,6 @@ typedef struct PathKey
     bool        pk_nulls_first; /* do NULLs come before normal values? */
  } PathKey;
  
-/*
- * Combines information about pathkeys and the associated clauses.
- */
-typedef struct PathKeyInfo
-{
-   pg_node_attr(no_read)
-
-   NodeTag     type;
-   List       *pathkeys;
-   List       *clauses;
-} PathKeyInfo;
-
  /*
   * VolatileFunctionStatus -- allows nodes to cache their
   * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h

index f27d11eaa9ff75963690ccc8be287d159f524f27..204e94b6d1098315ae5677870d8b71313e54031c 100644 (file)
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -115,9 +115,7 @@ extern void cost_incremental_sort(Path *path,
                                   Cost input_startup_cost, Cost input_total_cost,
                                   double input_tuples, int width, Cost comparison_cost, int sort_mem,
                                   double limit_tuples);
-extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
-                              int nPresortedKeys, double tuples);
-extern void cost_append(AppendPath *apath, PlannerInfo *root);
+extern void cost_append(AppendPath *apath);
  extern void cost_merge_append(Path *path, PlannerInfo *root,
                               List *pathkeys, int n_streams,
                               Cost input_startup_cost, Cost input_total_cost,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h

index 881386997c39f9e48e97b55cc667bf0606e72f64..41f765d3422689ca3ca20f81021bcd5cb953178d 100644 (file)
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -24,7 +24,6 @@ extern PGDLLIMPORT bool enable_geqo;
  extern PGDLLIMPORT int geqo_threshold;
  extern PGDLLIMPORT int min_parallel_table_scan_size;
  extern PGDLLIMPORT int min_parallel_index_scan_size;
-extern PGDLLIMPORT bool enable_group_by_reordering;
  
  /* Hook for plugins to get control in set_rel_pathlist() */
  typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
@@ -203,13 +202,6 @@ typedef enum
  extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
  extern bool pathkeys_contained_in(List *keys1, List *keys2);
  extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
-extern int group_keys_reorder_by_pathkeys(List *pathkeys,
-                                          List **group_pathkeys,
-                                          List **group_clauses);
-extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
-                                            List *path_pathkeys,
-                                            List *group_pathkeys, List *group_clauses,
-                                            List *aggregate_pathkeys);
  extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
                                             Relids required_outer,
                                             CostSelector cost_criterion,
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h

index d485b9bfcd9cfcfaad216c6bb00daa2c36f4bf45..8f3d73edfb2bb04f58e7c5f7a1e31e77b8b2a34a 100644 (file)
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -214,11 +214,6 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
                                   double input_rows, List **pgset,
                                   EstimationInfo *estinfo);
  
-extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
-                                             double input_rows, List **pgset,
-                                             EstimationInfo *estinfo,
-                                             List **cache_varinfos, int prevNExprs);
-
  extern void estimate_hash_bucket_stats(PlannerInfo *root,
                                        Node *hashkey, double nbuckets,
                                        Selectivity *mcv_freq,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out

index b2198724e3c95bb5ddec4c8d3a07a059adaef3a1..fc2bd40be2d8cd22ab08052f718a6b7770ccfa90 100644 (file)
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1210,8 +1210,7 @@ explain (costs off)
    select distinct min(f1), max(f1) from minmaxtest;
                                           QUERY PLAN                                          
  ---------------------------------------------------------------------------------------------
- HashAggregate
-   Group Key: $0, $1
+ Unique
     InitPlan 1 (returns $0)
       ->  Limit
             ->  Merge Append
@@ -1234,8 +1233,10 @@ explain (costs off)
                   ->  Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
                         Index Cond: (f1 IS NOT NULL)
                   ->  Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
-   ->  Result
-(25 rows)
+   ->  Sort
+         Sort Key: ($0), ($1)
+         ->  Result
+(26 rows)
  
  select distinct min(f1), max(f1) from minmaxtest;
   min | max 
@@ -2525,241 +2526,6 @@ SELECT balk(hundred) FROM tenk1;
  (1 row)
  
  ROLLBACK;
--- GROUP BY optimization by reorder columns
-SELECT
-   i AS id,
-   i/2 AS p,
-   format('%60s', i%2) AS v,
-   i/4 AS c,
-   i/8 AS d,
-   (random() * (10000/8))::int as e --the same as d but no correlation with p
-   INTO btg
-FROM
-   generate_series(1, 10000) i;
-VACUUM btg;
-ANALYZE btg;
--- GROUP BY optimization by reorder columns by frequency
-SET enable_hashagg=off;
-SET max_parallel_workers= 0;
-SET max_parallel_workers_per_gather = 0;
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, v;
-         QUERY PLAN          
------------------------------
- GroupAggregate
-   Group Key: p, v
-   ->  Sort
-         Sort Key: p, v
-         ->  Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p;
-         QUERY PLAN          
------------------------------
- GroupAggregate
-   Group Key: p, v
-   ->  Sort
-         Sort Key: p, v
-         ->  Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, c;
-         QUERY PLAN          
------------------------------
- GroupAggregate
-   Group Key: p, c, v
-   ->  Sort
-         Sort Key: p, c, v
-         ->  Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
-         QUERY PLAN          
------------------------------
- GroupAggregate
-   Group Key: v, p, c
-   ->  Sort
-         Sort Key: v, p, c
-         ->  Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, d, c;
-          QUERY PLAN          
-------------------------------
- GroupAggregate
-   Group Key: p, d, c, v
-   ->  Sort
-         Sort Key: p, d, c, v
-         ->  Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
-          QUERY PLAN          
-------------------------------
- GroupAggregate
-   Group Key: v, p, d, c
-   ->  Sort
-         Sort Key: v, p, d, c
-         ->  Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
-          QUERY PLAN          
-------------------------------
- GroupAggregate
-   Group Key: p, v, d, c
-   ->  Sort
-         Sort Key: p, v, d, c
-         ->  Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, d, e;
-         QUERY PLAN          
------------------------------
- GroupAggregate
-   Group Key: p, d, e
-   ->  Sort
-         Sort Key: p, d, e
-         ->  Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, e, d;
-         QUERY PLAN          
------------------------------
- GroupAggregate
-   Group Key: p, e, d
-   ->  Sort
-         Sort Key: p, e, d
-         ->  Seq Scan on btg
-(5 rows)
-
-CREATE STATISTICS btg_dep ON d, e, p FROM btg;
-ANALYZE btg;
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, d, e;
-         QUERY PLAN          
------------------------------
- GroupAggregate
-   Group Key: p, d, e
-   ->  Sort
-         Sort Key: p, d, e
-         ->  Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, e, d;
-         QUERY PLAN          
------------------------------
- GroupAggregate
-   Group Key: p, e, d
-   ->  Sort
-         Sort Key: p, e, d
-         ->  Seq Scan on btg
-(5 rows)
-
--- GROUP BY optimization by reorder columns by index scan
-CREATE INDEX ON btg(p, v);
-SET enable_seqscan=off;
-SET enable_bitmapscan=off;
-VACUUM btg;
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, v;
-                   QUERY PLAN                   
-------------------------------------------------
- GroupAggregate
-   Group Key: p, v
-   ->  Index Only Scan using btg_p_v_idx on btg
-(3 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
-                   QUERY PLAN                   
-------------------------------------------------
- GroupAggregate
-   Group Key: p, v
-   ->  Index Only Scan using btg_p_v_idx on btg
-(3 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p;
-                   QUERY PLAN                   
-------------------------------------------------
- GroupAggregate
-   Group Key: p, v
-   ->  Index Only Scan using btg_p_v_idx on btg
-(3 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
-                   QUERY PLAN                   
-------------------------------------------------
- GroupAggregate
-   Group Key: p, v
-   ->  Index Only Scan using btg_p_v_idx on btg
-(3 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, c;
-                   QUERY PLAN                    
--------------------------------------------------
- GroupAggregate
-   Group Key: p, c, v
-   ->  Incremental Sort
-         Sort Key: p, c, v
-         Presorted Key: p
-         ->  Index Scan using btg_p_v_idx on btg
-(6 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
-                   QUERY PLAN                    
--------------------------------------------------
- GroupAggregate
-   Group Key: p, v, c
-   ->  Incremental Sort
-         Sort Key: p, v, c
-         Presorted Key: p, v
-         ->  Index Scan using btg_p_v_idx on btg
-(6 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, c, p, d;
-                   QUERY PLAN                    
--------------------------------------------------
- GroupAggregate
-   Group Key: p, c, d, v
-   ->  Incremental Sort
-         Sort Key: p, c, d, v
-         Presorted Key: p
-         ->  Index Scan using btg_p_v_idx on btg
-(6 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
-                   QUERY PLAN                    
--------------------------------------------------
- GroupAggregate
-   Group Key: p, v, c, d
-   ->  Incremental Sort
-         Sort Key: p, v, c, d
-         Presorted Key: p, v
-         ->  Index Scan using btg_p_v_idx on btg
-(6 rows)
-
-DROP TABLE btg;
-RESET enable_hashagg;
-RESET max_parallel_workers;
-RESET max_parallel_workers_per_gather;
-RESET enable_seqscan;
-RESET enable_bitmapscan;
  -- Secondly test the case of a parallel aggregate combiner function
  -- returning NULL. For that use normal transition function, but a
  -- combiner function returning NULL.
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out

index 49953eaaded3b0137884a89056c57bedeaa77b38..0a631124c2257b6b86802de96dd86e664c12a1aa 100644 (file)
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1439,7 +1439,7 @@ set parallel_setup_cost = 0;
  set parallel_tuple_cost = 0;
  set max_parallel_workers_per_gather = 2;
  create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
  create index on t (a);
  analyze t;
  set enable_incremental_sort = off;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out

index 2ed2e542a44926dd101af86aeef0d6d8ab249f15..08334761ae6020e0a8564fd06a80e0aa9c5cf790 100644 (file)
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -1984,8 +1984,8 @@ USING (name);
  ------+----+----
   bb   | 12 | 13
   cc   | 22 | 23
- ee   | 42 |   
   dd   |    | 33
+ ee   | 42 |   
  (4 rows)
  
  -- Cases with non-nullable expressions in subquery results;
@@ -2019,8 +2019,8 @@ NATURAL FULL JOIN
  ------+------+------+------+------
   bb   |   12 |    2 |   13 |    3
   cc   |   22 |    2 |   23 |    3
- ee   |   42 |    2 |      |     
   dd   |      |      |   33 |    3
+ ee   |   42 |    2 |      |     
  (4 rows)
  
  SELECT * FROM
@@ -4676,20 +4676,18 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
  explain (costs off)
  select d.* from d left join (select distinct * from b) s
    on d.a = s.id;
-                 QUERY PLAN                  
----------------------------------------------
- Merge Left Join
-   Merge Cond: (d.a = s.id)
+              QUERY PLAN              
+--------------------------------------
+ Merge Right Join
+   Merge Cond: (b.id = d.a)
+   ->  Unique
+         ->  Sort
+               Sort Key: b.id, b.c_id
+               ->  Seq Scan on b
     ->  Sort
           Sort Key: d.a
           ->  Seq Scan on d
-   ->  Sort
-         Sort Key: s.id
-         ->  Subquery Scan on s
-               ->  HashAggregate
-                     Group Key: b.id, b.c_id
-                     ->  Seq Scan on b
-(11 rows)
+(9 rows)
  
  -- check join removal works when uniqueness of the join condition is enforced
  -- by a UNION
@@ -6399,39 +6397,44 @@ select * from j1 natural join j2;
  explain (verbose, costs off)
  select * from j1
  inner join (select distinct id from j3) j3 on j1.id = j3.id;
-            QUERY PLAN             
------------------------------------
+               QUERY PLAN                
+-----------------------------------------
   Nested Loop
     Output: j1.id, j3.id
     Inner Unique: true
     Join Filter: (j1.id = j3.id)
-   ->  HashAggregate
+   ->  Unique
           Output: j3.id
-         Group Key: j3.id
-         ->  Seq Scan on public.j3
+         ->  Sort
                 Output: j3.id
+               Sort Key: j3.id
+               ->  Seq Scan on public.j3
+                     Output: j3.id
     ->  Seq Scan on public.j1
           Output: j1.id
-(11 rows)
+(13 rows)
  
  -- ensure group by clause allows the inner to become unique
  explain (verbose, costs off)
  select * from j1
  inner join (select id from j3 group by id) j3 on j1.id = j3.id;
-            QUERY PLAN             
------------------------------------
+               QUERY PLAN                
+-----------------------------------------
   Nested Loop
     Output: j1.id, j3.id
     Inner Unique: true
     Join Filter: (j1.id = j3.id)
-   ->  HashAggregate
+   ->  Group
           Output: j3.id
           Group Key: j3.id
-         ->  Seq Scan on public.j3
+         ->  Sort
                 Output: j3.id
+               Sort Key: j3.id
+               ->  Seq Scan on public.j3
+                     Output: j3.id
     ->  Seq Scan on public.j1
           Output: j1.id
-(11 rows)
+(14 rows)
  
  drop table j1;
  drop table j2;
diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out

index 4047c3e761e1f3c600b64ebd864d5cdea4f4b417..787af41dfe5b8bdf1c7255ad0e1c191af5cf9a85 100644 (file)
--- a/src/test/regress/expected/merge.out
+++ b/src/test/regress/expected/merge.out
@@ -1460,15 +1460,18 @@ WHEN MATCHED AND t.a < 10 THEN
                             explain_merge                            
  --------------------------------------------------------------------
   Merge on ex_mtarget t (actual rows=0 loops=1)
-   ->  Hash Join (actual rows=0 loops=1)
-         Hash Cond: (s.a = t.a)
-         ->  Seq Scan on ex_msource s (actual rows=1 loops=1)
-         ->  Hash (actual rows=0 loops=1)
-               Buckets: xxx  Batches: xxx  Memory Usage: xxx
+   ->  Merge Join (actual rows=0 loops=1)
+         Merge Cond: (t.a = s.a)
+         ->  Sort (actual rows=0 loops=1)
+               Sort Key: t.a
+               Sort Method: quicksort  Memory: xxx
                 ->  Seq Scan on ex_mtarget t (actual rows=0 loops=1)
                       Filter: (a < '-1000'::integer)
                       Rows Removed by Filter: 54
-(9 rows)
+         ->  Sort (never executed)
+               Sort Key: s.a
+               ->  Seq Scan on ex_msource s (never executed)
+(12 rows)
  
  DROP TABLE ex_msource, ex_mtarget;
  DROP FUNCTION explain_merge(text);
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out

index db36e3a150af7d408dcce02fd2402028dad676ac..a82b8fb8fb7827a605e2da660c28aae2f24434b5 100644 (file)
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -949,12 +949,12 @@ SET parallel_setup_cost = 0;
  -- is not partial agg safe.
  EXPLAIN (COSTS OFF)
  SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
-                                         QUERY PLAN                                         
---------------------------------------------------------------------------------------------
- Gather Merge
-   Workers Planned: 2
-   ->  Sort
-         Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
+                                      QUERY PLAN                                      
+--------------------------------------------------------------------------------------
+ Sort
+   Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
+   ->  Gather
+         Workers Planned: 2
           ->  Parallel Append
                 ->  GroupAggregate
                       Group Key: pagg_tab_ml.a
@@ -1381,26 +1381,28 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
  -- When GROUP BY clause does not match; partial aggregation is performed for each partition.
  EXPLAIN (COSTS OFF)
  SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-                                     QUERY PLAN                                      
--------------------------------------------------------------------------------------
+                                        QUERY PLAN                                         
+-------------------------------------------------------------------------------------------
   Sort
     Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
-   ->  Finalize HashAggregate
+   ->  Finalize GroupAggregate
           Group Key: pagg_tab_para.y
           Filter: (avg(pagg_tab_para.x) < '12'::numeric)
-         ->  Gather
+         ->  Gather Merge
                 Workers Planned: 2
-               ->  Parallel Append
-                     ->  Partial HashAggregate
-                           Group Key: pagg_tab_para.y
-                           ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
-                     ->  Partial HashAggregate
-                           Group Key: pagg_tab_para_1.y
-                           ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
-                     ->  Partial HashAggregate
-                           Group Key: pagg_tab_para_2.y
-                           ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
-(17 rows)
+               ->  Sort
+                     Sort Key: pagg_tab_para.y
+                     ->  Parallel Append
+                           ->  Partial HashAggregate
+                                 Group Key: pagg_tab_para.y
+                                 ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
+                           ->  Partial HashAggregate
+                                 Group Key: pagg_tab_para_1.y
+                                 ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
+                           ->  Partial HashAggregate
+                                 Group Key: pagg_tab_para_2.y
+                                 ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
+(19 rows)
  
  SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
   y  |  sum  |         avg         | count 
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out

index 03926a841380fe12d2dcb325fc932912083c8c2a..bb5b7c47a456ade2c60dd194d072e1b965f115f6 100644 (file)
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -466,41 +466,52 @@ EXPLAIN (COSTS OFF)
  SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
    WHERE a BETWEEN 490 AND 510
    GROUP BY 1, 2 ORDER BY 1, 2;
-                                                QUERY PLAN                                                 
------------------------------------------------------------------------------------------------------------
+                                                   QUERY PLAN                                                    
+-----------------------------------------------------------------------------------------------------------------
   Group
     Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-   ->  Sort
+   ->  Merge Append
           Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-         ->  Append
-               ->  Merge Full Join
-                     Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
-                     Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
-                     ->  Sort
-                           Sort Key: prt1_1.a, prt1_1.b
-                           ->  Seq Scan on prt1_p1 prt1_1
-                     ->  Sort
-                           Sort Key: p2_1.a, p2_1.b
-                           ->  Seq Scan on prt2_p1 p2_1
-               ->  Merge Full Join
-                     Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
-                     Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
-                     ->  Sort
-                           Sort Key: prt1_2.a, prt1_2.b
-                           ->  Seq Scan on prt1_p2 prt1_2
-                     ->  Sort
-                           Sort Key: p2_2.a, p2_2.b
-                           ->  Seq Scan on prt2_p2 p2_2
-               ->  Merge Full Join
-                     Merge Cond: ((prt1_3.b = p2_3.b) AND (prt1_3.a = p2_3.a))
-                     Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
-                     ->  Sort
-                           Sort Key: prt1_3.b, prt1_3.a
-                           ->  Seq Scan on prt1_p3 prt1_3
-                     ->  Sort
-                           Sort Key: p2_3.b, p2_3.a
-                           ->  Seq Scan on prt2_p3 p2_3
-(32 rows)
+         ->  Group
+               Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
+               ->  Sort
+                     Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
+                     ->  Merge Full Join
+                           Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
+                           Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
+                           ->  Sort
+                                 Sort Key: prt1.a, prt1.b
+                                 ->  Seq Scan on prt1_p1 prt1
+                           ->  Sort
+                                 Sort Key: p2.a, p2.b
+                                 ->  Seq Scan on prt2_p1 p2
+         ->  Group
+               Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
+               ->  Sort
+                     Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
+                     ->  Merge Full Join
+                           Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+                           Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
+                           ->  Sort
+                                 Sort Key: prt1_1.a, prt1_1.b
+                                 ->  Seq Scan on prt1_p2 prt1_1
+                           ->  Sort
+                                 Sort Key: p2_1.a, p2_1.b
+                                 ->  Seq Scan on prt2_p2 p2_1
+         ->  Group
+               Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
+               ->  Sort
+                     Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
+                     ->  Merge Full Join
+                           Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+                           Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
+                           ->  Sort
+                                 Sort Key: prt1_2.a, prt1_2.b
+                                 ->  Seq Scan on prt1_p3 prt1_2
+                           ->  Sort
+                                 Sort Key: p2_2.a, p2_2.b
+                                 ->  Seq Scan on prt2_p3 p2_2
+(43 rows)
  
  SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
    WHERE a BETWEEN 490 AND 510
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out

index 4e775af1758c153bfaa41be821de994d9909104f..579b861d84f31386724ed3b41216054aa0b3bab8 100644 (file)
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -114,7 +114,6 @@ select name, setting from pg_settings where name like 'enable%';
   enable_async_append            | on
   enable_bitmapscan              | on
   enable_gathermerge             | on
- enable_group_by_reordering     | on
   enable_hashagg                 | on
   enable_hashjoin                | on
   enable_incremental_sort        | on
@@ -132,7 +131,7 @@ select name, setting from pg_settings where name like 'enable%';
   enable_seqscan                 | on
   enable_sort                    | on
   enable_tidscan                 | on
-(21 rows)
+(20 rows)
  
  -- Test that the pg_timezone_names and pg_timezone_abbrevs views are
  -- more-or-less working.  We can't test their contents in any great detail
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out

index 7ac4a9380e2195a139bdd870b20c2fd18bbff09b..dece7310cfef8aed3aff28ca9ae7e121f52bb27c 100644 (file)
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1303,22 +1303,24 @@ select distinct q1 from
     union all
     select distinct * from int8_tbl i82) ss
  where q2 = q2;
-                     QUERY PLAN                     
-----------------------------------------------------
- HashAggregate
-   Group Key: "*SELECT* 1".q1
-   ->  Append
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Unique
+   ->  Merge Append
+         Sort Key: "*SELECT* 1".q1
           ->  Subquery Scan on "*SELECT* 1"
-               ->  HashAggregate
-                     Group Key: i81.q1, i81.q2
-                     ->  Seq Scan on int8_tbl i81
-                           Filter: (q2 IS NOT NULL)
+               ->  Unique
+                     ->  Sort
+                           Sort Key: i81.q1, i81.q2
+                           ->  Seq Scan on int8_tbl i81
+                                 Filter: (q2 IS NOT NULL)
           ->  Subquery Scan on "*SELECT* 2"
-               ->  HashAggregate
-                     Group Key: i82.q1, i82.q2
-                     ->  Seq Scan on int8_tbl i82
-                           Filter: (q2 IS NOT NULL)
-(13 rows)
+               ->  Unique
+                     ->  Sort
+                           Sort Key: i82.q1, i82.q2
+                           ->  Seq Scan on int8_tbl i82
+                                 Filter: (q2 IS NOT NULL)
+(15 rows)
  
  select distinct q1 from
    (select distinct * from int8_tbl i81
@@ -1337,22 +1339,24 @@ select distinct q1 from
     union all
     select distinct * from int8_tbl i82) ss
  where -q1 = q2;
-                    QUERY PLAN                    
---------------------------------------------------
- HashAggregate
-   Group Key: "*SELECT* 1".q1
-   ->  Append
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Unique
+   ->  Merge Append
+         Sort Key: "*SELECT* 1".q1
           ->  Subquery Scan on "*SELECT* 1"
-               ->  HashAggregate
-                     Group Key: i81.q1, i81.q2
-                     ->  Seq Scan on int8_tbl i81
-                           Filter: ((- q1) = q2)
+               ->  Unique
+                     ->  Sort
+                           Sort Key: i81.q1, i81.q2
+                           ->  Seq Scan on int8_tbl i81
+                                 Filter: ((- q1) = q2)
           ->  Subquery Scan on "*SELECT* 2"
-               ->  HashAggregate
-                     Group Key: i82.q1, i82.q2
-                     ->  Seq Scan on int8_tbl i82
-                           Filter: ((- q1) = q2)
-(13 rows)
+               ->  Unique
+                     ->  Sort
+                           Sort Key: i82.q1, i82.q2
+                           ->  Seq Scan on int8_tbl i82
+                                 Filter: ((- q1) = q2)
+(15 rows)
  
  select distinct q1 from
    (select distinct * from int8_tbl i81
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql

index 4540a06f454e2ba3a96559da5ebc6db238357d45..a4c00ff7a9da0833f7eff4f5f48131ef874faa40 100644 (file)
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1068,105 +1068,6 @@ SELECT balk(hundred) FROM tenk1;
  
  ROLLBACK;
  
--- GROUP BY optimization by reorder columns
-
-SELECT
-   i AS id,
-   i/2 AS p,
-   format('%60s', i%2) AS v,
-   i/4 AS c,
-   i/8 AS d,
-   (random() * (10000/8))::int as e --the same as d but no correlation with p
-   INTO btg
-FROM
-   generate_series(1, 10000) i;
-
-VACUUM btg;
-ANALYZE btg;
-
--- GROUP BY optimization by reorder columns by frequency
-
-SET enable_hashagg=off;
-SET max_parallel_workers= 0;
-SET max_parallel_workers_per_gather = 0;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, v;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, c;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, d, c;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, d, e;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, e, d;
-
-CREATE STATISTICS btg_dep ON d, e, p FROM btg;
-ANALYZE btg;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, d, e;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, e, d;
-
-
--- GROUP BY optimization by reorder columns by index scan
-
-CREATE INDEX ON btg(p, v);
-SET enable_seqscan=off;
-SET enable_bitmapscan=off;
-VACUUM btg;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, v;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, c;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, c, p, d;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
-
-DROP TABLE btg;
-
-RESET enable_hashagg;
-RESET max_parallel_workers;
-RESET max_parallel_workers_per_gather;
-RESET enable_seqscan;
-RESET enable_bitmapscan;
-
-
  -- Secondly test the case of a parallel aggregate combiner function
  -- returning NULL. For that use normal transition function, but a
  -- combiner function returning NULL.
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql

index 6a0e87c7f64d492ec5b0d362eec2dafebe0e13ce..284a354dbb76224b9be12d58bcbc1b35cbfa88f5 100644 (file)
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -213,7 +213,7 @@ set parallel_tuple_cost = 0;
  set max_parallel_workers_per_gather = 2;
  
  create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
  create index on t (a);
  analyze t;
author	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 3 Oct 2022 14:56:16 +0000 (10:56 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 3 Oct 2022 14:56:16 +0000 (10:56 -0400)
contrib/postgres_fdw/expected/postgres_fdw.out		patch \| blob \| blame \| history
doc/src/sgml/config.sgml		patch \| blob \| blame \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| blame \| history
src/backend/optimizer/path/equivclass.c		patch \| blob \| blame \| history
src/backend/optimizer/path/pathkeys.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| blame \| history
src/backend/optimizer/util/pathnode.c		patch \| blob \| blame \| history
src/backend/utils/adt/selfuncs.c		patch \| blob \| blame \| history
src/backend/utils/misc/guc_tables.c		patch \| blob \| blame \| history
src/backend/utils/misc/postgresql.conf.sample		patch \| blob \| blame \| history
src/include/nodes/pathnodes.h		patch \| blob \| blame \| history
src/include/optimizer/cost.h		patch \| blob \| blame \| history
src/include/optimizer/paths.h		patch \| blob \| blame \| history
src/include/utils/selfuncs.h		patch \| blob \| blame \| history
src/test/regress/expected/aggregates.out		patch \| blob \| blame \| history
src/test/regress/expected/incremental_sort.out		patch \| blob \| blame \| history
src/test/regress/expected/join.out		patch \| blob \| blame \| history
src/test/regress/expected/merge.out		patch \| blob \| blame \| history
src/test/regress/expected/partition_aggregate.out		patch \| blob \| blame \| history
src/test/regress/expected/partition_join.out		patch \| blob \| blame \| history
src/test/regress/expected/sysviews.out		patch \| blob \| blame \| history
src/test/regress/expected/union.out		patch \| blob \| blame \| history
src/test/regress/sql/aggregates.sql		patch \| blob \| blame \| history
src/test/regress/sql/incremental_sort.sql		patch \| blob \| blame \| history