-- GROUP BY clause in various forms, cardinal, alias and constant expression
explain (verbose, costs off)
select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
- QUERY PLAN
-------------------------------------------------------------------------------------------------------------
- Foreign Scan
+ QUERY PLAN
+---------------------------------------------------------------------------------------
+ Sort
Output: (count(c2)), c2, 5, 7.0, 9
- Relations: Aggregate on (public.ft1)
- Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
-(4 rows)
+ Sort Key: ft1.c2
+ -> Foreign Scan
+ Output: (count(c2)), c2, 5, 7.0, 9
+ Relations: Aggregate on (public.ft1)
+ Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5
+(7 rows)
select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
w | x | y | z
</listitem>
</varlistentry>
- <varlistentry id="guc-enable-groupby-reordering" xreflabel="enable_group_by_reordering">
- <term><varname>enable_group_by_reordering</varname> (<type>boolean</type>)
- <indexterm>
- <primary><varname>enable_group_by_reordering</varname> configuration parameter</primary>
- </indexterm>
- </term>
- <listitem>
- <para>
- Enables or disables reordering of keys in a <literal>GROUP BY</literal>
- clause. The default is <literal>on</literal>.
- </para>
- </listitem>
- </varlistentry>
-
<varlistentry id="guc-enable-hashagg" xreflabel="enable_hashagg">
<term><varname>enable_hashagg</varname> (<type>boolean</type>)
<indexterm>
rterm->pathtarget->width);
}
-/*
- * is_fake_var
- * Workaround for generate_append_tlist() which generates fake Vars with
- * varno == 0, that will cause a fail of estimate_num_group() call
- *
- * XXX Ummm, why would estimate_num_group fail with this?
- */
-static bool
-is_fake_var(Expr *expr)
-{
- if (IsA(expr, RelabelType))
- expr = (Expr *) ((RelabelType *) expr)->arg;
-
- return (IsA(expr, Var) && ((Var *) expr)->varno == 0);
-}
-
-/*
- * get_width_cost_multiplier
- * Returns relative complexity of comparing two values based on its width.
- * The idea behind is that the comparison becomes more expensive the longer the
- * value is. Return value is in cpu_operator_cost units.
- */
-static double
-get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
-{
- double width = -1.0; /* fake value */
-
- if (IsA(expr, RelabelType))
- expr = (Expr *) ((RelabelType *) expr)->arg;
-
- /* Try to find actual stat in corresponding relation */
- if (IsA(expr, Var))
- {
- Var *var = (Var *) expr;
-
- if (var->varno > 0 && var->varno < root->simple_rel_array_size)
- {
- RelOptInfo *rel = root->simple_rel_array[var->varno];
-
- if (rel != NULL &&
- var->varattno >= rel->min_attr &&
- var->varattno <= rel->max_attr)
- {
- int ndx = var->varattno - rel->min_attr;
-
- if (rel->attr_widths[ndx] > 0)
- width = rel->attr_widths[ndx];
- }
- }
- }
-
- /* Didn't find any actual stats, try using type width instead. */
- if (width < 0.0)
- {
- Node *node = (Node *) expr;
-
- width = get_typavgwidth(exprType(node), exprTypmod(node));
- }
-
- /*
- * Values are passed as Datum type, so comparisons can't be cheaper than
- * comparing a Datum value.
- *
- * FIXME I find this reasoning questionable. We may pass int2, and
- * comparing it is probably a bit cheaper than comparing a bigint.
- */
- if (width <= sizeof(Datum))
- return 1.0;
-
- /*
- * We consider the cost of a comparison not to be directly proportional to
- * width of the argument, because widths of the arguments could be
- * slightly different (we only know the average width for the whole
- * column). So we use log16(width) as an estimate.
- */
- return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
-}
-
-/*
- * compute_cpu_sort_cost
- * compute CPU cost of sort (i.e. in-memory)
- *
- * The main thing we need to calculate to estimate sort CPU costs is the number
- * of calls to the comparator functions. The difficulty is that for multi-column
- * sorts there may be different data types involved (for some of which the calls
- * may be much more expensive). Furthermore, columns may have a very different
- * number of distinct values - the higher the number, the fewer comparisons will
- * be needed for the following columns.
- *
- * The algorithm is incremental - we add pathkeys one by one, and at each step we
- * estimate the number of necessary comparisons (based on the number of distinct
- * groups in the current pathkey prefix and the new pathkey), and the comparison
- * costs (which is data type specific).
- *
- * Estimation of the number of comparisons is based on ideas from:
- *
- * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002
- * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf]
- *
- * In term of that paper, let N - number of tuples, Xi - number of identical
- * tuples with value Ki, then the estimate of number of comparisons is:
- *
- * log(N! / (X1! * X2! * ..)) ~ sum(Xi * log(N/Xi))
- *
- * We assume all Xi the same because now we don't have any estimation of
- * group sizes, we have only know the estimate of number of groups (distinct
- * values). In that case, formula becomes:
- *
- * N * log(NumberOfGroups)
- *
- * For multi-column sorts we need to estimate the number of comparisons for
- * each individual column - for example with columns (c1, c2, ..., ck) we
- * can estimate that number of comparisons on ck is roughly
- *
- * ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
- *
- * Let k be a column number, Gk - number of groups defined by k columns, and Fk
- * the cost of the comparison is
- *
- * N * sum( Fk * log(Gk) )
- *
- * Note: We also consider column width, not just the comparator cost.
- *
- * NOTE: some callers currently pass NIL for pathkeys because they
- * can't conveniently supply the sort keys. In this case, it will fallback to
- * simple comparison cost estimate.
- */
-static Cost
-compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
- Cost comparison_cost, double tuples, double output_tuples,
- bool heapSort)
-{
- Cost per_tuple_cost = 0.0;
- ListCell *lc;
- List *pathkeyExprs = NIL;
- double tuplesPerPrevGroup = tuples;
- double totalFuncCost = 1.0;
- bool has_fake_var = false;
- int i = 0;
- Oid prev_datatype = InvalidOid;
- List *cache_varinfos = NIL;
-
- /* fallback if pathkeys is unknown */
- if (pathkeys == NIL)
- {
- /*
- * If we'll use a bounded heap-sort keeping just K tuples in memory,
- * for a total number of tuple comparisons of N log2 K; but the
- * constant factor is a bit higher than for quicksort. Tweak it so
- * that the cost curve is continuous at the crossover point.
- */
- output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
- per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
-
- /* add cost provided by caller */
- per_tuple_cost += comparison_cost;
-
- return per_tuple_cost * tuples;
- }
-
- /*
- * Computing total cost of sorting takes into account the per-column
- * comparison function cost. We try to compute the needed number of
- * comparisons per column.
- */
- foreach(lc, pathkeys)
- {
- PathKey *pathkey = (PathKey *) lfirst(lc);
- EquivalenceMember *em;
- double nGroups,
- correctedNGroups;
- Cost funcCost = 1.0;
-
- /*
- * We believe that equivalence members aren't very different, so, to
- * estimate cost we consider just the first member.
- */
- em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
-
- if (em->em_datatype != InvalidOid)
- {
- /* do not lookup funcCost if the data type is the same */
- if (prev_datatype != em->em_datatype)
- {
- Oid sortop;
- QualCost cost;
-
- sortop = get_opfamily_member(pathkey->pk_opfamily,
- em->em_datatype, em->em_datatype,
- pathkey->pk_strategy);
-
- cost.startup = 0;
- cost.per_tuple = 0;
- add_function_cost(root, get_opcode(sortop), NULL, &cost);
-
- /*
- * add_function_cost returns the product of cpu_operator_cost
- * and procost, but we need just procost, co undo that.
- */
- funcCost = cost.per_tuple / cpu_operator_cost;
-
- prev_datatype = em->em_datatype;
- }
- }
-
- /* factor in the width of the values in this column */
- funcCost *= get_width_cost_multiplier(root, em->em_expr);
-
- /* now we have per-key cost, so add to the running total */
- totalFuncCost += funcCost;
-
- /* remember if we have found a fake Var in pathkeys */
- has_fake_var |= is_fake_var(em->em_expr);
- pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
-
- /*
- * We need to calculate the number of comparisons for this column,
- * which requires knowing the group size. So we estimate the number of
- * groups by calling estimate_num_groups_incremental(), which
- * estimates the group size for "new" pathkeys.
- *
- * Note: estimate_num_groups_incremental does not handle fake Vars, so
- * use a default estimate otherwise.
- */
- if (!has_fake_var)
- nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
- tuplesPerPrevGroup, NULL, NULL,
- &cache_varinfos,
- list_length(pathkeyExprs) - 1);
- else if (tuples > 4.0)
-
- /*
- * Use geometric mean as estimation if there are no stats.
- *
- * We don't use DEFAULT_NUM_DISTINCT here, because that's used for
- * a single column, but here we're dealing with multiple columns.
- */
- nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
- else
- nGroups = tuples;
-
- /*
- * Presorted keys are not considered in the cost above, but we still
- * do have to compare them in the qsort comparator. So make sure to
- * factor in the cost in that case.
- */
- if (i >= nPresortedKeys)
- {
- if (heapSort)
- {
- /*
- * have to keep at least one group, and a multiple of group
- * size
- */
- correctedNGroups = ceil(output_tuples / tuplesPerPrevGroup);
- }
- else
- /* all groups in the input */
- correctedNGroups = nGroups;
-
- correctedNGroups = Max(1.0, ceil(correctedNGroups));
-
- per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
- }
-
- i++;
-
- /*
- * Uniform distributions with all groups being of the same size are
- * the best case, with nice smooth behavior. Real-world distributions
- * tend not to be uniform, though, and we don't have any reliable
- * easy-to-use information. As a basic defense against skewed
- * distributions, we use a 1.5 factor to make the expected group a bit
- * larger, but we need to be careful not to make the group larger than
- * in the preceding step.
- */
- tuplesPerPrevGroup = Min(tuplesPerPrevGroup,
- ceil(1.5 * tuplesPerPrevGroup / nGroups));
-
- /*
- * Once we get single-row group, it means tuples in the group are
- * unique and we can skip all remaining columns.
- */
- if (tuplesPerPrevGroup <= 1.0)
- break;
- }
-
- list_free(pathkeyExprs);
-
- /* per_tuple_cost is in cpu_operator_cost units */
- per_tuple_cost *= cpu_operator_cost;
-
- /*
- * Accordingly to "Introduction to algorithms", Thomas H. Cormen, Charles
- * E. Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0, quicksort
- * estimation formula has additional term proportional to number of tuples
- * (see Chapter 8.2 and Theorem 4.1). That affects cases with a low number
- * of tuples, approximately less than 1e4. We could implement it as an
- * additional multiplier under the logarithm, but we use a bit more
- * complex formula which takes into account the number of unique tuples
- * and it's not clear how to combine the multiplier with the number of
- * groups. Estimate it as 10 cpu_operator_cost units.
- */
- per_tuple_cost += 10 * cpu_operator_cost;
-
- per_tuple_cost += comparison_cost;
-
- return tuples * per_tuple_cost;
-}
-
-/*
- * simple wrapper just to estimate best sort path
- */
-Cost
-cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
- double tuples)
-{
- return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
- 0, tuples, tuples, false);
-}
-
/*
* cost_tuplesort
* Determines and returns the cost of sorting a relation using tuplesort,
* number of initial runs formed and M is the merge order used by tuplesort.c.
* Since the average initial run should be about sort_mem, we have
* disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- * and cpu cost (computed by compute_cpu_sort_cost()).
+ * cpu = comparison_cost * t * log2(t)
*
* If the sort is bounded (i.e., only the first k result tuples are needed)
* and k tuples can fit into sort_mem, we use a heap method that keeps only
* 'comparison_cost' is the extra cost per comparison, if any
* 'sort_mem' is the number of kilobytes of work memory allowed for the sort
* 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
- * 'startup_cost' is expected to be 0 at input. If there is "input cost" it should
- * be added by caller later
*/
static void
-cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(Cost *startup_cost, Cost *run_cost,
double tuples, int width,
Cost comparison_cost, int sort_mem,
double limit_tuples)
if (tuples < 2.0)
tuples = 2.0;
+ /* Include the default cost-per-comparison */
+ comparison_cost += 2.0 * cpu_operator_cost;
+
/* Do we have a useful LIMIT? */
if (limit_tuples > 0 && limit_tuples < tuples)
{
double log_runs;
double npageaccesses;
- /* CPU costs */
- *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
- comparison_cost, tuples,
- tuples, false);
+ /*
+ * CPU costs
+ *
+ * Assume about N log2 N comparisons
+ */
+ *startup_cost = comparison_cost * tuples * LOG2(tuples);
/* Disk costs */
}
else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
{
- /* We'll use a bounded heap-sort keeping just K tuples in memory. */
- *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
- comparison_cost, tuples,
- output_tuples, true);
+ /*
+ * We'll use a bounded heap-sort keeping just K tuples in memory, for
+ * a total number of tuple comparisons of N log2 K; but the constant
+ * factor is a bit higher than for quicksort. Tweak it so that the
+ * cost curve is continuous at the crossover point.
+ */
+ *startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
}
else
{
/* We'll use plain quicksort on all the input tuples */
- *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
- comparison_cost, tuples,
- tuples, false);
+ *startup_cost = comparison_cost * tuples * LOG2(tuples);
}
/*
double input_tuples, int width, Cost comparison_cost, int sort_mem,
double limit_tuples)
{
- Cost startup_cost,
- run_cost,
+ Cost startup_cost = 0,
+ run_cost = 0,
input_run_cost = input_total_cost - input_startup_cost;
double group_tuples,
input_groups;
* pessimistic about incremental sort performance and increase its average
* group size by half.
*/
- cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
+ cost_tuplesort(&group_startup_cost, &group_run_cost,
1.5 * group_tuples, width, comparison_cost, sort_mem,
limit_tuples);
* Startup cost of incremental sort is the startup cost of its first group
* plus the cost of its input.
*/
- startup_cost = group_startup_cost
+ startup_cost += group_startup_cost
+ input_startup_cost + group_input_run_cost;
/*
* group, plus the total cost to process the remaining groups, plus the
* remaining cost of input.
*/
- run_cost = group_run_cost
+ run_cost += group_run_cost
+ (group_run_cost + group_startup_cost) * (input_groups - 1)
+ group_input_run_cost * (input_groups - 1);
Cost startup_cost;
Cost run_cost;
- cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
+ cost_tuplesort(&startup_cost, &run_cost,
tuples, width,
comparison_cost, sort_mem,
limit_tuples);
* Determines and returns the cost of an Append node.
*/
void
-cost_append(AppendPath *apath, PlannerInfo *root)
+cost_append(AppendPath *apath)
{
ListCell *l;
* any child.
*/
cost_sort(&sort_path,
- root,
+ NULL, /* doesn't currently need root */
pathkeys,
subpath->total_cost,
subpath->rows,
if (opcintype == cur_em->em_datatype &&
equal(expr, cur_em->em_expr))
- {
- /*
- * Match!
- *
- * Copy the sortref if it wasn't set yet. That may happen if
- * the ec was constructed from WHERE clause, i.e. it doesn't
- * have a target reference at all.
- */
- if (cur_ec->ec_sortref == 0 && sortref > 0)
- cur_ec->ec_sortref = sortref;
- return cur_ec;
- }
+ return cur_ec; /* Match! */
}
}
*/
#include "postgres.h"
-#include <float.h>
-
-#include "miscadmin.h"
#include "access/stratnum.h"
#include "catalog/pg_opfamily.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/plannodes.h"
-#include "optimizer/cost.h"
#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "partitioning/partbounds.h"
#include "utils/lsyscache.h"
-#include "utils/selfuncs.h"
-/* Consider reordering of GROUP BY keys? */
-bool enable_group_by_reordering = true;
static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
return false;
}
-/*
- * group_keys_reorder_by_pathkeys
- * Reorder GROUP BY keys to match pathkeys of input path.
- *
- * Function returns new lists (pathkeys and clauses), original GROUP BY lists
- * stay untouched.
- *
- * Returns the number of GROUP BY keys with a matching pathkey.
- */
-int
-group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
- List **group_clauses)
-{
- List *new_group_pathkeys = NIL,
- *new_group_clauses = NIL;
- ListCell *lc;
- int n;
-
- if (pathkeys == NIL || *group_pathkeys == NIL)
- return 0;
-
- /*
- * Walk the pathkeys (determining ordering of the input path) and see if
- * there's a matching GROUP BY key. If we find one, we append it to the
- * list, and do the same for the clauses.
- *
- * Once we find the first pathkey without a matching GROUP BY key, the
- * rest of the pathkeys are useless and can't be used to evaluate the
- * grouping, so we abort the loop and ignore the remaining pathkeys.
- *
- * XXX Pathkeys are built in a way to allow simply comparing pointers.
- */
- foreach(lc, pathkeys)
- {
- PathKey *pathkey = (PathKey *) lfirst(lc);
- SortGroupClause *sgc;
-
- /* abort on first mismatch */
- if (!list_member_ptr(*group_pathkeys, pathkey))
- break;
-
- new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
-
- sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
- *group_clauses);
-
- new_group_clauses = lappend(new_group_clauses, sgc);
- }
-
- /* remember the number of pathkeys with a matching GROUP BY key */
- n = list_length(new_group_pathkeys);
-
- /* append the remaining group pathkeys (will be treated as not sorted) */
- *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
- *group_pathkeys);
- *group_clauses = list_concat_unique_ptr(new_group_clauses,
- *group_clauses);
-
- return n;
-}
-
-/*
- * Used to generate all permutations of a pathkey list.
- */
-typedef struct PathkeyMutatorState
-{
- List *elemsList;
- ListCell **elemCells;
- void **elems;
- int *positions;
- int mutatorNColumns;
- int count;
-} PathkeyMutatorState;
-
-
-/*
- * PathkeyMutatorInit
- * Initialize state of the permutation generator.
- *
- * We want to generate permutations of elements in the "elems" list. We may want
- * to skip some number of elements at the beginning (when treating as presorted)
- * or at the end (we only permute a limited number of group keys).
- *
- * The list is decomposed into elements, and we also keep pointers to individual
- * cells. This allows us to build the permuted list quickly and cheaply, without
- * creating any copies.
- */
-static void
-PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end)
-{
- int i;
- int n = end - start;
- ListCell *lc;
-
- memset(state, 0, sizeof(*state));
-
- state->mutatorNColumns = n;
-
- state->elemsList = list_copy(elems);
-
- state->elems = palloc(sizeof(void *) * n);
- state->elemCells = palloc(sizeof(ListCell *) * n);
- state->positions = palloc(sizeof(int) * n);
-
- i = 0;
- for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
- {
- state->elemCells[i] = lc;
- state->elems[i] = lfirst(lc);
- state->positions[i] = i + 1;
- i++;
-
- if (i >= n)
- break;
- }
-}
-
-/* Swap two elements of an array. */
-static void
-PathkeyMutatorSwap(int *a, int i, int j)
-{
- int s = a[i];
-
- a[i] = a[j];
- a[j] = s;
-}
-
-/*
- * Generate the next permutation of elements.
- */
-static bool
-PathkeyMutatorNextSet(int *a, int n)
-{
- int j,
- k,
- l,
- r;
-
- j = n - 2;
-
- while (j >= 0 && a[j] >= a[j + 1])
- j--;
-
- if (j < 0)
- return false;
-
- k = n - 1;
-
- while (k >= 0 && a[j] >= a[k])
- k--;
-
- PathkeyMutatorSwap(a, j, k);
-
- l = j + 1;
- r = n - 1;
-
- while (l < r)
- PathkeyMutatorSwap(a, l++, r--);
-
- return true;
-}
-
-/*
- * PathkeyMutatorNext
- * Generate the next permutation of list of elements.
- *
- * Returns the next permutation (as a list of elements) or NIL if there are no
- * more permutations.
- */
-static List *
-PathkeyMutatorNext(PathkeyMutatorState *state)
-{
- int i;
-
- state->count++;
-
- /* first permutation is original list */
- if (state->count == 1)
- return state->elemsList;
-
- /* when there are no more permutations, return NIL */
- if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns))
- {
- pfree(state->elems);
- pfree(state->elemCells);
- pfree(state->positions);
-
- list_free(state->elemsList);
-
- return NIL;
- }
-
- /* update the list cells to point to the right elements */
- for (i = 0; i < state->mutatorNColumns; i++)
- lfirst(state->elemCells[i]) =
- (void *) state->elems[state->positions[i] - 1];
-
- return state->elemsList;
-}
-
-/*
- * Cost of comparing pathkeys.
- */
-typedef struct PathkeySortCost
-{
- Cost cost;
- PathKey *pathkey;
-} PathkeySortCost;
-
-static int
-pathkey_sort_cost_comparator(const void *_a, const void *_b)
-{
- const PathkeySortCost *a = (PathkeySortCost *) _a;
- const PathkeySortCost *b = (PathkeySortCost *) _b;
-
- if (a->cost < b->cost)
- return -1;
- else if (a->cost == b->cost)
- return 0;
- return 1;
-}
-
-/*
- * get_cheapest_group_keys_order
- * Reorders the group pathkeys / clauses to minimize the comparison cost.
- *
- * Given the list of pathkeys in '*group_pathkeys', we try to arrange these
- * in an order that minimizes the sort costs that will be incurred by the
- * GROUP BY. The costs mainly depend on the cost of the sort comparator
- * function(s) and the number of distinct values in each column of the GROUP
- * BY clause (*group_clauses). Sorting on subsequent columns is only required
- * for tiebreak situations where two values sort equally.
- *
- * In case the input is partially sorted, only the remaining pathkeys are
- * considered. 'n_preordered' denotes how many of the leading *group_pathkeys
- * the input is presorted by.
- *
- * Returns true and sets *group_pathkeys and *group_clauses to the newly
- * ordered versions of the lists that were passed in via these parameters.
- * If no reordering was deemed necessary then we return false, in which case
- * the *group_pathkeys and *group_clauses lists are left untouched. The
- * original *group_pathkeys and *group_clauses parameter values are never
- * destructively modified in place.
- */
-static bool
-get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
- List **group_pathkeys, List **group_clauses,
- int n_preordered)
-{
- List *new_group_pathkeys = NIL,
- *new_group_clauses = NIL,
- *var_group_pathkeys;
-
- ListCell *cell;
- PathkeyMutatorState mstate;
- double cheapest_sort_cost = DBL_MAX;
-
- int nFreeKeys;
- int nToPermute;
-
- /* If there are less than 2 unsorted pathkeys, we're done. */
- if (list_length(*group_pathkeys) - n_preordered < 2)
- return false;
-
- /*
- * We could exhaustively cost all possible orderings of the pathkeys, but
- * for a large number of pathkeys it might be prohibitively expensive. So
- * we try to apply simple cheap heuristics first - we sort the pathkeys by
- * sort cost (as if the pathkey was sorted independently) and then check
- * only the four cheapest pathkeys. The remaining pathkeys are kept
- * ordered by cost.
- *
- * XXX This is a very simple heuristics, but likely to work fine for most
- * cases (because the number of GROUP BY clauses tends to be lower than
- * 4). But it ignores how the number of distinct values in each pathkey
- * affects the following steps. It might be better to use "more expensive"
- * pathkey first if it has many distinct values, because it then limits
- * the number of comparisons for the remaining pathkeys. But evaluating
- * that is likely quite the expensive.
- */
- nFreeKeys = list_length(*group_pathkeys) - n_preordered;
- nToPermute = 4;
- if (nFreeKeys > nToPermute)
- {
- PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys);
- PathkeySortCost *cost = costs;
-
- /*
- * Estimate cost for sorting individual pathkeys skipping the
- * pre-ordered pathkeys.
- */
- for_each_from(cell, *group_pathkeys, n_preordered)
- {
- PathKey *pathkey = (PathKey *) lfirst(cell);
- List *to_cost = list_make1(pathkey);
-
- cost->pathkey = pathkey;
- cost->cost = cost_sort_estimate(root, to_cost, 0, nrows);
- cost++;
-
- list_free(to_cost);
- }
-
- /* sort the pathkeys by sort cost in ascending order */
- qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
-
- /*
- * Rebuild the list of pathkeys - first the preordered ones, then the
- * rest ordered by cost.
- */
- new_group_pathkeys = list_copy_head(*group_pathkeys, n_preordered);
-
- for (int i = 0; i < nFreeKeys; i++)
- new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
-
- pfree(costs);
- }
- else
- {
- /* Copy the list, so that we can free the new list by list_free. */
- new_group_pathkeys = list_copy(*group_pathkeys);
- nToPermute = nFreeKeys;
- }
-
- Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys));
-
- /*
- * Generate pathkey lists with permutations of the first nToPermute
- * pathkeys.
- *
- * XXX We simply calculate sort cost for each individual pathkey list, but
- * there's room for two dynamic programming optimizations here. Firstly,
- * we may pass the current "best" cost to cost_sort_estimate so that it
- * can "abort" if the estimated pathkeys list exceeds it. Secondly, it
- * could pass the return information about the position when it exceeded
- * the cost, and we could skip all permutations with the same prefix.
- *
- * Imagine we've already found ordering with cost C1, and we're evaluating
- * another ordering - cost_sort_estimate() calculates cost by adding the
- * pathkeys one by one (more or less), and the cost only grows. If at any
- * point it exceeds C1, it can't possibly be "better" so we can discard
- * it. But we also know that we can discard all ordering with the same
- * prefix, because if we're estimating (a,b,c,d) and we exceed C1 at (a,b)
- * then the same thing will happen for any ordering with this prefix.
- */
- PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute);
-
- while ((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL)
- {
- Cost cost;
-
- cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
-
- if (cost < cheapest_sort_cost)
- {
- list_free(new_group_pathkeys);
- new_group_pathkeys = list_copy(var_group_pathkeys);
- cheapest_sort_cost = cost;
- }
- }
-
- /* Reorder the group clauses according to the reordered pathkeys. */
- foreach(cell, new_group_pathkeys)
- {
- PathKey *pathkey = (PathKey *) lfirst(cell);
-
- new_group_clauses = lappend(new_group_clauses,
- get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
- *group_clauses));
- }
-
- /* Just append the rest GROUP BY clauses */
- new_group_clauses = list_concat_unique_ptr(new_group_clauses,
- *group_clauses);
-
- *group_pathkeys = new_group_pathkeys;
- *group_clauses = new_group_clauses;
-
- return true;
-}
-
-/*
- * get_useful_group_keys_orderings
- * Determine which orderings of GROUP BY keys are potentially interesting.
- *
- * Returns list of PathKeyInfo items, each representing an interesting ordering
- * of GROUP BY keys. Each item stores pathkeys and clauses in matching order.
- *
- * The function considers (and keeps) multiple group by orderings:
- *
- * - the original ordering, as specified by the GROUP BY clause
- *
- * - GROUP BY keys reordered to minimize the sort cost
- *
- * - GROUP BY keys reordered to match path ordering (as much as possible), with
- * the tail reordered to minimize the sort cost
- *
- * - GROUP BY keys to match target ORDER BY clause (as much as possible), with
- * the tail reordered to minimize the sort cost
- *
- * There are other potentially interesting orderings (e.g. it might be best to
- * match the first ORDER BY key, order the remaining keys differently and then
- * rely on the incremental sort to fix this), but we ignore those for now. To
- * make this work we'd have to pretty much generate all possible permutations.
- */
-List *
-get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
- List *path_pathkeys,
- List *group_pathkeys, List *group_clauses,
- List *aggregate_pathkeys)
-{
- Query *parse = root->parse;
- List *infos = NIL;
- PathKeyInfo *info;
- int n_preordered = 0;
-
- List *pathkeys = group_pathkeys;
- List *clauses = group_clauses;
-
- /* always return at least the original pathkeys/clauses */
- info = makeNode(PathKeyInfo);
- if (aggregate_pathkeys != NIL)
- info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
- else
- info->pathkeys = pathkeys;
- info->clauses = clauses;
-
- infos = lappend(infos, info);
-
- /*
- * Should we try generating alternative orderings of the group keys? If
- * not, we produce only the order specified in the query, i.e. the
- * optimization is effectively disabled.
- */
- if (!enable_group_by_reordering)
- return infos;
-
- /* for grouping sets we can't do any reordering */
- if (parse->groupingSets)
- return infos;
-
- /*
- * Try reordering pathkeys to minimize the sort cost, ignoring both the
- * target ordering (ORDER BY) and ordering of the input path.
- */
- if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
- n_preordered))
- {
- info = makeNode(PathKeyInfo);
- if (aggregate_pathkeys != NIL)
- info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
- else
- info->pathkeys = pathkeys;
- info->clauses = clauses;
-
- infos = lappend(infos, info);
- }
-
- /*
- * If the path is sorted in some way, try reordering the group keys to
- * match as much of the ordering as possible - we get this sort for free
- * (mostly).
- *
- * We must not do this when there are no grouping sets, because those use
- * more complex logic to decide the ordering.
- *
- * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
- * more a complement, because it allows benefiting from incremental sort
- * as much as possible.
- *
- * XXX This does nothing if (n_preordered == 0). We shouldn't create the
- * info in this case.
- */
- if (path_pathkeys)
- {
- n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys,
- &pathkeys,
- &clauses);
-
- /* reorder the tail to minimize sort cost */
- get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
- n_preordered);
-
- /*
- * reorder the tail to minimize sort cost
- *
- * XXX Ignore the return value - there may be nothing to reorder, in
- * which case get_cheapest_group_keys_order returns false. But we
- * still want to keep the keys reordered to path_pathkeys.
- */
- info = makeNode(PathKeyInfo);
- if (aggregate_pathkeys != NIL)
- info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
- else
- info->pathkeys = pathkeys;
- info->clauses = clauses;
-
- infos = lappend(infos, info);
- }
-
- /*
- * Try reordering pathkeys to minimize the sort cost (this time consider
- * the ORDER BY clause, but only if set debug_group_by_match_order_by).
- */
- if (root->sort_pathkeys)
- {
- n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
- &pathkeys,
- &clauses);
-
- /*
- * reorder the tail to minimize sort cost
- *
- * XXX Ignore the return value - there may be nothing to reorder, in
- * which case get_cheapest_group_keys_order returns false. But we
- * still want to keep the keys reordered to sort_pathkeys.
- */
- get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
- n_preordered);
-
- /* keep the group keys reordered to match ordering of input path */
- info = makeNode(PathKeyInfo);
- if (aggregate_pathkeys != NIL)
- info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
- else
- info->pathkeys = pathkeys;
- info->clauses = clauses;
-
- infos = lappend(infos, info);
- }
-
- return infos;
-}
-
/*
* pathkeys_count_contained_in
* Same as pathkeys_contained_in, but also sets length of longest
return n_common_pathkeys;
}
-/*
- * pathkeys_useful_for_grouping
- * Count the number of pathkeys that are useful for grouping (instead of
- * explicit sort)
- *
- * Group pathkeys could be reordered to benefit from the ordering. The
- * ordering may not be "complete" and may require incremental sort, but that's
- * fine. So we simply count prefix pathkeys with a matching group key, and
- * stop once we find the first pathkey without a match.
- *
- * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
- * pathkeys are useful for grouping, and we might do incremental sort to get
- * path ordered by (a,b,e).
- *
- * This logic is necessary to retain paths with ordering not matching grouping
- * keys directly, without the reordering.
- *
- * Returns the length of pathkey prefix with matching group keys.
- */
-static int
-pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
-{
- ListCell *key;
- int n = 0;
-
- /* no special ordering requested for grouping */
- if (root->group_pathkeys == NIL)
- return 0;
-
- /* unordered path */
- if (pathkeys == NIL)
- return 0;
-
- /* walk the pathkeys and search for matching group key */
- foreach(key, pathkeys)
- {
- PathKey *pathkey = (PathKey *) lfirst(key);
-
- /* no matching group key, we're done */
- if (!list_member_ptr(root->group_pathkeys, pathkey))
- break;
-
- n++;
- }
-
- return n;
-}
-
/*
* truncate_useless_pathkeys
* Shorten the given pathkey list to just the useful pathkeys.
nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
- if (nuseful2 > nuseful)
- nuseful = nuseful2;
- nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
if (nuseful2 > nuseful)
nuseful = nuseful2;
{
if (rel->joininfo != NIL || rel->has_eclass_joins)
return true; /* might be able to use pathkeys for merging */
- if (root->group_pathkeys != NIL)
- return true; /* might be able to use pathkeys for grouping */
if (root->query_pathkeys != NIL)
return true; /* might be able to use them for ordering */
return false; /* definitely useless */
*
* In principle it might be interesting to consider other orderings of the
* GROUP BY elements, which could match the sort ordering of other
- * possible plans (eg an indexscan) and thereby reduce cost. However, we
- * don't yet have sufficient information to do that here, so that's left until
- * later in planning. See get_useful_group_keys_orderings().
+ * possible plans (eg an indexscan) and thereby reduce cost. We don't
+ * bother with that, though. Hashed grouping will frequently win anyway.
*
* Note: we need no comparable processing of the distinctClause because
* the parser already enforced that that matches ORDER BY.
if (can_sort)
{
- List *group_pathkeys;
- List *orderAggPathkeys;
- int numAggPathkeys;
-
- numAggPathkeys = list_length(root->group_pathkeys) -
- root->num_groupby_pathkeys;
-
- if (numAggPathkeys > 0)
- {
- group_pathkeys = list_copy_head(root->group_pathkeys,
- root->num_groupby_pathkeys);
- orderAggPathkeys = list_copy_tail(root->group_pathkeys,
- root->num_groupby_pathkeys);
- }
- else
- {
- group_pathkeys = root->group_pathkeys;
- orderAggPathkeys = NIL;
- }
-
/*
* Use any available suitably-sorted path as input, and also consider
* sorting the cheapest-total path.
*/
foreach(lc, input_rel->pathlist)
{
- ListCell *lc2;
Path *path = (Path *) lfirst(lc);
Path *path_original = path;
+ bool is_sorted;
+ int presorted_keys;
- List *pathkey_orderings = NIL;
-
- List *group_clauses = parse->groupClause;
-
- /* generate alternative group orderings that might be useful */
- pathkey_orderings = get_useful_group_keys_orderings(root,
- path->rows,
- path->pathkeys,
- group_pathkeys,
- group_clauses,
- orderAggPathkeys);
-
- Assert(pathkey_orderings != NIL);
+ is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+ path->pathkeys,
+ &presorted_keys);
- /* process all potentially interesting grouping reorderings */
- foreach(lc2, pathkey_orderings)
+ if (path == cheapest_path || is_sorted)
{
- bool is_sorted;
- int presorted_keys = 0;
- PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
-
- /* restore the path (we replace it in the loop) */
- path = path_original;
-
- is_sorted = pathkeys_count_contained_in(info->pathkeys,
- path->pathkeys,
- &presorted_keys);
-
- if (path == cheapest_path || is_sorted)
- {
- /* Sort the cheapest-total path if it isn't already sorted */
- if (!is_sorted)
- path = (Path *) create_sort_path(root,
- grouped_rel,
- path,
- info->pathkeys,
- -1.0);
-
- /* Now decide what to stick atop it */
- if (parse->groupingSets)
- {
- consider_groupingsets_paths(root, grouped_rel,
- path, true, can_hash,
- gd, agg_costs, dNumGroups);
- }
- else if (parse->hasAggs)
- {
- /*
- * We have aggregation, possibly with plain GROUP BY.
- * Make an AggPath.
- */
- add_path(grouped_rel, (Path *)
- create_agg_path(root,
- grouped_rel,
- path,
- grouped_rel->reltarget,
- info->clauses ? AGG_SORTED : AGG_PLAIN,
- AGGSPLIT_SIMPLE,
- info->clauses,
- havingQual,
- agg_costs,
- dNumGroups));
- }
- else if (group_clauses)
- {
- /*
- * We have GROUP BY without aggregation or grouping
- * sets. Make a GroupPath.
- */
- add_path(grouped_rel, (Path *)
- create_group_path(root,
- grouped_rel,
- path,
- info->clauses,
- havingQual,
- dNumGroups));
- }
- else
- {
- /* Other cases should have been handled above */
- Assert(false);
- }
- }
-
- /*
- * Now we may consider incremental sort on this path, but only
- * when the path is not already sorted and when incremental
- * sort is enabled.
- */
- if (is_sorted || !enable_incremental_sort)
- continue;
-
- /* Restore the input path (we might have added Sort on top). */
- path = path_original;
-
- /* no shared prefix, no point in building incremental sort */
- if (presorted_keys == 0)
- continue;
-
- /*
- * We should have already excluded pathkeys of length 1
- * because then presorted_keys > 0 would imply is_sorted was
- * true.
- */
- Assert(list_length(root->group_pathkeys) != 1);
-
- path = (Path *) create_incremental_sort_path(root,
- grouped_rel,
- path,
- info->pathkeys,
- presorted_keys,
- -1.0);
+ /* Sort the cheapest-total path if it isn't already sorted */
+ if (!is_sorted)
+ path = (Path *) create_sort_path(root,
+ grouped_rel,
+ path,
+ root->group_pathkeys,
+ -1.0);
/* Now decide what to stick atop it */
if (parse->groupingSets)
grouped_rel,
path,
grouped_rel->reltarget,
- info->clauses ? AGG_SORTED : AGG_PLAIN,
+ parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_SIMPLE,
- info->clauses,
+ parse->groupClause,
havingQual,
agg_costs,
dNumGroups));
create_group_path(root,
grouped_rel,
path,
- info->clauses,
+ parse->groupClause,
havingQual,
dNumGroups));
}
Assert(false);
}
}
+
+ /*
+ * Now we may consider incremental sort on this path, but only
+ * when the path is not already sorted and when incremental sort
+ * is enabled.
+ */
+ if (is_sorted || !enable_incremental_sort)
+ continue;
+
+ /* Restore the input path (we might have added Sort on top). */
+ path = path_original;
+
+ /* no shared prefix, no point in building incremental sort */
+ if (presorted_keys == 0)
+ continue;
+
+ /*
+ * We should have already excluded pathkeys of length 1 because
+ * then presorted_keys > 0 would imply is_sorted was true.
+ */
+ Assert(list_length(root->group_pathkeys) != 1);
+
+ path = (Path *) create_incremental_sort_path(root,
+ grouped_rel,
+ path,
+ root->group_pathkeys,
+ presorted_keys,
+ -1.0);
+
+ /* Now decide what to stick atop it */
+ if (parse->groupingSets)
+ {
+ consider_groupingsets_paths(root, grouped_rel,
+ path, true, can_hash,
+ gd, agg_costs, dNumGroups);
+ }
+ else if (parse->hasAggs)
+ {
+ /*
+ * We have aggregation, possibly with plain GROUP BY. Make an
+ * AggPath.
+ */
+ add_path(grouped_rel, (Path *)
+ create_agg_path(root,
+ grouped_rel,
+ path,
+ grouped_rel->reltarget,
+ parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+ AGGSPLIT_SIMPLE,
+ parse->groupClause,
+ havingQual,
+ agg_costs,
+ dNumGroups));
+ }
+ else if (parse->groupClause)
+ {
+ /*
+ * We have GROUP BY without aggregation or grouping sets. Make
+ * a GroupPath.
+ */
+ add_path(grouped_rel, (Path *)
+ create_group_path(root,
+ grouped_rel,
+ path,
+ parse->groupClause,
+ havingQual,
+ dNumGroups));
+ }
+ else
+ {
+ /* Other cases should have been handled above */
+ Assert(false);
+ }
}
/*
{
foreach(lc, partially_grouped_rel->pathlist)
{
- ListCell *lc2;
Path *path = (Path *) lfirst(lc);
Path *path_original = path;
- List *pathkey_orderings = NIL;
- List *group_clauses = parse->groupClause;
-
- /* generate alternative group orderings that might be useful */
- pathkey_orderings = get_useful_group_keys_orderings(root,
- path->rows,
- path->pathkeys,
- group_pathkeys,
- group_clauses,
- orderAggPathkeys);
+ bool is_sorted;
+ int presorted_keys;
- Assert(pathkey_orderings != NIL);
+ is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+ path->pathkeys,
+ &presorted_keys);
- /* process all potentially interesting grouping reorderings */
- foreach(lc2, pathkey_orderings)
+ /*
+ * Insert a Sort node, if required. But there's no point in
+ * sorting anything but the cheapest path.
+ */
+ if (!is_sorted)
{
- bool is_sorted;
- int presorted_keys = 0;
- PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
-
- /* restore the path (we replace it in the loop) */
- path = path_original;
-
- is_sorted = pathkeys_count_contained_in(info->pathkeys,
- path->pathkeys,
- &presorted_keys);
-
- /*
- * Insert a Sort node, if required. But there's no point
- * in sorting anything but the cheapest path.
- */
- if (!is_sorted)
- {
- if (path != partially_grouped_rel->cheapest_total_path)
- continue;
- path = (Path *) create_sort_path(root,
- grouped_rel,
- path,
- info->pathkeys,
- -1.0);
- }
+ if (path != partially_grouped_rel->cheapest_total_path)
+ continue;
+ path = (Path *) create_sort_path(root,
+ grouped_rel,
+ path,
+ root->group_pathkeys,
+ -1.0);
+ }
- if (parse->hasAggs)
- add_path(grouped_rel, (Path *)
- create_agg_path(root,
- grouped_rel,
- path,
- grouped_rel->reltarget,
- info->clauses ? AGG_SORTED : AGG_PLAIN,
- AGGSPLIT_FINAL_DESERIAL,
- info->clauses,
- havingQual,
- agg_final_costs,
- dNumGroups));
- else
- add_path(grouped_rel, (Path *)
- create_group_path(root,
- grouped_rel,
- path,
- info->clauses,
- havingQual,
- dNumGroups));
+ if (parse->hasAggs)
+ add_path(grouped_rel, (Path *)
+ create_agg_path(root,
+ grouped_rel,
+ path,
+ grouped_rel->reltarget,
+ parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+ AGGSPLIT_FINAL_DESERIAL,
+ parse->groupClause,
+ havingQual,
+ agg_final_costs,
+ dNumGroups));
+ else
+ add_path(grouped_rel, (Path *)
+ create_group_path(root,
+ grouped_rel,
+ path,
+ parse->groupClause,
+ havingQual,
+ dNumGroups));
- /*
- * Now we may consider incremental sort on this path, but
- * only when the path is not already sorted and when
- * incremental sort is enabled.
- */
- if (is_sorted || !enable_incremental_sort)
- continue;
+ /*
+ * Now we may consider incremental sort on this path, but only
+ * when the path is not already sorted and when incremental
+ * sort is enabled.
+ */
+ if (is_sorted || !enable_incremental_sort)
+ continue;
- /*
- * Restore the input path (we might have added Sort on
- * top).
- */
- path = path_original;
+ /* Restore the input path (we might have added Sort on top). */
+ path = path_original;
- /*
- * no shared prefix, not point in building incremental
- * sort
- */
- if (presorted_keys == 0)
- continue;
+ /* no shared prefix, not point in building incremental sort */
+ if (presorted_keys == 0)
+ continue;
- /*
- * We should have already excluded pathkeys of length 1
- * because then presorted_keys > 0 would imply is_sorted
- * was true.
- */
- Assert(list_length(root->group_pathkeys) != 1);
+ /*
+ * We should have already excluded pathkeys of length 1
+ * because then presorted_keys > 0 would imply is_sorted was
+ * true.
+ */
+ Assert(list_length(root->group_pathkeys) != 1);
- path = (Path *) create_incremental_sort_path(root,
- grouped_rel,
- path,
- info->pathkeys,
- presorted_keys,
- -1.0);
+ path = (Path *) create_incremental_sort_path(root,
+ grouped_rel,
+ path,
+ root->group_pathkeys,
+ presorted_keys,
+ -1.0);
- if (parse->hasAggs)
- add_path(grouped_rel, (Path *)
- create_agg_path(root,
- grouped_rel,
- path,
- grouped_rel->reltarget,
- info->clauses ? AGG_SORTED : AGG_PLAIN,
- AGGSPLIT_FINAL_DESERIAL,
- info->clauses,
- havingQual,
- agg_final_costs,
- dNumGroups));
- else
- add_path(grouped_rel, (Path *)
- create_group_path(root,
- grouped_rel,
- path,
- info->clauses,
- havingQual,
- dNumGroups));
- }
+ if (parse->hasAggs)
+ add_path(grouped_rel, (Path *)
+ create_agg_path(root,
+ grouped_rel,
+ path,
+ grouped_rel->reltarget,
+ parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+ AGGSPLIT_FINAL_DESERIAL,
+ parse->groupClause,
+ havingQual,
+ agg_final_costs,
+ dNumGroups));
+ else
+ add_path(grouped_rel, (Path *)
+ create_group_path(root,
+ grouped_rel,
+ path,
+ parse->groupClause,
+ havingQual,
+ dNumGroups));
}
}
}
if (can_sort && cheapest_total_path != NULL)
{
- List *group_pathkeys;
- List *orderAggPathkeys;
- int numAggPathkeys;
-
- numAggPathkeys = list_length(root->group_pathkeys) -
- root->num_groupby_pathkeys;
-
- if (numAggPathkeys > 0)
- {
- group_pathkeys = list_copy_head(root->group_pathkeys,
- root->num_groupby_pathkeys);
- orderAggPathkeys = list_copy_tail(root->group_pathkeys,
- root->num_groupby_pathkeys);
- }
- else
- {
- group_pathkeys = root->group_pathkeys;
- orderAggPathkeys = NIL;
- }
-
/* This should have been checked previously */
Assert(parse->hasAggs || parse->groupClause);
*/
foreach(lc, input_rel->pathlist)
{
- ListCell *lc2;
Path *path = (Path *) lfirst(lc);
- Path *path_save = path;
- List *pathkey_orderings = NIL;
- List *group_clauses = parse->groupClause;
-
- /* generate alternative group orderings that might be useful */
- pathkey_orderings = get_useful_group_keys_orderings(root,
- path->rows,
- path->pathkeys,
- group_pathkeys,
- group_clauses,
- orderAggPathkeys);
-
- Assert(pathkey_orderings != NIL);
-
- /* process all potentially interesting grouping reorderings */
- foreach(lc2, pathkey_orderings)
- {
- bool is_sorted;
- int presorted_keys = 0;
- PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
-
- /* restore the path (we replace it in the loop) */
- path = path_save;
-
- is_sorted = pathkeys_count_contained_in(info->pathkeys,
- path->pathkeys,
- &presorted_keys);
+ bool is_sorted;
- if (path == cheapest_total_path || is_sorted)
- {
- /* Sort the cheapest partial path, if it isn't already */
- if (!is_sorted)
- {
- path = (Path *) create_sort_path(root,
- partially_grouped_rel,
- path,
- info->pathkeys,
- -1.0);
- }
+ is_sorted = pathkeys_contained_in(root->group_pathkeys,
+ path->pathkeys);
+ if (path == cheapest_total_path || is_sorted)
+ {
+ /* Sort the cheapest partial path, if it isn't already */
+ if (!is_sorted)
+ path = (Path *) create_sort_path(root,
+ partially_grouped_rel,
+ path,
+ root->group_pathkeys,
+ -1.0);
- if (parse->hasAggs)
- add_path(partially_grouped_rel, (Path *)
- create_agg_path(root,
- partially_grouped_rel,
- path,
- partially_grouped_rel->reltarget,
- info->clauses ? AGG_SORTED : AGG_PLAIN,
- AGGSPLIT_INITIAL_SERIAL,
- info->clauses,
- NIL,
- agg_partial_costs,
- dNumPartialGroups));
- else
- add_path(partially_grouped_rel, (Path *)
- create_group_path(root,
- partially_grouped_rel,
- path,
- info->clauses,
- NIL,
- dNumPartialGroups));
- }
+ if (parse->hasAggs)
+ add_path(partially_grouped_rel, (Path *)
+ create_agg_path(root,
+ partially_grouped_rel,
+ path,
+ partially_grouped_rel->reltarget,
+ parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+ AGGSPLIT_INITIAL_SERIAL,
+ parse->groupClause,
+ NIL,
+ agg_partial_costs,
+ dNumPartialGroups));
+ else
+ add_path(partially_grouped_rel, (Path *)
+ create_group_path(root,
+ partially_grouped_rel,
+ path,
+ parse->groupClause,
+ NIL,
+ dNumPartialGroups));
}
}
* We can also skip the entire loop when we only have a single-item
* group_pathkeys because then we can't possibly have a presorted
* prefix of the list without having the list be fully sorted.
- *
- * XXX Shouldn't this also consider the group-key-reordering?
*/
if (enable_incremental_sort && list_length(root->group_pathkeys) > 1)
{
if (can_sort && cheapest_partial_path != NULL)
{
- List *group_pathkeys;
- List *orderAggPathkeys;
- int numAggPathkeys;
-
- numAggPathkeys = list_length(root->group_pathkeys) -
- root->num_groupby_pathkeys;
-
- if (numAggPathkeys > 0)
- {
- group_pathkeys = list_copy_head(root->group_pathkeys,
- root->num_groupby_pathkeys);
- orderAggPathkeys = list_copy_tail(root->group_pathkeys,
- root->num_groupby_pathkeys);
- }
- else
- {
- group_pathkeys = root->group_pathkeys;
- orderAggPathkeys = NIL;
- }
-
/* Similar to above logic, but for partial paths. */
foreach(lc, input_rel->partial_pathlist)
{
- ListCell *lc2;
Path *path = (Path *) lfirst(lc);
Path *path_original = path;
- List *pathkey_orderings = NIL;
- List *group_clauses = parse->groupClause;
-
- /* generate alternative group orderings that might be useful */
- pathkey_orderings = get_useful_group_keys_orderings(root,
- path->rows,
- path->pathkeys,
- group_pathkeys,
- group_clauses,
- orderAggPathkeys);
+ bool is_sorted;
+ int presorted_keys;
- Assert(pathkey_orderings != NIL);
+ is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+ path->pathkeys,
+ &presorted_keys);
- /* process all potentially interesting grouping reorderings */
- foreach(lc2, pathkey_orderings)
+ if (path == cheapest_partial_path || is_sorted)
{
- bool is_sorted;
- int presorted_keys = 0;
- PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
-
- /* restore the path (we replace it in the loop) */
- path = path_original;
-
- is_sorted = pathkeys_count_contained_in(info->pathkeys,
- path->pathkeys,
- &presorted_keys);
-
- if (path == cheapest_partial_path || is_sorted)
- {
-
- /* Sort the cheapest partial path, if it isn't already */
- if (!is_sorted)
- {
- path = (Path *) create_sort_path(root,
- partially_grouped_rel,
- path,
- info->pathkeys,
- -1.0);
- }
-
- if (parse->hasAggs)
- add_partial_path(partially_grouped_rel, (Path *)
- create_agg_path(root,
- partially_grouped_rel,
- path,
- partially_grouped_rel->reltarget,
- info->clauses ? AGG_SORTED : AGG_PLAIN,
- AGGSPLIT_INITIAL_SERIAL,
- info->clauses,
- NIL,
- agg_partial_costs,
- dNumPartialPartialGroups));
- else
- add_partial_path(partially_grouped_rel, (Path *)
- create_group_path(root,
- partially_grouped_rel,
- path,
- info->clauses,
- NIL,
- dNumPartialPartialGroups));
- }
-
- /*
- * Now we may consider incremental sort on this path, but only
- * when the path is not already sorted and when incremental
- * sort is enabled.
- */
- if (is_sorted || !enable_incremental_sort)
- continue;
-
- /* Restore the input path (we might have added Sort on top). */
- path = path_original;
-
- /* no shared prefix, not point in building incremental sort */
- if (presorted_keys == 0)
- continue;
-
- /*
- * We should have already excluded pathkeys of length 1
- * because then presorted_keys > 0 would imply is_sorted was
- * true.
- */
- Assert(list_length(root->group_pathkeys) != 1);
-
- path = (Path *) create_incremental_sort_path(root,
- partially_grouped_rel,
- path,
- info->pathkeys,
- presorted_keys,
- -1.0);
+ /* Sort the cheapest partial path, if it isn't already */
+ if (!is_sorted)
+ path = (Path *) create_sort_path(root,
+ partially_grouped_rel,
+ path,
+ root->group_pathkeys,
+ -1.0);
if (parse->hasAggs)
add_partial_path(partially_grouped_rel, (Path *)
partially_grouped_rel,
path,
partially_grouped_rel->reltarget,
- info->clauses ? AGG_SORTED : AGG_PLAIN,
+ parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
- info->clauses,
+ parse->groupClause,
NIL,
agg_partial_costs,
dNumPartialPartialGroups));
create_group_path(root,
partially_grouped_rel,
path,
- info->clauses,
+ parse->groupClause,
NIL,
dNumPartialPartialGroups));
}
+
+ /*
+ * Now we may consider incremental sort on this path, but only
+ * when the path is not already sorted and when incremental sort
+ * is enabled.
+ */
+ if (is_sorted || !enable_incremental_sort)
+ continue;
+
+ /* Restore the input path (we might have added Sort on top). */
+ path = path_original;
+
+ /* no shared prefix, not point in building incremental sort */
+ if (presorted_keys == 0)
+ continue;
+
+ /*
+ * We should have already excluded pathkeys of length 1 because
+ * then presorted_keys > 0 would imply is_sorted was true.
+ */
+ Assert(list_length(root->group_pathkeys) != 1);
+
+ path = (Path *) create_incremental_sort_path(root,
+ partially_grouped_rel,
+ path,
+ root->group_pathkeys,
+ presorted_keys,
+ -1.0);
+
+ if (parse->hasAggs)
+ add_partial_path(partially_grouped_rel, (Path *)
+ create_agg_path(root,
+ partially_grouped_rel,
+ path,
+ partially_grouped_rel->reltarget,
+ parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+ AGGSPLIT_INITIAL_SERIAL,
+ parse->groupClause,
+ NIL,
+ agg_partial_costs,
+ dNumPartialPartialGroups));
+ else
+ add_partial_path(partially_grouped_rel, (Path *)
+ create_group_path(root,
+ partially_grouped_rel,
+ path,
+ parse->groupClause,
+ NIL,
+ dNumPartialPartialGroups));
}
}
pathnode->path.total_cost = child->total_cost;
}
else
- cost_append(pathnode, root);
+ cost_append(pathnode);
/* Must do this last, else cost_append complains */
pathnode->path.pathkeys = child->pathkeys;
}
else
- cost_append(pathnode, root);
+ cost_append(pathnode);
/* If the caller provided a row estimate, override the computed value. */
if (rows >= 0)
estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
List **pgset, EstimationInfo *estinfo)
{
- return estimate_num_groups_incremental(root, groupExprs,
- input_rows, pgset, estinfo,
- NULL, 0);
-}
-
-/*
- * estimate_num_groups_incremental
- * An estimate_num_groups variant, optimized for cases that are adding the
- * expressions incrementally (e.g. one by one).
- */
-double
-estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
- double input_rows,
- List **pgset, EstimationInfo *estinfo,
- List **cache_varinfos, int prevNExprs)
-{
- List *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
+ List *varinfos = NIL;
double srf_multiplier = 1.0;
double numdistinct;
ListCell *l;
- int i,
- j;
+ int i;
/* Zero the estinfo output parameter, if non-NULL */
if (estinfo != NULL)
*/
numdistinct = 1.0;
- i = j = 0;
+ i = 0;
foreach(l, groupExprs)
{
Node *groupexpr = (Node *) lfirst(l);
List *varshere;
ListCell *l2;
- /* was done on previous call */
- if (cache_varinfos && j++ < prevNExprs)
- {
- if (pgset)
- i++; /* to keep in sync with lines below */
- continue;
- }
-
/* is expression in this grouping set? */
if (pgset && !list_member_int(*pgset, i++))
continue;
if (varshere == NIL)
{
if (contain_volatile_functions(groupexpr))
- {
- if (cache_varinfos)
- *cache_varinfos = varinfos;
return input_rows;
- }
continue;
}
}
}
- if (cache_varinfos)
- *cache_varinfos = varinfos;
-
/*
* If now no Vars, we must have an all-constant or all-boolean GROUP BY
* list.
true,
NULL, NULL, NULL
},
- {
- {"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
- gettext_noop("Enables reordering of GROUP BY keys."),
- NULL,
- GUC_EXPLAIN
- },
- &enable_group_by_reordering,
- true,
- NULL, NULL, NULL
- },
{
{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
gettext_noop("Enables genetic query optimization."),
#enable_seqscan = on
#enable_sort = on
#enable_tidscan = on
-#enable_group_by_reordering = on
# - Planner Cost Constants -
bool pk_nulls_first; /* do NULLs come before normal values? */
} PathKey;
-/*
- * Combines information about pathkeys and the associated clauses.
- */
-typedef struct PathKeyInfo
-{
- pg_node_attr(no_read)
-
- NodeTag type;
- List *pathkeys;
- List *clauses;
-} PathKeyInfo;
-
/*
* VolatileFunctionStatus -- allows nodes to cache their
* contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
Cost input_startup_cost, Cost input_total_cost,
double input_tuples, int width, Cost comparison_cost, int sort_mem,
double limit_tuples);
-extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
- int nPresortedKeys, double tuples);
-extern void cost_append(AppendPath *apath, PlannerInfo *root);
+extern void cost_append(AppendPath *apath);
extern void cost_merge_append(Path *path, PlannerInfo *root,
List *pathkeys, int n_streams,
Cost input_startup_cost, Cost input_total_cost,
extern PGDLLIMPORT int geqo_threshold;
extern PGDLLIMPORT int min_parallel_table_scan_size;
extern PGDLLIMPORT int min_parallel_index_scan_size;
-extern PGDLLIMPORT bool enable_group_by_reordering;
/* Hook for plugins to get control in set_rel_pathlist() */
typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
extern bool pathkeys_contained_in(List *keys1, List *keys2);
extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
-extern int group_keys_reorder_by_pathkeys(List *pathkeys,
- List **group_pathkeys,
- List **group_clauses);
-extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
- List *path_pathkeys,
- List *group_pathkeys, List *group_clauses,
- List *aggregate_pathkeys);
extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
Relids required_outer,
CostSelector cost_criterion,
double input_rows, List **pgset,
EstimationInfo *estinfo);
-extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
- double input_rows, List **pgset,
- EstimationInfo *estinfo,
- List **cache_varinfos, int prevNExprs);
-
extern void estimate_hash_bucket_stats(PlannerInfo *root,
Node *hashkey, double nbuckets,
Selectivity *mcv_freq,
select distinct min(f1), max(f1) from minmaxtest;
QUERY PLAN
---------------------------------------------------------------------------------------------
- HashAggregate
- Group Key: $0, $1
+ Unique
InitPlan 1 (returns $0)
-> Limit
-> Merge Append
-> Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
Index Cond: (f1 IS NOT NULL)
-> Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
- -> Result
-(25 rows)
+ -> Sort
+ Sort Key: ($0), ($1)
+ -> Result
+(26 rows)
select distinct min(f1), max(f1) from minmaxtest;
min | max
(1 row)
ROLLBACK;
--- GROUP BY optimization by reorder columns
-SELECT
- i AS id,
- i/2 AS p,
- format('%60s', i%2) AS v,
- i/4 AS c,
- i/8 AS d,
- (random() * (10000/8))::int as e --the same as d but no correlation with p
- INTO btg
-FROM
- generate_series(1, 10000) i;
-VACUUM btg;
-ANALYZE btg;
--- GROUP BY optimization by reorder columns by frequency
-SET enable_hashagg=off;
-SET max_parallel_workers= 0;
-SET max_parallel_workers_per_gather = 0;
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, v;
- QUERY PLAN
------------------------------
- GroupAggregate
- Group Key: p, v
- -> Sort
- Sort Key: p, v
- -> Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p;
- QUERY PLAN
------------------------------
- GroupAggregate
- Group Key: p, v
- -> Sort
- Sort Key: p, v
- -> Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, c;
- QUERY PLAN
------------------------------
- GroupAggregate
- Group Key: p, c, v
- -> Sort
- Sort Key: p, c, v
- -> Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
- QUERY PLAN
------------------------------
- GroupAggregate
- Group Key: v, p, c
- -> Sort
- Sort Key: v, p, c
- -> Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, d, c;
- QUERY PLAN
-------------------------------
- GroupAggregate
- Group Key: p, d, c, v
- -> Sort
- Sort Key: p, d, c, v
- -> Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
- QUERY PLAN
-------------------------------
- GroupAggregate
- Group Key: v, p, d, c
- -> Sort
- Sort Key: v, p, d, c
- -> Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
- QUERY PLAN
-------------------------------
- GroupAggregate
- Group Key: p, v, d, c
- -> Sort
- Sort Key: p, v, d, c
- -> Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, d, e;
- QUERY PLAN
------------------------------
- GroupAggregate
- Group Key: p, d, e
- -> Sort
- Sort Key: p, d, e
- -> Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, e, d;
- QUERY PLAN
------------------------------
- GroupAggregate
- Group Key: p, e, d
- -> Sort
- Sort Key: p, e, d
- -> Seq Scan on btg
-(5 rows)
-
-CREATE STATISTICS btg_dep ON d, e, p FROM btg;
-ANALYZE btg;
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, d, e;
- QUERY PLAN
------------------------------
- GroupAggregate
- Group Key: p, d, e
- -> Sort
- Sort Key: p, d, e
- -> Seq Scan on btg
-(5 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, e, d;
- QUERY PLAN
------------------------------
- GroupAggregate
- Group Key: p, e, d
- -> Sort
- Sort Key: p, e, d
- -> Seq Scan on btg
-(5 rows)
-
--- GROUP BY optimization by reorder columns by index scan
-CREATE INDEX ON btg(p, v);
-SET enable_seqscan=off;
-SET enable_bitmapscan=off;
-VACUUM btg;
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, v;
- QUERY PLAN
-------------------------------------------------
- GroupAggregate
- Group Key: p, v
- -> Index Only Scan using btg_p_v_idx on btg
-(3 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
- QUERY PLAN
-------------------------------------------------
- GroupAggregate
- Group Key: p, v
- -> Index Only Scan using btg_p_v_idx on btg
-(3 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p;
- QUERY PLAN
-------------------------------------------------
- GroupAggregate
- Group Key: p, v
- -> Index Only Scan using btg_p_v_idx on btg
-(3 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
- QUERY PLAN
-------------------------------------------------
- GroupAggregate
- Group Key: p, v
- -> Index Only Scan using btg_p_v_idx on btg
-(3 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, c;
- QUERY PLAN
--------------------------------------------------
- GroupAggregate
- Group Key: p, c, v
- -> Incremental Sort
- Sort Key: p, c, v
- Presorted Key: p
- -> Index Scan using btg_p_v_idx on btg
-(6 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
- QUERY PLAN
--------------------------------------------------
- GroupAggregate
- Group Key: p, v, c
- -> Incremental Sort
- Sort Key: p, v, c
- Presorted Key: p, v
- -> Index Scan using btg_p_v_idx on btg
-(6 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, c, p, d;
- QUERY PLAN
--------------------------------------------------
- GroupAggregate
- Group Key: p, c, d, v
- -> Incremental Sort
- Sort Key: p, c, d, v
- Presorted Key: p
- -> Index Scan using btg_p_v_idx on btg
-(6 rows)
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
- QUERY PLAN
--------------------------------------------------
- GroupAggregate
- Group Key: p, v, c, d
- -> Incremental Sort
- Sort Key: p, v, c, d
- Presorted Key: p, v
- -> Index Scan using btg_p_v_idx on btg
-(6 rows)
-
-DROP TABLE btg;
-RESET enable_hashagg;
-RESET max_parallel_workers;
-RESET max_parallel_workers_per_gather;
-RESET enable_seqscan;
-RESET enable_bitmapscan;
-- Secondly test the case of a parallel aggregate combiner function
-- returning NULL. For that use normal transition function, but a
-- combiner function returning NULL.
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
create index on t (a);
analyze t;
set enable_incremental_sort = off;
------+----+----
bb | 12 | 13
cc | 22 | 23
- ee | 42 |
dd | | 33
+ ee | 42 |
(4 rows)
-- Cases with non-nullable expressions in subquery results;
------+------+------+------+------
bb | 12 | 2 | 13 | 3
cc | 22 | 2 | 23 | 3
- ee | 42 | 2 | |
dd | | | 33 | 3
+ ee | 42 | 2 | |
(4 rows)
SELECT * FROM
explain (costs off)
select d.* from d left join (select distinct * from b) s
on d.a = s.id;
- QUERY PLAN
----------------------------------------------
- Merge Left Join
- Merge Cond: (d.a = s.id)
+ QUERY PLAN
+--------------------------------------
+ Merge Right Join
+ Merge Cond: (b.id = d.a)
+ -> Unique
+ -> Sort
+ Sort Key: b.id, b.c_id
+ -> Seq Scan on b
-> Sort
Sort Key: d.a
-> Seq Scan on d
- -> Sort
- Sort Key: s.id
- -> Subquery Scan on s
- -> HashAggregate
- Group Key: b.id, b.c_id
- -> Seq Scan on b
-(11 rows)
+(9 rows)
-- check join removal works when uniqueness of the join condition is enforced
-- by a UNION
explain (verbose, costs off)
select * from j1
inner join (select distinct id from j3) j3 on j1.id = j3.id;
- QUERY PLAN
------------------------------------
+ QUERY PLAN
+-----------------------------------------
Nested Loop
Output: j1.id, j3.id
Inner Unique: true
Join Filter: (j1.id = j3.id)
- -> HashAggregate
+ -> Unique
Output: j3.id
- Group Key: j3.id
- -> Seq Scan on public.j3
+ -> Sort
Output: j3.id
+ Sort Key: j3.id
+ -> Seq Scan on public.j3
+ Output: j3.id
-> Seq Scan on public.j1
Output: j1.id
-(11 rows)
+(13 rows)
-- ensure group by clause allows the inner to become unique
explain (verbose, costs off)
select * from j1
inner join (select id from j3 group by id) j3 on j1.id = j3.id;
- QUERY PLAN
------------------------------------
+ QUERY PLAN
+-----------------------------------------
Nested Loop
Output: j1.id, j3.id
Inner Unique: true
Join Filter: (j1.id = j3.id)
- -> HashAggregate
+ -> Group
Output: j3.id
Group Key: j3.id
- -> Seq Scan on public.j3
+ -> Sort
Output: j3.id
+ Sort Key: j3.id
+ -> Seq Scan on public.j3
+ Output: j3.id
-> Seq Scan on public.j1
Output: j1.id
-(11 rows)
+(14 rows)
drop table j1;
drop table j2;
explain_merge
--------------------------------------------------------------------
Merge on ex_mtarget t (actual rows=0 loops=1)
- -> Hash Join (actual rows=0 loops=1)
- Hash Cond: (s.a = t.a)
- -> Seq Scan on ex_msource s (actual rows=1 loops=1)
- -> Hash (actual rows=0 loops=1)
- Buckets: xxx Batches: xxx Memory Usage: xxx
+ -> Merge Join (actual rows=0 loops=1)
+ Merge Cond: (t.a = s.a)
+ -> Sort (actual rows=0 loops=1)
+ Sort Key: t.a
+ Sort Method: quicksort Memory: xxx
-> Seq Scan on ex_mtarget t (actual rows=0 loops=1)
Filter: (a < '-1000'::integer)
Rows Removed by Filter: 54
-(9 rows)
+ -> Sort (never executed)
+ Sort Key: s.a
+ -> Seq Scan on ex_msource s (never executed)
+(12 rows)
DROP TABLE ex_msource, ex_mtarget;
DROP FUNCTION explain_merge(text);
-- is not partial agg safe.
EXPLAIN (COSTS OFF)
SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
- QUERY PLAN
---------------------------------------------------------------------------------------------
- Gather Merge
- Workers Planned: 2
- -> Sort
- Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
+ QUERY PLAN
+--------------------------------------------------------------------------------------
+ Sort
+ Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
+ -> Gather
+ Workers Planned: 2
-> Parallel Append
-> GroupAggregate
Group Key: pagg_tab_ml.a
-- When GROUP BY clause does not match; partial aggregation is performed for each partition.
EXPLAIN (COSTS OFF)
SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
- QUERY PLAN
--------------------------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------------------------------
Sort
Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
- -> Finalize HashAggregate
+ -> Finalize GroupAggregate
Group Key: pagg_tab_para.y
Filter: (avg(pagg_tab_para.x) < '12'::numeric)
- -> Gather
+ -> Gather Merge
Workers Planned: 2
- -> Parallel Append
- -> Partial HashAggregate
- Group Key: pagg_tab_para.y
- -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
- -> Partial HashAggregate
- Group Key: pagg_tab_para_1.y
- -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
- -> Partial HashAggregate
- Group Key: pagg_tab_para_2.y
- -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
-(17 rows)
+ -> Sort
+ Sort Key: pagg_tab_para.y
+ -> Parallel Append
+ -> Partial HashAggregate
+ Group Key: pagg_tab_para.y
+ -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
+ -> Partial HashAggregate
+ Group Key: pagg_tab_para_1.y
+ -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
+ -> Partial HashAggregate
+ Group Key: pagg_tab_para_2.y
+ -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
+(19 rows)
SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
y | sum | avg | count
SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
WHERE a BETWEEN 490 AND 510
GROUP BY 1, 2 ORDER BY 1, 2;
- QUERY PLAN
------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------
Group
Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
- -> Sort
+ -> Merge Append
Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
- -> Append
- -> Merge Full Join
- Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
- Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
- -> Sort
- Sort Key: prt1_1.a, prt1_1.b
- -> Seq Scan on prt1_p1 prt1_1
- -> Sort
- Sort Key: p2_1.a, p2_1.b
- -> Seq Scan on prt2_p1 p2_1
- -> Merge Full Join
- Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
- Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
- -> Sort
- Sort Key: prt1_2.a, prt1_2.b
- -> Seq Scan on prt1_p2 prt1_2
- -> Sort
- Sort Key: p2_2.a, p2_2.b
- -> Seq Scan on prt2_p2 p2_2
- -> Merge Full Join
- Merge Cond: ((prt1_3.b = p2_3.b) AND (prt1_3.a = p2_3.a))
- Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
- -> Sort
- Sort Key: prt1_3.b, prt1_3.a
- -> Seq Scan on prt1_p3 prt1_3
- -> Sort
- Sort Key: p2_3.b, p2_3.a
- -> Seq Scan on prt2_p3 p2_3
-(32 rows)
+ -> Group
+ Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
+ -> Sort
+ Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
+ -> Merge Full Join
+ Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
+ Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
+ -> Sort
+ Sort Key: prt1.a, prt1.b
+ -> Seq Scan on prt1_p1 prt1
+ -> Sort
+ Sort Key: p2.a, p2.b
+ -> Seq Scan on prt2_p1 p2
+ -> Group
+ Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
+ -> Sort
+ Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
+ -> Merge Full Join
+ Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+ Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
+ -> Sort
+ Sort Key: prt1_1.a, prt1_1.b
+ -> Seq Scan on prt1_p2 prt1_1
+ -> Sort
+ Sort Key: p2_1.a, p2_1.b
+ -> Seq Scan on prt2_p2 p2_1
+ -> Group
+ Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
+ -> Sort
+ Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
+ -> Merge Full Join
+ Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+ Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
+ -> Sort
+ Sort Key: prt1_2.a, prt1_2.b
+ -> Seq Scan on prt1_p3 prt1_2
+ -> Sort
+ Sort Key: p2_2.a, p2_2.b
+ -> Seq Scan on prt2_p3 p2_2
+(43 rows)
SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
WHERE a BETWEEN 490 AND 510
enable_async_append | on
enable_bitmapscan | on
enable_gathermerge | on
- enable_group_by_reordering | on
enable_hashagg | on
enable_hashjoin | on
enable_incremental_sort | on
enable_seqscan | on
enable_sort | on
enable_tidscan | on
-(21 rows)
+(20 rows)
-- Test that the pg_timezone_names and pg_timezone_abbrevs views are
-- more-or-less working. We can't test their contents in any great detail
union all
select distinct * from int8_tbl i82) ss
where q2 = q2;
- QUERY PLAN
-----------------------------------------------------
- HashAggregate
- Group Key: "*SELECT* 1".q1
- -> Append
+ QUERY PLAN
+----------------------------------------------------------
+ Unique
+ -> Merge Append
+ Sort Key: "*SELECT* 1".q1
-> Subquery Scan on "*SELECT* 1"
- -> HashAggregate
- Group Key: i81.q1, i81.q2
- -> Seq Scan on int8_tbl i81
- Filter: (q2 IS NOT NULL)
+ -> Unique
+ -> Sort
+ Sort Key: i81.q1, i81.q2
+ -> Seq Scan on int8_tbl i81
+ Filter: (q2 IS NOT NULL)
-> Subquery Scan on "*SELECT* 2"
- -> HashAggregate
- Group Key: i82.q1, i82.q2
- -> Seq Scan on int8_tbl i82
- Filter: (q2 IS NOT NULL)
-(13 rows)
+ -> Unique
+ -> Sort
+ Sort Key: i82.q1, i82.q2
+ -> Seq Scan on int8_tbl i82
+ Filter: (q2 IS NOT NULL)
+(15 rows)
select distinct q1 from
(select distinct * from int8_tbl i81
union all
select distinct * from int8_tbl i82) ss
where -q1 = q2;
- QUERY PLAN
---------------------------------------------------
- HashAggregate
- Group Key: "*SELECT* 1".q1
- -> Append
+ QUERY PLAN
+--------------------------------------------------------
+ Unique
+ -> Merge Append
+ Sort Key: "*SELECT* 1".q1
-> Subquery Scan on "*SELECT* 1"
- -> HashAggregate
- Group Key: i81.q1, i81.q2
- -> Seq Scan on int8_tbl i81
- Filter: ((- q1) = q2)
+ -> Unique
+ -> Sort
+ Sort Key: i81.q1, i81.q2
+ -> Seq Scan on int8_tbl i81
+ Filter: ((- q1) = q2)
-> Subquery Scan on "*SELECT* 2"
- -> HashAggregate
- Group Key: i82.q1, i82.q2
- -> Seq Scan on int8_tbl i82
- Filter: ((- q1) = q2)
-(13 rows)
+ -> Unique
+ -> Sort
+ Sort Key: i82.q1, i82.q2
+ -> Seq Scan on int8_tbl i82
+ Filter: ((- q1) = q2)
+(15 rows)
select distinct q1 from
(select distinct * from int8_tbl i81
ROLLBACK;
--- GROUP BY optimization by reorder columns
-
-SELECT
- i AS id,
- i/2 AS p,
- format('%60s', i%2) AS v,
- i/4 AS c,
- i/8 AS d,
- (random() * (10000/8))::int as e --the same as d but no correlation with p
- INTO btg
-FROM
- generate_series(1, 10000) i;
-
-VACUUM btg;
-ANALYZE btg;
-
--- GROUP BY optimization by reorder columns by frequency
-
-SET enable_hashagg=off;
-SET max_parallel_workers= 0;
-SET max_parallel_workers_per_gather = 0;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, v;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, c;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, d, c;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, d, e;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, e, d;
-
-CREATE STATISTICS btg_dep ON d, e, p FROM btg;
-ANALYZE btg;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, d, e;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, e, d;
-
-
--- GROUP BY optimization by reorder columns by index scan
-
-CREATE INDEX ON btg(p, v);
-SET enable_seqscan=off;
-SET enable_bitmapscan=off;
-VACUUM btg;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, v;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, c;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, c, p, d;
-
-EXPLAIN (COSTS off)
-SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
-
-DROP TABLE btg;
-
-RESET enable_hashagg;
-RESET max_parallel_workers;
-RESET max_parallel_workers_per_gather;
-RESET enable_seqscan;
-RESET enable_bitmapscan;
-
-
-- Secondly test the case of a parallel aggregate combiner function
-- returning NULL. For that use normal transition function, but a
-- combiner function returning NULL.
set max_parallel_workers_per_gather = 2;
create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
create index on t (a);
analyze t;