</listitem>
</varlistentry>
- <varlistentry id="guc-enable-groupingsets-hash-disk" xreflabel="enable_groupingsets_hash_disk">
- <term><varname>enable_groupingsets_hash_disk</varname> (<type>boolean</type>)
- <indexterm>
- <primary><varname>enable_groupingsets_hash_disk</varname> configuration parameter</primary>
- </indexterm>
- </term>
- <listitem>
- <para>
- Enables or disables the query planner's use of hashed aggregation plan
- types for grouping sets when the total size of the hash tables is
- expected to exceed <varname>work_mem</varname>. See <xref
- linkend="queries-grouping-sets"/>. The default is
- <literal>off</literal>.
- </para>
- </listitem>
- </varlistentry>
-
<varlistentry id="guc-enable-hashagg" xreflabel="enable_hashagg">
<term><varname>enable_hashagg</varname> (<type>boolean</type>)
<indexterm>
</listitem>
</varlistentry>
- <varlistentry id="guc-enable-hashagg-disk" xreflabel="enable_hashagg_disk">
- <term><varname>enable_hashagg_disk</varname> (<type>boolean</type>)
+ <varlistentry id="guc-hashagg-avoid-disk-plan" xreflabel="hashagg_avoid_disk_plan">
+ <term><varname>hashagg_avoid_disk_plan</varname> (<type>boolean</type>)
<indexterm>
- <primary><varname>enable_hashagg_disk</varname> configuration parameter</primary>
+ <primary><varname>hashagg_avoid_disk_plan</varname> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
- Enables or disables the query planner's use of hashed aggregation plan
- types when the memory usage is expected to exceed
- <varname>work_mem</varname>. The default is <literal>on</literal>.
+ If set to <literal>on</literal>, causes the planner to avoid choosing
+ hashed aggregation plans that are expected to spill to disk. Even when
+ this parameter is enabled, a hashed aggregation plan, once chosen, may
+ still spill to disk at execution time. The default is
+ <literal>off</literal>.
</para>
</listitem>
</varlistentry>
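A quick way to observe the new behavior (a sketch, not part of the patch; it assumes a server built with this change and reuses the agg_data_20k table created by the regression tests further down):

    SET work_mem = '64kB';
    SET hashagg_avoid_disk_plan = on;
    EXPLAIN (COSTS OFF)
    SELECT g % 10000 AS c1, sum(g::numeric) AS c2
      FROM agg_data_20k GROUP BY g % 10000;
    -- The hash table for ~10000 groups is expected to exceed 64kB of
    -- work_mem, so the planner should now prefer a sort-based
    -- GroupAggregate over HashAggregate.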
bool enable_sort = true;
bool enable_incrementalsort = true;
bool enable_hashagg = true;
-bool enable_hashagg_disk = true;
-bool enable_groupingsets_hash_disk = false;
+bool hashagg_avoid_disk_plan = false;
bool enable_nestloop = true;
bool enable_material = true;
bool enable_mergejoin = true;
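Note that the rename also inverts the sense of the flag, which is why the initializer changes from true to false: the stock behavior (the planner may pick a spilling hash aggregate) is expressed by the new parameter being off. In GUC terms (illustrative only):

    SET enable_hashagg_disk = off;      -- pre-patch spelling
    SET hashagg_avoid_disk_plan = on;   -- post-patch equivalent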
dNumGroups - exclude_groups);
/*
- * If we have sortable columns to work with (gd->rollups is non-empty)
- * and enable_groupingsets_hash_disk is disabled, don't generate
- * hash-based paths that will exceed work_mem.
+ * gd->rollups is empty if we have only unsortable columns to work
+ * with. Override work_mem in that case; otherwise, we'll rely on the
+ * sorted-input case to generate usable mixed paths.
*/
- if (!enable_groupingsets_hash_disk &&
- hashsize > work_mem * 1024L && gd->rollups)
+ if (hashsize > work_mem * 1024L && gd->rollups)
return; /* nope, won't fit */
/*
{
Size hashentrysize = hash_agg_entry_size(0, cheapest_input_path->pathtarget->width, 0);
- allow_hash = enable_hashagg_disk ||
+ allow_hash = !hashagg_avoid_disk_plan ||
(hashentrysize * numDistinctRows <= work_mem * 1024L);
}
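The same pattern guards hashed DISTINCT in create_distinct_paths(): hashing stays eligible either when the parameter is off or when the estimated table fits in work_mem. A rough check (hypothetical session; agg_data_20k comes from the tests below):

    SET work_mem = '64kB';
    SET hashagg_avoid_disk_plan = on;
    EXPLAIN (COSTS OFF) SELECT DISTINCT g FROM agg_data_20k;
    -- 20000 distinct integers should not fit in 64kB, so expect a
    -- Sort + Unique plan here rather than HashAggregate.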
* were unable to sort above, then we'd better generate a Path, so
* that we at least have one.
*/
- if (enable_hashagg_disk ||
+ if (!hashagg_avoid_disk_plan ||
hashaggtablesize < work_mem * 1024L ||
grouped_rel->pathlist == NIL)
{
agg_final_costs,
dNumGroups);
- if (enable_hashagg_disk ||
+ if (!hashagg_avoid_disk_plan ||
hashaggtablesize < work_mem * 1024L)
add_path(grouped_rel, (Path *)
create_agg_path(root,
* Tentatively produce a partial HashAgg Path, depending on if it
* looks as if the hash table will fit in work_mem.
*/
- if ((enable_hashagg_disk || hashaggtablesize < work_mem * 1024L) &&
+ if ((!hashagg_avoid_disk_plan || hashaggtablesize < work_mem * 1024L) &&
cheapest_total_path != NULL)
{
add_path(partially_grouped_rel, (Path *)
dNumPartialPartialGroups);
/* Do the same for partial paths. */
- if ((enable_hashagg_disk || hashaggtablesize < work_mem * 1024L) &&
+ if ((!hashagg_avoid_disk_plan ||
+ hashaggtablesize < work_mem * 1024L) &&
cheapest_partial_path != NULL)
{
add_partial_path(partially_grouped_rel, (Path *)
NULL, NULL, NULL
},
{
- {"enable_hashagg_disk", PGC_USERSET, QUERY_TUNING_METHOD,
- gettext_noop("Enables the planner's use of hashed aggregation plans that are expected to exceed work_mem."),
+ {"hashagg_avoid_disk_plan", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("Causes the planner to avoid hashed aggregation plans that are expected to use the disk."),
NULL,
GUC_EXPLAIN
},
- &enable_hashagg_disk,
- true,
- NULL, NULL, NULL
- },
- {
- {"enable_groupingsets_hash_disk", PGC_USERSET, QUERY_TUNING_METHOD,
- gettext_noop("Enables the planner's use of hashed aggregation plans for groupingsets when the total size of the hash tables is expected to exceed work_mem."),
- NULL,
- GUC_EXPLAIN
- },
- &enable_groupingsets_hash_disk,
+ &hashagg_avoid_disk_plan,
false,
NULL, NULL, NULL
},
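Because the entry keeps the GUC_EXPLAIN flag, a non-default setting of the renamed parameter is reported when EXPLAIN's settings output is requested (hypothetical session):

    SET hashagg_avoid_disk_plan = on;
    EXPLAIN (SETTINGS, COSTS OFF) SELECT 1;
    -- The output should end with a line like:
    --   Settings: hashagg_avoid_disk_plan = 'on'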
extern PGDLLIMPORT bool enable_sort;
extern PGDLLIMPORT bool enable_incrementalsort;
extern PGDLLIMPORT bool enable_hashagg;
-extern PGDLLIMPORT bool enable_hashagg_disk;
-extern PGDLLIMPORT bool enable_groupingsets_hash_disk;
+extern PGDLLIMPORT bool hashagg_avoid_disk_plan;
extern PGDLLIMPORT bool enable_nestloop;
extern PGDLLIMPORT bool enable_material;
extern PGDLLIMPORT bool enable_mergejoin;
-- aggregation. Force spilling in both cases by setting work_mem low.
--
set work_mem='64kB';
+create table agg_data_2k as
+select g from generate_series(0, 1999) g;
+analyze agg_data_2k;
+create table agg_data_20k as
+select g from generate_series(0, 19999) g;
+analyze agg_data_20k;
-- Produce results with sorting.
set enable_hashagg = false;
set jit_above_cost = 0;
explain (costs off)
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
- from generate_series(0, 19999) g
- group by g%10000;
- QUERY PLAN
-------------------------------------------------
+ from agg_data_20k group by g%10000;
+ QUERY PLAN
+--------------------------------------
GroupAggregate
Group Key: ((g % 10000))
-> Sort
Sort Key: ((g % 10000))
- -> Function Scan on generate_series g
+ -> Seq Scan on agg_data_20k
(5 rows)
create table agg_group_1 as
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
- from generate_series(0, 19999) g
- group by g%10000;
+ from agg_data_20k group by g%10000;
create table agg_group_2 as
select * from
(values (100), (300), (500)) as r(a),
select (g/2)::numeric as c1,
array_agg(g::numeric) as c2,
count(*) as c3
- from generate_series(0, 1999) g
+ from agg_data_2k
where g < r.a
group by g/2) as s;
set jit_above_cost to default;
create table agg_group_3 as
select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3
- from generate_series(0, 1999) g
- group by g/2;
+ from agg_data_2k group by g/2;
create table agg_group_4 as
select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3
- from generate_series(0, 1999) g
- group by g/2;
+ from agg_data_2k group by g/2;
-- Produce results with hash aggregation
set enable_hashagg = true;
set enable_sort = false;
set jit_above_cost = 0;
explain (costs off)
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
- from generate_series(0, 19999) g
- group by g%10000;
- QUERY PLAN
-------------------------------------------
+ from agg_data_20k group by g%10000;
+ QUERY PLAN
+--------------------------------
HashAggregate
Group Key: (g % 10000)
- -> Function Scan on generate_series g
+ -> Seq Scan on agg_data_20k
(3 rows)
create table agg_hash_1 as
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
- from generate_series(0, 19999) g
- group by g%10000;
+ from agg_data_20k group by g%10000;
create table agg_hash_2 as
select * from
(values (100), (300), (500)) as r(a),
select (g/2)::numeric as c1,
array_agg(g::numeric) as c2,
count(*) as c3
- from generate_series(0, 1999) g
+ from agg_data_2k
where g < r.a
group by g/2) as s;
set jit_above_cost to default;
create table agg_hash_3 as
select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3
- from generate_series(0, 1999) g
- group by g/2;
+ from agg_data_2k group by g/2;
create table agg_hash_4 as
select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3
- from generate_series(0, 1999) g
- group by g/2;
+ from agg_data_2k group by g/2;
set enable_sort = true;
set work_mem to default;
-- Compare group aggregation results to hash aggregation results
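The move from generate_series() scans to pre-built, ANALYZEd tables is what keeps these plans stable: a function scan carries no table statistics, so the sort-versus-hash decision would ride on the function's declared row estimate instead of real data. For instance (output abbreviated; the row counts are the point):

    EXPLAIN SELECT g FROM generate_series(0, 19999) g;
    -- Function Scan ... rows=1000: the declared ROWS default for the
    -- function, not the real 20000.
    EXPLAIN SELECT g FROM agg_data_20k;
    -- Seq Scan ... rows=20000 after ANALYZE, so the hash-table size
    -- estimate, and hence the plan choice, is deterministic.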
--
-- Compare results between plans using sorting and plans using hash
-- aggregation. Force spilling in both cases by setting work_mem low
--- and turning on enable_groupingsets_hash_disk.
+-- and altering the statistics.
--
-SET enable_groupingsets_hash_disk = true;
+create table gs_data_1 as
+select g%1000 as g1000, g%100 as g100, g%10 as g10, g
+ from generate_series(0,1999) g;
+analyze gs_data_1;
+alter table gs_data_1 set (autovacuum_enabled = 'false');
+update pg_class set reltuples = 10 where relname='gs_data_1';
SET work_mem='64kB';
-- Produce results with sorting.
set enable_hashagg = false;
set jit_above_cost = 0;
explain (costs off)
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
- (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
- from generate_series(0,1999) g) s
-group by cube (g1000, g100,g10);
- QUERY PLAN
----------------------------------------------------------------
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+from gs_data_1 group by cube (g1000, g100,g10);
+ QUERY PLAN
+------------------------------------
GroupAggregate
- Group Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10))
- Group Key: ((g.g % 1000)), ((g.g % 100))
- Group Key: ((g.g % 1000))
+ Group Key: g1000, g100, g10
+ Group Key: g1000, g100
+ Group Key: g1000
Group Key: ()
- Sort Key: ((g.g % 100)), ((g.g % 10))
- Group Key: ((g.g % 100)), ((g.g % 10))
- Group Key: ((g.g % 100))
- Sort Key: ((g.g % 10)), ((g.g % 1000))
- Group Key: ((g.g % 10)), ((g.g % 1000))
- Group Key: ((g.g % 10))
+ Sort Key: g100, g10
+ Group Key: g100, g10
+ Group Key: g100
+ Sort Key: g10, g1000
+ Group Key: g10, g1000
+ Group Key: g10
-> Sort
- Sort Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10))
- -> Function Scan on generate_series g
+ Sort Key: g1000, g100, g10
+ -> Seq Scan on gs_data_1
(14 rows)
create table gs_group_1 as
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
- (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
- from generate_series(0,1999) g) s
-group by cube (g1000, g100,g10);
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+from gs_data_1 group by cube (g1000, g100,g10);
-- Produce results with hash aggregation.
set enable_hashagg = true;
set enable_sort = false;
explain (costs off)
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
- (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
- from generate_series(0,1999) g) s
-group by cube (g1000, g100,g10);
- QUERY PLAN
----------------------------------------------------
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+from gs_data_1 group by cube (g1000, g100,g10);
+ QUERY PLAN
+------------------------------
MixedAggregate
- Hash Key: (g.g % 1000), (g.g % 100), (g.g % 10)
- Hash Key: (g.g % 1000), (g.g % 100)
- Hash Key: (g.g % 1000)
- Hash Key: (g.g % 100), (g.g % 10)
- Hash Key: (g.g % 100)
- Hash Key: (g.g % 10), (g.g % 1000)
- Hash Key: (g.g % 10)
+ Hash Key: g1000, g100, g10
+ Hash Key: g1000, g100
+ Hash Key: g1000
+ Hash Key: g100, g10
+ Hash Key: g100
+ Hash Key: g10, g1000
+ Hash Key: g10
Group Key: ()
- -> Function Scan on generate_series g
+ -> Seq Scan on gs_data_1
(10 rows)
create table gs_hash_1 as
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
- (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
- from generate_series(0,1999) g) s
-group by cube (g1000, g100,g10);
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+from gs_data_1 group by cube (g1000, g100,g10);
set enable_sort = true;
set work_mem to default;
-- Compare results
drop table gs_group_1;
drop table gs_hash_1;
-SET enable_groupingsets_hash_disk TO DEFAULT;
-- end
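The pg_class update is what replaces the removed enable_groupingsets_hash_disk escape hatch: with reltuples faked to 10, the grouping-sets hash tables look tiny at plan time, so the planner picks the MixedAggregate shown above, while the table's real 2000 rows force spilling at execution under the 64kB work_mem. Disabling autovacuum keeps a background ANALYZE from repairing the statistics mid-test. To see the mismatch directly:

    select relname, reltuples from pg_class where relname = 'gs_data_1';
    -- reltuples is 10 (the faked value)
    select count(*) from gs_data_1;
    -- count is 2000 (what execution actually processes)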
--------------------------------+---------
enable_bitmapscan | on
enable_gathermerge | on
- enable_groupingsets_hash_disk | off
enable_hashagg | on
- enable_hashagg_disk | on
enable_hashjoin | on
enable_incrementalsort | on
enable_indexonlyscan | on
enable_seqscan | on
enable_sort | on
enable_tidscan | on
-(20 rows)
+(18 rows)
-- Test that the pg_timezone_names and pg_timezone_abbrevs views are
-- more-or-less working. We can't test their contents in any great detail
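The sysviews change is purely a side effect of the rename: that test lists planner GUCs whose names match 'enable%', so the two dropped parameters disappear and their replacement, not starting with 'enable', never shows up:

    select name, setting from pg_settings where name like 'enable%';
    -- 18 rows now; hashagg_avoid_disk_plan does not match the pattern.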
set work_mem='64kB';
+create table agg_data_2k as
+select g from generate_series(0, 1999) g;
+analyze agg_data_2k;
+
+create table agg_data_20k as
+select g from generate_series(0, 19999) g;
+analyze agg_data_20k;
+
-- Produce results with sorting.
set enable_hashagg = false;
explain (costs off)
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
- from generate_series(0, 19999) g
- group by g%10000;
+ from agg_data_20k group by g%10000;
create table agg_group_1 as
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
- from generate_series(0, 19999) g
- group by g%10000;
+ from agg_data_20k group by g%10000;
create table agg_group_2 as
select * from
select (g/2)::numeric as c1,
array_agg(g::numeric) as c2,
count(*) as c3
- from generate_series(0, 1999) g
+ from agg_data_2k
where g < r.a
group by g/2) as s;
create table agg_group_3 as
select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3
- from generate_series(0, 1999) g
- group by g/2;
+ from agg_data_2k group by g/2;
create table agg_group_4 as
select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3
- from generate_series(0, 1999) g
- group by g/2;
+ from agg_data_2k group by g/2;
-- Produce results with hash aggregation
explain (costs off)
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
- from generate_series(0, 19999) g
- group by g%10000;
+ from agg_data_20k group by g%10000;
create table agg_hash_1 as
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
- from generate_series(0, 19999) g
- group by g%10000;
+ from agg_data_20k group by g%10000;
create table agg_hash_2 as
select * from
select (g/2)::numeric as c1,
array_agg(g::numeric) as c2,
count(*) as c3
- from generate_series(0, 1999) g
+ from agg_data_2k
where g < r.a
group by g/2) as s;
create table agg_hash_3 as
select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3
- from generate_series(0, 1999) g
- group by g/2;
+ from agg_data_2k group by g/2;
create table agg_hash_4 as
select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3
- from generate_series(0, 1999) g
- group by g/2;
+ from agg_data_2k group by g/2;
set enable_sort = true;
set work_mem to default;
--
-- Compare results between plans using sorting and plans using hash
-- aggregation. Force spilling in both cases by setting work_mem low
--- and turning on enable_groupingsets_hash_disk.
+-- and altering the statistics.
--
-SET enable_groupingsets_hash_disk = true;
+create table gs_data_1 as
+select g%1000 as g1000, g%100 as g100, g%10 as g10, g
+ from generate_series(0,1999) g;
+
+analyze gs_data_1;
+alter table gs_data_1 set (autovacuum_enabled = 'false');
+update pg_class set reltuples = 10 where relname='gs_data_1';
+
SET work_mem='64kB';
-- Produce results with sorting.
set jit_above_cost = 0;
explain (costs off)
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
- (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
- from generate_series(0,1999) g) s
-group by cube (g1000, g100,g10);
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+from gs_data_1 group by cube (g1000, g100,g10);
create table gs_group_1 as
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
- (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
- from generate_series(0,1999) g) s
-group by cube (g1000, g100,g10);
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+from gs_data_1 group by cube (g1000, g100,g10);
-- Produce results with hash aggregation.
set enable_sort = false;
explain (costs off)
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
- (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
- from generate_series(0,1999) g) s
-group by cube (g1000, g100,g10);
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+from gs_data_1 group by cube (g1000, g100,g10);
create table gs_hash_1 as
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
- (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
- from generate_series(0,1999) g) s
-group by cube (g1000, g100,g10);
-
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+from gs_data_1 group by cube (g1000, g100,g10);
set enable_sort = true;
set work_mem to default;
drop table gs_group_1;
drop table gs_hash_1;
-SET enable_groupingsets_hash_disk TO DEFAULT;
-
-- end