* for both forward and reverse scans.
*/
if (rel->part_scheme != NULL && IS_SIMPLE_REL(rel) &&
- partitions_are_ordered(rel->boundinfo, rel->nparts))
+ partitions_are_ordered(rel->boundinfo, rel->live_parts))
{
partition_pathkeys = build_partition_pathkeys(root, rel,
ForwardScanDirection,
int i;
Assert(partscheme != NULL);
- Assert(partitions_are_ordered(partrel->boundinfo, partrel->nparts));
+ Assert(partitions_are_ordered(partrel->boundinfo, partrel->live_parts));
/* For now, we can only cope with baserels */
Assert(IS_SIMPLE_REL(partrel));
boundinfo->ndatums = nparts;
boundinfo->datums = (Datum **) palloc0(nparts * sizeof(Datum *));
boundinfo->kind = NULL;
+ boundinfo->interleaved_parts = NULL;
boundinfo->nindexes = greatest_modulus;
boundinfo->indexes = (int *) palloc(greatest_modulus * sizeof(int));
for (i = 0; i < greatest_modulus; i++)
boundinfo->ndatums = ndatums;
boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
boundinfo->kind = NULL;
+ boundinfo->interleaved_parts = NULL;
boundinfo->nindexes = ndatums;
boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
boundinfo->default_index = (*mapping)[default_index];
}
+ /*
+ * Calculate interleaved partitions. Here we look for partitions which
+ * might be interleaved with other partitions and set a bit in
+ * interleaved_parts for any partitions which may be interleaved with
+ * another partition.
+ */
+
+ /*
+ * There must be multiple partitions to have any interleaved partitions,
+ * otherwise there's nothing to interleave with.
+ */
+ if (nparts > 1)
+ {
+ /*
+ * Short-circuit check to see if only 1 Datum is allowed per
+ * partition. When this is true there's no need to do the more
+ * expensive checks to look for interleaved values.
+ */
+ if (boundinfo->ndatums +
+ partition_bound_accepts_nulls(boundinfo) +
+ partition_bound_has_default(boundinfo) != nparts)
+ {
+ int last_index = -1;
+
+ /*
+ * Since the indexes array is sorted in Datum order, if any
+ * partitions are interleaved then it will show up by the
+ * partition indexes not being in ascending order. Here we check
+ * for that and record all partitions that are out of order.
+ */
+ for (i = 0; i < boundinfo->nindexes; i++)
+ {
+ int index = boundinfo->indexes[i];
+
+ if (index < last_index)
+ boundinfo->interleaved_parts = bms_add_member(boundinfo->interleaved_parts,
+ index);
+
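+ /*
+ * The indexes array only has entries for non-NULL Datums, so
+ * if the NULL partition's index shows up here then that
+ * partition must also allow some other non-NULL Datum. Mark
+ * the NULL partition as interleaved in that case.
+ */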
+ if (partition_bound_accepts_nulls(boundinfo) &&
+ index == boundinfo->null_index)
+ boundinfo->interleaved_parts = bms_add_member(boundinfo->interleaved_parts,
+ boundinfo->null_index);
+
+ last_index = index;
+ }
+ }
+
+ /*
+ * The DEFAULT partition is the "catch-all" partition that can contain
+ * anything that does not belong to any other partition. If there are
+ * any other partitions then the DEFAULT partition must be marked as
+ * interleaved.
+ */
+ if (partition_bound_has_default(boundinfo))
+ boundinfo->interleaved_parts = bms_add_member(boundinfo->interleaved_parts,
+ boundinfo->default_index);
+ }
+
+
/* All partitions must now have been assigned canonical indexes. */
Assert(next_index == nparts);
return boundinfo;
boundinfo->kind = (PartitionRangeDatumKind **)
palloc(ndatums *
sizeof(PartitionRangeDatumKind *));
+ boundinfo->interleaved_parts = NULL;
/*
* For range partitioning, an additional value of -1 is stored as the last
else
dest->kind = NULL;
+ /* copy interleaved partitions for LIST partitioned tables */
+ dest->interleaved_parts = bms_copy(src->interleaved_parts);
+
/*
* For hash partitioning, datums array will have two elements - modulus
* and remainder.
* that is partitions appearing earlier in the PartitionDesc sequence
* contain partition keys strictly less than those appearing later.
* Also, if NULL values are possible, they must come in the last
- * partition defined in the PartitionDesc.
+ * partition defined in the PartitionDesc. 'live_parts' marks which
+ * partitions we should include when checking the ordering. Partitions
+ * that do not appear in 'live_parts' are ignored.
*
* If out of order, or there is insufficient info to know the order,
* then we return false.
*/
bool
-partitions_are_ordered(PartitionBoundInfo boundinfo, int nparts)
+partitions_are_ordered(PartitionBoundInfo boundinfo, Bitmapset *live_parts)
{
Assert(boundinfo != NULL);
* RANGE-type partitioning guarantees that the partitions can be
* scanned in the order that they're defined in the PartitionDesc
* to provide sequential, non-overlapping ranges of tuples.
- * However, if a DEFAULT partition exists then it doesn't work, as
- * that could contain tuples from either below or above the
- * defined range, or tuples belonging to gaps between partitions.
+ * However, if a DEFAULT partition exists and it's contained
+ * within live_parts, then the partitions are not ordered.
*/
- if (!partition_bound_has_default(boundinfo))
+ if (!partition_bound_has_default(boundinfo) ||
+ !bms_is_member(boundinfo->default_index, live_parts))
return true;
break;
case PARTITION_STRATEGY_LIST:
/*
- * LIST partitioning can also guarantee ordering, but only if the
- * partitions don't accept interleaved values. We could likely
- * check for this by looping over the PartitionBound's indexes
- * array to check that the indexes are in order. For now, let's
- * just keep it simple and just accept LIST partitioning when
- * there's no DEFAULT partition, exactly one value per partition,
- * and optionally a NULL partition that does not accept any other
- * values. Such a NULL partition will come last in the
- * PartitionDesc, and the other partitions will be properly
- * ordered. This is a cheap test to make as it does not require
- * any per-partition processing. Maybe we'd like to handle more
- * complex cases in the future.
+ * LIST partitioned tables are ordered provided that none of
+ * live_parts overlap with the table's interleaved partitions.
*/
- if (partition_bound_has_default(boundinfo))
- return false;
-
- if (boundinfo->ndatums + partition_bound_accepts_nulls(boundinfo)
- == nparts)
+ if (!bms_overlap(live_parts, boundinfo->interleaved_parts))
return true;
- break;
+ break;
default:
/* HASH, or some other strategy */
break;
* The indexes array is indexed according to the hash key's remainder modulo
* the greatest modulus, and it contains either the partition index accepting
* that remainder, or -1 if there is no partition for that remainder.
+ *
+ * For LIST partitioned tables, we track the partition indexes of partitions
+ * which are possibly "interleaved" partitions. A partition is considered
+ * interleaved if it allows multiple values and there exists at least one
+ * other partition which could contain a value that lies between those values.
+ * For example, if a partition exists FOR VALUES IN(3,5) and another partition
+ * exists FOR VALUES IN (4), then the IN(3,5) partition is an interleaved
+ * partition. The same is possible with DEFAULT partitions since they can
+ * contain any value that does not belong in another partition. This field
+ * only serves as proof that a particular partition is not interleaved, not
+ * proof that it is interleaved. When we're uncertain, we mark the
+ * partition as interleaved.
*/
typedef struct PartitionBoundInfoData
{
PartitionRangeDatumKind **kind; /* The kind of each range bound datum;
* NULL for hash and list partitioned
* tables */
+ Bitmapset *interleaved_parts; /* Partition indexes of partitions which
+ * may be interleaved. See above. This is
+ * only set for LIST partitioned tables */
int nindexes; /* Length of the indexes[] array */
int *indexes; /* Partition indexes */
int null_index; /* Index of the null-accepting partition; -1
JoinType jointype,
List **outer_parts,
List **inner_parts);
-extern bool partitions_are_ordered(PartitionBoundInfo boundinfo, int nparts);
+extern bool partitions_are_ordered(PartitionBoundInfo boundinfo,
+ Bitmapset *live_parts);
extern void check_new_partition_bound(char *relname, Relation parent,
PartitionBoundSpec *spec,
ParseState *pstate);
Index Cond: (a < 20)
(9 rows)
+set enable_bitmapscan to off;
+set enable_sort to off;
create table mclparted (a int) partition by list(a);
create table mclparted1 partition of mclparted for values in(1);
create table mclparted2 partition of mclparted for values in(2);
-> Index Only Scan using mclparted4_a_idx on mclparted4 mclparted_4
(6 rows)
+explain (costs off) select * from mclparted where a in(3,4,5) order by a;
+ QUERY PLAN
+----------------------------------------------------------------------------
+ Merge Append
+ Sort Key: mclparted.a
+ -> Index Only Scan using mclparted3_5_a_idx on mclparted3_5 mclparted_1
+ Index Cond: (a = ANY ('{3,4,5}'::integer[]))
+ -> Index Only Scan using mclparted4_a_idx on mclparted4 mclparted_2
+ Index Cond: (a = ANY ('{3,4,5}'::integer[]))
+(6 rows)
+
+-- Introduce a NULL and DEFAULT partition so we can test more complex cases
+create table mclparted_null partition of mclparted for values in(null);
+create table mclparted_def partition of mclparted default;
+-- Append can be used providing we don't scan the interleaved partition
+explain (costs off) select * from mclparted where a in(1,2,4) order by a;
+ QUERY PLAN
+------------------------------------------------------------------------
+ Append
+ -> Index Only Scan using mclparted1_a_idx on mclparted1 mclparted_1
+ Index Cond: (a = ANY ('{1,2,4}'::integer[]))
+ -> Index Only Scan using mclparted2_a_idx on mclparted2 mclparted_2
+ Index Cond: (a = ANY ('{1,2,4}'::integer[]))
+ -> Index Only Scan using mclparted4_a_idx on mclparted4 mclparted_3
+ Index Cond: (a = ANY ('{1,2,4}'::integer[]))
+(7 rows)
+
+explain (costs off) select * from mclparted where a in(1,2,4) or a is null order by a;
+ QUERY PLAN
+--------------------------------------------------------------------------------
+ Append
+ -> Index Only Scan using mclparted1_a_idx on mclparted1 mclparted_1
+ Filter: ((a = ANY ('{1,2,4}'::integer[])) OR (a IS NULL))
+ -> Index Only Scan using mclparted2_a_idx on mclparted2 mclparted_2
+ Filter: ((a = ANY ('{1,2,4}'::integer[])) OR (a IS NULL))
+ -> Index Only Scan using mclparted4_a_idx on mclparted4 mclparted_3
+ Filter: ((a = ANY ('{1,2,4}'::integer[])) OR (a IS NULL))
+ -> Index Only Scan using mclparted_null_a_idx on mclparted_null mclparted_4
+ Filter: ((a = ANY ('{1,2,4}'::integer[])) OR (a IS NULL))
+(9 rows)
+
+-- Test a more complex case where the NULL partition allows some other value
+drop table mclparted_null;
+create table mclparted_0_null partition of mclparted for values in(0,null);
+-- Ensure MergeAppend is used since 0 and NULLs are in the same partition.
+explain (costs off) select * from mclparted where a in(1,2,4) or a is null order by a;
+ QUERY PLAN
+------------------------------------------------------------------------------------
+ Merge Append
+ Sort Key: mclparted.a
+ -> Index Only Scan using mclparted_0_null_a_idx on mclparted_0_null mclparted_1
+ Filter: ((a = ANY ('{1,2,4}'::integer[])) OR (a IS NULL))
+ -> Index Only Scan using mclparted1_a_idx on mclparted1 mclparted_2
+ Filter: ((a = ANY ('{1,2,4}'::integer[])) OR (a IS NULL))
+ -> Index Only Scan using mclparted2_a_idx on mclparted2 mclparted_3
+ Filter: ((a = ANY ('{1,2,4}'::integer[])) OR (a IS NULL))
+ -> Index Only Scan using mclparted4_a_idx on mclparted4 mclparted_4
+ Filter: ((a = ANY ('{1,2,4}'::integer[])) OR (a IS NULL))
+(10 rows)
+
+explain (costs off) select * from mclparted where a in(0,1,2,4) order by a;
+ QUERY PLAN
+------------------------------------------------------------------------------------
+ Merge Append
+ Sort Key: mclparted.a
+ -> Index Only Scan using mclparted_0_null_a_idx on mclparted_0_null mclparted_1
+ Index Cond: (a = ANY ('{0,1,2,4}'::integer[]))
+ -> Index Only Scan using mclparted1_a_idx on mclparted1 mclparted_2
+ Index Cond: (a = ANY ('{0,1,2,4}'::integer[]))
+ -> Index Only Scan using mclparted2_a_idx on mclparted2 mclparted_3
+ Index Cond: (a = ANY ('{0,1,2,4}'::integer[]))
+ -> Index Only Scan using mclparted4_a_idx on mclparted4 mclparted_4
+ Index Cond: (a = ANY ('{0,1,2,4}'::integer[]))
+(10 rows)
+
+-- Ensure Append is used when the null partition is pruned
+explain (costs off) select * from mclparted where a in(1,2,4) order by a;
+ QUERY PLAN
+------------------------------------------------------------------------
+ Append
+ -> Index Only Scan using mclparted1_a_idx on mclparted1 mclparted_1
+ Index Cond: (a = ANY ('{1,2,4}'::integer[]))
+ -> Index Only Scan using mclparted2_a_idx on mclparted2 mclparted_2
+ Index Cond: (a = ANY ('{1,2,4}'::integer[]))
+ -> Index Only Scan using mclparted4_a_idx on mclparted4 mclparted_3
+ Index Cond: (a = ANY ('{1,2,4}'::integer[]))
+(7 rows)
+
+-- Ensure MergeAppend is used when the default partition is not pruned
+explain (costs off) select * from mclparted where a in(1,2,4,100) order by a;
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Merge Append
+ Sort Key: mclparted.a
+ -> Index Only Scan using mclparted1_a_idx on mclparted1 mclparted_1
+ Index Cond: (a = ANY ('{1,2,4,100}'::integer[]))
+ -> Index Only Scan using mclparted2_a_idx on mclparted2 mclparted_2
+ Index Cond: (a = ANY ('{1,2,4,100}'::integer[]))
+ -> Index Only Scan using mclparted4_a_idx on mclparted4 mclparted_3
+ Index Cond: (a = ANY ('{1,2,4,100}'::integer[]))
+ -> Index Only Scan using mclparted_def_a_idx on mclparted_def mclparted_4
+ Index Cond: (a = ANY ('{1,2,4,100}'::integer[]))
+(10 rows)
+
drop table mclparted;
+reset enable_sort;
+reset enable_bitmapscan;
-- Ensure subplans which don't have a path with the correct pathkeys get
-- sorted correctly.
drop index mcrparted_a_abs_c_idx;
-- during planning.
explain (costs off) select * from mcrparted where a < 20 order by a, abs(b), c;
+set enable_bitmapscan to off;
+set enable_sort to off;
create table mclparted (a int) partition by list(a);
create table mclparted1 partition of mclparted for values in(1);
create table mclparted2 partition of mclparted for values in(2);
create table mclparted4 partition of mclparted for values in(4);
explain (costs off) select * from mclparted order by a;
+explain (costs off) select * from mclparted where a in(3,4,5) order by a;
+
+-- Introduce a NULL and DEFAULT partition so we can test more complex cases
+create table mclparted_null partition of mclparted for values in(null);
+create table mclparted_def partition of mclparted default;
+
+-- Append can be used providing we don't scan the interleaved partition
+explain (costs off) select * from mclparted where a in(1,2,4) order by a;
+explain (costs off) select * from mclparted where a in(1,2,4) or a is null order by a;
+
+-- Test a more complex case where the NULL partition allows some other value
+drop table mclparted_null;
+create table mclparted_0_null partition of mclparted for values in(0,null);
+
+-- Ensure MergeAppend is used since 0 and NULLs are in the same partition.
+explain (costs off) select * from mclparted where a in(1,2,4) or a is null order by a;
+explain (costs off) select * from mclparted where a in(0,1,2,4) order by a;
+
+-- Ensure Append is used when the null partition is pruned
+explain (costs off) select * from mclparted where a in(1,2,4) order by a;
+
+-- Ensure MergeAppend is used when the default partition is not pruned
+explain (costs off) select * from mclparted where a in(1,2,4,100) order by a;
drop table mclparted;
+reset enable_sort;
+reset enable_bitmapscan;
-- Ensure subplans which don't have a path with the correct pathkeys get
-- sorted correctly.