From 30b4955a4668887044568743debef804b14418ca Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Tue, 9 Jan 2024 00:08:35 +0200 Subject: [PATCH] Fix misuse of RelOptInfo.unique_for_rels cache by SJE When SJE uses RelOptInfo.unique_for_rels cache, it passes filtered quals to innerrel_is_unique_ext(). That might lead to an invalid match to cache entries made by previous non self-join checking calls. Add UniqueRelInfo.self_join flag to prevent such cases. Also, fix that SJE should require a strict match of outerrelids to make sure UniqueRelInfo.extra_clauses are valid. Reported-by: Alexander Lakhin Discussion: https://postgr.es/m/4788f781-31bd-9796-d7d6-588a751c8787%40gmail.com --- src/backend/optimizer/plan/analyzejoins.c | 30 ++++++++++++++++------- src/include/nodes/pathnodes.h | 6 +++++ src/test/regress/expected/join.out | 23 +++++++++++++++++ src/test/regress/sql/join.sql | 9 +++++++ 4 files changed, 59 insertions(+), 9 deletions(-) diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index fb01fbe357a..1fb17ee29f2 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -1247,8 +1247,10 @@ innerrel_is_unique(PlannerInfo *root, /* * innerrel_is_unique_ext - * Do the same as innerrel_is_unique(), but also return additional clauses - * from a baserestrictinfo list that were used to prove uniqueness. + * Do the same as innerrel_is_unique(), but also set to '*extra_clauses' + * additional clauses from a baserestrictinfo list that were used to prove + * uniqueness. A non NULL 'extra_clauses' indicates that we're checking + * for self-join and correspondingly dealing with filtered clauses. */ bool innerrel_is_unique_ext(PlannerInfo *root, @@ -1264,6 +1266,7 @@ innerrel_is_unique_ext(PlannerInfo *root, ListCell *lc; UniqueRelInfo *uniqueRelInfo; List *outer_exprs = NIL; + bool self_join = (extra_clauses != NULL); /* Certainly can't prove uniqueness when there are no joinclauses */ if (restrictlist == NIL) @@ -1278,16 +1281,23 @@ innerrel_is_unique_ext(PlannerInfo *root, /* * Query the cache to see if we've managed to prove that innerrel is - * unique for any subset of this outerrel. We don't need an exact match, - * as extra outerrels can't make the innerrel any less unique (or more - * formally, the restrictlist for a join to a superset outerrel must be a - * superset of the conditions we successfully used before). + * unique for any subset of this outerrel. For non self-join search, we + * don't need an exact match, as extra outerrels can't make the innerrel + * any less unique (or more formally, the restrictlist for a join to a + * superset outerrel must be a superset of the conditions we successfully + * used before). For self-join search, we require an exact match of + * outerrels, because we need extra clauses to be valid for our case. + * Also, for self-join checking we've filtered the clauses list. Thus, + * for a self-join search, we can match only the result cached for another + * self-join check. */ foreach(lc, innerrel->unique_for_rels) { uniqueRelInfo = (UniqueRelInfo *) lfirst(lc); - if (bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) + if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) || + (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) && + uniqueRelInfo->self_join)) { if (extra_clauses) *extra_clauses = uniqueRelInfo->extra_clauses; @@ -1309,7 +1319,8 @@ innerrel_is_unique_ext(PlannerInfo *root, /* No cached information, so try to make the proof. */ if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel, - jointype, restrictlist, &outer_exprs)) + jointype, restrictlist, + self_join ? &outer_exprs : NULL)) { /* * Cache the positive result for future probes, being sure to keep it @@ -1323,8 +1334,9 @@ innerrel_is_unique_ext(PlannerInfo *root, */ old_context = MemoryContextSwitchTo(root->planner_cxt); uniqueRelInfo = makeNode(UniqueRelInfo); - uniqueRelInfo->extra_clauses = outer_exprs; uniqueRelInfo->outerrelids = bms_copy(outerrelids); + uniqueRelInfo->self_join = self_join; + uniqueRelInfo->extra_clauses = outer_exprs; innerrel->unique_for_rels = lappend(innerrel->unique_for_rels, uniqueRelInfo); MemoryContextSwitchTo(old_context); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index bb34cfb843f..b9713ec9aa6 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -3407,6 +3407,12 @@ typedef struct UniqueRelInfo */ Relids outerrelids; + /* + * The relation in consideration is unique when considering only clauses + * suitable for self-join (passed split_selfjoin_quals()). + */ + bool self_join; + /* * Additional clauses from a baserestrictinfo list that were used to prove * the uniqueness. We cache it for the self-join checking procedure: a diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index faad882a033..fb4a7ea66ef 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -6905,6 +6905,29 @@ where false; ---------- (0 rows) +-- Check that SJE does not mistakenly re-use knowledge of relation uniqueness +-- made with different set of quals +insert into emp1 values (2, 1); +explain (costs off) +select * from emp1 t1 where exists (select * from emp1 t2 + where t2.id = t1.code and t2.code > 0); + QUERY PLAN +--------------------------------------------- + Nested Loop + -> Seq Scan on emp1 t1 + -> Index Scan using emp1_pkey on emp1 t2 + Index Cond: (id = t1.code) + Filter: (code > 0) +(5 rows) + +select * from emp1 t1 where exists (select * from emp1 t2 + where t2.id = t1.code and t2.code > 0); + id | code +----+------ + 1 | 1 + 2 | 1 +(2 rows) + -- We can remove the join even if we find the join can't duplicate rows and -- the base quals of each side are different. In the following case we end up -- moving quals over to s1 to make it so it can't match any rows. diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 4f3c51b6eba..2321dbf949f 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -2638,6 +2638,15 @@ select 1 from emp1 full join where false) s on true where false; +-- Check that SJE does not mistakenly re-use knowledge of relation uniqueness +-- made with different set of quals +insert into emp1 values (2, 1); +explain (costs off) +select * from emp1 t1 where exists (select * from emp1 t2 + where t2.id = t1.code and t2.code > 0); +select * from emp1 t1 where exists (select * from emp1 t2 + where t2.id = t1.code and t2.code > 0); + -- We can remove the join even if we find the join can't duplicate rows and -- the base quals of each side are different. In the following case we end up -- moving quals over to s1 to make it so it can't match any rows. -- 2.30.2