Fix misuse of RelOptInfo.unique_for_rels cache by SJE

author Alexander Korotkov <akorotkov@postgresql.org>

Mon, 8 Jan 2024 22:08:35 +0000 (00:08 +0200)

committer Alexander Korotkov <akorotkov@postgresql.org>

Mon, 8 Jan 2024 22:09:06 +0000 (00:09 +0200)
author Alexander Korotkov <akorotkov@postgresql.org>
Mon, 8 Jan 2024 22:08:35 +0000 (00:08 +0200)
committer Alexander Korotkov <akorotkov@postgresql.org>
Mon, 8 Jan 2024 22:09:06 +0000 (00:09 +0200)
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c

index fb01fbe357a94a69171cdd497225fbbddf7788d2..1fb17ee29f2619b2a0ff54d39c65cc330b2cb290 100644 (file)
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -1247,8 +1247,10 @@ innerrel_is_unique(PlannerInfo *root,
  
  /*
   * innerrel_is_unique_ext
- *   Do the same as innerrel_is_unique(), but also return additional clauses
- *   from a baserestrictinfo list that were used to prove uniqueness.
+ *   Do the same as innerrel_is_unique(), but also set to '*extra_clauses'
+ *   additional clauses from a baserestrictinfo list that were used to prove
+ *   uniqueness.  A non NULL 'extra_clauses' indicates that we're checking
+ *   for self-join and correspondingly dealing with filtered clauses.
   */
  bool
  innerrel_is_unique_ext(PlannerInfo *root,
@@ -1264,6 +1266,7 @@ innerrel_is_unique_ext(PlannerInfo *root,
     ListCell   *lc;
     UniqueRelInfo *uniqueRelInfo;
     List       *outer_exprs = NIL;
+   bool        self_join = (extra_clauses != NULL);
  
     /* Certainly can't prove uniqueness when there are no joinclauses */
     if (restrictlist == NIL)
@@ -1278,16 +1281,23 @@ innerrel_is_unique_ext(PlannerInfo *root,
  
     /*
      * Query the cache to see if we've managed to prove that innerrel is
-    * unique for any subset of this outerrel.  We don't need an exact match,
-    * as extra outerrels can't make the innerrel any less unique (or more
-    * formally, the restrictlist for a join to a superset outerrel must be a
-    * superset of the conditions we successfully used before).
+    * unique for any subset of this outerrel.  For non self-join search, we
+    * don't need an exact match, as extra outerrels can't make the innerrel
+    * any less unique (or more formally, the restrictlist for a join to a
+    * superset outerrel must be a superset of the conditions we successfully
+    * used before). For self-join search, we require an exact match of
+    * outerrels, because we need extra clauses to be valid for our case.
+    * Also, for self-join checking we've filtered the clauses list.  Thus,
+    * for a self-join search, we can match only the result cached for another
+    * self-join check.
      */
     foreach(lc, innerrel->unique_for_rels)
     {
         uniqueRelInfo = (UniqueRelInfo *) lfirst(lc);
  
-       if (bms_is_subset(uniqueRelInfo->outerrelids, outerrelids))
+       if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) ||
+           (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) &&
+            uniqueRelInfo->self_join))
         {
             if (extra_clauses)
                 *extra_clauses = uniqueRelInfo->extra_clauses;
@@ -1309,7 +1319,8 @@ innerrel_is_unique_ext(PlannerInfo *root,
  
     /* No cached information, so try to make the proof. */
     if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
-                              jointype, restrictlist, &outer_exprs))
+                              jointype, restrictlist,
+                              self_join ? &outer_exprs : NULL))
     {
         /*
          * Cache the positive result for future probes, being sure to keep it
@@ -1323,8 +1334,9 @@ innerrel_is_unique_ext(PlannerInfo *root,
          */
         old_context = MemoryContextSwitchTo(root->planner_cxt);
         uniqueRelInfo = makeNode(UniqueRelInfo);
-       uniqueRelInfo->extra_clauses = outer_exprs;
         uniqueRelInfo->outerrelids = bms_copy(outerrelids);
+       uniqueRelInfo->self_join = self_join;
+       uniqueRelInfo->extra_clauses = outer_exprs;
         innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
                                             uniqueRelInfo);
         MemoryContextSwitchTo(old_context);
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h

index bb34cfb843fe11fc23586ebd26980926319acc26..b9713ec9aa6650b56ce35993be55f70c7c2fdd7c 100644 (file)
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -3407,6 +3407,12 @@ typedef struct UniqueRelInfo
      */
     Relids      outerrelids;
  
+   /*
+    * The relation in consideration is unique when considering only clauses
+    * suitable for self-join (passed split_selfjoin_quals()).
+    */
+   bool        self_join;
+
     /*
      * Additional clauses from a baserestrictinfo list that were used to prove
      * the uniqueness.   We cache it for the self-join checking procedure: a
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out

index faad882a0330b8cc6c68bc6a80d2af9cd63d2f10..fb4a7ea66efbba0dd7c1fcec8373caf31e7efdab 100644 (file)
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -6905,6 +6905,29 @@ where false;
  ----------
  (0 rows)
  
+-- Check that SJE does not mistakenly re-use knowledge of relation uniqueness
+-- made with different set of quals
+insert into emp1 values (2, 1);
+explain (costs off)
+select * from emp1 t1 where exists (select * from emp1 t2
+                                    where t2.id = t1.code and t2.code > 0);
+                 QUERY PLAN                  
+---------------------------------------------
+ Nested Loop
+   ->  Seq Scan on emp1 t1
+   ->  Index Scan using emp1_pkey on emp1 t2
+         Index Cond: (id = t1.code)
+         Filter: (code > 0)
+(5 rows)
+
+select * from emp1 t1 where exists (select * from emp1 t2
+                                    where t2.id = t1.code and t2.code > 0);
+ id | code 
+----+------
+  1 |    1
+  2 |    1
+(2 rows)
+
  -- We can remove the join even if we find the join can't duplicate rows and
  -- the base quals of each side are different.  In the following case we end up
  -- moving quals over to s1 to make it so it can't match any rows.
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql

index 4f3c51b6eba18fa3d88c9c7018e4fa75cd0b15d3..2321dbf949fe8cce37c1ed6b670d68f46dce9521 100644 (file)
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2638,6 +2638,15 @@ select 1 from emp1 full join
      where false) s on true
  where false;
  
+-- Check that SJE does not mistakenly re-use knowledge of relation uniqueness
+-- made with different set of quals
+insert into emp1 values (2, 1);
+explain (costs off)
+select * from emp1 t1 where exists (select * from emp1 t2
+                                    where t2.id = t1.code and t2.code > 0);
+select * from emp1 t1 where exists (select * from emp1 t2
+                                    where t2.id = t1.code and t2.code > 0);
+
  -- We can remove the join even if we find the join can't duplicate rows and
  -- the base quals of each side are different.  In the following case we end up
  -- moving quals over to s1 to make it so it can't match any rows.
author	Alexander Korotkov <akorotkov@postgresql.org>
	Mon, 8 Jan 2024 22:08:35 +0000 (00:08 +0200)
committer	Alexander Korotkov <akorotkov@postgresql.org>
	Mon, 8 Jan 2024 22:09:06 +0000 (00:09 +0200)
src/backend/optimizer/plan/analyzejoins.c		patch \| blob \| blame \| history
src/include/nodes/pathnodes.h		patch \| blob \| blame \| history
src/test/regress/expected/join.out		patch \| blob \| blame \| history
src/test/regress/sql/join.sql		patch \| blob \| blame \| history