Implement Self-Join Elimination

author Alexander Korotkov <akorotkov@postgresql.org>

Wed, 12 Feb 2025 22:56:03 +0000 (00:56 +0200)

committer Alexander Korotkov <akorotkov@postgresql.org>

Mon, 17 Feb 2025 10:44:12 +0000 (12:44 +0200)
author Alexander Korotkov <akorotkov@postgresql.org>
Wed, 12 Feb 2025 22:56:03 +0000 (00:56 +0200)
committer Alexander Korotkov <akorotkov@postgresql.org>
Mon, 17 Feb 2025 10:44:12 +0000 (12:44 +0200)
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml

index 60829b79d83b0cd2ed6e3a2c9fa2fd9a77bf79e2..336630ce417e3292f849bd7df25d20072f333ffe 100644 (file)
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -5544,6 +5544,22 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
        </listitem>
       </varlistentry>
  
+     <varlistentry id="guc-enable_self_join_elimination" xreflabel="enable_self_join_elimination">
+      <term><varname>enable_self_join_elimination</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>enable_self_join_elimination</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Enables or disables the query planner's optimization which analyses
+        the query tree and replaces self joins with semantically equivalent
+        single scans.  Takes into consideration only plain tables.
+        The default is <literal>on</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
       <varlistentry id="guc-enable-seqscan" xreflabel="enable_seqscan">
        <term><varname>enable_seqscan</varname> (<type>boolean</type>)
        <indexterm>
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c

index 7cafaca33c5cb1efefd96de5ec6147767d8af916..0f9ecf5ee8b2d80a3bb3aa21dd51e6df5e42dcdd 100644 (file)
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -852,7 +852,8 @@ find_computable_ec_member(PlannerInfo *root,
     exprvars = pull_var_clause((Node *) exprs,
                                PVC_INCLUDE_AGGREGATES |
                                PVC_INCLUDE_WINDOWFUNCS |
-                              PVC_INCLUDE_PLACEHOLDERS);
+                              PVC_INCLUDE_PLACEHOLDERS |
+                              PVC_INCLUDE_CONVERTROWTYPES);
  
     foreach(lc, ec->ec_members)
     {
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c

index 6e2051efc65f8a7afd59dd74148c4a2003cf7806..a43ca16d683583e4d615a0a04785660c51d05562 100644 (file)
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -4162,6 +4162,22 @@ bool
  relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
                               List *restrictlist,
                               List *exprlist, List *oprlist)
+{
+   return relation_has_unique_index_ext(root, rel, restrictlist,
+                                        exprlist, oprlist, NULL);
+}
+
+/*
+ * relation_has_unique_index_ext
+ *   Same as relation_has_unique_index_for(), but supports extra_clauses
+ *   parameter.  If extra_clauses isn't NULL, return baserestrictinfo clauses
+ *   which were used to derive uniqueness.
+ */
+bool
+relation_has_unique_index_ext(PlannerInfo *root, RelOptInfo *rel,
+                             List *restrictlist,
+                             List *exprlist, List *oprlist,
+                             List **extra_clauses)
  {
     ListCell   *ic;
  
@@ -4217,6 +4233,7 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
     {
         IndexOptInfo *ind = (IndexOptInfo *) lfirst(ic);
         int         c;
+       List       *exprs = NIL;
  
         /*
          * If the index is not unique, or not immediately enforced, or if it's
@@ -4268,6 +4285,24 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
                 if (match_index_to_operand(rexpr, c, ind))
                 {
                     matched = true; /* column is unique */
+
+                   if (bms_membership(rinfo->clause_relids) == BMS_SINGLETON)
+                   {
+                       MemoryContext oldMemCtx =
+                           MemoryContextSwitchTo(root->planner_cxt);
+
+                       /*
+                        * Add filter clause into a list allowing caller to
+                        * know if uniqueness have made not only by join
+                        * clauses.
+                        */
+                       Assert(bms_is_empty(rinfo->left_relids) ||
+                              bms_is_empty(rinfo->right_relids));
+                       if (extra_clauses)
+                           exprs = lappend(exprs, rinfo);
+                       MemoryContextSwitchTo(oldMemCtx);
+                   }
+
                     break;
                 }
             }
@@ -4310,7 +4345,11 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
  
         /* Matched all key columns of this index? */
         if (c == ind->nkeycolumns)
+       {
+           if (extra_clauses)
+               *extra_clauses = exprs;
             return true;
+       }
     }
  
     return false;
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c

index b33fc671775a90abc26fd7c92176d751a93e00cd..3aa04d0d4e11012afdd7a6c7ff0f1528e5ddb564 100644 (file)
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -22,6 +22,7 @@
   */
  #include "postgres.h"
  
+#include "catalog/pg_class.h"
  #include "nodes/nodeFuncs.h"
  #include "optimizer/joininfo.h"
  #include "optimizer/optimizer.h"
@@ -30,27 +31,48 @@
  #include "optimizer/placeholder.h"
  #include "optimizer/planmain.h"
  #include "optimizer/restrictinfo.h"
+#include "rewrite/rewriteManip.h"
  #include "utils/lsyscache.h"
  
+/*
+ * Utility structure.  A sorting procedure is needed to simplify the search
+ * of SJE-candidate baserels referencing the same database relation.  Having
+ * collected all baserels from the query jointree, the planner sorts them
+ * according to the reloid value, groups them with the next pass and attempts
+ * to remove self-joins.
+ *
+ * Preliminary sorting prevents quadratic behavior that can be harmful in the
+ * case of numerous joins.
+ */
+typedef struct
+{
+   int         relid;
+   Oid         reloid;
+} SelfJoinCandidate;
+
+bool       enable_self_join_elimination;
+
  /* local functions */
  static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
-static void remove_rel_from_query(PlannerInfo *root, int relid,
-                                 SpecialJoinInfo *sjinfo);
+static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
+                                         SpecialJoinInfo *sjinfo);
  static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
                                          int relid, int ojrelid);
  static void remove_rel_from_eclass(EquivalenceClass *ec,
-                                  int relid, int ojrelid);
+                                  int relid, int ojrelid, int subst);
  static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
  static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
  static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
-                               List *clause_list);
+                               List *clause_list, List **extra_clauses);
  static Oid distinct_col_search(int colno, List *colnos, List *opids);
  static bool is_innerrel_unique_for(PlannerInfo *root,
                                    Relids joinrelids,
                                    Relids outerrelids,
                                    RelOptInfo *innerrel,
                                    JoinType jointype,
-                                  List *restrictlist);
+                                  List *restrictlist,
+                                  List **extra_clauses);
+static int self_join_candidates_cmp(const void *a, const void *b);
  
  
  /*
@@ -88,7 +110,7 @@ restart:
          */
         innerrelid = bms_singleton_member(sjinfo->min_righthand);
  
-       remove_rel_from_query(root, innerrelid, sjinfo);
+       remove_leftjoinrel_from_query(root, innerrelid, sjinfo);
  
         /* We verify that exactly one reference gets removed from joinlist */
         nremoved = 0;
@@ -276,7 +298,7 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
      * Now that we have the relevant equality join clauses, try to prove the
      * innerrel distinct.
      */
-   if (rel_is_distinct_for(root, innerrel, clause_list))
+   if (rel_is_distinct_for(root, innerrel, clause_list, NULL))
         return true;
  
     /*
@@ -288,36 +310,31 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
  
  
  /*
- * Remove the target relid and references to the target join from the
+ * Remove the target rel->relid and references to the target join from the
   * planner's data structures, having determined that there is no need
- * to include them in the query.
+ * to include them in the query. Optionally replace them with subst if subst
+ * is non-negative.
   *
- * We are not terribly thorough here.  We only bother to update parts of
- * the planner's data structures that will actually be consulted later.
+ * This function updates only parts needed for both left-join removal and
+ * self-join removal.
   */
  static void
-remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
+remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel,
+                     int subst, SpecialJoinInfo *sjinfo,
+                     Relids joinrelids)
  {
-   RelOptInfo *rel = find_base_rel(root, relid);
-   int         ojrelid = sjinfo->ojrelid;
-   Relids      joinrelids;
-   Relids      join_plus_commute;
-   List       *joininfos;
+   int         relid = rel->relid;
+   int         ojrelid = (sjinfo != NULL) ? sjinfo->ojrelid : -1;
     Index       rti;
     ListCell   *l;
  
-   /* Compute the relid set for the join we are considering */
-   joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
-   Assert(ojrelid != 0);
-   joinrelids = bms_add_member(joinrelids, ojrelid);
-
     /*
      * Update all_baserels and related relid sets.
      */
-   root->all_baserels = bms_del_member(root->all_baserels, relid);
-   root->outer_join_rels = bms_del_member(root->outer_join_rels, ojrelid);
-   root->all_query_rels = bms_del_member(root->all_query_rels, relid);
-   root->all_query_rels = bms_del_member(root->all_query_rels, ojrelid);
+   root->all_baserels = adjust_relid_set(root->all_baserels, relid, subst);
+   root->outer_join_rels = adjust_relid_set(root->outer_join_rels, ojrelid, subst);
+   root->all_query_rels = adjust_relid_set(root->all_query_rels, relid, subst);
+   root->all_query_rels = adjust_relid_set(root->all_query_rels, ojrelid, subst);
  
     /*
      * Likewise remove references from SpecialJoinInfo data structures.
@@ -341,20 +358,33 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
         sjinf->min_righthand = bms_copy(sjinf->min_righthand);
         sjinf->syn_lefthand = bms_copy(sjinf->syn_lefthand);
         sjinf->syn_righthand = bms_copy(sjinf->syn_righthand);
-       /* Now remove relid and ojrelid bits from the sets: */
-       sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, relid);
-       sjinf->min_righthand = bms_del_member(sjinf->min_righthand, relid);
-       sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, relid);
-       sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, relid);
-       sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, ojrelid);
-       sjinf->min_righthand = bms_del_member(sjinf->min_righthand, ojrelid);
-       sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, ojrelid);
-       sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, ojrelid);
-       /* relid cannot appear in these fields, but ojrelid can: */
-       sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l, ojrelid);
-       sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r, ojrelid);
-       sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l, ojrelid);
-       sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r, ojrelid);
+       /* Now remove relid from the sets: */
+       sjinf->min_lefthand = adjust_relid_set(sjinf->min_lefthand, relid, subst);
+       sjinf->min_righthand = adjust_relid_set(sjinf->min_righthand, relid, subst);
+       sjinf->syn_lefthand = adjust_relid_set(sjinf->syn_lefthand, relid, subst);
+       sjinf->syn_righthand = adjust_relid_set(sjinf->syn_righthand, relid, subst);
+
+       if (sjinfo != NULL)
+       {
+           Assert(subst <= 0 && ojrelid > 0);
+
+           /* Remove ojrelid bits from the sets: */
+           sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, ojrelid);
+           sjinf->min_righthand = bms_del_member(sjinf->min_righthand, ojrelid);
+           sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, ojrelid);
+           sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, ojrelid);
+           /* relid cannot appear in these fields, but ojrelid can: */
+           sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l, ojrelid);
+           sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r, ojrelid);
+           sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l, ojrelid);
+           sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r, ojrelid);
+       }
+       else
+       {
+           Assert(subst > 0 && ojrelid == -1);
+
+           ChangeVarNodes((Node *) sjinf->semi_rhs_exprs, relid, subst, 0);
+       }
     }
  
     /*
@@ -375,10 +405,10 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
     {
         PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
  
-       Assert(!bms_is_member(relid, phinfo->ph_lateral));
+       Assert(sjinfo == NULL || !bms_is_member(relid, phinfo->ph_lateral));
         if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
             bms_is_member(relid, phinfo->ph_eval_at) &&
-           !bms_is_member(ojrelid, phinfo->ph_eval_at))
+           (sjinfo == NULL || !bms_is_member(ojrelid, phinfo->ph_eval_at)))
         {
             root->placeholder_list = foreach_delete_current(root->placeholder_list,
                                                             l);
@@ -388,21 +418,112 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
         {
             PlaceHolderVar *phv = phinfo->ph_var;
  
-           phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
-           phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, ojrelid);
+           phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, relid, subst);
+           phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, ojrelid, subst);
             Assert(!bms_is_empty(phinfo->ph_eval_at));  /* checked previously */
             /* Reduce ph_needed to contain only "relation 0"; see below */
             if (bms_is_member(0, phinfo->ph_needed))
                 phinfo->ph_needed = bms_make_singleton(0);
             else
                 phinfo->ph_needed = NULL;
-           phv->phrels = bms_del_member(phv->phrels, relid);
-           phv->phrels = bms_del_member(phv->phrels, ojrelid);
+
+           phinfo->ph_lateral = adjust_relid_set(phinfo->ph_lateral, relid, subst);
+
+           /*
+            * ph_lateral might contain rels mentioned in ph_eval_at after the
+            * replacement, remove them.
+            */
+           phinfo->ph_lateral = bms_difference(phinfo->ph_lateral, phinfo->ph_eval_at);
+           /* ph_lateral might or might not be empty */
+
+           phv->phrels = adjust_relid_set(phv->phrels, relid, subst);
+           phv->phrels = adjust_relid_set(phv->phrels, ojrelid, subst);
             Assert(!bms_is_empty(phv->phrels));
+
+           ChangeVarNodes((Node *) phv->phexpr, relid, subst, 0);
+
             Assert(phv->phnullingrels == NULL); /* no need to adjust */
         }
     }
  
+   /*
+    * Likewise remove references from EquivalenceClasses.
+    */
+   foreach(l, root->eq_classes)
+   {
+       EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
+
+       if (bms_is_member(relid, ec->ec_relids) ||
+           (sjinfo == NULL || bms_is_member(ojrelid, ec->ec_relids)))
+           remove_rel_from_eclass(ec, relid, ojrelid, subst);
+   }
+
+   /*
+    * Finally, we must recompute per-Var attr_needed and per-PlaceHolderVar
+    * ph_needed relid sets.  These have to be known accurately, else we may
+    * fail to remove other now-removable outer joins.  And our removal of the
+    * join clause(s) for this outer join may mean that Vars that were
+    * formerly needed no longer are.  So we have to do this honestly by
+    * repeating the construction of those relid sets.  We can cheat to one
+    * small extent: we can avoid re-examining the targetlist and HAVING qual
+    * by preserving "relation 0" bits from the existing relid sets.  This is
+    * safe because we'd never remove such references.
+    *
+    * So, start by removing all other bits from attr_needed sets and
+    * lateral_vars lists.  (We already did this above for ph_needed.)
+    */
+   for (rti = 1; rti < root->simple_rel_array_size; rti++)
+   {
+       RelOptInfo *otherrel = root->simple_rel_array[rti];
+       int         attroff;
+
+       /* there may be empty slots corresponding to non-baserel RTEs */
+       if (otherrel == NULL)
+           continue;
+
+       Assert(otherrel->relid == rti); /* sanity check on array */
+
+       for (attroff = otherrel->max_attr - otherrel->min_attr;
+            attroff >= 0;
+            attroff--)
+       {
+           if (bms_is_member(0, otherrel->attr_needed[attroff]))
+               otherrel->attr_needed[attroff] = bms_make_singleton(0);
+           else
+               otherrel->attr_needed[attroff] = NULL;
+       }
+
+       if (subst > 0)
+           ChangeVarNodes((Node *) otherrel->lateral_vars, relid, subst, 0);
+   }
+}
+
+/*
+ * Remove the target relid and references to the target join from the
+ * planner's data structures, having determined that there is no need
+ * to include them in the query.
+ *
+ * We are not terribly thorough here.  We only bother to update parts of
+ * the planner's data structures that will actually be consulted later.
+ */
+static void
+remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
+                             SpecialJoinInfo *sjinfo)
+{
+   RelOptInfo *rel = find_base_rel(root, relid);
+   int         ojrelid = sjinfo->ojrelid;
+   Relids      joinrelids;
+   Relids      join_plus_commute;
+   List       *joininfos;
+   ListCell   *l;
+
+   /* Compute the relid set for the join we are considering */
+   joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+   Assert(ojrelid != 0);
+   joinrelids = bms_add_member(joinrelids, ojrelid);
+
+   remove_rel_from_query(root, rel, -1, sjinfo, joinrelids);
+
     /*
      * Remove any joinquals referencing the rel from the joininfo lists.
      *
@@ -465,18 +586,6 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
         }
     }
  
-   /*
-    * Likewise remove references from EquivalenceClasses.
-    */
-   foreach(l, root->eq_classes)
-   {
-       EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
-
-       if (bms_is_member(relid, ec->ec_relids) ||
-           bms_is_member(ojrelid, ec->ec_relids))
-           remove_rel_from_eclass(ec, relid, ojrelid);
-   }
-
     /*
      * There may be references to the rel in root->fkey_list, but if so,
      * match_foreign_keys_to_quals() will get rid of them.
@@ -492,42 +601,6 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
     /* And nuke the RelOptInfo, just in case there's another access path */
     pfree(rel);
  
-   /*
-    * Finally, we must recompute per-Var attr_needed and per-PlaceHolderVar
-    * ph_needed relid sets.  These have to be known accurately, else we may
-    * fail to remove other now-removable outer joins.  And our removal of the
-    * join clause(s) for this outer join may mean that Vars that were
-    * formerly needed no longer are.  So we have to do this honestly by
-    * repeating the construction of those relid sets.  We can cheat to one
-    * small extent: we can avoid re-examining the targetlist and HAVING qual
-    * by preserving "relation 0" bits from the existing relid sets.  This is
-    * safe because we'd never remove such references.
-    *
-    * So, start by removing all other bits from attr_needed sets.  (We
-    * already did this above for ph_needed.)
-    */
-   for (rti = 1; rti < root->simple_rel_array_size; rti++)
-   {
-       RelOptInfo *otherrel = root->simple_rel_array[rti];
-       int         attroff;
-
-       /* there may be empty slots corresponding to non-baserel RTEs */
-       if (otherrel == NULL)
-           continue;
-
-       Assert(otherrel->relid == rti); /* sanity check on array */
-
-       for (attroff = otherrel->max_attr - otherrel->min_attr;
-            attroff >= 0;
-            attroff--)
-       {
-           if (bms_is_member(0, otherrel->attr_needed[attroff]))
-               otherrel->attr_needed[attroff] = bms_make_singleton(0);
-           else
-               otherrel->attr_needed[attroff] = NULL;
-       }
-   }
-
     /*
      * Now repeat construction of attr_needed bits coming from all other
      * sources.
@@ -607,13 +680,13 @@ remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
   * level(s).
   */
  static void
-remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid)
+remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid, int subst)
  {
     ListCell   *lc;
  
     /* Fix up the EC's overall relids */
-   ec->ec_relids = bms_del_member(ec->ec_relids, relid);
-   ec->ec_relids = bms_del_member(ec->ec_relids, ojrelid);
+   ec->ec_relids = adjust_relid_set(ec->ec_relids, relid, subst);
+   ec->ec_relids = adjust_relid_set(ec->ec_relids, ojrelid, subst);
  
     /*
      * Fix up the member expressions.  Any non-const member that ends with
@@ -625,11 +698,11 @@ remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid)
         EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
  
         if (bms_is_member(relid, cur_em->em_relids) ||
-           bms_is_member(ojrelid, cur_em->em_relids))
+           (ojrelid != -1 && bms_is_member(ojrelid, cur_em->em_relids)))
         {
             Assert(!cur_em->em_is_const);
-           cur_em->em_relids = bms_del_member(cur_em->em_relids, relid);
-           cur_em->em_relids = bms_del_member(cur_em->em_relids, ojrelid);
+           cur_em->em_relids = adjust_relid_set(cur_em->em_relids, relid, subst);
+           cur_em->em_relids = adjust_relid_set(cur_em->em_relids, ojrelid, subst);
             if (bms_is_empty(cur_em->em_relids))
                 ec->ec_members = foreach_delete_current(ec->ec_members, lc);
         }
@@ -640,7 +713,10 @@ remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid)
     {
         RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
  
-       remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
+       if (ojrelid == -1)
+           ChangeVarNodes((Node *) rinfo, relid, subst, 0);
+       else
+           remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
     }
  
     /*
@@ -844,9 +920,15 @@ rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
   * Note that the passed-in clause_list may be destructively modified!  This
   * is OK for current uses, because the clause_list is built by the caller for
   * the sole purpose of passing to this function.
+ *
+ * (*extra_clauses) to be set to the right sides of baserestrictinfo clauses,
+ * looking like "x = const" if distinctness is derived from such clauses, not
+ * joininfo clauses.  Pass NULL to the extra_clauses if this value is not
+ * needed.
   */
  static bool
-rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
+rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
+                   List **extra_clauses)
  {
     /*
      * We could skip a couple of tests here if we assume all callers checked
@@ -859,10 +941,11 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
     {
         /*
          * Examine the indexes to see if we have a matching unique index.
-        * relation_has_unique_index_for automatically adds any usable
+        * relation_has_unique_index_ext automatically adds any usable
          * restriction clauses for the rel, so we needn't do that here.
          */
-       if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
+       if (relation_has_unique_index_ext(root, rel, clause_list, NIL, NIL,
+                                         extra_clauses))
             return true;
     }
     else if (rel->rtekind == RTE_SUBQUERY)
@@ -1176,9 +1259,35 @@ innerrel_is_unique(PlannerInfo *root,
                    JoinType jointype,
                    List *restrictlist,
                    bool force_cache)
+{
+   return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel,
+                                 jointype, restrictlist, force_cache, NULL);
+}
+
+/*
+ * innerrel_is_unique_ext
+ *   Do the same as innerrel_is_unique(), but also set to (*extra_clauses)
+ *   additional clauses from a baserestrictinfo list used to prove the
+ *   uniqueness.
+ *
+ * A non-NULL extra_clauses indicates that we're checking for self-join and
+ * correspondingly dealing with filtered clauses.
+ */
+bool
+innerrel_is_unique_ext(PlannerInfo *root,
+                      Relids joinrelids,
+                      Relids outerrelids,
+                      RelOptInfo *innerrel,
+                      JoinType jointype,
+                      List *restrictlist,
+                      bool force_cache,
+                      List **extra_clauses)
  {
     MemoryContext old_context;
     ListCell   *lc;
+   UniqueRelInfo *uniqueRelInfo;
+   List       *outer_exprs = NIL;
+   bool        self_join = (extra_clauses != NULL);
  
     /* Certainly can't prove uniqueness when there are no joinclauses */
     if (restrictlist == NIL)
@@ -1193,17 +1302,28 @@ innerrel_is_unique(PlannerInfo *root,
  
     /*
      * Query the cache to see if we've managed to prove that innerrel is
-    * unique for any subset of this outerrel.  We don't need an exact match,
-    * as extra outerrels can't make the innerrel any less unique (or more
-    * formally, the restrictlist for a join to a superset outerrel must be a
-    * superset of the conditions we successfully used before).
+    * unique for any subset of this outerrel.  For non-self-join search, we
+    * don't need an exact match, as extra outerrels can't make the innerrel
+    * any less unique (or more formally, the restrictlist for a join to a
+    * superset outerrel must be a superset of the conditions we successfully
+    * used before). For self-join search, we require an exact match of
+    * outerrels because we need extra clauses to be valid for our case. Also,
+    * for self-join checking we've filtered the clauses list.  Thus, we can
+    * match only the result cached for a self-join search for another
+    * self-join check.
      */
     foreach(lc, innerrel->unique_for_rels)
     {
-       Relids      unique_for_rels = (Relids) lfirst(lc);
+       uniqueRelInfo = (UniqueRelInfo *) lfirst(lc);
  
-       if (bms_is_subset(unique_for_rels, outerrelids))
+       if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) ||
+           (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) &&
+            uniqueRelInfo->self_join))
+       {
+           if (extra_clauses)
+               *extra_clauses = uniqueRelInfo->extra_clauses;
             return true;        /* Success! */
+       }
     }
  
     /*
@@ -1220,7 +1340,8 @@ innerrel_is_unique(PlannerInfo *root,
  
     /* No cached information, so try to make the proof. */
     if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
-                              jointype, restrictlist))
+                              jointype, restrictlist,
+                              self_join ? &outer_exprs : NULL))
     {
         /*
          * Cache the positive result for future probes, being sure to keep it
@@ -1233,10 +1354,16 @@ innerrel_is_unique(PlannerInfo *root,
          * supersets of them anyway.
          */
         old_context = MemoryContextSwitchTo(root->planner_cxt);
+       uniqueRelInfo = makeNode(UniqueRelInfo);
+       uniqueRelInfo->outerrelids = bms_copy(outerrelids);
+       uniqueRelInfo->self_join = self_join;
+       uniqueRelInfo->extra_clauses = outer_exprs;
         innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
-                                           bms_copy(outerrelids));
+                                           uniqueRelInfo);
         MemoryContextSwitchTo(old_context);
  
+       if (extra_clauses)
+           *extra_clauses = outer_exprs;
         return true;            /* Success! */
     }
     else
@@ -1282,7 +1409,8 @@ is_innerrel_unique_for(PlannerInfo *root,
                        Relids outerrelids,
                        RelOptInfo *innerrel,
                        JoinType jointype,
-                      List *restrictlist)
+                      List *restrictlist,
+                      List **extra_clauses)
  {
     List       *clause_list = NIL;
     ListCell   *lc;
@@ -1312,17 +1440,895 @@ is_innerrel_unique_for(PlannerInfo *root,
             continue;           /* not mergejoinable */
  
         /*
-        * Check if clause has the form "outer op inner" or "inner op outer",
-        * and if so mark which side is inner.
+        * Check if the clause has the form "outer op inner" or "inner op
+        * outer", and if so mark which side is inner.
          */
         if (!clause_sides_match_join(restrictinfo, outerrelids,
                                      innerrel->relids))
             continue;           /* no good for these input relations */
  
-       /* OK, add to list */
+       /* OK, add to the list */
         clause_list = lappend(clause_list, restrictinfo);
     }
  
     /* Let rel_is_distinct_for() do the hard work */
-   return rel_is_distinct_for(root, innerrel, clause_list);
+   return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses);
+}
+
+/*
+ * Update EC members to point to the remaining relation instead of the removed
+ * one, removing duplicates.
+ *
+ * Restriction clauses for base relations are already distributed to
+ * the respective baserestrictinfo lists (see
+ * generate_implied_equalities_for_column). The above code has already processed
+ * this list and updated these clauses to reference the remaining
+ * relation, so that we can skip them here based on their relids.
+ *
+ * Likewise, we have already processed the join clauses that join the
+ * removed relation to the remaining one.
+ *
+ * Finally, there might be join clauses tying the removed relation to
+ * some third relation.  We can't just delete the source clauses and
+ * regenerate them from the EC because the corresponding equality
+ * operators might be missing (see the handling of ec_broken).
+ * Therefore, we will update the references in the source clauses.
+ *
+ * Derived clauses can be generated again, so it is simpler just to
+ * delete them.
+ */
+static void
+update_eclasses(EquivalenceClass *ec, int from, int to)
+{
+   List       *new_members = NIL;
+   List       *new_sources = NIL;
+
+   foreach_node(EquivalenceMember, em, ec->ec_members)
+   {
+       bool        is_redundant = false;
+
+       if (!bms_is_member(from, em->em_relids))
+       {
+           new_members = lappend(new_members, em);
+           continue;
+       }
+
+       em->em_relids = adjust_relid_set(em->em_relids, from, to);
+       em->em_jdomain->jd_relids = adjust_relid_set(em->em_jdomain->jd_relids, from, to);
+
+       /* We only process inner joins */
+       ChangeVarNodes((Node *) em->em_expr, from, to, 0);
+
+       foreach_node(EquivalenceMember, other, new_members)
+       {
+           if (!equal(em->em_relids, other->em_relids))
+               continue;
+
+           if (equal(em->em_expr, other->em_expr))
+           {
+               is_redundant = true;
+               break;
+           }
+       }
+
+       if (!is_redundant)
+           new_members = lappend(new_members, em);
+   }
+
+   list_free(ec->ec_members);
+   ec->ec_members = new_members;
+
+   list_free(ec->ec_derives);
+   ec->ec_derives = NULL;
+
+   /* Update EC source expressions */
+   foreach_node(RestrictInfo, rinfo, ec->ec_sources)
+   {
+       bool        is_redundant = false;
+
+       if (!bms_is_member(from, rinfo->required_relids))
+       {
+           new_sources = lappend(new_sources, rinfo);
+           continue;
+       }
+
+       ChangeVarNodes((Node *) rinfo, from, to, 0);
+
+       /*
+        * After switching the clause to the remaining relation, check it for
+        * redundancy with existing ones. We don't have to check for
+        * redundancy with derived clauses, because we've just deleted them.
+        */
+       foreach_node(RestrictInfo, other, new_sources)
+       {
+           if (!equal(rinfo->clause_relids, other->clause_relids))
+               continue;
+
+           if (equal(rinfo->clause, other->clause))
+           {
+               is_redundant = true;
+               break;
+           }
+       }
+
+       if (!is_redundant)
+           new_sources = lappend(new_sources, rinfo);
+   }
+
+   list_free(ec->ec_sources);
+   ec->ec_sources = new_sources;
+   ec->ec_relids = adjust_relid_set(ec->ec_relids, from, to);
+}
+
+/*
+ * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field,
+ * which makes almost every RestrictInfo unique.  This type of comparison is
+ * useful when removing duplicates while moving RestrictInfo's from removed
+ * relation to remaining relation during self-join elimination.
+ *
+ * XXX: In the future, we might remove the 'rinfo_serial' field completely and
+ * get rid of this function.
+ */
+static bool
+restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b)
+{
+   int         saved_rinfo_serial = a->rinfo_serial;
+   bool        result;
+
+   a->rinfo_serial = b->rinfo_serial;
+   result = equal(a, b);
+   a->rinfo_serial = saved_rinfo_serial;
+
+   return result;
+}
+
+/*
+ * This function adds all non-redundant clauses to the keeping relation
+ * during self-join elimination.  That is a contradictory operation. On the
+ * one hand, we reduce the length of the `restrict` lists, which can
+ * impact planning or executing time.  Additionally, we improve the
+ * accuracy of cardinality estimation.  On the other hand, it is one more
+ * place that can make planning time much longer in specific cases.  It
+ * would have been better to avoid calling the equal() function here, but
+ * it's the only way to detect duplicated inequality expressions.
+ *
+ * (*keep_rinfo_list) is given by pointer because it might be altered by
+ * distribute_restrictinfo_to_rels().
+ */
+static void
+add_non_redundant_clauses(PlannerInfo *root,
+                         List *rinfo_candidates,
+                         List **keep_rinfo_list,
+                         Index removed_relid)
+{
+   foreach_node(RestrictInfo, rinfo, rinfo_candidates)
+   {
+       bool        is_redundant = false;
+
+       Assert(!bms_is_member(removed_relid, rinfo->required_relids));
+
+       foreach_node(RestrictInfo, src, (*keep_rinfo_list))
+       {
+           if (!bms_equal(src->clause_relids, rinfo->clause_relids))
+               /* Can't compare trivially different clauses */
+               continue;
+
+           if (src == rinfo ||
+               (rinfo->parent_ec != NULL &&
+                src->parent_ec == rinfo->parent_ec) ||
+               restrict_infos_logically_equal(rinfo, src))
+           {
+               is_redundant = true;
+               break;
+           }
+       }
+       if (!is_redundant)
+           distribute_restrictinfo_to_rels(root, rinfo);
+   }
+}
+
+/*
+ * Remove a relation after we have proven that it participates only in an
+ * unneeded unique self-join.
+ *
+ * Replace any links in planner info structures.
+ *
+ * Transfer join and restriction clauses from the removed relation to the
+ * remaining one. We change the Vars of the clause to point to the
+ * remaining relation instead of the removed one. The clauses that require
+ * a subset of joinrelids become restriction clauses of the remaining
+ * relation, and others remain join clauses. We append them to
+ * baserestrictinfo and joininfo, respectively, trying not to introduce
+ * duplicates.
+ *
+ * We also have to process the 'joinclauses' list here, because it
+ * contains EC-derived join clauses which must become filter clauses. It
+ * is not enough to just correct the ECs because the EC-derived
+ * restrictions are generated before join removal (see
+ * generate_base_implied_equalities).
+ *
+ * NOTE: Remember to keep the code in sync with PlannerInfo to be sure all
+ * cached relids and relid bitmapsets can be correctly cleaned during the
+ * self-join elimination procedure.
+ */
+static void
+remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
+                    RelOptInfo *toKeep, RelOptInfo *toRemove,
+                    List *restrictlist)
+{
+   List       *joininfos;
+   ListCell   *lc;
+   int         i;
+   List       *jinfo_candidates = NIL;
+   List       *binfo_candidates = NIL;
+
+   Assert(toKeep->relid > 0);
+   Assert(toRemove->relid > 0);
+
+   /*
+    * Replace the index of the removing table with the keeping one. The
+    * technique of removing/distributing restrictinfo is used here to attach
+    * just appeared (for keeping relation) join clauses and avoid adding
+    * duplicates of those that already exist in the joininfo list.
+    */
+   joininfos = list_copy(toRemove->joininfo);
+   foreach_node(RestrictInfo, rinfo, joininfos)
+   {
+       remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
+       ChangeVarNodes((Node *) rinfo, toRemove->relid, toKeep->relid, 0);
+
+       if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
+           jinfo_candidates = lappend(jinfo_candidates, rinfo);
+       else
+           binfo_candidates = lappend(binfo_candidates, rinfo);
+   }
+
+   /*
+    * Concatenate restrictlist to the list of base restrictions of the
+    * removing table just to simplify the replacement procedure: all of them
+    * weren't connected to any keeping relations and need to be added to some
+    * rels.
+    */
+   toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo,
+                                            restrictlist);
+   foreach_node(RestrictInfo, rinfo, toRemove->baserestrictinfo)
+   {
+       ChangeVarNodes((Node *) rinfo, toRemove->relid, toKeep->relid, 0);
+
+       if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
+           jinfo_candidates = lappend(jinfo_candidates, rinfo);
+       else
+           binfo_candidates = lappend(binfo_candidates, rinfo);
+   }
+
+   /*
+    * Now, add all non-redundant clauses to the keeping relation.
+    */
+   add_non_redundant_clauses(root, binfo_candidates,
+                             &toKeep->baserestrictinfo, toRemove->relid);
+   add_non_redundant_clauses(root, jinfo_candidates,
+                             &toKeep->joininfo, toRemove->relid);
+
+   list_free(binfo_candidates);
+   list_free(jinfo_candidates);
+
+   /*
+    * Arrange equivalence classes, mentioned removing a table, with the
+    * keeping one: varno of removing table should be replaced in members and
+    * sources lists. Also, remove duplicated elements if this replacement
+    * procedure created them.
+    */
+   i = -1;
+   while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0)
+   {
+       EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
+
+       update_eclasses(ec, toRemove->relid, toKeep->relid);
+       toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i);
+   }
+
+   /*
+    * Transfer the targetlist and attr_needed flags.
+    */
+
+   foreach(lc, toRemove->reltarget->exprs)
+   {
+       Node       *node = lfirst(lc);
+
+       ChangeVarNodes(node, toRemove->relid, toKeep->relid, 0);
+       if (!list_member(toKeep->reltarget->exprs, node))
+           toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node);
+   }
+
+   for (i = toKeep->min_attr; i <= toKeep->max_attr; i++)
+   {
+       int         attno = i - toKeep->min_attr;
+
+       toRemove->attr_needed[attno] = adjust_relid_set(toRemove->attr_needed[attno],
+                                                       toRemove->relid, toKeep->relid);
+       toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
+                                                    toRemove->attr_needed[attno]);
+   }
+
+   /*
+    * If the removed relation has a row mark, transfer it to the remaining
+    * one.
+    *
+    * If both rels have row marks, just keep the one corresponding to the
+    * remaining relation because we verified earlier that they have the same
+    * strength.
+    */
+   if (rmark)
+   {
+       if (kmark)
+       {
+           Assert(kmark->markType == rmark->markType);
+
+           root->rowMarks = list_delete_ptr(root->rowMarks, rmark);
+       }
+       else
+       {
+           /* Shouldn't have inheritance children here. */
+           Assert(rmark->rti == rmark->prti);
+
+           rmark->rti = rmark->prti = toKeep->relid;
+       }
+   }
+
+   /*
+    * Replace varno in all the query structures, except nodes RangeTblRef
+    * otherwise later remove_rel_from_joinlist will yield errors.
+    */
+   ChangeVarNodesExtended((Node *) root->parse, toRemove->relid, toKeep->relid, 0, false);
+
+   /* Replace links in the planner info */
+   remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL);
+
+   /* At last, replace varno in root targetlist and HAVING clause */
+   ChangeVarNodes((Node *) root->processed_tlist, toRemove->relid, toKeep->relid, 0);
+   ChangeVarNodes((Node *) root->processed_groupClause, toRemove->relid, toKeep->relid, 0);
+
+   adjust_relid_set(root->all_result_relids, toRemove->relid, toKeep->relid);
+   adjust_relid_set(root->leaf_result_relids, toRemove->relid, toKeep->relid);
+
+   /*
+    * There may be references to the rel in root->fkey_list, but if so,
+    * match_foreign_keys_to_quals() will get rid of them.
+    */
+
+   /*
+    * Finally, remove the rel from the baserel array to prevent it from being
+    * referenced again.  (We can't do this earlier because
+    * remove_join_clause_from_rels will touch it.)
+    */
+   root->simple_rel_array[toRemove->relid] = NULL;
+
+   /* And nuke the RelOptInfo, just in case there's another access path. */
+   pfree(toRemove);
+
+   /*
+    * Now repeat construction of attr_needed bits coming from all other
+    * sources.
+    */
+   rebuild_placeholder_attr_needed(root);
+   rebuild_joinclause_attr_needed(root);
+   rebuild_eclass_attr_needed(root);
+   rebuild_lateral_attr_needed(root);
+}
+
+/*
+ * split_selfjoin_quals
+ *     Processes 'joinquals' by building two lists: one containing the quals
+ *     where the columns/exprs are on either side of the join match and
+ *     another one containing the remaining quals.
+ *
+ * 'joinquals' must only contain quals for a RTE_RELATION being joined to
+ * itself.
+ */
+static void
+split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals,
+                    List **otherjoinquals, int from, int to)
+{
+   List       *sjoinquals = NIL;
+   List       *ojoinquals = NIL;
+
+   foreach_node(RestrictInfo, rinfo, joinquals)
+   {
+       OpExpr     *expr;
+       Node       *leftexpr;
+       Node       *rightexpr;
+
+       /* In general, clause looks like F(arg1) = G(arg2) */
+       if (!rinfo->mergeopfamilies ||
+           bms_num_members(rinfo->clause_relids) != 2 ||
+           bms_membership(rinfo->left_relids) != BMS_SINGLETON ||
+           bms_membership(rinfo->right_relids) != BMS_SINGLETON)
+       {
+           ojoinquals = lappend(ojoinquals, rinfo);
+           continue;
+       }
+
+       expr = (OpExpr *) rinfo->clause;
+
+       if (!IsA(expr, OpExpr) || list_length(expr->args) != 2)
+       {
+           ojoinquals = lappend(ojoinquals, rinfo);
+           continue;
+       }
+
+       leftexpr = get_leftop(rinfo->clause);
+       rightexpr = copyObject(get_rightop(rinfo->clause));
+
+       if (leftexpr && IsA(leftexpr, RelabelType))
+           leftexpr = (Node *) ((RelabelType *) leftexpr)->arg;
+       if (rightexpr && IsA(rightexpr, RelabelType))
+           rightexpr = (Node *) ((RelabelType *) rightexpr)->arg;
+
+       /*
+        * Quite an expensive operation, narrowing the use case. For example,
+        * when we have cast of the same var to different (but compatible)
+        * types.
+        */
+       ChangeVarNodes(rightexpr, bms_singleton_member(rinfo->right_relids),
+                      bms_singleton_member(rinfo->left_relids), 0);
+
+       if (equal(leftexpr, rightexpr))
+           sjoinquals = lappend(sjoinquals, rinfo);
+       else
+           ojoinquals = lappend(ojoinquals, rinfo);
+   }
+
+   *selfjoinquals = sjoinquals;
+   *otherjoinquals = ojoinquals;
+}
+
+/*
+ * Check for a case when uniqueness is at least partly derived from a
+ * baserestrictinfo clause. In this case, we have a chance to return only
+ * one row (if such clauses on both sides of SJ are equal) or nothing (if they
+ * are different).
+ */
+static bool
+match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses,
+                    Index relid)
+{
+   foreach_node(RestrictInfo, rinfo, uclauses)
+   {
+       Expr       *clause;
+       Node       *iclause;
+       Node       *c1;
+       bool        matched = false;
+
+       Assert(outer->relid > 0 && relid > 0);
+
+       /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */
+       Assert(bms_is_empty(rinfo->left_relids) ^
+              bms_is_empty(rinfo->right_relids));
+
+       clause = (Expr *) copyObject(rinfo->clause);
+       ChangeVarNodes((Node *) clause, relid, outer->relid, 0);
+
+       iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) :
+           get_leftop(clause);
+       c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) :
+           get_rightop(clause);
+
+       /*
+        * Compare these left and right sides with the corresponding sides of
+        * the outer's filters. If no one is detected - return immediately.
+        */
+       foreach_node(RestrictInfo, orinfo, outer->baserestrictinfo)
+       {
+           Node       *oclause;
+           Node       *c2;
+
+           if (orinfo->mergeopfamilies == NIL)
+               /* Don't consider clauses that aren't similar to 'F(X)=G(Y)' */
+               continue;
+
+           Assert(is_opclause(orinfo->clause));
+
+           oclause = bms_is_empty(orinfo->left_relids) ?
+               get_rightop(orinfo->clause) : get_leftop(orinfo->clause);
+           c2 = (bms_is_empty(orinfo->left_relids) ?
+                 get_leftop(orinfo->clause) : get_rightop(orinfo->clause));
+
+           if (equal(iclause, oclause) && equal(c1, c2))
+           {
+               matched = true;
+               break;
+           }
+       }
+
+       if (!matched)
+           return false;
+   }
+
+   return true;
+}
+
+/*
+ * Find and remove unique self-joins in a group of base relations that have
+ * the same Oid.
+ *
+ * Returns a set of relids that were removed.
+ */
+static Relids
+remove_self_joins_one_group(PlannerInfo *root, Relids relids)
+{
+   Relids      result = NULL;
+   int         k;              /* Index of kept relation */
+   int         r = -1;         /* Index of removed relation */
+
+   while ((r = bms_next_member(relids, r)) > 0)
+   {
+       RelOptInfo *inner = root->simple_rel_array[r];
+
+       k = r;
+
+       while ((k = bms_next_member(relids, k)) > 0)
+       {
+           Relids      joinrelids = NULL;
+           RelOptInfo *outer = root->simple_rel_array[k];
+           List       *restrictlist;
+           List       *selfjoinquals;
+           List       *otherjoinquals;
+           ListCell   *lc;
+           bool        jinfo_check = true;
+           PlanRowMark *omark = NULL;
+           PlanRowMark *imark = NULL;
+           List       *uclauses = NIL;
+
+           /* A sanity check: the relations have the same Oid. */
+           Assert(root->simple_rte_array[k]->relid ==
+                  root->simple_rte_array[r]->relid);
+
+           /*
+            * It is impossible to eliminate the join of two relations if they
+            * belong to different rules of order. Otherwise, the planner
+            * can't find any variants of the correct query plan.
+            */
+           foreach(lc, root->join_info_list)
+           {
+               SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
+
+               if ((bms_is_member(k, info->syn_lefthand) ^
+                    bms_is_member(r, info->syn_lefthand)) ||
+                   (bms_is_member(k, info->syn_righthand) ^
+                    bms_is_member(r, info->syn_righthand)))
+               {
+                   jinfo_check = false;
+                   break;
+               }
+           }
+           if (!jinfo_check)
+               continue;
+
+           /*
+            * Check Row Marks equivalence. We can't remove the join if the
+            * relations have row marks of different strength (e.g., one is
+            * locked FOR UPDATE, and another just has ROW_MARK_REFERENCE for
+            * EvalPlanQual rechecking).
+            */
+           foreach(lc, root->rowMarks)
+           {
+               PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc);
+
+               if (rowMark->rti == k)
+               {
+                   Assert(imark == NULL);
+                   imark = rowMark;
+               }
+               else if (rowMark->rti == r)
+               {
+                   Assert(omark == NULL);
+                   omark = rowMark;
+               }
+
+               if (omark && imark)
+                   break;
+           }
+           if (omark && imark && omark->markType != imark->markType)
+               continue;
+
+           /*
+            * We only deal with base rels here, so their relids bitset
+            * contains only one member -- their relid.
+            */
+           joinrelids = bms_add_member(joinrelids, r);
+           joinrelids = bms_add_member(joinrelids, k);
+
+           /*
+            * PHVs should not impose any constraints on removing self-joins.
+            */
+
+           /*
+            * At this stage, joininfo lists of inner and outer can contain
+            * only clauses required for a superior outer join that can't
+            * influence this optimization. So, we can avoid to call the
+            * build_joinrel_restrictlist() routine.
+            */
+           restrictlist = generate_join_implied_equalities(root, joinrelids,
+                                                           inner->relids,
+                                                           outer, NULL);
+           if (restrictlist == NIL)
+               continue;
+
+           /*
+            * Process restrictlist to separate the self-join quals from the
+            * other quals. e.g., "x = x" goes to selfjoinquals and "a = b" to
+            * otherjoinquals.
+            */
+           split_selfjoin_quals(root, restrictlist, &selfjoinquals,
+                                &otherjoinquals, inner->relid, outer->relid);
+
+           Assert(list_length(restrictlist) ==
+                  (list_length(selfjoinquals) + list_length(otherjoinquals)));
+
+           /*
+            * To enable SJE for the only degenerate case without any self
+            * join clauses at all, add baserestrictinfo to this list. The
+            * degenerate case works only if both sides have the same clause.
+            * So doesn't matter which side to add.
+            */
+           selfjoinquals = list_concat(selfjoinquals, outer->baserestrictinfo);
+
+           /*
+            * Determine if the inner table can duplicate outer rows.  We must
+            * bypass the unique rel cache here since we're possibly using a
+            * subset of join quals. We can use 'force_cache' == true when all
+            * join quals are self-join quals.  Otherwise, we could end up
+            * putting false negatives in the cache.
+            */
+           if (!innerrel_is_unique_ext(root, joinrelids, inner->relids,
+                                       outer, JOIN_INNER, selfjoinquals,
+                                       list_length(otherjoinquals) == 0,
+                                       &uclauses))
+               continue;
+
+           /*
+            * 'uclauses' is the copy of outer->baserestrictinfo that are
+            * associated with an index.  We proved by matching selfjoinquals
+            * to a unique index that the outer relation has at most one
+            * matching row for each inner row.  Sometimes that is not enough.
+            * e.g. "WHERE s1.b = s2.b AND s1.a = 1 AND s2.a = 2" when the
+            * unique index is (a,b).  Having non-empty uclauses, we must
+            * validate that the inner baserestrictinfo contains the same
+            * expressions, or we won't match the same row on each side of the
+            * join.
+            */
+           if (!match_unique_clauses(root, inner, uclauses, outer->relid))
+               continue;
+
+           /*
+            * We can remove either relation, so remove the inner one in order
+            * to simplify this loop.
+            */
+           remove_self_join_rel(root, omark, imark, outer, inner, restrictlist);
+
+           result = bms_add_member(result, r);
+
+           /* We have removed the outer relation, try the next one. */
+           break;
+       }
+   }
+
+   return result;
+}
+
+/*
+ * Gather indexes of base relations from the joinlist and try to eliminate self
+ * joins.
+ */
+static Relids
+remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
+{
+   ListCell   *jl;
+   Relids      relids = NULL;
+   SelfJoinCandidate *candidates = NULL;
+   int         i;
+   int         j;
+   int         numRels;
+
+   /* Collect indexes of base relations of the join tree */
+   foreach(jl, joinlist)
+   {
+       Node       *jlnode = (Node *) lfirst(jl);
+
+       if (IsA(jlnode, RangeTblRef))
+       {
+           int         varno = ((RangeTblRef *) jlnode)->rtindex;
+           RangeTblEntry *rte = root->simple_rte_array[varno];
+
+           /*
+            * We only consider ordinary relations as candidates to be
+            * removed, and these relations should not have TABLESAMPLE
+            * clauses specified.  Removing a relation with TABLESAMPLE clause
+            * could potentially change the syntax of the query. Because of
+            * UPDATE/DELETE EPQ mechanism, currently Query->resultRelation or
+            * Query->mergeTargetRelation associated rel cannot be eliminated.
+            */
+           if (rte->rtekind == RTE_RELATION &&
+               rte->relkind == RELKIND_RELATION &&
+               rte->tablesample == NULL &&
+               varno != root->parse->resultRelation &&
+               varno != root->parse->mergeTargetRelation)
+           {
+               Assert(!bms_is_member(varno, relids));
+               relids = bms_add_member(relids, varno);
+           }
+       }
+       else if (IsA(jlnode, List))
+       {
+           /* Recursively go inside the sub-joinlist */
+           toRemove = remove_self_joins_recurse(root, (List *) jlnode,
+                                                toRemove);
+       }
+       else
+           elog(ERROR, "unrecognized joinlist node type: %d",
+                (int) nodeTag(jlnode));
+   }
+
+   numRels = bms_num_members(relids);
+
+   /* Need at least two relations for the join */
+   if (numRels < 2)
+       return toRemove;
+
+   /*
+    * In order to find relations with the same oid we first build an array of
+    * candidates and then sort it by oid.
+    */
+   candidates = (SelfJoinCandidate *) palloc(sizeof(SelfJoinCandidate) *
+                                             numRels);
+   i = -1;
+   j = 0;
+   while ((i = bms_next_member(relids, i)) >= 0)
+   {
+       candidates[j].relid = i;
+       candidates[j].reloid = root->simple_rte_array[i]->relid;
+       j++;
+   }
+
+   qsort(candidates, numRels, sizeof(SelfJoinCandidate),
+         self_join_candidates_cmp);
+
+   /*
+    * Iteratively form a group of relation indexes with the same oid and
+    * launch the routine that detects self-joins in this group and removes
+    * excessive range table entries.
+    *
+    * At the end of the iteration, exclude the group from the overall relids
+    * list. So each next iteration of the cycle will involve less and less
+    * value of relids.
+    */
+   i = 0;
+   for (j = 1; j < numRels + 1; j++)
+   {
+       if (j == numRels || candidates[j].reloid != candidates[i].reloid)
+       {
+           if (j - i >= 2)
+           {
+               /* Create a group of relation indexes with the same oid */
+               Relids      group = NULL;
+               Relids      removed;
+
+               while (i < j)
+               {
+                   group = bms_add_member(group, candidates[i].relid);
+                   i++;
+               }
+               relids = bms_del_members(relids, group);
+
+               /*
+                * Try to remove self-joins from a group of identical entries.
+                * Make the next attempt iteratively - if something is deleted
+                * from a group, changes in clauses and equivalence classes
+                * can give us a chance to find more candidates.
+                */
+               do
+               {
+                   Assert(!bms_overlap(group, toRemove));
+                   removed = remove_self_joins_one_group(root, group);
+                   toRemove = bms_add_members(toRemove, removed);
+                   group = bms_del_members(group, removed);
+               } while (!bms_is_empty(removed) &&
+                        bms_membership(group) == BMS_MULTIPLE);
+               bms_free(removed);
+               bms_free(group);
+           }
+           else
+           {
+               /* Single relation, just remove it from the set */
+               relids = bms_del_member(relids, candidates[i].relid);
+               i = j;
+           }
+       }
+   }
+
+   Assert(bms_is_empty(relids));
+
+   return toRemove;
+}
+
+/*
+ * Compare self-join candidates by their oids.
+ */
+static int
+self_join_candidates_cmp(const void *a, const void *b)
+{
+   const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a;
+   const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b;
+
+   if (ca->reloid != cb->reloid)
+       return (ca->reloid < cb->reloid ? -1 : 1);
+   else
+       return 0;
+}
+
+/*
+ * Find and remove useless self joins.
+ *
+ * Search for joins where a relation is joined to itself. If the join clause
+ * for each tuple from one side of the join is proven to match the same
+ * physical row (or nothing) on the other side, that self-join can be
+ * eliminated from the query.  Suitable join clauses are assumed to be in the
+ * form of X = X, and can be replaced with NOT NULL clauses.
+ *
+ * For the sake of simplicity, we don't apply this optimization to special
+ * joins. Here is a list of what we could do in some particular cases:
+ * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins,
+ * and then removed normally.
+ * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND
+ * (IS NULL on join columns OR NOT inner quals)'.
+ * 'a a1 left join a a2': could simplify to a scan like inner but without
+ * NOT NULL conditions on join columns.
+ * 'a a1 left join (a a2 join b)': can't simplify this, because join to b
+ * can both remove rows and introduce duplicates.
+ *
+ * To search for removable joins, we order all the relations on their Oid,
+ * go over each set with the same Oid, and consider each pair of relations
+ * in this set.
+ *
+ * To remove the join, we mark one of the participating relations as dead
+ * and rewrite all references to it to point to the remaining relation.
+ * This includes modifying RestrictInfos, EquivalenceClasses, and
+ * EquivalenceMembers. We also have to modify the row marks. The join clauses
+ * of the removed relation become either restriction or join clauses, based on
+ * whether they reference any relations not participating in the removed join.
+ *
+ * 'joinlist' is the top-level joinlist of the query. If it has any
+ * references to the removed relations, we update them to point to the
+ * remaining ones.
+ */
+List *
+remove_useless_self_joins(PlannerInfo *root, List *joinlist)
+{
+   Relids      toRemove = NULL;
+   int         relid = -1;
+
+   if (!enable_self_join_elimination || joinlist == NIL ||
+       (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List)))
+       return joinlist;
+
+   /*
+    * Merge pairs of relations participated in self-join. Remove unnecessary
+    * range table entries.
+    */
+   toRemove = remove_self_joins_recurse(root, joinlist, toRemove);
+
+   if (unlikely(toRemove != NULL))
+   {
+       /* At the end, remove orphaned relation links */
+       while ((relid = bms_next_member(toRemove, relid)) >= 0)
+       {
+           int         nremoved = 0;
+
+           joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved);
+           if (nremoved != 1)
+               elog(ERROR, "failed to find relation %d in joinlist", relid);
+       }
+   }
+
+   return joinlist;
  }
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c

index ade23fd9d56d2a96ff62ceeeea67869aa96e368a..5467e094ca7e0ac553fbc7b1c3e42a22dee195f4 100644 (file)
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -233,6 +233,11 @@ query_planner(PlannerInfo *root,
      */
     reduce_unique_semijoins(root);
  
+   /*
+    * Remove self joins on a unique column.
+    */
+   joinlist = remove_useless_self_joins(root, joinlist);
+
     /*
      * Now distribute "placeholders" to base rels as needed.  This has to be
      * done after join removal because removal could change whether a
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c

index 7c27dc24e217692a13ee3e67eef7d6246123d9d8..eab44da65b8f078c98a890157d196483460e5fe0 100644 (file)
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -639,14 +639,17 @@ build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
      * otherwise do statistical estimation.
      *
      * XXX you don't really want to know about this: we do the estimation
-    * using the subquery's original targetlist expressions, not the
+    * using the subroot->parse's original targetlist expressions, not the
      * subroot->processed_tlist which might seem more appropriate.  The reason
      * is that if the subquery is itself a setop, it may return a
      * processed_tlist containing "varno 0" Vars generated by
      * generate_append_tlist, and those would confuse estimate_num_groups
      * mightily.  We ought to get rid of the "varno 0" hack, but that requires
      * a redesign of the parsetree representation of setops, so that there can
-    * be an RTE corresponding to each setop's output.
+    * be an RTE corresponding to each setop's output. Note, we use this not
+    * subquery's targetlist but subroot->parse's targetlist, because it was
+    * revised by self-join removal.  subquery's targetlist might contain the
+    * references to the removed relids.
      */
     if (pNumGroups)
     {
@@ -659,7 +662,7 @@ build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
             *pNumGroups = rel->cheapest_total_path->rows;
         else
             *pNumGroups = estimate_num_groups(subroot,
-                                             get_tlist_exprs(subquery->targetList, false),
+                                             get_tlist_exprs(subroot->parse->targetList, false),
                                               rel->cheapest_total_path->rows,
                                               NULL,
                                               NULL);
diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c

index a115b217c91637da21f193384112468747919aeb..9433548d279a8400d4365c8967ff658146a2bba5 100644 (file)
--- a/src/backend/rewrite/rewriteManip.c
+++ b/src/backend/rewrite/rewriteManip.c
@@ -64,7 +64,6 @@ static bool locate_windowfunc_walker(Node *node,
                                      locate_windowfunc_context *context);
  static bool checkExprHasSubLink_walker(Node *node, void *context);
  static Relids offset_relid_set(Relids relids, int offset);
-static Relids adjust_relid_set(Relids relids, int oldrelid, int newrelid);
  static Node *add_nulling_relids_mutator(Node *node,
                                         add_nulling_relids_context *context);
  static Node *remove_nulling_relids_mutator(Node *node,
@@ -543,6 +542,8 @@ offset_relid_set(Relids relids, int offset)
   * (identified by sublevels_up and rt_index), and change their varno fields
   * to 'new_index'.  The varnosyn fields are changed too.  Also, adjust other
   * nodes that contain rangetable indexes, such as RangeTblRef and JoinExpr.
+ * Specifying 'change_RangeTblRef' to false allows skipping RangeTblRef.
+ * See ChangeVarNodesExtended for details.
   *
   * NOTE: although this has the form of a walker, we cheat and modify the
   * nodes in-place.  The given expression tree should have been copied
@@ -554,6 +555,7 @@ typedef struct
     int         rt_index;
     int         new_index;
     int         sublevels_up;
+   bool        change_RangeTblRef;
  } ChangeVarNodes_context;
  
  static bool
@@ -586,7 +588,7 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context)
             cexpr->cvarno = context->new_index;
         return false;
     }
-   if (IsA(node, RangeTblRef))
+   if (IsA(node, RangeTblRef) && context->change_RangeTblRef)
     {
         RangeTblRef *rtr = (RangeTblRef *) node;
  
@@ -633,6 +635,75 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context)
         }
         return false;
     }
+   if (IsA(node, RestrictInfo))
+   {
+       RestrictInfo *rinfo = (RestrictInfo *) node;
+       int         relid = -1;
+       bool        is_req_equal =
+           (rinfo->required_relids == rinfo->clause_relids);
+       bool        clause_relids_is_multiple =
+           (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
+
+       if (bms_is_member(context->rt_index, rinfo->clause_relids))
+       {
+           expression_tree_walker((Node *) rinfo->clause, ChangeVarNodes_walker, (void *) context);
+           expression_tree_walker((Node *) rinfo->orclause, ChangeVarNodes_walker, (void *) context);
+
+           rinfo->clause_relids =
+               adjust_relid_set(rinfo->clause_relids, context->rt_index, context->new_index);
+           rinfo->num_base_rels = bms_num_members(rinfo->clause_relids);
+           rinfo->left_relids =
+               adjust_relid_set(rinfo->left_relids, context->rt_index, context->new_index);
+           rinfo->right_relids =
+               adjust_relid_set(rinfo->right_relids, context->rt_index, context->new_index);
+       }
+
+       if (is_req_equal)
+           rinfo->required_relids = rinfo->clause_relids;
+       else
+           rinfo->required_relids =
+               adjust_relid_set(rinfo->required_relids, context->rt_index, context->new_index);
+
+       rinfo->outer_relids =
+           adjust_relid_set(rinfo->outer_relids, context->rt_index, context->new_index);
+       rinfo->incompatible_relids =
+           adjust_relid_set(rinfo->incompatible_relids, context->rt_index, context->new_index);
+
+       if (rinfo->mergeopfamilies &&
+           bms_get_singleton_member(rinfo->clause_relids, &relid) &&
+           clause_relids_is_multiple &&
+           relid == context->new_index && IsA(rinfo->clause, OpExpr))
+       {
+           Expr       *leftOp;
+           Expr       *rightOp;
+
+           leftOp = (Expr *) get_leftop(rinfo->clause);
+           rightOp = (Expr *) get_rightop(rinfo->clause);
+
+           /*
+            * For self-join elimination, changing varnos could transform
+            * "t1.a = t2.a" into "t1.a = t1.a".  That is always true as long
+            * as "t1.a" is not null.  We use qual() to check for such a case,
+            * and then we replace the qual for a check for not null
+            * (NullTest).
+            */
+           if (leftOp != NULL && equal(leftOp, rightOp))
+           {
+               NullTest   *ntest = makeNode(NullTest);
+
+               ntest->arg = leftOp;
+               ntest->nulltesttype = IS_NOT_NULL;
+               ntest->argisrow = false;
+               ntest->location = -1;
+               rinfo->clause = (Expr *) ntest;
+               rinfo->mergeopfamilies = NIL;
+               rinfo->left_em = NULL;
+               rinfo->right_em = NULL;
+           }
+           Assert(rinfo->orclause == NULL);
+       }
+       return false;
+   }
     if (IsA(node, AppendRelInfo))
     {
         AppendRelInfo *appinfo = (AppendRelInfo *) node;
@@ -665,32 +736,32 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context)
     return expression_tree_walker(node, ChangeVarNodes_walker, context);
  }
  
+/*
+ * ChangeVarNodesExtended - similar to ChangeVarNodes, but has additional
+ *                         'change_RangeTblRef' param
+ *
+ * ChangeVarNodes changes a given node and all of its underlying nodes.
+ * However, self-join elimination (SJE) needs to skip the RangeTblRef node
+ * type.  During SJE's last step, remove_rel_from_joinlist() removes
+ * remaining RangeTblRefs with target relid.  If ChangeVarNodes() replaces
+ * the target relid before, remove_rel_from_joinlist() fails to identify
+ * the nodes to delete.
+ */
  void
-ChangeVarNodes(Node *node, int rt_index, int new_index, int sublevels_up)
+ChangeVarNodesExtended(Node *node, int rt_index, int new_index,
+                      int sublevels_up, bool change_RangeTblRef)
  {
     ChangeVarNodes_context context;
  
     context.rt_index = rt_index;
     context.new_index = new_index;
     context.sublevels_up = sublevels_up;
+   context.change_RangeTblRef = change_RangeTblRef;
  
-   /*
-    * Must be prepared to start with a Query or a bare expression tree; if
-    * it's a Query, go straight to query_tree_walker to make sure that
-    * sublevels_up doesn't get incremented prematurely.
-    */
     if (node && IsA(node, Query))
     {
         Query      *qry = (Query *) node;
  
-       /*
-        * If we are starting at a Query, and sublevels_up is zero, then we
-        * must also fix rangetable indexes in the Query itself --- namely
-        * resultRelation, mergeTargetRelation, exclRelIndex  and rowMarks
-        * entries.  sublevels_up cannot be zero when recursing into a
-        * subquery, so there's no need to have the same logic inside
-        * ChangeVarNodes_walker.
-        */
         if (sublevels_up == 0)
         {
             ListCell   *l;
@@ -701,7 +772,6 @@ ChangeVarNodes(Node *node, int rt_index, int new_index, int sublevels_up)
             if (qry->mergeTargetRelation == rt_index)
                 qry->mergeTargetRelation = new_index;
  
-           /* this is unlikely to ever be used, but ... */
             if (qry->onConflict && qry->onConflict->exclRelIndex == rt_index)
                 qry->onConflict->exclRelIndex = new_index;
  
@@ -719,15 +789,22 @@ ChangeVarNodes(Node *node, int rt_index, int new_index, int sublevels_up)
         ChangeVarNodes_walker(node, &context);
  }
  
+void
+ChangeVarNodes(Node *node, int rt_index, int new_index, int sublevels_up)
+{
+   ChangeVarNodesExtended(node, rt_index, new_index, sublevels_up, true);
+}
+
  /*
- * Substitute newrelid for oldrelid in a Relid set
+ * adjust_relid_set - substitute newrelid for oldrelid in a Relid set
   *
- * Note: some extensions may pass a special varno such as INDEX_VAR for
- * oldrelid.  bms_is_member won't like that, but we should tolerate it.
- * (Perhaps newrelid could also be a special varno, but there had better
- * not be a reason to inject that into a nullingrels or phrels set.)
+ * Attempt to remove oldrelid from a Relid set (as long as it's not a special
+ * varno).  If oldrelid was found and removed, insert newrelid into a Relid
+ * set (as long as it's not a special varno).  Therefore, when oldrelid is
+ * a special varno, this function does nothing.  When newrelid is a special
+ * varno, this function behaves as delete.
   */
-static Relids
+Relids
  adjust_relid_set(Relids relids, int oldrelid, int newrelid)
  {
     if (!IS_SPECIAL_VARNO(oldrelid) && bms_is_member(oldrelid, relids))
@@ -736,7 +813,8 @@ adjust_relid_set(Relids relids, int oldrelid, int newrelid)
         relids = bms_copy(relids);
         /* Remove old, add new */
         relids = bms_del_member(relids, oldrelid);
-       relids = bms_add_member(relids, newrelid);
+       if (!IS_SPECIAL_VARNO(newrelid))
+           relids = bms_add_member(relids, newrelid);
     }
     return relids;
  }
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c

index 42728189322d3afab4ab6b6bf4be7b40954b1eca..cce73314609069aa7721d42c55f9a9fd06bde9c4 100644 (file)
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -988,6 +988,16 @@ struct config_bool ConfigureNamesBool[] =
         true,
         NULL, NULL, NULL
     },
+   {
+       {"enable_self_join_elimination", PGC_USERSET, QUERY_TUNING_METHOD,
+           gettext_noop("Enable removal of unique self-joins."),
+           NULL,
+           GUC_EXPLAIN | GUC_NOT_IN_SAMPLE
+       },
+       &enable_self_join_elimination,
+       true,
+       NULL, NULL, NULL
+   },
     {
         {"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
             gettext_noop("Enables reordering of GROUP BY keys."),
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h

index 00c700cc3e7d30b180bc74479b0a6ef6fe32ad4f..fbf05322c75f42864bb5e3f7b35e2fdb635bc59c 100644 (file)
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -201,6 +201,11 @@ typedef struct PlannerGlobal
   * Not all fields are printed.  (In some cases, there is no print support for
   * the field type; in others, doing so would lead to infinite recursion or
   * bloat dump output more than seems useful.)
+ *
+ * NOTE: When adding new entries containing relids and relid bitmapsets,
+ * remember to check that they will be correctly processed by
+ * the remove_self_join_rel function - relid of removing relation will be
+ * correctly replaced with the keeping one.
   *----------
   */
  #ifndef HAVE_PLANNERINFO_TYPEDEF
@@ -753,7 +758,7 @@ typedef struct PartitionSchemeData *PartitionScheme;
   * populate these fields, for base rels; but someday they might be used for
   * join rels too:
   *
- *     unique_for_rels - list of Relid sets, each one being a set of other
+ *     unique_for_rels - list of UniqueRelInfo, each one being a set of other
   *                 rels for which this one has been proven unique
   *     non_unique_for_rels - list of Relid sets, each one being a set of
   *                 other rels for which we have tried and failed to prove
@@ -992,7 +997,7 @@ typedef struct RelOptInfo
     /*
      * cache space for remembering if we have proven this relation unique
      */
-   /* known unique for these other relid set(s) */
+   /* known unique for these other relid set(s) given in UniqueRelInfo(s) */
     List       *unique_for_rels;
     /* known not unique for these set(s) */
     List       *non_unique_for_rels;
@@ -3463,4 +3468,35 @@ typedef struct AggTransInfo
     bool        initValueIsNull;
  } AggTransInfo;
  
+/*
+ * UniqueRelInfo caches a fact that a relation is unique when being joined
+ * to other relation(s).
+ */
+typedef struct UniqueRelInfo
+{
+   pg_node_attr(no_copy_equal, no_read, no_query_jumble)
+
+   NodeTag     type;
+
+   /*
+    * The relation in consideration is unique when being joined with this set
+    * of other relation(s).
+    */
+   Relids      outerrelids;
+
+   /*
+    * The relation in consideration is unique when considering only clauses
+    * suitable for self-join (passed split_selfjoin_quals()).
+    */
+   bool        self_join;
+
+   /*
+    * Additional clauses from a baserestrictinfo list that were used to prove
+    * the uniqueness.   We cache it for the self-join checking procedure: a
+    * self-join can be removed if the outer relation contains strictly the
+    * same set of clauses.
+    */
+   List       *extra_clauses;
+} UniqueRelInfo;
+
  #endif                         /* PATHNODES_H */
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h

index bcf8ed645c24af86473b0cc63fea510b10682abd..78e05d88c8ec88b49a1ae301adebecb95a4602ec 100644 (file)
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -192,6 +192,8 @@ extern SortGroupClause *get_sortgroupref_clause_noerr(Index sortref,
                                              * output list */
  #define PVC_RECURSE_PLACEHOLDERS   0x0020  /* recurse into PlaceHolderVar
                                              * arguments */
+#define PVC_INCLUDE_CONVERTROWTYPES    0x0040  /* include ConvertRowtypeExprs in
+                                            * output list */
  
  extern Bitmapset *pull_varnos(PlannerInfo *root, Node *node);
  extern Bitmapset *pull_varnos_of_level(PlannerInfo *root, Node *node, int levelsup);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h

index 46955d128f017612af4d5893c36bfbbe16f4c775..bc5dfd7db41744728c2afb486de71c427f42924c 100644 (file)
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -72,6 +72,9 @@ extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel);
  extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
                                           List *restrictlist,
                                           List *exprlist, List *oprlist);
+extern bool relation_has_unique_index_ext(PlannerInfo *root, RelOptInfo *rel,
+                                         List *restrictlist, List *exprlist,
+                                         List *oprlist, List **extra_clauses);
  extern bool indexcol_is_bool_constant_for_query(PlannerInfo *root,
                                                 IndexOptInfo *index,
                                                 int indexcol);
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h

index fee3378bbe34c7431cae050ed9a35d8248e921ae..5a93019961141fbcceb143f3726454720bfa8eb6 100644 (file)
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -20,6 +20,7 @@
  /* GUC parameters */
  #define DEFAULT_CURSOR_TUPLE_FRACTION 0.1
  extern PGDLLIMPORT double cursor_tuple_fraction;
+extern PGDLLIMPORT bool enable_self_join_elimination;
  
  /* query_planner callback to compute query_pathkeys */
  typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra);
@@ -113,6 +114,11 @@ extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);
  extern bool innerrel_is_unique(PlannerInfo *root,
                                Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel,
                                JoinType jointype, List *restrictlist, bool force_cache);
+extern bool innerrel_is_unique_ext(PlannerInfo *root, Relids joinrelids,
+                                  Relids outerrelids, RelOptInfo *innerrel,
+                                  JoinType jointype, List *restrictlist,
+                                  bool force_cache, List **uclauses);
+extern List *remove_useless_self_joins(PlannerInfo *root, List *jointree);
  
  /*
   * prototypes for plan/setrefs.c
diff --git a/src/include/rewrite/rewriteManip.h b/src/include/rewrite/rewriteManip.h

index 512823033b924adb499890735753b864fb27b8dd..5ec475c63e9bd44d69a260e3b2274f79480376fc 100644 (file)
--- a/src/include/rewrite/rewriteManip.h
+++ b/src/include/rewrite/rewriteManip.h
@@ -15,6 +15,7 @@
  #define REWRITEMANIP_H
  
  #include "nodes/parsenodes.h"
+#include "nodes/pathnodes.h"
  
  struct AttrMap;                    /* avoid including attmap.h here */
  
@@ -41,11 +42,14 @@ typedef enum ReplaceVarsNoMatchOption
  } ReplaceVarsNoMatchOption;
  
  
+extern Relids adjust_relid_set(Relids relids, int oldrelid, int newrelid);
  extern void CombineRangeTables(List **dst_rtable, List **dst_perminfos,
                                List *src_rtable, List *src_perminfos);
  extern void OffsetVarNodes(Node *node, int offset, int sublevels_up);
  extern void ChangeVarNodes(Node *node, int rt_index, int new_index,
                            int sublevels_up);
+extern void ChangeVarNodesExtended(Node *node, int rt_index, int new_index,
+                                  int sublevels_up, bool change_RangeTblRef);
  extern void IncrementVarSublevelsUp(Node *node, int delta_sublevels_up,
                                     int min_sublevels_up);
  extern void IncrementVarSublevelsUp_rtable(List *rtable,
diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out

index 56227505009b994328ce43e746b254a323abd982..ad8ab294ff62415747b31aee6f16422f1ef6d2aa 100644 (file)
--- a/src/test/regress/expected/equivclass.out
+++ b/src/test/regress/expected/equivclass.out
@@ -434,6 +434,36 @@ explain (costs off)
     Filter: ((unique1 IS NOT NULL) AND (unique2 IS NOT NULL))
  (2 rows)
  
+-- Test that broken ECs are processed correctly during self join removal.
+-- Disable merge joins so that we don't get an error about missing commutator.
+-- Test both orientations of the join clause, because only one of them breaks
+-- the EC.
+set enable_mergejoin to off;
+explain (costs off)
+  select * from ec0 m join ec0 n on m.ff = n.ff
+  join ec1 p on m.ff + n.ff = p.f1;
+              QUERY PLAN               
+---------------------------------------
+ Nested Loop
+   Join Filter: ((n.ff + n.ff) = p.f1)
+   ->  Seq Scan on ec0 n
+   ->  Materialize
+         ->  Seq Scan on ec1 p
+(5 rows)
+
+explain (costs off)
+  select * from ec0 m join ec0 n on m.ff = n.ff
+  join ec1 p on p.f1::int8 = (m.ff + n.ff)::int8alias1;
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Nested Loop
+   Join Filter: ((p.f1)::bigint = ((n.ff + n.ff))::int8alias1)
+   ->  Seq Scan on ec0 n
+   ->  Materialize
+         ->  Seq Scan on ec1 p
+(5 rows)
+
+reset enable_mergejoin;
  -- this could be converted, but isn't at present
  explain (costs off)
    select * from tenk1 where unique1 = unique1 or unique2 = unique2;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out

index 3ffc066b1f895555aa3da8190bcb0e9facad3e49..a57bb18c24f8b77e33600e0e19218751f783437e 100644 (file)
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5966,6 +5966,27 @@ select c.id, ss.a from c
           ->  Seq Scan on c
  (7 rows)
  
+-- check the case when the placeholder relates to an outer join and its
+-- inner in the press field but actually uses only the outer side of the join
+explain (costs off)
+SELECT q.val FROM b LEFT JOIN (
+  SELECT (q1.z IS NOT NULL) AS val
+  FROM b LEFT JOIN (
+    SELECT (t1.b_id IS NOT NULL) AS z FROM a t1 LEFT JOIN a t2 USING (id)
+    ) AS q1
+  ON true
+) AS q ON true;
+                QUERY PLAN                
+------------------------------------------
+ Nested Loop Left Join
+   ->  Seq Scan on b
+   ->  Materialize
+         ->  Nested Loop Left Join
+               ->  Seq Scan on b b_1
+               ->  Materialize
+                     ->  Seq Scan on a t1
+(7 rows)
+
  CREATE TEMP TABLE parted_b (id int PRIMARY KEY) partition by range(id);
  CREATE TEMP TABLE parted_b1 partition of parted_b for values from (0) to (10);
  -- test join removals on a partitioned table
@@ -6377,6 +6398,1068 @@ select * from
  ----+----+----+----
  (0 rows)
  
+--
+-- test that semi- or inner self-joins on a unique column are removed
+--
+-- enable only nestloop to get more predictable plans
+set enable_hashjoin to off;
+set enable_mergejoin to off;
+create table sj (a int unique, b int, c int unique);
+insert into sj values (1, null, 2), (null, 2, null), (2, 1, 1);
+analyze sj;
+-- Trivial self-join case.
+explain (costs off)
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Seq Scan on sj q
+   Filter: ((a IS NOT NULL) AND (b = (a - 1)))
+(2 rows)
+
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+ a | b | c 
+---+---+---
+ 2 | 1 | 1
+(1 row)
+
+-- Self-join removal performs after a subquery pull-up process and could remove
+-- such kind of self-join too. Check this option.
+explain (costs off)
+select * from sj p
+where exists (select * from sj q
+              where q.a = p.a and q.b < 10);
+                QUERY PLAN                
+------------------------------------------
+ Seq Scan on sj q
+   Filter: ((a IS NOT NULL) AND (b < 10))
+(2 rows)
+
+select * from sj p
+where exists (select * from sj q
+              where q.a = p.a and q.b < 10);
+ a | b | c 
+---+---+---
+ 2 | 1 | 1
+(1 row)
+
+-- Don't remove self-join for the case of equality of two different unique columns.
+explain (costs off)
+select * from sj t1, sj t2 where t1.a = t2.c and t1.b is not null;
+              QUERY PLAN               
+---------------------------------------
+ Nested Loop
+   Join Filter: (t1.a = t2.c)
+   ->  Seq Scan on sj t2
+   ->  Materialize
+         ->  Seq Scan on sj t1
+               Filter: (b IS NOT NULL)
+(6 rows)
+
+-- Ensure that relations with TABLESAMPLE clauses are not considered as
+-- candidates to be removed
+explain (costs off)
+select * from sj t1
+    join lateral
+      (select * from sj tablesample system(t1.b)) s
+    on t1.a = s.a;
+              QUERY PLAN               
+---------------------------------------
+ Nested Loop
+   ->  Seq Scan on sj t1
+   ->  Memoize
+         Cache Key: t1.a, t1.b
+         Cache Mode: binary
+         ->  Sample Scan on sj
+               Sampling: system (t1.b)
+               Filter: (t1.a = a)
+(8 rows)
+
+-- Ensure that SJE does not form a self-referential lateral dependency
+explain (costs off)
+select * from sj t1
+    left join lateral
+      (select t1.a as t1a, * from sj t2) s
+    on true
+where t1.a = s.a;
+        QUERY PLAN         
+---------------------------
+ Seq Scan on sj t2
+   Filter: (a IS NOT NULL)
+(2 rows)
+
+-- Degenerated case.
+explain (costs off)
+select * from
+  (select a as x from sj where false) as q1,
+  (select a as y from sj where false) as q2
+where q1.x = q2.y;
+        QUERY PLAN        
+--------------------------
+ Result
+   One-Time Filter: false
+(2 rows)
+
+-- We can't use a cross-EC generated self join qual because of current logic of
+-- the generate_join_implied_equalities routine.
+explain (costs off)
+select * from sj t1, sj t2 where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a;
+          QUERY PLAN          
+------------------------------
+ Nested Loop
+   Join Filter: (t1.a = t2.b)
+   ->  Seq Scan on sj t1
+         Filter: (a = b)
+   ->  Seq Scan on sj t2
+         Filter: (b = a)
+(6 rows)
+
+explain (costs off)
+select * from sj t1, sj t2, sj t3
+where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a and
+      t1.b = t3.b and t3.b = t3.a;
+             QUERY PLAN             
+------------------------------------
+ Nested Loop
+   Join Filter: (t1.a = t3.b)
+   ->  Nested Loop
+         Join Filter: (t1.a = t2.b)
+         ->  Seq Scan on sj t1
+               Filter: (a = b)
+         ->  Seq Scan on sj t2
+               Filter: (b = a)
+   ->  Seq Scan on sj t3
+         Filter: (b = a)
+(10 rows)
+
+-- Double self-join removal.
+-- Use a condition on "b + 1", not on "b", for the second join, so that
+-- the equivalence class is different from the first one, and we can
+-- test the non-ec code path.
+explain (costs off)
+select *
+from  sj t1
+      join sj t2 on t1.a = t2.a and t1.b = t2.b
+     join sj t3 on t2.a = t3.a and t2.b + 1 = t3.b + 1;
+                                QUERY PLAN                                 
+---------------------------------------------------------------------------
+ Seq Scan on sj t3
+   Filter: ((a IS NOT NULL) AND (b IS NOT NULL) AND ((b + 1) IS NOT NULL))
+(2 rows)
+
+-- subselect that references the removed relation
+explain (costs off)
+select t1.a, (select a from sj where a = t2.a and a = t1.a)
+from sj t1, sj t2
+where t1.a = t2.a;
+                QUERY PLAN                
+------------------------------------------
+ Seq Scan on sj t2
+   Filter: (a IS NOT NULL)
+   SubPlan 1
+     ->  Result
+           One-Time Filter: (t2.a = t2.a)
+           ->  Seq Scan on sj
+                 Filter: (a = t2.a)
+(7 rows)
+
+-- self-join under outer join
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on x.a = z.q1;
+             QUERY PLAN             
+------------------------------------
+ Nested Loop Left Join
+   Join Filter: (y.a = z.q1)
+   ->  Seq Scan on sj y
+         Filter: (a IS NOT NULL)
+   ->  Materialize
+         ->  Seq Scan on int8_tbl z
+(6 rows)
+
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on y.a = z.q1;
+             QUERY PLAN             
+------------------------------------
+ Nested Loop Left Join
+   Join Filter: (y.a = z.q1)
+   ->  Seq Scan on sj y
+         Filter: (a IS NOT NULL)
+   ->  Materialize
+         ->  Seq Scan on int8_tbl z
+(6 rows)
+
+explain (costs off)
+select * from (
+  select t1.*, t2.a as ax from sj t1 join sj t2
+  on (t1.a = t2.a and t1.c * t1.c = t2.c + 2 and t2.b is null)
+) as q1
+left join
+  (select t3.* from sj t3, sj t4 where t3.c = t4.c) as q2
+on q1.ax = q2.a;
+                                QUERY PLAN                                 
+---------------------------------------------------------------------------
+ Nested Loop Left Join
+   Join Filter: (t2.a = t4.a)
+   ->  Seq Scan on sj t2
+         Filter: ((b IS NULL) AND (a IS NOT NULL) AND ((c * c) = (c + 2)))
+   ->  Seq Scan on sj t4
+         Filter: (c IS NOT NULL)
+(6 rows)
+
+-- Test that placeholders are updated correctly after join removal
+explain (costs off)
+select * from (values (1)) x
+left join (select coalesce(y.q1, 1) from int8_tbl y
+   right join sj j1 inner join sj j2 on j1.a = j2.a
+   on true) z
+on true;
+                QUERY PLAN                
+------------------------------------------
+ Nested Loop Left Join
+   ->  Result
+   ->  Nested Loop Left Join
+         ->  Seq Scan on sj j2
+               Filter: (a IS NOT NULL)
+         ->  Materialize
+               ->  Seq Scan on int8_tbl y
+(7 rows)
+
+-- Test that references to the removed rel in lateral subqueries are replaced
+-- correctly after join removal
+explain (verbose, costs off)
+select t3.a from sj t1
+   join sj t2 on t1.a = t2.a
+   join lateral (select t1.a offset 0) t3 on true;
+             QUERY PLAN             
+------------------------------------
+ Nested Loop
+   Output: (t2.a)
+   ->  Seq Scan on public.sj t2
+         Output: t2.a, t2.b, t2.c
+         Filter: (t2.a IS NOT NULL)
+   ->  Result
+         Output: t2.a
+(7 rows)
+
+explain (verbose, costs off)
+select t3.a from sj t1
+   join sj t2 on t1.a = t2.a
+   join lateral (select * from (select t1.a offset 0) offset 0) t3 on true;
+             QUERY PLAN             
+------------------------------------
+ Nested Loop
+   Output: (t2.a)
+   ->  Seq Scan on public.sj t2
+         Output: t2.a, t2.b, t2.c
+         Filter: (t2.a IS NOT NULL)
+   ->  Result
+         Output: t2.a
+(7 rows)
+
+explain (verbose, costs off)
+select t4.a from sj t1
+   join sj t2 on t1.a = t2.a
+   join lateral (select t3.a from sj t3, (select t1.a) offset 0) t4 on true;
+             QUERY PLAN             
+------------------------------------
+ Nested Loop
+   Output: t3.a
+   ->  Seq Scan on public.sj t2
+         Output: t2.a, t2.b, t2.c
+         Filter: (t2.a IS NOT NULL)
+   ->  Seq Scan on public.sj t3
+         Output: t3.a
+(7 rows)
+
+-- Check updating of semi_rhs_exprs links from upper-level semi join to
+-- the removing relation
+explain (verbose, costs off)
+select t1.a from sj t1 where t1.b in (
+  select t2.b from sj t2 join sj t3 on t2.c=t3.c);
+                QUERY PLAN                
+------------------------------------------
+ Nested Loop Semi Join
+   Output: t1.a
+   Join Filter: (t1.b = t3.b)
+   ->  Seq Scan on public.sj t1
+         Output: t1.a, t1.b, t1.c
+   ->  Materialize
+         Output: t3.c, t3.b
+         ->  Seq Scan on public.sj t3
+               Output: t3.c, t3.b
+               Filter: (t3.c IS NOT NULL)
+(10 rows)
+
+--
+-- SJE corner case: uniqueness of an inner is [partially] derived from
+-- baserestrictinfo clauses.
+-- XXX: We really should allow SJE for these corner cases?
+--
+INSERT INTO sj VALUES (3, 1, 3);
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3;
+          QUERY PLAN          
+------------------------------
+ Nested Loop
+   Join Filter: (j1.b = j2.b)
+   ->  Seq Scan on sj j1
+         Filter: (a = 2)
+   ->  Seq Scan on sj j2
+         Filter: (a = 3)
+(6 rows)
+
+-- Return one row
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3;
+ a | b | c | a | b | c 
+---+---+---+---+---+---
+ 2 | 1 | 1 | 3 | 1 | 3
+(1 row)
+
+-- Remove SJ, define uniqueness by a constant
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2;
+               QUERY PLAN                
+-----------------------------------------
+ Seq Scan on sj j2
+   Filter: ((b IS NOT NULL) AND (a = 2))
+(2 rows)
+
+-- Return one row
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2;
+ a | b | c | a | b | c 
+---+---+---+---+---+---
+ 2 | 1 | 1 | 2 | 1 | 1
+(1 row)
+
+-- Remove SJ, define uniqueness by a constant expression
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+  AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+  AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a;
+                                                         QUERY PLAN                                                         
+----------------------------------------------------------------------------------------------------------------------------
+ Seq Scan on sj j2
+   Filter: ((b IS NOT NULL) AND (a = (((EXTRACT(dow FROM CURRENT_TIMESTAMP(0)) / '15'::numeric) + '3'::numeric))::integer))
+(2 rows)
+
+-- Return one row
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+  AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+  AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a;
+ a | b | c | a | b | c 
+---+---+---+---+---+---
+ 3 | 1 | 3 | 3 | 1 | 3
+(1 row)
+
+-- Remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1;
+               QUERY PLAN                
+-----------------------------------------
+ Seq Scan on sj j2
+   Filter: ((b IS NOT NULL) AND (a = 1))
+(2 rows)
+
+-- Return no rows
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1;
+ a | b | c | a | b | c 
+---+---+---+---+---+---
+(0 rows)
+
+-- Shuffle a clause. Remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1;
+               QUERY PLAN                
+-----------------------------------------
+ Seq Scan on sj j2
+   Filter: ((b IS NOT NULL) AND (a = 1))
+(2 rows)
+
+-- Return no rows
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1;
+ a | b | c | a | b | c 
+---+---+---+---+---+---
+(0 rows)
+
+-- SJE Corner case: a 'a.x=a.x' clause, have replaced with 'a.x IS NOT NULL'
+-- after SJ elimination it shouldn't be a mergejoinable clause.
+EXPLAIN (COSTS OFF)
+SELECT t4.*
+FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3
+JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42;
+           QUERY PLAN            
+---------------------------------
+ Nested Loop
+   Join Filter: (t1.b = t2.b)
+   ->  Seq Scan on sj t2
+         Filter: (a = 42)
+   ->  Seq Scan on sj t1
+         Filter: (a IS NOT NULL)
+(6 rows)
+
+SELECT t4.*
+FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3
+JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42;
+ a | b | c 
+---+---+---
+(0 rows)
+
+-- Functional index
+CREATE UNIQUE INDEX sj_fn_idx ON sj((a * a));
+-- Remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+   WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 1;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Seq Scan on sj j2
+   Filter: ((b IS NOT NULL) AND ((a * a) = 1))
+(2 rows)
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+   WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 2;
+          QUERY PLAN           
+-------------------------------
+ Nested Loop
+   Join Filter: (j1.b = j2.b)
+   ->  Seq Scan on sj j1
+         Filter: ((a * a) = 1)
+   ->  Seq Scan on sj j2
+         Filter: ((a * a) = 2)
+(6 rows)
+
+-- Restriction contains expressions in both sides, Remove SJ.
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+  AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+  AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a);
+                                                            QUERY PLAN                                                            
+----------------------------------------------------------------------------------------------------------------------------------
+ Seq Scan on sj j2
+   Filter: ((b IS NOT NULL) AND ((a * a) = (((EXTRACT(dow FROM CURRENT_TIMESTAMP(0)) / '15'::numeric) + '3'::numeric))::integer))
+(2 rows)
+
+-- Empty set of rows should be returned
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+  AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+  AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a);
+ a | b | c | a | b | c 
+---+---+---+---+---+---
+(0 rows)
+
+-- Restriction contains volatile function - disable SJE feature.
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+  AND (j1.a*j1.c/3) = (random()/3 + 3)::int
+  AND (random()/3 + 3)::int = (j2.a*j2.c/3);
+                                                QUERY PLAN                                                 
+-----------------------------------------------------------------------------------------------------------
+ Nested Loop
+   Join Filter: (j1.b = j2.b)
+   ->  Seq Scan on sj j1
+         Filter: (((a * c) / 3) = (((random() / '3'::double precision) + '3'::double precision))::integer)
+   ->  Seq Scan on sj j2
+         Filter: ((((random() / '3'::double precision) + '3'::double precision))::integer = ((a * c) / 3))
+(6 rows)
+
+-- Return one row
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+  AND (j1.a*j1.c/3) = (random()/3 + 3)::int
+  AND (random()/3 + 3)::int = (j2.a*j2.c/3);
+ a | b | c | a | b | c 
+---+---+---+---+---+---
+ 3 | 1 | 3 | 3 | 1 | 3
+(1 row)
+
+-- Multiple filters
+CREATE UNIQUE INDEX sj_temp_idx1 ON sj(a,b,c);
+-- Remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+   WHERE j1.b = j2.b AND j1.a = 2 AND j1.c = 3 AND j2.a = 2 AND 3 = j2.c;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Seq Scan on sj j2
+   Filter: ((b IS NOT NULL) AND (a = 2) AND (c = 3))
+(2 rows)
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+   SELECT * FROM sj j1, sj j2
+   WHERE j1.b = j2.b AND 2 = j1.a AND j1.c = 3 AND j2.a = 1 AND 3 = j2.c;
+              QUERY PLAN               
+---------------------------------------
+ Nested Loop
+   Join Filter: (j1.b = j2.b)
+   ->  Seq Scan on sj j1
+         Filter: ((2 = a) AND (c = 3))
+   ->  Seq Scan on sj j2
+         Filter: ((c = 3) AND (a = 1))
+(6 rows)
+
+CREATE UNIQUE INDEX sj_temp_idx ON sj(a,b);
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2;
+          QUERY PLAN          
+------------------------------
+ Nested Loop
+   Join Filter: (j1.b = j2.b)
+   ->  Seq Scan on sj j1
+         Filter: (a = 2)
+   ->  Seq Scan on sj j2
+(5 rows)
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 2 = j2.a;
+          QUERY PLAN          
+------------------------------
+ Nested Loop
+   Join Filter: (j1.b = j2.b)
+   ->  Seq Scan on sj j2
+         Filter: (2 = a)
+   ->  Seq Scan on sj j1
+(5 rows)
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND (j1.a = 1 OR j2.a = 1);
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Nested Loop
+   Join Filter: ((j1.b = j2.b) AND ((j1.a = 1) OR (j2.a = 1)))
+   ->  Seq Scan on sj j1
+   ->  Materialize
+         ->  Seq Scan on sj j2
+(5 rows)
+
+DROP INDEX sj_fn_idx, sj_temp_idx1, sj_temp_idx;
+-- Test that OR predicated are updated correctly after join removal
+CREATE TABLE tab_with_flag ( id INT PRIMARY KEY, is_flag SMALLINT);
+CREATE INDEX idx_test_is_flag ON tab_with_flag (is_flag);
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM tab_with_flag
+WHERE
+   (is_flag IS NULL OR is_flag = 0)
+   AND id IN (SELECT id FROM tab_with_flag WHERE id IN (2, 3));
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ Aggregate
+   ->  Bitmap Heap Scan on tab_with_flag
+         Recheck Cond: (id = ANY ('{2,3}'::integer[]))
+         Filter: ((is_flag IS NULL) OR (is_flag = 0))
+         ->  Bitmap Index Scan on tab_with_flag_pkey
+               Index Cond: (id = ANY ('{2,3}'::integer[]))
+(6 rows)
+
+DROP TABLE tab_with_flag;
+-- HAVING clause
+explain (costs off)
+select p.b from sj p join sj q on p.a = q.a group by p.b having sum(p.a) = 1;
+           QUERY PLAN            
+---------------------------------
+ HashAggregate
+   Group Key: q.b
+   Filter: (sum(q.a) = 1)
+   ->  Seq Scan on sj q
+         Filter: (a IS NOT NULL)
+(5 rows)
+
+-- update lateral references and range table entry reference
+explain (verbose, costs off)
+select 1 from (select x.* from sj x, sj y where x.a = y.a) q,
+  lateral generate_series(1, q.a) gs(i);
+                      QUERY PLAN                      
+------------------------------------------------------
+ Nested Loop
+   Output: 1
+   ->  Seq Scan on public.sj y
+         Output: y.a, y.b, y.c
+         Filter: (y.a IS NOT NULL)
+   ->  Function Scan on pg_catalog.generate_series gs
+         Output: gs.i
+         Function Call: generate_series(1, y.a)
+(8 rows)
+
+explain (verbose, costs off)
+select 1 from (select y.* from sj x, sj y where x.a = y.a) q,
+  lateral generate_series(1, q.a) gs(i);
+                      QUERY PLAN                      
+------------------------------------------------------
+ Nested Loop
+   Output: 1
+   ->  Seq Scan on public.sj y
+         Output: y.a, y.b, y.c
+         Filter: (y.a IS NOT NULL)
+   ->  Function Scan on pg_catalog.generate_series gs
+         Output: gs.i
+         Function Call: generate_series(1, y.a)
+(8 rows)
+
+-- Test that a non-EC-derived join clause is processed correctly. Use an
+-- outer join so that we can't form an EC.
+explain (costs off) select * from sj p join sj q on p.a = q.a
+  left join sj r on p.a + q.a = r.a;
+             QUERY PLAN             
+------------------------------------
+ Nested Loop Left Join
+   Join Filter: ((q.a + q.a) = r.a)
+   ->  Seq Scan on sj q
+         Filter: (a IS NOT NULL)
+   ->  Materialize
+         ->  Seq Scan on sj r
+(6 rows)
+
+-- FIXME this constant false filter doesn't look good. Should we merge
+-- equivalence classes?
+explain (costs off)
+select * from sj p, sj q where p.a = q.a and p.b = 1 and q.b = 2;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Seq Scan on sj q
+   Filter: ((a IS NOT NULL) AND (b = 2) AND (b = 1))
+(2 rows)
+
+-- Check that attr_needed is updated correctly after self-join removal. In this
+-- test, the join of j1 with j2 is removed. k1.b is required at either j1 or j2.
+-- If this info is lost, join targetlist for (k1, k2) will not contain k1.b.
+-- Use index scan for k1 so that we don't get 'b' from physical tlist used for
+-- seqscan. Also disable reordering of joins because this test depends on a
+-- particular join tree.
+create table sk (a int, b int);
+create index on sk(a);
+set join_collapse_limit to 1;
+set enable_seqscan to off;
+explain (costs off) select 1 from
+   (sk k1 join sk k2 on k1.a = k2.a)
+   join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Nested Loop
+   Join Filter: (k1.b = j2.b)
+   ->  Nested Loop
+         ->  Index Scan using sk_a_idx on sk k1
+         ->  Index Only Scan using sk_a_idx on sk k2
+               Index Cond: (a = k1.a)
+   ->  Materialize
+         ->  Index Scan using sj_a_key on sj j2
+               Index Cond: (a IS NOT NULL)
+(9 rows)
+
+explain (costs off) select 1 from
+   (sk k1 join sk k2 on k1.a = k2.a)
+   join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Nested Loop
+   Join Filter: (k1.b = j2.b)
+   ->  Nested Loop
+         ->  Index Scan using sk_a_idx on sk k1
+         ->  Index Only Scan using sk_a_idx on sk k2
+               Index Cond: (a = k1.a)
+   ->  Materialize
+         ->  Index Scan using sj_a_key on sj j2
+               Index Cond: (a IS NOT NULL)
+(9 rows)
+
+reset join_collapse_limit;
+reset enable_seqscan;
+-- Check that clauses from the join filter list is not lost on the self-join removal
+CREATE TABLE emp1 (id SERIAL PRIMARY KEY NOT NULL, code int);
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code;
+                QUERY PLAN                
+------------------------------------------
+ Seq Scan on public.emp1 e2
+   Output: e2.id, e2.code, e2.id, e2.code
+   Filter: (e2.code <> e2.code)
+(3 rows)
+
+-- Shuffle self-joined relations. Only in the case of iterative deletion
+-- attempts explains of these queries will be identical.
+CREATE UNIQUE INDEX ON emp1((id*id));
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c1.id=c2.id AND c1.id*c2.id=c3.id*c3.id;
+               QUERY PLAN                
+-----------------------------------------
+ Aggregate
+   ->  Seq Scan on emp1 c3
+         Filter: ((id * id) IS NOT NULL)
+(3 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c1.id=c3.id AND c1.id*c3.id=c2.id*c2.id;
+               QUERY PLAN                
+-----------------------------------------
+ Aggregate
+   ->  Seq Scan on emp1 c3
+         Filter: ((id * id) IS NOT NULL)
+(3 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c3.id=c2.id AND c3.id*c2.id=c1.id*c1.id;
+               QUERY PLAN                
+-----------------------------------------
+ Aggregate
+   ->  Seq Scan on emp1 c3
+         Filter: ((id * id) IS NOT NULL)
+(3 rows)
+
+-- Check the usage of a parse tree by the set operations (bug #18170)
+EXPLAIN (COSTS OFF)
+SELECT c1.code FROM emp1 c1 LEFT JOIN emp1 c2 ON c1.id = c2.id
+WHERE c2.id IS NOT NULL
+EXCEPT ALL
+SELECT c3.code FROM emp1 c3;
+        QUERY PLAN         
+---------------------------
+ HashSetOp Except All
+   ->  Seq Scan on emp1 c2
+   ->  Seq Scan on emp1 c3
+(3 rows)
+
+-- Check that SJE removes references from PHVs correctly
+explain (costs off)
+select * from emp1 t1 left join
+    (select coalesce(t3.code, 1) from emp1 t2
+        left join (emp1 t3 join emp1 t4 on t3.id = t4.id)
+        on true)
+on true;
+                 QUERY PLAN                  
+---------------------------------------------
+ Nested Loop Left Join
+   ->  Seq Scan on emp1 t1
+   ->  Materialize
+         ->  Nested Loop Left Join
+               ->  Seq Scan on emp1 t2
+               ->  Materialize
+                     ->  Seq Scan on emp1 t4
+(7 rows)
+
+-- Check that SJE removes the whole PHVs correctly
+explain (verbose, costs off)
+select 1 from emp1 t1 left join
+    ((select 1 as x, * from emp1 t2) s1 inner join
+        (select * from emp1 t3) s2 on s1.id = s2.id)
+    on true
+where s1.x = 1;
+               QUERY PLAN               
+----------------------------------------
+ Nested Loop
+   Output: 1
+   ->  Seq Scan on public.emp1 t1
+         Output: t1.id, t1.code
+   ->  Materialize
+         Output: t3.id
+         ->  Seq Scan on public.emp1 t3
+               Output: t3.id
+               Filter: (1 = 1)
+(9 rows)
+
+-- Check that PHVs do not impose any constraints on removing self joins
+explain (verbose, costs off)
+select * from emp1 t1 join emp1 t2 on t1.id = t2.id left join
+    lateral (select t1.id as t1id, * from generate_series(1,1) t3) s on true;
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Nested Loop Left Join
+   Output: t2.id, t2.code, t2.id, t2.code, (t2.id), t3.t3
+   ->  Seq Scan on public.emp1 t2
+         Output: t2.id, t2.code
+   ->  Function Scan on pg_catalog.generate_series t3
+         Output: t3.t3, t2.id
+         Function Call: generate_series(1, 1)
+(7 rows)
+
+explain (verbose, costs off)
+select * from generate_series(1,10) t1(id) left join
+    lateral (select t1.id as t1id, t2.id from emp1 t2 join emp1 t3 on t2.id = t3.id)
+on true;
+                      QUERY PLAN                      
+------------------------------------------------------
+ Nested Loop Left Join
+   Output: t1.id, (t1.id), t3.id
+   ->  Function Scan on pg_catalog.generate_series t1
+         Output: t1.id
+         Function Call: generate_series(1, 10)
+   ->  Seq Scan on public.emp1 t3
+         Output: t3.id, t1.id
+(7 rows)
+
+-- Check that SJE replaces join clauses involving the removed rel correctly
+explain (costs off)
+select * from emp1 t1
+   inner join emp1 t2 on t1.id = t2.id
+    left join emp1 t3 on t1.id > 1 and t1.id < 2;
+                  QUERY PLAN                  
+----------------------------------------------
+ Nested Loop Left Join
+   Join Filter: ((t2.id > 1) AND (t2.id < 2))
+   ->  Seq Scan on emp1 t2
+   ->  Materialize
+         ->  Seq Scan on emp1 t3
+(5 rows)
+
+-- Check that SJE doesn't replace the target relation
+EXPLAIN (COSTS OFF)
+WITH t1 AS (SELECT * FROM emp1)
+UPDATE emp1 SET code = t1.code + 1 FROM t1
+WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code, t1.code;
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Update on emp1
+   ->  Nested Loop
+         ->  Seq Scan on emp1
+         ->  Index Scan using emp1_pkey on emp1 emp1_1
+               Index Cond: (id = emp1.id)
+(5 rows)
+
+INSERT INTO emp1 VALUES (1, 1), (2, 1);
+WITH t1 AS (SELECT * FROM emp1)
+UPDATE emp1 SET code = t1.code + 1 FROM t1
+WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code, t1.code;
+ id | code | code 
+----+------+------
+  1 |    2 |    1
+  2 |    2 |    1
+(2 rows)
+
+TRUNCATE emp1;
+EXPLAIN (COSTS OFF)
+UPDATE sj sq SET b = 1 FROM sj as sz WHERE sq.a = sz.a;
+             QUERY PLAN              
+-------------------------------------
+ Update on sj sq
+   ->  Nested Loop
+         Join Filter: (sq.a = sz.a)
+         ->  Seq Scan on sj sq
+         ->  Materialize
+               ->  Seq Scan on sj sz
+(6 rows)
+
+CREATE RULE sj_del_rule AS ON DELETE TO sj
+  DO INSTEAD
+    UPDATE sj SET a = 1 WHERE a = old.a;
+EXPLAIN (COSTS OFF) DELETE FROM sj;
+              QUERY PLAN              
+--------------------------------------
+ Update on sj sj_1
+   ->  Nested Loop
+         Join Filter: (sj.a = sj_1.a)
+         ->  Seq Scan on sj sj_1
+         ->  Materialize
+               ->  Seq Scan on sj
+(6 rows)
+
+DROP RULE sj_del_rule ON sj CASCADE;
+-- Check that SJE does not mistakenly omit qual clauses (bug #18187)
+insert into emp1 values (1, 1);
+explain (costs off)
+select 1 from emp1 full join
+    (select * from emp1 t1 join
+        emp1 t2 join emp1 t3 on t2.id = t3.id
+        on true
+    where false) s on true
+where false;
+        QUERY PLAN        
+--------------------------
+ Result
+   One-Time Filter: false
+(2 rows)
+
+select 1 from emp1 full join
+    (select * from emp1 t1 join
+        emp1 t2 join emp1 t3 on t2.id = t3.id
+        on true
+    where false) s on true
+where false;
+ ?column? 
+----------
+(0 rows)
+
+-- Check that SJE does not mistakenly re-use knowledge of relation uniqueness
+-- made with different set of quals
+insert into emp1 values (2, 1);
+explain (costs off)
+select * from emp1 t1 where exists (select * from emp1 t2
+                                    where t2.id = t1.code and t2.code > 0);
+                 QUERY PLAN                  
+---------------------------------------------
+ Nested Loop
+   ->  Seq Scan on emp1 t1
+   ->  Index Scan using emp1_pkey on emp1 t2
+         Index Cond: (id = t1.code)
+         Filter: (code > 0)
+(5 rows)
+
+select * from emp1 t1 where exists (select * from emp1 t2
+                                    where t2.id = t1.code and t2.code > 0);
+ id | code 
+----+------
+  1 |    1
+  2 |    1
+(2 rows)
+
+-- We can remove the join even if we find the join can't duplicate rows and
+-- the base quals of each side are different.  In the following case we end up
+-- moving quals over to s1 to make it so it can't match any rows.
+create table sl(a int, b int, c int);
+create unique index on sl(a, b);
+vacuum analyze sl;
+-- Both sides are unique, but base quals are different
+explain (costs off)
+select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1 and t2.b = 2;
+          QUERY PLAN          
+------------------------------
+ Nested Loop
+   Join Filter: (t1.a = t2.a)
+   ->  Seq Scan on sl t1
+         Filter: (b = 1)
+   ->  Seq Scan on sl t2
+         Filter: (b = 2)
+(6 rows)
+
+-- Check NullTest in baserestrictinfo list
+explain (costs off)
+select * from sl t1, sl t2
+where t1.a = t2.a and t1.b = 1 and t2.b = 2
+  and t1.c IS NOT NULL and t2.c IS NOT NULL
+  and t2.b IS NOT NULL and t1.b IS NOT NULL
+  and t1.a IS NOT NULL and t2.a IS NOT NULL;
+                                      QUERY PLAN                                       
+---------------------------------------------------------------------------------------
+ Nested Loop
+   Join Filter: (t1.a = t2.a)
+   ->  Seq Scan on sl t1
+         Filter: ((c IS NOT NULL) AND (b IS NOT NULL) AND (a IS NOT NULL) AND (b = 1))
+   ->  Seq Scan on sl t2
+         Filter: ((c IS NOT NULL) AND (b IS NOT NULL) AND (a IS NOT NULL) AND (b = 2))
+(6 rows)
+
+explain (verbose, costs off)
+select * from sl t1, sl t2
+where t1.b = t2.b and t2.a = 3 and t1.a = 3
+  and t1.c IS NOT NULL and t2.c IS NOT NULL
+  and t2.b IS NOT NULL and t1.b IS NOT NULL
+  and t1.a IS NOT NULL and t2.a IS NOT NULL;
+                                         QUERY PLAN                                          
+---------------------------------------------------------------------------------------------
+ Seq Scan on public.sl t2
+   Output: t2.a, t2.b, t2.c, t2.a, t2.b, t2.c
+   Filter: ((t2.c IS NOT NULL) AND (t2.b IS NOT NULL) AND (t2.a IS NOT NULL) AND (t2.a = 3))
+(3 rows)
+
+-- Join qual isn't mergejoinable, but inner is unique.
+EXPLAIN (COSTS OFF)
+SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a AND n2.a = 1;
+          QUERY PLAN           
+-------------------------------
+ Nested Loop
+   Join Filter: (n1.a <> n2.a)
+   ->  Seq Scan on sj n2
+         Filter: (a = 1)
+   ->  Seq Scan on sj n1
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+(SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a) q0, sl
+WHERE q0.a = 1;
+          QUERY PLAN           
+-------------------------------
+ Nested Loop
+   Join Filter: (n1.a <> n2.a)
+   ->  Nested Loop
+         ->  Seq Scan on sl
+         ->  Seq Scan on sj n2
+               Filter: (a = 1)
+   ->  Seq Scan on sj n1
+(7 rows)
+
+-- Check optimization disabling if it will violate special join conditions.
+-- Two identical joined relations satisfies self join removal conditions but
+-- stay in different special join infos.
+CREATE TABLE sj_t1 (id serial, a int);
+CREATE TABLE sj_t2 (id serial, a int);
+CREATE TABLE sj_t3 (id serial, a int);
+CREATE TABLE sj_t4 (id serial, a int);
+CREATE UNIQUE INDEX ON sj_t3 USING btree (a,id);
+CREATE UNIQUE INDEX ON sj_t2 USING btree (id);
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj_t1
+JOIN (
+   SELECT sj_t2.id AS id FROM sj_t2
+   WHERE EXISTS
+       (
+       SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id
+       )
+   ) t2t3t4
+ON sj_t1.id = t2t3t4.id
+JOIN (
+   SELECT sj_t2.id AS id FROM sj_t2
+   WHERE EXISTS
+       (
+       SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id
+       )
+   ) _t2t3t4
+ON sj_t1.id = _t2t3t4.id;
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
+ Nested Loop
+   Join Filter: (sj_t3.id = sj_t1.id)
+   ->  Nested Loop
+         Join Filter: (sj_t2.id = sj_t3.id)
+         ->  Nested Loop Semi Join
+               ->  Nested Loop
+                     ->  HashAggregate
+                           Group Key: sj_t3.id
+                           ->  Nested Loop
+                                 ->  Seq Scan on sj_t4
+                                 ->  Materialize
+                                       ->  Bitmap Heap Scan on sj_t3
+                                             Recheck Cond: (a = 1)
+                                             ->  Bitmap Index Scan on sj_t3_a_id_idx
+                                                   Index Cond: (a = 1)
+                     ->  Index Only Scan using sj_t2_id_idx on sj_t2 sj_t2_1
+                           Index Cond: (id = sj_t3.id)
+               ->  Nested Loop
+                     ->  Index Only Scan using sj_t3_a_id_idx on sj_t3 sj_t3_1
+                           Index Cond: ((a = 1) AND (id = sj_t3.id))
+                     ->  Seq Scan on sj_t4 sj_t4_1
+         ->  Index Only Scan using sj_t2_id_idx on sj_t2
+               Index Cond: (id = sj_t2_1.id)
+   ->  Seq Scan on sj_t1
+(24 rows)
+
+--
+-- Test RowMarks-related code
+--
+-- Both sides have explicit LockRows marks
+EXPLAIN (COSTS OFF)
+SELECT a1.a FROM sj a1,sj a2 WHERE (a1.a=a2.a) FOR UPDATE;
+           QUERY PLAN            
+---------------------------------
+ LockRows
+   ->  Seq Scan on sj a2
+         Filter: (a IS NOT NULL)
+(3 rows)
+
+reset enable_hashjoin;
+reset enable_mergejoin;
  --
  -- Test hints given on incorrect column references are useful
  --
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out

index 352abc0bd424ec445f52c2450d9a4b80ac0b61c1..83228cfca293fd8d30fb900d1a8d40bd354493f2 100644 (file)
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -168,10 +168,11 @@ select name, setting from pg_settings where name like 'enable%';
   enable_partitionwise_aggregate | off
   enable_partitionwise_join      | off
   enable_presorted_aggregate     | on
+ enable_self_join_elimination   | on
   enable_seqscan                 | on
   enable_sort                    | on
   enable_tidscan                 | on
-(23 rows)
+(24 rows)
  
  -- There are always wait event descriptions for various types.  InjectionPoint
  -- may be present or absent, depending on history since last postmaster start.
diff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql

index 28ed7910d014e44bca138c7b424bc3264753f3d5..7fc2159349b6cb9b8481d3d2eb5e9cdbfd5dac20 100644 (file)
--- a/src/test/regress/sql/equivclass.sql
+++ b/src/test/regress/sql/equivclass.sql
@@ -259,6 +259,22 @@ drop user regress_user_ectest;
  explain (costs off)
    select * from tenk1 where unique1 = unique1 and unique2 = unique2;
  
+-- Test that broken ECs are processed correctly during self join removal.
+-- Disable merge joins so that we don't get an error about missing commutator.
+-- Test both orientations of the join clause, because only one of them breaks
+-- the EC.
+set enable_mergejoin to off;
+
+explain (costs off)
+  select * from ec0 m join ec0 n on m.ff = n.ff
+  join ec1 p on m.ff + n.ff = p.f1;
+
+explain (costs off)
+  select * from ec0 m join ec0 n on m.ff = n.ff
+  join ec1 p on p.f1::int8 = (m.ff + n.ff)::int8alias1;
+
+reset enable_mergejoin;
+
  -- this could be converted, but isn't at present
  explain (costs off)
    select * from tenk1 where unique1 = unique1 or unique2 = unique2;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql

index c7349eab933d936b6e6d4478ac9df798598620d8..c29d13b9fedae929d6b664ddcb6bfc6ddf06feeb 100644 (file)
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2175,6 +2175,17 @@ select c.id, ss.a from c
    left join (select d.a from onerow, d left join b on d.a = b.id) ss
    on c.id = ss.a;
  
+-- check the case when the placeholder relates to an outer join and its
+-- inner in the press field but actually uses only the outer side of the join
+explain (costs off)
+SELECT q.val FROM b LEFT JOIN (
+  SELECT (q1.z IS NOT NULL) AS val
+  FROM b LEFT JOIN (
+    SELECT (t1.b_id IS NOT NULL) AS z FROM a t1 LEFT JOIN a t2 USING (id)
+    ) AS q1
+  ON true
+) AS q ON true;
+
  CREATE TEMP TABLE parted_b (id int PRIMARY KEY) partition by range(id);
  CREATE TEMP TABLE parted_b1 partition of parted_b for values from (0) to (10);
  
@@ -2409,6 +2420,489 @@ select * from
  select * from
    int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on q1 = f1; -- ok
  
+--
+-- test that semi- or inner self-joins on a unique column are removed
+--
+
+-- enable only nestloop to get more predictable plans
+set enable_hashjoin to off;
+set enable_mergejoin to off;
+
+create table sj (a int unique, b int, c int unique);
+insert into sj values (1, null, 2), (null, 2, null), (2, 1, 1);
+analyze sj;
+
+-- Trivial self-join case.
+explain (costs off)
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+
+-- Self-join removal performs after a subquery pull-up process and could remove
+-- such kind of self-join too. Check this option.
+explain (costs off)
+select * from sj p
+where exists (select * from sj q
+              where q.a = p.a and q.b < 10);
+select * from sj p
+where exists (select * from sj q
+              where q.a = p.a and q.b < 10);
+
+-- Don't remove self-join for the case of equality of two different unique columns.
+explain (costs off)
+select * from sj t1, sj t2 where t1.a = t2.c and t1.b is not null;
+
+-- Ensure that relations with TABLESAMPLE clauses are not considered as
+-- candidates to be removed
+explain (costs off)
+select * from sj t1
+    join lateral
+      (select * from sj tablesample system(t1.b)) s
+    on t1.a = s.a;
+
+-- Ensure that SJE does not form a self-referential lateral dependency
+explain (costs off)
+select * from sj t1
+    left join lateral
+      (select t1.a as t1a, * from sj t2) s
+    on true
+where t1.a = s.a;
+
+-- Degenerated case.
+explain (costs off)
+select * from
+  (select a as x from sj where false) as q1,
+  (select a as y from sj where false) as q2
+where q1.x = q2.y;
+
+-- We can't use a cross-EC generated self join qual because of current logic of
+-- the generate_join_implied_equalities routine.
+explain (costs off)
+select * from sj t1, sj t2 where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a;
+explain (costs off)
+select * from sj t1, sj t2, sj t3
+where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a and
+      t1.b = t3.b and t3.b = t3.a;
+
+-- Double self-join removal.
+-- Use a condition on "b + 1", not on "b", for the second join, so that
+-- the equivalence class is different from the first one, and we can
+-- test the non-ec code path.
+explain (costs off)
+select *
+from  sj t1
+      join sj t2 on t1.a = t2.a and t1.b = t2.b
+     join sj t3 on t2.a = t3.a and t2.b + 1 = t3.b + 1;
+
+-- subselect that references the removed relation
+explain (costs off)
+select t1.a, (select a from sj where a = t2.a and a = t1.a)
+from sj t1, sj t2
+where t1.a = t2.a;
+
+-- self-join under outer join
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on x.a = z.q1;
+
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on y.a = z.q1;
+
+explain (costs off)
+select * from (
+  select t1.*, t2.a as ax from sj t1 join sj t2
+  on (t1.a = t2.a and t1.c * t1.c = t2.c + 2 and t2.b is null)
+) as q1
+left join
+  (select t3.* from sj t3, sj t4 where t3.c = t4.c) as q2
+on q1.ax = q2.a;
+
+-- Test that placeholders are updated correctly after join removal
+explain (costs off)
+select * from (values (1)) x
+left join (select coalesce(y.q1, 1) from int8_tbl y
+   right join sj j1 inner join sj j2 on j1.a = j2.a
+   on true) z
+on true;
+
+-- Test that references to the removed rel in lateral subqueries are replaced
+-- correctly after join removal
+explain (verbose, costs off)
+select t3.a from sj t1
+   join sj t2 on t1.a = t2.a
+   join lateral (select t1.a offset 0) t3 on true;
+
+explain (verbose, costs off)
+select t3.a from sj t1
+   join sj t2 on t1.a = t2.a
+   join lateral (select * from (select t1.a offset 0) offset 0) t3 on true;
+
+explain (verbose, costs off)
+select t4.a from sj t1
+   join sj t2 on t1.a = t2.a
+   join lateral (select t3.a from sj t3, (select t1.a) offset 0) t4 on true;
+
+-- Check updating of semi_rhs_exprs links from upper-level semi join to
+-- the removing relation
+explain (verbose, costs off)
+select t1.a from sj t1 where t1.b in (
+  select t2.b from sj t2 join sj t3 on t2.c=t3.c);
+
+--
+-- SJE corner case: uniqueness of an inner is [partially] derived from
+-- baserestrictinfo clauses.
+-- XXX: We really should allow SJE for these corner cases?
+--
+
+INSERT INTO sj VALUES (3, 1, 3);
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3;
+-- Return one row
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3;
+
+-- Remove SJ, define uniqueness by a constant
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2;
+-- Return one row
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2;
+
+-- Remove SJ, define uniqueness by a constant expression
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+  AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+  AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a;
+-- Return one row
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+  AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+  AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a;
+
+-- Remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1;
+-- Return no rows
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1;
+
+-- Shuffle a clause. Remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1;
+-- Return no rows
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1;
+
+-- SJE Corner case: a 'a.x=a.x' clause, have replaced with 'a.x IS NOT NULL'
+-- after SJ elimination it shouldn't be a mergejoinable clause.
+EXPLAIN (COSTS OFF)
+SELECT t4.*
+FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3
+JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42;
+SELECT t4.*
+FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3
+JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42;
+
+-- Functional index
+CREATE UNIQUE INDEX sj_fn_idx ON sj((a * a));
+
+-- Remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+   WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 1;
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+   WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 2;
+
+-- Restriction contains expressions in both sides, Remove SJ.
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+  AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+  AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a);
+-- Empty set of rows should be returned
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+  AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+  AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a);
+
+-- Restriction contains volatile function - disable SJE feature.
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+  AND (j1.a*j1.c/3) = (random()/3 + 3)::int
+  AND (random()/3 + 3)::int = (j2.a*j2.c/3);
+-- Return one row
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+  AND (j1.a*j1.c/3) = (random()/3 + 3)::int
+  AND (random()/3 + 3)::int = (j2.a*j2.c/3);
+
+-- Multiple filters
+CREATE UNIQUE INDEX sj_temp_idx1 ON sj(a,b,c);
+
+-- Remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+   WHERE j1.b = j2.b AND j1.a = 2 AND j1.c = 3 AND j2.a = 2 AND 3 = j2.c;
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+   SELECT * FROM sj j1, sj j2
+   WHERE j1.b = j2.b AND 2 = j1.a AND j1.c = 3 AND j2.a = 1 AND 3 = j2.c;
+
+CREATE UNIQUE INDEX sj_temp_idx ON sj(a,b);
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2;
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 2 = j2.a;
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND (j1.a = 1 OR j2.a = 1);
+
+DROP INDEX sj_fn_idx, sj_temp_idx1, sj_temp_idx;
+
+-- Test that OR predicated are updated correctly after join removal
+CREATE TABLE tab_with_flag ( id INT PRIMARY KEY, is_flag SMALLINT);
+CREATE INDEX idx_test_is_flag ON tab_with_flag (is_flag);
+
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM tab_with_flag
+WHERE
+   (is_flag IS NULL OR is_flag = 0)
+   AND id IN (SELECT id FROM tab_with_flag WHERE id IN (2, 3));
+DROP TABLE tab_with_flag;
+
+-- HAVING clause
+explain (costs off)
+select p.b from sj p join sj q on p.a = q.a group by p.b having sum(p.a) = 1;
+
+-- update lateral references and range table entry reference
+explain (verbose, costs off)
+select 1 from (select x.* from sj x, sj y where x.a = y.a) q,
+  lateral generate_series(1, q.a) gs(i);
+
+explain (verbose, costs off)
+select 1 from (select y.* from sj x, sj y where x.a = y.a) q,
+  lateral generate_series(1, q.a) gs(i);
+
+-- Test that a non-EC-derived join clause is processed correctly. Use an
+-- outer join so that we can't form an EC.
+explain (costs off) select * from sj p join sj q on p.a = q.a
+  left join sj r on p.a + q.a = r.a;
+
+-- FIXME this constant false filter doesn't look good. Should we merge
+-- equivalence classes?
+explain (costs off)
+select * from sj p, sj q where p.a = q.a and p.b = 1 and q.b = 2;
+
+-- Check that attr_needed is updated correctly after self-join removal. In this
+-- test, the join of j1 with j2 is removed. k1.b is required at either j1 or j2.
+-- If this info is lost, join targetlist for (k1, k2) will not contain k1.b.
+-- Use index scan for k1 so that we don't get 'b' from physical tlist used for
+-- seqscan. Also disable reordering of joins because this test depends on a
+-- particular join tree.
+create table sk (a int, b int);
+create index on sk(a);
+set join_collapse_limit to 1;
+set enable_seqscan to off;
+explain (costs off) select 1 from
+   (sk k1 join sk k2 on k1.a = k2.a)
+   join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b;
+explain (costs off) select 1 from
+   (sk k1 join sk k2 on k1.a = k2.a)
+   join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b;
+reset join_collapse_limit;
+reset enable_seqscan;
+
+-- Check that clauses from the join filter list is not lost on the self-join removal
+CREATE TABLE emp1 (id SERIAL PRIMARY KEY NOT NULL, code int);
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code;
+
+-- Shuffle self-joined relations. Only in the case of iterative deletion
+-- attempts explains of these queries will be identical.
+CREATE UNIQUE INDEX ON emp1((id*id));
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c1.id=c2.id AND c1.id*c2.id=c3.id*c3.id;
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c1.id=c3.id AND c1.id*c3.id=c2.id*c2.id;
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c3.id=c2.id AND c3.id*c2.id=c1.id*c1.id;
+
+-- Check the usage of a parse tree by the set operations (bug #18170)
+EXPLAIN (COSTS OFF)
+SELECT c1.code FROM emp1 c1 LEFT JOIN emp1 c2 ON c1.id = c2.id
+WHERE c2.id IS NOT NULL
+EXCEPT ALL
+SELECT c3.code FROM emp1 c3;
+
+-- Check that SJE removes references from PHVs correctly
+explain (costs off)
+select * from emp1 t1 left join
+    (select coalesce(t3.code, 1) from emp1 t2
+        left join (emp1 t3 join emp1 t4 on t3.id = t4.id)
+        on true)
+on true;
+
+-- Check that SJE removes the whole PHVs correctly
+explain (verbose, costs off)
+select 1 from emp1 t1 left join
+    ((select 1 as x, * from emp1 t2) s1 inner join
+        (select * from emp1 t3) s2 on s1.id = s2.id)
+    on true
+where s1.x = 1;
+
+-- Check that PHVs do not impose any constraints on removing self joins
+explain (verbose, costs off)
+select * from emp1 t1 join emp1 t2 on t1.id = t2.id left join
+    lateral (select t1.id as t1id, * from generate_series(1,1) t3) s on true;
+
+explain (verbose, costs off)
+select * from generate_series(1,10) t1(id) left join
+    lateral (select t1.id as t1id, t2.id from emp1 t2 join emp1 t3 on t2.id = t3.id)
+on true;
+
+-- Check that SJE replaces join clauses involving the removed rel correctly
+explain (costs off)
+select * from emp1 t1
+   inner join emp1 t2 on t1.id = t2.id
+    left join emp1 t3 on t1.id > 1 and t1.id < 2;
+
+-- Check that SJE doesn't replace the target relation
+EXPLAIN (COSTS OFF)
+WITH t1 AS (SELECT * FROM emp1)
+UPDATE emp1 SET code = t1.code + 1 FROM t1
+WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code, t1.code;
+
+INSERT INTO emp1 VALUES (1, 1), (2, 1);
+
+WITH t1 AS (SELECT * FROM emp1)
+UPDATE emp1 SET code = t1.code + 1 FROM t1
+WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code, t1.code;
+
+TRUNCATE emp1;
+
+EXPLAIN (COSTS OFF)
+UPDATE sj sq SET b = 1 FROM sj as sz WHERE sq.a = sz.a;
+
+CREATE RULE sj_del_rule AS ON DELETE TO sj
+  DO INSTEAD
+    UPDATE sj SET a = 1 WHERE a = old.a;
+EXPLAIN (COSTS OFF) DELETE FROM sj;
+DROP RULE sj_del_rule ON sj CASCADE;
+
+-- Check that SJE does not mistakenly omit qual clauses (bug #18187)
+insert into emp1 values (1, 1);
+explain (costs off)
+select 1 from emp1 full join
+    (select * from emp1 t1 join
+        emp1 t2 join emp1 t3 on t2.id = t3.id
+        on true
+    where false) s on true
+where false;
+select 1 from emp1 full join
+    (select * from emp1 t1 join
+        emp1 t2 join emp1 t3 on t2.id = t3.id
+        on true
+    where false) s on true
+where false;
+
+-- Check that SJE does not mistakenly re-use knowledge of relation uniqueness
+-- made with different set of quals
+insert into emp1 values (2, 1);
+explain (costs off)
+select * from emp1 t1 where exists (select * from emp1 t2
+                                    where t2.id = t1.code and t2.code > 0);
+select * from emp1 t1 where exists (select * from emp1 t2
+                                    where t2.id = t1.code and t2.code > 0);
+
+-- We can remove the join even if we find the join can't duplicate rows and
+-- the base quals of each side are different.  In the following case we end up
+-- moving quals over to s1 to make it so it can't match any rows.
+create table sl(a int, b int, c int);
+create unique index on sl(a, b);
+vacuum analyze sl;
+
+-- Both sides are unique, but base quals are different
+explain (costs off)
+select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1 and t2.b = 2;
+
+-- Check NullTest in baserestrictinfo list
+explain (costs off)
+select * from sl t1, sl t2
+where t1.a = t2.a and t1.b = 1 and t2.b = 2
+  and t1.c IS NOT NULL and t2.c IS NOT NULL
+  and t2.b IS NOT NULL and t1.b IS NOT NULL
+  and t1.a IS NOT NULL and t2.a IS NOT NULL;
+explain (verbose, costs off)
+select * from sl t1, sl t2
+where t1.b = t2.b and t2.a = 3 and t1.a = 3
+  and t1.c IS NOT NULL and t2.c IS NOT NULL
+  and t2.b IS NOT NULL and t1.b IS NOT NULL
+  and t1.a IS NOT NULL and t2.a IS NOT NULL;
+
+-- Join qual isn't mergejoinable, but inner is unique.
+EXPLAIN (COSTS OFF)
+SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a AND n2.a = 1;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+(SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a) q0, sl
+WHERE q0.a = 1;
+
+-- Check optimization disabling if it will violate special join conditions.
+-- Two identical joined relations satisfies self join removal conditions but
+-- stay in different special join infos.
+CREATE TABLE sj_t1 (id serial, a int);
+CREATE TABLE sj_t2 (id serial, a int);
+CREATE TABLE sj_t3 (id serial, a int);
+CREATE TABLE sj_t4 (id serial, a int);
+
+CREATE UNIQUE INDEX ON sj_t3 USING btree (a,id);
+CREATE UNIQUE INDEX ON sj_t2 USING btree (id);
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj_t1
+JOIN (
+   SELECT sj_t2.id AS id FROM sj_t2
+   WHERE EXISTS
+       (
+       SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id
+       )
+   ) t2t3t4
+ON sj_t1.id = t2t3t4.id
+JOIN (
+   SELECT sj_t2.id AS id FROM sj_t2
+   WHERE EXISTS
+       (
+       SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id
+       )
+   ) _t2t3t4
+ON sj_t1.id = _t2t3t4.id;
+
+--
+-- Test RowMarks-related code
+--
+
+-- Both sides have explicit LockRows marks
+EXPLAIN (COSTS OFF)
+SELECT a1.a FROM sj a1,sj a2 WHERE (a1.a=a2.a) FOR UPDATE;
+
+reset enable_hashjoin;
+reset enable_mergejoin;
+
  --
  -- Test hints given on incorrect column references are useful
  --
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list

index b6c170ac249857bff0b5ae3076977a898c6f373b..bce4214503d6da9f9a0cdb591a4368f17c4990b5 100644 (file)
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2593,6 +2593,7 @@ SeenRelsEntry
  SelectLimit
  SelectStmt
  Selectivity
+SelfJoinCandidate
  SemTPadded
  SemiAntiJoinFactors
  SeqScan
@@ -4056,6 +4057,7 @@ unicodeStyleColumnFormat
  unicodeStyleFormat
  unicodeStyleRowFormat
  unicode_linestyle
+UniqueRelInfo
  unit_conversion
  unlogged_relation_entry
  utf_local_conversion_func
author	Alexander Korotkov <akorotkov@postgresql.org>
	Wed, 12 Feb 2025 22:56:03 +0000 (00:56 +0200)
committer	Alexander Korotkov <akorotkov@postgresql.org>
	Mon, 17 Feb 2025 10:44:12 +0000 (12:44 +0200)
doc/src/sgml/config.sgml		patch \| blob \| blame \| history
src/backend/optimizer/path/equivclass.c		patch \| blob \| blame \| history
src/backend/optimizer/path/indxpath.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/analyzejoins.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/planmain.c		patch \| blob \| blame \| history
src/backend/optimizer/prep/prepunion.c		patch \| blob \| blame \| history
src/backend/rewrite/rewriteManip.c		patch \| blob \| blame \| history
src/backend/utils/misc/guc_tables.c		patch \| blob \| blame \| history
src/include/nodes/pathnodes.h		patch \| blob \| blame \| history
src/include/optimizer/optimizer.h		patch \| blob \| blame \| history
src/include/optimizer/paths.h		patch \| blob \| blame \| history
src/include/optimizer/planmain.h		patch \| blob \| blame \| history
src/include/rewrite/rewriteManip.h		patch \| blob \| blame \| history
src/test/regress/expected/equivclass.out		patch \| blob \| blame \| history
src/test/regress/expected/join.out		patch \| blob \| blame \| history
src/test/regress/expected/sysviews.out		patch \| blob \| blame \| history
src/test/regress/sql/equivclass.sql		patch \| blob \| blame \| history
src/test/regress/sql/join.sql		patch \| blob \| blame \| history
src/tools/pgindent/typedefs.list		patch \| blob \| blame \| history