Allow simplification of EXISTS() subqueries containing LIMIT.

author Tom Lane <tgl@sss.pgh.pa.us>

Sun, 23 Nov 2014 00:12:38 +0000 (19:12 -0500)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sun, 23 Nov 2014 00:12:38 +0000 (19:12 -0500)
author Tom Lane <tgl@sss.pgh.pa.us>
Sun, 23 Nov 2014 00:12:38 +0000 (19:12 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sun, 23 Nov 2014 00:12:38 +0000 (19:12 -0500)
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c

index 9111b56e92a560ea77a1b7beb1c31b0cf6fcc09a..42b6d0a2cb9d9b4b60b6df0b5f6802e4c65a08f0 100644 (file)
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -70,7 +70,7 @@ static Node *convert_testexpr_mutator(Node *node,
  static bool subplan_is_hashable(Plan *plan);
  static bool testexpr_is_hashable(Node *testexpr);
  static bool hash_ok_operator(OpExpr *expr);
-static bool simplify_EXISTS_query(Query *query);
+static bool simplify_EXISTS_query(PlannerInfo *root, Query *query);
  static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
                       Node **testexpr, List **paramIds);
  static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root);
@@ -452,7 +452,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
      * If it's an EXISTS subplan, we might be able to simplify it.
      */
     if (subLinkType == EXISTS_SUBLINK)
-       simple_exists = simplify_EXISTS_query(subquery);
+       simple_exists = simplify_EXISTS_query(root, subquery);
  
     /*
      * For an EXISTS subplan, tell lower-level planner to expect that only the
@@ -518,7 +518,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
         /* Make a second copy of the original subquery */
         subquery = (Query *) copyObject(orig_subquery);
         /* and re-simplify */
-       simple_exists = simplify_EXISTS_query(subquery);
+       simple_exists = simplify_EXISTS_query(root, subquery);
         Assert(simple_exists);
         /* See if it can be converted to an ANY query */
         subquery = convert_EXISTS_to_ANY(root, subquery,
@@ -1359,7 +1359,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
      * targetlist, we have to fail, because the pullup operation leaves us
      * with noplace to evaluate the targetlist.
      */
-   if (!simplify_EXISTS_query(subselect))
+   if (!simplify_EXISTS_query(root, subselect))
         return NULL;
  
     /*
@@ -1486,13 +1486,14 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
   * Returns TRUE if was able to discard the targetlist, else FALSE.
   */
  static bool
-simplify_EXISTS_query(Query *query)
+simplify_EXISTS_query(PlannerInfo *root, Query *query)
  {
     /*
      * We don't try to simplify at all if the query uses set operations,
-    * aggregates, modifying CTEs, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE;
-    * none of these seem likely in normal usage and their possible effects
-    * are complex.
+    * aggregates, modifying CTEs, HAVING, OFFSET, or FOR UPDATE/SHARE; none
+    * of these seem likely in normal usage and their possible effects are
+    * complex.  (Note: we could ignore an "OFFSET 0" clause, but that
+    * traditionally is used as an optimization fence, so we don't.)
      */
     if (query->commandType != CMD_SELECT ||
         query->setOperations ||
@@ -1501,10 +1502,43 @@ simplify_EXISTS_query(Query *query)
         query->hasModifyingCTE ||
         query->havingQual ||
         query->limitOffset ||
-       query->limitCount ||
         query->rowMarks)
         return false;
  
+   /*
+    * LIMIT with a constant positive (or NULL) value doesn't affect the
+    * semantics of EXISTS, so let's ignore such clauses.  This is worth doing
+    * because people accustomed to certain other DBMSes may be in the habit
+    * of writing EXISTS(SELECT ... LIMIT 1) as an optimization.  If there's a
+    * LIMIT with anything else as argument, though, we can't simplify.
+    */
+   if (query->limitCount)
+   {
+       /*
+        * The LIMIT clause has not yet been through eval_const_expressions,
+        * so we have to apply that here.  It might seem like this is a waste
+        * of cycles, since the only case plausibly worth worrying about is
+        * "LIMIT 1" ... but what we'll actually see is "LIMIT int8(1::int4)",
+        * so we have to fold constants or we're not going to recognize it.
+        */
+       Node       *node = eval_const_expressions(root, query->limitCount);
+       Const      *limit;
+
+       /* Might as well update the query if we simplified the clause. */
+       query->limitCount = node;
+
+       if (!IsA(node, Const))
+           return false;
+
+       limit = (Const *) node;
+       Assert(limit->consttype == INT8OID);
+       if (!limit->constisnull && DatumGetInt64(limit->constvalue) <= 0)
+           return false;
+
+       /* Whether or not the targetlist is safe, we can drop the LIMIT. */
+       query->limitCount = NULL;
+   }
+
     /*
      * Mustn't throw away the targetlist if it contains set-returning
      * functions; those could affect whether zero rows are returned!
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out

index 01c91308f33351711f542508767d1e55bedf5b6f..b14410fd222ed17419ad583c3397a8be288a8191 100644 (file)
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -221,6 +221,46 @@ from int8_tbl group by q1 order by q1;
   4567890123456789 |      0.6
  (2 rows)
  
+--
+-- Check EXISTS simplification with LIMIT
+--
+explain (costs off)
+select * from int4_tbl o where exists
+  (select 1 from int4_tbl i where i.f1=o.f1 limit null);
+             QUERY PLAN             
+------------------------------------
+ Hash Semi Join
+   Hash Cond: (o.f1 = i.f1)
+   ->  Seq Scan on int4_tbl o
+   ->  Hash
+         ->  Seq Scan on int4_tbl i
+(5 rows)
+
+explain (costs off)
+select * from int4_tbl o where not exists
+  (select 1 from int4_tbl i where i.f1=o.f1 limit 1);
+             QUERY PLAN             
+------------------------------------
+ Hash Anti Join
+   Hash Cond: (o.f1 = i.f1)
+   ->  Seq Scan on int4_tbl o
+   ->  Hash
+         ->  Seq Scan on int4_tbl i
+(5 rows)
+
+explain (costs off)
+select * from int4_tbl o where exists
+  (select 1 from int4_tbl i where i.f1=o.f1 limit 0);
+              QUERY PLAN              
+--------------------------------------
+ Seq Scan on int4_tbl o
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Limit
+           ->  Seq Scan on int4_tbl i
+                 Filter: (f1 = o.f1)
+(6 rows)
+
  --
  -- Test cases to catch unpleasant interactions between IN-join processing
  -- and subquery pullup.
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql

index 56707e26bbf4499462a2d972a5d46a2600746c1b..4be2e40a0002e12412a2e135b11c6861e0900333 100644 (file)
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -92,6 +92,19 @@ SELECT '' AS eight, ss.f1 AS "Correlated Field", ss.f3 AS "Second Field"
  select q1, float8(count(*)) / (select count(*) from int8_tbl)
  from int8_tbl group by q1 order by q1;
  
+--
+-- Check EXISTS simplification with LIMIT
+--
+explain (costs off)
+select * from int4_tbl o where exists
+  (select 1 from int4_tbl i where i.f1=o.f1 limit null);
+explain (costs off)
+select * from int4_tbl o where not exists
+  (select 1 from int4_tbl i where i.f1=o.f1 limit 1);
+explain (costs off)
+select * from int4_tbl o where exists
+  (select 1 from int4_tbl i where i.f1=o.f1 limit 0);
+
  --
  -- Test cases to catch unpleasant interactions between IN-join processing
  -- and subquery pullup.
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 23 Nov 2014 00:12:38 +0000 (19:12 -0500)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 23 Nov 2014 00:12:38 +0000 (19:12 -0500)
src/backend/optimizer/plan/subselect.c		patch \| blob \| blame \| history
src/test/regress/expected/subselect.out		patch \| blob \| blame \| history
src/test/regress/sql/subselect.sql		patch \| blob \| blame \| history