Fix assertion failure when a SELECT DISTINCT ON expression is volatile.
authorTom Lane <tgl@sss.pgh.pa.us>
Sat, 12 Sep 2009 00:04:59 +0000 (00:04 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sat, 12 Sep 2009 00:04:59 +0000 (00:04 +0000)
In this case we generate two PathKey references to the expression (one for
DISTINCT and one for ORDER BY) and they really need to refer to the same
EquivalenceClass.  However get_eclass_for_sort_expr was being overly paranoid
and creating two different EC's.  Correct behavior is to use the SortGroupRef
index to decide whether two references to volatile expressions that are
equal() (ie textually equivalent) should be considered the same.

Backpatch to 8.4.  Possibly this should be changed in 8.3 as well, but
I'll refrain in the absence of evidence of a visible failure in that branch.

Per bug #5049.

src/backend/optimizer/path/equivclass.c
src/backend/optimizer/path/pathkeys.c
src/test/regress/expected/select_distinct_on.out
src/test/regress/sql/select_distinct_on.sql

index cc36cad2355eb112c46372c79842ffd64b86cc78..ef797f67bec6e8e1f6152b5e3078af7b5c08a08a 100644 (file)
@@ -357,7 +357,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
  *       EquivalenceClass for it.
  *
  * sortref is the SortGroupRef of the originating SortGroupClause, if any,
- * or zero if not.
+ * or zero if not.  (It should never be zero if the expression is volatile!)
  *
  * This can be used safely both before and after EquivalenceClass merging;
  * since it never causes merging it does not invalidate any existing ECs
@@ -388,8 +388,12 @@ get_eclass_for_sort_expr(PlannerInfo *root,
                EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
                ListCell   *lc2;
 
-               /* Never match to a volatile EC */
-               if (cur_ec->ec_has_volatile)
+               /*
+                * Never match to a volatile EC, except when we are looking at another
+                * reference to the same volatile SortGroupClause.
+                */
+               if (cur_ec->ec_has_volatile &&
+                       (sortref == 0 || sortref != cur_ec->ec_sortref))
                        continue;
 
                if (!equal(opfamilies, cur_ec->ec_opfamilies))
@@ -433,6 +437,10 @@ get_eclass_for_sort_expr(PlannerInfo *root,
        newec->ec_broken = false;
        newec->ec_sortref = sortref;
        newec->ec_merged = NULL;
+
+       if (newec->ec_has_volatile && sortref == 0)             /* should not happen */
+               elog(ERROR, "volatile EquivalenceClass has no sortref");
+
        newem = add_eq_member(newec, expr, pull_varnos((Node *) expr),
                                                  false, expr_datatype);
 
index f63a05bfcd5c92bf2db2c39f315303b21887a25f..fdbbceef883a17a017fc5d87ef991551b8a744d9 100644 (file)
@@ -635,6 +635,15 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
                                                        exprType((Node *) tle->expr),
                                                        exprTypmod((Node *) tle->expr),
                                                        0);
+
+                               /*
+                                * Note: it might look funny to be setting sortref = 0 for
+                                * a reference to a volatile sub_eclass.  However, the
+                                * expression is *not* volatile in the outer query: it's
+                                * just a Var referencing whatever the subquery emitted.
+                                * (IOW, the outer query isn't going to re-execute the
+                                * volatile expression itself.)  So this is okay.
+                                */
                                outer_ec =
                                        get_eclass_for_sort_expr(root,
                                                                                         outer_expr,
index fc6dda7477f4c68c6b175acea9a19233f97ae564..b787b307f6950204036482f2d9566ff71a5666f9 100644 (file)
@@ -66,3 +66,10 @@ SELECT DISTINCT ON (string4, ten) string4, ten, two
  VVVVxx  |   0 |   0
 (40 rows)
 
+-- bug #5049: early 8.4.x chokes on volatile DISTINCT ON clauses
+select distinct on (1) floor(random()) as r, f1 from int4_tbl order by 1,2;
+ r |     f1      
+---+-------------
+ 0 | -2147483647
+(1 row)
+
index 54d98ca6979baf89afef1c132d425b5499c597b7..d18733d274c3b783bec57e25442b6d642551a0bc 100644 (file)
@@ -14,3 +14,6 @@ SELECT DISTINCT ON (string4, ten) string4, two, ten
 SELECT DISTINCT ON (string4, ten) string4, ten, two
    FROM tmp
    ORDER BY string4 using <, ten using >, two using <;
+
+-- bug #5049: early 8.4.x chokes on volatile DISTINCT ON clauses
+select distinct on (1) floor(random()) as r, f1 from int4_tbl order by 1,2;