Improve planner's selectivity estimates for inequalities on CTID.
authorTom Lane <tgl@sss.pgh.pa.us>
Mon, 25 Mar 2019 22:42:52 +0000 (18:42 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Mon, 25 Mar 2019 22:42:52 +0000 (18:42 -0400)
We were getting just DEFAULT_INEQ_SEL for comparisons such as
"ctid >= constant", but it's possible to do a lot better if we don't
mind some assumptions about the table's tuple density being reasonably
uniform.  There are already assumptions much like that elsewhere in
the planner, so that hardly seems like much of an objection.

Extracted from a patch set that also proposes to introduce a special
executor node type for such queries.  Not sure if that's going to make
it into v12, but improving the selectivity estimate is useful
independently of that.

Edmund Horner, reviewed by David Rowley

Discussion: https://postgr.es/m/CAMyN-kB-nFTkF=VA_JPwFNo08S0d-Yk0F741S2B7LDmYAi8eyA@mail.gmail.com

src/backend/utils/adt/selfuncs.c

index 12d30d7d63809af47c286f76b284ba999a0e5975..ecffffbd0c8a95af085f5e16e7c240c68b5dd87c 100644 (file)
@@ -557,6 +557,81 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
 
        if (!HeapTupleIsValid(vardata->statsTuple))
        {
+               /*
+                * No stats are available.  Typically this means we have to fall back
+                * on the default estimate; but if the variable is CTID then we can
+                * make an estimate based on comparing the constant to the table size.
+                */
+               if (vardata->var && IsA(vardata->var, Var) &&
+                       ((Var *) vardata->var)->varattno == SelfItemPointerAttributeNumber)
+               {
+                       ItemPointer itemptr;
+                       double          block;
+                       double          density;
+
+                       /*
+                        * If the relation's empty, we're going to include all of it.
+                        * (This is mostly to avoid divide-by-zero below.)
+                        */
+                       if (vardata->rel->pages == 0)
+                               return 1.0;
+
+                       itemptr = (ItemPointer) DatumGetPointer(constval);
+                       block = ItemPointerGetBlockNumberNoCheck(itemptr);
+
+                       /*
+                        * Determine the average number of tuples per page (density).
+                        *
+                        * Since the last page will, on average, be only half full, we can
+                        * estimate it to have half as many tuples as earlier pages.  So
+                        * give it half the weight of a regular page.
+                        */
+                       density = vardata->rel->tuples / (vardata->rel->pages - 0.5);
+
+                       /* If target is the last page, use half the density. */
+                       if (block >= vardata->rel->pages - 1)
+                               density *= 0.5;
+
+                       /*
+                        * Using the average tuples per page, calculate how far into the
+                        * page the itemptr is likely to be and adjust block accordingly,
+                        * by adding that fraction of a whole block (but never more than a
+                        * whole block, no matter how high the itemptr's offset is).  Here
+                        * we are ignoring the possibility of dead-tuple line pointers,
+                        * which is fairly bogus, but we lack the info to do better.
+                        */
+                       if (density > 0.0)
+                       {
+                               OffsetNumber offset = ItemPointerGetOffsetNumberNoCheck(itemptr);
+
+                               block += Min(offset / density, 1.0);
+                       }
+
+                       /*
+                        * Convert relative block number to selectivity.  Again, the last
+                        * page has only half weight.
+                        */
+                       selec = block / (vardata->rel->pages - 0.5);
+
+                       /*
+                        * The calculation so far gave us a selectivity for the "<=" case.
+                        * We'll have one less tuple for "<" and one additional tuple for
+                        * ">=", the latter of which we'll reverse the selectivity for
+                        * below, so we can simply subtract one tuple for both cases.  The
+                        * cases that need this adjustment can be identified by iseq being
+                        * equal to isgt.
+                        */
+                       if (iseq == isgt && vardata->rel->tuples >= 1.0)
+                               selec -= (1.0 / vardata->rel->tuples);
+
+                       /* Finally, reverse the selectivity for the ">", ">=" cases. */
+                       if (isgt)
+                               selec = 1.0 - selec;
+
+                       CLAMP_PROBABILITY(selec);
+                       return selec;
+               }
+
                /* no stats available, so default result */
                return DEFAULT_INEQ_SEL;
        }