show_instrumentation_count("Rows Removed by Filter", 1,
planstate, es);
break;
+ case T_WindowAgg:
+ show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ show_upper_qual(((WindowAgg *) plan)->runConditionOrig,
+ "Run Condition", planstate, ancestors, es);
+ break;
case T_Group:
show_group_keys(castNode(GroupState, planstate), ancestors, es);
show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
if (winstate->partition_spooled)
return; /* whole partition done already */
+ /*
+ * When in pass-through mode we can just exhaust all tuples in the current
+ * partition. We don't need these tuples for any further window function
+ * evaluation, however, we do need to keep them around if we're not the
+ * top-level window as another WindowAgg node above must see these.
+ */
+ if (winstate->status != WINDOWAGG_RUN)
+ {
+ Assert(winstate->status == WINDOWAGG_PASSTHROUGH ||
+ winstate->status == WINDOWAGG_PASSTHROUGH_STRICT);
+
+ pos = -1;
+ }
+
/*
* If the tuplestore has spilled to disk, alternate reading and writing
* becomes quite expensive due to frequent buffer flushes. It's cheaper
* XXX this is a horrid kluge --- it'd be better to fix the performance
* problem inside tuplestore. FIXME
*/
- if (!tuplestore_in_memory(winstate->buffer))
+ else if (!tuplestore_in_memory(winstate->buffer))
pos = -1;
outerPlan = outerPlanState(winstate);
}
}
- /* Still in partition, so save it into the tuplestore */
- tuplestore_puttupleslot(winstate->buffer, outerslot);
- winstate->spooled_rows++;
+ /*
+ * Remember the tuple unless we're the top-level window and we're in
+ * pass-through mode.
+ */
+ if (winstate->status != WINDOWAGG_PASSTHROUGH_STRICT)
+ {
+ /* Still in partition, so save it into the tuplestore */
+ tuplestore_puttupleslot(winstate->buffer, outerslot);
+ winstate->spooled_rows++;
+ }
}
MemoryContextSwitchTo(oldcontext);
ExecWindowAgg(PlanState *pstate)
{
WindowAggState *winstate = castNode(WindowAggState, pstate);
+ TupleTableSlot *slot;
ExprContext *econtext;
int i;
int numfuncs;
CHECK_FOR_INTERRUPTS();
- if (winstate->all_done)
+ if (winstate->status == WINDOWAGG_DONE)
return NULL;
/*
winstate->all_first = false;
}
- if (winstate->buffer == NULL)
- {
- /* Initialize for first partition and set current row = 0 */
- begin_partition(winstate);
- /* If there are no input rows, we'll detect that and exit below */
- }
- else
+ /* We need to loop as the runCondition or qual may filter out tuples */
+ for (;;)
{
- /* Advance current row within partition */
- winstate->currentpos++;
- /* This might mean that the frame moves, too */
- winstate->framehead_valid = false;
- winstate->frametail_valid = false;
- /* we don't need to invalidate grouptail here; see below */
- }
+ if (winstate->buffer == NULL)
+ {
+ /* Initialize for first partition and set current row = 0 */
+ begin_partition(winstate);
+ /* If there are no input rows, we'll detect that and exit below */
+ }
+ else
+ {
+ /* Advance current row within partition */
+ winstate->currentpos++;
+ /* This might mean that the frame moves, too */
+ winstate->framehead_valid = false;
+ winstate->frametail_valid = false;
+ /* we don't need to invalidate grouptail here; see below */
+ }
- /*
- * Spool all tuples up to and including the current row, if we haven't
- * already
- */
- spool_tuples(winstate, winstate->currentpos);
+ /*
+ * Spool all tuples up to and including the current row, if we haven't
+ * already
+ */
+ spool_tuples(winstate, winstate->currentpos);
- /* Move to the next partition if we reached the end of this partition */
- if (winstate->partition_spooled &&
- winstate->currentpos >= winstate->spooled_rows)
- {
- release_partition(winstate);
+ /* Move to the next partition if we reached the end of this partition */
+ if (winstate->partition_spooled &&
+ winstate->currentpos >= winstate->spooled_rows)
+ {
+ release_partition(winstate);
+
+ if (winstate->more_partitions)
+ {
+ begin_partition(winstate);
+ Assert(winstate->spooled_rows > 0);
+
+ /* Come out of pass-through mode when changing partition */
+ winstate->status = WINDOWAGG_RUN;
+ }
+ else
+ {
+ /* No further partitions? We're done */
+ winstate->status = WINDOWAGG_DONE;
+ return NULL;
+ }
+ }
+
+ /* final output execution is in ps_ExprContext */
+ econtext = winstate->ss.ps.ps_ExprContext;
+
+ /* Clear the per-output-tuple context for current row */
+ ResetExprContext(econtext);
- if (winstate->more_partitions)
+ /*
+ * Read the current row from the tuplestore, and save in
+ * ScanTupleSlot. (We can't rely on the outerplan's output slot
+ * because we may have to read beyond the current row. Also, we have
+ * to actually copy the row out of the tuplestore, since window
+ * function evaluation might cause the tuplestore to dump its state to
+ * disk.)
+ *
+ * In GROUPS mode, or when tracking a group-oriented exclusion clause,
+ * we must also detect entering a new peer group and update associated
+ * state when that happens. We use temp_slot_2 to temporarily hold
+ * the previous row for this purpose.
+ *
+ * Current row must be in the tuplestore, since we spooled it above.
+ */
+ tuplestore_select_read_pointer(winstate->buffer, winstate->current_ptr);
+ if ((winstate->frameOptions & (FRAMEOPTION_GROUPS |
+ FRAMEOPTION_EXCLUDE_GROUP |
+ FRAMEOPTION_EXCLUDE_TIES)) &&
+ winstate->currentpos > 0)
{
- begin_partition(winstate);
- Assert(winstate->spooled_rows > 0);
+ ExecCopySlot(winstate->temp_slot_2, winstate->ss.ss_ScanTupleSlot);
+ if (!tuplestore_gettupleslot(winstate->buffer, true, true,
+ winstate->ss.ss_ScanTupleSlot))
+ elog(ERROR, "unexpected end of tuplestore");
+ if (!are_peers(winstate, winstate->temp_slot_2,
+ winstate->ss.ss_ScanTupleSlot))
+ {
+ winstate->currentgroup++;
+ winstate->groupheadpos = winstate->currentpos;
+ winstate->grouptail_valid = false;
+ }
+ ExecClearTuple(winstate->temp_slot_2);
}
else
{
- winstate->all_done = true;
- return NULL;
+ if (!tuplestore_gettupleslot(winstate->buffer, true, true,
+ winstate->ss.ss_ScanTupleSlot))
+ elog(ERROR, "unexpected end of tuplestore");
}
- }
- /* final output execution is in ps_ExprContext */
- econtext = winstate->ss.ps.ps_ExprContext;
+ /* don't evaluate the window functions when we're in pass-through mode */
+ if (winstate->status == WINDOWAGG_RUN)
+ {
+ /*
+ * Evaluate true window functions
+ */
+ numfuncs = winstate->numfuncs;
+ for (i = 0; i < numfuncs; i++)
+ {
+ WindowStatePerFunc perfuncstate = &(winstate->perfunc[i]);
- /* Clear the per-output-tuple context for current row */
- ResetExprContext(econtext);
+ if (perfuncstate->plain_agg)
+ continue;
+ eval_windowfunction(winstate, perfuncstate,
+ &(econtext->ecxt_aggvalues[perfuncstate->wfuncstate->wfuncno]),
+ &(econtext->ecxt_aggnulls[perfuncstate->wfuncstate->wfuncno]));
+ }
- /*
- * Read the current row from the tuplestore, and save in ScanTupleSlot.
- * (We can't rely on the outerplan's output slot because we may have to
- * read beyond the current row. Also, we have to actually copy the row
- * out of the tuplestore, since window function evaluation might cause the
- * tuplestore to dump its state to disk.)
- *
- * In GROUPS mode, or when tracking a group-oriented exclusion clause, we
- * must also detect entering a new peer group and update associated state
- * when that happens. We use temp_slot_2 to temporarily hold the previous
- * row for this purpose.
- *
- * Current row must be in the tuplestore, since we spooled it above.
- */
- tuplestore_select_read_pointer(winstate->buffer, winstate->current_ptr);
- if ((winstate->frameOptions & (FRAMEOPTION_GROUPS |
- FRAMEOPTION_EXCLUDE_GROUP |
- FRAMEOPTION_EXCLUDE_TIES)) &&
- winstate->currentpos > 0)
- {
- ExecCopySlot(winstate->temp_slot_2, winstate->ss.ss_ScanTupleSlot);
- if (!tuplestore_gettupleslot(winstate->buffer, true, true,
- winstate->ss.ss_ScanTupleSlot))
- elog(ERROR, "unexpected end of tuplestore");
- if (!are_peers(winstate, winstate->temp_slot_2,
- winstate->ss.ss_ScanTupleSlot))
- {
- winstate->currentgroup++;
- winstate->groupheadpos = winstate->currentpos;
- winstate->grouptail_valid = false;
+ /*
+ * Evaluate aggregates
+ */
+ if (winstate->numaggs > 0)
+ eval_windowaggregates(winstate);
}
- ExecClearTuple(winstate->temp_slot_2);
- }
- else
- {
- if (!tuplestore_gettupleslot(winstate->buffer, true, true,
- winstate->ss.ss_ScanTupleSlot))
- elog(ERROR, "unexpected end of tuplestore");
- }
- /*
- * Evaluate true window functions
- */
- numfuncs = winstate->numfuncs;
- for (i = 0; i < numfuncs; i++)
- {
- WindowStatePerFunc perfuncstate = &(winstate->perfunc[i]);
+ /*
+ * If we have created auxiliary read pointers for the frame or group
+ * boundaries, force them to be kept up-to-date, because we don't know
+ * whether the window function(s) will do anything that requires that.
+ * Failing to advance the pointers would result in being unable to
+ * trim data from the tuplestore, which is bad. (If we could know in
+ * advance whether the window functions will use frame boundary info,
+ * we could skip creating these pointers in the first place ... but
+ * unfortunately the window function API doesn't require that.)
+ */
+ if (winstate->framehead_ptr >= 0)
+ update_frameheadpos(winstate);
+ if (winstate->frametail_ptr >= 0)
+ update_frametailpos(winstate);
+ if (winstate->grouptail_ptr >= 0)
+ update_grouptailpos(winstate);
- if (perfuncstate->plain_agg)
- continue;
- eval_windowfunction(winstate, perfuncstate,
- &(econtext->ecxt_aggvalues[perfuncstate->wfuncstate->wfuncno]),
- &(econtext->ecxt_aggnulls[perfuncstate->wfuncstate->wfuncno]));
- }
+ /*
+ * Truncate any no-longer-needed rows from the tuplestore.
+ */
+ tuplestore_trim(winstate->buffer);
- /*
- * Evaluate aggregates
- */
- if (winstate->numaggs > 0)
- eval_windowaggregates(winstate);
+ /*
+ * Form and return a projection tuple using the windowfunc results and
+ * the current row. Setting ecxt_outertuple arranges that any Vars
+ * will be evaluated with respect to that row.
+ */
+ econtext->ecxt_outertuple = winstate->ss.ss_ScanTupleSlot;
- /*
- * If we have created auxiliary read pointers for the frame or group
- * boundaries, force them to be kept up-to-date, because we don't know
- * whether the window function(s) will do anything that requires that.
- * Failing to advance the pointers would result in being unable to trim
- * data from the tuplestore, which is bad. (If we could know in advance
- * whether the window functions will use frame boundary info, we could
- * skip creating these pointers in the first place ... but unfortunately
- * the window function API doesn't require that.)
- */
- if (winstate->framehead_ptr >= 0)
- update_frameheadpos(winstate);
- if (winstate->frametail_ptr >= 0)
- update_frametailpos(winstate);
- if (winstate->grouptail_ptr >= 0)
- update_grouptailpos(winstate);
+ slot = ExecProject(winstate->ss.ps.ps_ProjInfo);
- /*
- * Truncate any no-longer-needed rows from the tuplestore.
- */
- tuplestore_trim(winstate->buffer);
+ if (winstate->status == WINDOWAGG_RUN)
+ {
+ econtext->ecxt_scantuple = slot;
- /*
- * Form and return a projection tuple using the windowfunc results and the
- * current row. Setting ecxt_outertuple arranges that any Vars will be
- * evaluated with respect to that row.
- */
- econtext->ecxt_outertuple = winstate->ss.ss_ScanTupleSlot;
+ /*
+ * Now evaluate the run condition to see if we need to go into
+ * pass-through mode, or maybe stop completely.
+ */
+ if (!ExecQual(winstate->runcondition, econtext))
+ {
+ /*
+ * Determine which mode to move into. If there is no
+ * PARTITION BY clause and we're the top-level WindowAgg then
+ * we're done. This tuple and any future tuples cannot
+ * possibly match the runcondition. However, when there is a
+ * PARTITION BY clause or we're not the top-level window we
+ * can't just stop as we need to either process other
+ * partitions or ensure WindowAgg nodes above us receive all
+ * of the tuples they need to process their WindowFuncs.
+ */
+ if (winstate->use_pass_through)
+ {
+ /*
+ * STRICT pass-through mode is required for the top window
+ * when there is a PARTITION BY clause. Otherwise we must
+ * ensure we store tuples that don't match the
+ * runcondition so they're available to WindowAggs above.
+ */
+ if (winstate->top_window)
+ {
+ winstate->status = WINDOWAGG_PASSTHROUGH_STRICT;
+ continue;
+ }
+ else
+ winstate->status = WINDOWAGG_PASSTHROUGH;
+ }
+ else
+ {
+ /*
+ * Pass-through not required. We can just return NULL.
+ * Nothing else will match the runcondition.
+ */
+ winstate->status = WINDOWAGG_DONE;
+ return NULL;
+ }
+ }
- return ExecProject(winstate->ss.ps.ps_ProjInfo);
+ /*
+ * Filter out any tuples we don't need in the top-level WindowAgg.
+ */
+ if (!ExecQual(winstate->ss.ps.qual, econtext))
+ {
+ InstrCountFiltered1(winstate, 1);
+ continue;
+ }
+
+ break;
+ }
+
+ /*
+ * When not in WINDOWAGG_RUN mode, we must still return this tuple if
+ * we're anything apart from the top window.
+ */
+ else if (!winstate->top_window)
+ break;
+ }
+
+ return slot;
}
/* -----------------
"WindowAgg Aggregates",
ALLOCSET_DEFAULT_SIZES);
+ /* Only the top-level WindowAgg may have a qual */
+ Assert(node->plan.qual == NIL || node->topWindow);
+
+ /* Initialize the qual */
+ winstate->ss.ps.qual = ExecInitQual(node->plan.qual,
+ (PlanState *) winstate);
+
+ /*
+ * Setup the run condition, if we received one from the query planner.
+ * When set, this may allow us to move into pass-through mode so that we
+ * don't have to perform any further evaluation of WindowFuncs in the
+ * current partition or possibly stop returning tuples altogether when all
+ * tuples are in the same partition.
+ */
+ winstate->runcondition = ExecInitQual(node->runCondition,
+ (PlanState *) winstate);
+
/*
- * WindowAgg nodes never have quals, since they can only occur at the
- * logical top level of a query (ie, after any WHERE or HAVING filters)
+ * When we're not the top-level WindowAgg node or we are but have a
+ * PARTITION BY clause we must move into one of the WINDOWAGG_PASSTHROUGH*
+ * modes when the runCondition becomes false.
*/
- Assert(node->plan.qual == NIL);
- winstate->ss.ps.qual = NULL;
+ winstate->use_pass_through = !node->topWindow || node->partNumCols > 0;
+
+ /* remember if we're the top-window or we are below the top-window */
+ winstate->top_window = node->topWindow;
/*
* initialize child nodes
winstate->agg_winobj = agg_winobj;
}
+ /* Set the status to running */
+ winstate->status = WINDOWAGG_RUN;
+
/* copy frame options to state node for easy access */
winstate->frameOptions = frameOptions;
PlanState *outerPlan = outerPlanState(node);
ExprContext *econtext = node->ss.ps.ps_ExprContext;
- node->all_done = false;
+ node->status = WINDOWAGG_RUN;
node->all_first = true;
/* release tuplestore et al */
COPY_SCALAR_FIELD(frameOptions);
COPY_NODE_FIELD(startOffset);
COPY_NODE_FIELD(endOffset);
+ COPY_NODE_FIELD(runCondition);
+ COPY_NODE_FIELD(runConditionOrig);
COPY_SCALAR_FIELD(startInRangeFunc);
COPY_SCALAR_FIELD(endInRangeFunc);
COPY_SCALAR_FIELD(inRangeColl);
COPY_SCALAR_FIELD(inRangeAsc);
COPY_SCALAR_FIELD(inRangeNullsFirst);
+ COPY_SCALAR_FIELD(topWindow);
return newnode;
}
COPY_SCALAR_FIELD(frameOptions);
COPY_NODE_FIELD(startOffset);
COPY_NODE_FIELD(endOffset);
+ COPY_NODE_FIELD(runCondition);
COPY_SCALAR_FIELD(startInRangeFunc);
COPY_SCALAR_FIELD(endInRangeFunc);
COPY_SCALAR_FIELD(inRangeColl);
COMPARE_SCALAR_FIELD(frameOptions);
COMPARE_NODE_FIELD(startOffset);
COMPARE_NODE_FIELD(endOffset);
+ COMPARE_NODE_FIELD(runCondition);
COMPARE_SCALAR_FIELD(startInRangeFunc);
COMPARE_SCALAR_FIELD(endInRangeFunc);
COMPARE_SCALAR_FIELD(inRangeColl);
WRITE_INT_FIELD(frameOptions);
WRITE_NODE_FIELD(startOffset);
WRITE_NODE_FIELD(endOffset);
+ WRITE_NODE_FIELD(runCondition);
+ WRITE_NODE_FIELD(runConditionOrig);
WRITE_OID_FIELD(startInRangeFunc);
WRITE_OID_FIELD(endInRangeFunc);
WRITE_OID_FIELD(inRangeColl);
WRITE_BOOL_FIELD(inRangeAsc);
WRITE_BOOL_FIELD(inRangeNullsFirst);
+ WRITE_BOOL_FIELD(topWindow);
}
static void
WRITE_NODE_FIELD(subpath);
WRITE_NODE_FIELD(winclause);
+ WRITE_NODE_FIELD(qual);
+ WRITE_BOOL_FIELD(topwindow);
}
static void
WRITE_INT_FIELD(frameOptions);
WRITE_NODE_FIELD(startOffset);
WRITE_NODE_FIELD(endOffset);
+ WRITE_NODE_FIELD(runCondition);
WRITE_OID_FIELD(startInRangeFunc);
WRITE_OID_FIELD(endInRangeFunc);
WRITE_OID_FIELD(inRangeColl);
READ_INT_FIELD(frameOptions);
READ_NODE_FIELD(startOffset);
READ_NODE_FIELD(endOffset);
+ READ_NODE_FIELD(runCondition);
READ_OID_FIELD(startInRangeFunc);
READ_OID_FIELD(endInRangeFunc);
READ_OID_FIELD(inRangeColl);
READ_INT_FIELD(frameOptions);
READ_NODE_FIELD(startOffset);
READ_NODE_FIELD(endOffset);
+ READ_NODE_FIELD(runCondition);
+ READ_NODE_FIELD(runConditionOrig);
READ_OID_FIELD(startInRangeFunc);
READ_OID_FIELD(endInRangeFunc);
READ_OID_FIELD(inRangeColl);
READ_BOOL_FIELD(inRangeAsc);
READ_BOOL_FIELD(inRangeNullsFirst);
+ READ_BOOL_FIELD(topWindow);
READ_DONE();
}
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
#ifdef OPTIMIZER_DEBUG
#include "nodes/print.h"
#endif
return false;
}
+/*
+ * find_window_run_conditions
+ * Determine if 'wfunc' is really a WindowFunc and call its prosupport
+ * function to determine the function's monotonic properties. We then
+ * see if 'opexpr' can be used to short-circuit execution.
+ *
+ * For example row_number() over (order by ...) always produces a value one
+ * higher than the previous. If someone has a window function in a subquery
+ * and has a WHERE clause in the outer query to filter rows <= 10, then we may
+ * as well stop processing the windowagg once the row number reaches 11. Here
+ * we check if 'opexpr' might help us to stop doing needless extra processing
+ * in WindowAgg nodes.
+ *
+ * '*keep_original' is set to true if the caller should also use 'opexpr' for
+ * its original purpose. This is set to false if the caller can assume that
+ * the run condition will handle all of the required filtering.
+ *
+ * Returns true if 'opexpr' was found to be useful and was added to the
+ * WindowClauses runCondition. We also set *keep_original accordingly.
+ * If the 'opexpr' cannot be used then we set *keep_original to true and
+ * return false.
+ */
+static bool
+find_window_run_conditions(Query *subquery, RangeTblEntry *rte, Index rti,
+ AttrNumber attno, WindowFunc *wfunc, OpExpr *opexpr,
+ bool wfunc_left, bool *keep_original)
+{
+ Oid prosupport;
+ Expr *otherexpr;
+ SupportRequestWFuncMonotonic req;
+ SupportRequestWFuncMonotonic *res;
+ WindowClause *wclause;
+ List *opinfos;
+ OpExpr *runopexpr;
+ Oid runoperator;
+ ListCell *lc;
+
+ *keep_original = true;
+
+ while (IsA(wfunc, RelabelType))
+ wfunc = (WindowFunc *) ((RelabelType *) wfunc)->arg;
+
+ /* we can only work with window functions */
+ if (!IsA(wfunc, WindowFunc))
+ return false;
+
+ prosupport = get_func_support(wfunc->winfnoid);
+
+ /* Check if there's a support function for 'wfunc' */
+ if (!OidIsValid(prosupport))
+ return false;
+
+ /* get the Expr from the other side of the OpExpr */
+ if (wfunc_left)
+ otherexpr = lsecond(opexpr->args);
+ else
+ otherexpr = linitial(opexpr->args);
+
+ /*
+ * The value being compared must not change during the evaluation of the
+ * window partition.
+ */
+ if (!is_pseudo_constant_clause((Node *) otherexpr))
+ return false;
+
+ /* find the window clause belonging to the window function */
+ wclause = (WindowClause *) list_nth(subquery->windowClause,
+ wfunc->winref - 1);
+
+ req.type = T_SupportRequestWFuncMonotonic;
+ req.window_func = wfunc;
+ req.window_clause = wclause;
+
+ /* call the support function */
+ res = (SupportRequestWFuncMonotonic *)
+ DatumGetPointer(OidFunctionCall1(prosupport,
+ PointerGetDatum(&req)));
+
+ /*
+ * Nothing to do if the function is neither monotonically increasing nor
+ * monotonically decreasing.
+ */
+ if (res == NULL || res->monotonic == MONOTONICFUNC_NONE)
+ return false;
+
+ runopexpr = NULL;
+ runoperator = InvalidOid;
+ opinfos = get_op_btree_interpretation(opexpr->opno);
+
+ foreach(lc, opinfos)
+ {
+ OpBtreeInterpretation *opinfo = (OpBtreeInterpretation *) lfirst(lc);
+ int strategy = opinfo->strategy;
+
+ /* handle < / <= */
+ if (strategy == BTLessStrategyNumber ||
+ strategy == BTLessEqualStrategyNumber)
+ {
+ /*
+ * < / <= is supported for monotonically increasing functions in
+ * the form <wfunc> op <pseudoconst> and <pseudoconst> op <wfunc>
+ * for monotonically decreasing functions.
+ */
+ if ((wfunc_left && (res->monotonic & MONOTONICFUNC_INCREASING)) ||
+ (!wfunc_left && (res->monotonic & MONOTONICFUNC_DECREASING)))
+ {
+ *keep_original = false;
+ runopexpr = opexpr;
+ runoperator = opexpr->opno;
+ }
+ break;
+ }
+ /* handle > / >= */
+ else if (strategy == BTGreaterStrategyNumber ||
+ strategy == BTGreaterEqualStrategyNumber)
+ {
+ /*
+ * > / >= is supported for monotonically decreasing functions in
+ * the form <wfunc> op <pseudoconst> and <pseudoconst> op <wfunc>
+ * for monotonically increasing functions.
+ */
+ if ((wfunc_left && (res->monotonic & MONOTONICFUNC_DECREASING)) ||
+ (!wfunc_left && (res->monotonic & MONOTONICFUNC_INCREASING)))
+ {
+ *keep_original = false;
+ runopexpr = opexpr;
+ runoperator = opexpr->opno;
+ }
+ break;
+ }
+ /* handle = */
+ else if (strategy == BTEqualStrategyNumber)
+ {
+ int16 newstrategy;
+
+ /*
+ * When both monotonically increasing and decreasing then the
+ * return value of the window function will be the same each time.
+ * We can simply use 'opexpr' as the run condition without
+ * modifying it.
+ */
+ if ((res->monotonic & MONOTONICFUNC_BOTH) == MONOTONICFUNC_BOTH)
+ {
+ *keep_original = false;
+ runopexpr = opexpr;
+ break;
+ }
+
+ /*
+ * When monotonically increasing we make a qual with <wfunc> <=
+ * <value> or <value> >= <wfunc> in order to filter out values
+ * which are above the value in the equality condition. For
+ * monotonically decreasing functions we want to filter values
+ * below the value in the equality condition.
+ */
+ if (res->monotonic & MONOTONICFUNC_INCREASING)
+ newstrategy = wfunc_left ? BTLessEqualStrategyNumber : BTGreaterEqualStrategyNumber;
+ else
+ newstrategy = wfunc_left ? BTGreaterEqualStrategyNumber : BTLessEqualStrategyNumber;
+
+ /* We must keep the original equality qual */
+ *keep_original = true;
+ runopexpr = opexpr;
+
+ /* determine the operator to use for the runCondition qual */
+ runoperator = get_opfamily_member(opinfo->opfamily_id,
+ opinfo->oplefttype,
+ opinfo->oprighttype,
+ newstrategy);
+ break;
+ }
+ }
+
+ if (runopexpr != NULL)
+ {
+ Expr *newexpr;
+
+ /*
+ * Build the qual required for the run condition keeping the
+ * WindowFunc on the same side as it was originally.
+ */
+ if (wfunc_left)
+ newexpr = make_opclause(runoperator,
+ runopexpr->opresulttype,
+ runopexpr->opretset, (Expr *) wfunc,
+ otherexpr, runopexpr->opcollid,
+ runopexpr->inputcollid);
+ else
+ newexpr = make_opclause(runoperator,
+ runopexpr->opresulttype,
+ runopexpr->opretset,
+ otherexpr, (Expr *) wfunc,
+ runopexpr->opcollid,
+ runopexpr->inputcollid);
+
+ wclause->runCondition = lappend(wclause->runCondition, newexpr);
+
+ return true;
+ }
+
+ /* unsupported OpExpr */
+ return false;
+}
+
+/*
+ * check_and_push_window_quals
+ * Check if 'clause' is a qual that can be pushed into a WindowFunc's
+ * WindowClause as a 'runCondition' qual. These, when present, allow
+ * some unnecessary work to be skipped during execution.
+ *
+ * Returns true if the caller still must keep the original qual or false if
+ * the caller can safely ignore the original qual because the WindowAgg node
+ * will use the runCondition to stop returning tuples.
+ */
+static bool
+check_and_push_window_quals(Query *subquery, RangeTblEntry *rte, Index rti,
+ Node *clause)
+{
+ OpExpr *opexpr = (OpExpr *) clause;
+ bool keep_original = true;
+ Var *var1;
+ Var *var2;
+
+ /* We're only able to use OpExprs with 2 operands */
+ if (!IsA(opexpr, OpExpr))
+ return true;
+
+ if (list_length(opexpr->args) != 2)
+ return true;
+
+ /*
+ * Check for plain Vars that reference window functions in the subquery.
+ * If we find any, we'll ask find_window_run_conditions() if 'opexpr' can
+ * be used as part of the run condition.
+ */
+
+ /* Check the left side of the OpExpr */
+ var1 = linitial(opexpr->args);
+ if (IsA(var1, Var) && var1->varattno > 0)
+ {
+ TargetEntry *tle = list_nth(subquery->targetList, var1->varattno - 1);
+ WindowFunc *wfunc = (WindowFunc *) tle->expr;
+
+ if (find_window_run_conditions(subquery, rte, rti, tle->resno, wfunc,
+ opexpr, true, &keep_original))
+ return keep_original;
+ }
+
+ /* and check the right side */
+ var2 = lsecond(opexpr->args);
+ if (IsA(var2, Var) && var2->varattno > 0)
+ {
+ TargetEntry *tle = list_nth(subquery->targetList, var2->varattno - 1);
+ WindowFunc *wfunc = (WindowFunc *) tle->expr;
+
+ if (find_window_run_conditions(subquery, rte, rti, tle->resno, wfunc,
+ opexpr, false, &keep_original))
+ return keep_original;
+ }
+
+ return true;
+}
+
/*
* set_subquery_pathlist
* Generate SubqueryScan access paths for a subquery RTE
foreach(l, rel->baserestrictinfo)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+ Node *clause = (Node *) rinfo->clause;
if (!rinfo->pseudoconstant &&
qual_is_pushdown_safe(subquery, rti, rinfo, &safetyInfo))
{
- Node *clause = (Node *) rinfo->clause;
-
/* Push it down */
subquery_push_qual(subquery, rte, rti, clause);
}
else
{
- /* Keep it in the upper query */
- upperrestrictlist = lappend(upperrestrictlist, rinfo);
+ /*
+ * Since we can't push the qual down into the subquery, check
+ * if it happens to reference a window function. If so then
+ * it might be useful to use for the WindowAgg's runCondition.
+ */
+ if (!subquery->hasWindowFuncs ||
+ check_and_push_window_quals(subquery, rte, rti, clause))
+ {
+ /*
+ * subquery has no window funcs or the clause is not a
+ * suitable window run condition qual or it is, but the
+ * original must also be kept in the upper query.
+ */
+ upperrestrictlist = lappend(upperrestrictlist, rinfo);
+ }
}
}
rel->baserestrictinfo = upperrestrictlist;
int frameOptions, Node *startOffset, Node *endOffset,
Oid startInRangeFunc, Oid endInRangeFunc,
Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst,
+ List *runCondition, List *qual, bool topWindow,
Plan *lefttree);
static Group *make_group(List *tlist, List *qual, int numGroupCols,
AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
wc->inRangeColl,
wc->inRangeAsc,
wc->inRangeNullsFirst,
+ wc->runCondition,
+ best_path->qual,
+ best_path->topwindow,
subplan);
copy_generic_path_info(&plan->plan, (Path *) best_path);
int frameOptions, Node *startOffset, Node *endOffset,
Oid startInRangeFunc, Oid endInRangeFunc,
Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst,
- Plan *lefttree)
+ List *runCondition, List *qual, bool topWindow, Plan *lefttree)
{
WindowAgg *node = makeNode(WindowAgg);
Plan *plan = &node->plan;
node->frameOptions = frameOptions;
node->startOffset = startOffset;
node->endOffset = endOffset;
+ node->runCondition = runCondition;
+ /* a duplicate of the above for EXPLAIN */
+ node->runConditionOrig = runCondition;
node->startInRangeFunc = startInRangeFunc;
node->endInRangeFunc = endInRangeFunc;
node->inRangeColl = inRangeColl;
node->inRangeAsc = inRangeAsc;
node->inRangeNullsFirst = inRangeNullsFirst;
+ node->topWindow = topWindow;
plan->targetlist = tlist;
plan->lefttree = lefttree;
plan->righttree = NULL;
- /* WindowAgg nodes never have a qual clause */
- plan->qual = NIL;
+ plan->qual = qual;
return node;
}
{
PathTarget *window_target;
ListCell *l;
+ List *topqual = NIL;
/*
* Since each window clause could require a different sort order, we stack
List *window_pathkeys;
int presorted_keys;
bool is_sorted;
+ bool topwindow;
window_pathkeys = make_pathkeys_for_window(root,
wc,
window_target = output_target;
}
+ /* mark the final item in the list as the top-level window */
+ topwindow = foreach_current_index(l) == list_length(activeWindows) - 1;
+
+ /*
+ * Accumulate all of the runConditions from each intermediate
+ * WindowClause. The top-level WindowAgg must pass these as a qual so
+ * that it filters out unwanted tuples correctly.
+ */
+ if (!topwindow)
+ topqual = list_concat(topqual, wc->runCondition);
+
path = (Path *)
create_windowagg_path(root, window_rel, path, window_target,
wflists->windowFuncs[wc->winref],
- wc);
+ wc, topwindow ? topqual : NIL, topwindow);
}
add_path(window_rel, path);
double num_exec;
} fix_upper_expr_context;
+typedef struct
+{
+ PlannerInfo *root;
+ indexed_tlist *subplan_itlist;
+ int newvarno;
+} fix_windowagg_cond_context;
+
/*
* Selecting the best alternative in an AlternativeSubPlan expression requires
* estimating how many times that expression will be evaluated. For an
Plan *topplan,
Index resultRelation,
int rtoffset);
+static List *set_windowagg_runcondition_references(PlannerInfo *root,
+ List *runcondition,
+ Plan *plan);
/*****************************************************************************
{
WindowAgg *wplan = (WindowAgg *) plan;
+ /*
+ * Adjust the WindowAgg's run conditions by swapping the
+ * WindowFuncs references out to instead reference the Var in
+ * the scan slot so that when the executor evaluates the
+ * runCondition, it receives the WindowFunc's value from the
+ * slot that the result has just been stored into rather than
+ * evaluating the WindowFunc all over again.
+ */
+ wplan->runCondition = set_windowagg_runcondition_references(root,
+ wplan->runCondition,
+ (Plan *) wplan);
+
set_upper_references(root, plan, rtoffset);
/*
fix_scan_expr(root, wplan->startOffset, rtoffset, 1);
wplan->endOffset =
fix_scan_expr(root, wplan->endOffset, rtoffset, 1);
+ wplan->runCondition = fix_scan_list(root,
+ wplan->runCondition,
+ rtoffset,
+ NUM_EXEC_TLIST(plan));
+ wplan->runConditionOrig = fix_scan_list(root,
+ wplan->runConditionOrig,
+ rtoffset,
+ NUM_EXEC_TLIST(plan));
}
break;
case T_Result:
return rlist;
}
+/*
+ * fix_windowagg_condition_expr_mutator
+ * Mutator function for replacing WindowFuncs with the corresponding Var
+ * in the targetlist which references that WindowFunc.
+ */
+static Node *
+fix_windowagg_condition_expr_mutator(Node *node,
+ fix_windowagg_cond_context *context)
+{
+ if (node == NULL)
+ return NULL;
+
+ if (IsA(node, WindowFunc))
+ {
+ Var *newvar;
+
+ newvar = search_indexed_tlist_for_non_var((Expr *) node,
+ context->subplan_itlist,
+ context->newvarno);
+ if (newvar)
+ return (Node *) newvar;
+ elog(ERROR, "WindowFunc not found in subplan target lists");
+ }
+
+ return expression_tree_mutator(node,
+ fix_windowagg_condition_expr_mutator,
+ (void *) context);
+}
+
+/*
+ * fix_windowagg_condition_expr
+ * Converts references in 'runcondition' so that any WindowFunc
+ * references are swapped out for a Var which references the matching
+ * WindowFunc in 'subplan_itlist'.
+ */
+static List *
+fix_windowagg_condition_expr(PlannerInfo *root,
+ List *runcondition,
+ indexed_tlist *subplan_itlist)
+{
+ fix_windowagg_cond_context context;
+
+ context.root = root;
+ context.subplan_itlist = subplan_itlist;
+ context.newvarno = 0;
+
+ return (List *) fix_windowagg_condition_expr_mutator((Node *) runcondition,
+ &context);
+}
+
+/*
+ * set_windowagg_runcondition_references
+ * Converts references in 'runcondition' so that any WindowFunc
+ * references are swapped out for a Var which references the matching
+ * WindowFunc in 'plan' targetlist.
+ */
+static List *
+set_windowagg_runcondition_references(PlannerInfo *root,
+ List *runcondition,
+ Plan *plan)
+{
+ List *newlist;
+ indexed_tlist *itlist;
+
+ itlist = build_tlist_index(plan->targetlist);
+
+ newlist = fix_windowagg_condition_expr(root, runcondition, itlist);
+
+ pfree(itlist);
+
+ return newlist;
+}
/*****************************************************************************
* QUERY DEPENDENCY MANAGEMENT
* 'target' is the PathTarget to be computed
* 'windowFuncs' is a list of WindowFunc structs
* 'winclause' is a WindowClause that is common to all the WindowFuncs
+ * 'qual' WindowClause.runconditions from lower-level WindowAggPaths.
+ * Must always be NIL when topwindow == false
+ * 'topwindow' pass as true only for the top-level WindowAgg. False for all
+ * intermediate WindowAggs.
*
* The input must be sorted according to the WindowClause's PARTITION keys
* plus ORDER BY keys.
Path *subpath,
PathTarget *target,
List *windowFuncs,
- WindowClause *winclause)
+ WindowClause *winclause,
+ List *qual,
+ bool topwindow)
{
WindowAggPath *pathnode = makeNode(WindowAggPath);
+ /* qual can only be set for the topwindow */
+ Assert(qual == NIL || topwindow);
+
pathnode->path.pathtype = T_WindowAgg;
pathnode->path.parent = rel;
pathnode->path.pathtarget = target;
pathnode->subpath = subpath;
pathnode->winclause = winclause;
+ pathnode->qual = qual;
+ pathnode->topwindow = topwindow;
/*
* For costing purposes, assume that there are no redundant partitioning
#include "nodes/supportnodes.h"
#include "optimizer/optimizer.h"
#include "utils/builtins.h"
+#include "utils/lsyscache.h"
typedef struct
return int8dec(fcinfo);
}
+/*
+ * int8inc_support
+ * prosupport function for int8inc() and int8inc_any()
+ */
+Datum
+int8inc_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ if (IsA(rawreq, SupportRequestWFuncMonotonic))
+ {
+ SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq;
+ MonotonicFunction monotonic = MONOTONICFUNC_NONE;
+ int frameOptions = req->window_clause->frameOptions;
+
+ /* No ORDER BY clause then all rows are peers */
+ if (req->window_clause->orderClause == NIL)
+ monotonic = MONOTONICFUNC_BOTH;
+ else
+ {
+ /*
+ * Otherwise take into account the frame options. When the frame
+ * bound is the start of the window then the resulting value can
+ * never decrease, therefore is monotonically increasing
+ */
+ if (frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING)
+ monotonic |= MONOTONICFUNC_INCREASING;
+
+ /*
+ * Likewise, if the frame bound is the end of the window then the
+ * resulting value can never decrease.
+ */
+ if (frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING)
+ monotonic |= MONOTONICFUNC_DECREASING;
+ }
+
+ req->monotonic = monotonic;
+ PG_RETURN_POINTER(req);
+ }
+
+ PG_RETURN_POINTER(NULL);
+}
+
Datum
int8larger(PG_FUNCTION_ARGS)
*/
#include "postgres.h"
+#include "nodes/supportnodes.h"
#include "utils/builtins.h"
#include "windowapi.h"
PG_RETURN_INT64(curpos + 1);
}
+/*
+ * window_row_number_support
+ * prosupport function for window_row_number()
+ */
+Datum
+window_row_number_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ if (IsA(rawreq, SupportRequestWFuncMonotonic))
+ {
+ SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq;
+
+ /* row_number() is monotonically increasing */
+ req->monotonic = MONOTONICFUNC_INCREASING;
+ PG_RETURN_POINTER(req);
+ }
+
+ PG_RETURN_POINTER(NULL);
+}
/*
* rank
PG_RETURN_INT64(context->rank);
}
+/*
+ * window_rank_support
+ * prosupport function for window_rank()
+ */
+Datum
+window_rank_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ if (IsA(rawreq, SupportRequestWFuncMonotonic))
+ {
+ SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq;
+
+ /* rank() is monotonically increasing */
+ req->monotonic = MONOTONICFUNC_INCREASING;
+ PG_RETURN_POINTER(req);
+ }
+
+ PG_RETURN_POINTER(NULL);
+}
+
/*
* dense_rank
* Rank increases by 1 when key columns change.
PG_RETURN_INT64(context->rank);
}
+/*
+ * window_dense_rank_support
+ * prosupport function for window_dense_rank()
+ */
+Datum
+window_dense_rank_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ if (IsA(rawreq, SupportRequestWFuncMonotonic))
+ {
+ SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq;
+
+ /* dense_rank() is monotonically increasing */
+ req->monotonic = MONOTONICFUNC_INCREASING;
+ PG_RETURN_POINTER(req);
+ }
+
+ PG_RETURN_POINTER(NULL);
+}
+
/*
* percent_rank
* return fraction between 0 and 1 inclusive,
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202204075
+#define CATALOG_VERSION_NO 202204076
#endif
# count has two forms: count(any) and count(*)
{ oid => '2147',
descr => 'number of input rows for which the input expression is not null',
- proname => 'count', prokind => 'a', proisstrict => 'f', prorettype => 'int8',
- proargtypes => 'any', prosrc => 'aggregate_dummy' },
+ proname => 'count', prosupport => 'int8inc_support', prokind => 'a',
+ proisstrict => 'f', prorettype => 'int8', proargtypes => 'any',
+ prosrc => 'aggregate_dummy' },
{ oid => '2803', descr => 'number of input rows',
- proname => 'count', prokind => 'a', proisstrict => 'f', prorettype => 'int8',
- proargtypes => '', prosrc => 'aggregate_dummy' },
+ proname => 'count', prosupport => 'int8inc_support', prokind => 'a',
+ proisstrict => 'f', prorettype => 'int8', proargtypes => '',
+ prosrc => 'aggregate_dummy' },
+{ oid => '8802', descr => 'planner support for count run condition',
+ proname => 'int8inc_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'int8inc_support' },
{ oid => '2718',
descr => 'population variance of bigint input values (square of the population standard deviation)',
# SQL-spec window functions
{ oid => '3100', descr => 'row number within partition',
- proname => 'row_number', prokind => 'w', proisstrict => 'f',
- prorettype => 'int8', proargtypes => '', prosrc => 'window_row_number' },
+ proname => 'row_number', prosupport => 'window_row_number_support',
+ prokind => 'w', proisstrict => 'f', prorettype => 'int8',
+ proargtypes => '', prosrc => 'window_row_number' },
+{ oid => '8799', descr => 'planner support for row_number run condition',
+ proname => 'window_row_number_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'window_row_number_support' },
{ oid => '3101', descr => 'integer rank with gaps',
- proname => 'rank', prokind => 'w', proisstrict => 'f', prorettype => 'int8',
+ proname => 'rank', prosupport => 'window_rank_support',
+ prokind => 'w', proisstrict => 'f', prorettype => 'int8',
proargtypes => '', prosrc => 'window_rank' },
+{ oid => '8800', descr => 'planner support for rank run condition',
+ proname => 'window_rank_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'window_rank_support' },
{ oid => '3102', descr => 'integer rank without gaps',
- proname => 'dense_rank', prokind => 'w', proisstrict => 'f',
- prorettype => 'int8', proargtypes => '', prosrc => 'window_dense_rank' },
+ proname => 'dense_rank', prosupport => 'window_dense_rank_support',
+ prokind => 'w', proisstrict => 'f', prorettype => 'int8', proargtypes => '',
+ prosrc => 'window_dense_rank' },
+{ oid => '8801', descr => 'planner support for dense rank run condition',
+ proname => 'window_dense_rank_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'window_dense_rank_support' },
{ oid => '3103', descr => 'fractional rank within partition',
proname => 'percent_rank', prokind => 'w', proisstrict => 'f',
prorettype => 'float8', proargtypes => '', prosrc => 'window_percent_rank' },
typedef struct WindowStatePerFuncData *WindowStatePerFunc;
typedef struct WindowStatePerAggData *WindowStatePerAgg;
+/*
+ * WindowAggStatus -- Used to track the status of WindowAggState
+ */
+typedef enum WindowAggStatus
+{
+ WINDOWAGG_DONE, /* No more processing to do */
+ WINDOWAGG_RUN, /* Normal processing of window funcs */
+ WINDOWAGG_PASSTHROUGH, /* Don't eval window funcs */
+ WINDOWAGG_PASSTHROUGH_STRICT /* Pass-through plus don't store new
+ * tuples during spool */
+} WindowAggStatus;
+
typedef struct WindowAggState
{
ScanState ss; /* its first field is NodeTag */
struct WindowObjectData *agg_winobj; /* winobj for aggregate fetches */
int64 aggregatedbase; /* start row for current aggregates */
int64 aggregatedupto; /* rows before this one are aggregated */
+ WindowAggStatus status; /* run status of WindowAggState */
int frameOptions; /* frame_clause options, see WindowDef */
ExprState *startOffset; /* expression for starting bound offset */
MemoryContext curaggcontext; /* current aggregate's working data */
ExprContext *tmpcontext; /* short-term evaluation context */
+ ExprState *runcondition; /* Condition which must remain true otherwise
+ * execution of the WindowAgg will finish or
+ * go into pass-through mode. NULL when there
+ * is no such condition. */
+
+ bool use_pass_through; /* When false, stop execution when
+ * runcondition is no longer true. Else
+ * just stop evaluating window funcs. */
+ bool top_window; /* true if this is the top-most WindowAgg or
+ * the only WindowAgg in this query level */
bool all_first; /* true if the scan is starting */
- bool all_done; /* true if the scan is finished */
bool partition_spooled; /* true if all tuples in current partition
* have been spooled into tuplestore */
bool more_partitions; /* true if there's more partitions after
T_SupportRequestSelectivity, /* in nodes/supportnodes.h */
T_SupportRequestCost, /* in nodes/supportnodes.h */
T_SupportRequestRows, /* in nodes/supportnodes.h */
- T_SupportRequestIndexCondition /* in nodes/supportnodes.h */
+ T_SupportRequestIndexCondition, /* in nodes/supportnodes.h */
+ T_SupportRequestWFuncMonotonic /* in nodes/supportnodes.h */
} NodeTag;
/*
int frameOptions; /* frame_clause options, see WindowDef */
Node *startOffset; /* expression for starting bound, if any */
Node *endOffset; /* expression for ending bound, if any */
+ List *runCondition; /* qual to help short-circuit execution */
Oid startInRangeFunc; /* in_range function for startOffset */
Oid endInRangeFunc; /* in_range function for endOffset */
Oid inRangeColl; /* collation for in_range tests */
Path path;
Path *subpath; /* path representing input source */
WindowClause *winclause; /* WindowClause we'll be using */
+ List *qual; /* lower-level WindowAgg runconditions */
+ bool topwindow; /* false for all apart from the WindowAgg
+ * that's closest to the root of the plan */
} WindowAggPath;
/*
int frameOptions; /* frame_clause options, see WindowDef */
Node *startOffset; /* expression for starting bound, if any */
Node *endOffset; /* expression for ending bound, if any */
+ List *runCondition; /* qual to help short-circuit execution */
+ List *runConditionOrig; /* runCondition for display in EXPLAIN */
/* these fields are used with RANGE offset PRECEDING/FOLLOWING: */
Oid startInRangeFunc; /* in_range function for startOffset */
Oid endInRangeFunc; /* in_range function for endOffset */
Oid inRangeColl; /* collation for in_range tests */
bool inRangeAsc; /* use ASC sort order for in_range tests? */
bool inRangeNullsFirst; /* nulls sort first for in_range tests? */
+ bool topWindow; /* false for all apart from the WindowAgg
+ * that's closest to the root of the plan */
} WindowAgg;
/* ----------------
uint32 hashValue; /* hash value of object's cache lookup key */
} PlanInvalItem;
+/*
+ * MonotonicFunction
+ *
+ * Allows the planner to track monotonic properties of functions. A function
+ * is monotonically increasing if a subsequent call cannot yield a lower value
+ * than the previous call. A monotonically decreasing function cannot yield a
+ * higher value on subsequent calls, and a function which is both must return
+ * the same value on each call.
+ */
+typedef enum MonotonicFunction
+{
+ MONOTONICFUNC_NONE = 0,
+ MONOTONICFUNC_INCREASING = (1 << 0),
+ MONOTONICFUNC_DECREASING = (1 << 1),
+ MONOTONICFUNC_BOTH = MONOTONICFUNC_INCREASING | MONOTONICFUNC_DECREASING
+} MonotonicFunction;
+
#endif /* PLANNODES_H */
#ifndef SUPPORTNODES_H
#define SUPPORTNODES_H
-#include "nodes/primnodes.h"
+#include "nodes/plannodes.h"
struct PlannerInfo; /* avoid including pathnodes.h here */
struct IndexOptInfo;
struct SpecialJoinInfo;
-
+struct WindowClause;
/*
* The Simplify request allows the support function to perform plan-time
* equivalent of the function call */
} SupportRequestIndexCondition;
+/* ----------
+ * To support more efficient query execution of any monotonically increasing
+ * and/or monotonically decreasing window functions, we support calling the
+ * window function's prosupport function passing along this struct whenever
+ * the planner sees an OpExpr qual directly reference a window function in a
+ * subquery. When the planner encounters this, we populate this struct and
+ * pass it along to the window function's prosupport function so that it can
+ * evaluate if the given WindowFunc is;
+ *
+ * a) monotonically increasing, or
+ * b) monotonically decreasing, or
+ * c) both monotonically increasing and decreasing, or
+ * d) none of the above.
+ *
+ * A function that is monotonically increasing can never return a value that
+ * is lower than a value returned in a "previous call". A monotonically
+ * decreasing function can never return a value higher than a value returned
+ * in a previous call. A function that is both must return the same value
+ * each time.
+ *
+ * We define "previous call" to mean a previous call to the same WindowFunc
+ * struct in the same window partition.
+ *
+ * row_number() is an example of a monotonically increasing function. The
+ * return value will be reset back to 1 in each new partition. An example of
+ * a monotonically increasing and decreasing function is COUNT(*) OVER ().
+ * Since there is no ORDER BY clause in this example, all rows in the
+ * partition are peers and all rows within the partition will be within the
+ * frame bound. Likewise for COUNT(*) OVER(ORDER BY a ROWS BETWEEN UNBOUNDED
+ * PRECEDING AND UNBOUNDED FOLLOWING).
+ *
+ * COUNT(*) OVER (ORDER BY a ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
+ * is an example of a monotonically decreasing function.
+ *
+ * Implementations must only concern themselves with the given WindowFunc
+ * being monotonic in a single partition.
+ *
+ * Inputs:
+ * 'window_func' is the pointer to the window function being called.
+ *
+ * 'window_clause' pointer to the WindowClause data. Support functions can
+ * use this to check frame bounds, etc.
+ *
+ * Outputs:
+ * 'monotonic' the resulting MonotonicFunction value for the given input
+ * window function and window clause.
+ * ----------
+ */
+typedef struct SupportRequestWFuncMonotonic
+{
+ NodeTag type;
+
+ /* Input fields: */
+ WindowFunc *window_func; /* Pointer to the window function data */
+ struct WindowClause *window_clause; /* Pointer to the window clause data */
+
+ /* Output fields: */
+ MonotonicFunction monotonic;
+} SupportRequestWFuncMonotonic;
+
#endif /* SUPPORTNODES_H */
Path *subpath,
PathTarget *target,
List *windowFuncs,
- WindowClause *winclause);
+ WindowClause *winclause,
+ List *qual,
+ bool topwindow);
extern SetOpPath *create_setop_path(PlannerInfo *root,
RelOptInfo *rel,
Path *subpath,
-> Seq Scan on empsalary
(9 rows)
+-- Test window function run conditions are properly pushed down into the
+-- WindowAgg
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ row_number() OVER (ORDER BY empno) rn
+ FROM empsalary) emp
+WHERE rn < 3;
+ QUERY PLAN
+----------------------------------------------
+ WindowAgg
+ Run Condition: (row_number() OVER (?) < 3)
+ -> Sort
+ Sort Key: empsalary.empno
+ -> Seq Scan on empsalary
+(5 rows)
+
+-- The following 3 statements should result the same result.
+SELECT * FROM
+ (SELECT empno,
+ row_number() OVER (ORDER BY empno) rn
+ FROM empsalary) emp
+WHERE rn < 3;
+ empno | rn
+-------+----
+ 1 | 1
+ 2 | 2
+(2 rows)
+
+SELECT * FROM
+ (SELECT empno,
+ row_number() OVER (ORDER BY empno) rn
+ FROM empsalary) emp
+WHERE 3 > rn;
+ empno | rn
+-------+----
+ 1 | 1
+ 2 | 2
+(2 rows)
+
+SELECT * FROM
+ (SELECT empno,
+ row_number() OVER (ORDER BY empno) rn
+ FROM empsalary) emp
+WHERE 2 >= rn;
+ empno | rn
+-------+----
+ 1 | 1
+ 2 | 2
+(2 rows)
+
+-- Ensure r <= 3 is pushed down into the run condition of the window agg
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ rank() OVER (ORDER BY salary DESC) r
+ FROM empsalary) emp
+WHERE r <= 3;
+ QUERY PLAN
+-----------------------------------------
+ WindowAgg
+ Run Condition: (rank() OVER (?) <= 3)
+ -> Sort
+ Sort Key: empsalary.salary DESC
+ -> Seq Scan on empsalary
+(5 rows)
+
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ rank() OVER (ORDER BY salary DESC) r
+ FROM empsalary) emp
+WHERE r <= 3;
+ empno | salary | r
+-------+--------+---
+ 8 | 6000 | 1
+ 10 | 5200 | 2
+ 11 | 5200 | 2
+(3 rows)
+
+-- Ensure dr = 1 is converted to dr <= 1 to get all rows leading up to dr = 1
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ dense_rank() OVER (ORDER BY salary DESC) dr
+ FROM empsalary) emp
+WHERE dr = 1;
+ QUERY PLAN
+-----------------------------------------------------
+ Subquery Scan on emp
+ Filter: (emp.dr = 1)
+ -> WindowAgg
+ Run Condition: (dense_rank() OVER (?) <= 1)
+ -> Sort
+ Sort Key: empsalary.salary DESC
+ -> Seq Scan on empsalary
+(7 rows)
+
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ dense_rank() OVER (ORDER BY salary DESC) dr
+ FROM empsalary) emp
+WHERE dr = 1;
+ empno | salary | dr
+-------+--------+----
+ 8 | 6000 | 1
+(1 row)
+
+-- Check COUNT() and COUNT(*)
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(*) OVER (ORDER BY salary DESC) c
+ FROM empsalary) emp
+WHERE c <= 3;
+ QUERY PLAN
+-------------------------------------------
+ WindowAgg
+ Run Condition: (count(*) OVER (?) <= 3)
+ -> Sort
+ Sort Key: empsalary.salary DESC
+ -> Seq Scan on empsalary
+(5 rows)
+
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(*) OVER (ORDER BY salary DESC) c
+ FROM empsalary) emp
+WHERE c <= 3;
+ empno | salary | c
+-------+--------+---
+ 8 | 6000 | 1
+ 10 | 5200 | 3
+ 11 | 5200 | 3
+(3 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(empno) OVER (ORDER BY salary DESC) c
+ FROM empsalary) emp
+WHERE c <= 3;
+ QUERY PLAN
+---------------------------------------------------------
+ WindowAgg
+ Run Condition: (count(empsalary.empno) OVER (?) <= 3)
+ -> Sort
+ Sort Key: empsalary.salary DESC
+ -> Seq Scan on empsalary
+(5 rows)
+
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(empno) OVER (ORDER BY salary DESC) c
+ FROM empsalary) emp
+WHERE c <= 3;
+ empno | salary | c
+-------+--------+---
+ 8 | 6000 | 1
+ 10 | 5200 | 3
+ 11 | 5200 | 3
+(3 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(*) OVER (ORDER BY salary DESC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) c
+ FROM empsalary) emp
+WHERE c >= 3;
+ QUERY PLAN
+-------------------------------------------
+ WindowAgg
+ Run Condition: (count(*) OVER (?) >= 3)
+ -> Sort
+ Sort Key: empsalary.salary DESC
+ -> Seq Scan on empsalary
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(*) OVER () c
+ FROM empsalary) emp
+WHERE 11 <= c;
+ QUERY PLAN
+--------------------------------------------
+ WindowAgg
+ Run Condition: (11 <= count(*) OVER (?))
+ -> Seq Scan on empsalary
+(3 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(*) OVER (ORDER BY salary DESC) c,
+ dense_rank() OVER (ORDER BY salary DESC) dr
+ FROM empsalary) emp
+WHERE dr = 1;
+ QUERY PLAN
+-----------------------------------------------------
+ Subquery Scan on emp
+ Filter: (emp.dr = 1)
+ -> WindowAgg
+ Run Condition: (dense_rank() OVER (?) <= 1)
+ -> Sort
+ Sort Key: empsalary.salary DESC
+ -> Seq Scan on empsalary
+(7 rows)
+
+-- Ensure we get a run condition when there's a PARTITION BY clause
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ depname,
+ row_number() OVER (PARTITION BY depname ORDER BY empno) rn
+ FROM empsalary) emp
+WHERE rn < 3;
+ QUERY PLAN
+------------------------------------------------------
+ WindowAgg
+ Run Condition: (row_number() OVER (?) < 3)
+ -> Sort
+ Sort Key: empsalary.depname, empsalary.empno
+ -> Seq Scan on empsalary
+(5 rows)
+
+-- and ensure we get the correct results from the above plan
+SELECT * FROM
+ (SELECT empno,
+ depname,
+ row_number() OVER (PARTITION BY depname ORDER BY empno) rn
+ FROM empsalary) emp
+WHERE rn < 3;
+ empno | depname | rn
+-------+-----------+----
+ 7 | develop | 1
+ 8 | develop | 2
+ 2 | personnel | 1
+ 5 | personnel | 2
+ 1 | sales | 1
+ 3 | sales | 2
+(6 rows)
+
+-- likewise with count(empno) instead of row_number()
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ depname,
+ salary,
+ count(empno) OVER (PARTITION BY depname ORDER BY salary DESC) c
+ FROM empsalary) emp
+WHERE c <= 3;
+ QUERY PLAN
+------------------------------------------------------------
+ WindowAgg
+ Run Condition: (count(empsalary.empno) OVER (?) <= 3)
+ -> Sort
+ Sort Key: empsalary.depname, empsalary.salary DESC
+ -> Seq Scan on empsalary
+(5 rows)
+
+-- and again, check the results are what we expect.
+SELECT * FROM
+ (SELECT empno,
+ depname,
+ salary,
+ count(empno) OVER (PARTITION BY depname ORDER BY salary DESC) c
+ FROM empsalary) emp
+WHERE c <= 3;
+ empno | depname | salary | c
+-------+-----------+--------+---
+ 8 | develop | 6000 | 1
+ 10 | develop | 5200 | 3
+ 11 | develop | 5200 | 3
+ 2 | personnel | 3900 | 1
+ 5 | personnel | 3500 | 2
+ 1 | sales | 5000 | 1
+ 4 | sales | 4800 | 3
+ 3 | sales | 4800 | 3
+(8 rows)
+
+-- Some more complex cases with multiple window clauses
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT *,
+ count(salary) OVER (PARTITION BY depname || '') c1, -- w1
+ row_number() OVER (PARTITION BY depname) rn, -- w2
+ count(*) OVER (PARTITION BY depname) c2, -- w2
+ count(*) OVER (PARTITION BY '' || depname) c3 -- w3
+ FROM empsalary
+) e WHERE rn <= 1 AND c1 <= 3;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------
+ Subquery Scan on e
+ -> WindowAgg
+ Filter: ((row_number() OVER (?)) <= 1)
+ Run Condition: (count(empsalary.salary) OVER (?) <= 3)
+ -> Sort
+ Sort Key: (((empsalary.depname)::text || ''::text))
+ -> WindowAgg
+ Run Condition: (row_number() OVER (?) <= 1)
+ -> Sort
+ Sort Key: empsalary.depname
+ -> WindowAgg
+ -> Sort
+ Sort Key: ((''::text || (empsalary.depname)::text))
+ -> Seq Scan on empsalary
+(14 rows)
+
+-- Ensure we correctly filter out all of the run conditions from each window
+SELECT * FROM
+ (SELECT *,
+ count(salary) OVER (PARTITION BY depname || '') c1, -- w1
+ row_number() OVER (PARTITION BY depname) rn, -- w2
+ count(*) OVER (PARTITION BY depname) c2, -- w2
+ count(*) OVER (PARTITION BY '' || depname) c3 -- w3
+ FROM empsalary
+) e WHERE rn <= 1 AND c1 <= 3;
+ depname | empno | salary | enroll_date | c1 | rn | c2 | c3
+-----------+-------+--------+-------------+----+----+----+----
+ personnel | 5 | 3500 | 12-10-2007 | 2 | 1 | 2 | 2
+ sales | 3 | 4800 | 08-01-2007 | 3 | 1 | 3 | 3
+(2 rows)
+
+-- Tests to ensure we don't push down the run condition when it's not valid to
+-- do so.
+-- Ensure we don't push down when the frame options show that the window
+-- function is not monotonically increasing
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(*) OVER (ORDER BY salary DESC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) c
+ FROM empsalary) emp
+WHERE c <= 3;
+ QUERY PLAN
+-----------------------------------------------
+ Subquery Scan on emp
+ Filter: (emp.c <= 3)
+ -> WindowAgg
+ -> Sort
+ Sort Key: empsalary.salary DESC
+ -> Seq Scan on empsalary
+(6 rows)
+
+-- Ensure we don't push down when the window function's monotonic properties
+-- don't match that of the clauses.
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(*) OVER (ORDER BY salary) c
+ FROM empsalary) emp
+WHERE 3 <= c;
+ QUERY PLAN
+------------------------------------------
+ Subquery Scan on emp
+ Filter: (3 <= emp.c)
+ -> WindowAgg
+ -> Sort
+ Sort Key: empsalary.salary
+ -> Seq Scan on empsalary
+(6 rows)
+
+-- Ensure we don't pushdown when there are multiple window clauses to evaluate
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(*) OVER (ORDER BY empno DESC) c,
+ dense_rank() OVER (ORDER BY salary DESC) dr
+ FROM empsalary) emp
+WHERE dr = 1;
+ QUERY PLAN
+-----------------------------------------------------------------
+ Subquery Scan on emp
+ Filter: (emp.dr = 1)
+ -> WindowAgg
+ Filter: ((dense_rank() OVER (?)) <= 1)
+ -> Sort
+ Sort Key: empsalary.empno DESC
+ -> WindowAgg
+ Run Condition: (dense_rank() OVER (?) <= 1)
+ -> Sort
+ Sort Key: empsalary.salary DESC
+ -> Seq Scan on empsalary
+(11 rows)
+
-- Test Sort node collapsing
EXPLAIN (COSTS OFF)
SELECT * FROM
FROM empsalary) emp
WHERE depname = 'sales';
+-- Test window function run conditions are properly pushed down into the
+-- WindowAgg
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ row_number() OVER (ORDER BY empno) rn
+ FROM empsalary) emp
+WHERE rn < 3;
+
+-- The following 3 statements should result the same result.
+SELECT * FROM
+ (SELECT empno,
+ row_number() OVER (ORDER BY empno) rn
+ FROM empsalary) emp
+WHERE rn < 3;
+
+SELECT * FROM
+ (SELECT empno,
+ row_number() OVER (ORDER BY empno) rn
+ FROM empsalary) emp
+WHERE 3 > rn;
+
+SELECT * FROM
+ (SELECT empno,
+ row_number() OVER (ORDER BY empno) rn
+ FROM empsalary) emp
+WHERE 2 >= rn;
+
+-- Ensure r <= 3 is pushed down into the run condition of the window agg
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ rank() OVER (ORDER BY salary DESC) r
+ FROM empsalary) emp
+WHERE r <= 3;
+
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ rank() OVER (ORDER BY salary DESC) r
+ FROM empsalary) emp
+WHERE r <= 3;
+
+-- Ensure dr = 1 is converted to dr <= 1 to get all rows leading up to dr = 1
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ dense_rank() OVER (ORDER BY salary DESC) dr
+ FROM empsalary) emp
+WHERE dr = 1;
+
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ dense_rank() OVER (ORDER BY salary DESC) dr
+ FROM empsalary) emp
+WHERE dr = 1;
+
+-- Check COUNT() and COUNT(*)
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(*) OVER (ORDER BY salary DESC) c
+ FROM empsalary) emp
+WHERE c <= 3;
+
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(*) OVER (ORDER BY salary DESC) c
+ FROM empsalary) emp
+WHERE c <= 3;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(empno) OVER (ORDER BY salary DESC) c
+ FROM empsalary) emp
+WHERE c <= 3;
+
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(empno) OVER (ORDER BY salary DESC) c
+ FROM empsalary) emp
+WHERE c <= 3;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(*) OVER (ORDER BY salary DESC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) c
+ FROM empsalary) emp
+WHERE c >= 3;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(*) OVER () c
+ FROM empsalary) emp
+WHERE 11 <= c;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(*) OVER (ORDER BY salary DESC) c,
+ dense_rank() OVER (ORDER BY salary DESC) dr
+ FROM empsalary) emp
+WHERE dr = 1;
+
+-- Ensure we get a run condition when there's a PARTITION BY clause
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ depname,
+ row_number() OVER (PARTITION BY depname ORDER BY empno) rn
+ FROM empsalary) emp
+WHERE rn < 3;
+
+-- and ensure we get the correct results from the above plan
+SELECT * FROM
+ (SELECT empno,
+ depname,
+ row_number() OVER (PARTITION BY depname ORDER BY empno) rn
+ FROM empsalary) emp
+WHERE rn < 3;
+
+-- likewise with count(empno) instead of row_number()
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ depname,
+ salary,
+ count(empno) OVER (PARTITION BY depname ORDER BY salary DESC) c
+ FROM empsalary) emp
+WHERE c <= 3;
+
+-- and again, check the results are what we expect.
+SELECT * FROM
+ (SELECT empno,
+ depname,
+ salary,
+ count(empno) OVER (PARTITION BY depname ORDER BY salary DESC) c
+ FROM empsalary) emp
+WHERE c <= 3;
+
+-- Some more complex cases with multiple window clauses
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT *,
+ count(salary) OVER (PARTITION BY depname || '') c1, -- w1
+ row_number() OVER (PARTITION BY depname) rn, -- w2
+ count(*) OVER (PARTITION BY depname) c2, -- w2
+ count(*) OVER (PARTITION BY '' || depname) c3 -- w3
+ FROM empsalary
+) e WHERE rn <= 1 AND c1 <= 3;
+
+-- Ensure we correctly filter out all of the run conditions from each window
+SELECT * FROM
+ (SELECT *,
+ count(salary) OVER (PARTITION BY depname || '') c1, -- w1
+ row_number() OVER (PARTITION BY depname) rn, -- w2
+ count(*) OVER (PARTITION BY depname) c2, -- w2
+ count(*) OVER (PARTITION BY '' || depname) c3 -- w3
+ FROM empsalary
+) e WHERE rn <= 1 AND c1 <= 3;
+
+-- Tests to ensure we don't push down the run condition when it's not valid to
+-- do so.
+
+-- Ensure we don't push down when the frame options show that the window
+-- function is not monotonically increasing
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(*) OVER (ORDER BY salary DESC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) c
+ FROM empsalary) emp
+WHERE c <= 3;
+
+-- Ensure we don't push down when the window function's monotonic properties
+-- don't match that of the clauses.
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(*) OVER (ORDER BY salary) c
+ FROM empsalary) emp
+WHERE 3 <= c;
+
+-- Ensure we don't pushdown when there are multiple window clauses to evaluate
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (SELECT empno,
+ salary,
+ count(*) OVER (ORDER BY empno DESC) c,
+ dense_rank() OVER (ORDER BY salary DESC) dr
+ FROM empsalary) emp
+WHERE dr = 1;
+
-- Test Sort node collapsing
EXPLAIN (COSTS OFF)
SELECT * FROM