Have the planner account for the Memoize cache key memory

author David Rowley <drowley@postgresql.org>

Mon, 20 Mar 2023 03:26:04 +0000 (16:26 +1300)

committer David Rowley <drowley@postgresql.org>

Mon, 20 Mar 2023 03:26:04 +0000 (16:26 +1300)
author David Rowley <drowley@postgresql.org>
Mon, 20 Mar 2023 03:26:04 +0000 (16:26 +1300)
committer David Rowley <drowley@postgresql.org>
Mon, 20 Mar 2023 03:26:04 +0000 (16:26 +1300)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index 7918bb6f0db48c2b432e3ec46aacadc2e2a20fbb..acd859bede7d2e472d3c410584930396155457b3 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -189,6 +189,7 @@ static Selectivity get_foreign_key_join_selectivity(PlannerInfo *root,
  static Cost append_nonpartial_cost(List *subpaths, int numpaths,
                                    int parallel_workers);
  static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
+static int32 get_expr_width(PlannerInfo *root, const Node *expr);
  static double relation_byte_size(double tuples, int width);
  static double page_size(double tuples, int width);
  static double get_parallel_divisor(Path *path);
@@ -2481,6 +2482,7 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
                     Cost *rescan_startup_cost, Cost *rescan_total_cost)
  {
     EstimationInfo estinfo;
+   ListCell   *lc;
     Cost        input_startup_cost = mpath->subpath->startup_cost;
     Cost        input_total_cost = mpath->subpath->total_cost;
     double      tuples = mpath->subpath->rows;
@@ -2504,12 +2506,14 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
      * To provide us with better estimations on how many cache entries we can
      * store at once, we make a call to the executor here to ask it what
      * memory overheads there are for a single cache entry.
-    *
-    * XXX we also store the cache key, but that's not accounted for here.
      */
     est_entry_bytes = relation_byte_size(tuples, width) +
         ExecEstimateCacheEntryOverheadBytes(tuples);
  
+   /* include the estimated width for the cache keys */
+   foreach(lc, mpath->param_exprs)
+       est_entry_bytes += get_expr_width(root, (Node *) lfirst(lc));
+
     /* estimate on the upper limit of cache entries we can hold at once */
     est_cache_entries = floor(hash_mem_bytes / est_entry_bytes);
  
@@ -6021,54 +6025,13 @@ set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target)
     {
         Node       *node = (Node *) lfirst(lc);
  
-       if (IsA(node, Var))
-       {
-           Var        *var = (Var *) node;
-           int32       item_width;
-
-           /* We should not see any upper-level Vars here */
-           Assert(var->varlevelsup == 0);
-
-           /* Try to get data from RelOptInfo cache */
-           if (!IS_SPECIAL_VARNO(var->varno) &&
-               var->varno < root->simple_rel_array_size)
-           {
-               RelOptInfo *rel = root->simple_rel_array[var->varno];
-
-               if (rel != NULL &&
-                   var->varattno >= rel->min_attr &&
-                   var->varattno <= rel->max_attr)
-               {
-                   int         ndx = var->varattno - rel->min_attr;
-
-                   if (rel->attr_widths[ndx] > 0)
-                   {
-                       tuple_width += rel->attr_widths[ndx];
-                       continue;
-                   }
-               }
-           }
+       tuple_width += get_expr_width(root, node);
  
-           /*
-            * No cached data available, so estimate using just the type info.
-            */
-           item_width = get_typavgwidth(var->vartype, var->vartypmod);
-           Assert(item_width > 0);
-           tuple_width += item_width;
-       }
-       else
+       /* For non-Vars, account for evaluation cost */
+       if (!IsA(node, Var))
         {
-           /*
-            * Handle general expressions using type info.
-            */
-           int32       item_width;
             QualCost    cost;
  
-           item_width = get_typavgwidth(exprType(node), exprTypmod(node));
-           Assert(item_width > 0);
-           tuple_width += item_width;
-
-           /* Account for cost, too */
             cost_qual_eval_node(&cost, node, root);
             target->cost.startup += cost.startup;
             target->cost.per_tuple += cost.per_tuple;
@@ -6081,6 +6044,55 @@ set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target)
     return target;
  }
  
+/*
+ * get_expr_width
+ *     Estimate the width of the given expr.  For a Var, prefer the width
+ *     cached in its owning RelOptInfo; when none is cached, or when the
+ *     given Node is not a Var, fall back on the type's average width.
+ */
+static int32
+get_expr_width(PlannerInfo *root, const Node *expr)
+{
+   int32       width;
+
+   if (IsA(expr, Var))
+   {
+       const Var  *var = (const Var *) expr;
+
+       /* We should not see any upper-level Vars here */
+       Assert(var->varlevelsup == 0);
+
+       /* Try to get the width from the owning RelOptInfo's attr_widths */
+       if (!IS_SPECIAL_VARNO(var->varno) &&
+           var->varno < root->simple_rel_array_size)
+       {
+           RelOptInfo *rel = root->simple_rel_array[var->varno];
+
+           if (rel != NULL &&
+               var->varattno >= rel->min_attr &&
+               var->varattno <= rel->max_attr)
+           {
+               int         ndx = var->varattno - rel->min_attr;
+
+               if (rel->attr_widths[ndx] > 0)
+                   return rel->attr_widths[ndx];
+           }
+       }
+
+       /*
+        * No usable cached width, so estimate using just the type info.
+        */
+       width = get_typavgwidth(var->vartype, var->vartypmod);
+       Assert(width > 0);
+
+       return width;
+   }
+
+   width = get_typavgwidth(exprType(expr), exprTypmod(expr));
+   Assert(width > 0);
+   return width;
+}
+
  /*
   * relation_byte_size
   *   Estimate the storage space in bytes for a given number of tuples
author	David Rowley <drowley@postgresql.org>
	Mon, 20 Mar 2023 03:26:04 +0000 (16:26 +1300)
committer	David Rowley <drowley@postgresql.org>
	Mon, 20 Mar 2023 03:26:04 +0000 (16:26 +1300)