From 3f7323cbbdd3fddc54619b8bd0e0b03a27befdfc Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 27 Aug 2022 12:11:20 -0400 Subject: [PATCH] Repair rare failure of MULTIEXPR_SUBLINK subplans in inherited updates. Prior to v14, if we have a MULTIEXPR SubPlan (that is, use of the syntax UPDATE ... SET (c1, ...) = (SELECT ...)) in an UPDATE with an inherited or partitioned target table, inheritance_planner() will clone the targetlist and therefore also the MULTIEXPR SubPlan and the Param nodes referencing it for each child target table. Up to now, we've allowed all the clones to share the underlying subplan as well as the output parameter IDs -- that is, the runtime ParamExecData slots. That technique is borrowed from the far older code that supports initplans, and it works okay in that case because the cloned SubPlan nodes are essentially identical. So it doesn't matter which one of the clones the shared ParamExecData.execPlan field might point to. However, this fails to hold for MULTIEXPR SubPlans, because they can have nonempty "args" lists (values to be passed into the subplan), and those lists could get mutated to different states in the various clones. In the submitted reproducer, as well as the test case added here, one clone contains Vars with varno OUTER_VAR where another has INNER_VAR, because the child tables are respectively on the outer or inner side of the join. Sharing the execPlan pointer can result in trying to evaluate an args list that doesn't match the local execution state, with mayhem ensuing. The result often is to trigger consistency checks in the executor, but I believe this could end in a crash or incorrect updates. To fix, assign new Param IDs to each of the cloned SubPlans, so that they don't share ParamExecData slots at runtime. It still seems fine for the clones to share the underlying subplan, and extra ParamExecData slots are cheap enough that this fix shouldn't cost much. This has been busted since we invented MULTIEXPR SubPlans in 9.5. Probably the lack of previous reports is because query plans in which the different clones of a MULTIEXPR mutate to effectively-different states are pretty rare. There's no issue in v14 and later, because without inheritance_planner() there's never a reason to clone MULTIEXPR SubPlans. Per bug #17596 from Andre Lin. Patch v10-v13 only. Discussion: https://postgr.es/m/17596-c5357f61427a81dc@postgresql.org --- src/backend/executor/nodeSubplan.c | 15 ++++ src/backend/optimizer/plan/planner.c | 4 ++ src/backend/optimizer/plan/subselect.c | 95 ++++++++++++++++++++++++++ src/include/optimizer/subselect.h | 1 + src/test/regress/expected/inherit.out | 49 +++++++++++++ src/test/regress/sql/inherit.sql | 20 ++++++ 6 files changed, 184 insertions(+) diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index fadd8ea7319..c542464e5a8 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -245,6 +245,21 @@ ExecScanSubPlan(SubPlanState *node, * ones, so this should be safe.) Unlike ExecReScanSetParamPlan, we do * *not* set bits in the parent plan node's chgParam, because we don't * want to cause a rescan of the parent. + * + * Note: we are also relying on MULTIEXPR SubPlans not sharing any output + * parameters with other SubPlans, because if one does then it is unclear + * which SubPlanState node the parameter's execPlan field will be pointing + * to when we come to evaluate the parameter. We can allow plain initplan + * SubPlans to share output parameters, because it doesn't actually matter + * which initplan SubPlan we reference as long as they all point to the + * same underlying subplan. However, that fails to hold for MULTIEXPRs + * because they can have non-empty args lists, and the "same" args might + * have mutated into different forms in different parts of a plan tree. + * There is not a problem in ordinary queries because MULTIEXPR will + * appear only in an UPDATE's top-level target list, so it won't get + * duplicated anyplace. However, when inheritance_planner clones a + * partially-planned targetlist it must take care to assign non-duplicate + * param IDs to the cloned copy. */ if (subLinkType == MULTIEXPR_SUBLINK) { diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 60e7fda6a9b..27c665ac126 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1606,6 +1606,10 @@ inheritance_planner(PlannerInfo *root) /* and we haven't created PlaceHolderInfos, either */ Assert(subroot->placeholder_list == NIL); + /* Fix MULTIEXPR_SUBLINK params if any */ + if (root->multiexpr_params) + SS_make_multiexprs_unique(root, subroot); + /* Generate Path(s) for accessing this result relation */ grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ ); diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 91a8851b253..927d6a43bde 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -851,6 +851,101 @@ hash_ok_operator(OpExpr *expr) } } +/* + * SS_make_multiexprs_unique + * + * After cloning an UPDATE targetlist that contains MULTIEXPR_SUBLINK + * SubPlans, inheritance_planner() must call this to assign new, unique Param + * IDs to the cloned MULTIEXPR_SUBLINKs' output parameters. See notes in + * ExecScanSubPlan. + */ +void +SS_make_multiexprs_unique(PlannerInfo *root, PlannerInfo *subroot) +{ + List *new_multiexpr_params = NIL; + int offset; + ListCell *lc; + + /* + * Find MULTIEXPR SubPlans in the cloned query. We need only look at the + * top level of the targetlist. + */ + foreach(lc, subroot->parse->targetList) + { + TargetEntry *tent = (TargetEntry *) lfirst(lc); + SubPlan *splan; + Plan *plan; + List *params; + + if (!IsA(tent->expr, SubPlan)) + continue; + splan = (SubPlan *) tent->expr; + if (splan->subLinkType != MULTIEXPR_SUBLINK) + continue; + + /* Found one, get the associated subplan */ + plan = (Plan *) list_nth(root->glob->subplans, splan->plan_id - 1); + + /* + * Generate new PARAM_EXEC Param nodes, and overwrite splan->setParam + * with their IDs. This is just like what build_subplan did when it + * made the SubPlan node we're cloning. But because the param IDs are + * assigned globally, we'll get new IDs. (We assume here that the + * subroot's tlist is a clone we can scribble on.) + */ + params = generate_subquery_params(root, + plan->targetlist, + &splan->setParam); + + /* + * We will append the replacement-Params lists to + * root->multiexpr_params, but for the moment just make a local list. + * Since we lack easy access here to the original subLinkId, we have + * to fall back on the slightly shaky assumption that the MULTIEXPR + * SubPlans appear in the targetlist in subLinkId order. This should + * be safe enough given the way that the parser builds the targetlist + * today. I wouldn't want to rely on it going forward, but since this + * code has a limited lifespan it should be fine. We can partially + * protect against problems with assertions below. + */ + new_multiexpr_params = lappend(new_multiexpr_params, params); + } + + /* + * Now we must find the Param nodes that reference the MULTIEXPR outputs + * and update their sublink IDs so they'll reference the new outputs. + * Fortunately, those too must be at top level of the cloned targetlist. + */ + offset = list_length(root->multiexpr_params); + + foreach(lc, subroot->parse->targetList) + { + TargetEntry *tent = (TargetEntry *) lfirst(lc); + Param *p; + int subqueryid; + int colno; + + if (!IsA(tent->expr, Param)) + continue; + p = (Param *) tent->expr; + if (p->paramkind != PARAM_MULTIEXPR) + continue; + subqueryid = p->paramid >> 16; + colno = p->paramid & 0xFFFF; + Assert(subqueryid > 0 && + subqueryid <= list_length(new_multiexpr_params)); + Assert(colno > 0 && + colno <= list_length((List *) list_nth(new_multiexpr_params, + subqueryid - 1))); + subqueryid += offset; + p->paramid = (subqueryid << 16) + colno; + } + + /* Finally, attach new replacement lists to the global list */ + root->multiexpr_params = list_concat(root->multiexpr_params, + new_multiexpr_params); +} + /* * SS_process_ctes: process a query's WITH list diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h index d6a872bd2c3..6f33578c8f4 100644 --- a/src/include/optimizer/subselect.h +++ b/src/include/optimizer/subselect.h @@ -16,6 +16,7 @@ #include "nodes/pathnodes.h" #include "nodes/plannodes.h" +extern void SS_make_multiexprs_unique(PlannerInfo *root, PlannerInfo *subroot); extern void SS_process_ctes(PlannerInfo *root); extern JoinExpr *convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out index 2b68aef6548..6e134d53f64 100644 --- a/src/test/regress/expected/inherit.out +++ b/src/test/regress/expected/inherit.out @@ -1715,6 +1715,55 @@ reset enable_seqscan; reset enable_indexscan; reset enable_bitmapscan; -- +-- Check handling of MULTIEXPR SubPlans in inherited updates +-- +create table inhpar(f1 int, f2 name); +insert into inhpar select generate_series(1,10); +create table inhcld() inherits(inhpar); +insert into inhcld select generate_series(11,10000); +vacuum analyze inhcld; +vacuum analyze inhpar; +explain (verbose, costs off) +update inhpar set (f1, f2) = (select p2.unique2, p2.stringu1 + from int4_tbl limit 1) +from onek p2 where inhpar.f1 = p2.unique1; + QUERY PLAN +----------------------------------------------------------------------------- + Update on public.inhpar + Update on public.inhpar + Update on public.inhcld inhpar_1 + -> Merge Join + Output: $4, $5, (SubPlan 1 (returns $2,$3)), inhpar.ctid, p2.ctid + Merge Cond: (p2.unique1 = inhpar.f1) + -> Index Scan using onek_unique1 on public.onek p2 + Output: p2.unique2, p2.stringu1, p2.ctid, p2.unique1 + -> Sort + Output: inhpar.ctid, inhpar.f1 + Sort Key: inhpar.f1 + -> Seq Scan on public.inhpar + Output: inhpar.ctid, inhpar.f1 + SubPlan 1 (returns $2,$3) + -> Limit + Output: (p2.unique2), (p2.stringu1) + -> Seq Scan on public.int4_tbl + Output: p2.unique2, p2.stringu1 + -> Hash Join + Output: $6, $7, (SubPlan 1 (returns $2,$3)), inhpar_1.ctid, p2.ctid + Hash Cond: (inhpar_1.f1 = p2.unique1) + -> Seq Scan on public.inhcld inhpar_1 + Output: inhpar_1.ctid, inhpar_1.f1 + -> Hash + Output: p2.unique2, p2.stringu1, p2.ctid, p2.unique1 + -> Seq Scan on public.onek p2 + Output: p2.unique2, p2.stringu1, p2.ctid, p2.unique1 +(27 rows) + +update inhpar set (f1, f2) = (select p2.unique2, p2.stringu1 + from int4_tbl limit 1) +from onek p2 where inhpar.f1 = p2.unique1; +drop table inhpar cascade; +NOTICE: drop cascades to table inhcld +-- -- Check handling of a constant-null CHECK constraint -- create table cnullparent (f1 int); diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql index 64173a8738c..fd4f252c291 100644 --- a/src/test/regress/sql/inherit.sql +++ b/src/test/regress/sql/inherit.sql @@ -629,6 +629,26 @@ reset enable_seqscan; reset enable_indexscan; reset enable_bitmapscan; +-- +-- Check handling of MULTIEXPR SubPlans in inherited updates +-- +create table inhpar(f1 int, f2 name); +insert into inhpar select generate_series(1,10); +create table inhcld() inherits(inhpar); +insert into inhcld select generate_series(11,10000); +vacuum analyze inhcld; +vacuum analyze inhpar; + +explain (verbose, costs off) +update inhpar set (f1, f2) = (select p2.unique2, p2.stringu1 + from int4_tbl limit 1) +from onek p2 where inhpar.f1 = p2.unique1; +update inhpar set (f1, f2) = (select p2.unique2, p2.stringu1 + from int4_tbl limit 1) +from onek p2 where inhpar.f1 = p2.unique1; + +drop table inhpar cascade; + -- -- Check handling of a constant-null CHECK constraint -- -- 2.30.2