From 88f55bc97622bce000a8c90f8ef58dacc926de19 Mon Sep 17 00:00:00 2001 From: Amit Langote Date: Fri, 4 Apr 2025 10:41:17 +0900 Subject: [PATCH] Make derived clause lookup in EquivalenceClass more efficient Derived clauses are stored in ec_derives, a List of RestrictInfos. These clauses are later looked up by matching the left and right EquivalenceMembers along with the clause's parent EC. This linear search becomes expensive in queries with many joins or partitions, where ec_derives may contain thousands of entries. In particular, create_join_clause() can spend significant time scanning this list. To improve performance, introduce a hash table (ec_derives_hash) that is built when the list reaches 32 entries -- the same threshold used for join_rel_hash. The original list is retained alongside the hash table to support EC merging and serialization (_outEquivalenceClass()). Each clause is stored in the hash table using a canonicalized key: the EquivalenceMember with the lower memory address is placed in the key before the one with the higher memory address. This avoids storing or searching for both permutations of the same clause. For clauses involving a constant EM, the key places NULL in the first slot and the non-constant EM in the second. The hash table is initialized using list_length(ec_derives_list) as the size hint. simplehash internally adjusts this to the next power of two after dividing by the fillfactor, so this typically results in at least 64 buckets near the threshold -- avoiding immediate resizing while adapting to the actual number of entries. The lookup logic for derived clauses is now centralized in ec_search_derived_clause_for_ems(), which consults the hash table when available and falls back to the list otherwise. The new ec_clear_derived_clauses() always frees ec_derives_list, even though some of the original code paths that cleared the old ec_derives field did not. This ensures consistent cleanup and avoids leaking memory when large lists are discarded. An assertion originally placed in find_derived_clause_for_ec_member() is moved into ec_search_derived_clause_for_ems() so that it is enforced consistently, regardless of whether the hash table or list is used for lookup. This design incorporates suggestions by David Rowley, who proposed both the key canonicalization and the initial sizing approach to balance memory usage and CPU efficiency. Author: Ashutosh Bapat Reviewed-by: Amit Langote Reviewed-by: David Rowley Tested-by: Dmitry Dolgov <9erthalion6@gmail.com> Tested-by: Alvaro Herrera Tested-by: Amit Langote Tested-by: David Rowley Discussion: https://postgr.es/m/CAExHW5vZiQtWU6moszLP5iZ8gLX_ZAUbgEX0DxGLx9PGWCtqUg@mail.gmail.com --- src/backend/nodes/outfuncs.c | 3 +- src/backend/optimizer/path/costsize.c | 3 +- src/backend/optimizer/path/equivclass.c | 435 ++++++++++++++++++---- src/backend/optimizer/plan/analyzejoins.c | 5 +- src/include/nodes/pathnodes.h | 17 +- src/include/optimizer/paths.h | 4 +- src/tools/pgindent/typedefs.list | 2 + 7 files changed, 389 insertions(+), 80 deletions(-) diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index bb9bdd67192..557f06e344f 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -467,7 +467,8 @@ _outEquivalenceClass(StringInfo str, const EquivalenceClass *node) WRITE_OID_FIELD(ec_collation); WRITE_NODE_FIELD(ec_members); WRITE_NODE_FIELD(ec_sources); - WRITE_NODE_FIELD(ec_derives); + /* Only ec_derives_list is written; hash is not serialized. */ + WRITE_NODE_FIELD(ec_derives_list); WRITE_BITMAPSET_FIELD(ec_relids); WRITE_BOOL_FIELD(ec_has_const); WRITE_BOOL_FIELD(ec_has_volatile); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index f6f77b8fe19..c474c7a06af 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -5848,7 +5848,8 @@ get_foreign_key_join_selectivity(PlannerInfo *root, if (ec && ec->ec_has_const) { EquivalenceMember *em = fkinfo->fk_eclass_member[i]; - RestrictInfo *rinfo = find_derived_clause_for_ec_member(ec, + RestrictInfo *rinfo = find_derived_clause_for_ec_member(root, + ec, em); if (rinfo) diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 493a95d26cc..9cd54c573a8 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -20,6 +20,7 @@ #include "access/stratnum.h" #include "catalog/pg_type.h" +#include "common/hashfn.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/appendinfo.h" @@ -72,7 +73,56 @@ static Bitmapset *get_eclass_indexes_for_relids(PlannerInfo *root, Relids relids); static Bitmapset *get_common_eclass_indexes(PlannerInfo *root, Relids relids1, Relids relids2); +static void ec_build_derives_hash(PlannerInfo *root, EquivalenceClass *ec); +static void ec_add_derived_clauses(EquivalenceClass *ec, List *clauses); +static void ec_add_derived_clause(EquivalenceClass *ec, RestrictInfo *clause); +static void ec_add_clause_to_derives_hash(EquivalenceClass *ec, RestrictInfo *rinfo); +static RestrictInfo *ec_search_clause_for_ems(PlannerInfo *root, EquivalenceClass *ec, + EquivalenceMember *leftem, + EquivalenceMember *rightem, + EquivalenceClass *parent_ec); +static RestrictInfo *ec_search_derived_clause_for_ems(PlannerInfo *root, + EquivalenceClass *ec, + EquivalenceMember *leftem, + EquivalenceMember *rightem, + EquivalenceClass *parent_ec); +/* + * Hash key identifying a derived clause. + * + * This structure should not be filled manually. Use fill_ec_derives_key() to + * set it up in canonical form. + */ +typedef struct +{ + EquivalenceMember *em1; + EquivalenceMember *em2; + EquivalenceClass *parent_ec; +} ECDerivesKey; + +/* Hash table entry in ec_derives_hash. */ +typedef struct +{ + uint32 status; + ECDerivesKey key; + RestrictInfo *rinfo; +} ECDerivesEntry; + +/* Threshold for switching from list to hash table */ +#define EC_DERIVES_HASH_THRESHOLD 32 + +#define SH_PREFIX derives +#define SH_ELEMENT_TYPE ECDerivesEntry +#define SH_KEY_TYPE ECDerivesKey +#define SH_KEY key +#define SH_HASH_KEY(tb, key) \ + hash_bytes((const unsigned char *) &(key), sizeof(ECDerivesKey)) +#define SH_EQUAL(tb, a, b) \ + ((a).em1 == (b).em1 && (a).em2 == (b).em2 && (a).parent_ec == (b).parent_ec) +#define SH_SCOPE static inline +#define SH_DECLARE +#define SH_DEFINE +#include "lib/simplehash.h" /* * process_equivalence @@ -342,7 +392,12 @@ process_equivalence(PlannerInfo *root, */ ec1->ec_members = list_concat(ec1->ec_members, ec2->ec_members); ec1->ec_sources = list_concat(ec1->ec_sources, ec2->ec_sources); - ec1->ec_derives = list_concat(ec1->ec_derives, ec2->ec_derives); + + /* + * Appends ec2's derived clauses to ec1->ec_derives_list and adds them + * to ec1->ec_derives_hash if present. + */ + ec_add_derived_clauses(ec1, ec2->ec_derives_list); ec1->ec_relids = bms_join(ec1->ec_relids, ec2->ec_relids); ec1->ec_has_const |= ec2->ec_has_const; /* can't need to set has_volatile */ @@ -355,7 +410,7 @@ process_equivalence(PlannerInfo *root, /* just to avoid debugging confusion w/ dangling pointers: */ ec2->ec_members = NIL; ec2->ec_sources = NIL; - ec2->ec_derives = NIL; + ec_clear_derived_clauses(ec2); ec2->ec_relids = NULL; ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); ec1->ec_min_security = Min(ec1->ec_min_security, @@ -412,7 +467,8 @@ process_equivalence(PlannerInfo *root, ec->ec_collation = collation; ec->ec_members = NIL; ec->ec_sources = list_make1(restrictinfo); - ec->ec_derives = NIL; + ec->ec_derives_list = NIL; + ec->ec_derives_hash = NULL; ec->ec_relids = NULL; ec->ec_has_const = false; ec->ec_has_volatile = false; @@ -671,7 +727,8 @@ get_eclass_for_sort_expr(PlannerInfo *root, newec->ec_collation = collation; newec->ec_members = NIL; newec->ec_sources = NIL; - newec->ec_derives = NIL; + newec->ec_derives_list = NIL; + newec->ec_derives_hash = NULL; newec->ec_relids = NULL; newec->ec_has_const = false; newec->ec_has_volatile = contain_volatile_functions((Node *) expr); @@ -1026,8 +1083,8 @@ relation_can_be_sorted_early(PlannerInfo *root, RelOptInfo *rel, * scanning of the quals and before Path construction begins. * * We make no attempt to avoid generating duplicate RestrictInfos here: we - * don't search ec_sources or ec_derives for matches. It doesn't really - * seem worth the trouble to do so. + * don't search existing source or derived clauses in the EC for matches. It + * doesn't really seem worth the trouble to do so. */ void generate_base_implied_equalities(PlannerInfo *root) @@ -1188,11 +1245,11 @@ generate_base_implied_equalities_const(PlannerInfo *root, /* * If the clause didn't degenerate to a constant, fill in the correct - * markings for a mergejoinable clause, and save it in ec_derives. (We - * will not re-use such clauses directly, but selectivity estimation - * may consult the list later. Note that this use of ec_derives does - * not overlap with its use for join clauses, since we never generate - * join clauses from an ec_has_const eclass.) + * markings for a mergejoinable clause, and save it as a derived + * clause. (We will not re-use such clauses directly, but selectivity + * estimation may consult those later. Note that this use of derived + * clauses does not overlap with its use for join clauses, since we + * never generate join clauses from an ec_has_const eclass.) */ if (rinfo && rinfo->mergeopfamilies) { @@ -1200,7 +1257,7 @@ generate_base_implied_equalities_const(PlannerInfo *root, rinfo->left_ec = rinfo->right_ec = ec; rinfo->left_em = cur_em; rinfo->right_em = const_em; - ec->ec_derives = lappend(ec->ec_derives, rinfo); + ec_add_derived_clause(ec, rinfo); } } } @@ -1265,10 +1322,10 @@ generate_base_implied_equalities_no_const(PlannerInfo *root, /* * If the clause didn't degenerate to a constant, fill in the - * correct markings for a mergejoinable clause. We don't put it - * in ec_derives however; we don't currently need to re-find such - * clauses, and we don't want to clutter that list with non-join - * clauses. + * correct markings for a mergejoinable clause. We don't record + * it as a derived clause, since we don't currently need to + * re-find such clauses, and don't want to clutter the + * derived-clause set with non-join clauses. */ if (rinfo && rinfo->mergeopfamilies) { @@ -1369,7 +1426,7 @@ generate_base_implied_equalities_broken(PlannerInfo *root, * we consider different join paths, we avoid generating multiple copies: * whenever we select a particular pair of EquivalenceMembers to join, * we check to see if the pair matches any original clause (in ec_sources) - * or previously-built clause (in ec_derives). This saves memory and allows + * or previously-built derived clause. This saves memory and allows * re-use of information cached in RestrictInfos. We also avoid generating * commutative duplicates, i.e. if the algorithm selects "a.x = b.y" but * we already have "b.y = a.x", we return the existing clause. @@ -1754,9 +1811,9 @@ generate_join_implied_equalities_broken(PlannerInfo *root, /* * If we have to translate, just brute-force apply adjust_appendrel_attrs * to all the RestrictInfos at once. This will result in returning - * RestrictInfos that are not listed in ec_derives, but there shouldn't be - * any duplication, and it's a sufficiently narrow corner case that we - * shouldn't sweat too much over it anyway. + * RestrictInfos that are not included in EC's derived clauses, but there + * shouldn't be any duplication, and it's a sufficiently narrow corner + * case that we shouldn't sweat too much over it anyway. * * Since inner_rel might be an indirect descendant of the baserel * mentioned in the ec_sources clauses, we have to be prepared to apply @@ -1823,43 +1880,11 @@ create_join_clause(PlannerInfo *root, { RestrictInfo *rinfo; RestrictInfo *parent_rinfo = NULL; - ListCell *lc; MemoryContext oldcontext; - /* - * Search to see if we already built a RestrictInfo for this pair of - * EquivalenceMembers. We can use either original source clauses or - * previously-derived clauses, and a commutator clause is acceptable. - * - * We used to verify that opno matches, but that seems redundant: even if - * it's not identical, it'd better have the same effects, or the operator - * families we're using are broken. - */ - foreach(lc, ec->ec_sources) - { - rinfo = (RestrictInfo *) lfirst(lc); - if (rinfo->left_em == leftem && - rinfo->right_em == rightem && - rinfo->parent_ec == parent_ec) - return rinfo; - if (rinfo->left_em == rightem && - rinfo->right_em == leftem && - rinfo->parent_ec == parent_ec) - return rinfo; - } - - foreach(lc, ec->ec_derives) - { - rinfo = (RestrictInfo *) lfirst(lc); - if (rinfo->left_em == leftem && - rinfo->right_em == rightem && - rinfo->parent_ec == parent_ec) - return rinfo; - if (rinfo->left_em == rightem && - rinfo->right_em == leftem && - rinfo->parent_ec == parent_ec) - return rinfo; - } + rinfo = ec_search_clause_for_ems(root, ec, leftem, rightem, parent_ec); + if (rinfo) + return rinfo; /* * Not there, so build it, in planner context so we can re-use it. (Not @@ -1923,7 +1948,7 @@ create_join_clause(PlannerInfo *root, rinfo->left_em = leftem; rinfo->right_em = rightem; /* and save it for possible re-use */ - ec->ec_derives = lappend(ec->ec_derives, rinfo); + ec_add_derived_clause(ec, rinfo); MemoryContextSwitchTo(oldcontext); @@ -2648,28 +2673,14 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root, * Returns NULL if no such clause can be found. */ RestrictInfo * -find_derived_clause_for_ec_member(EquivalenceClass *ec, +find_derived_clause_for_ec_member(PlannerInfo *root, + EquivalenceClass *ec, EquivalenceMember *em) { - ListCell *lc; - Assert(ec->ec_has_const); Assert(!em->em_is_const); - foreach(lc, ec->ec_derives) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); - /* - * generate_base_implied_equalities_const will have put non-const - * members on the left side of derived clauses. - */ - if (rinfo->left_em == em) - { - Assert(rinfo->right_em->em_is_const); - return rinfo; - } - } - return NULL; + return ec_search_derived_clause_for_ems(root, ec, em, NULL, NULL); } @@ -3442,3 +3453,281 @@ get_common_eclass_indexes(PlannerInfo *root, Relids relids1, Relids relids2) /* Calculate and return the common EC indexes, recycling the left input. */ return bms_int_members(rel1ecs, rel2ecs); } + +/* + * ec_build_derives_hash + * Construct the auxiliary hash table for derived clause lookups. + */ +static void +ec_build_derives_hash(PlannerInfo *root, EquivalenceClass *ec) +{ + Assert(!ec->ec_derives_hash); + + /* + * Create the hash table. + * + * We pass list_length(ec->ec_derives_list) as the initial size. + * Simplehash will divide this by the fillfactor (typically 0.9) and round + * up to the next power of two, so this will usually give us at least 64 + * buckets around the threshold. That avoids immediate resizing without + * hardcoding a specific size. + */ + ec->ec_derives_hash = derives_create(root->planner_cxt, + list_length(ec->ec_derives_list), + NULL); + + foreach_node(RestrictInfo, rinfo, ec->ec_derives_list) + ec_add_clause_to_derives_hash(ec, rinfo); +} + +/* + * ec_add_derived_clause + * Add a clause to the set of derived clauses for the given + * EquivalenceClass. Always appends to ec_derives_list; also adds + * to ec_derives_hash if it exists. + * + * Also asserts expected invariants of derived clauses. + */ +static void +ec_add_derived_clause(EquivalenceClass *ec, RestrictInfo *clause) +{ + /* + * Constant, if present, is always placed on the RHS; see + * generate_base_implied_equalities_const(). LHS is never a constant. + */ + Assert(!clause->left_em->em_is_const); + + /* + * Clauses containing a constant are never considered redundant, so + * parent_ec is not set. + */ + Assert(!clause->parent_ec || !clause->right_em->em_is_const); + + ec->ec_derives_list = lappend(ec->ec_derives_list, clause); + if (ec->ec_derives_hash) + ec_add_clause_to_derives_hash(ec, clause); +} + +/* + * ec_add_derived_clauses + * Add a list of clauses to the set of clauses derived from the given + * EquivalenceClass; adding to the list and hash table if needed. + * + * This function is similar to ec_add_derived_clause() but optimized for adding + * multiple clauses at a time to the ec_derives_list. The assertions from + * ec_add_derived_clause() are not repeated here, as the input clauses are + * assumed to have already been validated. + */ +static void +ec_add_derived_clauses(EquivalenceClass *ec, List *clauses) +{ + ec->ec_derives_list = list_concat(ec->ec_derives_list, clauses); + if (ec->ec_derives_hash) + foreach_node(RestrictInfo, rinfo, clauses) + ec_add_clause_to_derives_hash(ec, rinfo); +} + +/* + * fill_ec_derives_key + * Compute a canonical key for ec_derives_hash lookup or insertion. + * + * Derived clauses are looked up using a pair of EquivalenceMembers and a + * parent EquivalenceClass. To avoid storing or searching for both EM orderings, + * we canonicalize the key: + * + * - For clauses involving two non-constant EMs, em1 is set to the EM with lower + * memory address and em2 is set to the other one. + * - For clauses involving a constant EM, the caller must pass the non-constant + * EM as leftem and NULL as rightem; we then set em1 = NULL and em2 = leftem. + */ +static inline void +fill_ec_derives_key(ECDerivesKey *key, + EquivalenceMember *leftem, + EquivalenceMember *rightem, + EquivalenceClass *parent_ec) +{ + Assert(leftem); /* Always required for lookup or insertion */ + + if (rightem == NULL) + { + key->em1 = NULL; + key->em2 = leftem; + } + else if (leftem < rightem) + { + key->em1 = leftem; + key->em2 = rightem; + } + else + { + key->em1 = rightem; + key->em2 = leftem; + } + key->parent_ec = parent_ec; +} + +/* + * ec_add_clause_to_derives_hash + * Add a derived clause to ec_derives_hash in the given EquivalenceClass. + * + * Each clause is associated with a canonicalized key. For constant-containing + * clauses, only the non-constant EM is used for lookup; see comments in + * fill_ec_derives_key(). + */ +static void +ec_add_clause_to_derives_hash(EquivalenceClass *ec, RestrictInfo *rinfo) +{ + ECDerivesKey key; + ECDerivesEntry *entry; + bool found; + + /* + * Constants are always placed on the RHS; see + * generate_base_implied_equalities_const(). + */ + Assert(!rinfo->left_em->em_is_const); + + /* + * Clauses containing a constant are never considered redundant, so + * parent_ec is not set. + */ + Assert(!rinfo->parent_ec || !rinfo->right_em->em_is_const); + + /* + * See fill_ec_derives_key() for details: we use a canonicalized key to + * avoid storing both EM orderings. For constant EMs, only the + * non-constant EM is included in the key. + */ + fill_ec_derives_key(&key, + rinfo->left_em, + rinfo->right_em->em_is_const ? NULL : rinfo->right_em, + rinfo->parent_ec); + entry = derives_insert(ec->ec_derives_hash, key, &found); + Assert(!found); + entry->rinfo = rinfo; +} + +/* + * ec_clear_derived_clauses + * Reset ec_derives_list and ec_derives_hash. + * + * We destroy the hash table explicitly, since it may consume significant + * space. The list holds the same set of entries and can become equally large + * when thousands of partitions are involved, so we free it as well -- even + * though we do not typically free lists. + */ +void +ec_clear_derived_clauses(EquivalenceClass *ec) +{ + list_free(ec->ec_derives_list); + ec->ec_derives_list = NIL; + + if (ec->ec_derives_hash) + { + derives_destroy(ec->ec_derives_hash); + ec->ec_derives_hash = NULL; + } +} + +/* + * ec_search_clause_for_ems + * Search for an existing RestrictInfo that equates the given pair + * of EquivalenceMembers, either from ec_sources or ec_derives. + * + * Returns a clause with matching operands in either given order or commuted + * order. We used to require matching operator OIDs, but dropped that since any + * semantically different operator here would indicate a broken operator family. + * + * Returns NULL if no matching clause is found. + */ +static RestrictInfo * +ec_search_clause_for_ems(PlannerInfo *root, EquivalenceClass *ec, + EquivalenceMember *leftem, EquivalenceMember *rightem, + EquivalenceClass *parent_ec) +{ + /* Check original source clauses */ + foreach_node(RestrictInfo, rinfo, ec->ec_sources) + { + if (rinfo->left_em == leftem && + rinfo->right_em == rightem && + rinfo->parent_ec == parent_ec) + return rinfo; + if (rinfo->left_em == rightem && + rinfo->right_em == leftem && + rinfo->parent_ec == parent_ec) + return rinfo; + } + + /* Not found in ec_sources; search derived clauses */ + return ec_search_derived_clause_for_ems(root, ec, leftem, rightem, + parent_ec); +} + +/* + * ec_search_derived_clause_for_ems + * Search for an existing derived clause between two EquivalenceMembers. + * + * If the number of derived clauses exceeds a threshold, switch to hash table + * lookup; otherwise, scan ec_derives_list linearly. + * + * Clauses involving constants are looked up by passing the non-constant EM + * as leftem and setting rightem to NULL. In that case, we expect to find a + * clause with a constant on the RHS. + * + * While searching the list, we compare each given EM with both sides of each + * clause. But for hash table lookups, we construct a canonicalized key and + * perform a single lookup. + */ +static RestrictInfo * +ec_search_derived_clause_for_ems(PlannerInfo *root, EquivalenceClass *ec, + EquivalenceMember *leftem, + EquivalenceMember *rightem, + EquivalenceClass *parent_ec) +{ + /* Switch to using hash lookup when list grows "too long". */ + if (!ec->ec_derives_hash && + list_length(ec->ec_derives_list) >= EC_DERIVES_HASH_THRESHOLD) + ec_build_derives_hash(root, ec); + + /* Perform hash table lookup if available */ + if (ec->ec_derives_hash) + { + ECDerivesKey key; + RestrictInfo *rinfo; + ECDerivesEntry *entry; + + fill_ec_derives_key(&key, leftem, rightem, parent_ec); + entry = derives_lookup(ec->ec_derives_hash, key); + if (entry) + { + rinfo = entry->rinfo; + Assert(rinfo); + Assert(rightem || rinfo->right_em->em_is_const); + return rinfo; + } + } + else + { + /* Fallback to linear search over ec_derives_list */ + foreach_node(RestrictInfo, rinfo, ec->ec_derives_list) + { + /* Handle special case: lookup by non-const EM alone */ + if (!rightem && + rinfo->left_em == leftem) + { + Assert(rinfo->right_em->em_is_const); + return rinfo; + } + if (rinfo->left_em == leftem && + rinfo->right_em == rightem && + rinfo->parent_ec == parent_ec) + return rinfo; + if (rinfo->left_em == rightem && + rinfo->right_em == leftem && + rinfo->parent_ec == parent_ec) + return rinfo; + } + } + + return NULL; +} diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 8a8d4a2af33..ae20691ca91 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -749,7 +749,7 @@ remove_rel_from_eclass(EquivalenceClass *ec, SpecialJoinInfo *sjinfo, * drop them. (At this point, any such clauses would be base restriction * clauses, which we'd not need anymore anyway.) */ - ec->ec_derives = NIL; + ec_clear_derived_clauses(ec); } /* @@ -1544,8 +1544,7 @@ update_eclasses(EquivalenceClass *ec, int from, int to) list_free(ec->ec_members); ec->ec_members = new_members; - list_free(ec->ec_derives); - ec->ec_derives = NULL; + ec_clear_derived_clauses(ec); /* Update EC source expressions */ foreach_node(RestrictInfo, rinfo, ec->ec_sources) diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index ac3af528bc6..6e034960896 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1402,6 +1402,18 @@ typedef struct JoinDomain * entry: consider SELECT random() AS a, random() AS b ... ORDER BY b,a. * So we record the SortGroupRef of the originating sort clause. * + * Derived equality clauses are stored in ec_derives_list. For small queries, + * this list is scanned directly during lookup. For larger queries -- e.g., + * with many partitions or joins -- a hash table (ec_derives_hash) is built + * when the list grows beyond a threshold, for faster lookup. When present, + * the hash table contains the same RestrictInfos and is maintained alongside + * the list. We retain the list even when the hash is used to simplify + * serialization (e.g., in _outEquivalenceClass()) and support + * EquivalenceClass merging. + * + * In contrast, ec_sources holds equality clauses that appear directly in the + * query. These are typically few and do not require a hash table for lookup. + * * NB: if ec_merged isn't NULL, this class has been merged into another, and * should be ignored in favor of using the pointed-to class. * @@ -1421,7 +1433,10 @@ typedef struct EquivalenceClass Oid ec_collation; /* collation, if datatypes are collatable */ List *ec_members; /* list of EquivalenceMembers */ List *ec_sources; /* list of generating RestrictInfos */ - List *ec_derives; /* list of derived RestrictInfos */ + List *ec_derives_list; /* list of derived RestrictInfos */ + struct derives_hash *ec_derives_hash; /* optional hash table for fast + * lookup; contains same + * RestrictInfos as list */ Relids ec_relids; /* all relids appearing in ec_members, except * for child members (see below) */ bool ec_has_const; /* any pseudoconstants in ec_members? */ diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index bc5dfd7db41..16dc4d5ee82 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -167,7 +167,8 @@ extern bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2, extern EquivalenceClass *match_eclasses_to_foreign_key_col(PlannerInfo *root, ForeignKeyOptInfo *fkinfo, int colno); -extern RestrictInfo *find_derived_clause_for_ec_member(EquivalenceClass *ec, +extern RestrictInfo *find_derived_clause_for_ec_member(PlannerInfo *root, + EquivalenceClass *ec, EquivalenceMember *em); extern void add_child_rel_equivalences(PlannerInfo *root, AppendRelInfo *appinfo, @@ -197,6 +198,7 @@ extern bool eclass_useful_for_merging(PlannerInfo *root, extern bool is_redundant_derived_clause(RestrictInfo *rinfo, List *clauselist); extern bool is_redundant_with_indexclauses(RestrictInfo *rinfo, List *indexclauses); +extern void ec_clear_derived_clauses(EquivalenceClass *ec); /* * pathkeys.c diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 8f28d8ff28e..c3f05796a7c 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -673,6 +673,8 @@ DumpableObjectWithAcl DynamicFileList DynamicZoneAbbrev EC_KEY +ECDerivesKey +ECDerivesEntry EDGE ENGINE EOM_flatten_into_method -- 2.30.2