#include "access/stratnum.h"
#include "catalog/pg_type.h"
+#include "common/hashfn.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/appendinfo.h"
Relids relids);
static Bitmapset *get_common_eclass_indexes(PlannerInfo *root, Relids relids1,
Relids relids2);
+static void ec_build_derives_hash(PlannerInfo *root, EquivalenceClass *ec);
+static void ec_add_derived_clauses(EquivalenceClass *ec, List *clauses);
+static void ec_add_derived_clause(EquivalenceClass *ec, RestrictInfo *clause);
+static void ec_add_clause_to_derives_hash(EquivalenceClass *ec, RestrictInfo *rinfo);
+static RestrictInfo *ec_search_clause_for_ems(PlannerInfo *root, EquivalenceClass *ec,
+ EquivalenceMember *leftem,
+ EquivalenceMember *rightem,
+ EquivalenceClass *parent_ec);
+static RestrictInfo *ec_search_derived_clause_for_ems(PlannerInfo *root,
+ EquivalenceClass *ec,
+ EquivalenceMember *leftem,
+ EquivalenceMember *rightem,
+ EquivalenceClass *parent_ec);
+/*
+ * Hash key identifying a derived clause.
+ *
+ * This structure should not be filled manually. Use fill_ec_derives_key() to
+ * set it up in canonical form.
+ */
+typedef struct
+{
+ EquivalenceMember *em1;
+ EquivalenceMember *em2;
+ EquivalenceClass *parent_ec;
+} ECDerivesKey;
+
+/* Hash table entry in ec_derives_hash. */
+typedef struct
+{
+ uint32 status;
+ ECDerivesKey key;
+ RestrictInfo *rinfo;
+} ECDerivesEntry;
+
+/* Threshold for switching from list to hash table */
+#define EC_DERIVES_HASH_THRESHOLD 32
+
+#define SH_PREFIX derives
+#define SH_ELEMENT_TYPE ECDerivesEntry
+#define SH_KEY_TYPE ECDerivesKey
+#define SH_KEY key
+#define SH_HASH_KEY(tb, key) \
+ hash_bytes((const unsigned char *) &(key), sizeof(ECDerivesKey))
+#define SH_EQUAL(tb, a, b) \
+ ((a).em1 == (b).em1 && (a).em2 == (b).em2 && (a).parent_ec == (b).parent_ec)
+#define SH_SCOPE static inline
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
/*
* process_equivalence
*/
ec1->ec_members = list_concat(ec1->ec_members, ec2->ec_members);
ec1->ec_sources = list_concat(ec1->ec_sources, ec2->ec_sources);
- ec1->ec_derives = list_concat(ec1->ec_derives, ec2->ec_derives);
+
+ /*
+ * Appends ec2's derived clauses to ec1->ec_derives_list and adds them
+ * to ec1->ec_derives_hash if present.
+ */
+ ec_add_derived_clauses(ec1, ec2->ec_derives_list);
ec1->ec_relids = bms_join(ec1->ec_relids, ec2->ec_relids);
ec1->ec_has_const |= ec2->ec_has_const;
/* can't need to set has_volatile */
/* just to avoid debugging confusion w/ dangling pointers: */
ec2->ec_members = NIL;
ec2->ec_sources = NIL;
- ec2->ec_derives = NIL;
+ ec_clear_derived_clauses(ec2);
ec2->ec_relids = NULL;
ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
ec1->ec_min_security = Min(ec1->ec_min_security,
ec->ec_collation = collation;
ec->ec_members = NIL;
ec->ec_sources = list_make1(restrictinfo);
- ec->ec_derives = NIL;
+ ec->ec_derives_list = NIL;
+ ec->ec_derives_hash = NULL;
ec->ec_relids = NULL;
ec->ec_has_const = false;
ec->ec_has_volatile = false;
newec->ec_collation = collation;
newec->ec_members = NIL;
newec->ec_sources = NIL;
- newec->ec_derives = NIL;
+ newec->ec_derives_list = NIL;
+ newec->ec_derives_hash = NULL;
newec->ec_relids = NULL;
newec->ec_has_const = false;
newec->ec_has_volatile = contain_volatile_functions((Node *) expr);
* scanning of the quals and before Path construction begins.
*
* We make no attempt to avoid generating duplicate RestrictInfos here: we
- * don't search ec_sources or ec_derives for matches. It doesn't really
- * seem worth the trouble to do so.
+ * don't search existing source or derived clauses in the EC for matches. It
+ * doesn't really seem worth the trouble to do so.
*/
void
generate_base_implied_equalities(PlannerInfo *root)
/*
* If the clause didn't degenerate to a constant, fill in the correct
- * markings for a mergejoinable clause, and save it in ec_derives. (We
- * will not re-use such clauses directly, but selectivity estimation
- * may consult the list later. Note that this use of ec_derives does
- * not overlap with its use for join clauses, since we never generate
- * join clauses from an ec_has_const eclass.)
+ * markings for a mergejoinable clause, and save it as a derived
+ * clause. (We will not re-use such clauses directly, but selectivity
+ * estimation may consult those later. Note that this use of derived
+ * clauses does not overlap with its use for join clauses, since we
+ * never generate join clauses from an ec_has_const eclass.)
*/
if (rinfo && rinfo->mergeopfamilies)
{
rinfo->left_ec = rinfo->right_ec = ec;
rinfo->left_em = cur_em;
rinfo->right_em = const_em;
- ec->ec_derives = lappend(ec->ec_derives, rinfo);
+ ec_add_derived_clause(ec, rinfo);
}
}
}
/*
* If the clause didn't degenerate to a constant, fill in the
- * correct markings for a mergejoinable clause. We don't put it
- * in ec_derives however; we don't currently need to re-find such
- * clauses, and we don't want to clutter that list with non-join
- * clauses.
+ * correct markings for a mergejoinable clause. We don't record
+ * it as a derived clause, since we don't currently need to
+ * re-find such clauses, and don't want to clutter the
+ * derived-clause set with non-join clauses.
*/
if (rinfo && rinfo->mergeopfamilies)
{
* we consider different join paths, we avoid generating multiple copies:
* whenever we select a particular pair of EquivalenceMembers to join,
* we check to see if the pair matches any original clause (in ec_sources)
- * or previously-built clause (in ec_derives). This saves memory and allows
+ * or previously-built derived clause. This saves memory and allows
* re-use of information cached in RestrictInfos. We also avoid generating
* commutative duplicates, i.e. if the algorithm selects "a.x = b.y" but
* we already have "b.y = a.x", we return the existing clause.
/*
* If we have to translate, just brute-force apply adjust_appendrel_attrs
* to all the RestrictInfos at once. This will result in returning
- * RestrictInfos that are not listed in ec_derives, but there shouldn't be
- * any duplication, and it's a sufficiently narrow corner case that we
- * shouldn't sweat too much over it anyway.
+ * RestrictInfos that are not included in EC's derived clauses, but there
+ * shouldn't be any duplication, and it's a sufficiently narrow corner
+ * case that we shouldn't sweat too much over it anyway.
*
* Since inner_rel might be an indirect descendant of the baserel
* mentioned in the ec_sources clauses, we have to be prepared to apply
{
RestrictInfo *rinfo;
RestrictInfo *parent_rinfo = NULL;
- ListCell *lc;
MemoryContext oldcontext;
- /*
- * Search to see if we already built a RestrictInfo for this pair of
- * EquivalenceMembers. We can use either original source clauses or
- * previously-derived clauses, and a commutator clause is acceptable.
- *
- * We used to verify that opno matches, but that seems redundant: even if
- * it's not identical, it'd better have the same effects, or the operator
- * families we're using are broken.
- */
- foreach(lc, ec->ec_sources)
- {
- rinfo = (RestrictInfo *) lfirst(lc);
- if (rinfo->left_em == leftem &&
- rinfo->right_em == rightem &&
- rinfo->parent_ec == parent_ec)
- return rinfo;
- if (rinfo->left_em == rightem &&
- rinfo->right_em == leftem &&
- rinfo->parent_ec == parent_ec)
- return rinfo;
- }
-
- foreach(lc, ec->ec_derives)
- {
- rinfo = (RestrictInfo *) lfirst(lc);
- if (rinfo->left_em == leftem &&
- rinfo->right_em == rightem &&
- rinfo->parent_ec == parent_ec)
- return rinfo;
- if (rinfo->left_em == rightem &&
- rinfo->right_em == leftem &&
- rinfo->parent_ec == parent_ec)
- return rinfo;
- }
+ rinfo = ec_search_clause_for_ems(root, ec, leftem, rightem, parent_ec);
+ if (rinfo)
+ return rinfo;
/*
* Not there, so build it, in planner context so we can re-use it. (Not
rinfo->left_em = leftem;
rinfo->right_em = rightem;
/* and save it for possible re-use */
- ec->ec_derives = lappend(ec->ec_derives, rinfo);
+ ec_add_derived_clause(ec, rinfo);
MemoryContextSwitchTo(oldcontext);
* Returns NULL if no such clause can be found.
*/
RestrictInfo *
-find_derived_clause_for_ec_member(EquivalenceClass *ec,
+find_derived_clause_for_ec_member(PlannerInfo *root,
+ EquivalenceClass *ec,
EquivalenceMember *em)
{
- ListCell *lc;
-
Assert(ec->ec_has_const);
Assert(!em->em_is_const);
- foreach(lc, ec->ec_derives)
- {
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
- /*
- * generate_base_implied_equalities_const will have put non-const
- * members on the left side of derived clauses.
- */
- if (rinfo->left_em == em)
- {
- Assert(rinfo->right_em->em_is_const);
- return rinfo;
- }
- }
- return NULL;
+ return ec_search_derived_clause_for_ems(root, ec, em, NULL, NULL);
}
/* Calculate and return the common EC indexes, recycling the left input. */
return bms_int_members(rel1ecs, rel2ecs);
}
+
+/*
+ * ec_build_derives_hash
+ * Construct the auxiliary hash table for derived clause lookups.
+ */
+static void
+ec_build_derives_hash(PlannerInfo *root, EquivalenceClass *ec)
+{
+ Assert(!ec->ec_derives_hash);
+
+ /*
+ * Create the hash table.
+ *
+ * We pass list_length(ec->ec_derives_list) as the initial size.
+ * Simplehash will divide this by the fillfactor (typically 0.9) and round
+ * up to the next power of two, so this will usually give us at least 64
+ * buckets around the threshold. That avoids immediate resizing without
+ * hardcoding a specific size.
+ */
+ ec->ec_derives_hash = derives_create(root->planner_cxt,
+ list_length(ec->ec_derives_list),
+ NULL);
+
+ foreach_node(RestrictInfo, rinfo, ec->ec_derives_list)
+ ec_add_clause_to_derives_hash(ec, rinfo);
+}
+
+/*
+ * ec_add_derived_clause
+ * Add a clause to the set of derived clauses for the given
+ * EquivalenceClass. Always appends to ec_derives_list; also adds
+ * to ec_derives_hash if it exists.
+ *
+ * Also asserts expected invariants of derived clauses.
+ */
+static void
+ec_add_derived_clause(EquivalenceClass *ec, RestrictInfo *clause)
+{
+ /*
+ * Constant, if present, is always placed on the RHS; see
+ * generate_base_implied_equalities_const(). LHS is never a constant.
+ */
+ Assert(!clause->left_em->em_is_const);
+
+ /*
+ * Clauses containing a constant are never considered redundant, so
+ * parent_ec is not set.
+ */
+ Assert(!clause->parent_ec || !clause->right_em->em_is_const);
+
+ ec->ec_derives_list = lappend(ec->ec_derives_list, clause);
+ if (ec->ec_derives_hash)
+ ec_add_clause_to_derives_hash(ec, clause);
+}
+
+/*
+ * ec_add_derived_clauses
+ * Add a list of clauses to the set of clauses derived from the given
+ * EquivalenceClass; adding to the list and hash table if needed.
+ *
+ * This function is similar to ec_add_derived_clause() but optimized for adding
+ * multiple clauses at a time to the ec_derives_list. The assertions from
+ * ec_add_derived_clause() are not repeated here, as the input clauses are
+ * assumed to have already been validated.
+ */
+static void
+ec_add_derived_clauses(EquivalenceClass *ec, List *clauses)
+{
+ ec->ec_derives_list = list_concat(ec->ec_derives_list, clauses);
+ if (ec->ec_derives_hash)
+ foreach_node(RestrictInfo, rinfo, clauses)
+ ec_add_clause_to_derives_hash(ec, rinfo);
+}
+
+/*
+ * fill_ec_derives_key
+ * Compute a canonical key for ec_derives_hash lookup or insertion.
+ *
+ * Derived clauses are looked up using a pair of EquivalenceMembers and a
+ * parent EquivalenceClass. To avoid storing or searching for both EM orderings,
+ * we canonicalize the key:
+ *
+ * - For clauses involving two non-constant EMs, em1 is set to the EM with lower
+ * memory address and em2 is set to the other one.
+ * - For clauses involving a constant EM, the caller must pass the non-constant
+ * EM as leftem and NULL as rightem; we then set em1 = NULL and em2 = leftem.
+ */
+static inline void
+fill_ec_derives_key(ECDerivesKey *key,
+ EquivalenceMember *leftem,
+ EquivalenceMember *rightem,
+ EquivalenceClass *parent_ec)
+{
+ Assert(leftem); /* Always required for lookup or insertion */
+
+ if (rightem == NULL)
+ {
+ key->em1 = NULL;
+ key->em2 = leftem;
+ }
+ else if (leftem < rightem)
+ {
+ key->em1 = leftem;
+ key->em2 = rightem;
+ }
+ else
+ {
+ key->em1 = rightem;
+ key->em2 = leftem;
+ }
+ key->parent_ec = parent_ec;
+}
+
+/*
+ * ec_add_clause_to_derives_hash
+ * Add a derived clause to ec_derives_hash in the given EquivalenceClass.
+ *
+ * Each clause is associated with a canonicalized key. For constant-containing
+ * clauses, only the non-constant EM is used for lookup; see comments in
+ * fill_ec_derives_key().
+ */
+static void
+ec_add_clause_to_derives_hash(EquivalenceClass *ec, RestrictInfo *rinfo)
+{
+ ECDerivesKey key;
+ ECDerivesEntry *entry;
+ bool found;
+
+ /*
+ * Constants are always placed on the RHS; see
+ * generate_base_implied_equalities_const().
+ */
+ Assert(!rinfo->left_em->em_is_const);
+
+ /*
+ * Clauses containing a constant are never considered redundant, so
+ * parent_ec is not set.
+ */
+ Assert(!rinfo->parent_ec || !rinfo->right_em->em_is_const);
+
+ /*
+ * See fill_ec_derives_key() for details: we use a canonicalized key to
+ * avoid storing both EM orderings. For constant EMs, only the
+ * non-constant EM is included in the key.
+ */
+ fill_ec_derives_key(&key,
+ rinfo->left_em,
+ rinfo->right_em->em_is_const ? NULL : rinfo->right_em,
+ rinfo->parent_ec);
+ entry = derives_insert(ec->ec_derives_hash, key, &found);
+ Assert(!found);
+ entry->rinfo = rinfo;
+}
+
+/*
+ * ec_clear_derived_clauses
+ * Reset ec_derives_list and ec_derives_hash.
+ *
+ * We destroy the hash table explicitly, since it may consume significant
+ * space. The list holds the same set of entries and can become equally large
+ * when thousands of partitions are involved, so we free it as well -- even
+ * though we do not typically free lists.
+ */
+void
+ec_clear_derived_clauses(EquivalenceClass *ec)
+{
+ list_free(ec->ec_derives_list);
+ ec->ec_derives_list = NIL;
+
+ if (ec->ec_derives_hash)
+ {
+ derives_destroy(ec->ec_derives_hash);
+ ec->ec_derives_hash = NULL;
+ }
+}
+
+/*
+ * ec_search_clause_for_ems
+ * Search for an existing RestrictInfo that equates the given pair
+ * of EquivalenceMembers, either from ec_sources or ec_derives.
+ *
+ * Returns a clause with matching operands in either given order or commuted
+ * order. We used to require matching operator OIDs, but dropped that since any
+ * semantically different operator here would indicate a broken operator family.
+ *
+ * Returns NULL if no matching clause is found.
+ */
+static RestrictInfo *
+ec_search_clause_for_ems(PlannerInfo *root, EquivalenceClass *ec,
+ EquivalenceMember *leftem, EquivalenceMember *rightem,
+ EquivalenceClass *parent_ec)
+{
+ /* Check original source clauses */
+ foreach_node(RestrictInfo, rinfo, ec->ec_sources)
+ {
+ if (rinfo->left_em == leftem &&
+ rinfo->right_em == rightem &&
+ rinfo->parent_ec == parent_ec)
+ return rinfo;
+ if (rinfo->left_em == rightem &&
+ rinfo->right_em == leftem &&
+ rinfo->parent_ec == parent_ec)
+ return rinfo;
+ }
+
+ /* Not found in ec_sources; search derived clauses */
+ return ec_search_derived_clause_for_ems(root, ec, leftem, rightem,
+ parent_ec);
+}
+
+/*
+ * ec_search_derived_clause_for_ems
+ * Search for an existing derived clause between two EquivalenceMembers.
+ *
+ * If the number of derived clauses exceeds a threshold, switch to hash table
+ * lookup; otherwise, scan ec_derives_list linearly.
+ *
+ * Clauses involving constants are looked up by passing the non-constant EM
+ * as leftem and setting rightem to NULL. In that case, we expect to find a
+ * clause with a constant on the RHS.
+ *
+ * While searching the list, we compare each given EM with both sides of each
+ * clause. But for hash table lookups, we construct a canonicalized key and
+ * perform a single lookup.
+ */
+static RestrictInfo *
+ec_search_derived_clause_for_ems(PlannerInfo *root, EquivalenceClass *ec,
+ EquivalenceMember *leftem,
+ EquivalenceMember *rightem,
+ EquivalenceClass *parent_ec)
+{
+ /* Switch to using hash lookup when list grows "too long". */
+ if (!ec->ec_derives_hash &&
+ list_length(ec->ec_derives_list) >= EC_DERIVES_HASH_THRESHOLD)
+ ec_build_derives_hash(root, ec);
+
+ /* Perform hash table lookup if available */
+ if (ec->ec_derives_hash)
+ {
+ ECDerivesKey key;
+ RestrictInfo *rinfo;
+ ECDerivesEntry *entry;
+
+ fill_ec_derives_key(&key, leftem, rightem, parent_ec);
+ entry = derives_lookup(ec->ec_derives_hash, key);
+ if (entry)
+ {
+ rinfo = entry->rinfo;
+ Assert(rinfo);
+ Assert(rightem || rinfo->right_em->em_is_const);
+ return rinfo;
+ }
+ }
+ else
+ {
+ /* Fallback to linear search over ec_derives_list */
+ foreach_node(RestrictInfo, rinfo, ec->ec_derives_list)
+ {
+ /* Handle special case: lookup by non-const EM alone */
+ if (!rightem &&
+ rinfo->left_em == leftem)
+ {
+ Assert(rinfo->right_em->em_is_const);
+ return rinfo;
+ }
+ if (rinfo->left_em == leftem &&
+ rinfo->right_em == rightem &&
+ rinfo->parent_ec == parent_ec)
+ return rinfo;
+ if (rinfo->left_em == rightem &&
+ rinfo->right_em == leftem &&
+ rinfo->parent_ec == parent_ec)
+ return rinfo;
+ }
+ }
+
+ return NULL;
+}