#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
-#include "utils/rel.h"
#include "utils/ruleutils.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
*/
if (partitioned && (stmt->unique || stmt->primary))
{
- PartitionKey key = rel->rd_partkey;
+ PartitionKey key = RelationGetPartitionKey(rel);
int i;
/*
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
-#include "utils/rel.h"
#include "utils/rls.h"
#include "utils/ruleutils.h"
#include "utils/hashutils.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
-#include "utils/rel.h"
#include "utils/ruleutils.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
/*
* Return a copy of given PartitionBoundInfo structure. The data types of bounds
* are described by given partition key specification.
+ *
+ * Note: it's important that this function and its callees not do any catalog
+ * access, nor anything else that would result in allocating memory other than
+ * the returned data structure. Since this is called in a long-lived context,
+ * that would result in unwanted memory leaks.
*/
PartitionBoundInfo
partition_bounds_copy(PartitionBoundInfo src,
PartitionDesc pd;
} PartitionDirectoryEntry;
+static void RelationBuildPartitionDesc(Relation rel);
+
+
+/*
+ * RelationGetPartitionDesc -- get partition descriptor, if relation is partitioned
+ *
+ * Note: we arrange for partition descriptors to not get freed until the
+ * relcache entry's refcount goes to zero (see hacks in RelationClose,
+ * RelationClearRelation, and RelationBuildPartitionDesc). Therefore, even
+ * though we hand back a direct pointer into the relcache entry, it's safe
+ * for callers to continue to use that pointer as long as (a) they hold the
+ * relation open, and (b) they hold a relation lock strong enough to ensure
+ * that the data doesn't become stale.
+ */
+PartitionDesc
+RelationGetPartitionDesc(Relation rel)
+{
+ if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+ return NULL;
+
+ if (unlikely(rel->rd_partdesc == NULL))
+ RelationBuildPartitionDesc(rel);
+
+ return rel->rd_partdesc;
+}
+
/*
* RelationBuildPartitionDesc
* Form rel's partition descriptor, and store in relcache entry
*
- * Note: the descriptor won't be flushed from the cache by
- * RelationClearRelation() unless it's changed because of
- * addition or removal of a partition. Hence, code holding a lock
- * that's sufficient to prevent that can assume that rd_partdesc
- * won't change underneath it.
+ * Partition descriptor is a complex structure; to avoid complicated logic to
+ * free individual elements whenever the relcache entry is flushed, we give it
+ * its own memory context, a child of CacheMemoryContext, which can easily be
+ * deleted on its own. To avoid leaking memory in that context in case of an
+ * error partway through this function, the context is initially created as a
+ * child of CurTransactionContext and only re-parented to CacheMemoryContext
+ * at the end, when no further errors are possible. Also, we don't make this
+ * context the current context except in very brief code sections, out of fear
+ * that some of our callees allocate memory on their own which would be leaked
+ * permanently.
*/
-void
+static void
RelationBuildPartitionDesc(Relation rel)
{
PartitionDesc partdesc;
List *inhoids;
PartitionBoundSpec **boundspecs = NULL;
Oid *oids = NULL;
+ bool *is_leaf = NULL;
ListCell *cell;
int i,
nparts;
PartitionKey key = RelationGetPartitionKey(rel);
+ MemoryContext new_pdcxt;
MemoryContext oldcxt;
int *mapping;
inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock);
nparts = list_length(inhoids);
- /* Allocate arrays for OIDs and boundspecs. */
+ /* Allocate working arrays for OIDs, leaf flags, and boundspecs. */
if (nparts > 0)
{
- oids = palloc(nparts * sizeof(Oid));
+ oids = (Oid *) palloc(nparts * sizeof(Oid));
+ is_leaf = (bool *) palloc(nparts * sizeof(bool));
boundspecs = palloc(nparts * sizeof(PartitionBoundSpec *));
}
/* Save results. */
oids[i] = inhrelid;
+ is_leaf[i] = (get_rel_relkind(inhrelid) != RELKIND_PARTITIONED_TABLE);
boundspecs[i] = boundspec;
++i;
}
- /* Assert we aren't about to leak any old data structure */
- Assert(rel->rd_pdcxt == NULL);
- Assert(rel->rd_partdesc == NULL);
+ /*
+ * Create PartitionBoundInfo and mapping, working in the caller's context.
+ * This could fail, but we haven't done any damage if so.
+ */
+ if (nparts > 0)
+ boundinfo = partition_bounds_create(boundspecs, nparts, key, &mapping);
/*
- * Now build the actual relcache partition descriptor. Note that the
- * order of operations here is fairly critical. If we fail partway
- * through this code, we won't have leaked memory because the rd_pdcxt is
- * attached to the relcache entry immediately, so it'll be freed whenever
- * the entry is rebuilt or destroyed. However, we don't assign to
- * rd_partdesc until the cached data structure is fully complete and
- * valid, so that no other code might try to use it.
+ * Now build the actual relcache partition descriptor, copying all the
+ * data into a new, small context. As per above comment, we don't make
+ * this a long-lived context until it's finished.
*/
- rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext,
- "partition descriptor",
- ALLOCSET_SMALL_SIZES);
- MemoryContextCopyAndSetIdentifier(rel->rd_pdcxt,
+ new_pdcxt = AllocSetContextCreate(CurTransactionContext,
+ "partition descriptor",
+ ALLOCSET_SMALL_SIZES);
+ MemoryContextCopyAndSetIdentifier(new_pdcxt,
RelationGetRelationName(rel));
partdesc = (PartitionDescData *)
- MemoryContextAllocZero(rel->rd_pdcxt, sizeof(PartitionDescData));
+ MemoryContextAllocZero(new_pdcxt, sizeof(PartitionDescData));
partdesc->nparts = nparts;
/* If there are no partitions, the rest of the partdesc can stay zero */
if (nparts > 0)
{
- /* Create PartitionBoundInfo, using the caller's context. */
- boundinfo = partition_bounds_create(boundspecs, nparts, key, &mapping);
-
- /* Now copy all info into relcache's partdesc. */
- oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt);
+ oldcxt = MemoryContextSwitchTo(new_pdcxt);
partdesc->boundinfo = partition_bounds_copy(boundinfo, key);
partdesc->oids = (Oid *) palloc(nparts * sizeof(Oid));
partdesc->is_leaf = (bool *) palloc(nparts * sizeof(bool));
- MemoryContextSwitchTo(oldcxt);
/*
* Assign OIDs from the original array into mapped indexes of the
* result array. The order of OIDs in the former is defined by the
* catalog scan that retrieved them, whereas that in the latter is
* defined by canonicalized representation of the partition bounds.
- *
- * Also record leaf-ness of each partition. For this we use
- * get_rel_relkind() which may leak memory, so be sure to run it in
- * the caller's context.
+ * Also save leaf-ness of each partition.
*/
for (i = 0; i < nparts; i++)
{
int index = mapping[i];
partdesc->oids[index] = oids[i];
- partdesc->is_leaf[index] =
- (get_rel_relkind(oids[i]) != RELKIND_PARTITIONED_TABLE);
+ partdesc->is_leaf[index] = is_leaf[i];
}
+ MemoryContextSwitchTo(oldcxt);
}
+ /*
+ * We have a fully valid partdesc ready to store into the relcache.
+ * Reparent it so it has the right lifespan.
+ */
+ MemoryContextSetParent(new_pdcxt, CacheMemoryContext);
+
+ /*
+ * But first, a kluge: if there's an old rd_pdcxt, it contains an old
+ * partition descriptor that may still be referenced somewhere. Preserve
+ * it, while not leaking it, by reattaching it as a child context of the
+ * new rd_pdcxt. Eventually it will get dropped by either RelationClose
+ * or RelationClearRelation.
+ */
+ if (rel->rd_pdcxt != NULL)
+ MemoryContextSetParent(rel->rd_pdcxt, new_pdcxt);
+ rel->rd_pdcxt = new_pdcxt;
rel->rd_partdesc = partdesc;
}
#include "utils/syscache.h"
+static void RelationBuildPartitionKey(Relation relation);
static List *generate_partition_qual(Relation rel);
+/*
+ * RelationGetPartitionKey -- get partition key, if relation is partitioned
+ *
+ * Note: partition keys are not allowed to change after the partitioned rel
+ * is created. RelationClearRelation knows this and preserves rd_partkey
+ * across relcache rebuilds, as long as the relation is open. Therefore,
+ * even though we hand back a direct pointer into the relcache entry, it's
+ * safe for callers to continue to use that pointer as long as they hold
+ * the relation open.
+ */
+PartitionKey
+RelationGetPartitionKey(Relation rel)
+{
+ if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+ return NULL;
+
+ if (unlikely(rel->rd_partkey == NULL))
+ RelationBuildPartitionKey(rel);
+
+ return rel->rd_partkey;
+}
+
/*
* RelationBuildPartitionKey
* Build partition key data of relation, and attach to relcache
* that some of our callees allocate memory on their own which would be leaked
* permanently.
*/
-void
+static void
RelationBuildPartitionKey(Relation relation)
{
Form_pg_partitioned_table form;
tuple = SearchSysCache1(PARTRELID,
ObjectIdGetDatum(RelationGetRelid(relation)));
- /*
- * The following happens when we have created our pg_class entry but not
- * the pg_partitioned_table entry yet.
- */
if (!HeapTupleIsValid(tuple))
- return;
+ elog(ERROR, "cache lookup failed for partition key of relation %u",
+ RelationGetRelid(relation));
partkeycxt = AllocSetContextCreate(CurTransactionContext,
"partition key",
#include "catalog/catalog.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
-#include "catalog/partition.h"
#include "catalog/pg_am.h"
#include "catalog/pg_amproc.h"
#include "catalog/pg_attrdef.h"
#include "catalog/pg_database.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
-#include "catalog/pg_partitioned_table.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_publication.h"
#include "catalog/pg_rewrite.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/optimizer.h"
-#include "partitioning/partbounds.h"
-#include "partitioning/partdesc.h"
#include "rewrite/rewriteDefine.h"
#include "rewrite/rowsecurity.h"
#include "storage/lmgr.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
-#include "utils/partcache.h"
#include "utils/relmapper.h"
#include "utils/resowner_private.h"
#include "utils/snapmgr.h"
relation->rd_fkeylist = NIL;
relation->rd_fkeyvalid = false;
- /* if a partitioned table, initialize key and partition descriptor info */
- if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
- {
- RelationBuildPartitionKey(relation);
- RelationBuildPartitionDesc(relation);
- }
- else
- {
- relation->rd_partkey = NULL;
- relation->rd_partkeycxt = NULL;
- relation->rd_partdesc = NULL;
- relation->rd_pdcxt = NULL;
- }
- /* ... but partcheck is not loaded till asked for */
+ /* partitioning data is not loaded till asked for */
+ relation->rd_partkey = NULL;
+ relation->rd_partkeycxt = NULL;
+ relation->rd_partdesc = NULL;
+ relation->rd_pdcxt = NULL;
relation->rd_partcheck = NIL;
relation->rd_partcheckvalid = false;
relation->rd_partcheckcxt = NULL;
/* Note: no locking manipulations needed */
RelationDecrementReferenceCount(relation);
+ /*
+ * If the relation is no longer open in this session, we can clean up any
+ * stale partition descriptors it has. This is unlikely, so check to see
+ * if there are child contexts before expending a call to mcxt.c.
+ */
+ if (RelationHasReferenceCountZero(relation) &&
+ relation->rd_pdcxt != NULL &&
+ relation->rd_pdcxt->firstchild != NULL)
+ MemoryContextDeleteChildren(relation->rd_pdcxt);
+
#ifdef RELCACHE_FORCE_RELEASE
if (RelationHasReferenceCountZero(relation) &&
relation->rd_createSubid == InvalidSubTransactionId &&
bool keep_rules;
bool keep_policies;
bool keep_partkey;
- bool keep_partdesc;
/* Build temporary entry, but don't link it into hashtable */
newrel = RelationBuildDesc(save_relid, false);
keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
/* partkey is immutable once set up, so we can always keep it */
keep_partkey = (relation->rd_partkey != NULL);
- keep_partdesc = equalPartitionDescs(relation->rd_partkey,
- relation->rd_partdesc,
- newrel->rd_partdesc);
/*
* Perform swapping of the relcache entry contents. Within this
SWAPFIELD(Oid, rd_toastoid);
/* pgstat_info must be preserved */
SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
- /* preserve old partitioning info if no logical change */
+ /* preserve old partition key if we have one */
if (keep_partkey)
{
SWAPFIELD(PartitionKey, rd_partkey);
SWAPFIELD(MemoryContext, rd_partkeycxt);
}
- if (keep_partdesc)
- {
- SWAPFIELD(PartitionDesc, rd_partdesc);
- SWAPFIELD(MemoryContext, rd_pdcxt);
- }
- else if (rebuild && newrel->rd_pdcxt != NULL)
+ if (newrel->rd_pdcxt != NULL)
{
/*
* We are rebuilding a partitioned relation with a non-zero
- * reference count, so keep the old partition descriptor around,
- * in case there's a PartitionDirectory with a pointer to it.
- * Attach it to the new rd_pdcxt so that it gets cleaned up
- * eventually. In the case where the reference count is 0, this
- * code is not reached, which should be OK because in that case
- * there should be no PartitionDirectory with a pointer to the old
- * entry.
+ * reference count, so we must keep the old partition descriptor
+ * around, in case there's a PartitionDirectory with a pointer to
+ * it. This means we can't free the old rd_pdcxt yet. (This is
+ * necessary because RelationGetPartitionDesc hands out direct
+ * pointers to the relcache's data structure, unlike our usual
+ * practice which is to hand out copies. We'd have the same
+ * problem with rd_partkey, except that we always preserve that
+ * once created.)
+ *
+ * To ensure that it's not leaked completely, re-attach it to the
+ * new reldesc, or make it a child of the new reldesc's rd_pdcxt
+ * in the unlikely event that there is one already. (Compare hack
+ * in RelationBuildPartitionDesc.) RelationClose will clean up
+ * any such contexts once the reference count reaches zero.
+ *
+ * In the case where the reference count is zero, this code is not
+ * reached, which should be OK because in that case there should
+ * be no PartitionDirectory with a pointer to the old entry.
*
* Note that newrel and relation have already been swapped, so the
* "old" partition descriptor is actually the one hanging off of
* newrel.
*/
- MemoryContextSetParent(newrel->rd_pdcxt, relation->rd_pdcxt);
+ relation->rd_partdesc = NULL; /* ensure rd_partdesc is invalid */
+ if (relation->rd_pdcxt != NULL) /* probably never happens */
+ MemoryContextSetParent(newrel->rd_pdcxt, relation->rd_pdcxt);
+ else
+ relation->rd_pdcxt = newrel->rd_pdcxt;
+ /* drop newrel's pointers so we don't destroy it below */
newrel->rd_partdesc = NULL;
newrel->rd_pdcxt = NULL;
}
restart = true;
}
- /*
- * Reload the partition key and descriptor for a partitioned table.
- */
- if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
- relation->rd_partkey == NULL)
- {
- RelationBuildPartitionKey(relation);
- Assert(relation->rd_partkey != NULL);
-
- restart = true;
- }
-
- if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
- relation->rd_partdesc == NULL)
- {
- RelationBuildPartitionDesc(relation);
- Assert(relation->rd_partdesc != NULL);
-
- restart = true;
- }
-
+ /* Reload tableam data if needed */
if (relation->rd_tableam == NULL &&
(relation->rd_rel->relkind == RELKIND_RELATION ||
relation->rd_rel->relkind == RELKIND_SEQUENCE ||
PartitionBoundInfo boundinfo; /* collection of partition bounds */
} PartitionDescData;
-extern void RelationBuildPartitionDesc(Relation rel);
+
+extern PartitionDesc RelationGetPartitionDesc(Relation rel);
extern PartitionDirectory CreatePartitionDirectory(MemoryContext mcxt);
extern PartitionDesc PartitionDirectoryLookup(PartitionDirectory, Relation);
Oid *parttypcoll;
} PartitionKeyData;
-extern void RelationBuildPartitionKey(Relation relation);
+
+extern PartitionKey RelationGetPartitionKey(Relation rel);
extern List *RelationGetPartitionQual(Relation rel);
extern Expr *get_partition_qual_relid(Oid relid);
#include "catalog/pg_index.h"
#include "catalog/pg_publication.h"
#include "nodes/bitmapset.h"
+#include "partitioning/partdefs.h"
#include "rewrite/prs2lock.h"
#include "storage/block.h"
#include "storage/relfilenode.h"
List *rd_fkeylist; /* list of ForeignKeyCacheInfo (see below) */
bool rd_fkeyvalid; /* true if list has been computed */
- struct PartitionKeyData *rd_partkey; /* partition key, or NULL */
+ PartitionKey rd_partkey; /* partition key, or NULL */
MemoryContext rd_partkeycxt; /* private context for rd_partkey, if any */
- struct PartitionDescData *rd_partdesc; /* partitions, or NULL */
+ PartitionDesc rd_partdesc; /* partition descriptor, or NULL */
MemoryContext rd_pdcxt; /* private context for rd_partdesc, if any */
List *rd_partcheck; /* partition CHECK quals */
bool rd_partcheckvalid; /* true if list has been computed */
Oid *rd_opfamily; /* OIDs of op families for each index col */
Oid *rd_opcintype; /* OIDs of opclass declared input data types */
RegProcedure *rd_support; /* OIDs of support procedures */
- struct FmgrInfo *rd_supportinfo; /* lookup info for support procedures */
+ struct FmgrInfo *rd_supportinfo; /* lookup info for support procedures */
int16 *rd_indoption; /* per-column AM-specific flags */
List *rd_indexprs; /* index expression trees, if any */
List *rd_indpred; /* index predicate tree, if any */
RelationNeedsWAL(relation) && \
!IsCatalogRelation(relation))
-/*
- * RelationGetPartitionKey
- * Returns the PartitionKey of a relation
- */
-#define RelationGetPartitionKey(relation) ((relation)->rd_partkey)
-
-/*
- * RelationGetPartitionDesc
- * Returns partition descriptor for a relation.
- */
-#define RelationGetPartitionDesc(relation) ((relation)->rd_partdesc)
-
/* routines in utils/cache/relcache.c */
extern void RelationIncrementReferenceCount(Relation rel);
extern void RelationDecrementReferenceCount(Relation rel);
Partition constraint: (((a + 1) IS NOT NULL) AND (substr(b, 1, 5) IS NOT NULL) AND (((a + 1) > '-1'::integer) OR (((a + 1) = '-1'::integer) AND (substr(b, 1, 5) >= 'aaaaa'::text))) AND (((a + 1) < 100) OR (((a + 1) = 100) AND (substr(b, 1, 5) < 'ccccc'::text))))
DROP TABLE partitioned, partitioned2;
+-- check reference to partitioned table's rowtype in partition descriptor
+create table partitioned (a int, b int)
+ partition by list ((row(a, b)::partitioned));
+create table partitioned1
+ partition of partitioned for values in ('(1,2)'::partitioned);
+create table partitioned2
+ partition of partitioned for values in ('(2,4)'::partitioned);
+explain (costs off)
+select * from partitioned where row(a,b)::partitioned = '(1,2)'::partitioned;
+ QUERY PLAN
+-----------------------------------------------------------
+ Seq Scan on partitioned1 partitioned
+ Filter: (ROW(a, b)::partitioned = '(1,2)'::partitioned)
+(2 rows)
+
+drop table partitioned;
-- check that dependencies of partition columns are handled correctly
create domain intdom1 as int;
create table partitioned (
DROP TABLE partitioned, partitioned2;
+-- check reference to partitioned table's rowtype in partition descriptor
+create table partitioned (a int, b int)
+ partition by list ((row(a, b)::partitioned));
+create table partitioned1
+ partition of partitioned for values in ('(1,2)'::partitioned);
+create table partitioned2
+ partition of partitioned for values in ('(2,4)'::partitioned);
+explain (costs off)
+select * from partitioned where row(a,b)::partitioned = '(1,2)'::partitioned;
+drop table partitioned;
+
-- check that dependencies of partition columns are handled correctly
create domain intdom1 as int;