From b9ec140c6301d7f6ae29ce370109b87343547034 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 26 Mar 2009 17:15:35 +0000 Subject: [PATCH] If we expect a hash join to be performed in multiple batches, suppress "physical tlist" optimization on the outer relation (ie, force a projection step to occur in its scan). This avoids storing useless column values when the outer relation's tuples are written to temporary batch files. Modified version of a patch by Michael Henderson and Ramon Lawrence. --- src/backend/nodes/outfuncs.c | 1 + src/backend/optimizer/path/costsize.c | 2 ++ src/backend/optimizer/plan/createplan.c | 4 ++++ src/backend/optimizer/util/pathnode.c | 13 ++++++++++++- src/include/nodes/relation.h | 1 + 5 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index e715a2128d..68a44339da 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1448,6 +1448,7 @@ _outHashPath(StringInfo str, HashPath *node) _outJoinPathInfo(str, (JoinPath *) node); WRITE_NODE_FIELD(path_hashclauses); + WRITE_INT_FIELD(num_batches); } static void diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 58beefe4a7..0ef6acd59f 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1880,6 +1880,8 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) &numbatches, &num_skew_mcvs); virtualbuckets = (double) numbuckets *(double) numbatches; + /* mark the path with estimated # of batches */ + path->num_batches = numbatches; /* * Determine bucketsize fraction for inner relation. We use the smallest diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 90fe45e859..64b0209856 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1910,6 +1910,10 @@ create_hashjoin_plan(PlannerInfo *root, /* We don't want any excess columns in the hashed tuples */ disuse_physical_tlist(inner_plan, best_path->jpath.innerjoinpath); + /* If we expect batching, suppress excess columns in outer tuples too */ + if (best_path->num_batches > 1) + disuse_physical_tlist(outer_plan, best_path->jpath.outerjoinpath); + /* * If there is a single join clause and we can identify the outer * variable as a simple column reference, supply its identity for diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 7ecdfda24f..e3f3f2c2f0 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1480,9 +1480,20 @@ create_hashjoin_path(PlannerInfo *root, pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; pathnode->jpath.joinrestrictinfo = restrict_clauses; - /* A hashjoin never has pathkeys, since its ordering is unpredictable */ + /* + * A hashjoin never has pathkeys, since its output ordering is + * unpredictable due to possible batching. XXX If the inner relation is + * small enough, we could instruct the executor that it must not batch, + * and then we could assume that the output inherits the outer relation's + * ordering, which might save a sort step. However there is considerable + * downside if our estimate of the inner relation size is badly off. + * For the moment we don't risk it. (Note also that if we wanted to take + * this seriously, joinpath.c would have to consider many more paths for + * the outer rel than it does now.) + */ pathnode->jpath.path.pathkeys = NIL; pathnode->path_hashclauses = hashclauses; + /* cost_hashjoin will fill in pathnode->num_batches */ cost_hashjoin(pathnode, root, sjinfo); diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index d4b4a76e5d..fa0e67617b 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -845,6 +845,7 @@ typedef struct HashPath { JoinPath jpath; List *path_hashclauses; /* join clauses used for hashing */ + int num_batches; /* number of batches expected */ } HashPath; /* -- 2.39.5