--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * rangetypes_selfuncs.c
+ * Functions for selectivity estimation of range operators
+ *
+ * Estimates are based on histograms of lower and upper bounds, and the
+ * fraction of empty ranges.
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/rangetypes_selfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_operator.h"
+#include "catalog/pg_statistic.h"
+#include "utils/lsyscache.h"
+#include "utils/rangetypes.h"
+#include "utils/selfuncs.h"
+#include "utils/typcache.h"
+
+static double calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
+ RangeType *constval, Oid operator);
+static double default_range_selectivity(Oid operator);
+static double calc_hist_selectivity(TypeCacheEntry *typcache,
+ VariableStatData *vardata, RangeType *constval,
+ Oid operator);
+static double calc_hist_selectivity_scalar(TypeCacheEntry *typcache,
+ RangeBound *constbound,
+ RangeBound *hist, int hist_nvalues,
+ bool equal);
+static int rbound_bsearch(TypeCacheEntry *typcache, RangeBound *value,
+ RangeBound *hist, int hist_length, bool equal);
+static float8 get_position(TypeCacheEntry *typcache, RangeBound *value,
+ RangeBound *hist1, RangeBound *hist2);
+
+/*
+ * Returns a default selectivity estimate for given operator, when we don't
+ * have statistics or cannot use them for some reason.
+ *
+ * The operator is identified by its OID.  Operators that are not recognized
+ * get a generic fallback value rather than raising an error.
+ */
+static double
+default_range_selectivity(Oid operator)
+{
+	switch (operator)
+	{
+		case OID_RANGE_OVERLAP_OP:
+			return 0.01;
+
+		case OID_RANGE_CONTAINS_OP:
+		case OID_RANGE_CONTAINED_OP:
+			return 0.005;
+
+		case OID_RANGE_CONTAINS_ELEM_OP:
+			/*
+			 * "range @> elem" is more or less identical to a scalar
+			 * inequality "A >= b AND A <= c".
+			 */
+			return DEFAULT_RANGE_INEQ_SEL;
+
+		case OID_RANGE_LESS_OP:
+		case OID_RANGE_LESS_EQUAL_OP:
+		case OID_RANGE_GREATER_OP:
+		case OID_RANGE_GREATER_EQUAL_OP:
+		case OID_RANGE_LEFT_OP:
+		case OID_RANGE_RIGHT_OP:
+		case OID_RANGE_OVERLAPS_LEFT_OP:
+		case OID_RANGE_OVERLAPS_RIGHT_OP:
+			/*
+			 * These are similar to regular scalar inequalities.  That
+			 * includes &< and &>, which the histogram code estimates by
+			 * comparing a single pair of bounds.
+			 */
+			return DEFAULT_INEQ_SEL;
+
+		default:
+			/* all range operators should be handled above, but just in case */
+			return 0.01;
+	}
+}
+
+/*
+ * rangesel -- restriction selectivity for range operators
+ *
+ * Arguments (fmgr convention): planner info, operator OID, argument list of
+ * the operator clause, and varRelid.  Returns the estimated fraction of rows
+ * satisfying the clause as a float8 clamped to [0, 1].
+ *
+ * Whenever the clause shape or the constant's type prevents using the
+ * statistics, the per-operator default from default_range_selectivity()
+ * is returned instead.
+ */
+Datum
+rangesel(PG_FUNCTION_ARGS)
+{
+	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+	Oid			operator = PG_GETARG_OID(1);
+	List	   *args = (List *) PG_GETARG_POINTER(2);
+	int			varRelid = PG_GETARG_INT32(3);
+	VariableStatData vardata;
+	Node	   *other;
+	bool		varonleft;
+	Selectivity selec;
+	TypeCacheEntry *typcache;
+	RangeType  *constrange = NULL;
+
+	/*
+	 * If expression is not (variable op something) or (something op
+	 * variable), then punt and return a default estimate.
+	 */
+	if (!get_restriction_variable(root, args, varRelid,
+								  &vardata, &other, &varonleft))
+		PG_RETURN_FLOAT8(default_range_selectivity(operator));
+
+	/*
+	 * Can't do anything useful if the something is not a constant, either.
+	 */
+	if (!IsA(other, Const))
+	{
+		ReleaseVariableStats(vardata);
+		PG_RETURN_FLOAT8(default_range_selectivity(operator));
+	}
+
+	/*
+	 * All the range operators are strict, so we can cope with a NULL constant
+	 * right away.
+	 */
+	if (((Const *) other)->constisnull)
+	{
+		ReleaseVariableStats(vardata);
+		PG_RETURN_FLOAT8(0.0);
+	}
+
+	/*
+	 * If var is on the right, commute the operator, so that we can assume the
+	 * var is on the left in what follows.
+	 */
+	if (!varonleft)
+	{
+		/* we have other Op var, commute to make var Op other */
+		Oid			commutator = get_commutator(operator);
+
+		if (!OidIsValid(commutator))
+		{
+			/*
+			 * No commutator: use the default selectivity of the operator we
+			 * were given.  'operator' must not be overwritten before this
+			 * point, or the lookup would be done with an invalid OID and
+			 * always yield the generic fallback.
+			 */
+			ReleaseVariableStats(vardata);
+			PG_RETURN_FLOAT8(default_range_selectivity(operator));
+		}
+		operator = commutator;
+	}
+
+	typcache = range_get_typcache(fcinfo, vardata.vartype);
+
+	/*
+	 * OK, there's a Var and a Const we're dealing with here.  We need the
+	 * Const to be of same range type as the column, else we can't do anything
+	 * useful. (Such cases will likely fail at runtime, but here we'd rather
+	 * just return a default estimate.)
+	 *
+	 * If the operator is "range @> element", the constant should be of the
+	 * element type of the range column. Convert it to a range that includes
+	 * only that single point, so that we don't need special handling for
+	 * that in what follows.
+	 */
+	if (operator == OID_RANGE_CONTAINS_ELEM_OP)
+	{
+		if (((Const *) other)->consttype == typcache->rngelemtype->type_id)
+		{
+			RangeBound	lower,
+						upper;
+
+			/* build the closed single-point range [elem, elem] */
+			lower.inclusive = true;
+			lower.val = ((Const *) other)->constvalue;
+			lower.infinite = false;
+			lower.lower = true;
+			upper.inclusive = true;
+			upper.val = ((Const *) other)->constvalue;
+			upper.infinite = false;
+			upper.lower = false;
+			constrange = range_serialize(typcache, &lower, &upper, false);
+		}
+	}
+	else
+	{
+		if (((Const *) other)->consttype == vardata.vartype)
+			constrange = DatumGetRangeType(((Const *) other)->constvalue);
+	}
+
+	/*
+	 * If we got a valid constant on one side of the operator, proceed to
+	 * estimate using statistics. Otherwise punt and return a default
+	 * constant estimate.
+	 */
+	if (constrange)
+		selec = calc_rangesel(typcache, &vardata, constrange, operator);
+	else
+		selec = default_range_selectivity(operator);
+
+	ReleaseVariableStats(vardata);
+
+	CLAMP_PROBABILITY(selec);
+
+	PG_RETURN_FLOAT8((float8) selec);
+}
+
+/*
+ * Estimate the selectivity of "var operator constval", with the variable on
+ * the left, combining three ingredients: the NULL fraction, the fraction of
+ * empty ranges, and the bounds histogram (via calc_hist_selectivity).
+ *
+ * 'constval' must be a range of the column's range type.  The result is
+ * clamped to [0, 1].
+ *
+ * Empty ranges sort before all non-empty ranges and are equal to each
+ * other; the handling of the ordering operators below relies on that.
+ */
+static double
+calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
+			  RangeType *constval, Oid operator)
+{
+	double		hist_selec;
+	double		selec;
+	float4		empty_frac,
+				null_frac;
+
+	/*
+	 * First look up the fraction of NULLs and empty ranges from pg_statistic.
+	 */
+	if (HeapTupleIsValid(vardata->statsTuple))
+	{
+		Form_pg_statistic stats;
+		float4	   *numbers;
+		int			nnumbers;
+
+		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+		null_frac = stats->stanullfrac;
+
+		/* Try to get fraction of empty ranges */
+		if (get_attstatsslot(vardata->statsTuple,
+							 vardata->atttype, vardata->atttypmod,
+							 STATISTIC_KIND_RANGE_EMPTY_FRAC, InvalidOid,
+							 NULL,
+							 NULL, NULL,
+							 &numbers, &nnumbers))
+		{
+			if (nnumbers != 1)
+				elog(ERROR, "invalid empty fraction statistic");	/* shouldn't happen */
+			empty_frac = numbers[0];
+
+			/* the value has been copied out, so release the slot data */
+			free_attstatsslot(vardata->atttype, NULL, 0, numbers, nnumbers);
+		}
+		else
+		{
+			/* No empty fraction statistic. Assume no empty ranges. */
+			empty_frac = 0.0;
+		}
+	}
+	else
+	{
+		/*
+		 * No stats are available. Follow through the calculations below
+		 * anyway, assuming no NULLs and no empty ranges. This still allows
+		 * us to give a better-than-nothing estimate based on whether the
+		 * constant is an empty range or not.
+		 */
+		null_frac = 0.0;
+		empty_frac = 0.0;
+	}
+
+	if (RangeIsEmpty(constval))
+	{
+		/*
+		 * An empty range matches all ranges, all empty ranges, all non-empty
+		 * ranges, or nothing, depending on the operator
+		 */
+		switch (operator)
+		{
+			case OID_RANGE_OVERLAP_OP:
+			case OID_RANGE_OVERLAPS_LEFT_OP:
+			case OID_RANGE_OVERLAPS_RIGHT_OP:
+			case OID_RANGE_LEFT_OP:
+			case OID_RANGE_RIGHT_OP:
+				/* these return false if either argument is empty */
+			case OID_RANGE_LESS_OP:
+				/* nothing is less than an empty range */
+				selec = 0.0;
+				break;
+
+			case OID_RANGE_CONTAINED_OP:
+				/* only empty ranges can be contained by an empty range */
+			case OID_RANGE_LESS_EQUAL_OP:
+				/* only empty ranges are <= an empty range */
+				selec = empty_frac;
+				break;
+
+			case OID_RANGE_CONTAINS_OP:
+				/* everything contains an empty range */
+			case OID_RANGE_GREATER_EQUAL_OP:
+				/* everything is >= an empty range */
+				selec = 1.0;
+				break;
+
+			case OID_RANGE_GREATER_OP:
+				/* all non-empty ranges are > an empty range */
+				selec = 1.0 - empty_frac;
+				break;
+
+			case OID_RANGE_CONTAINS_ELEM_OP:
+				/* the single-point range built for an element is never empty */
+			default:
+				elog(ERROR, "unexpected operator %u", operator);
+				selec = 0.0;	/* keep compiler quiet */
+				break;
+		}
+	}
+	else
+	{
+		/*
+		 * Calculate selectivity using bound histograms. If that fails for
+		 * some reason, e.g no histogram in pg_statistic, use the default
+		 * constant estimate for the fraction of non-empty values. This is
+		 * still somewhat better than just returning the default estimate,
+		 * because this still takes into account the fraction of empty and
+		 * NULL tuples, if we had statistics for them.
+		 */
+		hist_selec = calc_hist_selectivity(typcache, vardata, constval,
+										   operator);
+		if (hist_selec < 0.0)
+			hist_selec = default_range_selectivity(operator);
+
+		/*
+		 * Now merge the results for the empty ranges and histogram
+		 * calculations, realizing that the histogram covers only the
+		 * non-null, non-empty values.
+		 */
+		switch (operator)
+		{
+			case OID_RANGE_CONTAINED_OP:
+				/* empty is contained by anything non-empty */
+			case OID_RANGE_LESS_OP:
+			case OID_RANGE_LESS_EQUAL_OP:
+				/* empty sorts before anything non-empty */
+				selec = (1.0 - empty_frac) * hist_selec + empty_frac;
+				break;
+
+			default:
+				/* with any other operator, empty Op non-empty matches nothing */
+				selec = (1.0 - empty_frac) * hist_selec;
+				break;
+		}
+	}
+
+	/* all range operators are strict */
+	selec *= (1.0 - null_frac);
+
+	/* result should be in range, but make sure... */
+	CLAMP_PROBABILITY(selec);
+
+	return selec;
+}
+
+/*
+ * Calculate range operator selectivity using histograms of range bounds.
+ *
+ * This estimate is for the portion of values that are not empty and not
+ * NULL.  'constval' must be a non-empty range.
+ *
+ * Returns -1.0 when no estimate can be made: there is no usable bounds
+ * histogram, or the operator is not implemented here.  The caller is
+ * expected to substitute a default in that case.
+ */
+static double
+calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
+					  RangeType *constval, Oid operator)
+{
+	Datum	   *hist_values;
+	int			nhist;
+	RangeBound *hist_lower;
+	RangeBound *hist_upper;
+	int			i;
+	RangeBound	const_lower;
+	RangeBound	const_upper;
+	bool		empty;
+	double		hist_selec;
+
+	/* Try to get histogram of ranges */
+	if (!(HeapTupleIsValid(vardata->statsTuple) &&
+		  get_attstatsslot(vardata->statsTuple,
+						   vardata->atttype, vardata->atttypmod,
+						   STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid,
+						   NULL,
+						   &hist_values, &nhist,
+						   NULL, NULL)))
+		return -1.0;
+
+	/*
+	 * A histogram needs at least two entries to delimit a bin.  With fewer,
+	 * calc_hist_selectivity_scalar would divide by (nhist - 1) == 0, so
+	 * treat such a slot the same as having no histogram at all.
+	 */
+	if (nhist < 2)
+	{
+		free_attstatsslot(vardata->atttype, hist_values, nhist, NULL, 0);
+		return -1.0;
+	}
+
+	/*
+	 * Convert histogram of ranges into histograms of its lower and upper
+	 * bounds.
+	 */
+	hist_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
+	hist_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
+	for (i = 0; i < nhist; i++)
+	{
+		range_deserialize(typcache, DatumGetRangeType(hist_values[i]),
+						  &hist_lower[i], &hist_upper[i], &empty);
+		/* The histogram should not contain any empty ranges */
+		if (empty)
+			elog(ERROR, "bounds histogram contains an empty range");
+	}
+
+	/* Extract the bounds of the constant value. */
+	range_deserialize(typcache, constval, &const_lower, &const_upper, &empty);
+	Assert (!empty);
+
+	/*
+	 * Calculate selectivity comparing the lower or upper bound of the
+	 * constant with the histogram of lower or upper bounds.
+	 */
+	switch (operator)
+	{
+		case OID_RANGE_LESS_OP:
+			/*
+			 * The regular b-tree comparison operators (<, <=, >, >=) compare
+			 * the lower bounds first, and the upper bounds for values with
+			 * equal lower bounds. Estimate that by comparing the lower bounds
+			 * only. This gives a fairly accurate estimate assuming there
+			 * aren't many rows with a lower bound equal to the constant's
+			 * lower bound.
+			 */
+			hist_selec =
+				calc_hist_selectivity_scalar(typcache, &const_lower,
+											 hist_lower, nhist, false);
+			break;
+
+		case OID_RANGE_LESS_EQUAL_OP:
+			hist_selec =
+				calc_hist_selectivity_scalar(typcache, &const_lower,
+											 hist_lower, nhist, true);
+			break;
+
+		case OID_RANGE_GREATER_OP:
+			hist_selec =
+				1 - calc_hist_selectivity_scalar(typcache, &const_lower,
+												 hist_lower, nhist, true);
+			break;
+
+		case OID_RANGE_GREATER_EQUAL_OP:
+			hist_selec =
+				1 - calc_hist_selectivity_scalar(typcache, &const_lower,
+												 hist_lower, nhist, false);
+			break;
+
+		case OID_RANGE_LEFT_OP:
+			/* var << const when upper(var) < lower(const) */
+			hist_selec =
+				calc_hist_selectivity_scalar(typcache, &const_lower,
+											 hist_upper, nhist, false);
+			break;
+
+		case OID_RANGE_RIGHT_OP:
+			/* var >> const when lower(var) > upper(const) */
+			hist_selec =
+				1 - calc_hist_selectivity_scalar(typcache, &const_upper,
+												 hist_lower, nhist, true);
+			break;
+
+		case OID_RANGE_OVERLAPS_RIGHT_OP:
+			/* compare lower bounds */
+			hist_selec =
+				1 - calc_hist_selectivity_scalar(typcache, &const_lower,
+												 hist_lower, nhist, false);
+			break;
+
+		case OID_RANGE_OVERLAPS_LEFT_OP:
+			/* compare upper bounds */
+			hist_selec =
+				calc_hist_selectivity_scalar(typcache, &const_upper,
+											 hist_upper, nhist, true);
+			break;
+
+		case OID_RANGE_OVERLAP_OP:
+		case OID_RANGE_CONTAINS_ELEM_OP:
+			/*
+			 * A && B <=> NOT (A << B OR A >> B).
+			 *
+			 * "range @> elem" is equivalent to "range && [elem,elem]". The
+			 * caller already constructed the singular range from the element
+			 * constant, so just treat it the same as &&.
+			 */
+			hist_selec =
+				calc_hist_selectivity_scalar(typcache, &const_lower, hist_upper,
+											 nhist, false);
+			hist_selec +=
+				(1.0 - calc_hist_selectivity_scalar(typcache, &const_upper, hist_lower,
+													nhist, true));
+			hist_selec = 1.0 - hist_selec;
+			break;
+
+		case OID_RANGE_CONTAINS_OP:
+		case OID_RANGE_CONTAINED_OP:
+			/* TODO: not implemented yet */
+			hist_selec = -1.0;
+			break;
+
+		default:
+			elog(ERROR, "unknown range operator %u", operator);
+			hist_selec = -1.0;	/* keep compiler quiet */
+			break;
+	}
+
+	/*
+	 * Release the working arrays and the statistics slot.  The slot goes
+	 * last, because the deserialized bounds may reference its values.
+	 */
+	pfree(hist_lower);
+	pfree(hist_upper);
+	free_attstatsslot(vardata->atttype, hist_values, nhist, NULL, 0);
+
+	return hist_selec;
+}
+
+
+/*
+ * Estimate the fraction of histogram entries that are less than 'constbound'
+ * (or less than or equal to it, when 'equal' is true).
+ *
+ * 'hist' is an array of 'hist_nvalues' range bounds, which therefore
+ * delimits hist_nvalues - 1 bins.
+ */
+static double
+calc_hist_selectivity_scalar(TypeCacheEntry *typcache, RangeBound *constbound,
+							 RangeBound *hist, int hist_nvalues, bool equal)
+{
+	int			last_idx = hist_nvalues - 1;
+	int			bin;
+	Selectivity result;
+
+	/* Locate the bin holding the constant; -1 means it precedes every entry */
+	bin = rbound_bsearch(typcache, constbound, hist, hist_nvalues, equal);
+
+	/* Every bin wholly before the constant counts in full */
+	result = (Selectivity) (Max(bin, 0)) / (Selectivity) last_idx;
+
+	/* Add the interpolated share of the bin the constant falls within */
+	if (bin >= 0 && bin < last_idx)
+		result += get_position(typcache, constbound, &hist[bin],
+							   &hist[bin + 1]) / (Selectivity) last_idx;
+
+	return result;
+}
+
+/*
+ * Binary search on an array of range bounds, as ordered by range_cmp_bounds.
+ *
+ * Returns the greatest index whose bound is less than 'value', or less than
+ * or equal to it when 'equal' is true.  Returns -1 if no entry in the array
+ * qualifies.
+ */
+static int
+rbound_bsearch(TypeCacheEntry *typcache, RangeBound *value, RangeBound *hist,
+			   int hist_length, bool equal)
+{
+	int			lo = -1;
+	int			hi = hist_length - 1;
+
+	while (lo < hi)
+	{
+		/* round the midpoint up so that "lo = mid" always makes progress */
+		int			mid = (lo + hi + 1) / 2;
+		int			cmp = range_cmp_bounds(typcache, &hist[mid], value);
+		bool		too_big = (cmp > 0) || (cmp == 0 && !equal);
+
+		if (too_big)
+			hi = mid - 1;
+		else
+			lo = mid;
+	}
+
+	return lo;
+}
+
+/*
+ * Get relative position of value in histogram bin in [0,1] range.
+ *
+ * 'hist1' and 'hist2' are the lower and upper boundaries of the bin.  When
+ * the position cannot be computed (no subtype difference function, a
+ * zero-width bin, or unexpected infinities), 0.5 is returned.
+ */
+static float8
+get_position(TypeCacheEntry *typcache, RangeBound *value, RangeBound *hist1,
+			 RangeBound *hist2)
+{
+	float8		width;
+	float8		offset;
+	float8		position;
+
+	/* Deal with bins that have an infinite boundary first. */
+	if (hist1->infinite || hist2->infinite)
+	{
+		if (hist1->infinite && hist2->infinite)
+		{
+			/*
+			 * If both bin boundaries are infinite, they should be equal to
+			 * each other, and the value should also be infinite and equal to
+			 * both bounds. (But don't Assert that, to avoid crashing if a
+			 * user creates a datatype with a broken comparison function).
+			 *
+			 * Assume the value to lie in the middle of the infinite bounds.
+			 */
+			return 0.5;
+		}
+
+		if (hist1->infinite)
+		{
+			/*
+			 * Lower boundary is -infinite, upper is finite.  A value that is
+			 * itself -infinite sits on the lower boundary (0.0); anything
+			 * else is infinitely far from it (1.0).
+			 */
+			return ((value->infinite && value->lower) ? 0.0 : 1.0);
+		}
+
+		/* Upper boundary is infinite, lower is finite: the mirror image. */
+		return ((value->infinite && !value->lower) ? 1.0 : 0.0);
+	}
+
+	/*
+	 * Both boundaries are finite.  With a sane comparison function the value
+	 * lies between them and so is finite too; if not, just return something.
+	 */
+	if (value->infinite)
+		return 0.5;
+
+	/* Can't interpolate without subdiff function */
+	if (!OidIsValid(typcache->rng_subdiff_finfo.fn_oid))
+		return 0.5;
+
+	/* Width of the bin, as measured by the subdiff function. */
+	width = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
+											 typcache->rng_collation,
+											 hist2->val,
+											 hist1->val));
+	if (width <= 0.0)
+		return 0.5;				/* zero width bin */
+
+	/* Distance of the value from the bin's lower boundary. */
+	offset = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
+											  typcache->rng_collation,
+											  value->val,
+											  hist1->val));
+	position = offset / width;
+
+	/* Relative position must be in [0,1] range */
+	position = Max(position, 0.0);
+	position = Min(position, 1.0);
+	return position;
+}
+
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * rangetypes_typanalyze.c
+ * Functions for gathering statistics from range columns
+ *
+ * For a range type column, histograms of lower and upper bounds, and
+ * the fraction of NULL and empty ranges are collected.
+ *
+ * Both histograms have the same length, and they are combined into a
+ * single array of ranges. This has the same shape as the histogram that
+ * std_typanalyze would collect, but the values are different. Each range
+ * in the array is a valid range, even though the lower and upper bounds
+ * come from different tuples. In theory, the standard scalar selectivity
+ * functions could be used with the combined histogram.
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/rangetypes_typanalyze.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_operator.h"
+#include "commands/vacuum.h"
+#include "utils/builtins.h"
+#include "utils/rangetypes.h"
+
+static void compute_range_stats(VacAttrStats *stats,
+ AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows);
+
+/*
+ * range_typanalyze -- typanalyze function for range columns
+ *
+ * Installs compute_range_stats as the statistics-computing callback, passes
+ * it the range type's typcache entry through extra_data, and sets the
+ * minimum number of sample rows.  Always returns true.
+ */
+Datum
+range_typanalyze(PG_FUNCTION_ARGS)
+{
+	VacAttrStats *stats = (VacAttrStats *) PG_GETARG_POINTER(0);
+	TypeCacheEntry *range_typcache;
+
+	/* Look up the range type before touching anything else */
+	range_typcache = range_get_typcache(fcinfo, stats->attrtypid);
+
+	/* A negative target means "use the system default" */
+	if (stats->attr->attstattarget < 0)
+		stats->attr->attstattarget = default_statistics_target;
+
+	/* same as in std_typanalyze */
+	stats->minrows = 300 * stats->attr->attstattarget;
+	stats->extra_data = range_typcache;
+	stats->compute_stats = compute_range_stats;
+
+	PG_RETURN_BOOL(true);
+}
+
+/*
+ * qsort_arg comparator for arrays of RangeBound.
+ *
+ * 'arg' is the TypeCacheEntry of the range type; the ordering is the one
+ * defined by range_cmp_bounds.
+ */
+static int
+range_bound_qsort_cmp(const void *a1, const void *a2, void *arg)
+{
+	return range_cmp_bounds((TypeCacheEntry *) arg,
+							(RangeBound *) a1,
+							(RangeBound *) a2);
+}
+
+/*
+ * compute_range_stats() -- compute statistics for a range column
+ *
+ * Scans the 'samplerows' sampled values through 'fetchfunc' and fills in
+ * 'stats' with:
+ *	- the NULL fraction and average width,
+ *	- a bounds histogram slot (only if at least two non-empty ranges were
+ *	  sampled), and
+ *	- a slot holding the fraction of non-null values that are empty ranges.
+ *
+ * If only NULLs were sampled, the column is recorded as entirely NULL.  If
+ * no rows were sampled at all, 'stats' is left untouched.
+ */
+static void
+compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
+					int samplerows, double totalrows)
+{
+	TypeCacheEntry *typcache = (TypeCacheEntry *) stats->extra_data;
+	int			null_cnt = 0;
+	int			non_null_cnt = 0;
+	int			non_empty_cnt = 0;
+	int			empty_cnt = 0;
+	int			range_no;
+	int			slot_idx;
+	int			num_bins = stats->attr->attstattarget;
+	int			num_hist;
+	RangeBound *lowers,
+			   *uppers;
+	double		total_width = 0;
+
+	/* Allocate memory for arrays of range bounds. */
+	lowers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
+	uppers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
+
+	/* Loop over the sample ranges. */
+	for (range_no = 0; range_no < samplerows; range_no++)
+	{
+		Datum		value;
+		bool		isnull,
+					empty;
+		RangeType  *range;
+		RangeBound	lower,
+					upper;
+
+		vacuum_delay_point();
+
+		value = fetchfunc(stats, range_no, &isnull);
+		if (isnull)
+		{
+			/* range is null, just count that */
+			null_cnt++;
+			continue;
+		}
+
+		/*
+		 * XXX: should we ignore wide values, like std_typanalyze does, to
+		 * avoid bloating the statistics table?
+		 */
+		total_width += VARSIZE_ANY(DatumGetPointer(value));
+
+		/* Get range and deserialize it for further analysis. */
+		range = DatumGetRangeType(value);
+		range_deserialize(typcache, range, &lower, &upper, &empty);
+
+		if (!empty)
+		{
+			/* Fill bound values for further usage in histograms */
+			lowers[non_empty_cnt] = lower;
+			uppers[non_empty_cnt] = upper;
+			non_empty_cnt++;
+		}
+		else
+			empty_cnt++;
+
+		non_null_cnt++;
+	}
+
+	slot_idx = 0;
+
+	/* We can only compute real stats if we found some non-null values. */
+	if (non_null_cnt > 0)
+	{
+		Datum	   *bound_hist_values;
+		int			pos,
+					posfrac,
+					delta,
+					deltafrac,
+					i;
+		MemoryContext old_cxt;
+		float4	   *emptyfrac;
+
+		stats->stats_valid = true;
+		/* Do the simple null-frac and width stats */
+		stats->stanullfrac = (double) null_cnt / (double) samplerows;
+		stats->stawidth = total_width / (double) non_null_cnt;
+		stats->stadistinct = -1.0;
+
+		/* Must copy the target values into anl_context */
+		old_cxt = MemoryContextSwitchTo(stats->anl_context);
+
+		/*
+		 * Generate a bounds histogram only if there are at least two
+		 * non-empty values.  With a single value num_hist would be 1, and
+		 * the step computation below would divide by (num_hist - 1) == 0;
+		 * a one-entry histogram delimits no bin anyway.
+		 */
+		if (non_empty_cnt >= 2)
+		{
+			/* Sort bound values */
+			qsort_arg(lowers, non_empty_cnt, sizeof(RangeBound),
+					  range_bound_qsort_cmp, typcache);
+			qsort_arg(uppers, non_empty_cnt, sizeof(RangeBound),
+					  range_bound_qsort_cmp, typcache);
+
+			num_hist = non_empty_cnt;
+			if (num_hist > num_bins)
+				num_hist = num_bins + 1;
+
+			bound_hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
+
+			/*
+			 * The object of this loop is to construct ranges from first and
+			 * last entries in lowers[] and uppers[] along with evenly-spaced
+			 * values in between. So the i'th value is a range of
+			 * lowers[(i * (nvals - 1)) / (num_hist - 1)] and
+			 * uppers[(i * (nvals - 1)) / (num_hist - 1)]. But computing that
+			 * subscript directly risks integer overflow when the stats target
+			 * is more than a couple thousand.  Instead we add
+			 * (nvals - 1) / (num_hist - 1) to pos at each step, tracking the
+			 * integral and fractional parts of the sum separately.
+			 */
+			delta = (non_empty_cnt - 1) / (num_hist - 1);
+			deltafrac = (non_empty_cnt - 1) % (num_hist - 1);
+			pos = posfrac = 0;
+
+			for (i = 0; i < num_hist; i++)
+			{
+				bound_hist_values[i] = PointerGetDatum(range_serialize(
+								typcache, &lowers[pos], &uppers[pos], false));
+				pos += delta;
+				posfrac += deltafrac;
+				if (posfrac >= (num_hist - 1))
+				{
+					/* fractional part exceeds 1, carry to integer part */
+					pos++;
+					posfrac -= (num_hist - 1);
+				}
+			}
+
+			stats->stakind[slot_idx] = STATISTIC_KIND_BOUNDS_HISTOGRAM;
+			stats->stavalues[slot_idx] = bound_hist_values;
+			stats->numvalues[slot_idx] = num_hist;
+			slot_idx++;
+		}
+
+		/* Store the fraction of empty ranges */
+		emptyfrac = (float4 *) palloc(sizeof(float4));
+		*emptyfrac = ((double) empty_cnt) / ((double) non_null_cnt);
+		stats->stakind[slot_idx] = STATISTIC_KIND_RANGE_EMPTY_FRAC;
+		stats->stanumbers[slot_idx] = emptyfrac;
+		stats->numnumbers[slot_idx] = 1;
+		slot_idx++;
+
+		MemoryContextSwitchTo(old_cxt);
+	}
+	else if (null_cnt > 0)
+	{
+		/* We found only nulls; assume the column is entirely null */
+		stats->stats_valid = true;
+		stats->stanullfrac = 1.0;
+		stats->stawidth = 0;	/* "unknown" */
+		stats->stadistinct = 0.0;		/* "unknown" */
+	}
+
+	/*
+	 * We don't bother cleaning up our temporary palloc's (the lowers[] and
+	 * uppers[] arrays) here.
+	 */
+}