When there is just one non-null input value, and it is infinity or NaN,
aggregates such as stddev_pop and covar_pop should produce a NaN
result, because the calculation is not well-defined. They used to do
so, but since we adopted Youngs-Cramer aggregation in commit
e954a727f,
they produced zero instead. That's an oversight, so fix it. Add tests
exercising these edge cases.
Affected aggregates are
var_pop(double precision)
stddev_pop(double precision)
var_pop(real)
stddev_pop(real)
regr_sxx(double precision,double precision)
regr_syy(double precision,double precision)
regr_sxy(double precision,double precision)
regr_r2(double precision,double precision)
regr_slope(double precision,double precision)
regr_intercept(double precision,double precision)
covar_pop(double precision,double precision)
corr(double precision,double precision)
Back-patch to v12 where the behavior change was accidentally introduced.
Report and patch by me; thanks to Dean Rasheed for review.
Discussion: https://postgr.es/m/353062.
1591898766@sss.pgh.pa.us
Sxx = get_float8_nan();
}
}
+ else
+ {
+ /*
+ * At the first input, we normally can leave Sxx as 0. However, if
+ * the first input is Inf or NaN, we'd better force Sxx to NaN;
+ * otherwise we will falsely report variance zero when there are no
+ * more inputs.
+ */
+ if (isnan(newval) || isinf(newval))
+ Sxx = get_float8_nan();
+ }
/*
* If we're invoked as an aggregate, we can cheat and modify our first
Sxx = get_float8_nan();
}
}
+ else
+ {
+ /*
+ * At the first input, we normally can leave Sxx as 0. However, if
+ * the first input is Inf or NaN, we'd better force Sxx to NaN;
+ * otherwise we will falsely report variance zero when there are no
+ * more inputs.
+ */
+ if (isnan(newval) || isinf(newval))
+ Sxx = get_float8_nan();
+ }
/*
* If we're invoked as an aggregate, we can cheat and modify our first
Sxy = get_float8_nan();
}
}
+ else
+ {
+ /*
+ * At the first input, we normally can leave Sxx et al as 0. However,
+ * if the first input is Inf or NaN, we'd better force the dependent
+ * sums to NaN; otherwise we will falsely report variance zero when
+ * there are no more inputs.
+ */
+ if (isnan(newvalX) || isinf(newvalX))
+ Sxx = Sxy = get_float8_nan();
+ if (isnan(newvalY) || isinf(newvalY))
+ Syy = Sxy = get_float8_nan();
+ }
/*
* If we're invoked as an aggregate, we can cheat and modify our first
-- population variance is defined for a single tuple, sample variance
-- is not
-SELECT var_pop(1.0), var_samp(2.0);
+SELECT var_pop(1.0::float8), var_samp(2.0::float8);
+ var_pop | var_samp
+---------+----------
+ 0 |
+(1 row)
+
+SELECT stddev_pop(3.0::float8), stddev_samp(4.0::float8);
+ stddev_pop | stddev_samp
+------------+-------------
+ 0 |
+(1 row)
+
+SELECT var_pop('inf'::float8), var_samp('inf'::float8);
+ var_pop | var_samp
+---------+----------
+ NaN |
+(1 row)
+
+SELECT stddev_pop('inf'::float8), stddev_samp('inf'::float8);
+ stddev_pop | stddev_samp
+------------+-------------
+ NaN |
+(1 row)
+
+SELECT var_pop('nan'::float8), var_samp('nan'::float8);
+ var_pop | var_samp
+---------+----------
+ NaN |
+(1 row)
+
+SELECT stddev_pop('nan'::float8), stddev_samp('nan'::float8);
+ stddev_pop | stddev_samp
+------------+-------------
+ NaN |
+(1 row)
+
+SELECT var_pop(1.0::float4), var_samp(2.0::float4);
+ var_pop | var_samp
+---------+----------
+ 0 |
+(1 row)
+
+SELECT stddev_pop(3.0::float4), stddev_samp(4.0::float4);
+ stddev_pop | stddev_samp
+------------+-------------
+ 0 |
+(1 row)
+
+SELECT var_pop('inf'::float4), var_samp('inf'::float4);
+ var_pop | var_samp
+---------+----------
+ NaN |
+(1 row)
+
+SELECT stddev_pop('inf'::float4), stddev_samp('inf'::float4);
+ stddev_pop | stddev_samp
+------------+-------------
+ NaN |
+(1 row)
+
+SELECT var_pop('nan'::float4), var_samp('nan'::float4);
+ var_pop | var_samp
+---------+----------
+ NaN |
+(1 row)
+
+SELECT stddev_pop('nan'::float4), stddev_samp('nan'::float4);
+ stddev_pop | stddev_samp
+------------+-------------
+ NaN |
+(1 row)
+
+SELECT var_pop(1.0::numeric), var_samp(2.0::numeric);
var_pop | var_samp
---------+----------
0 |
0 |
(1 row)
+SELECT var_pop('nan'::numeric), var_samp('nan'::numeric);
+ var_pop | var_samp
+---------+----------
+ NaN | NaN
+(1 row)
+
+SELECT stddev_pop('nan'::numeric), stddev_samp('nan'::numeric);
+ stddev_pop | stddev_samp
+------------+-------------
+ NaN | NaN
+(1 row)
+
-- verify correct results for null and NaN inputs
select sum(null::int4) from generate_series(1,3);
sum
0.139634516517873
(1 row)
+-- check single-tuple behavior
+SELECT covar_pop(1::float8,2::float8), covar_samp(3::float8,4::float8);
+ covar_pop | covar_samp
+-----------+------------
+ 0 |
+(1 row)
+
+SELECT covar_pop(1::float8,'inf'::float8), covar_samp(3::float8,'inf'::float8);
+ covar_pop | covar_samp
+-----------+------------
+ NaN |
+(1 row)
+
+SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8);
+ covar_pop | covar_samp
+-----------+------------
+ NaN |
+(1 row)
+
-- test accum and combine functions directly
CREATE TABLE regr_test (x float8, y float8);
INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
-- population variance is defined for a single tuple, sample variance
-- is not
-SELECT var_pop(1.0), var_samp(2.0);
+SELECT var_pop(1.0::float8), var_samp(2.0::float8);
+SELECT stddev_pop(3.0::float8), stddev_samp(4.0::float8);
+SELECT var_pop('inf'::float8), var_samp('inf'::float8);
+SELECT stddev_pop('inf'::float8), stddev_samp('inf'::float8);
+SELECT var_pop('nan'::float8), var_samp('nan'::float8);
+SELECT stddev_pop('nan'::float8), stddev_samp('nan'::float8);
+SELECT var_pop(1.0::float4), var_samp(2.0::float4);
+SELECT stddev_pop(3.0::float4), stddev_samp(4.0::float4);
+SELECT var_pop('inf'::float4), var_samp('inf'::float4);
+SELECT stddev_pop('inf'::float4), stddev_samp('inf'::float4);
+SELECT var_pop('nan'::float4), var_samp('nan'::float4);
+SELECT stddev_pop('nan'::float4), stddev_samp('nan'::float4);
+SELECT var_pop(1.0::numeric), var_samp(2.0::numeric);
SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);
+SELECT var_pop('nan'::numeric), var_samp('nan'::numeric);
+SELECT stddev_pop('nan'::numeric), stddev_samp('nan'::numeric);
-- verify correct results for null and NaN inputs
select sum(null::int4) from generate_series(1,3);
SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest;
SELECT corr(b, a) FROM aggtest;
+-- check single-tuple behavior
+SELECT covar_pop(1::float8,2::float8), covar_samp(3::float8,4::float8);
+SELECT covar_pop(1::float8,'inf'::float8), covar_samp(3::float8,'inf'::float8);
+SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8);
+
-- test accum and combine functions directly
CREATE TABLE regr_test (x float8, y float8);
INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);