Just got a report on IRC of a bug in the array version of
percentile_cont; if two of the requested percentiles were between the
same pair of input rows, the result could be wrong or an error would
be generated.

e.g.

select percentile_cont(array[0.4,0.6]) within group (order by gs)
  from generate_series(1,2) gs;
ERROR: missing row in percentile_cont

Proposed patch (against current master) attached.

-- 
Andrew (irc:RhodiumToad)

diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c
index 135a14b..1efbf07 100644
--- a/src/backend/utils/adt/orderedsetaggs.c
+++ b/src/backend/utils/adt/orderedsetaggs.c
@@ -919,7 +919,7 @@ percentile_cont_multi_final_common(FunctionCallInfo fcinfo,
 
 				rownum = target_row;
 			}
-			else
+			else if (target_row == rownum)
 			{
 				/*
 				 * We are already at the target row, so we must previously
@@ -928,15 +928,23 @@ percentile_cont_multi_final_common(FunctionCallInfo fcinfo,
 				first_val = second_val;
 			}
 
-			/* Fetch second_row if needed */
-			if (need_lerp)
+			/*
+			 * We might be lerping between the same two rows of input as the
+			 * previous value. If so, rownum will already be ahead of
+			 * target_row, and we skip any attempt to read more.
+			 */
+			if (target_row == rownum)
 			{
-				if (!tuplesort_getdatum(osastate->sortstate, true, &second_val, &isnull) || isnull)
-					elog(ERROR, "missing row in percentile_cont");
-				rownum++;
+				/* Fetch second_row if needed */
+				if (need_lerp)
+				{
+					if (!tuplesort_getdatum(osastate->sortstate, true, &second_val, &isnull) || isnull)
+						elog(ERROR, "missing row in percentile_cont");
+					rownum++;
+				}
+				else
+					second_val = first_val;
 			}
-			else
-				second_val = first_val;
 
 			/* Compute appropriate result */
 			if (need_lerp)
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 58df854..40f5398 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1423,11 +1423,11 @@ from tenk1;
  {{NULL,999,499},{749,249,NULL}}
 (1 row)
 
-select percentile_cont(array[0,1,0.25,0.75,0.5,1]) within group (order by x)
+select percentile_cont(array[0,1,0.25,0.75,0.5,1,0.3,0.32,0.35,0.38,0.4]) within group (order by x)
 from generate_series(1,6) x;
-    percentile_cont    
------------------------
- {1,6,2.25,4.75,3.5,6}
+             percentile_cont              
+------------------------------------------
+ {1,6,2.25,4.75,3.5,6,2.5,2.6,2.75,2.9,3}
 (1 row)
 
 select ten, mode() within group (order by string4) from tenk1 group by ten;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 8096a6f..a84327d 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -533,7 +533,7 @@ select percentile_cont(array[0,0.25,0.5,0.75,1]) within group (order by thousand
 from tenk1;
 select percentile_disc(array[[null,1,0.5],[0.75,0.25,null]]) within group (order by thousand)
 from tenk1;
-select percentile_cont(array[0,1,0.25,0.75,0.5,1]) within group (order by x)
+select percentile_cont(array[0,1,0.25,0.75,0.5,1,0.3,0.32,0.35,0.38,0.4]) within group (order by x)
 from generate_series(1,6) x;
 
 select ten, mode() within group (order by string4) from tenk1 group by ten;
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to