On 04/09/14 21:16, Pavel Stehule wrote:
I did a review of last patch
Thanks
I found only one issue - float8 path has no own test in regress tests. When this issue will be fixed, I will mark this patch as ready for commit
Ah yeah I forgot about those, fixed in attached patch. -- Petr Jelinek http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index e50408c..93af6b4 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -901,25 +901,36 @@ <indexterm> <primary>width_bucket</primary> </indexterm> - <literal><function>width_bucket(<parameter>op</parameter> <type>numeric</type>, <parameter>b1</parameter> <type>numeric</type>, <parameter>b2</parameter> <type>numeric</type>, <parameter>count</parameter> <type>int</type>)</function></literal> + <literal><function>width_bucket(<parameter>operand</parameter> <type>numeric</type>, <parameter>b1</parameter> <type>numeric</type>, <parameter>b2</parameter> <type>numeric</type>, <parameter>count</parameter> <type>int</type>)</function></literal> </entry> <entry><type>int</type></entry> <entry>return the bucket to which <parameter>operand</> would be assigned in an equidepth histogram with <parameter>count</> - buckets, in the range <parameter>b1</> to <parameter>b2</></entry> + buckets spanning the range <parameter>b1</> to <parameter>b2</></entry> <entry><literal>width_bucket(5.35, 0.024, 10.06, 5)</literal></entry> <entry><literal>3</literal></entry> </row> <row> - <entry><literal><function>width_bucket(<parameter>op</parameter> <type>dp</type>, <parameter>b1</parameter> <type>dp</type>, <parameter>b2</parameter> <type>dp</type>, <parameter>count</parameter> <type>int</type>)</function></literal></entry> + <entry><literal><function>width_bucket(<parameter>operand</parameter> <type>dp</type>, <parameter>b1</parameter> <type>dp</type>, <parameter>b2</parameter> <type>dp</type>, <parameter>count</parameter> <type>int</type>)</function></literal></entry> <entry><type>int</type></entry> <entry>return the bucket to which <parameter>operand</> would be assigned in an equidepth histogram with <parameter>count</> - buckets, in the range <parameter>b1</> to <parameter>b2</></entry> + buckets spanning the range <parameter>b1</> to <parameter>b2</></entry> <entry><literal>width_bucket(5.35, 0.024, 10.06, 5)</literal></entry> <entry><literal>3</literal></entry> </row> + + <row> + <entry><literal><function>width_bucket(<parameter>operand</parameter> <type>anyelement</type>, <parameter>thresholds</parameter> <type>anyarray</type>)</function></literal></entry> + <entry><type>int</type></entry> + <entry>return the bucket to which <parameter>operand</> would + be assigned given an array listing the upper bounds of the buckets; + the <parameter>thresholds</> array <emphasis>must be sorted</>, + smallest first, or unexpected results will be obtained</entry> + <entry><literal>width_bucket(now(), array['yesterday', 'today', 'tomorrow']::timestamptz[])</literal></entry> + <entry><literal>2</literal></entry> + </row> </tbody> </tgroup> </table> diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index f8e94ec..57376ea 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -15,8 +15,10 @@ #include "postgres.h" #include <ctype.h> +#include <math.h> #include "access/htup_details.h" +#include "catalog/pg_type.h" #include "funcapi.h" #include "libpq/pqformat.h" #include "utils/array.h" @@ -130,6 +132,15 @@ static ArrayType *array_replace_internal(ArrayType *array, Datum replace, bool replace_isnull, bool remove, Oid collation, FunctionCallInfo fcinfo); +static int width_bucket_fixed(Datum operand, + ArrayType *thresholds, + Oid collation, + TypeCacheEntry *typentry); +static int width_bucket_fixed_float8(Datum operand, ArrayType *thresholds); +static int width_bucket_variable(Datum operand, + ArrayType *thresholds, + Oid collation, + TypeCacheEntry *typentry); /* @@ -5502,3 +5513,219 @@ array_replace(PG_FUNCTION_ARGS) fcinfo); PG_RETURN_ARRAYTYPE_P(array); } + +/* + * Implements width_bucket(anyelement, anyarray). + * + * 'thresholds' is an array containing upper bound values for each bucket; + * these must be sorted from smallest to largest, or bogus results will be + * produced. If N thresholds are supplied, the output is from 0 to N: + * 0 is for inputs < first threshold, N is for inputs >= last threshold. + */ +Datum +width_bucket_generic(PG_FUNCTION_ARGS) +{ + Datum operand = PG_GETARG_DATUM(0); + ArrayType *thresholds = PG_GETARG_ARRAYTYPE_P(1); + Oid element_type = ARR_ELEMTYPE(thresholds); + int result; + + /* Check input */ + if (ARR_NDIM(thresholds) > 1) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("thresholds must be one-dimensional array"))); + + if (array_contains_nulls(thresholds)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("thresholds array must not contain NULLs"))); + + /* For float8 use optimized implementation */ + if (element_type == FLOAT8OID) + result = width_bucket_fixed_float8(operand, thresholds); + else + { + Oid collation = PG_GET_COLLATION(); + TypeCacheEntry *typentry; + + /* Cache information about the input type */ + typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (typentry == NULL || + typentry->type_id != element_type) + { + typentry = lookup_type_cache(element_type, + TYPECACHE_CMP_PROC_FINFO); + if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify a comparison function for type %s", + format_type_be(element_type)))); + fcinfo->flinfo->fn_extra = (void *) typentry; + } + + /* We have two generic implementation paths for fixed- and variable-width types */ + if (typentry->typlen > 0) + result = width_bucket_fixed(operand, thresholds, collation, typentry); + else + result = width_bucket_variable(operand, thresholds, collation, typentry); + } + + /* Avoid leaking memory when handed toasted input. */ + PG_FREE_IF_COPY(thresholds, 1); + + PG_RETURN_INT32(result); +} + +/* + * width_bucket for fixed-width data types. + */ +static int +width_bucket_fixed(Datum operand, + ArrayType *thresholds, + Oid collation, + TypeCacheEntry *typentry) +{ + char *thresholds_data; + int typlen; + FunctionCallInfoData locfcinfo; + int left; + int right; + + /* + * Since we know the array contains no NULLs, we can just index it + * directly. + */ + thresholds_data = (char *) ARR_DATA_PTR(thresholds); + typlen = typentry->typlen; + + InitFunctionCallInfoData(locfcinfo, &typentry->cmp_proc_finfo, 2, + collation, NULL, NULL); + + /* Find the bucket */ + left = 0; + right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds)); + while (left < right) + { + int mid; + char *ptr; + int32 cmpresult; + + mid = (left + right) / 2; + + ptr = thresholds_data + mid * typlen; + + locfcinfo.arg[0] = operand; + locfcinfo.arg[1] = fetch_att(ptr, typentry->typbyval, typlen); + locfcinfo.argnull[0] = false; + locfcinfo.argnull[1] = false; + locfcinfo.isnull = false; + + cmpresult = DatumGetInt32(FunctionCallInvoke(&locfcinfo)); + + if (cmpresult < 0) + right = mid; + else + left = mid + 1; + } + + return left; +} + +/* + * Optimized width_bucket for float8 data types. + * + * See width_bucket_fixed. + */ +int +width_bucket_fixed_float8(Datum operand, ArrayType *thresholds) +{ + float8 op = DatumGetFloat8(operand); + float8 *thresholds_data; + int left; + int right; + + thresholds_data = (float8 *) ARR_DATA_PTR(thresholds); + + /* Find the bucket */ + left = 0; + right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds)); + while (left < right) { + int mid = (left + right) / 2; + + if ((isnan(thresholds_data[mid]) && !isnan(op)) || op < thresholds_data[mid]) + right = mid; + else + left = mid + 1; + } + + return left; +} + +/* + * width_bucket for variable-width data types. + */ +static int +width_bucket_variable(Datum operand, + ArrayType *thresholds, + Oid collation, + TypeCacheEntry *typentry) +{ + char *thresholds_data; + int typlen; + bool typbyval; + char typalign; + FunctionCallInfoData locfcinfo; + int left; + int right; + + thresholds_data = ARR_DATA_PTR(thresholds); + typlen = typentry->typlen; + typbyval = typentry->typbyval; + typalign = typentry->typalign; + + InitFunctionCallInfoData(locfcinfo, &typentry->cmp_proc_finfo, 2, + collation, NULL, NULL); + + /* Find the bucket */ + left = 0; + right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds)); + while (left < right) + { + int i; + int mid; + char *ptr = thresholds_data; + int32 cmpresult; + + mid = (left + right) / 2; + + for (i = left; i < mid; i++) + { + ptr = att_addlength_pointer(ptr, typlen, ptr); + ptr = (char *) att_align_nominal(ptr, typalign); + } + + locfcinfo.arg[0] = operand; + locfcinfo.arg[1] = fetch_att(ptr, typbyval, typlen); + locfcinfo.argnull[0] = false; + locfcinfo.argnull[1] = false; + locfcinfo.isnull = false; + + cmpresult = DatumGetInt32(FunctionCallInvoke(&locfcinfo)); + + if (cmpresult < 0) + right = mid; + else + { + /* + * Move the thresholds pointer so we don't have to seek from + * the beginning of array again. + * */ + thresholds_data = att_addlength_pointer(ptr, typlen, ptr); + thresholds_data = (char *) att_align_nominal(thresholds_data, typalign); + left = mid + 1; + } + } + + return left; +} diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 5176ed0..9b55acc 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -885,6 +885,8 @@ DATA(insert OID = 2334 ( array_agg_finalfn PGNSP PGUID 12 1 0 0 0 f f f f f f DESCR("aggregate final function"); DATA(insert OID = 2335 ( array_agg PGNSP PGUID 12 1 0 0 0 t f f f f f i 1 0 2277 "2283" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); DESCR("concatenate aggregate input into an array"); +DATA(insert OID = 3218 ( width_bucket PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "2283 2277" _null_ _null_ _null_ _null_ width_bucket_generic _null_ _null_ _null_ )); +DESCR("bucket number of operand given a sorted array of bucket upper bounds"); DATA(insert OID = 3816 ( array_typanalyze PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 16 "2281" _null_ _null_ _null_ _null_ array_typanalyze _null_ _null_ _null_ )); DESCR("array typanalyze"); DATA(insert OID = 3817 ( arraycontsel PGNSP PGUID 12 1 0 0 0 f f f f t f s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ arraycontsel _null_ _null_ _null_ )); diff --git a/src/include/utils/array.h b/src/include/utils/array.h index 9bbfaae..2abaccd 100644 --- a/src/include/utils/array.h +++ b/src/include/utils/array.h @@ -214,6 +214,7 @@ extern Datum array_fill_with_lower_bounds(PG_FUNCTION_ARGS); extern Datum array_unnest(PG_FUNCTION_ARGS); extern Datum array_remove(PG_FUNCTION_ARGS); extern Datum array_replace(PG_FUNCTION_ARGS); +extern Datum width_bucket_generic(PG_FUNCTION_ARGS); extern Datum array_ref(ArrayType *array, int nSubscripts, int *indx, int arraytyplen, int elmlen, bool elmbyval, char elmalign, diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out index 4286691..f9d0e5d 100644 --- a/src/test/regress/expected/arrays.out +++ b/src/test/regress/expected/arrays.out @@ -1706,3 +1706,106 @@ select length(md5((f1[1]).c2)) from dest; drop table dest; drop type textandtext; +-- Tests for generic form of width_bucket(), for convinience +-- we also test float8 optimized version +SELECT + op, + width_bucket(op::numeric, ARRAY[1, 3, 5, 10.0]::numeric[]) AS wb_n1, + width_bucket(op::numeric, ARRAY[0, 5.5, 9.99]::numeric[]) AS wb_n2, + width_bucket(op::numeric, ARRAY[-6, -5, 2.0]::numeric[]) AS wb_n3, + width_bucket(op::float8, ARRAY[1, 3, 5, 10.0]::float8[]) AS wb_f1, + width_bucket(op::float8, ARRAY[0, 5.5, 9.99]::float8[]) AS wb_f2, + width_bucket(op::float8, ARRAY[-6, -5, 2.0]::float8[]) AS wb_f3 +FROM (VALUES + (-5.2), + (-0.0000000001), + (0.000000000001), + (1), + (1.99999999999999), + (2), + (2.00000000000001), + (3), + (4), + (4.5), + (5), + (5.5), + (6), + (7), + (8), + (9), + (9.99999999999999), + (10), + (10.0000000000001) +) v(op); + op | wb_n1 | wb_n2 | wb_n3 | wb_f1 | wb_f2 | wb_f3 +------------------+-------+-------+-------+-------+-------+------- + -5.2 | 0 | 0 | 1 | 0 | 0 | 1 + -0.0000000001 | 0 | 0 | 2 | 0 | 0 | 2 + 0.000000000001 | 0 | 1 | 2 | 0 | 1 | 2 + 1 | 1 | 1 | 2 | 1 | 1 | 2 + 1.99999999999999 | 1 | 1 | 2 | 1 | 1 | 2 + 2 | 1 | 1 | 3 | 1 | 1 | 3 + 2.00000000000001 | 1 | 1 | 3 | 1 | 1 | 3 + 3 | 2 | 1 | 3 | 2 | 1 | 3 + 4 | 2 | 1 | 3 | 2 | 1 | 3 + 4.5 | 2 | 1 | 3 | 2 | 1 | 3 + 5 | 3 | 1 | 3 | 3 | 1 | 3 + 5.5 | 3 | 2 | 3 | 3 | 2 | 3 + 6 | 3 | 2 | 3 | 3 | 2 | 3 + 7 | 3 | 2 | 3 | 3 | 2 | 3 + 8 | 3 | 2 | 3 | 3 | 2 | 3 + 9 | 3 | 2 | 3 | 3 | 2 | 3 + 9.99999999999999 | 3 | 3 | 3 | 3 | 3 | 3 + 10 | 4 | 3 | 3 | 4 | 3 | 3 + 10.0000000000001 | 4 | 3 | 3 | 4 | 3 | 3 +(19 rows) + +SELECT + op, + width_bucket(op, ARRAY[1, 3, 5, 10]) AS wb_1 +FROM generate_series(0,11) as op; + op | wb_1 +----+------ + 0 | 0 + 1 | 1 + 2 | 1 + 3 | 2 + 4 | 2 + 5 | 3 + 6 | 3 + 7 | 3 + 8 | 3 + 9 | 3 + 10 | 4 + 11 | 4 +(12 rows) + +SELECT width_bucket(now(), + array['yesterday', 'today', 'tomorrow']::timestamptz[]); + width_bucket +-------------- + 2 +(1 row) + +SELECT width_bucket(5, ARRAY[3]); + width_bucket +-------------- + 1 +(1 row) + +SELECT width_bucket(5, '{}'); + width_bucket +-------------- + 0 +(1 row) + +-- error cases +SELECT width_bucket('5'::text, ARRAY[3, 4]::integer[]); +ERROR: function width_bucket(text, integer[]) does not exist +LINE 1: SELECT width_bucket('5'::text, ARRAY[3, 4]::integer[]); + ^ +HINT: No function matches the given name and argument types. You might need to add explicit type casts. +SELECT width_bucket(5, ARRAY[3, 4, NULL]); +ERROR: thresholds array must not contain NULLs +SELECT width_bucket(5, ARRAY[ARRAY[1, 2], ARRAY[3, 4]]); +ERROR: thresholds must be one-dimensional array diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql index d9f7cbf..b7e2f9d 100644 --- a/src/test/regress/sql/arrays.sql +++ b/src/test/regress/sql/arrays.sql @@ -476,3 +476,52 @@ drop table src; select length(md5((f1[1]).c2)) from dest; drop table dest; drop type textandtext; + +-- Tests for generic form of width_bucket(), for convinience +-- we also test float8 optimized version + +SELECT + op, + width_bucket(op::numeric, ARRAY[1, 3, 5, 10.0]::numeric[]) AS wb_n1, + width_bucket(op::numeric, ARRAY[0, 5.5, 9.99]::numeric[]) AS wb_n2, + width_bucket(op::numeric, ARRAY[-6, -5, 2.0]::numeric[]) AS wb_n3, + width_bucket(op::float8, ARRAY[1, 3, 5, 10.0]::float8[]) AS wb_f1, + width_bucket(op::float8, ARRAY[0, 5.5, 9.99]::float8[]) AS wb_f2, + width_bucket(op::float8, ARRAY[-6, -5, 2.0]::float8[]) AS wb_f3 +FROM (VALUES + (-5.2), + (-0.0000000001), + (0.000000000001), + (1), + (1.99999999999999), + (2), + (2.00000000000001), + (3), + (4), + (4.5), + (5), + (5.5), + (6), + (7), + (8), + (9), + (9.99999999999999), + (10), + (10.0000000000001) +) v(op); + +SELECT + op, + width_bucket(op, ARRAY[1, 3, 5, 10]) AS wb_1 +FROM generate_series(0,11) as op; + +SELECT width_bucket(now(), + array['yesterday', 'today', 'tomorrow']::timestamptz[]); + +SELECT width_bucket(5, ARRAY[3]); +SELECT width_bucket(5, '{}'); + +-- error cases +SELECT width_bucket('5'::text, ARRAY[3, 4]::integer[]); +SELECT width_bucket(5, ARRAY[3, 4, NULL]); +SELECT width_bucket(5, ARRAY[ARRAY[1, 2], ARRAY[3, 4]]);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers