On Friday 22 May 2009 18:27:01 Konstantin Izmailov wrote: > 3. character_octet_length should always be double of > character_maximum_length (due to Unicode character size on Windows which is > 2).
I have the attached patch that would make character_octet_length the product of character_maximum_length and the maximum octet length of a single character in the selected server encoding. So for UTF-8, this would be factor 4. This doesn't exactly correspond to the behavior that you expect, but I think it's more correct overall anyway.
diff --git a/doc/src/sgml/information_schema.sgml b/doc/src/sgml/information_schema.sgml index 8e145d7..6460862 100644 --- a/doc/src/sgml/information_schema.sgml +++ b/doc/src/sgml/information_schema.sgml @@ -343,10 +343,10 @@ <entry><type>cardinal_number</type></entry> <entry> If <literal>data_type</literal> identifies a character type, - the maximum possible length in octets (bytes) of a datum (this - should not be of concern to - <productname>PostgreSQL</productname> users); null for all - other data types. + the maximum possible length in octets (bytes) of a datum; null + for all other data types. The maximum octet length depends on + the declared character maximum length (see above) and the + server encoding. </entry> </row> @@ -947,9 +947,10 @@ <entry><type>cardinal_number</type></entry> <entry> If <literal>data_type</literal> identifies a character type, - the maximum possible length in octets (bytes) of a datum (this - should not be of concern to <productname>PostgreSQL</productname> users); null for all - other data types. + the maximum possible length in octets (bytes) of a datum; null + for all other data types. The maximum octet length depends on + the declared character maximum length (see above) and the + server encoding. </entry> </row> @@ -1688,9 +1689,9 @@ <entry><type>cardinal_number</type></entry> <entry> If the domain has a character type, the maximum possible length - in octets (bytes) of a datum (this should not be of concern to - <productname>PostgreSQL</productname> users); null for all - other data types. + in octets (bytes) of a datum; null for all other data types. + The maximum octet length depends on the declared character + maximum length (see above) and the server encoding. 
</entry> </row> diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql index fe75322..cd6258b 100644 --- a/src/backend/catalog/information_schema.sql +++ b/src/backend/catalog/information_schema.sql @@ -102,11 +102,7 @@ CREATE FUNCTION _pg_char_octet_length(typid oid, typmod int4) RETURNS integer IMMUTABLE RETURNS NULL ON NULL INPUT AS -$$SELECT - CASE WHEN $1 IN (25, 1042, 1043) /* text, char, varchar */ - THEN CAST(2^30 AS integer) - ELSE null - END$$; +$$SELECT information_schema._pg_char_max_length($1, $2) * pg_encoding_max_length((SELECT encoding FROM pg_database WHERE datname = current_database()))$$; CREATE FUNCTION _pg_numeric_precision(typid oid, typmod int4) RETURNS integer LANGUAGE sql diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 753c927..058493c 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -482,6 +482,17 @@ length_in_encoding(PG_FUNCTION_ARGS) } +Datum +pg_encoding_max_length_sql(PG_FUNCTION_ARGS) +{ + int encoding = PG_GETARG_INT32(0); + + if (PG_VALID_ENCODING(encoding)) + return pg_wchar_table[encoding].maxmblen; + else + PG_RETURN_NULL(); +} + /* * convert client encoding to server encoding. 
*/ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 0285acd..e194d6a 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -2278,6 +2278,9 @@ DESCR("convert encoding name to encoding id"); DATA(insert OID = 1597 ( pg_encoding_to_char PGNSP PGUID 12 1 0 0 f f f t f s 1 0 19 "23" _null_ _null_ _null_ _null_ PG_encoding_to_char _null_ _null_ _null_ )); DESCR("convert encoding id to encoding name"); +DATA(insert OID = 2319 ( pg_encoding_max_length PGNSP PGUID 12 1 0 0 f f f t f i 1 0 23 "23" _null_ _null_ _null_ _null_ pg_encoding_max_length_sql _null_ _null_ _null_ )); +DESCR("maximum octet length of a character in an eocidng"); + DATA(insert OID = 1638 ( oidgt PGNSP PGUID 12 1 0 0 f f f t f i 2 0 16 "26 26" _null_ _null_ _null_ _null_ oidgt _null_ _null_ _null_ )); DESCR("greater-than"); DATA(insert OID = 1639 ( oidge PGNSP PGUID 12 1 0 0 f f f t f i 2 0 16 "26 26" _null_ _null_ _null_ _null_ oidge _null_ _null_ _null_ )); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index c1b9393..13fd41a 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -948,6 +948,7 @@ extern Datum pg_convert(PG_FUNCTION_ARGS); extern Datum pg_convert_to(PG_FUNCTION_ARGS); extern Datum pg_convert_from(PG_FUNCTION_ARGS); extern Datum length_in_encoding(PG_FUNCTION_ARGS); +extern Datum pg_encoding_max_length_sql(PG_FUNCTION_ARGS); /* format_type.c */ extern Datum format_type(PG_FUNCTION_ARGS);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers