On Friday 22 May 2009 18:27:01 Konstantin Izmailov wrote:
> 3. character_octet_length should always be double of
> character_maximum_length (due to Unicode character size on Windows which is
> 2).

I have the attached patch that would make character_octet_length the product 
of character_maximum_length and the maximum octet length of a single character 
in the selected server encoding.  So for UTF-8, this would be a factor of 4.  This 
doesn't exactly correspond to the behavior that you expect, but I think it's 
more correct overall anyway.

diff --git a/doc/src/sgml/information_schema.sgml b/doc/src/sgml/information_schema.sgml
index 8e145d7..6460862 100644
--- a/doc/src/sgml/information_schema.sgml
+++ b/doc/src/sgml/information_schema.sgml
@@ -343,10 +343,10 @@
       <entry><type>cardinal_number</type></entry>
       <entry>
        If <literal>data_type</literal> identifies a character type,
-       the maximum possible length in octets (bytes) of a datum (this
-       should not be of concern to
-       <productname>PostgreSQL</productname> users); null for all
-       other data types.
+       the maximum possible length in octets (bytes) of a datum; null
+       for all other data types.  The maximum octet length depends on
+       the declared character maximum length (see above) and the
+       server encoding.
       </entry>
      </row>
 
@@ -947,9 +947,10 @@
       <entry><type>cardinal_number</type></entry>
       <entry>
        If <literal>data_type</literal> identifies a character type,
-       the maximum possible length in octets (bytes) of a datum (this
-       should not be of concern to <productname>PostgreSQL</productname> users); null for all
-       other data types.
+       the maximum possible length in octets (bytes) of a datum; null
+       for all other data types.  The maximum octet length depends on
+       the declared character maximum length (see above) and the
+       server encoding.
       </entry>
      </row>
 
@@ -1688,9 +1689,9 @@
       <entry><type>cardinal_number</type></entry>
       <entry>
        If the domain has a character type, the maximum possible length
-       in octets (bytes) of a datum (this should not be of concern to
-       <productname>PostgreSQL</productname> users); null for all
-       other data types.
+       in octets (bytes) of a datum; null for all other data types.
+       The maximum octet length depends on the declared character
+       maximum length (see above) and the server encoding.
       </entry>
      </row>
 
diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql
index fe75322..cd6258b 100644
--- a/src/backend/catalog/information_schema.sql
+++ b/src/backend/catalog/information_schema.sql
@@ -102,11 +102,18 @@ CREATE FUNCTION _pg_char_octet_length(typid oid, typmod int4) RETURNS integer
     IMMUTABLE
     RETURNS NULL ON NULL INPUT
     AS
 $$SELECT
   CASE WHEN $1 IN (25, 1042, 1043) /* text, char, varchar */
-       THEN CAST(2^30 AS integer)
+       /* Declared maximum length in characters times the maximum octets per
+          character in the server encoding.  Falls back to the previous 2^30
+          bound when _pg_char_max_length yields null (presumably: no declared
+          length limit).  NOTE(review): the function is still marked IMMUTABLE
+          but now reads pg_database; confirm the volatility marking. */
+       THEN COALESCE(information_schema._pg_char_max_length($1, $2) *
+                     pg_catalog.pg_encoding_max_length((SELECT encoding FROM pg_catalog.pg_database WHERE datname = pg_catalog.current_database())),
+                     CAST(2^30 AS integer))
        ELSE null
   END$$;
 
 CREATE FUNCTION _pg_numeric_precision(typid oid, typmod int4) RETURNS integer
     LANGUAGE sql
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 753c927..058493c 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -482,6 +482,22 @@ length_in_encoding(PG_FUNCTION_ARGS)
 
 }
 
+/*
+ * SQL-callable: returns the maximum number of bytes a single character can
+ * occupy in the encoding with the given ID (pg_wchar_table[].maxmblen), or
+ * NULL if the ID is not a valid encoding.
+ */
+Datum
+pg_encoding_max_length_sql(PG_FUNCTION_ARGS)
+{
+	int encoding = PG_GETARG_INT32(0);
+
+	if (PG_VALID_ENCODING(encoding))
+		PG_RETURN_INT32(pg_wchar_table[encoding].maxmblen);
+	else
+		PG_RETURN_NULL();
+}
+
 /*
  * convert client encoding to server encoding.
  */
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 0285acd..e194d6a 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2278,6 +2278,9 @@ DESCR("convert encoding name to encoding id");
 DATA(insert OID = 1597 (  pg_encoding_to_char	   PGNSP PGUID 12 1 0 0 f f f t f s 1 0 19 "23" _null_ _null_ _null_ _null_ PG_encoding_to_char _null_ _null_ _null_ ));
 DESCR("convert encoding id to encoding name");
 
+DATA(insert OID = 2319 (  pg_encoding_max_length   PGNSP PGUID 12 1 0 0 f f f t f i 1 0 23 "23" _null_ _null_ _null_ _null_ pg_encoding_max_length_sql _null_ _null_ _null_ ));
+DESCR("maximum octet length of a character in an encoding");
+
 DATA(insert OID = 1638 (  oidgt				   PGNSP PGUID 12 1 0 0 f f f t f i 2 0 16 "26 26" _null_ _null_ _null_ _null_ oidgt _null_ _null_ _null_ ));
 DESCR("greater-than");
 DATA(insert OID = 1639 (  oidge				   PGNSP PGUID 12 1 0 0 f f f t f i 2 0 16 "26 26" _null_ _null_ _null_ _null_ oidge _null_ _null_ _null_ ));
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index c1b9393..13fd41a 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -948,6 +948,7 @@ extern Datum pg_convert(PG_FUNCTION_ARGS);
 extern Datum pg_convert_to(PG_FUNCTION_ARGS);
 extern Datum pg_convert_from(PG_FUNCTION_ARGS);
 extern Datum length_in_encoding(PG_FUNCTION_ARGS);
+extern Datum pg_encoding_max_length_sql(PG_FUNCTION_ARGS);
 
 /* format_type.c */
 extern Datum format_type(PG_FUNCTION_ARGS);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to