Hi, the upper(), lower() and initcap() functions truncate their result under Japanese Windows when, for example, the server encoding is UTF-8 and LC_CTYPE is set to Japanese_Japan.932.
Below is an example. $ psql psql (8.4devel) Type "help" for help. inoue=# \encoding sjis inoue=# show server_encoding; server_encoding ----------------- UTF8 (1 行) inoue=# show LC_CTYPE; lc_ctype -------------------- Japanese_Japan.932 (1 行) inoue=# \set jpnstr '''カタカナ''' inoue=# select char_length(:jpnstr); char_length ------------- 4 (1 行) inoue=# select upper(:jpnstr); upper -------- カタカ (1 行) inoue=# select char_length(upper(:jpnstr)); char_length ------------- 3 (1 行) The output of the last command should be 4, not 3. Attached is a patch to fix the bug. After applying the patch, the result is: inoue=# select upper(:jpnstr); upper ---------- カタカナ (1 行) inoue=# select char_length(upper(:jpnstr)); char_length ------------- 4 (1 行) Regards, Hiroshi Inoue
Index: formatting.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/formatting.c,v retrieving revision 1.151 diff -c -c -r1.151 formatting.c *** formatting.c 1 Dec 2008 17:11:18 -0000 1.151 --- formatting.c 14 Dec 2008 09:09:00 -0000 *************** *** 1462,1467 **** --- 1462,1468 ---- { wchar_t *workspace; int curr_char = 0; + size_t max_len, alloc_size; /* Output workspace cannot have more codes than input bytes */ workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); *************** *** 1472,1480 **** workspace[curr_char] = towlower(workspace[curr_char]); /* Make result large enough; case change might change number of bytes */ ! result = palloc(curr_char * MB_CUR_MAX + 1); ! wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1); pfree(workspace); } else --- 1473,1489 ---- workspace[curr_char] = towlower(workspace[curr_char]); /* Make result large enough; case change might change number of bytes */ ! #ifdef WIN32 ! max_len = pg_database_encoding_max_length(); ! if (MB_CUR_MAX > max_len) ! max_len = MB_CUR_MAX; ! #else ! max_len = MB_CUR_MAX; ! #endif ! alloc_size = curr_char * max_len + 1; ! result = palloc(alloc_size); ! wchar2char(result, workspace, alloc_size); pfree(workspace); } else *************** *** 1510,1515 **** --- 1519,1525 ---- { wchar_t *workspace; int curr_char = 0; + size_t max_len, alloc_size; /* Output workspace cannot have more codes than input bytes */ workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); *************** *** 1520,1528 **** workspace[curr_char] = towupper(workspace[curr_char]); /* Make result large enough; case change might change number of bytes */ ! result = palloc(curr_char * MB_CUR_MAX + 1); ! wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1); pfree(workspace); } else --- 1530,1546 ---- workspace[curr_char] = towupper(workspace[curr_char]); /* Make result large enough; case change might change number of bytes */ ! 
#ifdef WIN32 ! max_len = pg_database_encoding_max_length(); ! if (MB_CUR_MAX > max_len) ! max_len = MB_CUR_MAX; ! #else ! max_len = MB_CUR_MAX; ! #endif ! alloc_size = curr_char * max_len + 1; ! result = palloc(alloc_size); ! wchar2char(result, workspace, alloc_size); pfree(workspace); } else *************** *** 1559,1564 **** --- 1577,1583 ---- { wchar_t *workspace; int curr_char = 0; + size_t max_len, alloc_size; /* Output workspace cannot have more codes than input bytes */ workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); *************** *** 1575,1583 **** } /* Make result large enough; case change might change number of bytes */ ! result = palloc(curr_char * MB_CUR_MAX + 1); ! wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1); pfree(workspace); } else --- 1594,1610 ---- } /* Make result large enough; case change might change number of bytes */ ! #ifdef WIN32 ! max_len = pg_database_encoding_max_length(); ! if (MB_CUR_MAX > max_len) ! max_len = MB_CUR_MAX; ! #else ! max_len = MB_CUR_MAX; ! #endif ! alloc_size = curr_char * max_len + 1; ! result = palloc(alloc_size); ! wchar2char(result, workspace, alloc_size); pfree(workspace); } else
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers