moriyoshi Fri Sep 26 10:42:15 2003 EDT
Modified files:
/php-src/ext/mbstring mbstring.c
Log:
Fix some incompatibilities with the pre-libmbfl behaviour regarding encoding
detection.
Index: php-src/ext/mbstring/mbstring.c
diff -u php-src/ext/mbstring/mbstring.c:1.198 php-src/ext/mbstring/mbstring.c:1.199
--- php-src/ext/mbstring/mbstring.c:1.198 Tue Sep 23 09:23:30 2003
+++ php-src/ext/mbstring/mbstring.c Fri Sep 26 10:42:14 2003
@@ -17,7 +17,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: mbstring.c,v 1.198 2003/09/23 13:23:30 moriyoshi Exp $ */
+/* $Id: mbstring.c,v 1.199 2003/09/26 14:42:14 moriyoshi Exp $ */
/*
* PHP4 Multibyte String module "mbstring"
@@ -84,62 +84,65 @@
static void _php_mb_globals_dtor(zend_mbstring_globals *pglobals TSRMLS_DC);
/* }}} */
-/* {{{ php_mb_default_identify_list[] */
-#if defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+/* {{{ php_mb_default_identify_list */
+typedef struct _php_mb_nls_ident_list {
+ enum mbfl_no_language lang;
+ enum mbfl_no_encoding* list;
+ int list_size;
+} php_mb_nls_ident_list;
+
+static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_jis,
mbfl_no_encoding_utf8,
mbfl_no_encoding_euc_jp,
mbfl_no_encoding_sjis
};
-#endif
-#if defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8,
mbfl_no_encoding_euc_cn,
mbfl_no_encoding_cp936
};
-#endif
-#if defined(HAVE_MBSTR_TW) && !defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8,
mbfl_no_encoding_euc_tw,
mbfl_no_encoding_big5
};
-#endif
-#if defined(HAVE_MBSTR_KR) && !defined(HAVE_MBSTR_TW) && !defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8,
mbfl_no_encoding_euc_kr,
mbfl_no_encoding_uhc
};
-#endif
-#if defined(HAVE_MBSTR_RU) && !defined(HAVE_MBSTR_KR) && !defined(HAVE_MBSTR_TW) && !defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8,
mbfl_no_encoding_koi8r,
mbfl_no_encoding_cp1251,
mbfl_no_encoding_cp866
};
-#endif
-#if !defined(HAVE_MBSTR_RU) && !defined(HAVE_MBSTR_KR) && !defined(HAVE_MBSTR_TW) && !defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8
};
-#endif
-static const int php_mb_default_identify_list_size = sizeof(php_mb_default_identify_list)/sizeof(enum mbfl_no_encoding);
+
+php_mb_nls_ident_list php_mb_default_identify_list[] = {
+ { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
+ { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
+ { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
+ { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
+ { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
+ { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
+};
+
/* }}} */
static
@@ -285,11 +288,12 @@
* of parsed encodings.
*/
static int
-php_mb_parse_encoding_list(const char *value, int value_length, int **return_list, int *return_size, int persistent)
+php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
{
- int n, l, size, bauto, *src, *list, *entry, ret = 1;
+ int n, l, size, bauto, ret = 1;
char *p, *p1, *p2, *endp, *tmpstr;
enum mbfl_no_encoding no_encoding;
+ enum mbfl_no_encoding *src, *entry, *list;
list = NULL;
if (value == NULL || value_length <= 0) {
@@ -301,6 +305,12 @@
}
return 0;
} else {
+ enum mbfl_no_encoding *identify_list;
+ int identify_list_size;
+
+ identify_list = MBSTRG(default_detect_order_list);
+ identify_list_size = MBSTRG(default_detect_order_list_size);
+
/* copy the value string for work */
if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
tmpstr = (char *)estrndup(value+1, value_length-2);
@@ -319,7 +329,7 @@
p1 = p2 + 1;
n++;
}
- size = n + php_mb_default_identify_list_size;
+ size = n + identify_list_size;
/* make list */
list = (int *)pecalloc(size, sizeof(int), persistent);
if (list != NULL) {
@@ -343,23 +353,25 @@
p--;
}
/* convert to the encoding number and check encoding */
- no_encoding = mbfl_name2no_encoding(p1);
- if (no_encoding == mbfl_no_encoding_auto) {
+ if (strcasecmp(p1, "auto") == 0) {
if (!bauto) {
bauto = 1;
- l = php_mb_default_identify_list_size;
- src = (int*)php_mb_default_identify_list;
+ l = identify_list_size;
+ src = identify_list;
while (l > 0) {
*entry++ = *src++;
l--;
n++;
}
}
- } else if (no_encoding != mbfl_no_encoding_invalid) {
- *entry++ = no_encoding;
- n++;
} else {
- ret = 0;
+ no_encoding = mbfl_name2no_encoding(p1);
+ if (no_encoding != mbfl_no_encoding_invalid) {
+ *entry++ = no_encoding;
+ n++;
+ } else {
+ ret = 0;
+ }
}
p1 = p2 + 1;
} while (n < size && p2 != NULL);
@@ -397,7 +409,7 @@
/* {{{ MBSTRING_API php_mb_check_encoding_list */
MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) {
- return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0);
+ return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC);
}
/* }}} */
@@ -407,19 +419,26 @@
* of parsed encodings.
*/
static int
-php_mb_parse_encoding_array(zval *array, int **return_list, int *return_size, int persistent)
+php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
{
zval **hash_entry;
HashTable *target_hash;
- int i, n, l, size, bauto, *list, *entry, *src, ret = 1;
+ int i, n, l, size, bauto,ret = 1;
enum mbfl_no_encoding no_encoding;
+ enum mbfl_no_encoding *src, *list, *entry;
list = NULL;
if (Z_TYPE_P(array) == IS_ARRAY) {
+ enum mbfl_no_encoding *identify_list;
+ int identify_list_size;
+
+ identify_list = MBSTRG(default_detect_order_list);
+ identify_list_size = MBSTRG(default_detect_order_list_size);
+
target_hash = Z_ARRVAL_P(array);
zend_hash_internal_pointer_reset(target_hash);
i = zend_hash_num_elements(target_hash);
- size = i + php_mb_default_identify_list_size;
+ size = i + identify_list_size;
list = (int *)pecalloc(size, sizeof(int), persistent);
if (list != NULL) {
entry = list;
@@ -430,23 +449,25 @@
break;
}
convert_to_string_ex(hash_entry);
- no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
- if (no_encoding == mbfl_no_encoding_auto) {
+ if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
if (!bauto) {
bauto = 1;
- l = php_mb_default_identify_list_size;
- src = (int*)php_mb_default_identify_list;
+ l = identify_list_size;
+ src = identify_list;
while (l > 0) {
*entry++ = *src++;
l--;
n++;
}
}
- } else if (no_encoding != mbfl_no_encoding_invalid) {
- *entry++ = no_encoding;
- n++;
} else {
- ret = 0;;
+ no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
+ if (no_encoding != mbfl_no_encoding_invalid) {
+ *entry++ = no_encoding;
+ n++;
+ } else {
+ ret = 0;
+ }
}
zend_hash_move_forward(target_hash);
i--;
@@ -482,6 +503,25 @@
}
/* }}} */
+/* {{{ php_mb_nls_get_default_detect_order_list */
+static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size)
+{
+ size_t i;
+
+ *plist = php_mb_default_identify_list_neut;
+ *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
+
+ for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
+ if (php_mb_default_identify_list[i].lang == lang) {
+ *plist = php_mb_default_identify_list[i].list;
+ *plist_size = php_mb_default_identify_list[i].list_size;
+ return 1;
+ }
+ }
+ return 0;
+}
+/* }}} */
+
/* {{{ php.ini directive handler */
static PHP_INI_MH(OnUpdate_mbstring_language)
{
@@ -492,6 +532,7 @@
return FAILURE;
}
MBSTRG(language) = no_language;
+ php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
return SUCCESS;
}
/* }}} */
@@ -501,7 +542,7 @@
{
int *list, size;
- if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1)) {
+ if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
if (MBSTRG(detect_order_list) != NULL) {
free(MBSTRG(detect_order_list));
}
@@ -520,7 +561,7 @@
{
int *list, size;
- if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1)) {
+ if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
if (MBSTRG(http_input_list) != NULL) {
free(MBSTRG(http_input_list));
}
@@ -594,7 +635,7 @@
{
int *list, size;
- if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1)) {
+ if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
if (MBSTRG(script_encoding_list) != NULL) {
free(MBSTRG(script_encoding_list));
}
@@ -690,6 +731,8 @@
MBSTRG(detect_order_list_size) = 0;
MBSTRG(current_detect_order_list) = NULL;
MBSTRG(current_detect_order_list_size) = 0;
+ MBSTRG(default_detect_order_list) = php_mb_default_identify_list_neut;
+ MBSTRG(default_detect_order_list_size) = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
@@ -838,8 +881,8 @@
n = MBSTRG(detect_order_list_size);
}
if (n <= 0) {
- list = (int*)php_mb_default_identify_list;
- n = php_mb_default_identify_list_size;
+ list = MBSTRG(default_detect_order_list);
+ n = MBSTRG(default_detect_order_list_size);
}
entry = (int *)safe_emalloc(n, sizeof(int), 0);
MBSTRG(current_detect_order_list) = entry;
@@ -990,6 +1033,7 @@
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
RETURN_FALSE;
} else {
+ php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
MBSTRG(current_language) = no_language;
RETURN_TRUE;
}
@@ -1185,7 +1229,7 @@
size = 0;
switch (Z_TYPE_PP(arg1)) {
case IS_ARRAY:
- if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0)) {
+ if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
if (list) {
efree(list);
}
@@ -1194,7 +1238,7 @@
break;
default:
convert_to_string_ex(arg1);
- if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0)) {
+ if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
if (list) {
efree(list);
}
@@ -1929,7 +1973,7 @@
if (_from_encodings) {
list = NULL;
size = 0;
- php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0);
+ php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
if (size == 1) {
from_encoding = *list;
string.no_encoding = from_encoding;
@@ -2151,7 +2195,7 @@
if (ZEND_NUM_ARGS() >= 2 && Z_STRVAL_PP(arg_list)) {
switch (Z_TYPE_PP(arg_list)) {
case IS_ARRAY:
- if (!php_mb_parse_encoding_array(*arg_list, &list, &size, 0)) {
+ if (!php_mb_parse_encoding_array(*arg_list, &list, &size, 0 TSRMLS_CC)) {
if (list) {
efree(list);
size = 0;
@@ -2160,7 +2204,7 @@
break;
default:
convert_to_string_ex(arg_list);
- if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg_list), Z_STRLEN_PP(arg_list), &list, &size, 0)) {
+ if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg_list), Z_STRLEN_PP(arg_list), &list, &size, 0 TSRMLS_CC)) {
if (list) {
efree(list);
size = 0;
@@ -2432,11 +2476,11 @@
elistsz = 0;
switch (Z_TYPE_PP(args[1])) {
case IS_ARRAY:
- php_mb_parse_encoding_array(*args[1], &elist, &elistsz, 0);
+ php_mb_parse_encoding_array(*args[1], &elist, &elistsz, 0 TSRMLS_CC);
break;
default:
convert_to_string_ex(args[1]);
- php_mb_parse_encoding_list(Z_STRVAL_PP(args[1]), Z_STRLEN_PP(args[1]), &elist, &elistsz, 0);
+ php_mb_parse_encoding_list(Z_STRVAL_PP(args[1]), Z_STRLEN_PP(args[1]), &elist, &elistsz, 0 TSRMLS_CC);
break;
}
if (elistsz <= 0) {
@@ -3366,7 +3410,7 @@
/* make encoding list */
list = NULL;
size = 0;
- php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0);
+ php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
if (size > 0 && list != NULL) {
elist = list;
@@ -3374,16 +3418,16 @@
elist = MBSTRG(current_detect_order_list);
size = MBSTRG(current_detect_order_list_size);
if (size <= 0){
- elist = (int*)php_mb_default_identify_list;
- size = php_mb_default_identify_list_size;
+ elist = MBSTRG(default_detect_order_list);
+ size = MBSTRG(default_detect_order_list_size);
}
}
} else {
elist = MBSTRG(current_detect_order_list);
size = MBSTRG(current_detect_order_list_size);
if (size <= 0){
- elist = (int*)php_mb_default_identify_list;
- size = php_mb_default_identify_list_size;
+ elist = MBSTRG(default_detect_order_list);
+ size = MBSTRG(default_detect_order_list_size);
}
}
@@ -3465,7 +3509,7 @@
/* make encoding list */
list = NULL;
size = 0;
- php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0);
+ php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
if (size <= 0) {
return NULL;
}
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php