moriyoshi Mon, 20 Dec 2010 03:16:09 +0000
Revision: http://svn.php.net/viewvc?view=revision&revision=306486
Log:
- Avoid allocating extra buffers. This makes parsing with zend.multibyte
enabled as fast as with it disabled.
Changed paths:
U php/php-src/trunk/Zend/zend_language_scanner.h
U php/php-src/trunk/Zend/zend_language_scanner.l
Modified: php/php-src/trunk/Zend/zend_language_scanner.h
===================================================================
--- php/php-src/trunk/Zend/zend_language_scanner.h 2010-12-20 03:11:41 UTC (rev 306485)
+++ php/php-src/trunk/Zend/zend_language_scanner.h 2010-12-20 03:16:09 UTC (rev 306486)
@@ -56,9 +56,7 @@
ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC);
ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC);
ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC);
-ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC);
ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC);
-ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC);
ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC);
END_EXTERN_C()
Modified: php/php-src/trunk/Zend/zend_language_scanner.l
===================================================================
--- php/php-src/trunk/Zend/zend_language_scanner.l 2010-12-20 03:11:41 UTC (rev 306485)
+++ php/php-src/trunk/Zend/zend_language_scanner.l 2010-12-20 03:16:09 UTC (rev 306486)
@@ -207,10 +207,6 @@
CG(zend_lineno) = lex_state->lineno;
zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
- if (SCNG(script_org)) {
- efree(SCNG(script_org));
- SCNG(script_org) = NULL;
- }
if (SCNG(script_filtered)) {
efree(SCNG(script_filtered));
SCNG(script_filtered) = NULL;
@@ -462,31 +458,23 @@
if (size != -1) {
if (CG(multibyte)) {
- if (zend_multibyte_read_script((unsigned char *)buf, size TSRMLS_CC) != 0) {
- return FAILURE;
- }
+ SCNG(script_org) = buf;
+ SCNG(script_org_size) = n;
+ SCNG(script_filtered) = NULL;
- SCNG(yy_in) = NULL;
-
zend_multibyte_set_filter(NULL TSRMLS_CC);
- if (!SCNG(input_filter)) {
- SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1);
- memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1);
- SCNG(script_filtered_size) = SCNG(script_org_size);
- } else {
- SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
- if (SCNG(script_filtered) == NULL) {
+ if (SCNG(input_filter)) {
+ if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
}
+ buf = SCNG(script_filtered);
+ size = SCNG(script_filtered_size);
}
- SCNG(yy_start) = SCNG(script_filtered) - offset;
- yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC);
- } else {
- SCNG(yy_start) = (unsigned char *)buf - offset;
- yy_scan_buffer(buf, size TSRMLS_CC);
}
+ SCNG(yy_start) = (unsigned char *)buf - offset;
+ yy_scan_buffer(buf, size TSRMLS_CC);
} else {
zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
}
@@ -615,6 +603,9 @@
ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
{
+ char *buf;
+ size_t size;
+
/* enforce two trailing NULLs for flex... */
if (IS_INTERNED(str->value.str.val)) {
char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD);
@@ -626,28 +617,31 @@
memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD);
- SCNG(yy_in)=NULL;
+ SCNG(yy_in) = NULL;
SCNG(yy_start) = NULL;
+ buf = str->value.str.val;
+ size = str->value.str.len;
+
if (CG(multibyte)) {
- SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val);
- SCNG(script_org_size) = str->value.str.len;
+ SCNG(script_org) = buf;
+ SCNG(script_org_size) = size;
+ SCNG(script_filtered) = NULL;
zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);
- if (!SCNG(input_filter)) {
- SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1);
- memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1);
- SCNG(script_filtered_size) = SCNG(script_org_size);
- } else {
- SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
+ if (SCNG(input_filter)) {
+ if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
+ zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
+ "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
+ }
+ buf = SCNG(script_filtered);
+ size = SCNG(script_filtered_size);
}
-
- yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC);
- } else {
- yy_scan_buffer(str->value.str.val, str->value.str.len TSRMLS_CC);
}
+ yy_scan_buffer(buf, size TSRMLS_CC);
+
zend_set_compiled_filename(filename TSRMLS_CC);
CG(zend_lineno) = 1;
CG(increment_lineno) = 0;
@@ -659,11 +653,11 @@
{
size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
if (SCNG(input_filter)) {
- size_t original_offset = offset, length = 0; do {
+ size_t original_offset = offset, length = 0;
+ do {
unsigned char *p = NULL;
- SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC);
- if (!p) {
- break;
+ if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) {
+ return (size_t)-1;
}
efree(p);
if (length > original_offset) {
@@ -714,10 +708,6 @@
BEGIN(ST_IN_SCRIPTING);
compiler_result = zendparse(TSRMLS_C);
- if (SCNG(script_org)) {
- efree(SCNG(script_org));
- SCNG(script_org) = NULL;
- }
if (SCNG(script_filtered)) {
efree(SCNG(script_filtered));
SCNG(script_filtered) = NULL;
@@ -759,10 +749,6 @@
return FAILURE;
}
zend_highlight(syntax_highlighter_ini TSRMLS_CC);
- if (SCNG(script_org)) {
- efree(SCNG(script_org));
- SCNG(script_org) = NULL;
- }
if (SCNG(script_filtered)) {
efree(SCNG(script_filtered));
SCNG(script_filtered) = NULL;
@@ -786,10 +772,6 @@
}
BEGIN(INITIAL);
zend_highlight(syntax_highlighter_ini TSRMLS_CC);
- if (SCNG(script_org)) {
- efree(SCNG(script_org));
- SCNG(script_org) = NULL;
- }
if (SCNG(script_filtered)) {
efree(SCNG(script_filtered));
SCNG(script_filtered) = NULL;
@@ -801,8 +783,8 @@
ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC)
{
- size_t original_offset, offset, free_flag, new_len, length;
- unsigned char *p;
+ size_t original_offset, offset, length;
+ unsigned char *new_yy_start;
/* calculate current position */
offset = original_offset = YYCURSOR - SCNG(yy_start);
@@ -818,87 +800,31 @@
/* convert and set */
if (!SCNG(input_filter)) {
+ if (SCNG(script_filtered)) {
+ efree(SCNG(script_filtered));
+ SCNG(script_filtered) = NULL;
+ }
+ SCNG(script_filtered_size) = 0;
length = SCNG(script_org_size) - offset;
- p = SCNG(script_org) + offset;
- free_flag = 0;
+ new_yy_start = SCNG(script_org) + offset;
} else {
- SCNG(input_filter)(&p, &length, SCNG(script_org) + offset, SCNG(script_org_size) - offset TSRMLS_CC);
- free_flag = 1;
- }
-
- new_len = original_offset + length;
-
- if (new_len > YYLIMIT - SCNG(yy_start)) {
- unsigned char *new_yy_start = erealloc(SCNG(yy_start), new_len);
- SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
- SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
- SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
- SCNG(yy_start) = new_yy_start;
+ if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org) + offset, SCNG(script_org_size) - offset TSRMLS_CC)) {
+ zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
+ "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
+ }
SCNG(script_filtered) = new_yy_start;
- SCNG(script_filtered_size) = new_len;
+ SCNG(script_filtered_size) = length;
}
- SCNG(yy_limit) = SCNG(yy_start) + new_len;
- memmove(SCNG(yy_start) + original_offset, p, length);
+ SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
+ SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
+ SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
+ SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start));
- if (free_flag) {
- efree(p);
- }
+ SCNG(yy_start) = new_yy_start;
}
-ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC)
-{
- size_t n;
-
- if (CG(interactive) == 0) {
- if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) {
- return FAILURE;
- }
- n = len;
- return n;
- }
-
- /* interactive */
- if (SCNG(script_org)) {
- efree(SCNG(script_org));
- }
- if (SCNG(script_filtered)) {
- efree(SCNG(script_filtered));
- }
- SCNG(script_org) = NULL;
- SCNG(script_org_size) = 0;
-
- /* TODO: support widechars */
- if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) {
- return FAILURE;
- }
- n = len;
-
- SCNG(script_org_size) = n;
- SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1);
- memcpy(SCNG(script_org), buf, n);
-
- return n;
-}
-
-
-ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC)
-{
- if (SCNG(script_org)) {
- efree(SCNG(script_org));
- SCNG(script_org) = NULL;
- }
- SCNG(script_org_size) = n;
-
- SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1);
- memcpy(SCNG(script_org), buf, n);
- *(SCNG(script_org)+SCNG(script_org_size)) = '\0';
-
- return 0;
-}
-
-
# define zend_copy_value(zendlval, yytext, yyleng) \
if (SCNG(output_filter)) { \
size_t sz = 0; \
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php