mattwil    Tue May 5 01:35:44 2009 UTC

  Modified files:    (Branch: PHP_5_3)
    /php-src    NEWS
    /ZendEngine2    zend_highlight.c zend_language_scanner.l
    /php-src/ext/standard/tests/strings    highlight_file.phpt
  Log:
  MFH: Implemented manual scanning for strings/comments, plus misc. fixes
http://cvs.php.net/viewvc.cgi/php-src/NEWS?r1=1.2027.2.547.2.965.2.585&r2=1.2027.2.547.2.965.2.586&diff_format=u Index: php-src/NEWS diff -u php-src/NEWS:1.2027.2.547.2.965.2.585 php-src/NEWS:1.2027.2.547.2.965.2.586 --- php-src/NEWS:1.2027.2.547.2.965.2.585 Tue May 5 00:55:27 2009 +++ php-src/NEWS Tue May 5 01:35:43 2009 @@ -59,6 +59,8 @@ - Fixed bug #47038 (Memory leak in include). (Dmitry) - Fixed bug #47021 (SoapClient stumbles over WSDL delivered with "Transfer-Encoding: chunked"). (Dmitry) +- Fixed bug #46817 (tokenizer misses last single-line comment (PHP 5.3+, with + re2c lexer)). (Matt, Shire) - Fixed bug #46108 (DateTime - Memory leak when unserializing). (Felipe) - Fixed bug #44861 (scrollable cursor don't work with pgsql). (Matteo) - Fixed bug #44409 (PDO::FETCH_SERIALIZE calls __construct()). (Matteo) http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_highlight.c?r1=1.49.2.3.2.2.2.6&r2=1.49.2.3.2.2.2.7&diff_format=u Index: ZendEngine2/zend_highlight.c diff -u ZendEngine2/zend_highlight.c:1.49.2.3.2.2.2.6 ZendEngine2/zend_highlight.c:1.49.2.3.2.2.2.7 --- ZendEngine2/zend_highlight.c:1.49.2.3.2.2.2.6 Wed Dec 31 11:15:32 2008 +++ ZendEngine2/zend_highlight.c Tue May 5 01:35:44 2009 @@ -17,7 +17,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: zend_highlight.c,v 1.49.2.3.2.2.2.6 2008/12/31 11:15:32 sebastian Exp $ */ +/* $Id: zend_highlight.c,v 1.49.2.3.2.2.2.7 2009/05/05 01:35:44 mattwil Exp $ */ #include "zend.h" #include <zend_language_parser.h> @@ -142,14 +142,8 @@ zend_printf("<span style=\"color: %s\">", last_color); } } - switch (token_type) { - case T_END_HEREDOC: - zend_html_puts(token.value.str.val, token.value.str.len TSRMLS_CC); - break; - default: - zend_html_puts(LANG_SCNG(yy_text), LANG_SCNG(yy_leng) TSRMLS_CC); - break; - } + + zend_html_puts(LANG_SCNG(yy_text), LANG_SCNG(yy_leng) TSRMLS_CC); if (token.type == IS_STRING) { switch (token_type) { @@ -170,19 +164,6 @@ token.type = 0; } - /* handler for 
trailing comments, see bug #42767 */ - if (LANG_SCNG(yy_leng) && LANG_SCNG(yy_text) < LANG_SCNG(yy_limit)) { - if (last_color != syntax_highlighter_ini->highlight_comment) { - if (last_color != syntax_highlighter_ini->highlight_html) { - zend_printf("</span>"); - } - if (syntax_highlighter_ini->highlight_comment != syntax_highlighter_ini->highlight_html) { - zend_printf("<span style=\"color: %s\">", syntax_highlighter_ini->highlight_comment); - } - } - zend_html_puts(LANG_SCNG(yy_text), (LANG_SCNG(yy_limit) - LANG_SCNG(yy_text)) TSRMLS_CC); - } - if (last_color != syntax_highlighter_ini->highlight_html) { zend_printf("</span>\n"); } http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_language_scanner.l?r1=1.131.2.11.2.13.2.39&r2=1.131.2.11.2.13.2.40&diff_format=u Index: ZendEngine2/zend_language_scanner.l diff -u ZendEngine2/zend_language_scanner.l:1.131.2.11.2.13.2.39 ZendEngine2/zend_language_scanner.l:1.131.2.11.2.13.2.40 --- ZendEngine2/zend_language_scanner.l:1.131.2.11.2.13.2.39 Thu Mar 26 12:37:17 2009 +++ ZendEngine2/zend_language_scanner.l Tue May 5 01:35:44 2009 @@ -21,7 +21,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: zend_language_scanner.l,v 1.131.2.11.2.13.2.39 2009/03/26 12:37:17 dmitry Exp $ */ +/* $Id: zend_language_scanner.l,v 1.131.2.11.2.13.2.40 2009/05/05 01:35:44 mattwil Exp $ */ #if 0 # define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c) @@ -109,6 +109,12 @@ } \ } +/* To save initial string length after scanning to first variable, CG(doc_comment_len) can be reused */ +#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) CG(doc_comment_len) = (len) +#define GET_DOUBLE_QUOTES_SCANNED_LENGTH() CG(doc_comment_len) + +#define IS_LABEL_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) >= 0x7F) + #define ZEND_IS_OCT(c) ((c)>='0' && (c)<='7') #define ZEND_IS_HEX(c) (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F')) @@ -835,63 +841,8 @@ WHITESPACE [ 
\n\r\t]+ TABS_AND_SPACES [ \t]* TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?...@] -ANY_CHAR [^\x00] +ANY_CHAR [^] NEWLINE ("\r"|"\n"|"\r\n") -NULL [\x00]{1} - -/* - * LITERAL_DOLLAR matches unescaped $ that aren't followed by a label character - * or a { and therefore will be taken literally. The case of literal $ before - * a variable or "${" is handled in a rule for each string type - */ -DOUBLE_QUOTES_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$"\\{]|("\\"{ANY_CHAR}))) -BACKQUOTE_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$`\\{]|("\\"{ANY_CHAR}))) -HEREDOC_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$\n\r\\{]|("\\"[^\n\r]))) - -/* - * Usually, HEREDOC_NEWLINE will just function like a simple NEWLINE, but some - * special cases need to be handled. HEREDOC_CHARS doesn't allow a line to - * match when { or $, and/or \ is at the end. (("{"*|"$"*)"\\"?) handles that, - * along with cases where { or $, and/or \ is the ONLY thing on a line - * - * The other case is when a line contains a label, followed by ONLY - * { or $, and/or \ Handled by ({LABEL}";"?((("{"+|"$"+)"\\"?)|"\\")) - */ -HEREDOC_NEWLINE ((({LABEL}";"?((("{"+|"$"+)"\\"?)|"\\"))|(("{"*|"$"*)"\\"?)){NEWLINE}) - -/* - * This pattern is just used in the next 2 for matching { or literal $, and/or - * \ escape sequence immediately at the beginning of a line or after a label - */ -HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR (("{"+[^$\n\r\\{])|("{"*"\\"[^\n\r])|{HEREDOC_LITERAL_DOLLAR}) - -/* - * These 2 label-related patterns allow HEREDOC_CHARS to continue "regular" - * matching after a newline that starts with either a non-label character or a - * label that isn't followed by a newline. Like HEREDOC_CHARS, they won't match - * a variable or "{$" Matching a newline, and possibly label, up TO a variable - * or "{$", is handled in the heredoc rules - * - * The HEREDOC_LABEL_NO_NEWLINE pattern (";"[^$\n\r\\{]) handles cases where ; - * follows a label. 
[^a-zA-Z0-9_\x7f-\xff;$\n\r\\{] is needed to prevent a label - * character or ; from matching on a possible (real) ending label - */ -HEREDOC_NON_LABEL ([^a-zA-Z_\x7f-\xff$\n\r\\{]|{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR}) -HEREDOC_LABEL_NO_NEWLINE ({LABEL}([^a-zA-Z0-9_\x7f-\xff;$\n\r\\{]|(";"[^$\n\r\\{])|(";"?{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR}))) - -/* - * CHARS matches everything up to a variable or "{$" - * {'s are matched as long as they aren't followed by a $ - * The case of { before "{$" is handled in a rule for each string type - * - * For heredocs, matching continues across/after newlines if/when it's known - * that the next line doesn't contain a possible ending label - */ -DOUBLE_QUOTES_CHARS ("{"*([^$"\\{]|("\\"{ANY_CHAR}))|{DOUBLE_QUOTES_LITERAL_DOLLAR}) -BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR}) -HEREDOC_CHARS ("{"*([^$\n\r\\{]|("\\"[^\n\r]))|{HEREDOC_LITERAL_DOLLAR}|({HEREDOC_NEWLINE}+({HEREDOC_NON_LABEL}|{HEREDOC_LABEL_NO_NEWLINE}))) - -NOWDOC_CHARS ([^\n\r]|{NEWLINE}+([^a-zA-Z_\x7f-\xff\n\r]|({LABEL}([^a-zA-Z0-9_\x7f-\xff;\n\r]|(";"[^\n\r]))))) /* compute yyleng before each rule */ <!*> := yyleng = YYCURSOR - SCNG(yy_text); @@ -1530,6 +1481,14 @@ } <INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" { + YYCTYPE *bracket = zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1)); + + if (bracket != SCNG(yy_text)) { + /* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */ + YYCURSOR = bracket; + goto inline_html; + } + HANDLE_NEWLINES(yytext, yyleng); zendlval->value.str.val = yytext; /* no copying - intentional */ zendlval->value.str.len = yyleng; @@ -1601,29 +1560,48 @@ } <INITIAL>{ANY_CHAR} { + if (YYCURSOR > YYLIMIT) { + return 0; + } inline_char_handler: while (1) { YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR); - if (ptr == NULL) { - YYCURSOR = YYLIMIT; - yyleng = YYCURSOR - 
SCNG(yy_text); - break; - - } else { - YYCURSOR = ptr + 1; + YYCURSOR = ptr ? ptr + 1 : YYLIMIT; - /* stop if it may be an opening tag (<?, <%, <script>). this condition is not optimal though */ - if (YYCURSOR < YYLIMIT && (*YYCURSOR == '?' || *YYCURSOR == '%' || *YYCURSOR == 's')) { - --YYCURSOR; - yyleng = YYCURSOR - SCNG(yy_text); - break; + if (YYCURSOR < YYLIMIT) { + switch (*YYCURSOR) { + case '?': + if (CG(short_tags) || !strncasecmp(YYCURSOR + 1, "php", 3)) { /* Assume [ \t\n\r] follows "php" */ + break; + } + continue; + case '%': + if (CG(asp_tags)) { + break; + } + continue; + case 's': + case 'S': + /* Probably NOT an opening PHP <script> tag, so don't end the HTML chunk yet + * If it is, the PHP <script> tag rule checks for any HTML scanned before it */ + YYCURSOR--; + yymore(); + default: + continue; } + + YYCURSOR--; } + + break; } +inline_html: + yyleng = YYCURSOR - SCNG(yy_text); + #ifdef ZEND_MULTIBYTE if (SCNG(output_filter)) { int readsize; @@ -1688,7 +1666,6 @@ /* Invalid rule to return a more explicit parse error with proper line number */ yyless(0); yy_pop_state(TSRMLS_C); - ZVAL_EMPTY_STRING(zendlval); /* Empty since it won't be used */ return T_ENCAPSED_AND_WHITESPACE; } @@ -1700,91 +1677,71 @@ <ST_IN_SCRIPTING>"#"|"//" { - BEGIN(ST_ONE_LINE_COMMENT); - yymore(); -} - -<ST_ONE_LINE_COMMENT>"?"|"%"|">" { - yymore(); -} + while (YYCURSOR < YYLIMIT) { + switch (*YYCURSOR++) { + case '\r': + if (*YYCURSOR == '\n') { + YYCURSOR++; + } + /* fall through */ + case '\n': + CG(zend_lineno)++; + break; + case '%': + if (!CG(asp_tags)) { + continue; + } + /* fall through */ + case '?': + if (*YYCURSOR == '>') { + YYCURSOR--; + break; + } + /* fall through */ + default: + continue; + } -<ST_ONE_LINE_COMMENT>[^\n\r?%>]*{ANY_CHAR} { - switch (yytext[yyleng-1]) { - case '?': case '%': case '>': - yyless(yyleng-1); - yymore(); - break; - case '\n': - CG(zend_lineno)++; - /* intentional fall through */ - default: - zendlval->value.str.val = yytext; /* no 
copying - intentional */ - zendlval->value.str.len = yyleng; - zendlval->type = IS_STRING; - BEGIN(ST_IN_SCRIPTING); - return T_COMMENT; + break; } -} -<ST_ONE_LINE_COMMENT>{NEWLINE} { - zendlval->value.str.val = yytext; /* no copying - intentional */ - zendlval->value.str.len = yyleng; - zendlval->type = IS_STRING; - BEGIN(ST_IN_SCRIPTING); - CG(zend_lineno)++; + yyleng = YYCURSOR - SCNG(yy_text); + return T_COMMENT; } -<ST_ONE_LINE_COMMENT>"?>"|"%>" { - if (CG(asp_tags) || yytext[yyleng-2] != '%') { /* asp comment? */ - zendlval->value.str.val = yytext; /* no copying - intentional */ - zendlval->value.str.len = yyleng-2; - zendlval->type = IS_STRING; - yyless(yyleng - 2); - BEGIN(ST_IN_SCRIPTING); - return T_COMMENT; +<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} { + int doc_com; + + if (yyleng > 2) { + doc_com = 1; + RESET_DOC_COMMENT(); } else { - yymore(); + doc_com = 0; } -} - -<ST_IN_SCRIPTING>"/**"{WHITESPACE} { - RESET_DOC_COMMENT(); - BEGIN(ST_DOC_COMMENT); - yymore(); -} - -<ST_COMMENT,ST_DOC_COMMENT>{NULL} { - zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno)); - return 0; -} - -<ST_IN_SCRIPTING>"/*" { - BEGIN(ST_COMMENT); - yymore(); -} + while (YYCURSOR < YYLIMIT) { + if (*YYCURSOR++ == '*' && *YYCURSOR == '/') { + break; + } + } -<ST_COMMENT,ST_DOC_COMMENT>[^*]+ { - yymore(); -} + if (YYCURSOR < YYLIMIT) { + YYCURSOR++; + } else { + zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno)); + } -<ST_DOC_COMMENT>"*/" { - CG(doc_comment) = estrndup(yytext, yyleng); - CG(doc_comment_len) = yyleng; + yyleng = YYCURSOR - SCNG(yy_text); HANDLE_NEWLINES(yytext, yyleng); - BEGIN(ST_IN_SCRIPTING); - return T_DOC_COMMENT; -} -<ST_COMMENT>"*/" { - HANDLE_NEWLINES(yytext, yyleng); - BEGIN(ST_IN_SCRIPTING); - return T_COMMENT; -} + if (doc_com) { + CG(doc_comment) = estrndup(yytext, yyleng); + CG(doc_comment_len) = yyleng; + return T_DOC_COMMENT; + } -<ST_COMMENT,ST_DOC_COMMENT>"*" { - yymore(); + 
return T_COMMENT; } <ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? { @@ -1810,21 +1767,31 @@ } -/* ("{"*|"$"*) handles { or $ at the end of a string (or the entire contents) - */ -<ST_IN_SCRIPTING>(b?["]{DOUBLE_QUOTES_CHARS}*("{"*|"$"*)["]) { - int bprefix = (yytext[0] != '"') ? 1 : 0; - - zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC); - return T_CONSTANT_ENCAPSED_STRING; -} - - -<ST_IN_SCRIPTING>(b?[']([^'\\]|("\\"{ANY_CHAR}))*[']) { +<ST_IN_SCRIPTING>b?['] { register char *s, *t; char *end; int bprefix = (yytext[0] != '\'') ? 1 : 0; + while (1) { + if (YYCURSOR < YYLIMIT) { + if (*YYCURSOR == '\'') { + YYCURSOR++; + yyleng = YYCURSOR - SCNG(yy_text); + + break; + } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) { + YYCURSOR++; + } + } else { + yyleng = YYLIMIT - SCNG(yy_text); + + /* Unclosed single quotes; treat similar to double quotes, but without a separate token + * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..." + * rule, which continued in ST_IN_SCRIPTING state after the quote */ + return T_ENCAPSED_AND_WHITESPACE; + } + } + zendlval->value.str.val = estrndup(yytext+bprefix+1, yyleng-bprefix-2); zendlval->value.str.len = yyleng-bprefix-2; zendlval->type = IS_STRING; @@ -1872,6 +1839,42 @@ <ST_IN_SCRIPTING>b?["] { + int bprefix = (yytext[0] != '"') ? 
1 : 0; + + while (YYCURSOR < YYLIMIT) { + switch (*YYCURSOR++) { + case '"': + yyleng = YYCURSOR - SCNG(yy_text); + zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC); + return T_CONSTANT_ENCAPSED_STRING; + case '$': + if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') { + break; + } + continue; + case '{': + if (*YYCURSOR == '$') { + break; + } + continue; + case '\\': + if (YYCURSOR < YYLIMIT) { + YYCURSOR++; + } + /* fall through */ + default: + continue; + } + + YYCURSOR--; + break; + } + + /* Remember how much was scanned to save rescanning */ + SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng); + + YYCURSOR = SCNG(yy_text) + yyleng; + BEGIN(ST_DOUBLE_QUOTES); return '"'; } @@ -1911,7 +1914,7 @@ /* Check for ending label on the next line */ if (CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, CG(heredoc_len))) { - unsigned char *end = YYCURSOR + CG(heredoc_len); + YYCTYPE *end = YYCURSOR + CG(heredoc_len); if (*end == ';') { end++; @@ -1932,49 +1935,6 @@ } -/* Match everything up to and including a possible ending label, so if the label - * doesn't match, it's kept with the rest of the string - * - * {HEREDOC_NEWLINE}+ handles the case of more than one newline sequence that - * couldn't be matched with HEREDOC_CHARS, because of the following label - */ -<ST_HEREDOC>{HEREDOC_CHARS}*{HEREDOC_NEWLINE}+{LABEL}";"?[\n\r] { - char *end = yytext + yyleng - 1; - - if (end[-1] == ';') { - end--; - yyleng--; - } - - if (yyleng > CG(heredoc_len) && !memcmp(end - CG(heredoc_len), CG(heredoc), CG(heredoc_len))) { - int len = yyleng - CG(heredoc_len) - 2; /* 2 for newline before and after label */ - - /* May have matched fooLABEL; make sure there's a newline before it */ - if (yytext[len] != '\n') { - if (yytext[len] != '\r') { - yyless(yyleng - 1); - yymore(); - } - } else if (len > 0 && yytext[len - 1] == '\r') { - len--; /* Windows newline */ - } - - /* Go back before label, to match in ST_END_HEREDOC state. 
yytext will include - * newline before label, for zend_highlight/strip, tokenizer, etc. */ - yyless(yyleng - CG(heredoc_len) - 1); /* 1 for newline after label */ - - CG(increment_lineno) = 1; /* For newline before label */ - BEGIN(ST_END_HEREDOC); - zend_scan_escape_string(zendlval, yytext, len, 0 TSRMLS_CC); - return T_ENCAPSED_AND_WHITESPACE; - } else { - /* Go back to end of label, so the next match works correctly in case of - * a variable or another label at the beginning of the next line */ - yyless(yyleng - 1); - yymore(); - } -} - <ST_END_HEREDOC>{ANY_CHAR} { YYCURSOR += CG(heredoc_len) - 1; yyleng = CG(heredoc_len); @@ -1988,118 +1948,250 @@ } -/* Will only match when $ follows: "{$" */ -<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{" { +<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" { zendlval->value.lval = (long) '{'; yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); + yyless(1); return T_CURLY_OPEN; } -<ST_DOUBLE_QUOTES>{DOUBLE_QUOTES_CHARS}+ { - zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC); - return T_ENCAPSED_AND_WHITESPACE; +<ST_DOUBLE_QUOTES>["] { + BEGIN(ST_IN_SCRIPTING); + return '"'; } -/* "{"{2,}|"$"{2,} handles { before "{$" or literal $ before a variable or "${" - * (("{"+|"$"+)["]) handles { or $ at the end of a string - * - * Same for backquotes and heredocs, except the second case doesn't apply to - * heredocs. 
yyless(yyleng - 1) is used to correct taking one character too many - */ -<ST_DOUBLE_QUOTES>{DOUBLE_QUOTES_CHARS}*("{"{2,}|"$"{2,}|(("{"+|"$"+)["])) { - yyless(yyleng - 1); - zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC); - return T_ENCAPSED_AND_WHITESPACE; +<ST_BACKQUOTE>[`] { + BEGIN(ST_IN_SCRIPTING); + return '`'; } -<ST_BACKQUOTE>{BACKQUOTE_CHARS}+ { - zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC); +<ST_DOUBLE_QUOTES>{ANY_CHAR} { + if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) { + YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1; + SET_DOUBLE_QUOTES_SCANNED_LENGTH(0); + + goto double_quotes_scan_done; + } + + if (YYCURSOR > YYLIMIT) { + return 0; + } + if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) { + YYCURSOR++; + } + + while (YYCURSOR < YYLIMIT) { + switch (*YYCURSOR++) { + case '"': + break; + case '$': + if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') { + break; + } + continue; + case '{': + if (*YYCURSOR == '$') { + break; + } + continue; + case '\\': + if (YYCURSOR < YYLIMIT) { + YYCURSOR++; + } + /* fall through */ + default: + continue; + } + + YYCURSOR--; + break; + } + +double_quotes_scan_done: + yyleng = YYCURSOR - SCNG(yy_text); + + zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC); return T_ENCAPSED_AND_WHITESPACE; } -<ST_BACKQUOTE>{BACKQUOTE_CHARS}*("{"{2,}|"$"{2,}|(("{"+|"$"+)[`])) { - yyless(yyleng - 1); + +<ST_BACKQUOTE>{ANY_CHAR} { + if (YYCURSOR > YYLIMIT) { + return 0; + } + if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) { + YYCURSOR++; + } + + while (YYCURSOR < YYLIMIT) { + switch (*YYCURSOR++) { + case '`': + break; + case '$': + if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') { + break; + } + continue; + case '{': + if (*YYCURSOR == '$') { + break; + } + continue; + case '\\': + if (YYCURSOR < YYLIMIT) { + YYCURSOR++; + } + /* fall through */ + default: + continue; + } + + YYCURSOR--; + break; + } + + yyleng = YYCURSOR - SCNG(yy_text); + zend_scan_escape_string(zendlval, yytext, yyleng, 
'`' TSRMLS_CC); return T_ENCAPSED_AND_WHITESPACE; } -/* ({HEREDOC_NEWLINE}+({LABEL}";"?)?)? handles the possible case of newline - * sequences, possibly followed by a label, that couldn't be matched with - * HEREDOC_CHARS because of a following variable or "{$" - * - * This doesn't affect real ending labels, as they are followed by a newline, - * which will result in a longer match for the correct rule if present - */ -<ST_HEREDOC>{HEREDOC_CHARS}*({HEREDOC_NEWLINE}+({LABEL}";"?)?)? { - zend_scan_escape_string(zendlval, yytext, yyleng, 0 TSRMLS_CC); - return T_ENCAPSED_AND_WHITESPACE; -} +<ST_HEREDOC>{ANY_CHAR} { + int newline = 0; -<ST_HEREDOC>{HEREDOC_CHARS}*({HEREDOC_NEWLINE}+({LABEL}";"?)?)?("{"{2,}|"$"{2,}) { - yyless(yyleng - 1); - zend_scan_escape_string(zendlval, yytext, yyleng, 0 TSRMLS_CC); - return T_ENCAPSED_AND_WHITESPACE; -} + if (YYCURSOR > YYLIMIT) { + return 0; + } + YYCURSOR--; -<ST_NOWDOC>({NOWDOC_CHARS}+{NEWLINE}+|{NEWLINE}+){LABEL}";"?[\n\r] { - char *end = yytext + yyleng - 1; + while (YYCURSOR < YYLIMIT) { + switch (*YYCURSOR++) { + case '\r': + if (*YYCURSOR == '\n') { + YYCURSOR++; + } + /* fall through */ + case '\n': + /* Check for ending label on the next line */ + if (IS_LABEL_START(*YYCURSOR) && CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) { + YYCTYPE *end = YYCURSOR + CG(heredoc_len); - if (end[-1] == ';') { - end--; - yyleng--; - } - - if (yyleng > CG(heredoc_len) && !memcmp(end - CG(heredoc_len), CG(heredoc), CG(heredoc_len))) { - int len = yyleng - CG(heredoc_len) - 2; /* 2 for newline before and after label */ - - /* May have matched fooLABEL; make sure there's a newline before it */ - if (yytext[len] != '\n') { - if (yytext[len] != '\r') { - yyless(yyleng - 1); - yymore(); - } - } else if (len > 0 && yytext[len - 1] == '\r') { - len--; /* Windows newline */ - } + if (*end == ';') { + end++; + } - /* Go back before label, to match in ST_END_HEREDOC state. 
yytext will include - * newline before label, for zend_highlight/strip, tokenizer, etc. */ - yyless(yyleng - CG(heredoc_len) - 1); /* 1 for newline after label */ + if (*end == '\n' || *end == '\r') { + /* newline before label will be subtracted from returned text, but + * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */ + if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') { + newline = 2; /* Windows newline */ + } else { + newline = 1; + } - CG(increment_lineno) = 1; /* For newline before label */ - BEGIN(ST_END_HEREDOC); + CG(increment_lineno) = 1; /* For newline before label */ + BEGIN(ST_END_HEREDOC); - zend_copy_value(zendlval, yytext, len); - zendlval->type = IS_STRING; - HANDLE_NEWLINES(yytext, len); - return T_ENCAPSED_AND_WHITESPACE; - } else { - /* Go back to end of label, so the next match works correctly in case of - * another label at the beginning of the next line */ - yyless(yyleng - 1); - yymore(); + goto heredoc_scan_done; + } + } + continue; + case '$': + if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') { + break; + } + continue; + case '{': + if (*YYCURSOR == '$') { + break; + } + continue; + case '\\': + if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') { + YYCURSOR++; + } + /* fall through */ + default: + continue; + } + + YYCURSOR--; + break; } -} +heredoc_scan_done: + yyleng = YYCURSOR - SCNG(yy_text); -<ST_DOUBLE_QUOTES>["] { - BEGIN(ST_IN_SCRIPTING); - return '"'; + zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC); + return T_ENCAPSED_AND_WHITESPACE; } -<ST_BACKQUOTE>[`] { - BEGIN(ST_IN_SCRIPTING); - return '`'; +<ST_NOWDOC>{ANY_CHAR} { + int newline = 0; + + if (YYCURSOR > YYLIMIT) { + return 0; + } + + YYCURSOR--; + + while (YYCURSOR < YYLIMIT) { + switch (*YYCURSOR++) { + case '\r': + if (*YYCURSOR == '\n') { + YYCURSOR++; + } + /* fall through */ + case '\n': + /* Check for ending label on the next line */ + if (IS_LABEL_START(*YYCURSOR) && CG(heredoc_len) < YYLIMIT - 
YYCURSOR && !memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) { + YYCTYPE *end = YYCURSOR + CG(heredoc_len); + + if (*end == ';') { + end++; + } + + if (*end == '\n' || *end == '\r') { + /* newline before label will be subtracted from returned text, but + * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */ + if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') { + newline = 2; /* Windows newline */ + } else { + newline = 1; + } + + CG(increment_lineno) = 1; /* For newline before label */ + BEGIN(ST_END_HEREDOC); + + goto nowdoc_scan_done; + } + } + /* fall through */ + default: + continue; + } + } + +nowdoc_scan_done: + yyleng = YYCURSOR - SCNG(yy_text); + + zend_copy_value(zendlval, yytext, yyleng - newline); + zendlval->type = IS_STRING; + HANDLE_NEWLINES(yytext, yyleng - newline); + return T_ENCAPSED_AND_WHITESPACE; } -<*>{NULL} { return 0; } /* EOF */ <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} { + if (YYCURSOR > YYLIMIT) { + return 0; + } + zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE); goto restart; } http://cvs.php.net/viewvc.cgi/php-src/ext/standard/tests/strings/highlight_file.phpt?r1=1.1.2.2.2.4&r2=1.1.2.2.2.5&diff_format=u Index: php-src/ext/standard/tests/strings/highlight_file.phpt diff -u php-src/ext/standard/tests/strings/highlight_file.phpt:1.1.2.2.2.4 php-src/ext/standard/tests/strings/highlight_file.phpt:1.1.2.2.2.5 --- php-src/ext/standard/tests/strings/highlight_file.phpt:1.1.2.2.2.4 Mon Mar 16 01:40:14 2009 +++ php-src/ext/standard/tests/strings/highlight_file.phpt Tue May 5 01:35:44 2009 @@ -50,7 +50,7 @@ </span> </code>bool(true) <code><span style="color: #000000"> -<span style="color: #0000BB"><?php </span><span style="color: #007700">echo </span><span style="color: #FF9900">"test ?></span> +<span style="color: #0000BB"><?php </span><span style="color: #007700">echo </span><span style="color: #DD0000">"test ?></span> </span> </code>bool(true) 
<code><span style="color: #000000">
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php