dmitry Fri May 18 13:12:47 2007 UTC
Modified files:
/ZendEngine2 zend_compile.c zend_compile.h zend_language_parser.y
zend_language_scanner.l zend_vm_def.h
zend_vm_execute.h
/php-src/ext/tokenizer tokenizer.c
/php-src/ext/tokenizer/tests 001.phpt bug26463.phpt
Log:
Improved compilation of heredocs and interpolated strings. (Matt)
http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_compile.c?r1=1.748&r2=1.749&diff_format=u
Index: ZendEngine2/zend_compile.c
diff -u ZendEngine2/zend_compile.c:1.748 ZendEngine2/zend_compile.c:1.749
--- ZendEngine2/zend_compile.c:1.748 Fri May 11 09:38:52 2007
+++ ZendEngine2/zend_compile.c Fri May 18 13:12:47 2007
@@ -17,7 +17,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: zend_compile.c,v 1.748 2007/05/11 09:38:52 tony2001 Exp $ */
+/* $Id: zend_compile.c,v 1.749 2007/05/18 13:12:47 dmitry Exp $ */
#include <zend_language_parser.h>
#include "zend.h"
@@ -989,25 +989,25 @@
}
-void zend_do_add_char(znode *result, znode *op1, znode *op2 TSRMLS_DC)
-{
- zend_op *opline = get_next_op(CG(active_op_array) TSRMLS_CC);
-
- opline->opcode = ZEND_ADD_CHAR;
- opline->op1 = *op1;
- opline->op2 = *op2;
- opline->op2.op_type = IS_CONST;
- opline->extended_value = CG(literal_type);
- opline->result = opline->op1;
- *result = opline->result;
-}
-
-
void zend_do_add_string(znode *result, znode *op1, znode *op2 TSRMLS_DC)
{
- zend_op *opline = get_next_op(CG(active_op_array) TSRMLS_CC);
+ zend_op *opline;
- opline->opcode = ZEND_ADD_STRING;
+ if (Z_UNILEN(op2->u.constant) > 1) {
+ opline = get_next_op(CG(active_op_array) TSRMLS_CC);
+ opline->opcode = ZEND_ADD_STRING;
+ } else if (Z_UNILEN(op2->u.constant) == 1) {
+ int ch = (Z_TYPE(op2->u.constant) == IS_UNICODE) ? *Z_USTRVAL(op2->u.constant) : *Z_STRVAL(op2->u.constant);
+
+ /* Free memory and use ZEND_ADD_CHAR in case of 1 character strings */
+ efree(Z_UNIVAL(op2->u.constant).v);
+ ZVAL_LONG(&op2->u.constant, ch);
+ opline = get_next_op(CG(active_op_array) TSRMLS_CC);
+ opline->opcode = ZEND_ADD_CHAR;
+ } else { /* String can be empty after a variable at the end of a heredoc */
+ efree(Z_UNIVAL(op2->u.constant).v);
+ return;
+ }
opline->op1 = *op1;
opline->op2 = *op2;
opline->op2.op_type = IS_CONST;
@@ -4154,33 +4154,6 @@
}
-void zend_do_end_heredoc(TSRMLS_D)
-{
- int opline_num = get_next_op_number(CG(active_op_array))-1;
- zend_op *opline = &CG(active_op_array)->opcodes[opline_num];
-
- if (opline->opcode != ZEND_ADD_STRING) {
- return;
- }
-
- if (Z_TYPE(opline->op2.u.constant) == IS_UNICODE) {
-
Z_USTRVAL(opline->op2.u.constant)[(Z_USTRLEN(opline->op2.u.constant)--)-1] = 0;
- if (Z_USTRLEN(opline->op2.u.constant)>0) {
- if
(Z_USTRVAL(opline->op2.u.constant)[Z_USTRLEN(opline->op2.u.constant)-1]=='\r') {
-
Z_USTRVAL(opline->op2.u.constant)[(Z_USTRLEN(opline->op2.u.constant)--)-1] = 0;
- }
- }
- } else {
-
Z_STRVAL(opline->op2.u.constant)[(Z_STRLEN(opline->op2.u.constant)--)-1] = 0;
- if (Z_STRLEN(opline->op2.u.constant)>0) {
- if
(Z_STRVAL(opline->op2.u.constant)[Z_STRLEN(opline->op2.u.constant)-1]=='\r') {
-
Z_STRVAL(opline->op2.u.constant)[(Z_STRLEN(opline->op2.u.constant)--)-1] = 0;
- }
- }
- }
-}
-
-
void zend_do_exit(znode *result, znode *message TSRMLS_DC)
{
zend_op *opline = get_next_op(CG(active_op_array) TSRMLS_CC);
@@ -4425,12 +4398,12 @@
{
int retval;
-again:
if (CG(increment_lineno)) {
CG(zend_lineno)++;
CG(increment_lineno) = 0;
}
+again:
Z_TYPE(zendlval->u.constant) = IS_LONG;
retval = lex_scan(&zendlval->u.constant TSRMLS_CC);
switch (retval) {
@@ -4441,8 +4414,7 @@
goto again;
case T_CLOSE_TAG:
- if (LANG_SCNG(yy_text)[LANG_SCNG(yy_leng)-1]=='\n'
- ||
(LANG_SCNG(yy_text)[LANG_SCNG(yy_leng)-2]=='\r' &&
LANG_SCNG(yy_text)[LANG_SCNG(yy_leng)-1])) {
+ if (LANG_SCNG(yy_text)[LANG_SCNG(yy_leng)-1] != '>') {
CG(increment_lineno) = 1;
}
retval = ';'; /* implicit ; */
http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_compile.h?r1=1.356&r2=1.357&diff_format=u
Index: ZendEngine2/zend_compile.h
diff -u ZendEngine2/zend_compile.h:1.356 ZendEngine2/zend_compile.h:1.357
--- ZendEngine2/zend_compile.h:1.356 Thu Mar 8 17:30:28 2007
+++ ZendEngine2/zend_compile.h Fri May 18 13:12:47 2007
@@ -17,7 +17,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: zend_compile.h,v 1.356 2007/03/08 17:30:28 helly Exp $ */
+/* $Id: zend_compile.h,v 1.357 2007/05/18 13:12:47 dmitry Exp $ */
#ifndef ZEND_COMPILE_H
#define ZEND_COMPILE_H
@@ -404,7 +404,6 @@
void zend_do_free(znode *op1 TSRMLS_DC);
void zend_do_init_string(znode *result TSRMLS_DC);
-void zend_do_add_char(znode *result, znode *op1, znode *op2 TSRMLS_DC);
void zend_do_add_string(znode *result, znode *op1, znode *op2 TSRMLS_DC);
void zend_do_add_variable(znode *result, znode *op1, znode *op2 TSRMLS_DC);
@@ -499,8 +498,6 @@
void zend_do_declare_stmt(znode *var, znode *val TSRMLS_DC);
void zend_do_declare_end(znode *declare_token TSRMLS_DC);
-void zend_do_end_heredoc(TSRMLS_D);
-
void zend_do_exit(znode *result, znode *message TSRMLS_DC);
void zend_do_begin_silence(znode *strudel_token TSRMLS_DC);
http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_language_parser.y?r1=1.181&r2=1.182&diff_format=u
Index: ZendEngine2/zend_language_parser.y
diff -u ZendEngine2/zend_language_parser.y:1.181
ZendEngine2/zend_language_parser.y:1.182
--- ZendEngine2/zend_language_parser.y:1.181 Thu Apr 26 15:50:12 2007
+++ ZendEngine2/zend_language_parser.y Fri May 18 13:12:47 2007
@@ -18,14 +18,12 @@
+----------------------------------------------------------------------+
*/
-/* $Id: zend_language_parser.y,v 1.181 2007/04/26 15:50:12 andrei Exp $ */
+/* $Id: zend_language_parser.y,v 1.182 2007/05/18 13:12:47 dmitry Exp $ */
/*
* LALR shift/reduce conflicts and how they are resolved:
*
* - 2 shift/reduce conflicts due to the dangeling elseif/else ambiguity.
Solved by shift.
- * - 1 shift/reduce conflict due to arrays within encapsulated strings. Solved
by shift.
- * - 1 shift/reduce conflict due to objects within encapsulated strings.
Solved by shift.
*
*/
@@ -49,7 +47,7 @@
%}
%pure_parser
-%expect 4
+%expect 2
%left T_INCLUDE T_INCLUDE_ONCE T_EVAL T_REQUIRE T_REQUIRE_ONCE
%left ','
@@ -718,9 +716,9 @@
| class_constant { $$ = $1; }
| common_scalar { $$ = $1; }
| '"' { CG(literal_type) = UG(unicode)?IS_UNICODE:IS_STRING; }
encaps_list '"' { $$ = $3; }
- | T_START_HEREDOC { CG(literal_type) =
UG(unicode)?IS_UNICODE:IS_STRING; } encaps_list T_END_HEREDOC { $$ = $3;
zend_do_end_heredoc(TSRMLS_C); }
+ | T_START_HEREDOC { CG(literal_type) =
UG(unicode)?IS_UNICODE:IS_STRING; } encaps_list T_END_HEREDOC { $$ = $3; }
| T_BINARY_DOUBLE { CG(literal_type) = IS_STRING; } encaps_list
'"' { $$ = $3; }
- | T_BINARY_HEREDOC { CG(literal_type) = IS_STRING; } encaps_list
T_END_HEREDOC { $$ = $3; zend_do_end_heredoc(TSRMLS_C); }
+ | T_BINARY_HEREDOC { CG(literal_type) = IS_STRING; } encaps_list
T_END_HEREDOC { $$ = $3; }
;
@@ -879,16 +877,7 @@
encaps_list:
encaps_list encaps_var { zend_do_end_variable_parse(BP_VAR_R, 0
TSRMLS_CC); zend_do_add_variable(&$$, &$1, &$2 TSRMLS_CC); }
- | encaps_list T_STRING {
zend_do_add_string(&$$, &$1, &$2 TSRMLS_CC); }
- | encaps_list T_NUM_STRING {
zend_do_add_string(&$$, &$1, &$2 TSRMLS_CC); }
| encaps_list T_ENCAPSED_AND_WHITESPACE {
zend_do_add_string(&$$, &$1, &$2 TSRMLS_CC); }
- | encaps_list T_CHARACTER { zend_do_add_char(&$$,
&$1, &$2 TSRMLS_CC); }
- | encaps_list T_BAD_CHARACTER {
zend_do_add_string(&$$, &$1, &$2 TSRMLS_CC); }
- | encaps_list '[' { Z_LVAL($2.u.constant) = (long) '[';
zend_do_add_char(&$$, &$1, &$2 TSRMLS_CC); }
- | encaps_list ']' { Z_LVAL($2.u.constant) = (long) ']';
zend_do_add_char(&$$, &$1, &$2 TSRMLS_CC); }
- | encaps_list '{' { Z_LVAL($2.u.constant) = (long) '{';
zend_do_add_char(&$$, &$1, &$2 TSRMLS_CC); }
- | encaps_list '}' { Z_LVAL($2.u.constant) = (long) '}';
zend_do_add_char(&$$, &$1, &$2 TSRMLS_CC); }
- | encaps_list T_OBJECT_OPERATOR { znode tmp;
Z_LVAL($2.u.constant) = (long) '-'; zend_do_add_char(&tmp, &$1, &$2
TSRMLS_CC); Z_LVAL($2.u.constant) = (long) '>'; zend_do_add_char(&$$, &tmp,
&$2 TSRMLS_CC); }
| /* empty */ { zend_do_init_string(&$$
TSRMLS_CC); }
;
http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_language_scanner.l?r1=1.164&r2=1.165&diff_format=u
Index: ZendEngine2/zend_language_scanner.l
diff -u ZendEngine2/zend_language_scanner.l:1.164
ZendEngine2/zend_language_scanner.l:1.165
--- ZendEngine2/zend_language_scanner.l:1.164 Thu Apr 19 11:24:48 2007
+++ ZendEngine2/zend_language_scanner.l Fri May 18 13:12:47 2007
@@ -19,7 +19,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: zend_language_scanner.l,v 1.164 2007/04/19 11:24:48 tony2001 Exp $ */
+/* $Id: zend_language_scanner.l,v 1.165 2007/05/18 13:12:47 dmitry Exp $ */
#define yyleng SCNG(yy_leng)
#define yytext SCNG(yy_text)
@@ -37,8 +37,11 @@
%x ST_DOUBLE_QUOTES
%x ST_BACKQUOTE
%x ST_HEREDOC
+%x ST_START_HEREDOC
+%x ST_END_HEREDOC
%x ST_LOOKING_FOR_PROPERTY
%x ST_LOOKING_FOR_VARNAME
+%x ST_VAR_OFFSET
%x ST_COMMENT
%x ST_DOC_COMMENT
%x ST_ONE_LINE_COMMENT
@@ -99,9 +102,7 @@
char *p = (s), *boundary = p+(l);
\
\
while (p<boundary) {
\
- if (*p == '\n') {
\
- CG(zend_lineno)++;
\
- } else if ((*p == '\r') && (p+1 < boundary) && (*(p+1) !=
'\n')) { \
+ if (*p == '\n' || (*p == '\r' && (*(p+1) != '\n'))) {
\
CG(zend_lineno)++;
\
}
\
p++;
\
@@ -313,30 +314,6 @@
return -1;
}
-static inline zend_bool zend_digits_to_codepoint(char *s, char *end, UChar32
*c, int8_t digits)
-{
- int8_t n = 0;
- int8_t digit = 0;
- UChar32 codepoint = 0;
-
- while (s < end && n < digits) {
- digit = zend_get_hex_digit((UChar)*s);
- if (digit < 0) {
- break;
- }
- codepoint = (codepoint << 4) | digit;
- ++s;
- ++n;
- }
-
- if (n < digits) {
- return 0;
- }
-
- *c = codepoint;
- return 1;
-}
-
static inline zend_bool zend_udigits_to_codepoint(UChar *s, UChar *end,
UChar32 *c, int8_t digits)
{
int8_t n = 0;
@@ -361,20 +338,6 @@
return 1;
}
-static inline int zend_uchar_from_name(char *name, UChar32 *c)
-{
- UChar32 codepoint = 0;
- UErrorCode status = U_ZERO_ERROR;
-
- codepoint = u_charFromName(U_UNICODE_CHAR_NAME, name, &status);
- if (U_SUCCESS(status)) {
- *c = codepoint;
- return 1;
- } else {
- return 0;
- }
-}
-
static inline int zend_uchar_from_uname(UChar *name, int name_len, UChar32 *c
TSRMLS_DC)
{
UChar32 codepoint = 0;
@@ -1029,7 +992,7 @@
Z_STRVAL_P(zendlval) = (char *)estrndup(yytext, yyleng); \
Z_STRLEN_P(zendlval) = yyleng;
-int zend_scan_unicode_double_string(zval *zendlval TSRMLS_DC)
+static int zend_scan_unicode_escape_string(zval *zendlval, char *str, int len,
UChar quote_type, int type TSRMLS_DC)
{
register UChar *s, *t, c;
UChar *end;
@@ -1039,9 +1002,7 @@
int8_t bits;
int8_t n;
- HANDLE_NEWLINES(yytext, yyleng);
-
- if (!zend_copy_scanner_string(zendlval, yytext+1, yyleng-2, IS_UNICODE,
SCNG(output_conv) TSRMLS_CC)) {
+ if (!zend_copy_scanner_string(zendlval, str, len, IS_UNICODE,
SCNG(output_conv) TSRMLS_CC)) {
return 0;
}
@@ -1074,9 +1035,15 @@
*t++ = (UChar) 0x09; /*'\t'*/
Z_USTRLEN_P(zendlval)--;
break;
+ case 0x22: /*'"'*/
+ case 0x60: /*'`'*/
+ if (c != quote_type) {
+ *t++ = 0x5C; /*'\\'*/
+ *t++ = *s;
+ break;
+ }
case 0x5C: /*'\\'*/
case 0x24: /*'$'*/
- case 0x22: /*'"'*/
*t++ = *s;
Z_USTRLEN_P(zendlval)--;
break;
@@ -1110,6 +1077,20 @@
max_digits = 6;
Z_USTRLEN_P(zendlval)--;
break;
+ case 0x78: /*'x'*/
+ case 0x58: /*'X'*/
+ if ((digit =
zend_get_hex_digit(*(s+1))) >= 0) {
+ min_digits = 1;
+ max_digits = 2;
+ Z_USTRLEN_P(zendlval)--;
+ s++;
+ n = 1; /* already have one
digit */
+ codepoint = digit;
+ } else {
+ *t++ = 0x5C; /*'\\'*/
+ *t++ = *s;
+ }
+ break;
default:
digit = zend_get_octal_digit(*s);
if (digit >= 0) {
@@ -1118,14 +1099,6 @@
bits = 3;
n = 1; /* already have one
digit */
codepoint = digit;
- } else if (c == 0x78 /*'x'*/
- && (s+1) < end &&
(digit = zend_get_hex_digit(*(s+1))) >= 0) {
- min_digits = 1;
- max_digits = 2;
- Z_USTRLEN_P(zendlval)--;
- s++;
- n = 1; /* already have one
digit */
- codepoint = digit;
} else {
*t++ = 0x5C; /*'\\'*/
*t++ = *s;
@@ -1163,26 +1136,30 @@
efree(Z_USTRVAL_P(zendlval));
return 0;
}
- } else {
- s++;
+
+ /* s is already incremented and not past a
newline */
+ continue;
}
} else {
- *t++ = *s++;
+ *t++ = *s;
}
+
+ if (*s == 0x0A /*'\n'*/ || (*s == 0x0D /*'\r'*/ && (*(s+1) !=
0x0A /*'\n'*/))) {
+ CG(zend_lineno)++;
+ }
+ s++;
}
*t = 0;
- return T_CONSTANT_ENCAPSED_STRING;
+ return type;
}
-int zend_scan_unicode_single_string(zval *zendlval TSRMLS_DC)
+static int zend_scan_unicode_single_string(zval *zendlval TSRMLS_DC)
{
register UChar *s, *t;
UChar *end;
UChar32 codepoint = 0;
- HANDLE_NEWLINES(yytext, yyleng);
-
if (!zend_copy_scanner_string(zendlval, yytext+1, yyleng-2, IS_UNICODE,
SCNG(output_conv) TSRMLS_CC)) {
return 0;
}
@@ -1265,25 +1242,26 @@
*t++ = *s;
break;
}
- s++;
} else {
- *t++ = *s++;
+ *t++ = *s;
}
+
+ if (*s == 0x0A /*'\n'*/ || (*s == 0x0D /*'\r'*/ && (*(s+1) !=
0x0A /*'\n'*/))) {
+ CG(zend_lineno)++;
+ }
+ s++;
}
*t = 0;
return T_CONSTANT_ENCAPSED_STRING;
}
-int zend_scan_binary_double_string(zval *zendlval, int bprefix TSRMLS_DC)
+static void zend_scan_binary_escape_string(zval *zendlval, char *str, int len,
char quote_type TSRMLS_DC)
{
register char *s, *t;
char *end;
- Z_STRVAL_P(zendlval) = estrndup(yytext+bprefix+1, yyleng-bprefix-2);
- Z_STRLEN_P(zendlval) = yyleng-bprefix-2;
- Z_TYPE_P(zendlval) = IS_STRING;
- HANDLE_NEWLINES(yytext, yyleng);
+ ZVAL_STRINGL(zendlval, str, len, 1);
/* convert escape sequences */
s = t = Z_STRVAL_P(zendlval);
@@ -1307,12 +1285,37 @@
*t++ = '\t';
Z_STRLEN_P(zendlval)--;
break;
+ case '"':
+ case '`':
+ if (*s != quote_type) {
+ *t++ = '\\';
+ *t++ = *s;
+ break;
+ }
case '\\':
case '$':
- case '"':
*t++ = *s;
Z_STRLEN_P(zendlval)--;
break;
+ case 'x':
+ case 'X':
+ if (ZEND_IS_HEX(*(s+1))) {
+ char hex_buf[3] = { 0, 0, 0 };
+
+ Z_STRLEN_P(zendlval)--; /* for
the 'x' */
+
+ hex_buf[0] = *(++s);
+ Z_STRLEN_P(zendlval)--;
+ if (ZEND_IS_HEX(*(s+1))) {
+ hex_buf[1] = *(++s);
+ Z_STRLEN_P(zendlval)--;
+ }
+ *t++ = (char) strtol(hex_buf,
NULL, 16);
+ } else {
+ *t++ = '\\';
+ *t++ = *s;
+ }
+ break;
default:
/* check for an octal */
if (ZEND_IS_OCT(*s)) {
@@ -1320,52 +1323,39 @@
octal_buf[0] = *s;
Z_STRLEN_P(zendlval)--;
- if ((s+1)<end &&
ZEND_IS_OCT(*(s+1))) {
+ if (ZEND_IS_OCT(*(s+1))) {
octal_buf[1] = *(++s);
Z_STRLEN_P(zendlval)--;
- if ((s+1)<end &&
ZEND_IS_OCT(*(s+1))) {
+ if
(ZEND_IS_OCT(*(s+1))) {
octal_buf[2] =
*(++s);
Z_STRLEN_P(zendlval)--;
}
}
*t++ = (char) strtol(octal_buf,
NULL, 8);
- } else if (*s=='x' && (s+1)<end &&
ZEND_IS_HEX(*(s+1))) {
- char hex_buf[3] = { 0, 0, 0};
-
- Z_STRLEN_P(zendlval)--; /* for
the 'x' */
-
- hex_buf[0] = *(++s);
- Z_STRLEN_P(zendlval)--;
- if ((s+1)<end &&
ZEND_IS_HEX(*(s+1))) {
- hex_buf[1] = *(++s);
- Z_STRLEN_P(zendlval)--;
- }
- *t++ = (char) strtol(hex_buf,
NULL, 16);
} else {
*t++ = '\\';
*t++ = *s;
}
break;
}
- s++;
} else {
- *t++ = *s++;
+ *t++ = *s;
+ }
+
+ if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
+ CG(zend_lineno)++;
}
+ s++;
}
*t = 0;
-
- return T_CONSTANT_ENCAPSED_STRING;
}
-int zend_scan_binary_single_string(zval *zendlval, int bprefix TSRMLS_DC)
+static void zend_scan_binary_single_string(zval *zendlval, char *str, int len
TSRMLS_DC)
{
register char *s, *t;
char *end;
- Z_STRVAL_P(zendlval) = estrndup(yytext+bprefix+1, yyleng-bprefix-2);
- Z_STRLEN_P(zendlval) = yyleng-bprefix-2;
- Z_TYPE_P(zendlval) = IS_STRING;
- HANDLE_NEWLINES(yytext, yyleng);
+ ZVAL_STRINGL(zendlval, str, len, 1);
/* convert escape sequences */
s = t = Z_STRVAL_P(zendlval);
@@ -1387,14 +1377,16 @@
*t++ = *s;
break;
}
- s++;
} else {
- *t++ = *s++;
+ *t++ = *s;
+ }
+
+ if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
+ CG(zend_lineno)++;
}
+ s++;
}
*t = 0;
-
- return T_CONSTANT_ENCAPSED_STRING;
}
%}
@@ -1407,11 +1399,61 @@
WHITESPACE [ \n\r\t]+
TABS_AND_SPACES [ \t]*
TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
-ENCAPSED_TOKENS [\[\]{}$]
-ESCAPED_AND_WHITESPACE [\n\t\r #'.:;,()|^&+-/*=%!~<>?@]+
ANY_CHAR (.|[\n])
NEWLINE ("\r"|"\n"|"\r\n")
+/*
+ * LITERAL_DOLLAR matches unescaped $ that aren't followed by a label character
+ * or a { and therefore will be taken literally. The case of literal $ before
+ * a variable or "${" is handled in a rule for each string type
+ */
+DOUBLE_QUOTES_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$"\\{]|("\\"{ANY_CHAR})))
+BACKQUOTE_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$`\\{]|("\\"{ANY_CHAR})))
+HEREDOC_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$\n\r\\{]|("\\"[^\n\r])))
+
+/*
+ * Usually, HEREDOC_NEWLINE will just function like a simple NEWLINE, but some
+ * special cases need to be handled. HEREDOC_CHARS doesn't allow a line to
+ * match when { or $, and/or \ is at the end. (("{"*|"$"*)"\\"?) handles that,
+ * along with cases where { or $, and/or \ is the ONLY thing on a line
+ *
+ * The other case is when a line contains a label, followed by ONLY
+ * { or $, and/or \ Handled by ({LABEL}";"?((("{"+|"$"+)"\\"?)|"\\"))
+ */
+HEREDOC_NEWLINE
((({LABEL}";"?((("{"+|"$"+)"\\"?)|"\\"))|(("{"*|"$"*)"\\"?)){NEWLINE})
+
+/*
+ * This pattern is just used in the next 2 for matching { or literal $, and/or
+ * \ escape sequence immediately at the beginning of a line or after a label
+ */
+HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR
(("{"+[^$\n\r\\{])|("{"*"\\"[^\n\r])|{HEREDOC_LITERAL_DOLLAR})
+
+/*
+ * These 2 label-related patterns allow HEREDOC_CHARS to continue "regular"
+ * matching after a newline that starts with either a non-label character or a
+ * label that isn't followed by a newline. Like HEREDOC_CHARS, they won't match
+ * a variable or "{$" Matching a newline, and possibly label, up TO a variable
+ * or "{$", is handled in the heredoc rules
+ *
+ * The HEREDOC_LABEL_NO_NEWLINE pattern (";"[^$\n\r\\{]) handles cases where ;
+ * follows a label. [^a-zA-Z0-9_\x7f-\xff;$\n\r\\{] is needed to prevent a
label
+ * character or ; from matching on a possible (real) ending label
+ */
+HEREDOC_NON_LABEL
([^a-zA-Z_\x7f-\xff$\n\r\\{]|{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR})
+HEREDOC_LABEL_NO_NEWLINE
({LABEL}([^a-zA-Z0-9_\x7f-\xff;$\n\r\\{]|(";"[^$\n\r\\{])|(";"?{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR})))
+
+/*
+ * CHARS matches everything up to a variable or "{$"
+ * {'s are matched as long as they aren't followed by a $
+ * The case of { before "{$" is handled in a rule for each string type
+ *
+ * For heredocs, matching continues across/after newlines if/when it's known
+ * that the next line doesn't contain a possible ending label
+ */
+DOUBLE_QUOTES_CHARS
("{"*([^$"\\{]|("\\"{ANY_CHAR}))|{DOUBLE_QUOTES_LITERAL_DOLLAR})
+BACKQUOTE_CHARS
("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
+HEREDOC_CHARS
("{"*([^$\n\r\\{]|("\\"[^\n\r]))|{HEREDOC_LITERAL_DOLLAR}|({HEREDOC_NEWLINE}+({HEREDOC_NON_LABEL}|{HEREDOC_LABEL_NO_NEWLINE})))
+
%option noyylineno
%option noyywrap
%%
@@ -1560,11 +1602,15 @@
return T_IMPLEMENTS;
}
-<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"->" {
+<ST_IN_SCRIPTING>"->" {
yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
return T_OBJECT_OPERATOR;
}
+<ST_LOOKING_FOR_PROPERTY>"->" {
+ return T_OBJECT_OPERATOR;
+}
+
<ST_LOOKING_FOR_PROPERTY>{LABEL} {
yy_pop_state(TSRMLS_C);
if (!zend_copy_scanner_string(zendlval, yytext, yyleng,
UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) {
@@ -1906,7 +1952,19 @@
}
}
-<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{LNUM}|{HNUM} { /* treat numbers
(almost) as strings inside encapsulated strings */
+<ST_VAR_OFFSET>0|([1-9][0-9]*) { /* Offset could be treated as a long */
+ if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG -
1 && strcmp(yytext, long_min_digits) < 0)) {
+ Z_LVAL_P(zendlval) = strtol(yytext, NULL, 10);
+ Z_TYPE_P(zendlval) = IS_LONG;
+ } else {
+ if (!zend_copy_scanner_string(zendlval, yytext, yyleng,
CG(literal_type), SCNG(output_conv) TSRMLS_CC)) {
+ return 0;
+ }
+ }
+ return T_NUM_STRING;
+}
+
+<ST_VAR_OFFSET>{LNUM}|{HNUM} { /* Offset must be treated as a string */
if (!zend_copy_scanner_string(zendlval, yytext, yyleng,
CG(literal_type), SCNG(output_conv) TSRMLS_CC)) {
return 0;
}
@@ -2080,7 +2138,40 @@
return T_OPEN_TAG;
}
-<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL} {
+<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL}
{
+ if (!zend_copy_scanner_string(zendlval, (yytext+1), (yyleng-1),
UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) {
+ return 0;
+ }
+ if (UG(unicode) && !zend_check_and_normalize_identifier(zendlval)) {
+ return 0;
+ }
+ return T_VARIABLE;
+}
+
+%{
+/* Make sure a label character follows "->", otherwise there is no property
+ * and "->" will be taken literally
+ */ %}
+<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
+ yyless(yyleng - 3);
+ yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
+
+ if (!zend_copy_scanner_string(zendlval, (yytext+1), (yyleng-1),
UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) {
+ return 0;
+ }
+ if (UG(unicode) && !zend_check_and_normalize_identifier(zendlval)) {
+ return 0;
+ }
+ return T_VARIABLE;
+}
+
+%{
+/* A [ always designates a variable offset, regardless of what follows
+ */ %}
+<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
+ yyless(yyleng - 1);
+ yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
+
if (!zend_copy_scanner_string(zendlval, (yytext+1), (yyleng-1),
UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) {
return 0;
}
@@ -2090,6 +2181,21 @@
return T_VARIABLE;
}
+<ST_VAR_OFFSET>"]" {
+ yy_pop_state(TSRMLS_C);
+ return ']';
+}
+
+<ST_VAR_OFFSET>{TOKENS}|[{}] {
+ /* Only '[' can be valid, but returning other tokens will allow a more
explicit parse error */
+ return yytext[0];
+}
+
+<ST_VAR_OFFSET>[ \n\r\t'"`\\#] {
+ yyless(0);
+ yy_pop_state(TSRMLS_C);
+}
+
<ST_IN_SCRIPTING>{LABEL} {
if (!zend_copy_scanner_string(zendlval, yytext, yyleng,
UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) {
return 0;
@@ -2100,7 +2206,7 @@
return T_STRING;
}
-<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{LABEL} {
+<ST_VAR_OFFSET>{LABEL} {
if (!zend_copy_scanner_string(zendlval, yytext, yyleng,
CG(literal_type), SCNG(output_conv) TSRMLS_CC)) {
return 0;
}
@@ -2230,37 +2336,44 @@
}
-<ST_IN_SCRIPTING>(["]([^$"\\]|("\\".))*["]) {
+%{
+/* ("{"*|"$"*) handles { or $ at the end of a string (or the entire contents)
+ */ %}
+<ST_IN_SCRIPTING>(["]{DOUBLE_QUOTES_CHARS}*("{"*|"$"*)["]) {
if (UG(unicode)) {
- return zend_scan_unicode_double_string(zendlval TSRMLS_CC);
+ return zend_scan_unicode_escape_string(zendlval, yytext+1,
yyleng-2, 0x22 /*'"'*/, T_CONSTANT_ENCAPSED_STRING TSRMLS_CC);
} else {
- return zend_scan_binary_double_string(zendlval, 0 TSRMLS_CC);
+ zend_scan_binary_escape_string(zendlval, yytext+1, yyleng-2,
'"' TSRMLS_CC);
+ return T_CONSTANT_ENCAPSED_STRING;
}
}
-<ST_IN_SCRIPTING>(b["]([^$"\\]|("\\".))*["]) {
- return zend_scan_binary_double_string(zendlval, 1 TSRMLS_CC);
+<ST_IN_SCRIPTING>(b["]{DOUBLE_QUOTES_CHARS}*("{"*|"$"*)["]) {
+ zend_scan_binary_escape_string(zendlval, yytext+2, yyleng-3, '"'
TSRMLS_CC);
+ return T_CONSTANT_ENCAPSED_STRING;
}
-<ST_IN_SCRIPTING>([']([^'\\]|("\\".))*[']) {
+<ST_IN_SCRIPTING>([']([^'\\]|("\\"{ANY_CHAR}))*[']) {
if (UG(unicode)) {
return zend_scan_unicode_single_string(zendlval TSRMLS_CC);
} else {
- return zend_scan_binary_single_string(zendlval, 0 TSRMLS_CC);
+ zend_scan_binary_single_string(zendlval, yytext+1, yyleng-2
TSRMLS_CC);
+ return T_CONSTANT_ENCAPSED_STRING;
}
}
-<ST_IN_SCRIPTING>("b'"([^'\\]|("\\".))*[']) {
- return zend_scan_binary_single_string(zendlval, 1 TSRMLS_CC);
+<ST_IN_SCRIPTING>("b'"([^'\\]|("\\"{ANY_CHAR}))*[']) {
+ zend_scan_binary_single_string(zendlval, yytext+2, yyleng-3 TSRMLS_CC);
+ return T_CONSTANT_ENCAPSED_STRING;
}
<ST_IN_SCRIPTING>["] {
BEGIN(ST_DOUBLE_QUOTES);
- return '\"';
+ return '"';
}
<ST_IN_SCRIPTING>b["] {
@@ -2278,7 +2391,7 @@
CG(heredoc_len)--;
}
CG(heredoc) = estrndup(s, CG(heredoc_len));
- BEGIN(ST_HEREDOC);
+ BEGIN(ST_START_HEREDOC);
return T_BINARY_HEREDOC;
}
@@ -2293,7 +2406,7 @@
CG(heredoc_len)--;
}
CG(heredoc) = estrndup(s, CG(heredoc_len));
- BEGIN(ST_HEREDOC);
+ BEGIN(ST_START_HEREDOC);
return T_START_HEREDOC;
}
@@ -2304,204 +2417,180 @@
}
-<ST_HEREDOC>^{LABEL}(";")?{NEWLINE} {
- int label_len;
+<ST_START_HEREDOC>{ANY_CHAR} {
+ yyless(0);
+ BEGIN(ST_HEREDOC);
+}
- if (yytext[yyleng-2]=='\r') {
- label_len = yyleng-2;
- } else {
- label_len = yyleng-1;
- }
+<ST_START_HEREDOC>{LABEL}";"?[\n\r] {
+ int label_len = yyleng - 1;
if (yytext[label_len-1]==';') {
label_len--;
}
+ yyless(label_len);
+
if (label_len==CG(heredoc_len) && !memcmp(yytext, CG(heredoc),
label_len)) {
- Z_STRVAL_P(zendlval) = estrndup(yytext, label_len); /* unput
destroys yytext */
+ Z_STRVAL_P(zendlval) = CG(heredoc);
Z_STRLEN_P(zendlval) = label_len;
- yyless(yyleng - (yyleng - label_len));
- efree(CG(heredoc));
CG(heredoc)=NULL;
CG(heredoc_len)=0;
BEGIN(ST_IN_SCRIPTING);
return T_END_HEREDOC;
} else {
- CG(zend_lineno)++;
- if (!zend_copy_scanner_string(zendlval, yytext, yyleng,
CG(literal_type), SCNG(output_conv) TSRMLS_CC)) {
- return 0;
- }
- return T_STRING;
+ yymore();
+ BEGIN(ST_HEREDOC);
}
}
+%{
+/* Match everything up to and including a possible ending label, so if the
label
+ * doesn't match, it's kept with the rest of the string
+ *
+ * {HEREDOC_NEWLINE}+ handles the case of more than one newline sequence that
+ * couldn't be matched with HEREDOC_CHARS, because of the following label
+ */ %}
+<ST_HEREDOC>{HEREDOC_CHARS}*{HEREDOC_NEWLINE}+{LABEL}";"?[\n\r] {
+ char *end = yytext + yyleng - 1;
-<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{ESCAPED_AND_WHITESPACE} {
- HANDLE_NEWLINES(yytext, yyleng);
- if (!zend_copy_scanner_string(zendlval, yytext, yyleng,
CG(literal_type), SCNG(output_conv) TSRMLS_CC)) {
- return 0;
+ if (end[-1] == ';') {
+ end--;
+ yyleng--;
}
- return T_ENCAPSED_AND_WHITESPACE;
-}
-<ST_DOUBLE_QUOTES>[`]+ {
- if (!zend_copy_scanner_string(zendlval, yytext, yyleng,
CG(literal_type), SCNG(output_conv) TSRMLS_CC)) {
- return 0;
- }
- return T_ENCAPSED_AND_WHITESPACE;
-}
+ if (yyleng > CG(heredoc_len) && !memcmp(end - CG(heredoc_len),
CG(heredoc), CG(heredoc_len))) {
+ int len = yyleng - CG(heredoc_len) - 2; /* 2 for newline before
and after label */
+ if (len > 0 && yytext[len - 1] == '\r' && yytext[len] == '\n') {
+ len--;
+ }
-<ST_BACKQUOTE>["]+ {
- if (!zend_copy_scanner_string(zendlval, yytext, yyleng,
CG(literal_type), SCNG(output_conv) TSRMLS_CC)) {
- return 0;
- }
- return T_ENCAPSED_AND_WHITESPACE;
-}
+ /* Go back before last label char, to match in ST_END_HEREDOC
state */
+ yyless(yyleng - 2);
+ /* Subtract the remaining label length. yyleng must include
newline
+ * before label, for zend_highlight/strip, tokenizer, etc. */
+ yyleng -= CG(heredoc_len) - 1;
-<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"$"[^a-zA-Z_\x7f-\xff{] {
- Z_LVAL_P(zendlval) = (long) yytext[0];
- if (yyleng == 2) {
- yyless(1);
+ CG(increment_lineno) = 1; /* For newline before label */
+ BEGIN(ST_END_HEREDOC);
+
+ if (CG(literal_type) == IS_UNICODE) {
+ return zend_scan_unicode_escape_string(zendlval,
yytext, len, 0, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC);
+ } else {
+ zend_scan_binary_escape_string(zendlval, yytext, len, 0
TSRMLS_CC);
+ return T_ENCAPSED_AND_WHITESPACE;
+ }
+ } else {
+ /* Go back to end of label, so there's something to match again
in case
+ * there's a variable at the beginning of the next line */
+ yyless(yyleng - 1);
+ yymore();
}
- return T_CHARACTER;
}
-
-<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{ENCAPSED_TOKENS} {
- Z_LVAL_P(zendlval) = (long) yytext[0];
- return yytext[0];
+<ST_END_HEREDOC>{ANY_CHAR} {
+ Z_STRVAL_P(zendlval) = CG(heredoc);
+ Z_STRLEN_P(zendlval) = CG(heredoc_len);
+ yytext = Z_STRVAL_P(zendlval);
+ yyleng = Z_STRLEN_P(zendlval);
+ CG(heredoc) = NULL;
+ CG(heredoc_len) = 0;
+ BEGIN(ST_IN_SCRIPTING);
+ return T_END_HEREDOC;
}
+
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
- Z_LVAL_P(zendlval) = (long) yytext[0];
+ Z_LVAL_P(zendlval) = (long) '{';
yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
yyless(1);
return T_CURLY_OPEN;
}
-<ST_DOUBLE_QUOTES>"\\\"" {
- Z_LVAL_P(zendlval) = (long) '"';
- return T_CHARACTER;
-}
-
-<ST_BACKQUOTE>"\\`" {
- Z_LVAL_P(zendlval) = (long) '`';
- return T_CHARACTER;
+<ST_DOUBLE_QUOTES>{DOUBLE_QUOTES_CHARS}+ {
+ if (CG(literal_type) == IS_UNICODE) {
+ return zend_scan_unicode_escape_string(zendlval, yytext,
yyleng, 0x22 /*'"'*/, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC);
+ } else {
+ zend_scan_binary_escape_string(zendlval, yytext, yyleng, '"'
TSRMLS_CC);
+ return T_ENCAPSED_AND_WHITESPACE;
+ }
}
-<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"\\"[0-7]{1,3} {
- Z_LVAL_P(zendlval) = strtol(yytext+1, NULL, 8);
- return T_CHARACTER;
-}
+%{
+/* "{"{2,}|"$"{2,} handles { before "{$" or literal $ before a variable or "${"
+ * (("{"+|"$"+)["]) handles { or $ at the end of a string
+ *
+ * Same for backquotes and heredocs, except the second case doesn't apply to
+ * heredocs. yyless(yyleng - 1) is used to correct taking one character too
many
+ */ %}
+<ST_DOUBLE_QUOTES>{DOUBLE_QUOTES_CHARS}*("{"{2,}|"$"{2,}|(("{"+|"$"+)["])) {
+ yyless(yyleng - 1);
-<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"\\x"[0-9A-Fa-f]{1,2} {
- Z_LVAL_P(zendlval) = strtol (yytext+2, NULL, 16);
- return T_CHARACTER;
+ if (CG(literal_type) == IS_UNICODE) {
+ return zend_scan_unicode_escape_string(zendlval, yytext,
yyleng, 0x22 /*'"'*/, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC);
+ } else {
+ zend_scan_binary_escape_string(zendlval, yytext, yyleng, '"'
TSRMLS_CC);
+ return T_ENCAPSED_AND_WHITESPACE;
+ }
}
-<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"\\u"[0-9A-Fa-f]{0,6} {
- UChar32 codepoint;
- int req_digits = (yytext[1] == 'U') ? 6 : 4;
-
+<ST_BACKQUOTE>{BACKQUOTE_CHARS}+ {
if (CG(literal_type) == IS_UNICODE) {
- if (zend_digits_to_codepoint(yytext+2, yytext+yyleng,
&codepoint, req_digits)) {
- if (codepoint <= 0x10FFFF) {
- Z_LVAL_P(zendlval) = (long) codepoint;
- /* give back if we grabbed more than needed for
\u case */
- if (yyleng > req_digits + 2) {
- yyless(req_digits + 2);
- }
- return T_CHARACTER;
- } else {
- zend_error(E_COMPILE_WARNING,"\\U%06x is above
the highest valid codepoint 0x10FFFF", codepoint);
- return 0;
- }
- } else {
- zend_error(E_COMPILE_WARNING,"\\%c escape sequence
requires exactly %d hexadecimal digits", yytext[1], req_digits);
- return 0;
- }
+ return zend_scan_unicode_escape_string(zendlval, yytext,
yyleng, 0x60 /*'`'*/, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC);
} else {
- zend_copy_scanner_string(zendlval, yytext, yyleng,
CG(literal_type), SCNG(output_conv) TSRMLS_CC);
- return T_STRING;
+ zend_scan_binary_escape_string(zendlval, yytext, yyleng, '`'
TSRMLS_CC);
+ return T_ENCAPSED_AND_WHITESPACE;
}
}
+<ST_BACKQUOTE>{BACKQUOTE_CHARS}*("{"{2,}|"$"{2,}|(("{"+|"$"+)[`])) {
+ yyless(yyleng - 1);
-<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"\\C"("{"[A-Z0-9 -]+"}")? {
- UChar32 codepoint;
-
- if (CG(literal_type) == IS_UNICODE && (yytext[1] == 'C')) {
- /* minimum valid string is \C{.} */
- if (yyleng >= 5) {
- /* safe, since we have } at the end */
- yytext[yyleng-1] = 0;
- if (zend_uchar_from_name(yytext+3, &codepoint)) {
- Z_LVAL_P(zendlval) = (long) codepoint;
- return T_CHARACTER;
- } else {
- zend_error(E_COMPILE_WARNING, "Invalid Unicode
character name: '%s'", yytext+3);
- return 0;
- }
- } else {
- zend_error(E_COMPILE_WARNING, "Invalid \\C{..}
sequence");
- return 0;
- }
+ if (CG(literal_type) == IS_UNICODE) {
+ return zend_scan_unicode_escape_string(zendlval, yytext,
yyleng, 0x60 /*'`'*/, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC);
} else {
- zend_copy_scanner_string(zendlval, yytext, yyleng,
CG(literal_type), SCNG(output_conv) TSRMLS_CC);
- return T_STRING;
+ zend_scan_binary_escape_string(zendlval, yytext, yyleng, '`'
TSRMLS_CC);
+ return T_ENCAPSED_AND_WHITESPACE;
}
}
-<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"\\{" {
- if (!zend_copy_scanner_string(zendlval, yytext, yyleng,
CG(literal_type), SCNG(output_conv) TSRMLS_CC)) {
- return 0;
- }
- return T_STRING;
-}
-<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"\\"{ANY_CHAR} {
- switch (yytext[1]) {
- case 'n':
- Z_LVAL_P(zendlval) = (long) '\n';
- break;
- case 't':
- Z_LVAL_P(zendlval) = (long) '\t';
- break;
- case 'r':
- Z_LVAL_P(zendlval) = (long) '\r';
- break;
- case '\\':
- Z_LVAL_P(zendlval) = (long) '\\';
- break;
- case '$':
- Z_LVAL_P(zendlval) = (long) yytext[1];
- break;
- default:
- if (!zend_copy_scanner_string(zendlval, yytext, yyleng,
CG(literal_type), SCNG(output_conv) TSRMLS_CC)) {
- return 0;
- }
- return T_BAD_CHARACTER;
- break;
+%{
+/* ({HEREDOC_NEWLINE}+({LABEL}";"?)?)? handles the possible case of newline
+ * sequences, possibly followed by a label, that couldn't be matched with
+ * HEREDOC_CHARS because of a following variable or "{$"
+ *
+ * This doesn't affect real ending labels, as they are followed by a newline,
+ * which will result in a longer match for the correct rule if present
+ */ %}
+<ST_HEREDOC>{HEREDOC_CHARS}*({HEREDOC_NEWLINE}+({LABEL}";"?)?)? {
+ if (CG(literal_type) == IS_UNICODE) {
+ return zend_scan_unicode_escape_string(zendlval, yytext,
yyleng, 0, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC);
+ } else {
+ zend_scan_binary_escape_string(zendlval, yytext, yyleng, 0
TSRMLS_CC);
+ return T_ENCAPSED_AND_WHITESPACE;
}
- return T_CHARACTER;
}
+<ST_HEREDOC>{HEREDOC_CHARS}*({HEREDOC_NEWLINE}+({LABEL}";"?)?)?("{"{2,}|"$"{2,})
{
+ yyless(yyleng - 1);
-<ST_HEREDOC>["'`]+ {
- if (!zend_copy_scanner_string(zendlval, yytext, yyleng,
CG(literal_type), SCNG(output_conv) TSRMLS_CC)) {
- return 0;
+ if (CG(literal_type) == IS_UNICODE) {
+ return zend_scan_unicode_escape_string(zendlval, yytext,
yyleng, 0, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC);
+ } else {
+ zend_scan_binary_escape_string(zendlval, yytext, yyleng, 0
TSRMLS_CC);
+ return T_ENCAPSED_AND_WHITESPACE;
}
- return T_ENCAPSED_AND_WHITESPACE;
}
<ST_DOUBLE_QUOTES>["] {
BEGIN(ST_IN_SCRIPTING);
- return '\"';
+ return '"';
}
@@ -2511,10 +2600,6 @@
}
-<ST_DOUBLE_QUOTES,ST_BACKQUOTE,INITIAL,ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY><<EOF>> {
- return 0;
-}
-
<ST_COMMENT,ST_DOC_COMMENT><<EOF>> {
 zend_error(E_COMPILE_WARNING,"Unterminated comment starting line %d", CG(comment_start_line));
return 0;
@@ -2522,6 +2607,6 @@
-<ST_IN_SCRIPTING,INITIAL,ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{ANY_CHAR} {
+<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
 zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
}
http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_vm_def.h?r1=1.167&r2=1.168&diff_format=u
Index: ZendEngine2/zend_vm_def.h
diff -u ZendEngine2/zend_vm_def.h:1.167 ZendEngine2/zend_vm_def.h:1.168
--- ZendEngine2/zend_vm_def.h:1.167 Thu May 17 17:28:12 2007
+++ ZendEngine2/zend_vm_def.h Fri May 18 13:12:47 2007
@@ -18,7 +18,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: zend_vm_def.h,v 1.167 2007/05/17 17:28:12 tony2001 Exp $ */
+/* $Id: zend_vm_def.h,v 1.168 2007/05/18 13:12:47 dmitry Exp $ */
/* If you change this file, please regenerate the zend_vm_execute.h and
* zend_vm_opcodes.h files by running:
@@ -1629,7 +1629,7 @@
Z_STRVAL_P(tmp) = emalloc(1);
Z_STRVAL_P(tmp)[0] = 0;
Z_STRLEN_P(tmp) = 0;
- Z_TYPE_P(tmp) = EX(opline)->extended_value;
+ Z_TYPE_P(tmp) = IS_STRING;
}
tmp->refcount = 1;
tmp->is_ref = 0;
@@ -1666,15 +1666,18 @@
zend_free_op free_op1, free_op2;
zval *var = GET_OP2_ZVAL_PTR(BP_VAR_R);
zval var_copy;
- int use_copy;
+ int use_copy = 0;
- if (opline->extended_value == IS_UNICODE) {
- zend_make_unicode_zval(var, &var_copy, &use_copy);
- } else {
- zend_make_string_zval(var, &var_copy, &use_copy);
- }
- if (use_copy) {
- var = &var_copy;
+ if (Z_TYPE_P(var) != opline->extended_value) {
+ if (opline->extended_value == IS_UNICODE) {
+ zend_make_unicode_zval(var, &var_copy, &use_copy);
+ } else {
+ zend_make_string_zval(var, &var_copy, &use_copy);
+ }
+
+ if (use_copy) {
+ var = &var_copy;
+ }
}
 add_string_to_string(&EX_T(opline->result.u.var).tmp_var, GET_OP1_ZVAL_PTR(BP_VAR_NA), var);
http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_vm_execute.h?r1=1.170&r2=1.171&diff_format=u
Index: ZendEngine2/zend_vm_execute.h
diff -u ZendEngine2/zend_vm_execute.h:1.170 ZendEngine2/zend_vm_execute.h:1.171
--- ZendEngine2/zend_vm_execute.h:1.170 Thu May 17 17:28:12 2007
+++ ZendEngine2/zend_vm_execute.h Fri May 18 13:12:47 2007
@@ -122,7 +122,7 @@
Z_STRVAL_P(tmp) = emalloc(1);
Z_STRVAL_P(tmp)[0] = 0;
Z_STRLEN_P(tmp) = 0;
- Z_TYPE_P(tmp) = EX(opline)->extended_value;
+ Z_TYPE_P(tmp) = IS_STRING;
}
tmp->refcount = 1;
tmp->is_ref = 0;
@@ -5832,15 +5832,18 @@
zend_free_op free_op1, free_op2;
 zval *var = _get_zval_ptr_tmp(&opline->op2, EX(Ts), &free_op2 TSRMLS_CC);
zval var_copy;
- int use_copy;
+ int use_copy = 0;
- if (opline->extended_value == IS_UNICODE) {
- zend_make_unicode_zval(var, &var_copy, &use_copy);
- } else {
- zend_make_string_zval(var, &var_copy, &use_copy);
- }
- if (use_copy) {
- var = &var_copy;
+ if (Z_TYPE_P(var) != opline->extended_value) {
+ if (opline->extended_value == IS_UNICODE) {
+ zend_make_unicode_zval(var, &var_copy, &use_copy);
+ } else {
+ zend_make_string_zval(var, &var_copy, &use_copy);
+ }
+
+ if (use_copy) {
+ var = &var_copy;
+ }
}
 add_string_to_string(&EX_T(opline->result.u.var).tmp_var, _get_zval_ptr_tmp(&opline->op1, EX(Ts), &free_op1 TSRMLS_CC), var);
@@ -6280,15 +6283,18 @@
zend_free_op free_op1, free_op2;
 zval *var = _get_zval_ptr_var(&opline->op2, EX(Ts), &free_op2 TSRMLS_CC);
zval var_copy;
- int use_copy;
+ int use_copy = 0;
- if (opline->extended_value == IS_UNICODE) {
- zend_make_unicode_zval(var, &var_copy, &use_copy);
- } else {
- zend_make_string_zval(var, &var_copy, &use_copy);
- }
- if (use_copy) {
- var = &var_copy;
+ if (Z_TYPE_P(var) != opline->extended_value) {
+ if (opline->extended_value == IS_UNICODE) {
+ zend_make_unicode_zval(var, &var_copy, &use_copy);
+ } else {
+ zend_make_string_zval(var, &var_copy, &use_copy);
+ }
+
+ if (use_copy) {
+ var = &var_copy;
+ }
}
 add_string_to_string(&EX_T(opline->result.u.var).tmp_var, _get_zval_ptr_tmp(&opline->op1, EX(Ts), &free_op1 TSRMLS_CC), var);
@@ -6822,15 +6828,18 @@
zend_free_op free_op1;
zval *var = _get_zval_ptr_cv(&opline->op2, EX(Ts), BP_VAR_R TSRMLS_CC);
zval var_copy;
- int use_copy;
+ int use_copy = 0;
- if (opline->extended_value == IS_UNICODE) {
- zend_make_unicode_zval(var, &var_copy, &use_copy);
- } else {
- zend_make_string_zval(var, &var_copy, &use_copy);
- }
- if (use_copy) {
- var = &var_copy;
+ if (Z_TYPE_P(var) != opline->extended_value) {
+ if (opline->extended_value == IS_UNICODE) {
+ zend_make_unicode_zval(var, &var_copy, &use_copy);
+ } else {
+ zend_make_string_zval(var, &var_copy, &use_copy);
+ }
+
+ if (use_copy) {
+ var = &var_copy;
+ }
}
 add_string_to_string(&EX_T(opline->result.u.var).tmp_var, _get_zval_ptr_tmp(&opline->op1, EX(Ts), &free_op1 TSRMLS_CC), var);
http://cvs.php.net/viewvc.cgi/php-src/ext/tokenizer/tokenizer.c?r1=1.41&r2=1.42&diff_format=u
Index: php-src/ext/tokenizer/tokenizer.c
diff -u php-src/ext/tokenizer/tokenizer.c:1.41 php-src/ext/tokenizer/tokenizer.c:1.42
--- php-src/ext/tokenizer/tokenizer.c:1.41 Sun Apr 8 00:17:40 2007
+++ php-src/ext/tokenizer/tokenizer.c Fri May 18 13:12:47 2007
@@ -16,7 +16,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: tokenizer.c,v 1.41 2007/04/08 00:17:40 johannes Exp $ */
+/* $Id: tokenizer.c,v 1.42 2007/05/18 13:12:47 dmitry Exp $ */
#ifdef HAVE_CONFIG_H
#include "config.h"
@@ -282,12 +282,15 @@
while ((token_type = lex_scan(&token TSRMLS_CC))) {
destroy = 1;
switch (token_type) {
+ case T_CLOSE_TAG:
+ if (zendtext[zendleng - 1] != '>') {
+ CG(zend_lineno)++;
+ }
case T_OPEN_TAG:
case T_OPEN_TAG_WITH_ECHO:
case T_WHITESPACE:
case T_COMMENT:
case T_DOC_COMMENT:
- case T_CLOSE_TAG:
destroy = 0;
break;
}
@@ -297,6 +300,10 @@
array_init(keyword);
add_next_index_long(keyword, token_type);
if (token_type == T_END_HEREDOC) {
+ if (CG(increment_lineno)) {
+ token_line = ++CG(zend_lineno);
+ CG(increment_lineno) = 0;
+ }
 add_next_index_stringl(keyword, Z_STRVAL(token), Z_STRLEN(token), 1);
efree(Z_STRVAL(token));
} else {
@@ -372,8 +379,6 @@
case T_VARIABLE: return "T_VARIABLE";
case T_NUM_STRING: return "T_NUM_STRING";
case T_INLINE_HTML: return "T_INLINE_HTML";
- case T_CHARACTER: return "T_CHARACTER";
- case T_BAD_CHARACTER: return "T_BAD_CHARACTER";
 case T_ENCAPSED_AND_WHITESPACE: return "T_ENCAPSED_AND_WHITESPACE";
 case T_CONSTANT_ENCAPSED_STRING: return "T_CONSTANT_ENCAPSED_STRING";
case T_ECHO: return "T_ECHO";
http://cvs.php.net/viewvc.cgi/php-src/ext/tokenizer/tests/001.phpt?r1=1.5&r2=1.6&diff_format=u
Index: php-src/ext/tokenizer/tests/001.phpt
diff -u php-src/ext/tokenizer/tests/001.phpt:1.5 php-src/ext/tokenizer/tests/001.phpt:1.6
--- php-src/ext/tokenizer/tests/001.phpt:1.5 Tue Jun 20 18:14:12 2006
+++ php-src/ext/tokenizer/tests/001.phpt Fri May 18 13:12:47 2007
@@ -57,8 +57,6 @@
echo token_name(T_VARIABLE), "\n";
echo token_name(T_NUM_STRING), "\n";
echo token_name(T_INLINE_HTML), "\n";
-echo token_name(T_CHARACTER), "\n";
-echo token_name(T_BAD_CHARACTER), "\n";
echo token_name(T_ENCAPSED_AND_WHITESPACE), "\n";
echo token_name(T_CONSTANT_ENCAPSED_STRING), "\n";
echo token_name(T_ECHO), "\n";
@@ -185,8 +183,6 @@
T_VARIABLE
T_NUM_STRING
T_INLINE_HTML
-T_CHARACTER
-T_BAD_CHARACTER
T_ENCAPSED_AND_WHITESPACE
T_CONSTANT_ENCAPSED_STRING
T_ECHO
@@ -314,8 +310,6 @@
T_VARIABLE
T_NUM_STRING
T_INLINE_HTML
-T_CHARACTER
-T_BAD_CHARACTER
T_ENCAPSED_AND_WHITESPACE
T_CONSTANT_ENCAPSED_STRING
T_ECHO
http://cvs.php.net/viewvc.cgi/php-src/ext/tokenizer/tests/bug26463.phpt?r1=1.9&r2=1.10&diff_format=u
Index: php-src/ext/tokenizer/tests/bug26463.phpt
diff -u php-src/ext/tokenizer/tests/bug26463.phpt:1.9 php-src/ext/tokenizer/tests/bug26463.phpt:1.10
--- php-src/ext/tokenizer/tests/bug26463.phpt:1.9 Sun Apr 8 00:17:40 2007
+++ php-src/ext/tokenizer/tests/bug26463.phpt Fri May 18 13:12:47 2007
@@ -15,12 +15,12 @@
?>';
var_dump(token_get_all($str));
?>
---EXPECT--
+--EXPECTF--
array(19) {
[0]=>
array(3) {
[0]=>
- int(370)
+ int(%d)
[1]=>
string(6) "<?php
"
@@ -30,7 +30,7 @@
[1]=>
array(3) {
[0]=>
- int(311)
+ int(%d)
[1]=>
string(2) "$x"
[2]=>
@@ -41,7 +41,7 @@
[3]=>
array(3) {
[0]=>
- int(374)
+ int(%d)
[1]=>
string(6) "<<<DD
"
@@ -51,7 +51,7 @@
[4]=>
array(3) {
[0]=>
- int(309)
+ int(%d)
[1]=>
string(13) "jhdsjkfhjdsh
"
@@ -61,7 +61,7 @@
[5]=>
array(3) {
[0]=>
- int(375)
+ int(%d)
[1]=>
string(2) "DD"
[2]=>
@@ -70,7 +70,7 @@
[6]=>
array(3) {
[0]=>
- int(373)
+ int(%d)
[1]=>
string(1) "
"
@@ -82,7 +82,7 @@
[8]=>
array(3) {
[0]=>
- int(317)
+ int(%d)
[1]=>
string(2) """"
[2]=>
@@ -93,7 +93,7 @@
[10]=>
array(3) {
[0]=>
- int(373)
+ int(%d)
[1]=>
string(1) "
"
@@ -103,7 +103,7 @@
[11]=>
array(3) {
[0]=>
- int(311)
+ int(%d)
[1]=>
string(2) "$a"
[2]=>
@@ -114,7 +114,7 @@
[13]=>
array(3) {
[0]=>
- int(374)
+ int(%d)
[1]=>
string(8) "<<<DDDD
"
@@ -124,7 +124,7 @@
[14]=>
array(3) {
[0]=>
- int(309)
+ int(%d)
[1]=>
string(13) "jhdsjkfhjdsh
"
@@ -134,7 +134,7 @@
[15]=>
array(3) {
[0]=>
- int(375)
+ int(%d)
[1]=>
string(4) "DDDD"
[2]=>
@@ -145,7 +145,7 @@
[17]=>
array(3) {
[0]=>
- int(373)
+ int(%d)
[1]=>
string(1) "
"
@@ -155,7 +155,7 @@
[18]=>
array(3) {
[0]=>
- int(372)
+ int(%d)
[1]=>
string(2) "?>"
[2]=>
@@ -167,7 +167,7 @@
[0]=>
array(3) {
[0]=>
- int(370)
+ int(%d)
[1]=>
string(6) "<?php
"
@@ -177,7 +177,7 @@
[1]=>
array(3) {
[0]=>
- int(311)
+ int(%d)
[1]=>
string(2) "$x"
[2]=>
@@ -188,7 +188,7 @@
[3]=>
array(3) {
[0]=>
- int(374)
+ int(%d)
[1]=>
string(6) "<<<DD
"
@@ -198,7 +198,7 @@
[4]=>
array(3) {
[0]=>
- int(309)
+ int(%d)
[1]=>
string(13) "jhdsjkfhjdsh
"
@@ -208,7 +208,7 @@
[5]=>
array(3) {
[0]=>
- int(375)
+ int(%d)
[1]=>
string(2) "DD"
[2]=>
@@ -217,7 +217,7 @@
[6]=>
array(3) {
[0]=>
- int(373)
+ int(%d)
[1]=>
string(1) "
"
@@ -229,7 +229,7 @@
[8]=>
array(3) {
[0]=>
- int(317)
+ int(%d)
[1]=>
string(2) """"
[2]=>
@@ -240,7 +240,7 @@
[10]=>
array(3) {
[0]=>
- int(373)
+ int(%d)
[1]=>
string(1) "
"
@@ -250,7 +250,7 @@
[11]=>
array(3) {
[0]=>
- int(311)
+ int(%d)
[1]=>
string(2) "$a"
[2]=>
@@ -261,7 +261,7 @@
[13]=>
array(3) {
[0]=>
- int(374)
+ int(%d)
[1]=>
string(8) "<<<DDDD
"
@@ -271,7 +271,7 @@
[14]=>
array(3) {
[0]=>
- int(309)
+ int(%d)
[1]=>
string(13) "jhdsjkfhjdsh
"
@@ -281,7 +281,7 @@
[15]=>
array(3) {
[0]=>
- int(375)
+ int(%d)
[1]=>
string(4) "DDDD"
[2]=>
@@ -292,7 +292,7 @@
[17]=>
array(3) {
[0]=>
- int(373)
+ int(%d)
[1]=>
string(1) "
"
@@ -302,7 +302,7 @@
[18]=>
array(3) {
[0]=>
- int(372)
+ int(%d)
[1]=>
string(2) "?>"
[2]=>
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php