Hey,

I think this patch makes the scanner much more complicated to understand. I have an idea for a patch that would be much cleaner. In a few very specific cases it might be a tad less optimized in terms of the number of tokens, but it would save all of the yyless() calls and state pushes (yy_push_state), which are not the fastest either.
I suggest changing LABEL_OR_WHITESPACE (the name you gave it isn't too good, so I'd change that too :).
Change it to something like:
LABEL_OR_WHITESPACE ([a-zA-Z0-9_\x7f-\xff \t\n\r #'.:;,()|^&+/*=%!~<?@] | ">-" | "-"[^>])+
(I removed the - and > and added possibilities of mixing them in all ways except for ->)
This is a very small change and much much cleaner.
The only case that wouldn't be optimized is when you have ->foo in your encapsed strings, which doesn't happen too often, and the speed difference there would be negligible. We'd still get 99% of the gain, plus a much cleaner scanner that doesn't rescan input (yyless(), which is also slower) and does less state pushing.
Try it out and let me know how the results are. Also, *please* send diffs as attachments as well, so that when people apply them we don't get bad whitespace in our sources.

Thanks!
Andi

At 03:23 PM 11/15/2002 -0500, George Schlossnagle wrote:
George Schlossnagle wrote:


I'm a tool. I sent the wrong patch to the list. Thanks to Andrei for pointing it out. Here is the _right_ patch (finally).


diff -u -3 -r1.53 zend_language_scanner.l
--- zend_language_scanner.l 8 Nov 2002 13:40:54 -0000 1.53
+++ zend_language_scanner.l 15 Nov 2002 20:20:33 -0000
@@ -37,6 +37,7 @@
%x ST_BACKQUOTE
%x ST_HEREDOC
%x ST_LOOKING_FOR_PROPERTY
+%x ST_EXPECTING_OBJECT
%x ST_LOOKING_FOR_VARNAME
%x ST_COMMENT
%x ST_ONE_LINE_COMMENT
@@ -692,6 +693,7 @@
HNUM "0x"[0-9a-fA-F]+
LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
WHITESPACE [ \n\r\t]+
+LABEL_OR_WHITESPACE [a-zA-Z0-9_\x7f-\xff \t\n\r #'.:;,()|^&+-/*=%!~<>?@]+
TABS_AND_SPACES [ \t]*
TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
ENCAPSED_TOKENS [\[\]{}$]
@@ -823,13 +825,25 @@
return T_EXTENDS;
}
-<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"->" {
+<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"$"{LABEL}"->"{LABEL} {
+ yy_push_state(ST_EXPECTING_OBJECT TSRMLS_CC);
+ yyless(0);
+}
+
+
+<ST_IN_SCRIPTING,ST_EXPECTING_OBJECT>"->" {
yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
return T_OBJECT_OPERATOR;
}
<ST_LOOKING_FOR_PROPERTY>{LABEL} {
- yy_pop_state(TSRMLS_C);
+ if(yy_top_state(TSRMLS_C) == ST_EXPECTING_OBJECT) {
+ yy_pop_state(TSRMLS_C);
+ yy_pop_state(TSRMLS_C);
+ }
+ else {
+ yy_pop_state(TSRMLS_C);
+ }
zend_copy_value(zendlval, yytext, yyleng);
zendlval->value.str.len = yyleng;
zendlval->type = IS_STRING;
@@ -1265,7 +1279,7 @@
return T_INLINE_HTML;
}
-<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL} {
+<ST_EXPECTING_OBJECT,ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL} {
zend_copy_value(zendlval, (yytext+1), (yyleng-1));
zendlval->type = IS_STRING;
return T_VARIABLE;
@@ -1278,13 +1292,26 @@
return T_STRING;
}
-
-<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{LABEL} {
+<ST_DOUBLE_QUOTES,ST_BACKQUOTE>{LABEL_OR_WHITESPACE} {
+ HANDLE_NEWLINES(yytext, yyleng);
zend_copy_value(zendlval, yytext, yyleng);
zendlval->type = IS_STRING;
return T_STRING;
}
+<ST_HEREDOC>{LABEL} {
+ zend_copy_value(zendlval, yytext, yyleng);
+ zendlval->type = IS_STRING;
+ return T_STRING;
+}
+
+<ST_HEREDOC>{ESCAPED_AND_WHITESPACE} {
+ HANDLE_NEWLINES(yytext, yyleng);
+ zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
+ zendlval->value.str.len = yyleng;
+ zendlval->type = IS_STRING;
+ return T_ENCAPSED_AND_WHITESPACE;
+}
<ST_IN_SCRIPTING>{WHITESPACE} {
zendlval->value.str.val = yytext; /* no copying - intentional */
@@ -1581,14 +1608,6 @@
}
}
-
-<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{ESCAPED_AND_WHITESPACE} {
- HANDLE_NEWLINES(yytext, yyleng);
- zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
- zendlval->value.str.len = yyleng;
- zendlval->type = IS_STRING;
- return T_ENCAPSED_AND_WHITESPACE;
-}
<ST_SINGLE_QUOTE>([^'\\]|\\[^'\\])+ {
HANDLE_NEWLINES(yytext, yyleng);


--
PHP Development Mailing List <http://www.php.net/>
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to