On 8/21/07, Michael Wallner <[EMAIL PROTECTED]> wrote:
> Hannes Magnusson wrote:
>
> > PhD is the future. PhD is the promised land! :P
> >
> >
> > The source can be found in the `phd` cvs module on cvs.php.net.
> > Currently you need to patch PHP to be able to run it (see:
> > http://news.php.net/php.internals/31583) but that requirement will (of
> > course) be removed if the patch doesn't make it into the next PHP
> > release.
>
> I wasn't too happy about that when reading this mail, but what really
> pissed me was that the second apparently needed patch (nowdocs, README)
> is not accessible, at least I cannot connect to the mentioned host.
> You (two) could have put these patches into the phd module.  Could
> someone please mail me the nowdocs patch, so my mood raises to a
> I-gotta-rebuild-my-PHP compatible level?

The in_array() patch is no longer needed and the config (generated by
the setup, which needs nowdocs) is bundled in the GSoC archive so
there is no need for it either.

The setup/ folder is not part of the GSoC work and isn't necessary to
run the PhD-renderer.
Attached is Gwynne's patch, though, if you want to have a look.

-Hannes
Index: ZendEngine2/zend_compile.c
===================================================================
RCS file: /repository/ZendEngine2/zend_compile.c,v
retrieving revision 1.647.2.27.2.40
diff -u -r1.647.2.27.2.40 zend_compile.c
--- ZendEngine2/zend_compile.c	18 May 2007 13:12:03 -0000	1.647.2.27.2.40
+++ ZendEngine2/zend_compile.c	23 Jul 2007 13:13:35 -0000
@@ -4144,6 +4144,7 @@
 			retval = T_ECHO;
 			break;
 		case T_END_HEREDOC:
+		case T_END_NOWDOC:
 			efree(Z_STRVAL(zendlval->u.constant));
 			break;
 		case EOF:
Index: ZendEngine2/zend_highlight.c
===================================================================
RCS file: /repository/ZendEngine2/zend_highlight.c,v
retrieving revision 1.49.2.3.2.1
diff -u -r1.49.2.3.2.1 zend_highlight.c
--- ZendEngine2/zend_highlight.c	1 Jan 2007 09:35:46 -0000	1.49.2.3.2.1
+++ ZendEngine2/zend_highlight.c	23 Jul 2007 13:13:35 -0000
@@ -150,6 +150,7 @@
 		}
 		switch (token_type) {
 			case T_END_HEREDOC:
+			case T_END_NOWDOC:
 				zend_html_puts(token.value.str.val, token.value.str.len TSRMLS_CC);
 				break;
 			default:
@@ -172,7 +173,7 @@
 					efree(token.value.str.val);
 					break;
 			}
-		} else if (token_type == T_END_HEREDOC) {
+		} else if (token_type == T_END_HEREDOC || token_type == T_END_NOWDOC) {
 			efree(token.value.str.val);
 		}
 		token.type = 0;
@@ -211,6 +212,7 @@
 				return;
 			
 			case T_END_HEREDOC:
+			case T_END_NOWDOC:
 				zend_write(LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
 				efree(token.value.str.val);
 				/* read the following character, either newline or ; */
Index: ZendEngine2/zend_language_parser.y
===================================================================
RCS file: /repository/ZendEngine2/zend_language_parser.y,v
retrieving revision 1.160.2.4.2.6
diff -u -r1.160.2.4.2.6 zend_language_parser.y
--- ZendEngine2/zend_language_parser.y	18 May 2007 18:36:04 -0000	1.160.2.4.2.6
+++ ZendEngine2/zend_language_parser.y	23 Jul 2007 13:13:35 -0000
@@ -44,6 +44,14 @@
 #endif
 
 
+static void zend_do_safely_merge_constant_strings(znode *result, znode *op1, znode *op2 TSRMLS_DC) { /* Append op2's string to op1's (reallocated) buffer, store pointer and length in result, then destroy op2. */
+    Z_STRVAL(result->u.constant) = erealloc(Z_STRVAL(op1->u.constant), Z_STRLEN(op1->u.constant) + Z_STRLEN(op2->u.constant) + 1); /* grow op1's buffer to hold both strings plus a terminator; op1's own pointer is not updated here */
+    Z_STRLEN(result->u.constant) = Z_STRLEN(op1->u.constant) + Z_STRLEN(op2->u.constant); /* combined length */
+    Z_STRVAL(result->u.constant)[Z_STRLEN(result->u.constant)] = 0; /* terminate at the combined length */
+    memcpy(Z_STRVAL(result->u.constant)+Z_STRLEN(op1->u.constant), Z_STRVAL(op2->u.constant), Z_STRLEN(op2->u.constant)); /* copy op2 right after op1's bytes. NOTE(review): relies on result and op1 being distinct objects; if they aliased, op1's length would already include op2's - confirm against the parser's $$/$1 storage */
+    zval_dtor(&op2->u.constant); /* op2's contents have been copied, so release it */
+}    
+
 %}
 
 %pure_parser
@@ -139,7 +147,9 @@
 %token T_CLOSE_TAG
 %token T_WHITESPACE
 %token T_START_HEREDOC
+%token T_START_NOWDOC
 %token T_END_HEREDOC
+%token T_END_NOWDOC
 %token T_DOLLAR_OPEN_CURLY_BRACES
 %token T_CURLY_OPEN
 %token T_PAAMAYIM_NEKUDOTAYIM
@@ -676,6 +686,12 @@
 ;
 
 
+constant_string: /* pieces of a string inside a nowdoc */
+        constant_string T_CONSTANT_ENCAPSED_STRING  { zend_do_safely_merge_constant_strings(&$$, &$1, &$2 TSRMLS_CC); }
+    |   /* empty */                 { $$.op_type = IS_CONST; ZVAL_EMPTY_STRING(&$$.u.constant); }
+;
+    
+
 common_scalar:
 		T_LNUMBER 					{ $$ = $1; }
 	|	T_DNUMBER 					{ $$ = $1; }
@@ -685,6 +701,7 @@
 	|	T_CLASS_C					{ $$ = $1; }
 	|	T_METHOD_C					{ $$ = $1; }
 	|	T_FUNC_C					{ $$ = $1; }
+    |   T_START_NOWDOC constant_string T_END_NOWDOC { $$ = $2; }
 ;
 
 
Index: ZendEngine2/zend_language_scanner.l
===================================================================
RCS file: /repository/ZendEngine2/zend_language_scanner.l,v
retrieving revision 1.131.2.11.2.12
diff -u -r1.131.2.11.2.12 zend_language_scanner.l
--- ZendEngine2/zend_language_scanner.l	24 May 2007 08:56:35 -0000	1.131.2.11.2.12
+++ ZendEngine2/zend_language_scanner.l	23 Jul 2007 13:13:35 -0000
@@ -39,6 +39,9 @@
 %x ST_HEREDOC
 %x ST_START_HEREDOC
 %x ST_END_HEREDOC
+%x ST_NOWDOC
+%x ST_START_NOWDOC
+%x ST_END_NOWDOC
 %x ST_LOOKING_FOR_PROPERTY
 %x ST_LOOKING_FOR_VARNAME
 %x ST_VAR_OFFSET
@@ -790,6 +793,56 @@
 	zendlval->value.str.len = yyleng;
 #endif /* ZEND_MULTIBYTE */
 
+static int zend_scan_single_string(zval *zendlval, char *str, int len TSRMLS_DC) { /* Copy str[0..len) into zendlval as an IS_STRING, collapse \\ and \' escapes in place, bump CG(zend_lineno) per newline; always returns T_CONSTANT_ENCAPSED_STRING. */
+	register char *s, *t; /* s: read cursor, t: write cursor over the same buffer */
+	char *end;
+
+	zendlval->value.str.val = estrndup(str, len); /* work on a private copy of the input */
+	zendlval->value.str.len = len;
+	zendlval->type = IS_STRING;
+
+	/* convert escape sequences */
+	s = t = zendlval->value.str.val;
+	end = s+zendlval->value.str.len; /* end is fixed from the original length; str.len shrinks below as escapes collapse */
+	while (s<end) {
+		if (*s=='\\') {
+			s++;
+			if (s>=end) { /* backslash was the last byte of the input */
+				continue; /* loop test now fails; NOTE(review): the backslash is not written and str.len is not decremented on this path */
+			}
+			switch(*s) {
+				case '\\':
+				case '\'':
+					*t++ = *s; /* keep only the escaped character */
+					zendlval->value.str.len--; /* two input bytes became one output byte */
+					break;
+				default:
+					*t++ = '\\'; /* any other escape is kept verbatim, backslash included */
+					*t++ = *s;
+					break;
+			}
+		} else {
+			*t++ = *s;
+		}
+
+		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) { /* count "\n", or a "\r" not followed by "\n", so "\r\n" is counted once */
+			CG(zend_lineno)++;
+		}
+		s++;
+	}
+	*t = 0; /* terminate the rewritten string */
+
+#ifdef ZEND_MULTIBYTE
+	if (SCNG(output_filter)) {
+		s = zendlval->value.str.val; /* remember the unfiltered buffer */
+		SCNG(output_filter)(&(zendlval->value.str.val), &(zendlval->value.str.len), s, zendlval->value.str.len TSRMLS_CC); /* filter receives the addresses of val/len as outputs and the old buffer as input */
+		efree(s); /* release the unfiltered buffer; presumably the filter stored a new one in str.val - TODO confirm */
+	}
+#endif /* ZEND_MULTIBYTE */
+
+	return T_CONSTANT_ENCAPSED_STRING;
+}
+
 static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
 {
 	register char *s, *t;
@@ -905,15 +958,18 @@
 ANY_CHAR (.|[\n])
 NEWLINE ("\r"|"\n"|"\r\n")
 
+%{
 /*
  * LITERAL_DOLLAR matches unescaped $ that aren't followed by a label character
  * or a { and therefore will be taken literally. The case of literal $ before
  * a variable or "${" is handled in a rule for each string type
  */
+%}
 DOUBLE_QUOTES_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$"\\{]|("\\"{ANY_CHAR})))
 BACKQUOTE_LITERAL_DOLLAR     ("$"+([^a-zA-Z_\x7f-\xff$`\\{]|("\\"{ANY_CHAR})))
 HEREDOC_LITERAL_DOLLAR       ("$"+([^a-zA-Z_\x7f-\xff$\n\r\\{]|("\\"[^\n\r])))
 
+%{
 /*
  * Usually, HEREDOC_NEWLINE will just function like a simple NEWLINE, but some
  * special cases need to be handled. HEREDOC_CHARS doesn't allow a line to
@@ -923,14 +979,18 @@
  * The other case is when a line contains a label, followed by ONLY
  * { or $, and/or \  Handled by ({LABEL}";"?((("{"+|"$"+)"\\"?)|"\\"))
  */
+%}
 HEREDOC_NEWLINE ((({LABEL}";"?((("{"+|"$"+)"\\"?)|"\\"))|(("{"*|"$"*)"\\"?)){NEWLINE})
 
+%{
 /*
  * This pattern is just used in the next 2 for matching { or literal $, and/or
  * \ escape sequence immediately at the beginning of a line or after a label
  */
+%}
 HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR (("{"+[^$\n\r\\{])|("{"*"\\"[^\n\r])|{HEREDOC_LITERAL_DOLLAR})
 
+%{
 /*
  * These 2 label-related patterns allow HEREDOC_CHARS to continue "regular"
  * matching after a newline that starts with either a non-label character or a
@@ -942,9 +1002,11 @@
  * follows a label. [^a-zA-Z0-9_\x7f-\xff;$\n\r\\{] is needed to prevent a label
  * character or ; from matching on a possible (real) ending label
  */
+%}
 HEREDOC_NON_LABEL ([^a-zA-Z_\x7f-\xff$\n\r\\{]|{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR})
 HEREDOC_LABEL_NO_NEWLINE ({LABEL}([^a-zA-Z0-9_\x7f-\xff;$\n\r\\{]|(";"[^$\n\r\\{])|(";"?{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR})))
 
+%{
 /*
  * CHARS matches everything up to a variable or "{$"
  * {'s are matched as long as they aren't followed by a $
@@ -953,6 +1015,7 @@
  * For heredocs, matching continues across/after newlines if/when it's known
  * that the next line doesn't contain a possible ending label
  */
+%}
 DOUBLE_QUOTES_CHARS ("{"*([^$"\\{]|("\\"{ANY_CHAR}))|{DOUBLE_QUOTES_LITERAL_DOLLAR})
 BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
 HEREDOC_CHARS       ("{"*([^$\n\r\\{]|("\\"[^\n\r]))|{HEREDOC_LITERAL_DOLLAR}|({HEREDOC_NEWLINE}+({HEREDOC_NON_LABEL}|{HEREDOC_LABEL_NO_NEWLINE})))
@@ -1623,7 +1686,8 @@
 %{
 /* Make sure a label character follows "->", otherwise there is no property
  * and "->" will be taken literally
- */ %}
+ */
+%}
 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
 	yyless(yyleng - 3);
 	yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
@@ -1634,7 +1698,8 @@
 
 %{
 /* A [ always designates a variable offset, regardless of what follows
- */ %}
+ */
+%}
 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
 	yyless(yyleng - 1);
 	yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
@@ -1787,7 +1852,8 @@
 
 %{
 /* ("{"*|"$"*) handles { or $ at the end of a string (or the entire contents)
- */ %}
+ */
+%}
 <ST_IN_SCRIPTING>(b?["]{DOUBLE_QUOTES_CHARS}*("{"*|"$"*)["]) {
 	int bprefix = (yytext[0] != '"') ? 1 : 0;
 
@@ -1797,56 +1863,114 @@
 
 
 <ST_IN_SCRIPTING>(b?[']([^'\\]|("\\"{ANY_CHAR}))*[']) {
-	register char *s, *t;
-	char *end;
 	int bprefix = (yytext[0] != '\'') ? 1 : 0;
 
-	zendlval->value.str.val = estrndup(yytext+bprefix+1, yyleng-bprefix-2);
-	zendlval->value.str.len = yyleng-bprefix-2;
-	zendlval->type = IS_STRING;
-
-	/* convert escape sequences */
-	s = t = zendlval->value.str.val;
-	end = s+zendlval->value.str.len;
-	while (s<end) {
-		if (*s=='\\') {
-			s++;
-			if (s>=end) {
-				continue;
-			}
-			switch(*s) {
-				case '\\':
-				case '\'':
-					*t++ = *s;
-					zendlval->value.str.len--;
-					break;
-				default:
-					*t++ = '\\';
-					*t++ = *s;
-					break;
-			}
-		} else {
-			*t++ = *s;
-		}
+    return zend_scan_single_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2 TSRMLS_CC);
+}
 
-		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
-			CG(zend_lineno)++;
-		}
+%{
+/* BEGIN nowdoc */
+%}
+<ST_IN_SCRIPTING>b?"<<<~"{TABS_AND_SPACES}{LABEL}{NEWLINE} {
+	int bprefix = (yytext[0] != '<') ? 1 : 0;
+	char *s;
+	CG(zend_lineno)++;
+	CG(heredoc_len) = yyleng-bprefix-4-1-(yytext[yyleng-2]=='\r'?1:0);
+	s = yytext+bprefix+4;
+	while ((*s == ' ') || (*s == '\t')) {
 		s++;
+		CG(heredoc_len)--;
 	}
-	*t = 0;
+	CG(heredoc) = estrndup(s, CG(heredoc_len));
+	BEGIN(ST_START_NOWDOC);
+	return T_START_NOWDOC;
+}
 
-#ifdef ZEND_MULTIBYTE
-	if (SCNG(output_filter)) {
-		s = zendlval->value.str.val;
-		SCNG(output_filter)(&(zendlval->value.str.val), &(zendlval->value.str.len), s, zendlval->value.str.len TSRMLS_CC);
-		efree(s);
+<ST_START_NOWDOC>{ANY_CHAR} {
+	yyless(0); /* give the matched character back to the input so it is scanned again */
+	BEGIN(ST_NOWDOC); /* continue in the nowdoc body state; no token is returned from this rule */
+}
+
+<ST_START_NOWDOC>{LABEL}";"?[\n\r] {
+	int label_len = yyleng - 1;
+
+	if (yytext[label_len-1]==';') {
+		label_len--;
 	}
-#endif /* ZEND_MULTIBYTE */
 
-	return T_CONSTANT_ENCAPSED_STRING;
+	yyless(label_len);
+
+	if (label_len==CG(heredoc_len) && !memcmp(yytext, CG(heredoc), label_len)) {
+		Z_STRVAL_P(zendlval) = CG(heredoc);
+		Z_STRLEN_P(zendlval) = label_len;
+		CG(heredoc)=NULL;
+		CG(heredoc_len)=0;
+		BEGIN(ST_IN_SCRIPTING);
+		return T_END_NOWDOC;
+	} else {
+		yymore();
+		BEGIN(ST_NOWDOC);
+	}
+}
+
+<ST_NOWDOC>"${"|"$"{LABEL}|"$"{LABEL}"->"[a-zA-Z_\x7f-\xff]|"$"{LABEL}"["|{HEREDOC_CHARS}*({HEREDOC_NEWLINE}+({LABEL}";"?)?)?|"{$" {
+	return zend_scan_single_string(zendlval, yytext, yyleng TSRMLS_CC); /* the whole match, including variable-looking sequences such as "${" or "$"{LABEL}, is passed through as one constant string piece */
+}
+
+<ST_NOWDOC>{HEREDOC_CHARS}*({HEREDOC_NEWLINE}+({LABEL}";"?)?)?("{"{2,}|"$"{2,}) {
+	return zend_scan_single_string(zendlval, yytext, yyleng TSRMLS_CC); /* same handling for text ending in a run of two or more "{" or "$" */
+}
+
+<ST_NOWDOC>{HEREDOC_CHARS}*{HEREDOC_NEWLINE}+{LABEL}";"?[\n\r] {
+	char *end = yytext + yyleng - 1; /* last matched character: the [\n\r] after the candidate label */
+
+	if (end[-1] == ';') { /* an optional ';' after the label is excluded from the comparison below */
+		end--;
+		yyleng--;
+	}
+
+	if (yyleng > CG(heredoc_len) && !memcmp(end - CG(heredoc_len), CG(heredoc), CG(heredoc_len))) { /* the bytes just before end equal the label saved in CG(heredoc) */
+		int len = yyleng - CG(heredoc_len) - 2; /* 2 for newline before and after label */
+
+		if (len > 0 && yytext[len - 1] == '\r' && yytext[len] == '\n') { /* newline before the label was "\r\n": drop the "\r" from the body too */
+			len--;
+		}
+
+		/* Go back before last label char, to match in ST_END_NOWDOC state */
+		yyless(yyleng - 2);
+
+		/* Subtract the remaining label length. yyleng must include newline
+		 * before label, for zend_highlight/strip, tokenizer, etc. */
+		yyleng -= CG(heredoc_len) - 1;
+
+		CG(increment_lineno) = 1; /* For newline before label */
+		BEGIN(ST_END_NOWDOC);
+
+    	return zend_scan_single_string(zendlval, yytext, len TSRMLS_CC); /* only the first len bytes (the body before the closing label) become the string token */
+	} else {
+		/* Go back to end of label, so the next match works correctly in case of
+		 * a variable or another label at the beginning of the next line */
+		yyless(yyleng - 1);
+		yymore(); /* not the closing label: keep this text and prepend it to the next match */
+	}
+}
+
+<ST_END_NOWDOC>{ANY_CHAR} {
+	Z_STRVAL_P(zendlval) = CG(heredoc); /* the token value takes over the saved label buffer */
+	Z_STRLEN_P(zendlval) = CG(heredoc_len);
+	yytext = Z_STRVAL_P(zendlval); /* report the label itself as the token text and length */
+	yyleng = Z_STRLEN_P(zendlval);
+	CG(heredoc) = NULL; /* the buffer now belongs to zendlval; clear the scanner's saved label state */
+	CG(heredoc_len) = 0;
+	BEGIN(ST_IN_SCRIPTING); /* nowdoc finished: back to normal script scanning */
+	return T_END_NOWDOC;
 }
 
+%{
+/* END nowdoc */
+%}
+
+
 
 <ST_IN_SCRIPTING>b?["] {
 	BEGIN(ST_DOUBLE_QUOTES);
@@ -1910,7 +2034,8 @@
  *
  * {HEREDOC_NEWLINE}+ handles the case of more than one newline sequence that
  * couldn't be matched with HEREDOC_CHARS, because of the following label
- */ %}
+ */
+%}
 <ST_HEREDOC>{HEREDOC_CHARS}*{HEREDOC_NEWLINE}+{LABEL}";"?[\n\r] {
 	char *end = yytext + yyleng - 1;
 
@@ -1976,7 +2101,8 @@
  *
  * Same for backquotes and heredocs, except the second case doesn't apply to
  * heredocs. yyless(yyleng - 1) is used to correct taking one character too many
- */ %}
+ */
+%}
 <ST_DOUBLE_QUOTES>{DOUBLE_QUOTES_CHARS}*("{"{2,}|"$"{2,}|(("{"+|"$"+)["])) {
 	yyless(yyleng - 1);
 	zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
@@ -2003,7 +2129,8 @@
  *
  * This doesn't affect real ending labels, as they are followed by a newline,
  * which will result in a longer match for the correct rule if present
- */ %}
+ */
+%}
 <ST_HEREDOC>{HEREDOC_CHARS}*({HEREDOC_NEWLINE}+({LABEL}";"?)?)? {
 	zend_scan_escape_string(zendlval, yytext, yyleng, 0 TSRMLS_CC);
 	return T_ENCAPSED_AND_WHITESPACE;

Reply via email to