[issue1720390] Remove backslash escapes from tokenize.c.

Ron Adam Thu, 15 Nov 2007 16:36:17 -0800

Ron Adam added the comment:

It looks like the disabling of \u and \U escapes in raw strings is
already done.  Does tokenize.py need to be fixed to match?


While working on this, I was able to clean up the string-parsing parts
of tokenizer.c, and I have a separate patch with just that cleanup.

There is also an updated patch containing both the cleaned-up
tokenizer.c and the no-escapes-in-raw-strings change, in case that is
desired after all.

Added file: http://bugs.python.org/file8762/tokenize_cleanup_patch.diff

_____________________________________
Tracker <[EMAIL PROTECTED]>
<http://bugs.python.org/issue1720390>
_____________________________________

Index: Parser/tokenizer.c
===================================================================
--- Parser/tokenizer.c	(revision 58951)
+++ Parser/tokenizer.c	(working copy)
@@ -1254,23 +1254,17 @@
 	/* Identifier (most frequent token!) */
 	nonascii = 0;
 	if (is_potential_identifier_start(c)) {
-		/* Process r"", u"" and ur"" */
-		switch (c) {
-		case 'r':
-		case 'R':
+		/* Process b"", r"" and br"" */
+		if (c == 'b' || c == 'B') {
 			c = tok_nextc(tok);
 			if (c == '"' || c == '\'')
 				goto letter_quote;
-			break;
-		case 'b':
-		case 'B':
+		}
+		if (c == 'r' || c == 'R') {
 			c = tok_nextc(tok);
-			if (c == 'r' || c == 'R')
-				c = tok_nextc(tok);
 			if (c == '"' || c == '\'')
 				goto letter_quote;
-			break;
-		}
+	    }
 		while (is_potential_identifier_char(c)) {
 			if (c >= 128)
 				nonascii = 1;
@@ -1417,59 +1411,51 @@
 		*p_end = tok->cur;
 		return NUMBER;
 	}
-
+   
   letter_quote:
 	/* String */
 	if (c == '\'' || c == '"') {
-		Py_ssize_t quote2 = tok->cur - tok->start + 1;
-		int quote = c;
-		int triple = 0;
-		int tripcount = 0;
-		for (;;) {
-			c = tok_nextc(tok);
-			if (c == '\n') {
-				if (!triple) {
-					tok->done = E_EOLS;
-					tok_backup(tok, c);
-					return ERRORTOKEN;
-				}
-				tripcount = 0;
-                                tok->cont_line = 1; /* multiline string. */
-			}
-			else if (c == EOF) {
-				if (triple)
-					tok->done = E_EOFS;
-				else
-					tok->done = E_EOLS;
-				tok->cur = tok->inp;
-				return ERRORTOKEN;
-			}
-			else if (c == quote) {
-				tripcount++;
-				if (tok->cur - tok->start == quote2) {
-					c = tok_nextc(tok);
-					if (c == quote) {
-						triple = 1;
-						tripcount = 0;
-						continue;
-					}
-					tok_backup(tok, c);
-				}
-				if (!triple || tripcount == 3)
-					break;
-			}
-			else if (c == '\\') {
-				tripcount = 0;
-				c = tok_nextc(tok);
-				if (c == EOF) {
-					tok->done = E_EOLS;
-					tok->cur = tok->inp;
-					return ERRORTOKEN;
-				}
-			}
+ 		int quote = c;
+		int quote_size = 1;             /* 1 or 3 */
+		int end_quote_size = 0;
+ 
+		/* Find the quote size and start of string */
+		c = tok_nextc(tok);
+		if (c == quote) {
+ 			c = tok_nextc(tok);
+			if (c == quote)
+				quote_size = 3;
 			else
-				tripcount = 0;
+				end_quote_size = 1;     /* empty string found */
 		}
+		if (c != quote)
+		    tok_backup(tok, c);
+
+		/* Get rest of string */
+		while (end_quote_size != quote_size) {
+ 			c = tok_nextc(tok);
+  			if (c == EOF) {
+				if (quote_size == 3)
+ 					tok->done = E_EOFS;
+ 				else
+ 					tok->done = E_EOLS;
+ 				tok->cur = tok->inp;
+ 				return ERRORTOKEN;
+ 			}
+ 			if (quote_size == 1 && c == '\n') {
+ 			    tok->done = E_EOLS;
+ 			    tok->cur = tok->inp;
+ 			    return ERRORTOKEN;
+ 			}
+ 			if (c == quote)
+ 			    end_quote_size += 1;
+ 			else {
+ 			    end_quote_size = 0;
+ 			    if (c == '\\')
+ 			        c = tok_nextc(tok);  /* skip escaped char */
+ 			}
+ 		}
+		
 		*p_start = tok->start;
 		*p_end = tok->cur;
 		return STRING;

_______________________________________________
Python-bugs-list mailing list 
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

[issue1720390] Remove backslash escapes from tokenize.c.

Reply via email to