From fa34bb08f50f4611d08865d00e00d46e63b77e06 Mon Sep 17 00:00:00 2001
From: Reuben Thomas <rrt@sc3d.org>
Date: Mon, 13 Sep 2010 20:13:47 +0200
Subject: [PATCH 10/10] Add RE_PLAIN flag to match plain text patterns.

---
 lib/regcomp.c |  139 +++++++++++++++++++++++++++++----------------------------
 lib/regex.h   |    4 ++
 2 files changed, 75 insertions(+), 68 deletions(-)

diff --git a/lib/regcomp.c b/lib/regcomp.c
index 7eff569..d15c179 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -1793,7 +1793,7 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
       return 1;
     }
 #endif
-  if (c == '\\')
+  if (!(syntax & RE_PLAIN) && c == '\\')
     {
       unsigned char c2;
       if (re_string_cur_idx (input) + 1 >= re_string_length (input))
@@ -1929,76 +1929,79 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
 #endif
     token->word_char = IS_WORD_CHAR (token->opr.c);
 
-  switch (c)
+  if (!(syntax & RE_PLAIN))
     {
-    case '\n':
-      if (syntax & RE_NEWLINE_ALT)
-	token->type = OP_ALT;
-      break;
-    case '|':
-      if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
-	token->type = OP_ALT;
-      break;
-    case '*':
-      token->type = OP_DUP_ASTERISK;
-      break;
-    case '+':
-      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
-	token->type = OP_DUP_PLUS;
-      break;
-    case '?':
-      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
-	token->type = OP_DUP_QUESTION;
-      break;
-    case '{':
-      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
-	token->type = OP_OPEN_DUP_NUM;
-      break;
-    case '}':
-      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
-	token->type = OP_CLOSE_DUP_NUM;
-      break;
-    case '(':
-      if (syntax & RE_NO_BK_PARENS)
-	token->type = OP_OPEN_SUBEXP;
-      break;
-    case ')':
-      if (syntax & RE_NO_BK_PARENS)
-	token->type = OP_CLOSE_SUBEXP;
-      break;
-    case '[':
-      token->type = OP_OPEN_BRACKET;
-      break;
-    case '.':
-      token->type = OP_PERIOD;
-      break;
-    case '^':
-      if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
-	  re_string_cur_idx (input) != 0)
+      switch (c)
 	{
-	  char prev = re_string_peek_byte (input, -1);
-	  if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
-	    break;
-	}
-      token->type = ANCHOR;
-      token->opr.ctx_type = LINE_FIRST;
-      break;
-    case '$':
-      if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
-	  re_string_cur_idx (input) + 1 != re_string_length (input))
-	{
-	  re_token_t next;
-	  re_string_skip_bytes (input, 1);
-	  peek_token (&next, input, syntax);
-	  re_string_skip_bytes (input, -1);
-	  if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
-	    break;
+	case '\n':
+	  if (syntax & RE_NEWLINE_ALT)
+	    token->type = OP_ALT;
+	  break;
+	case '|':
+	  if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
+	    token->type = OP_ALT;
+	  break;
+	case '*':
+	  token->type = OP_DUP_ASTERISK;
+	  break;
+	case '+':
+	  if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+	    token->type = OP_DUP_PLUS;
+	  break;
+	case '?':
+	  if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+	    token->type = OP_DUP_QUESTION;
+	  break;
+	case '{':
+	  if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+	    token->type = OP_OPEN_DUP_NUM;
+	  break;
+	case '}':
+	  if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+	    token->type = OP_CLOSE_DUP_NUM;
+	  break;
+	case '(':
+	  if (syntax & RE_NO_BK_PARENS)
+	    token->type = OP_OPEN_SUBEXP;
+	  break;
+	case ')':
+	  if (syntax & RE_NO_BK_PARENS)
+	    token->type = OP_CLOSE_SUBEXP;
+	  break;
+	case '[':
+	  token->type = OP_OPEN_BRACKET;
+	  break;
+	case '.':
+	  token->type = OP_PERIOD;
+	  break;
+	case '^':
+	  if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
+	      re_string_cur_idx (input) != 0)
+	    {
+	      char prev = re_string_peek_byte (input, -1);
+	      if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
+		break;
+	    }
+	  token->type = ANCHOR;
+	  token->opr.ctx_type = LINE_FIRST;
+	  break;
+	case '$':
+	  if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
+	      re_string_cur_idx (input) + 1 != re_string_length (input))
+	    {
+	      re_token_t next;
+	      re_string_skip_bytes (input, 1);
+	      peek_token (&next, input, syntax);
+	      re_string_skip_bytes (input, -1);
+	      if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
+		break;
+	    }
+	  token->type = ANCHOR;
+	  token->opr.ctx_type = LINE_LAST;
+	  break;
+	default:
+	  break;
 	}
-      token->type = ANCHOR;
-      token->opr.ctx_type = LINE_LAST;
-      break;
-    default:
-      break;
     }
   return 1;
 }
diff --git a/lib/regex.h b/lib/regex.h
index 89a8143..8fab9f6 100644
--- a/lib/regex.h
+++ b/lib/regex.h
@@ -227,6 +227,10 @@ typedef unsigned long int reg_syntax_t;
    re_compile_pattern.  */
 # define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
 
+/* If this bit is set, then all special characters are ignored
+   by re_compile_pattern. */
+# define RE_PLAIN (RE_NO_SUB << 1)
+
 #endif /* defined __USE_GNU_REGEX */
 
 /* This global variable defines the particular regexp syntax to use (for
-- 
1.7.0.4

