On Tue, 2 Dec 2014 08:35:32 -0800
Jim Meyering <[email protected]> wrote:
> I'm not sure I understand the above paragraph.
> Is this what you intended?
> 
>     That will cause an invalid match or an infinite loop.
>     By the way, there is no option to make grep use
>     searching mode rather than matching mode.

That will cause an invalid match or an infinite loop.
By the way, there is no option to make grep use
matching mode rather than searching mode.

> Can you provide a test case to trigger this?
> Is it easier to construct a test case using gawk?

Yes, but it uses neither grep nor gawk.  It uses an auxiliary program
written only to drive the test, because grep and gawk do *NOT* pass 0 as
the fourth argument of dfacomp.  I added tests for both the invalid-match
case and the infinite-loop case.

grep:dfasearch.c

  dfa = dfaalloc ();
  dfacomp (pattern, size, dfa, 1);  << not 0
  kwsmusts ();


gawk:re.c
                rp->dfa = true;
                rp->dfareg = dfaalloc();
                dfacomp(buf, len, rp->dfareg, true);  << not 0
        } else
                rp->dfa = false;

I did not change the second patch.

Thanks.
From c92426984828de1029fe72b3e0c252c1d551c2cb Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Tue, 2 Dec 2014 22:45:17 +0900
Subject: [PATCH 1/2] dfa: matching at next position or infinite loop in
 matching mode

After failing to match in matching mode, dfa transitions to the initial
state when newlines are not allowed, or to the next state when newlines
are allowed, instead of returning NULL.  That causes a match at the next
position or an infinite loop.  By the way, grep is not affected, because
it always uses searching mode rather than matching mode.

* src/dfa.c (dfaexec_main): After failing to match in matching mode,
return NULL instead of transitioning to the next state.
* tests/dfa-match: Add a new test.
* tests/dfa-match-aux.c: Add a new program to drive the test.
* tests/Makefile.am: Add a rule to build the new test.
* NEWS (Bug fixes): Mention it.
---
 NEWS                  |  6 +++++
 src/dfa.c             | 11 +++++---
 tests/Makefile.am     |  7 ++++--
 tests/dfa-match       | 45 +++++++++++++++++++++++++++++++++
 tests/dfa-match-aux.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 132 insertions(+), 6 deletions(-)
 create mode 100755 tests/dfa-match
 create mode 100644 tests/dfa-match-aux.c

diff --git a/NEWS b/NEWS
index 6138c4e..8fe425c 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,12 @@ GNU grep NEWS                                    -*- outline -*-
 
 * Noteworthy changes in release ?.? (????-??-??) [?]
 
+** Bug fixes
+
+  dfa no longer reports a match at an incorrect position or loops
+  forever in matching mode.  This is an internal bug only: grep is not
+  affected because it always uses searching mode, not matching mode.
+  [bug present since "the beginning"]
 
 * Noteworthy changes in release 2.21 (2014-11-23) [stable]
 
diff --git a/src/dfa.c b/src/dfa.c
index 65862e8..4ab2b72 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -3498,13 +3498,16 @@ dfaexec_main (struct dfa *d, char const *begin, char *end,
           continue;
         }
 
-      if (p[-1] == eol && allow_nl)
+      if (p[-1] != eol || d->newlines[s1] < 0)
         {
-          s = d->newlines[s1];
-          continue;
+          p = NULL;
+          goto done;
         }
 
-      s = 0;
+      if (allow_nl)
+        s = d->newlines[s1];
+      else
+        s = 0;
     }
 
  done:
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 217a731..67c165d 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -14,13 +14,15 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-check_PROGRAMS = get-mb-cur-max
-AM_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib
+check_PROGRAMS = get-mb-cur-max dfa-match-aux
+AM_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib \
+  -I$(top_srcdir)/src
 AM_CFLAGS = $(WARN_CFLAGS) $(WERROR_CFLAGS)
 
 # Tell the linker to omit references to unused shared libraries.
 AM_LDFLAGS = $(IGNORE_UNUSED_LIBRARIES_CFLAGS)
 LDADD = ../lib/libgreputils.a $(LIBINTL) ../lib/libgreputils.a
+dfa_match_aux_LDADD = ../src/dfa.$(OBJEXT) $(LDADD)
 
 XFAIL_TESTS = triple-backref
 
@@ -52,6 +54,7 @@ TESTS =                                               \
   count-newline                                        \
   dfa-coverage                                 \
   dfa-heap-overrun                             \
+  dfa-match                                    \
   dfaexec-multibyte                            \
   empty                                                \
   empty-line                                   \
diff --git a/tests/dfa-match b/tests/dfa-match
new file mode 100755
index 0000000..e956532
--- /dev/null
+++ b/tests/dfa-match
@@ -0,0 +1,45 @@
+#!/bin/sh
+# This would fail for grep-2.21.
+
+# Copyright 2014 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+# Add "." to PATH for the use of dfa-match-aux.
+path_prepend_ .
+
+require_timeout_
+
+fail=0
+
+fail1=0
+dfa-match-aux a ba 0 > out || fail1=1
+compare /dev/null out || fail1=1
+if test $fail1 -ne 0; then
+  printf 'dfa-match test #1 failed\n'
+  fail=1
+fi
+
+fail2=0
+in=$(printf "bb\nbb")
+timeout 3 dfa-match-aux a "$in" 1 > out || fail2=1
+compare /dev/null out || fail2=1
+if test $fail2 -ne 0; then
+  printf 'dfa-match test #2 failed\n'
+  fail=1
+fi
+
+Exit $fail
diff --git a/tests/dfa-match-aux.c b/tests/dfa-match-aux.c
new file mode 100644
index 0000000..c2918e0
--- /dev/null
+++ b/tests/dfa-match-aux.c
@@ -0,0 +1,69 @@
+/* Auxiliary program to test dfa code that cannot be reached via grep.
+   Copyright 2014 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+   02110-1301, USA.  */
+
+#include <config.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <regex.h>
+#include <dfa.h>
+
+#include "progname.h"
+
+void
+dfaerror (char const *mesg)
+{
+  printf ("dfaerror: %s\n", mesg);
+  exit (EXIT_FAILURE);
+}
+
+void
+dfawarn (char const *mesg)
+{
+  printf ("dfawarn: %s\n", mesg);
+  exit (EXIT_FAILURE);
+}
+
+int
+main (int argc, char **argv)
+{
+  struct dfa *dfa;
+  char *beg, *end, *p;
+  int allow_nl;
+
+  if (argc < 3)
+    exit (EXIT_FAILURE);
+
+  setlocale (LC_ALL, "");
+
+  dfasyntax (RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES, 0, '\n');
+  dfa = dfaalloc ();
+  dfacomp (argv[1], strlen (argv[1]), dfa, 0);
+
+  beg = argv[2];
+  end = argv[2] + strlen (argv[2]);
+  allow_nl = argc > 3 && atoi (argv[3]);
+
+  p = dfaexec (dfa, beg, end, allow_nl, NULL, NULL);
+
+  if (p != NULL)
+    printf ("%zd\n", p - beg);
+
+  exit (EXIT_SUCCESS);
+}
-- 
2.2.0

From 7578088f26ebb622884b2ab263496cc5146f23e9 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Tue, 2 Dec 2014 22:58:37 +0900
Subject: [PATCH 2/2] dfa: simplification of dfaexec

* src/dfa.c (dfaexec_main): Simplify by rearranging the IF conditions.
This does not change any logic, but it may cause a minor speed-up or
slowdown depending on how the compiler optimizes the code.  This is a
partial revert of the change in bafa1341db643225c2421ecca90353e556dedae5.
---
 src/dfa.c | 46 ++++++++++++++++++----------------------------
 1 file changed, 18 insertions(+), 28 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 4ab2b72..41b1031 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -3459,13 +3459,25 @@ dfaexec_main (struct dfa *d, char const *begin, char *end,
             }
         }
 
-      if ((char *) p > end)
+      if (s < 0)
         {
-          p = NULL;
-          goto done;
-        }
+          if ((char *) p > end || p[-1] != eol || d->newlines[s1] < 0)
+            {
+              p = NULL;
+              goto done;
+            }
 
-      if (s >= 0 && d->fails[s])
+          /* If the previous character was a newline, count it, and skip
+             checking of multibyte character boundary until here.  */
+          nlcount++;
+          mbp = p;
+
+          if (allow_nl)
+            s = d->newlines[s1];
+          else
+            s = 0;
+        }
+      if (d->fails[s])
         {
           if (d->success[s] & sbit[*p])
             {
@@ -3479,35 +3491,13 @@ dfaexec_main (struct dfa *d, char const *begin, char *end,
             State_transition();
           else
             s = d->fails[s][*p++];
-          continue;
         }
-
-      /* If the previous character was a newline, count it, and skip
-         checking of multibyte character boundary until here.  */
-      if (p[-1] == eol)
-        {
-          nlcount++;
-          mbp = p;
-        }
-
-      if (s >= 0)
+      else
         {
           if (!d->trans[s])
             build_state (s, d);
           trans = d->trans;
-          continue;
         }
-
-      if (p[-1] != eol || d->newlines[s1] < 0)
-        {
-          p = NULL;
-          goto done;
-        }
-
-      if (allow_nl)
-        s = d->newlines[s1];
-      else
-        s = 0;
     }
 
  done:
-- 
2.2.0

Reply via email to