See attached patch (which I haven't pushed).
This "fixes" the feature request
https://savannah.gnu.org/bugs/index.php?42501
From 6b144b8c4d8199310ee07094ef70e7f597891bc5 Mon Sep 17 00:00:00 2001
From: James Youngman <[email protected]>
Date: Sun, 2 Jun 2024 10:34:59 +0100
Subject: [PATCH] Add -E option, selecting POSIX Extended regex syntax.
To: [email protected]
This option is also present in FreeBSD and Mac OS X.
This fixes savannah bug (really feature request) #42501.
* find/util.c (process_leading_options): implement the -E option.
* find/ftsfind.c (main): mention -E in a comment.
* find/defs.h (struct options): Likewise.
* find/testsuite/find.gnu/regexE.exp: new test for -E option.
* find/testsuite/find.gnu/regexE.xo: expected output for this.
* find/testsuite/Makefile.am: add the new test.
* doc/find.texi: Point out that the new option -E selects POSIX Extended
Regular Expression syntax.
(Invoking find): add -E.
(Regular Expressions): describe -E and explain how to choose between
it and -regextype posix-extended.
* find/find.1(SYNOPSIS): mention -E.
(OPTIONS): Document -E.
(COMPATIBILITY): Add -E, mention that FreeBSD and Mac OS X also support it.
* NEWS: mention this enhancement.
---
NEWS | 4 ++++
doc/find.texi | 37 ++++++++++++++++++++++--------
find/defs.h | 3 ++-
find/find.1 | 19 ++++++++++++---
find/ftsfind.c | 2 +-
find/testsuite/Makefile.am | 2 ++
find/testsuite/find.gnu/regexE.exp | 5 ++++
find/testsuite/find.gnu/regexE.xo | 4 ++++
find/util.c | 7 ++++++
9 files changed, 68 insertions(+), 15 deletions(-)
create mode 100644 find/testsuite/find.gnu/regexE.exp
create mode 100644 find/testsuite/find.gnu/regexE.xo
diff --git a/NEWS b/NEWS
index e2a8e067..2b5f48c4 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,10 @@ GNU findutils NEWS - User visible changes. -*- outline -*- (allout)
* Noteworthy changes in release ?.? (????-??-??) [?]
+** Functional Enhancements to find
+
+ The leading -E option is a synonym for -regextype posix-extended.
+ This should improve compatibility with FreeBSD and Mac OS X.
* Noteworthy changes in release 4.10.0 (2024-06-01) [stable]
diff --git a/doc/find.texi b/doc/find.texi
index b3eff2d0..90a0555a 100644
--- a/doc/find.texi
+++ b/doc/find.texi
@@ -576,8 +576,9 @@ so regular expressions which only match something that ends in slash
will always fail.
There are several varieties of regular expressions; by default this
-test uses GNU Emacs regular expressions, but this can be changed with
-the option @samp{-regextype}.
+test uses GNU Emacs regular expressions. This can be changed with
+the @samp{-regextype} option (which can select any supported syntax)
+or the @samp{-E} option (which selects POSIX Extended syntax).
@end deffn
@deffn Option -regextype name
@@ -3394,18 +3395,24 @@ discussed in this manual.
@section Invoking @code{find}
@example
-find @r{[-H] [-L] [-P] [-D @var{debugoptions}] [-O@var{level}]} @r{[}@var{file}@dots{}@r{]} @r{[}@var{expression}@r{]}
+find @r{[-H] [-E] [-L] [-P] [-D @var{debugoptions}] [-O@var{level}]} @r{[}@var{file}@dots{}@r{]} @r{[}@var{expression}@r{]}
@end example
@code{find} searches the directory tree rooted at each file name
@var{file} by evaluating the @var{expression} on each file it finds in
the tree.
-The command line may begin with the @samp{-H}, @samp{-L}, @samp{-P},
+The command line may begin with the @samp{-H}, @samp{-E}, @samp{-L}, @samp{-P},
@samp{-D} and @samp{-O} options. These are followed by a list of
files or directories that should be searched. If no files to search
are specified, the current directory (@file{.}) is used.
+The @samp{-E} option makes @code{find} use the POSIX Extended regular
+expression syntax instead of its normal default. You can also do this
+with @samp{-regextype posix-extended}. @xref{Regular Expressions}, for
+an explanation of how to choose between these two ways to select
+POSIX Extended regular expression syntax.
+
This list of files to search is followed by a list of expressions
describing the files we wish to search for. The first part of the
expression is recognised by the fact that it begins with @samp{-}
@@ -4178,12 +4185,22 @@ expression used by @code{find} and @code{locate} is almost identical to
that used in GNU Emacs. The single difference is that in @code{find}
and @code{locate}, a @samp{.} will match a newline character.
-Both @code{find} and @code{locate} provide an option which allows
-selecting an alternative regular expression syntax; for @code{find}
-this is the @samp{-regextype} option, and for @code{locate} this is
-the @samp{--regextype} option.
-
-These options take a single argument, which indicates the specific
+Both @code{find} and @code{locate} provide options which allow
+selecting an alternative regular expression syntax. For @code{find}
+the @samp{-regextype} and @samp{-E} options do this.
+For @code{locate} this is the @samp{--regextype} option.
+
+The @samp{-E} option is a relatively new feature for @code{find}. If
+you want to maintain compatibility with older versions of GNU
+Findutils, you should prefer to use @samp{-regextype posix-extended}.
+If on the other hand you want to ensure compatibility with Mac OS X,
+FreeBSD and recent versions of GNU @code{find}, you should prefer
+@samp{-E}. If you want to ensure compatibility with POSIX-compliant
+versions of @code{find} other than those already mentioned, you should
+avoid the use of @samp{-regex} entirely.
+
+The @samp{-regextype} options of @code{find} and @code{locate}
+take a single argument, which indicates the specific
regular expression syntax and behaviour that should be used. This
should be one of the following:
diff --git a/find/defs.h b/find/defs.h
index 5781fb6c..db3f899d 100644
--- a/find/defs.h
+++ b/find/defs.h
@@ -604,7 +604,8 @@ struct options
/* The variety of regular expression that we support.
* The default is POSIX Basic Regular Expressions, but this
- * can be changed with the positional option, -regextype.
+ * can be changed with the -E option or the positional option,
+ * -regextype.
*/
int regex_options;
diff --git a/find/find.1 b/find/find.1
index 319aa63e..a0be5c95 100644
--- a/find/find.1
+++ b/find/find.1
@@ -4,7 +4,7 @@
find \- search for files in a directory hierarchy
.SH SYNOPSIS
.B find
-[\-H] [\-L] [\-P] [\-D debugopts] [\-Olevel] [starting-point...\&] [expression]
+[\-H] [\-E] [\-L] [\-P] [\-D debugopts] [\-Olevel] [starting-point...\&] [expression]
.
.SH DESCRIPTION
This manual page
@@ -55,9 +55,10 @@ instead, anyway).
This manual page talks about `options' within the expression list.
These options control the behaviour of
.B find
-but are specified immediately after the last path name. The five
+but are specified immediately after the last path name. The six
`real' options
.BR \-H ,
+.BR \-E ,
.BR \-L ,
.BR \-P ,
.B \-D
@@ -83,7 +84,18 @@ Alternatively, it is generally safe though non-portable to use the GNU option
.B \-files0\-from
to pass arbitrary starting points to
.BR find .
-
+.IP \-E
+Regular expressions used by
+.B find
+(for example the
+.B \-regex
+and
+.B \-iregex
+tests) should be interpreted as using the POSIX Extended regular expression syntax.
+The Texinfo documentation (see
+.B SEE
+.BR ALSO )
+explains the details of this syntax.
.IP \-P
Never follow symbolic links. This is the default behaviour. When
.B find
@@ -2683,6 +2695,7 @@ Feature Added in Also occurs in
\-files0\-from 4.9.0
\-newerXY 4.3.3 BSD
\-D 4.3.1
+\-E 4.11.0 FreeBSD, Mac OS X
\-O 4.3.1
\-readable 4.3.0
\-writable 4.3.0
diff --git a/find/ftsfind.c b/find/ftsfind.c
index 212b4bb5..cb3bccfa 100644
--- a/find/ftsfind.c
+++ b/find/ftsfind.c
@@ -770,7 +770,7 @@ main (int argc, char **argv)
error (EXIT_FAILURE, errno, _("The atexit library function failed"));
/* Check for -P, -H or -L options. Also -D and -O, which are
- * both GNU extensions.
+ * both GNU extensions, and -E, which FreeBSD and Mac OS X support.
*/
end_of_leading_options = process_leading_options (argc, argv);
diff --git a/find/testsuite/Makefile.am b/find/testsuite/Makefile.am
index ed2962a2..3cb2ee83 100644
--- a/find/testsuite/Makefile.am
+++ b/find/testsuite/Makefile.am
@@ -78,6 +78,7 @@ find.gnu/print0.xo \
find.gnu/prune-default-print.xo \
find.gnu/regex1.xo \
find.gnu/regex2.xo \
+find.gnu/regexE.xo \
find.gnu/samefile-copy.xo \
find.gnu/samefile-link.xo \
find.gnu/samefile-p-brokenlink.xo \
@@ -186,6 +187,7 @@ find.gnu/printf-reserved.exp \
find.gnu/prune-default-print.exp \
find.gnu/regex1.exp \
find.gnu/regex2.exp \
+find.gnu/regexE.exp \
find.gnu/samefile-copy.exp \
find.gnu/samefile-link.exp \
find.gnu/samefile-missing.exp \
diff --git a/find/testsuite/find.gnu/regexE.exp b/find/testsuite/find.gnu/regexE.exp
new file mode 100644
index 00000000..8942e1aa
--- /dev/null
+++ b/find/testsuite/find.gnu/regexE.exp
@@ -0,0 +1,5 @@
+# test for -E
+exec rm -rf tmp
+exec mkdir tmp tmp/d tmp/d/d tmp/d/d/d tmp/d/d/d/e
+find_start p {-E tmp -regex {tmp(/d)*} -print }
+exec rm -rf tmp
diff --git a/find/testsuite/find.gnu/regexE.xo b/find/testsuite/find.gnu/regexE.xo
new file mode 100644
index 00000000..c4259bb0
--- /dev/null
+++ b/find/testsuite/find.gnu/regexE.xo
@@ -0,0 +1,4 @@
+tmp
+tmp/d
+tmp/d/d
+tmp/d/d/d
diff --git a/find/util.c b/find/util.c
index a1736807..379ab2b6 100644
--- a/find/util.c
+++ b/find/util.c
@@ -917,6 +917,13 @@ process_leading_options (int argc, char *argv[])
/* Meaning: dereference symbolic links on command line, but nowhere else. */
set_follow_state (SYMLINK_DEREF_ARGSONLY);
}
+ else if (0 == strcmp ("-E", argv[i]))
+ {
+ /* Meaning: same as -regextype posix-extended.
+ * For compatibility with FreeBSD and Mac OS X.
+ */
+ options.regex_options = RE_SYNTAX_POSIX_EXTENDED;
+ }
else if (0 == strcmp ("-L", argv[i]))
{
/* Meaning: dereference all symbolic links. */
--
2.39.2