Peter Breitenlohner <[EMAIL PROTECTED]>
---------- Forwarded message ----------
Date: Mon, 7 Apr 2008 15:50:37 +0200 (CEST)
From: Peter Breitenlohner <[EMAIL PROTECTED]>
To: James Youngman <[EMAIL PROTECTED]>
Subject: findutils-4.4.0 -- feature request for databases with relative
pathnames
Hi,
you might remember my mail on this same subject from Oct 2006. Somewhat later I
posted a feature request (bugzilla #19485), but so far I have received no
response. I would hereby like to resubmit this request (updated for
findutils-4.4.0).
Attached are 9 files:
relpath.txt: a short description
patch-02-relpath-locate: implementation in locate
This is, unfortunately, somewhat intertwined with
changes to make slocate databases work
patch-03-relpath-frcode: implementation in frcode
patch-04-relpath-updatedb: implementation in updatedb
patch-05-relpath-testcase: 2 test cases (1 for slocate)
patch-06-relpath-doc: texinfo documentation
patch-15-relpath-testcase: generated
patch-16-relpath-doc: generated
ChangeLog: proposed ChangeLog entry
=====================================
I'd appreciate comments on the whole thing. If it is acceptable in principle
I'll gladly initiate the FSF paperwork.
regards
Peter Breitenlohner <[EMAIL PROTECTED]>

diff -ur -N -x find.info findutils-4.4.0.orig/locate/locate.c
findutils-4.4.0/locate/locate.c
--- findutils-4.4.0.orig/locate/locate.c 2008-03-08 22:30:51.000000000
+0100
+++ findutils-4.4.0/locate/locate.c 2008-03-25 13:43:00.000000000 +0100
@@ -332,9 +332,10 @@
struct process_data
{
int c; /* An input byte. */
- char itemcount; /* Indicates we're at the beginning of an
slocate db. */
int count; /* The length of the prefix shared with the previous database
entry. */
int len;
+ int prefix_len; /* The length of the prefix for relative paths.
*/
+ int first;
char *original_filename; /* The current input database entry. */
size_t pathsize; /* Amount allocated for it. */
char *munged_filename; /* path or basename(path) */
@@ -569,39 +570,21 @@
if (procdata->slocatedb_format)
{
- if (procdata->itemcount == 0)
- {
- ungetc(procdata->c, procdata->fp);
- procdata->count = 0;
- procdata->len = 0;
- }
- else if (procdata->itemcount == 1)
- {
- procdata->count = procdata->len-1;
- }
- else
- {
- if (procdata->c == LOCATEDB_ESCAPE)
- procdata->count += (short)get_short (procdata->fp);
- else if (procdata->c > 127)
- procdata->count += procdata->c - 256;
- else
- procdata->count += procdata->c;
- }
+ /* Manufacture initial count=0 for slocate database. */
+ ungetc (procdata->c, procdata->fp);
+ procdata->c = procdata->slocatedb_format = 0;
}
+
+ if (procdata->c == LOCATEDB_ESCAPE)
+ procdata->count += (short)get_short (procdata->fp);
+ else if (procdata->c > 127)
+ procdata->count += procdata->c - 256;
else
- {
- if (procdata->c == LOCATEDB_ESCAPE)
- procdata->count += (short)get_short (procdata->fp);
- else if (procdata->c > 127)
- procdata->count += procdata->c - 256;
- else
- procdata->count += procdata->c;
- }
+ procdata->count += procdata->c;
- if (procdata->count > procdata->len || procdata->count < 0)
+ if (procdata->count > procdata->len || procdata->count <
procdata->prefix_len)
{
- /* This should not happen generally , but since we're
+ /* This should not happen generally, but since we're
* reading in data which is outside our control, we
* cannot prevent it.
*/
@@ -613,27 +596,48 @@
nread = locate_read_str (&procdata->original_filename,
&procdata->pathsize,
procdata->fp, 0, procdata->count);
- if (nread < 0)
+ if (nread < 1)
return VISIT_ABORT;
procdata->c = getc (procdata->fp);
- procdata->len = procdata->count + nread;
- s = procdata->original_filename + procdata->len - 1; /* Move to the last
char in path. */
+ procdata->len = procdata->count + nread - 1;
+
+ if (procdata->len == 0)
+ {
+ if (procdata->first)
+ {
+ /* A database with relative pathnames.
+ * Prepend dirname(dbfile) to all stored pathnames.
+ */
+ extend (procdata, strlen (procdata->dbfile), 1u);
+ strcpy (procdata->original_filename, procdata->dbfile);
+ if (strcmp (procdata->original_filename, "<stdin>"))
+ {
+ char *p = procdata->original_filename
+ + FILE_SYSTEM_PREFIX_LEN (procdata->original_filename);
+ p[dir_len (p)] = '\0';
+ if (!strcmp (p, "") || !strcmp (p, "/"))
+ strcpy (p, ".");
+ }
+ procdata->prefix_len = procdata->len = procdata->count = strlen
(procdata->original_filename);
+ procdata->first = 0;
+ return VISIT_REJECTED; /* Ignore this entry. */
+ }
+
+ /* This should not happen generally, but since we're
+ * reading in data which is outside our control, we
+ * cannot prevent it.
+ */
+ error(1, 0, _("locate database `%s' is corrupt or invalid"),
procdata->dbfile);
+ }
+ procdata->first = 0;
+
+ s = procdata->original_filename + procdata->len; /* Move to the last char in
path. */
assert (s[0] != '\0');
assert (s[1] == '\0'); /* Our terminator. */
assert (s[2] == '\0'); /* Added by locate_read_str. */
procdata->munged_filename = procdata->original_filename;
- if (procdata->slocatedb_format)
- {
- /* Don't increment indefinitely, it might overflow. */
- if (procdata->itemcount < 6)
- {
- ++(procdata->itemcount);
- }
- }
-
-
return VISIT_CONTINUE;
}
@@ -1086,9 +1090,8 @@
oldformat = 0;
procdata.endian_state = GetwordEndianStateInitial;
- procdata.len = procdata.count = 0;
- procdata.slocatedb_format = 0;
- procdata.itemcount = 0;
+ procdata.len = procdata.count = procdata.prefix_len = 0;
+ procdata.first = 1;
procdata.dbfile = dbfile;
procdata.fp = fp;
diff -ur -N findutils-4.4.0.orig/locate/frcode.c findutils-4.4.0/locate/frcode.c
--- findutils-4.4.0.orig/locate/frcode.c 2007-09-08 13:15:03.000000000
+0200
+++ findutils-4.4.0/locate/frcode.c 2008-03-24 21:26:56.000000000 +0100
@@ -177,7 +177,7 @@
long result;
char *p;
- /* Reset errno in oreder to be able to distinguish LONG_MAX/LONG_MIN
+ /* Reset errno in order to be able to distinguish LONG_MAX/LONG_MIN
* from values whichare actually out of range
*/
errno = 0;
@@ -223,6 +223,7 @@
char *oldpath; /* The previous input entry. */
size_t pathsize, oldpathsize; /* Amounts allocated for them. */
int count, oldcount, diffcount; /* Their prefix lengths & the difference. */
+ int prefix_count;
int line_len; /* Length of input line. */
int delimiter = '\n';
int optc;
@@ -239,16 +240,19 @@
oldpath = xmalloc (oldpathsize);
oldpath[0] = 0;
- oldcount = 0;
- while ((optc = getopt_long (argc, argv, "hv0S:", longopts, (int *) 0)) != -1)
+ while ((optc = getopt_long (argc, argv, "hv0rS:", longopts, (int *) 0)) !=
-1)
switch (optc)
{
case '0':
delimiter = 0;
break;
+ case 'r':
+ strcpy (oldpath, ".");
+ break;
+
case 'S':
slocate_compat = 1;
slocate_seclevel = get_seclevel(optarg);
@@ -280,6 +284,7 @@
return 1;
}
+ prefix_count = oldcount = strlen (oldpath);
if (slocate_compat)
{
@@ -297,12 +302,21 @@
}
}
+ if (prefix_count)
+ {
+ putc ('\0', stdout);
+ putc ('\0', stdout);
+ }
while ((line_len = getdelim (&path, &pathsize, delimiter, stdin)) > 0)
{
path[line_len - 1] = '\0'; /* FIXME temporary: nuke the newline. */
count = prefix_length (oldpath, path);
+ if (count < prefix_count)
+ {
+ error(1, 0, _("Path %s is not a relative path."), path);
+ }
diffcount = count - oldcount;
if ( (diffcount > SHRT_MAX) || (diffcount < SHRT_MIN) )
{
diff -ur -N findutils-4.4.0.orig/locate/updatedb.sh
findutils-4.4.0/locate/updatedb.sh
--- findutils-4.4.0.orig/locate/updatedb.sh 2008-02-09 11:11:43.000000000
+0100
+++ findutils-4.4.0/locate/updatedb.sh 2008-03-24 21:00:56.000000000 +0100
@@ -37,7 +37,8 @@
[--localpaths='dir1 dir2...'] [--netpaths='dir1 dir2...']
[--prunepaths='dir1 dir2...'] [--prunefs='fs1 fs2...']
[--output=dbfile] [--netuser=user] [--localuser=user]
- [--old-format] [--dbformat] [--version] [--help]
+ [--old-format] [--dbformat] [--relpath=pathprefix]
+ [--version] [--help]
Report bugs to <[email protected]>."
changeto=/
@@ -60,6 +61,7 @@
--netuser) NETUSER="$val" ;;
--localuser) LOCALUSER="$val" ;;
--old-format) old=yes ;;
+ --relpath) PATHPREFIX="$val";;
--changecwd) changeto="$val" ;;
--dbformat) dbformat="$val" ;;
--version) fail=0; echo "$version" || fail=1; exit $fail ;;
@@ -151,6 +153,53 @@
# What shell shoud we use? We should use a POSIX-ish sh.
: ${SHELL="/bin/sh"}
+# If --relpath was given, check parameters for consistency
+if test -n "$PATHPREFIX" ; then
+ bad=no
+ case "$PATHPREFIX" in
+ "") $PATHPREFIX=. ;;
+ */) bad=yes ;;
+ *) test -d "$PATHPREFIX" || bad=yes ;;
+ esac
+ if test $bad = yes; then
+ echo "updatedb: invalid PATHPREFIX $PATHPREFIX
+ must be existing directory without trailing '/'" >&2
+ exit 1
+ fi
+ # We silently ignore the value given by (the undocumented) --changecwd
+ changeto="$PATHPREFIX"
+ if test "$old" = yes ; then
+ echo "updatedb: conflicting options --relpath and --old-format" >&2
+ exit 1
+ fi
+ frcode_options="$frcode_options -r"
+ if test -n "$NETPATHS" ; then
+ echo "updatedb: conflicting options --relpath and --netpaths" >&2
+ exit 1
+ fi
+ : ${LOCATE_DB=.locatedb}
+ case $LOCATE_DB in
+ */*) echo="updatedb: --output must specify a basename" >&2
+ exit 1;
+ esac
+ LOCATE_DB="$PATHPREFIX/$LOCATE_DB"
+ : ${SEARCHPATHS="."}
+ if test "$SEARCHPATHS" != . ; then
+ NEWPATHS=
+ for p in $SEARCHPATHS ; do
+ case "$p" in
+ /*) echo "updatedb: invalid searchpath $p
+ --relpath requires relative paths" >&2
+ exit 1;;
+ ./*) NEWPATHS="$NEWPATHS $p" ;;
+ *) NEWPATHS="$NEWPATHS ./$p" ;;
+ esac
+ done
+ SEARCHPATHS="$NEWPATHS"
+ fi
+ : ${PRUNEPATHS=}
+fi
+
# Non-network directories to put in the database.
: ${SEARCHPATHS="/"}
@@ -165,7 +214,7 @@
# constructs.
for p in $PRUNEPATHS; do
case "$p" in
- /*/) echo "$0: $p: pruned paths should not contain trailing slashes"
>&2
+ */) echo "$0: $p: pruned paths should not contain trailing slashes"
>&2
exit 1
esac
done
diff -ur -N findutils-4.4.0.orig/locate/testsuite/Makefile.am
findutils-4.4.0/locate/testsuite/Makefile.am
--- findutils-4.4.0.orig/locate/testsuite/Makefile.am 2007-06-09
19:51:41.000000000 +0200
+++ findutils-4.4.0/locate/testsuite/Makefile.am 2008-03-25
00:41:06.000000000 +0100
@@ -14,6 +14,8 @@
locate.gnu/ignore_case3.exp \
locate.gnu/bigprefix1.exp \
locate.gnu/regex1.exp \
+locate.gnu/relpath.exp \
+locate.gnu/relpath1.exp \
locate.gnu/exists1.exp \
locate.gnu/exists2.exp \
locate.gnu/exists3.exp \
@@ -35,6 +37,8 @@
locate.gnu/ignore_case1.xo \
locate.gnu/ignore_case2.xo \
locate.gnu/ignore_case3.xo \
+locate.gnu/relpath.xo \
+locate.gnu/relpath1.xo \
locate.gnu/exists1.xo \
locate.gnu/exists2.xo \
locate.gnu/exists3.xo \
diff -ur -N findutils-4.4.0.orig/locate/testsuite/locate.gnu/relpath.exp
findutils-4.4.0/locate/testsuite/locate.gnu/relpath.exp
--- findutils-4.4.0.orig/locate/testsuite/locate.gnu/relpath.exp
1970-01-01 01:00:00.000000000 +0100
+++ findutils-4.4.0/locate/testsuite/locate.gnu/relpath.exp 2006-10-18
20:19:13.000000000 +0200
@@ -0,0 +1,7 @@
+# tests a database with relative paths
+set tmp "tmp"
+exec rm -rf $tmp
+exec mkdir $tmp
+exec mkdir $tmp/subdir
+exec touch $tmp/subdir/fred
+locate_start p {--relpath=tmp --localpaths=./subdir}
{--database=./tmp/.locatedb fred joe} {}
diff -ur -N findutils-4.4.0.orig/locate/testsuite/locate.gnu/relpath.xo
findutils-4.4.0/locate/testsuite/locate.gnu/relpath.xo
--- findutils-4.4.0.orig/locate/testsuite/locate.gnu/relpath.xo 1970-01-01
01:00:00.000000000 +0100
+++ findutils-4.4.0/locate/testsuite/locate.gnu/relpath.xo 2006-10-15
22:23:39.000000000 +0200
@@ -0,0 +1 @@
+./tmp/subdir/fred
diff -ur -N findutils-4.4.0.orig/locate/testsuite/locate.gnu/relpath1.exp
findutils-4.4.0/locate/testsuite/locate.gnu/relpath1.exp
--- findutils-4.4.0.orig/locate/testsuite/locate.gnu/relpath1.exp
1970-01-01 01:00:00.000000000 +0100
+++ findutils-4.4.0/locate/testsuite/locate.gnu/relpath1.exp 2008-03-25
00:52:58.000000000 +0100
@@ -0,0 +1,7 @@
+# tests an slocate database with relative paths
+set tmp "tmp"
+exec rm -rf $tmp
+exec mkdir $tmp
+exec mkdir $tmp/subdir
+exec touch $tmp/subdir/fred
+locate_start p {--relpath=tmp --dbformat=slocate --localpaths=./subdir}
{--database=./tmp/.locatedb fred joe 2>/dev/null} {}
diff -ur -N findutils-4.4.0.orig/locate/testsuite/locate.gnu/relpath1.xo
findutils-4.4.0/locate/testsuite/locate.gnu/relpath1.xo
--- findutils-4.4.0.orig/locate/testsuite/locate.gnu/relpath1.xo
1970-01-01 01:00:00.000000000 +0100
+++ findutils-4.4.0/locate/testsuite/locate.gnu/relpath1.xo 2006-10-15
22:23:39.000000000 +0200
@@ -0,0 +1 @@
+./tmp/subdir/fred
diff -ur -N -x find.info findutils-4.4.0.orig/doc/find.texi
findutils-4.4.0/doc/find.texi
--- findutils-4.4.0.orig/doc/find.texi 2008-03-10 21:31:16.000000000 +0100
+++ findutils-4.4.0/doc/find.texi 2008-03-25 12:42:20.000000000 +0100
@@ -3301,16 +3301,6 @@
another shell script that ``sources'' the configuration file into the
environment and then executes @code{updatedb} in the environment.
-@code{updatedb} creates and updates the database of file names used by
-@code{locate}. @code{updatedb} generates a list of files similar to
-the output of @code{find} and then uses utilities for optimizing the
-database for performance. @code{updatedb} is often run periodically
-as a @code{cron} job and configured with environment variables or
-command options. Typically, operating systems have a shell script
-that ``exports'' configurations for variable definitions and uses
-another shell script that ``sources'' the configuration file into the
-environment and then executes @code{updatedb} in the environment.
-
@table @code
@item --findoptions='@[EMAIL PROTECTED]'
Global options to pass on to @code{find}.
@@ -3375,6 +3365,19 @@
@code{slocate}. @xref{Database Formats}, for a detailed description
of each format.
+@item --relpath=@var{pathprefix}
+Generate a @code{locate} database @var{pathprefix/dbfile} for files in the
+tree rooted at @var{pathprefix}, omitting this prefix from the stored file
+names and causing @code{locate} to substitute the directory containing the
+database file in its place. Assume an nfs server exporting
+@var{/serverpath} and a client mounting this tree as
@var{/clientpath}.
+Using a @code{locate} database with relative file names for this tree yields
+the correct results on both server and client. This option is incompatible
+with @code{--old-format} and @code{--netpaths}. Furthermore all paths
+specified as @code{--localpaths} or @code{--prunepaths} must be relative and
[EMAIL PROTECTED] must be given as basename or has the default value
[EMAIL PROTECTED]
+
@item --help
Print a summary of the command line usage and exit.
@item --version
diff -ur -N findutils-4.4.0.orig/locate/testsuite/Makefile.in
findutils-4.4.0/locate/testsuite/Makefile.in
--- findutils-4.4.0.orig/locate/testsuite/Makefile.in 2008-03-15
12:54:19.000000000 +0100
+++ findutils-4.4.0/locate/testsuite/Makefile.in 2008-03-25
00:41:10.000000000 +0100
@@ -557,6 +557,8 @@
locate.gnu/ignore_case3.exp \
locate.gnu/bigprefix1.exp \
locate.gnu/regex1.exp \
+locate.gnu/relpath.exp \
+locate.gnu/relpath1.exp \
locate.gnu/exists1.exp \
locate.gnu/exists2.exp \
locate.gnu/exists3.exp \
@@ -578,6 +580,8 @@
locate.gnu/ignore_case1.xo \
locate.gnu/ignore_case2.xo \
locate.gnu/ignore_case3.xo \
+locate.gnu/relpath.xo \
+locate.gnu/relpath1.xo \
locate.gnu/exists1.xo \
locate.gnu/exists2.xo \
locate.gnu/exists3.xo \
diff -ur -N -x find.info findutils-4.4.0.orig/doc/stamp-vti
findutils-4.4.0/doc/stamp-vti
--- findutils-4.4.0.orig/doc/stamp-vti 2008-03-15 12:55:12.000000000 +0100
+++ findutils-4.4.0/doc/stamp-vti 2008-03-25 12:42:25.000000000 +0100
@@ -1,4 +1,4 @@
-@set UPDATED 10 March 2008
+@set UPDATED 25 March 2008
@set UPDATED-MONTH March 2008
@set EDITION 4.4.0
@set VERSION 4.4.0
diff -ur -N -x find.info findutils-4.4.0.orig/doc/version.texi
findutils-4.4.0/doc/version.texi
--- findutils-4.4.0.orig/doc/version.texi 2008-03-15 12:46:07.000000000
+0100
+++ findutils-4.4.0/doc/version.texi 2008-03-25 12:24:46.000000000 +0100
@@ -1,4 +1,4 @@
-@set UPDATED 10 March 2008
+@set UPDATED 25 March 2008
@set UPDATED-MONTH March 2008
@set EDITION 4.4.0
@set VERSION 4.4.0
Locate databases with relative pathnames
========================================
Updated for findutils-4.4.0
1. Purpose
----------
Assume an NFS server SERVER exporting /SERVERPATH, and some clients using
mount SERVER:/SERVERPATH /CLIENTPATH1
as well as some other clients using
mount SERVER:/SERVERPATH /CLIENTPATH2
where SERVERPATH, CLIENTPATH1, and CLIENTPATH2 may all differ.
In such a situation it is desirable to (re-)build a locate database
on SERVER for the files under /SERVERPATH (for reasons of efficiency
as well as access permissions).
If that database is located under /SERVERPATH and contains only the filename
parts relative to /SERVERPATH, the locate command on the server or clients
could prepend /SERVERPATH, /CLIENTPATH1, resp. /CLIENTPATH2 to these
filename parts, thereby producing results that are correct for the
respective hosts.
2. Basic Idea
-------------
2.0 This feature is implemented only for the new (LOCATE02 and slocate)
database formats, since support for the old format will be discontinued in
the near(?) future.
2.1 The database format is not modified but slightly reinterpreted as
follows: Databases produced by the current implementation never contain an
empty filename. Thus, adding a dummy empty filename as the first database
entry can signal to locate that the database contains relative filenames.
We require that the database is a file in the top-level directory
(/SERVERPATH, /CLIENTPATH1, resp. /CLIENTPATH2) and thus locate can deduce
the required filename prefix from the directory part of the database name.
2.2 A new commandline option for updatedb (e.g., --relpath=PREFIX) causes
the creation of databases with relative filenames. This option would be
incompatible with --old-format and --netpaths, and would require that
--output=DBFILE and --localpaths='DIR1 DIR2...' specify explicitly relative
paths. The option --relpath=PREFIX would imply --changecwd=PREFIX and an
explicit --changecwd would be either ignored or be an error.
2.3 When a database with relative filenames is processed by an old version of
locate, not aware of this new feature, nothing really bad happens. The
filenames found by that old version of locate will be "" (once or twice)
followed by names starting with '/'.
3. Implementation
-----------------
3.1 locate: The required modifications are almost entirely in
visit_locate02_format(): detect the presence of a database with relative
paths, initialize the path prefix with dirname(procdata->dbfile), and set
prefix_len=len=count to the length of this prefix.
3.2 updatedb: Apart from parsing the arguments and checking them for
consistency, all updatedb has to do is pass the option "-r" to frcode.
The filenames passed to frcode are either "." or start with "./".
3.3 frcode: For a database with relative paths frcode outputs an empty
string as first filename and removes the mandatory leading "." from all
filenames read from stdin.
====================
2008-03-25
Peter Breitenlohner <[EMAIL PROTECTED]>
2008-03-15 Peter Breitenlohner <[EMAIL PROTECTED]>
Implement locate databases with relative file names.
* locate/locate.c (struct process_data, visit_locate02_format,
search_one_database): Implement relative file names.
Clean up some slocate mess.
* locate/frcode.c (main): Implement relative file names (using
a new option '-r'). Fix a typo.
* locate/updatedb.sh: Implement relative file names (using a
new option '--relpath').
* locate/testsuite/locate.gnu/relpath*: New tests for databases
with relative file names (locate02 and slocate format).
* locate/testsuite/Makefile.am: Add two new tests.
* doc/find.texi: Document databases with relative file names.
Remove a duplicate paragraph.