Tobias Stoeckmann wrote: > I would recommend using fgetln for the actual line parsing. Then this > kinda fragile code can be avoided (fragile: fgets and its users have a > hard time to properly handle '\0' chars inside a file).
Thank you for the advice. Here is an updated diff. It does indeed look less 'fragile' ! In locate.code.c 'mbuf' is never free()d: it is only allocated for the last line of input and after processing this line the program ends. I hope it is ok. Nicolas Bedos Index: src/usr.bin/locate//bigram/locate.bigram.c =================================================================== RCS file: /cvs/src/usr.bin/locate/bigram/locate.bigram.c,v retrieving revision 1.12 diff -u -p -u -r1.12 locate.bigram.c --- src/usr.bin/locate//bigram/locate.bigram.c 27 Oct 2009 23:59:39 -0000 1.12 +++ src/usr.bin/locate//bigram/locate.bigram.c 24 Nov 2014 19:09:03 -0000 @@ -43,13 +43,13 @@ * Use 'code' to encode a file using this output. */ +#include <limits.h> /* for PATH_MAX */ #include <stdio.h> #include <stdlib.h> -#include <sys/param.h> /* for MAXPATHLEN */ #include "locate.h" -u_char buf1[MAXPATHLEN] = " "; -u_char buf2[MAXPATHLEN]; +u_char buf1[PATH_MAX] = " "; +u_char buf2[PATH_MAX]; u_int bigram[UCHAR_MAX + 1][UCHAR_MAX + 1]; int Index: src/usr.bin/locate//code/locate.code.c =================================================================== RCS file: /cvs/src/usr.bin/locate/code/locate.code.c,v retrieving revision 1.17 diff -u -p -u -r1.17 locate.code.c --- src/usr.bin/locate//code/locate.code.c 17 Nov 2013 20:19:36 -0000 1.17 +++ src/usr.bin/locate//code/locate.code.c 24 Nov 2014 19:09:03 -0000 @@ -78,10 +78,10 @@ * Wolfram Schneider, Berlin September 1996 */ -#include <sys/param.h> #include <err.h> #include <errno.h> +#include <limits.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -91,8 +91,7 @@ #define BGBUFSIZE (NBG * 2) /* size of bigram buffer */ -u_char buf1[MAXPATHLEN] = " "; -u_char buf2[MAXPATHLEN]; +u_char buf[PATH_MAX] = " "; u_char bigrams[BGBUFSIZE + 1] = { 0 }; #define LOOKUP 1 /* use a lookup array instead a function, 3x faster */ @@ -115,7 +114,8 @@ extern int optopt; int main(int argc, char *argv[]) { - u_char *cp, *oldpath, *path; + u_char *cp, *oldpath, *path, *mbuf; + size_t len; int ch, code, count, diffcount, oldcount; FILE *fp; int i, j; @@ -156,21 +156,31 @@ main(int argc, char *argv[]) #endif /* LOOKUP */ - oldpath = buf1; - path = buf2; + oldpath = buf; oldcount = 0; - while (fgets(path, sizeof(buf2), stdin) != NULL) { - + while ((path=(u_char *)fgetln(stdin, &len)) != NULL) { /* skip empty lines */ if (*path == '\n') continue; - /* remove newline */ - for (cp = path; *cp != '\0'; cp++) { - /* chop newline */ - if (*cp == '\n') - *cp = '\0'; + if (path[len-1] == '\n') { + /* skip lines exceeding PATH_MAX */ + if (len > sizeof(buf)) + continue; + /* remove newline */ + path[len-1] = '\0'; + } else { + /* skip lines exceeding PATH_MAX-1 (save one byte + * to add the terminating '\0') */ + if (len > sizeof(buf)-1) + continue; + if ((mbuf = malloc(len+1)) == NULL) + err(1, "malloc"); + memcpy(mbuf, path, len); + mbuf[len] = '\0'; + len++; + path = mbuf; } /* Skip longest common prefix. */ @@ -222,13 +232,7 @@ main(int argc, char *argv[]) } } - if (path == buf1) { /* swap pointers */ - path = buf2; - oldpath = buf1; - } else { - path = buf1; - oldpath = buf2; - } + memcpy(buf, path, len); } /* Non-zero status if there were errors */ if (fflush(stdout) != 0 || ferror(stdout)) Index: src/usr.bin/locate//locate/fastfind.c =================================================================== RCS file: /cvs/src/usr.bin/locate/locate/fastfind.c,v retrieving revision 1.11 diff -u -p -u -r1.11 fastfind.c --- src/usr.bin/locate//locate/fastfind.c 25 Oct 2010 19:16:45 -0000 1.11 +++ src/usr.bin/locate//locate/fastfind.c 24 Nov 2014 19:09:04 -0000 @@ -47,7 +47,7 @@ statistic (fp, path_fcodes) u_char *p, *s; int c; int count, umlaut; - u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; + u_char bigram1[NBG], bigram2[NBG], path[PATH_MAX]; for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { p[c] = check_bigram_char(getc(fp)); @@ -140,7 +140,7 @@ fastfind int c, cc; int count, found, globflag; u_char *cutoff; - u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; + u_char bigram1[NBG], bigram2[NBG], path[PATH_MAX]; #ifdef FF_ICASE /* use a lookup table for case insensitive search */ Index: src/usr.bin/locate//locate/locate.c =================================================================== RCS file: /cvs/src/usr.bin/locate/locate/locate.c,v retrieving revision 1.25 diff -u -p -u -r1.25 locate.c --- src/usr.bin/locate//locate/locate.c 13 Apr 2012 15:13:07 -0000 1.25 +++ src/usr.bin/locate//locate/locate.c 24 Nov 2014 19:09:04 -0000 @@ -63,11 +63,11 @@ * in the standard 'find'. */ -#include <sys/param.h> #include <ctype.h> #include <err.h> #include <fnmatch.h> #include <libgen.h> +#include <limits.h> #include <locale.h> #include <stdio.h> #include <stdlib.h> @@ -112,6 +112,7 @@ void fastfind(FILE *, char *, char *) void fastfind_icase(FILE *, char *, char *); void fastfind_mmap(char *, caddr_t, int, char *); void fastfind_mmap_icase(char *, caddr_t, int, char *); +void sane_count(int); void search_mmap(char *, char **); void search_fopen(char *, char **); unsigned long cputime(void); @@ -334,7 +335,7 @@ usage(void) void sane_count(int count) { - if (count < 0 || count >= MAXPATHLEN) { + if (count < 0 || count >= PATH_MAX) { fprintf(stderr, "locate: corrupted database\n"); exit(1); } Index: src/usr.bin/locate//locate/util.c =================================================================== RCS file: /cvs/src/usr.bin/locate/locate/util.c,v retrieving revision 1.12 diff -u -p -u -r1.12 util.c --- src/usr.bin/locate//locate/util.c 16 Nov 2014 00:04:53 -0000 1.12 +++ src/usr.bin/locate//locate/util.c 24 Nov 2014 19:09:04 -0000 @@ -35,11 +35,11 @@ */ -#include <stdlib.h> -#include <string.h> #include <err.h> -#include <sys/param.h> +#include <limits.h> #include <stdio.h> +#include <stdlib.h> +#include <string.h> #include "locate.h" @@ -242,12 +242,12 @@ getwm(p) i = u.i; - if (i > MAXPATHLEN || i < -(MAXPATHLEN)) { + if (i > PATH_MAX || i < -(PATH_MAX)) { i = ntohl(i); - if (i > MAXPATHLEN || i < -(MAXPATHLEN)) { + if (i > PATH_MAX || i < -(PATH_MAX)) { (void)fprintf(stderr, - "integer out of +-MAXPATHLEN (%d): %d\n", - MAXPATHLEN, i); + "integer out of +-PATH_MAX (%d): %d\n", + PATH_MAX, i); exit(1); } } @@ -270,12 +270,12 @@ getwf(fp) word = getw(fp); - if (word > MAXPATHLEN || word < -(MAXPATHLEN)) { + if (word > PATH_MAX || word < -(PATH_MAX)) { word = ntohl(word); - if (word > MAXPATHLEN || word < -(MAXPATHLEN)) { + if (word > PATH_MAX || word < -(PATH_MAX)) { (void)fprintf(stderr, - "integer out of +-MAXPATHLEN (%d): %d\n", - MAXPATHLEN, word); + "integer out of +-PATH_MAX (%d): %d\n", + PATH_MAX, word); exit(1); } }