Module Name: src
Committed By: dsl
Date: Sat Sep 5 12:00:26 UTC 2009
Modified Files:
src/usr.bin/sort: append.c files.c fsort.c fsort.h msort.c radix_sort.c
sort.h
Log Message:
Now we have our own radix_sort() change the interface so that we pass
an array of 'RECHEADER *' and remove all the crappy stuff that backed up
by REC_DATA_OFFSET (etc).
Also change radix_sort() to return the number of elements, soon to be used
to drop duplicate keys (for sort -u).
To generate a diff of this commit:
cvs rdiff -u -r1.20 -r1.21 src/usr.bin/sort/append.c
cvs rdiff -u -r1.35 -r1.36 src/usr.bin/sort/files.c
cvs rdiff -u -r1.39 -r1.40 src/usr.bin/sort/fsort.c
cvs rdiff -u -r1.15 -r1.16 src/usr.bin/sort/fsort.h
cvs rdiff -u -r1.24 -r1.25 src/usr.bin/sort/msort.c
cvs rdiff -u -r1.1 -r1.2 src/usr.bin/sort/radix_sort.c
cvs rdiff -u -r1.26 -r1.27 src/usr.bin/sort/sort.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/usr.bin/sort/append.c
diff -u src/usr.bin/sort/append.c:1.20 src/usr.bin/sort/append.c:1.21
--- src/usr.bin/sort/append.c:1.20 Sat Aug 22 10:53:28 2009
+++ src/usr.bin/sort/append.c Sat Sep 5 12:00:25 2009
@@ -1,4 +1,4 @@
-/* $NetBSD: append.c,v 1.20 2009/08/22 10:53:28 dsl Exp $ */
+/* $NetBSD: append.c,v 1.21 2009/09/05 12:00:25 dsl Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -64,7 +64,7 @@
#include "sort.h"
#ifndef lint
-__RCSID("$NetBSD: append.c,v 1.20 2009/08/22 10:53:28 dsl Exp $");
+__RCSID("$NetBSD: append.c,v 1.21 2009/09/05 12:00:25 dsl Exp $");
__SCCSID("@(#)append.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
@@ -88,16 +88,16 @@
* copy sorted lines to output; check for uniqueness
*/
void
-append(const u_char **keylist, int nelem, FILE *fp, put_func_t put, u_char *wts)
+append(const RECHEADER **keylist, int nelem, FILE *fp, put_func_t put, u_char *wts)
{
- const u_char **cpos, **lastkey;
- const struct recheader *crec, *prec;
+ const RECHEADER **cpos, **lastkey;
+ const RECHEADER *crec, *prec;
size_t plen;
lastkey = keylist + nelem;
if (!UNIQUE || wts == NULL) {
for (cpos = keylist; cpos < lastkey; cpos++)
- put((const RECHEADER *)(*cpos - REC_DATA_OFFSET), fp);
+ put(*cpos, fp);
return;
}
@@ -105,13 +105,13 @@
return;
cpos = keylist;
- prec = (const RECHEADER *) (*cpos - REC_DATA_OFFSET);
+ prec = *cpos;
if (!SINGL_FLD) {
/* Key for each line is already in adjacent bytes */
plen = prec->offset;
for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
- crec = (const RECHEADER *) (*cpos - REC_DATA_OFFSET);
+ crec = *cpos;
if (crec->offset == plen
&& memcmp(crec->data, prec->data, plen) == 0) {
/* Duplicate key */
@@ -130,7 +130,7 @@
/* Key for each line is already in adjacent bytes */
plen = prec->length;
for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
- crec = (const RECHEADER *) (*cpos - REC_DATA_OFFSET);
+ crec = *cpos;
if (crec->length == plen
&& wt_cmp(crec->data, prec->data, plen, wts) == 0) {
/* Duplicate key */
Index: src/usr.bin/sort/files.c
diff -u src/usr.bin/sort/files.c:1.35 src/usr.bin/sort/files.c:1.36
--- src/usr.bin/sort/files.c:1.35 Sat Aug 22 10:53:28 2009
+++ src/usr.bin/sort/files.c Sat Sep 5 12:00:25 2009
@@ -1,4 +1,4 @@
-/* $NetBSD: files.c,v 1.35 2009/08/22 10:53:28 dsl Exp $ */
+/* $NetBSD: files.c,v 1.36 2009/09/05 12:00:25 dsl Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -65,7 +65,7 @@
#include "fsort.h"
#ifndef lint
-__RCSID("$NetBSD: files.c,v 1.35 2009/08/22 10:53:28 dsl Exp $");
+__RCSID("$NetBSD: files.c,v 1.36 2009/09/05 12:00:25 dsl Exp $");
__SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
@@ -271,7 +271,7 @@
void
putrec(const RECHEADER *rec, FILE *fp)
{
- EWRITE(rec, 1, rec->length + REC_DATA_OFFSET, fp);
+ EWRITE(rec, 1, offsetof(RECHEADER, data) + rec->length, fp);
}
/*
@@ -289,7 +289,7 @@
void
putkeydump(const RECHEADER *rec, FILE *fp)
{
- EWRITE(rec, 1, rec->offset + REC_DATA_OFFSET, fp);
+ EWRITE(rec, 1, offsetof(RECHEADER, data) + rec->offset, fp);
}
/*
@@ -303,15 +303,15 @@
FILE *fp;
fp = fstack[flno].fp;
- if ((u_char *) rec > end - REC_DATA_OFFSET)
+ if ((u_char *)(rec + 1) > end)
return (BUFFEND);
- if (!fread(rec, 1, REC_DATA_OFFSET, fp)) {
+ if (!fread(rec, 1, offsetof(RECHEADER, data), fp)) {
fclose(fp);
fstack[flno].fp = 0;
return (EOF);
}
if (end - rec->data < (ptrdiff_t)rec->length) {
- for (i = REC_DATA_OFFSET - 1; i >= 0; i--)
+ for (i = offsetof(RECHEADER, data) - 1; i >= 0; i--)
ungetc(*((char *) rec + i), fp);
return (BUFFEND);
}
Index: src/usr.bin/sort/fsort.c
diff -u src/usr.bin/sort/fsort.c:1.39 src/usr.bin/sort/fsort.c:1.40
--- src/usr.bin/sort/fsort.c:1.39 Sat Aug 22 10:53:28 2009
+++ src/usr.bin/sort/fsort.c Sat Sep 5 12:00:25 2009
@@ -1,4 +1,4 @@
-/* $NetBSD: fsort.c,v 1.39 2009/08/22 10:53:28 dsl Exp $ */
+/* $NetBSD: fsort.c,v 1.40 2009/09/05 12:00:25 dsl Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -72,17 +72,13 @@
#include "fsort.h"
#ifndef lint
-__RCSID("$NetBSD: fsort.c,v 1.39 2009/08/22 10:53:28 dsl Exp $");
+__RCSID("$NetBSD: fsort.c,v 1.40 2009/09/05 12:00:25 dsl Exp $");
__SCCSID("@(#)fsort.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#include <stdlib.h>
#include <string.h>
-static const u_char **keylist = 0;
-u_char *buffer = 0;
-size_t bufsize = DEFBUFSIZE;
-
struct tempfile fstack[MAXFCT];
#define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1))
@@ -90,21 +86,24 @@
void
fsort(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl)
{
- const u_char **keypos, **keyp;
+ const RECHEADER **keylist;
+ const RECHEADER **keypos, **keyp;
+ RECHEADER *buffer;
+ size_t bufsize = DEFBUFSIZE;
u_char *bufend;
int mfct = 0;
int c, nelem;
get_func_t get;
- struct recheader *crec;
- u_char *nbuffer;
+ RECHEADER *crec;
+ RECHEADER *nbuffer;
FILE *fp;
- if (!buffer) {
- buffer = malloc(bufsize);
- keylist = malloc(MAXNUM * sizeof(u_char *));
- memset(keylist, 0, MAXNUM * sizeof(u_char *));
- }
- bufend = buffer + bufsize;
+ buffer = malloc(bufsize);
+ bufend = (u_char *)buffer + bufsize;
+ /* Allocate double length keymap for radix_sort */
+ keylist = malloc(2 * MAXNUM * sizeof(*keylist));
+ if (buffer == NULL || keylist == NULL)
+ err(2, "failed to malloc initial buffer or keylist");
if (SINGL_FLD)
/* Key and data are one! */
@@ -118,7 +117,7 @@
for (;;) {
keypos = keylist;
nelem = 0;
- crec = (RECHEADER *) buffer;
+ crec = buffer;
/* Loop reading records */
for (;;) {
@@ -126,13 +125,12 @@
/* 'c' is 0, EOF or BUFFEND */
if (c == 0) {
/* Save start of key in input buffer */
- *keypos++ = crec->data;
+ *keypos++ = crec;
if (++nelem == MAXNUM) {
c = BUFFEND;
break;
}
- crec = (RECHEADER *)((char *) crec +
- SALIGN(crec->length) + REC_DATA_OFFSET);
+ crec = (RECHEADER *)(crec->data + SALIGN(crec->length));
continue;
}
if (c == EOF)
@@ -154,18 +152,18 @@
for (keyp = &keypos[-1]; keyp >= keylist; keyp--)
*keyp = nbuffer + (*keyp - buffer);
- crec = (RECHEADER *) (nbuffer + ((u_char *)crec - buffer));
+ crec = nbuffer + (crec - buffer);
buffer = nbuffer;
- bufend = buffer + bufsize;
+ bufend = (u_char *)buffer + bufsize;
}
/* Sort this set of records */
if (SINGL_FLD) {
- if (radix_sort(keylist, nelem, ftbl[0].weights, REC_D))
- err(2, "single field radix_sort");
+ nelem = radix_sort(keylist, keylist + MAXNUM, nelem,
+ ftbl[0].weights, REC_D);
} else {
- if (radix_sort(keylist, nelem, unweighted, 0))
- err(2, "unweighted radix_sort");
+ nelem = radix_sort(keylist, keylist + MAXNUM, nelem,
+ unweighted, 0);
}
if (c == EOF && mfct == 0) {
Index: src/usr.bin/sort/fsort.h
diff -u src/usr.bin/sort/fsort.h:1.15 src/usr.bin/sort/fsort.h:1.16
--- src/usr.bin/sort/fsort.h:1.15 Thu Aug 20 06:36:25 2009
+++ src/usr.bin/sort/fsort.h Sat Sep 5 12:00:25 2009
@@ -1,4 +1,4 @@
-/* $NetBSD: fsort.h,v 1.15 2009/08/20 06:36:25 dsl Exp $ */
+/* $NetBSD: fsort.h,v 1.16 2009/09/05 12:00:25 dsl Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -84,9 +84,6 @@
*/
#define MERGE_FNUM 16
-extern u_char *buffer;
-extern size_t bufsize;
-
/* Temporary files contian data (with record headers) in sorted order */
struct tempfile {
FILE *fp;
Index: src/usr.bin/sort/msort.c
diff -u src/usr.bin/sort/msort.c:1.24 src/usr.bin/sort/msort.c:1.25
--- src/usr.bin/sort/msort.c:1.24 Sat Aug 22 15:16:50 2009
+++ src/usr.bin/sort/msort.c Sat Sep 5 12:00:25 2009
@@ -1,4 +1,4 @@
-/* $NetBSD: msort.c,v 1.24 2009/08/22 15:16:50 dsl Exp $ */
+/* $NetBSD: msort.c,v 1.25 2009/09/05 12:00:25 dsl Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -65,7 +65,7 @@
#include "fsort.h"
#ifndef lint
-__RCSID("$NetBSD: msort.c,v 1.24 2009/08/22 15:16:50 dsl Exp $");
+__RCSID("$NetBSD: msort.c,v 1.25 2009/09/05 12:00:25 dsl Exp $");
__SCCSID("@(#)msort.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
@@ -79,7 +79,7 @@
typedef struct mfile {
u_char *end;
short flno;
- struct recheader rec[1];
+ RECHEADER rec[1];
} MFILE;
static u_char *wts;
@@ -324,15 +324,14 @@
void
order(struct filelist *filelist, get_func_t get, struct field *ftbl)
{
+ RECHEADER *crec, *prec, *trec;
u_char *crec_end, *prec_end, *trec_end;
int c;
- RECHEADER *crec, *prec, *trec;
- buffer = malloc(2 * (DEFLLEN + REC_DATA_OFFSET));
- crec = (RECHEADER *) buffer;
- crec_end = buffer + DEFLLEN + REC_DATA_OFFSET;
- prec = (RECHEADER *) (buffer + DEFLLEN + REC_DATA_OFFSET);
- prec_end = buffer + 2*(DEFLLEN + REC_DATA_OFFSET);
+ crec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
+ crec_end = crec->data + DEFLLEN;
+ prec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
+ prec_end = prec->data + DEFLLEN;
wts = ftbl->weights;
/* XXX this does exit(0) for overlong lines */
Index: src/usr.bin/sort/radix_sort.c
diff -u src/usr.bin/sort/radix_sort.c:1.1 src/usr.bin/sort/radix_sort.c:1.2
--- src/usr.bin/sort/radix_sort.c:1.1 Sat Sep 5 09:16:18 2009
+++ src/usr.bin/sort/radix_sort.c Sat Sep 5 12:00:25 2009
@@ -1,4 +1,4 @@
-/* $NetBSD: radix_sort.c,v 1.1 2009/09/05 09:16:18 dsl Exp $ */
+/* $NetBSD: radix_sort.c,v 1.2 2009/09/05 12:00:25 dsl Exp $ */
/*-
* Copyright (c) 1990, 1993
@@ -37,7 +37,7 @@
#if 0
static char sccsid[] = "@(#)radixsort.c 8.2 (Berkeley) 4/28/95";
#else
-__RCSID("$NetBSD: radix_sort.c,v 1.1 2009/09/05 09:16:18 dsl Exp $");
+__RCSID("$NetBSD: radix_sort.c,v 1.2 2009/09/05 12:00:25 dsl Exp $");
#endif
#endif /* LIBC_SCCS and not lint */
@@ -52,60 +52,49 @@
#include "sort.h"
typedef struct {
- const u_char **sa;
- int sn, si;
+ const RECHEADER **sa; /* Base of saved area */
+ int sn; /* Number of entries */
+ int si; /* index into data for compare */
} stack;
-static inline void simplesort(const u_char **, int, int, const u_char *, u_int);
-static void r_sort_b(const u_char **,
- const u_char **, int, int, const u_char *, u_int);
+static inline int simplesort(const RECHEADER **, int, int, const u_char *, u_int);
+static int r_sort_b(const RECHEADER **,
+ const RECHEADER **, int, int, const u_char *, u_int);
#define THRESHOLD 20 /* Divert to simplesort(). */
#define SIZE 512 /* Default stack size. */
+#define empty(s) (s >= sp)
+#define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si
+#define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i
+#define swap(a, b, t) t = a, a = b, b = t
+
int
-radix_sort(const u_char **a, int n, const u_char *tab, u_int endch)
+radix_sort(const RECHEADER **a, const RECHEADER **ta, int n, const u_char *tab, u_int endch)
{
- const u_char **ta;
-
endch = tab[endch];
- if (n < THRESHOLD)
- simplesort(a, n, 0, tab, endch);
- else {
- if ((ta = malloc(n * sizeof(a))) == NULL)
- return (-1);
- r_sort_b(a, ta, n, 0, tab, endch);
- free(ta);
+ if (n < THRESHOLD && !DEBUG('r')) {
+ return simplesort(a, n, 0, tab, endch);
}
- return (0);
+ return r_sort_b(a, ta, n, 0, tab, endch);
}
-#define empty(s) (s >= sp)
-#define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si
-#define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i
-#define swap(a, b, t) t = a, a = b, b = t
-
-/* Stable sort, requiring additional memory. */
-static void
-r_sort_b(const u_char **a, const u_char **ta, int n, int i, const u_char *tr,
- u_int endch)
+static int
+r_sort_b(const RECHEADER **a, const RECHEADER **ta, int n, int i, const u_char *tr, u_int endch)
{
static u_int count[256], nc, bmin;
u_int c;
- const u_char **ak, **ai;
+ const RECHEADER **ak, **ai;
stack s[512], *sp, *sp0, *sp1, temp;
- const u_char **top[256];
+ const RECHEADER **top[256];
u_int *cp, bigc;
-
- _DIAGASSERT(a != NULL);
- _DIAGASSERT(ta != NULL);
- _DIAGASSERT(tr != NULL);
+ int nrec = n;
sp = s;
push(a, n, i);
while (!empty(s)) {
pop(a, n, i);
- if (n < THRESHOLD) {
+ if (n < THRESHOLD && !DEBUG('r')) {
simplesort(a, n, i, tr, endch);
continue;
}
@@ -113,7 +102,7 @@
if (nc == 0) {
bmin = 255;
for (ak = a + n; --ak >= a;) {
- c = tr[(*ak)[i]];
+ c = tr[(*ak)->data[i]];
if (++count[c] == 1 && c != endch) {
if (c < bmin)
bmin = c;
@@ -155,28 +144,37 @@
for (ak = ta + n, ai = a+n; ak > ta;) /* Copy to temp. */
*--ak = *--ai;
for (ak = ta+n; --ak >= ta;) /* Deal to piles. */
- *--top[tr[(*ak)[i]]] = *ak;
+ *--top[tr[(*ak)->data[i]]] = *ak;
}
+
+ return nrec;
}
/* insertion sort */
-static inline void
-simplesort(const u_char **a, int n, int b, const u_char *tr, u_int endch)
+static inline int
+simplesort(const RECHEADER **a, int n, int b, const u_char *tr, u_int endch)
{
+ const RECHEADER **ak, **ai, *tmp;
+ const RECHEADER **lim = a + n;
+ const u_char *s, *t;
u_char ch;
- const u_char **ak, **ai, *s, *t;
- _DIAGASSERT(a != NULL);
- _DIAGASSERT(tr != NULL);
+ if (n <= 1)
+ return n;
- for (ak = a+1; --n >= 1; ak++)
+ for (ak = a+1; ak < lim; ak++) {
for (ai = ak; ai > a; ai--) {
- for (s = ai[0] + b, t = ai[-1] + b;
+ for (s = ai[0]->data + b, t = ai[-1]->data + b;
(ch = tr[*s]) != endch; s++, t++)
if (ch != tr[*t])
break;
- if (ch >= tr[*t])
+ if (ch >= tr[*t]) {
+
break;
- swap(ai[0], ai[-1], s);
+ }
+ swap(ai[0], ai[-1], tmp);
}
+ }
+
+ return n;
}
Index: src/usr.bin/sort/sort.h
diff -u src/usr.bin/sort/sort.h:1.26 src/usr.bin/sort/sort.h:1.27
--- src/usr.bin/sort/sort.h:1.26 Sat Sep 5 09:16:18 2009
+++ src/usr.bin/sort/sort.h Sat Sep 5 12:00:25 2009
@@ -1,4 +1,4 @@
-/* $NetBSD: sort.h,v 1.26 2009/09/05 09:16:18 dsl Exp $ */
+/* $NetBSD: sort.h,v 1.27 2009/09/05 12:00:25 dsl Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -109,10 +109,6 @@
/* A record is a key/line pair starting at rec.data. It has a total length
* and an offset to the start of the line half of the pair.
- * In order to use (s)radixsort, the array of pointers often points
- * to the data field (and sometimes not the first byte even!).
- * This means the code has to 'back up' by the correct number of bytes
- * in order to get the actual header.
*/
typedef struct recheader {
length_t length;
@@ -120,8 +116,6 @@
u_char data[1];
} RECHEADER;
-#define REC_DATA_OFFSET offsetof(RECHEADER, data)
-
/* This is the column as seen by struct field. It is used by enterfield.
* They are matched with corresponding coldescs during initialization.
*/
@@ -162,7 +156,7 @@
typedef int (*get_func_t)(int, int, struct filelist *, int,
RECHEADER *, u_char *, struct field *);
-typedef void (*put_func_t)(const struct recheader *, FILE *);
+typedef void (*put_func_t)(const RECHEADER *, FILE *);
extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
extern u_char *const weight_tables[4]; /* ascii, Rascii, Ftable, RFtable */
@@ -177,7 +171,7 @@
#define DEBUG(ch) (debug_flags & (1 << ((ch) & 31)))
extern unsigned int debug_flags;
-void append(const u_char **, int, FILE *,
+void append(const RECHEADER **, int, FILE *,
void (*)(const RECHEADER *, FILE *), u_char *);
void concat(FILE *, FILE *);
length_t enterkey(RECHEADER *, const u_char *, u_char *, size_t, struct field *);
@@ -199,6 +193,6 @@
void putrec(const RECHEADER *, FILE *);
void putkeydump(const RECHEADER *, FILE *);
void rd_append(int, int, int, FILE *, u_char *, u_char *);
-int radix_sort(const u_char **, int, const u_char *, u_int);
+int radix_sort(const RECHEADER **, const RECHEADER **, int, const u_char *, u_int);
int setfield(const char *, struct field *, int);
void settables(void);