Hi, sort(1) does some funky things and isn't hard to break:
$ perl -e 'print "\n"x117000,"x\n"' | sort | sort -c
This patch contains a few changes from NetBSD to correct the behavior regarding
ordering of appending bins to output in certain circumstances which helps pass
more of our own regress tests and improves performance (e.g. regress suite
runtime is <40% with new code compared to old/current code on my box). The new
code is also much easier to understand..
NetBSD logs:
msort.c -r 1.9
merge(): use array of buffers instead of one big buffer for all records, and
enlarge them as necessary to read records from merged files; the
buffers
are allocated once per program run, so there shouldn't be any
performance difference
This makes sort(1) pass also regression 40B and should make it
fully arbitrary long record capable.
XXX the buffer array could probably be freed on end of fmerge() to save
memory
fsort.c -r 1.37
The code that attempted to sort large files by sorting each chunk by the
first key byte and writing to a temp file, then sorting the records from
each temp file that had the same first key byte (and repeating for upto
4 key bytes) was a nice idea, but completely doomed to failure.
Eg PR/9308 where a 70MB file has all but one record the same and short keys.
Not only does the code not work, it is rather guaranteed to be slow.
Instead always use a merge sort for fully sorted chunk of records (each
temporary file contains one lot of sorted records).
The -H option already did this, so just rip out all the code and variables
that can't be used when -H was specified.
Index: append.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/append.c,v
retrieving revision 1.10
diff -u -p -r1.10 append.c
--- append.c 27 Oct 2009 23:59:43 -0000 1.10
+++ append.c 29 Jun 2014 22:17:16 -0000
@@ -148,37 +148,3 @@ append(u_char **keylist, int nelem, int
put(crec, fp);
}
}
-
-/*
- * output the already sorted eol bin.
- */
-void
-rd_append(int binno, union f_handle infl0, int nfiles, FILE *outfp,
- u_char *buffer, u_char *bufend)
-{
- RECHEADER *rec;
-
- rec = (RECHEADER *) buffer;
- if (!getnext(binno, infl0, nfiles, (RECHEADER *) buffer, bufend, 0)) {
- putline(rec, outfp);
- while (getnext(binno, infl0, nfiles, (RECHEADER *) buffer,
- bufend, 0) == 0) {
- if (!UNIQUE)
- putline(rec, outfp);
- }
- }
-}
-
-/*
- * append plain text--used after sorting the biggest bin.
- */
-void
-concat(FILE *a, FILE *b)
-{
- int nread;
- char buffer[4096];
-
- rewind(b);
- while ((nread = fread(buffer, 1, 4096, b)) > 0)
- EWRITE(buffer, 1, nread, a);
-}
Index: extern.h
===================================================================
RCS file: /cvs/src/usr.bin/sort/extern.h,v
retrieving revision 1.8
diff -u -p -r1.8 extern.h
--- extern.h 22 Dec 2009 19:47:02 -0000 1.8
+++ extern.h 29 Jun 2014 22:17:16 -0000
@@ -36,7 +36,6 @@
void append(u_char **, int, int, FILE *, void (*)(RECHEADER *, FILE *),
struct field *);
-void concat(FILE *, FILE *);
length_t enterkey(RECHEADER *, DBT *, int, struct field *);
void fixit(int *, char **);
void fldreset(struct field *);
@@ -44,11 +43,9 @@ FILE *ftmp(void);
void fmerge(int, union f_handle, int,
int (*)(int, union f_handle, int, RECHEADER *, u_char *, struct
field *),
FILE *, void (*)(RECHEADER *, FILE *), struct field *);
-void fsort(int, int, union f_handle, int, FILE *, struct field *);
+void fsort(union f_handle, int, FILE *, struct field *);
int geteasy(int, union f_handle,
int, RECHEADER *, u_char *, struct field *);
-int getnext(int, union f_handle,
- int, RECHEADER *, u_char *, struct field *);
int makekey(int, union f_handle,
int, RECHEADER *, u_char *, struct field *);
int makeline(int, union f_handle,
@@ -57,7 +54,6 @@ void merge(int, int,
int (*)(int, union f_handle, int, RECHEADER *, u_char *, struct
field *),
FILE *, void (*)(RECHEADER *, FILE *), struct field *);
void num_init(void);
-void onepass(u_char **, int, long, long *, u_char *, FILE *);
int optval(int, int);
void order(union f_handle,
int (*)(int, union f_handle, int, RECHEADER *, u_char *, struct
field *),
@@ -65,6 +61,5 @@ void order(union f_handle,
int c_warn);
void putline(RECHEADER *, FILE *);
void putrec(RECHEADER *, FILE *);
-void rd_append(int, union f_handle, int, FILE *, u_char *, u_char *);
int setfield(char *, struct field *, int);
void settables(int);
Index: files.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/files.c,v
retrieving revision 1.13
diff -u -p -r1.13 files.c
--- files.c 27 Oct 2009 23:59:43 -0000 1.13
+++ files.c 29 Jun 2014 22:17:16 -0000
@@ -40,73 +40,6 @@
static int seq(FILE *, DBT *, DBT *);
/*
- * this is the subroutine for file management for fsort().
- * It keeps the buffers for all temporary files.
- */
-int
-getnext(int binno, union f_handle infl0, int nfiles, RECHEADER *pos, u_char
*end,
- struct field *dummy)
-{
- int i;
- u_char *hp;
- static size_t nleft = 0;
- static int cnt = 0, flag = -1;
- static u_char maxb = 0;
- static FILE *fp;
-
- if (nleft == 0) {
- if (binno < 0) /* reset files. */ {
- for (i = 0; i < nfiles; i++) {
- rewind(fstack[infl0.top + i].fp);
- fstack[infl0.top + i].max_o = 0;
- }
- flag = -1;
- nleft = cnt = 0;
- return (-1);
- }
- maxb = fstack[infl0.top].maxb;
- for (; nleft == 0; cnt++) {
- if (cnt >= nfiles) {
- cnt = 0;
- return (EOF);
- }
- fp = fstack[infl0.top + cnt].fp;
- fread(&nleft, sizeof(nleft), 1, fp);
- if (binno < maxb)
- fstack[infl0.top+cnt].max_o
- += sizeof(nleft) + nleft;
- else if (binno == maxb) {
- if (binno != fstack[infl0.top].lastb) {
- fseek(fp, fstack[infl0.top+
- cnt].max_o, SEEK_SET);
- fread(&nleft, sizeof(nleft), 1, fp);
- }
- if (nleft == 0)
- fclose(fp);
- } else if (binno == maxb + 1) { /* skip a bin */
- fseek(fp, nleft, SEEK_CUR);
- fread(&nleft, sizeof(nleft), 1, fp);
- flag = cnt;
- }
- }
- }
- if ((u_char *) pos > end - sizeof(TRECHEADER))
- return (BUFFEND);
- fread(pos, sizeof(TRECHEADER), 1, fp);
- if (end - pos->data < pos->length) {
- hp = ((u_char *)pos) + sizeof(TRECHEADER);
- for (i = sizeof(TRECHEADER); i ; i--)
- ungetc(*--hp, fp);
- return (BUFFEND);
- }
- fread(pos->data, pos->length, 1, fp);
- nleft -= pos->length + sizeof(TRECHEADER);
- if (nleft == 0 && binno == fstack[infl0.top].maxb)
- fclose(fp);
- return (0);
-}
-
-/*
* this is called when there is no special key. It's only called
* in the first fsort pass.
*/
Index: fsort.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/fsort.c,v
retrieving revision 1.21
diff -u -p -r1.21 fsort.c
--- fsort.c 27 Oct 2009 23:59:43 -0000 1.21
+++ fsort.c 29 Jun 2014 22:17:16 -0000
@@ -33,11 +33,11 @@
*/
/*
- * Read in the next bin. If it fits in one segment sort it;
- * otherwise refine it by segment deeper by one character,
- * and try again on smaller bins. Sort the final bin at this level
- * of recursion to keep the head of fstack at 0.
- * After PANIC passes, abort to merge sort.
+ * Read in a block of records (until 'enough').
+ * sort, write to temp file.
+ * Merge sort temp files into output file
+ * Small files miss out the temp file stage.
+ * Large files might get multiple merges.
*/
#include "sort.h"
#include "fsort.h"
@@ -48,28 +48,26 @@
u_char *linebuf;
size_t linebuf_size = MAXLLEN;
struct tempfile fstack[MAXFCT];
-#define FSORTMAX 4
-int PANIC = FSORTMAX;
+
+#define CHECKFSTACK(n) \
+ if (n >= MAXFCT) \
+ errx(2, "fstack: too many temporary files; sort in pieces")
void
-fsort(int binno, int depth, union f_handle infiles, int nfiles, FILE *outfp,
+fsort(union f_handle infiles, int nfiles, FILE *outfp,
struct field *ftbl)
{
- u_char *weights, **keypos, *bufend, *tmpbuf;
+ u_char *weights, **keypos, *bufend;
static u_char *buffer, **keylist;
static size_t bufsize;
- int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0;
+ int ntfiles, mfct = 0;
int c, nelem;
- long sizes[NBINS+1];
- union f_handle tfiles, mstart = {MAXFCT-16};
+ union f_handle tfiles, mstart = { MAXFCT - MERGE_FNUM };
int (*get)(int, union f_handle, int, RECHEADER *,
u_char *, struct field *);
RECHEADER *crec;
struct field tfield[2];
- FILE *prevfp, *tailfp[FSORTMAX+1];
- memset(tailfp, 0, sizeof(tailfp));
- prevfp = outfp;
memset(tfield, 0, sizeof(tfield));
if (ftbl[0].flags & R)
tfield[0].weights = Rascii;
@@ -84,205 +82,114 @@ fsort(int binno, int depth, union f_hand
err(2, NULL);
}
bufend = buffer + bufsize - 1;
- if (binno >= 0) {
- tfiles.top = infiles.top + nfiles;
- get = getnext;
- } else {
- tfiles.top = 0;
- if (SINGL_FLD)
- get = makeline;
- else
- get = makekey;
- }
- for (;;) {
- memset(sizes, 0, sizeof(sizes));
- c = ntfiles = 0;
- if (binno == weights[REC_D] &&
- !(SINGL_FLD && ftbl[0].flags & F)) { /* pop */
- rd_append(weights[REC_D],
- infiles, nfiles, prevfp, buffer, bufend);
- break;
- } else if (binno == weights[REC_D]) {
- depth = 0; /* start over on flat weights */
- ftbl = tfield;
- weights = ftbl[0].weights;
+
+ if (SINGL_FLD)
+ get = makeline;
+ else
+ get = makekey;
+
+ c = nelem = ntfiles = 0;
+ keypos = keylist;
+ crec = (RECHEADER *) buffer;
+ while (c != EOF) {
+ keypos = keylist;
+ nelem = 0;
+ crec = (RECHEADER *) buffer;
+
+ do_read:
+ while ((c = get(-1, infiles, nfiles, crec, bufend,
+ ftbl)) == 0) {
+ *keypos++ = crec->data;
+ if (++nelem == MAXNUM) {
+ c = BUFFEND;
+ break;
+ }
+ crec = (RECHEADER *)((char *) crec +
+ SALIGN(crec->length) + sizeof(TRECHEADER));
}
- while (c != EOF) {
- keypos = keylist;
- nelem = 0;
- crec = (RECHEADER *) buffer;
- while ((c = get(binno, infiles, nfiles, crec, bufend,
- ftbl)) == 0) {
- *keypos++ = crec->data + depth;
- if (++nelem == MAXNUM) {
- c = BUFFEND;
- break;
- }
- crec = (RECHEADER *)((char *)crec +
- SALIGN(crec->length) + sizeof(TRECHEADER));
+ if (c == BUFFEND && nelem < MAXNUM &&
+ bufsize < MAXBUFSIZE) {
+ u_char **keyp;
+ u_char *oldb = buffer;
+ u_char *nbuffer;
+
+ /* buffer was too small for data, allocate
+ * bigger buffer */
+ nbuffer = realloc(buffer, bufsize * 2);
+ if (!nbuffer) {
+ err(2, "failed to realloc buffer to %lu bytes",
+ (unsigned long) bufsize * 2);
}
+ buffer = nbuffer;
+ bufsize *= 2;
+ bufend = buffer + bufsize;
+
+ /* patch up keylist[] */
+ for (keyp = &keypos[-1]; keyp >= keylist; keyp--)
+ *keyp = buffer + (*keyp - oldb);
+
+ crec = (RECHEADER *) (buffer +
+ ((u_char *)crec - oldb));
+ goto do_read;
+ }
+ if (c != BUFFEND && !ntfiles && !mfct) {
+ /* do not push */
+ continue;
+ }
+
+ /* push */
+ fstack[mstart.top + mfct].fp = ftmp();
+ if (radix_sort((const u_char **)keylist, nelem, weights,
+ REC_D))
+ err(2, NULL);
+ append(keylist, nelem, 0, fstack[mstart.top + mfct].fp,
+ putrec, ftbl);
+ mfct++;
+
+ /* reduce number of open files */
+ if (mfct == MERGE_FNUM || (c == EOF && ntfiles)) {
/*
- * buffer was too small for data, allocate
- * a bigger buffer.
+ * Only copy extra incomplete crec
+ * data if there are any.
*/
- if (c == BUFFEND && nelem == 0) {
- bufsize *= 2;
- tmpbuf = realloc(buffer, bufsize);
- if (!tmpbuf)
- err(2, "failed to realloc buffer");
- crec = (RECHEADER *)
- (tmpbuf + ((u_char *)crec - buffer));
- buffer = tmpbuf;
- bufend = buffer + bufsize - 1;
- continue;
+ int nodata = (bufend >= (u_char *)crec &&
+ bufend <= crec->data);
+ size_t sz = 0;
+ u_char *tmpbuf = NULL;
+
+ if (!nodata) {
+ sz = bufend - crec->data;
+ tmpbuf = malloc(sz);
+ memmove(tmpbuf, crec->data, sz);
}
- if (c == BUFFEND || ntfiles || mfct) { /* push */
- if (panic >= PANIC) {
- fstack[MAXFCT-16+mfct].fp = ftmp();
- if (radixsort((const u_char **)keylist,
- nelem, weights, REC_D))
- err(2, NULL);
- append(keylist, nelem, depth, fstack[
- MAXFCT-16+mfct].fp, putrec, ftbl);
- mfct++;
- /* reduce number of open files */
- if (mfct == 16 ||(c == EOF && ntfiles))
{
- fstack[tfiles.top + ntfiles].fp
- = ftmp();
- fmerge(0, mstart, mfct, geteasy,
- fstack[tfiles.top+ntfiles].fp,
- putrec, ftbl);
- ntfiles++;
- mfct = 0;
- }
- } else {
- fstack[tfiles.top + ntfiles].fp= ftmp();
- onepass(keylist, depth, nelem, sizes,
- weights, fstack[tfiles.top+ntfiles].fp);
- ntfiles++;
- }
+ CHECKFSTACK(ntfiles);
+ fstack[ntfiles].fp = ftmp();
+ fmerge(0, mstart, mfct, geteasy,
+ fstack[ntfiles].fp, putrec, ftbl);
+ ntfiles++;
+ mfct = 0;
+
+ if (!nodata) {
+ memmove(crec->data, tmpbuf, sz);
+ free(tmpbuf);
}
}
- get = getnext;
- if (!ntfiles && !mfct) { /* everything in memory--pop */
- if (nelem > 1) {
- if (STABLE) {
- i = sradixsort((const u_char **)keylist,
- nelem, weights, REC_D);
- } else {
- i = radixsort((const u_char **)keylist,
- nelem, weights, REC_D);
- }
- if (i)
- err(2, NULL);
- }
- append(keylist, nelem, depth, outfp, putline, ftbl);
- break; /* pop */
- }
- if (panic >= PANIC) {
- if (!ntfiles)
- fmerge(0, mstart, mfct, geteasy,
- outfp, putline, ftbl);
- else
- fmerge(0, tfiles, ntfiles, geteasy,
- outfp, putline, ftbl);
- break;
- }
- total = maxb = lastb = 0; /* find if one bin dominates */
- for (i = 0; i < NBINS; i++)
- if (sizes[i]) {
- if (sizes[i] > sizes[maxb])
- maxb = i;
- lastb = i;
- total += sizes[i];
- }
- if (sizes[maxb] < max((total / 2) , BUFSIZE))
- maxb = lastb; /* otherwise pop after last bin */
- fstack[tfiles.top].lastb = lastb;
- fstack[tfiles.top].maxb = maxb;
-
- /* start refining next level. */
- get(-1, tfiles, ntfiles, crec, bufend, 0); /* rewind */
- for (i = 0; i < maxb; i++) {
- if (!sizes[i]) /* bin empty; step ahead file offset */
- get(i, tfiles, ntfiles, crec, bufend, 0);
- else
- fsort(i, depth+1, tfiles, ntfiles, outfp, ftbl);
- }
- if (lastb != maxb) {
- if (prevfp != outfp)
- tailfp[panic] = prevfp;
- prevfp = ftmp();
- for (i = maxb+1; i <= lastb; i++)
- if (!sizes[i])
- get(i, tfiles, ntfiles, crec, bufend,0);
- else
- fsort(i, depth+1, tfiles, ntfiles,
- prevfp, ftbl);
- }
-
- /* sort biggest (or last) bin at this level */
- depth++;
- panic++;
- binno = maxb;
- infiles.top = tfiles.top; /* getnext will free tfiles, */
- nfiles = ntfiles; /* so overwrite them */
- }
- if (prevfp != outfp) {
- concat(outfp, prevfp);
- fclose(prevfp);
}
- for (i = panic; i >= 0; --i)
- if (tailfp[i]) {
- concat(outfp, tailfp[i]);
- fclose(tailfp[i]);
- }
-}
-
-/*
- * This is one pass of radix exchange, dumping the bins to disk.
- */
-#define swap(a, b, t) t = a, a = b, b = t
-void
-onepass(u_char **a, int depth, long n, long sizes[], u_char *tr, FILE *fp)
-{
- size_t tsizes[NBINS+1];
- u_char **bin[257], **top[256], ***bp, ***bpmax, ***tp;
- static int histo[256];
- int *hp;
- int c;
- u_char **an, *t, **aj;
- u_char **ak, *r;
-
- memset(tsizes, 0, sizeof(tsizes));
- depth += sizeof(TRECHEADER);
- an = &a[n];
- for (ak = a; ak < an; ak++) {
- histo[c = tr[**ak]]++;
- tsizes[c] += ((RECHEADER *) (*ak -= depth))->length;
+ if (!ntfiles && !mfct) { /* everything in memory--pop */
+ if (nelem > 1 && radix_sort((const u_char **)keylist,
+ nelem, weights, REC_D))
+ err(2, NULL);
+ if (nelem > 0)
+ append(keylist, nelem, 0, outfp, putline, ftbl);
}
+ if (!ntfiles)
+ fmerge(0, mstart, mfct, geteasy, outfp, putline, ftbl);
+ else
+ fmerge(0, tfiles, ntfiles, geteasy, outfp, putline,
+ ftbl);
- bin[0] = a;
- bpmax = bin + 256;
- tp = top, hp = histo;
- for (bp = bin; bp < bpmax; bp++) {
- *tp++ = *(bp + 1) = *bp + (c = *hp);
- *hp++ = 0;
- if (c <= 1)
- continue;
- }
- for (aj = a; aj < an; *aj = r, aj = bin[c + 1])
- for (r = *aj; aj < (ak = --top[c = tr[r[depth]]]); )
- swap(*ak, r, t);
-
- for (ak = a, c = 0; c < 256; c++) {
- an = bin[c + 1];
- n = an - ak;
- tsizes[c] += n * sizeof(TRECHEADER);
- /* tell getnext how many elements in this bin, this segment. */
- EWRITE(&tsizes[c], sizeof(size_t), 1, fp);
- sizes[c] += tsizes[c];
- for (; ak < an; ++ak)
- putrec((RECHEADER *) *ak, fp);
- }
+ free(keylist);
+ keylist = NULL;
+ free(buffer);
+ buffer = NULL;
}
Index: fsort.h
===================================================================
RCS file: /cvs/src/usr.bin/sort/fsort.h,v
retrieving revision 1.10
diff -u -p -r1.10 fsort.h
--- fsort.h 13 Mar 2007 17:33:58 -0000 1.10
+++ fsort.h 29 Jun 2014 22:17:16 -0000
@@ -38,9 +38,17 @@
#define BUFSIZE (1 << POW)
#define MAXNUM (BUFSIZE/10) /* lowish guess at average record size */
#define BUFFEND (EOF-2)
-#define BUFFSMALL (EOF-3) /* buffer is too small to hold line */
#define MAXFCT 1000
#define MAXLLEN ((1 << min(POW-4, 16)) - 14)
+#define MERGE_FNUM 16
+
+/*
+ * Default (initial) and maximum size of record buffer for fsort().
+ * Note that no more than MAXNUM records are stored in the buffer,
+ * even if the buffer is not full yet.
+ */
+#define DEFBUFSIZE (1 << 20) /* 1MB */
+#define MAXBUFSIZE (8 << 20) /* 10 MB */
extern u_char *linebuf;
extern size_t linebuf_size;
Index: msort.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/msort.c,v
retrieving revision 1.24
diff -u -p -r1.24 msort.c
--- msort.c 13 Nov 2013 15:07:27 -0000 1.24
+++ msort.c 29 Jun 2014 22:17:16 -0000
@@ -65,6 +65,7 @@ fmerge(int binno, union f_handle files,
int i, j, last;
void (*put)(RECHEADER *, FILE *);
struct tempfile *l_fstack;
+ size_t tbufsiz;
wts = ftbl->weights;
if (!UNIQUE && SINGL_FLD && (ftbl->flags & F))
@@ -72,12 +73,12 @@ fmerge(int binno, union f_handle files,
if (cfilebuf == NULL) {
cfilebuf = malloc(MAXLLEN + sizeof(MFILE));
if (cfilebuf == NULL)
- errx(2, "cannot allocate memory");
+ errx(2, NULL);
}
- i = min(16, nfiles) * LALIGN(MAXLLEN + sizeof(MFILE));
- if (i > bufsize) {
- bufsize = i;
+ tbufsiz = min(16, nfiles) * LALIGN(MAXLLEN + sizeof(MFILE));
+ if (tbufsiz > bufsize) {
+ bufsize = tbufsiz;
if ((buffer = realloc(buffer, bufsize)) == NULL)
err(2, NULL);
}
@@ -127,22 +128,54 @@ merge(int infl0, int nfiles,
int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field
*),
FILE *outfp, void (*put)(RECHEADER *, FILE *), struct field *ftbl)
{
- int c, i, j;
+ int c, i, j, nf = nfiles;
union f_handle dummy = {0};
- struct mfile *flist[16], *cfile;
+ struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile;
+ size_t availsz = bufsize;
+ static void *bufs[MERGE_FNUM+1];
+ static size_t bufs_sz[MERGE_FNUM+1];
+
+ /*
+ * We need nfiles + 1 buffers. One is 'buffer', the
+ * rest needs to be allocated.
+ */
+ bufs[0] = buffer;
+ bufs_sz[0] = bufsize;
+ for(i=1; i < nfiles+1; i++) {
+ if (bufs[i])
+ continue;
+
+ bufs[i] = malloc(MAXLLEN);
+ if (!bufs[i])
+ err(2, NULL);
+ bufs_sz[i] = MAXLLEN;
+ }
for (i = j = 0; i < nfiles; i++) {
- cfile = (MFILE *) (buffer +
- i * LALIGN(MAXLLEN + sizeof(MFILE)));
- cfile->flno = j + infl0;
- cfile->end = cfile->rec->data + MAXLLEN;
+ cfile = (struct mfile *) bufs[j];
+ cfile->flno = infl0 + j;
+ cfile->end = (u_char *) bufs[j] + bufs_sz[j];
for (c = 1; c == 1;) {
- if (EOF == (c = get(j+infl0, dummy, nfiles,
+ if (EOF == (c = get(cfile->flno, dummy, nfiles,
cfile->rec, cfile->end, ftbl))) {
i--;
nfiles--;
break;
}
+
+ if (c == BUFFEND) {
+ cfile = realloc(bufs[j], bufs_sz[j] *= 2);
+ bufs[j] = (void *)cfile;
+
+ if (!cfile)
+ err(2, NULL);
+
+ cfile->end = (u_char *)cfile + bufs_sz[j];
+
+ c = 1;
+ continue;
+ }
+
if (i)
c = insert(flist, &cfile, i, !DELETE);
else
@@ -151,24 +184,49 @@ merge(int infl0, int nfiles,
j++;
}
if (nfiles > 0) {
- cfile = cfilebuf;
+ cfile = (struct mfile *) bufs[nf];
cfile->flno = flist[0]->flno;
- cfile->end = cfile->rec->data + MAXLLEN;
+ cfile->end = (u_char *) cfile + bufs_sz[nf];
while (nfiles) {
for (c = 1; c == 1;) {
if (EOF == (c = get(cfile->flno, dummy, nfiles,
cfile->rec, cfile->end, ftbl))) {
put(flist[0]->rec, outfp);
- memmove(flist, flist + 1,
- sizeof(MFILE *) * (--nfiles));
- cfile->flno = flist[0]->flno;
+ if (--nfiles > 0) {
+ flist++;
+ cfile->flno = flist[0]->flno;
+ }
break;
}
+ if (c == BUFFEND) {
+ char *oldbuf = (char *)cfile;
+ availsz = (char *)cfile->end - oldbuf;
+ availsz *= 2;
+ cfile = realloc(oldbuf, availsz);
+ if (!cfile)
+ err(2, NULL);
+
+ for (i = 0; i < nf + 1; i++) {
+ if (bufs[i] == oldbuf) {
+ bufs[i] = (char *)cfile;
+ bufs_sz[i] = availsz;
+ break;
+ }
+ }
+
+ cfile->end = (u_char *)cfile + availsz;
+ c = 1;
+ continue;
+ }
if (!(c = insert(flist, &cfile, nfiles,
DELETE)))
put(cfile->rec, outfp);
}
- }
- }
+ }
+ if (bufs_sz[0] > bufsize) {
+ buffer = bufs[0];
+ bufsize = bufs_sz[0];
+ }
+ }
}
/*
Index: sort.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/sort.c,v
retrieving revision 1.41
diff -u -p -r1.41 sort.c
--- sort.c 13 Nov 2013 15:07:27 -0000 1.41
+++ sort.c 29 Jun 2014 22:17:16 -0000
@@ -44,13 +44,14 @@
#include <sys/types.h>
#include <sys/stat.h>
+
+#include <err.h>
#include <locale.h>
#include <paths.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#include <err.h>
int REC_D = '\n';
u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */
@@ -71,6 +72,8 @@ struct coldesc *clist;
int ncols = 0;
int ND = 10; /* limit on number of -k options. */
+int (*radix_sort)(const u_char **, int, const u_char *, u_int) = radixsort;
+
char *devstdin = _PATH_STDIN;
char *tmpdir = _PATH_VARTMP;
char toutpath[PATH_MAX];
@@ -172,7 +175,6 @@ main(int argc, char *argv[])
mflag = 1;
break;
case 'H':
- PANIC = 0;
break;
case 'y':
/* accept -y for backwards compat. */
@@ -186,6 +188,7 @@ main(int argc, char *argv[])
break;
case 's':
STABLE = 1;
+ radix_sort = sradixsort;
break;
case '?':
default:
@@ -292,8 +295,8 @@ main(int argc, char *argv[])
act.sa_handler = onsig;
for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */
if (sigaction(sigtable[i], NULL, &oact) < 0 ||
- oact.sa_handler != SIG_IGN &&
- sigaction(sigtable[i], &act, NULL) < 0)
+ (oact.sa_handler != SIG_IGN &&
+ sigaction(sigtable[i], &act, NULL) < 0))
err(2, "sigaction");
} else
outfile = outpath;
@@ -302,7 +305,7 @@ main(int argc, char *argv[])
if (mflag)
fmerge(-1, filelist, argc-optind, get, outfp, putline, fldtab);
else
- fsort(-1, 0, filelist, argc-optind, outfp, fldtab);
+ fsort(filelist, argc-optind, outfp, fldtab);
if (outfile != outpath) {
if (access(outfile, 0))
err(2, "%s", outfile);
Index: sort.h
===================================================================
RCS file: /cvs/src/usr.bin/sort/sort.h,v
retrieving revision 1.7
diff -u -p -r1.7 sort.h
--- sort.h 21 Aug 2007 20:29:25 -0000 1.7
+++ sort.h 29 Jun 2014 22:17:16 -0000
@@ -129,13 +129,14 @@ union f_handle {
int top;
char **names;
};
-extern int PANIC; /* maximum depth of fsort before fmerge is called */
+
extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
extern u_char alltable[NBINS], dtable[NBINS], itable[NBINS];
extern u_char d_mask[NBINS];
extern int SINGL_FLD, SEP_FLAG, UNIQUE, STABLE;
extern int REC_D;
extern char *tmpdir;
+extern int (*radix_sort)(const u_char **, int, const u_char *, u_int);
extern int ND; /* limit on number of -k options. */
#include "extern.h"
pgpUsYQ6klQT5.pgp
Description: PGP signature
