On Wed, Jul 02, 2014 at 04:37:05PM +0200, Otto Moerbeek wrote:
>
> > Op 2 jul. 2014 om 15:03 heeft Jared Yanovich <[email protected]> het
> > volgende geschreven:
> >
> >> On Wed, Jul 02, 2014 at 12:37:53PM +0200, Otto Moerbeek wrote:
> >>
> >> On Tue, Jul 01, 2014 at 03:56:39PM -0400, Jared Yanovich wrote:
> >>
> >> This works better indeed. But is initing the int member only safe? If
> >> sizeof(top) < sizeof(names), only init some bits of the pointer.
> >
> > To be safe, we can switch to memset. But upon a cursory look, names is only
> > used via that path when binno=-1, which is not the case with tfiles.
>
> We could also swap the union members, so that the biggest comes first.
True but memset just future proofs any kind of changes down the road...
Just completed 'make build' on amd64 with this. There may be some useful tests
from GNU coreutils I can throw at it that I will look into later. Any other
comments from anyone?
Index: append.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/append.c,v
retrieving revision 1.10
diff -u -p -r1.10 append.c
--- append.c 27 Oct 2009 23:59:43 -0000 1.10
+++ append.c 2 Jul 2014 16:43:33 -0000
@@ -148,37 +148,3 @@ append(u_char **keylist, int nelem, int
put(crec, fp);
}
}
-
-/*
- * output the already sorted eol bin.
- */
-void
-rd_append(int binno, union f_handle infl0, int nfiles, FILE *outfp,
- u_char *buffer, u_char *bufend)
-{
- RECHEADER *rec;
-
- rec = (RECHEADER *) buffer;
- if (!getnext(binno, infl0, nfiles, (RECHEADER *) buffer, bufend, 0)) {
- putline(rec, outfp);
- while (getnext(binno, infl0, nfiles, (RECHEADER *) buffer,
- bufend, 0) == 0) {
- if (!UNIQUE)
- putline(rec, outfp);
- }
- }
-}
-
-/*
- * append plain text--used after sorting the biggest bin.
- */
-void
-concat(FILE *a, FILE *b)
-{
- int nread;
- char buffer[4096];
-
- rewind(b);
- while ((nread = fread(buffer, 1, 4096, b)) > 0)
- EWRITE(buffer, 1, nread, a);
-}
Index: extern.h
===================================================================
RCS file: /cvs/src/usr.bin/sort/extern.h,v
retrieving revision 1.8
diff -u -p -r1.8 extern.h
--- extern.h 22 Dec 2009 19:47:02 -0000 1.8
+++ extern.h 2 Jul 2014 16:43:33 -0000
@@ -36,7 +36,6 @@
void append(u_char **, int, int, FILE *, void (*)(RECHEADER *, FILE *),
struct field *);
-void concat(FILE *, FILE *);
length_t enterkey(RECHEADER *, DBT *, int, struct field *);
void fixit(int *, char **);
void fldreset(struct field *);
@@ -44,11 +43,9 @@ FILE *ftmp(void);
void fmerge(int, union f_handle, int,
int (*)(int, union f_handle, int, RECHEADER *, u_char *, struct
field *),
FILE *, void (*)(RECHEADER *, FILE *), struct field *);
-void fsort(int, int, union f_handle, int, FILE *, struct field *);
+void fsort(union f_handle, int, FILE *, struct field *);
int geteasy(int, union f_handle,
int, RECHEADER *, u_char *, struct field *);
-int getnext(int, union f_handle,
- int, RECHEADER *, u_char *, struct field *);
int makekey(int, union f_handle,
int, RECHEADER *, u_char *, struct field *);
int makeline(int, union f_handle,
@@ -57,7 +54,6 @@ void merge(int, int,
int (*)(int, union f_handle, int, RECHEADER *, u_char *, struct
field *),
FILE *, void (*)(RECHEADER *, FILE *), struct field *);
void num_init(void);
-void onepass(u_char **, int, long, long *, u_char *, FILE *);
int optval(int, int);
void order(union f_handle,
int (*)(int, union f_handle, int, RECHEADER *, u_char *, struct
field *),
@@ -65,6 +61,5 @@ void order(union f_handle,
int c_warn);
void putline(RECHEADER *, FILE *);
void putrec(RECHEADER *, FILE *);
-void rd_append(int, union f_handle, int, FILE *, u_char *, u_char *);
int setfield(char *, struct field *, int);
void settables(int);
Index: files.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/files.c,v
retrieving revision 1.13
diff -u -p -r1.13 files.c
--- files.c 27 Oct 2009 23:59:43 -0000 1.13
+++ files.c 2 Jul 2014 16:43:33 -0000
@@ -40,73 +40,6 @@
static int seq(FILE *, DBT *, DBT *);
/*
- * this is the subroutine for file management for fsort().
- * It keeps the buffers for all temporary files.
- */
-int
-getnext(int binno, union f_handle infl0, int nfiles, RECHEADER *pos, u_char
*end,
- struct field *dummy)
-{
- int i;
- u_char *hp;
- static size_t nleft = 0;
- static int cnt = 0, flag = -1;
- static u_char maxb = 0;
- static FILE *fp;
-
- if (nleft == 0) {
- if (binno < 0) /* reset files. */ {
- for (i = 0; i < nfiles; i++) {
- rewind(fstack[infl0.top + i].fp);
- fstack[infl0.top + i].max_o = 0;
- }
- flag = -1;
- nleft = cnt = 0;
- return (-1);
- }
- maxb = fstack[infl0.top].maxb;
- for (; nleft == 0; cnt++) {
- if (cnt >= nfiles) {
- cnt = 0;
- return (EOF);
- }
- fp = fstack[infl0.top + cnt].fp;
- fread(&nleft, sizeof(nleft), 1, fp);
- if (binno < maxb)
- fstack[infl0.top+cnt].max_o
- += sizeof(nleft) + nleft;
- else if (binno == maxb) {
- if (binno != fstack[infl0.top].lastb) {
- fseek(fp, fstack[infl0.top+
- cnt].max_o, SEEK_SET);
- fread(&nleft, sizeof(nleft), 1, fp);
- }
- if (nleft == 0)
- fclose(fp);
- } else if (binno == maxb + 1) { /* skip a bin */
- fseek(fp, nleft, SEEK_CUR);
- fread(&nleft, sizeof(nleft), 1, fp);
- flag = cnt;
- }
- }
- }
- if ((u_char *) pos > end - sizeof(TRECHEADER))
- return (BUFFEND);
- fread(pos, sizeof(TRECHEADER), 1, fp);
- if (end - pos->data < pos->length) {
- hp = ((u_char *)pos) + sizeof(TRECHEADER);
- for (i = sizeof(TRECHEADER); i ; i--)
- ungetc(*--hp, fp);
- return (BUFFEND);
- }
- fread(pos->data, pos->length, 1, fp);
- nleft -= pos->length + sizeof(TRECHEADER);
- if (nleft == 0 && binno == fstack[infl0.top].maxb)
- fclose(fp);
- return (0);
-}
-
-/*
* this is called when there is no special key. It's only called
* in the first fsort pass.
*/
Index: fsort.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/fsort.c,v
retrieving revision 1.21
diff -u -p -r1.21 fsort.c
--- fsort.c 27 Oct 2009 23:59:43 -0000 1.21
+++ fsort.c 2 Jul 2014 16:43:33 -0000
@@ -33,11 +33,11 @@
*/
/*
- * Read in the next bin. If it fits in one segment sort it;
- * otherwise refine it by segment deeper by one character,
- * and try again on smaller bins. Sort the final bin at this level
- * of recursion to keep the head of fstack at 0.
- * After PANIC passes, abort to merge sort.
+ * Read in a block of records (until 'enough').
+ * sort, write to temp file.
+ * Merge sort temp files into output file
+ * Small files miss out the temp file stage.
+ * Large files might get multiple merges.
*/
#include "sort.h"
#include "fsort.h"
@@ -48,28 +48,28 @@
u_char *linebuf;
size_t linebuf_size = MAXLLEN;
struct tempfile fstack[MAXFCT];
-#define FSORTMAX 4
-int PANIC = FSORTMAX;
+
+#define CHECKFSTACK(n) \
+ if (n >= MAXFCT) \
+ errx(2, "fstack: too many temporary files; sort in pieces")
void
-fsort(int binno, int depth, union f_handle infiles, int nfiles, FILE *outfp,
+fsort(union f_handle infiles, int nfiles, FILE *outfp,
struct field *ftbl)
{
- u_char *weights, **keypos, *bufend, *tmpbuf;
+ u_char *weights, **keypos, *bufend;
static u_char *buffer, **keylist;
static size_t bufsize;
- int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0;
+ int ntfiles, mfct = 0;
int c, nelem;
- long sizes[NBINS+1];
- union f_handle tfiles, mstart = {MAXFCT-16};
+ union f_handle tfiles, mstart = { MAXFCT - MERGE_FNUM };
int (*get)(int, union f_handle, int, RECHEADER *,
u_char *, struct field *);
RECHEADER *crec;
struct field tfield[2];
- FILE *prevfp, *tailfp[FSORTMAX+1];
- memset(tailfp, 0, sizeof(tailfp));
- prevfp = outfp;
+ memset(&tfiles, 0, sizeof(tfiles));
+
memset(tfield, 0, sizeof(tfield));
if (ftbl[0].flags & R)
tfield[0].weights = Rascii;
@@ -84,205 +84,114 @@ fsort(int binno, int depth, union f_hand
err(2, NULL);
}
bufend = buffer + bufsize - 1;
- if (binno >= 0) {
- tfiles.top = infiles.top + nfiles;
- get = getnext;
- } else {
- tfiles.top = 0;
- if (SINGL_FLD)
- get = makeline;
- else
- get = makekey;
- }
- for (;;) {
- memset(sizes, 0, sizeof(sizes));
- c = ntfiles = 0;
- if (binno == weights[REC_D] &&
- !(SINGL_FLD && ftbl[0].flags & F)) { /* pop */
- rd_append(weights[REC_D],
- infiles, nfiles, prevfp, buffer, bufend);
- break;
- } else if (binno == weights[REC_D]) {
- depth = 0; /* start over on flat weights */
- ftbl = tfield;
- weights = ftbl[0].weights;
+
+ if (SINGL_FLD)
+ get = makeline;
+ else
+ get = makekey;
+
+ c = nelem = ntfiles = 0;
+ keypos = keylist;
+ crec = (RECHEADER *) buffer;
+ while (c != EOF) {
+ keypos = keylist;
+ nelem = 0;
+ crec = (RECHEADER *) buffer;
+
+ do_read:
+ while ((c = get(-1, infiles, nfiles, crec, bufend,
+ ftbl)) == 0) {
+ *keypos++ = crec->data;
+ if (++nelem == MAXNUM) {
+ c = BUFFEND;
+ break;
+ }
+ crec = (RECHEADER *)((char *) crec +
+ SALIGN(crec->length) + sizeof(TRECHEADER));
}
- while (c != EOF) {
- keypos = keylist;
- nelem = 0;
- crec = (RECHEADER *) buffer;
- while ((c = get(binno, infiles, nfiles, crec, bufend,
- ftbl)) == 0) {
- *keypos++ = crec->data + depth;
- if (++nelem == MAXNUM) {
- c = BUFFEND;
- break;
- }
- crec = (RECHEADER *)((char *)crec +
- SALIGN(crec->length) + sizeof(TRECHEADER));
+ if (c == BUFFEND && nelem < MAXNUM &&
+ bufsize < MAXBUFSIZE) {
+ u_char **keyp;
+ u_char *oldb = buffer;
+ u_char *nbuffer;
+
+ /* buffer was too small for data, allocate
+ * bigger buffer */
+ nbuffer = realloc(buffer, bufsize * 2);
+ if (!nbuffer) {
+ err(2, "failed to realloc buffer to %lu bytes",
+ (unsigned long) bufsize * 2);
}
+ buffer = nbuffer;
+ bufsize *= 2;
+ bufend = buffer + bufsize;
+
+ /* patch up keylist[] */
+ for (keyp = &keypos[-1]; keyp >= keylist; keyp--)
+ *keyp = buffer + (*keyp - oldb);
+
+ crec = (RECHEADER *) (buffer +
+ ((u_char *)crec - oldb));
+ goto do_read;
+ }
+ if (c != BUFFEND && !ntfiles && !mfct) {
+ /* do not push */
+ continue;
+ }
+
+ /* push */
+ fstack[mstart.top + mfct].fp = ftmp();
+ if (radix_sort((const u_char **)keylist, nelem, weights,
+ REC_D))
+ err(2, NULL);
+ append(keylist, nelem, 0, fstack[mstart.top + mfct].fp,
+ putrec, ftbl);
+ mfct++;
+
+ /* reduce number of open files */
+ if (mfct == MERGE_FNUM || (c == EOF && ntfiles)) {
/*
- * buffer was too small for data, allocate
- * a bigger buffer.
+ * Only copy extra incomplete crec
+ * data if there are any.
*/
- if (c == BUFFEND && nelem == 0) {
- bufsize *= 2;
- tmpbuf = realloc(buffer, bufsize);
- if (!tmpbuf)
- err(2, "failed to realloc buffer");
- crec = (RECHEADER *)
- (tmpbuf + ((u_char *)crec - buffer));
- buffer = tmpbuf;
- bufend = buffer + bufsize - 1;
- continue;
+ int nodata = (bufend >= (u_char *)crec &&
+ bufend <= crec->data);
+ size_t sz = 0;
+ u_char *tmpbuf = NULL;
+
+ if (!nodata) {
+ sz = bufend - crec->data;
+ tmpbuf = malloc(sz);
+ memmove(tmpbuf, crec->data, sz);
}
- if (c == BUFFEND || ntfiles || mfct) { /* push */
- if (panic >= PANIC) {
- fstack[MAXFCT-16+mfct].fp = ftmp();
- if (radixsort((const u_char **)keylist,
- nelem, weights, REC_D))
- err(2, NULL);
- append(keylist, nelem, depth, fstack[
- MAXFCT-16+mfct].fp, putrec, ftbl);
- mfct++;
- /* reduce number of open files */
- if (mfct == 16 ||(c == EOF && ntfiles))
{
- fstack[tfiles.top + ntfiles].fp
- = ftmp();
- fmerge(0, mstart, mfct, geteasy,
- fstack[tfiles.top+ntfiles].fp,
- putrec, ftbl);
- ntfiles++;
- mfct = 0;
- }
- } else {
- fstack[tfiles.top + ntfiles].fp= ftmp();
- onepass(keylist, depth, nelem, sizes,
- weights, fstack[tfiles.top+ntfiles].fp);
- ntfiles++;
- }
+ CHECKFSTACK(ntfiles);
+ fstack[ntfiles].fp = ftmp();
+ fmerge(0, mstart, mfct, geteasy,
+ fstack[ntfiles].fp, putrec, ftbl);
+ ntfiles++;
+ mfct = 0;
+
+ if (!nodata) {
+ memmove(crec->data, tmpbuf, sz);
+ free(tmpbuf);
}
}
- get = getnext;
- if (!ntfiles && !mfct) { /* everything in memory--pop */
- if (nelem > 1) {
- if (STABLE) {
- i = sradixsort((const u_char **)keylist,
- nelem, weights, REC_D);
- } else {
- i = radixsort((const u_char **)keylist,
- nelem, weights, REC_D);
- }
- if (i)
- err(2, NULL);
- }
- append(keylist, nelem, depth, outfp, putline, ftbl);
- break; /* pop */
- }
- if (panic >= PANIC) {
- if (!ntfiles)
- fmerge(0, mstart, mfct, geteasy,
- outfp, putline, ftbl);
- else
- fmerge(0, tfiles, ntfiles, geteasy,
- outfp, putline, ftbl);
- break;
- }
- total = maxb = lastb = 0; /* find if one bin dominates */
- for (i = 0; i < NBINS; i++)
- if (sizes[i]) {
- if (sizes[i] > sizes[maxb])
- maxb = i;
- lastb = i;
- total += sizes[i];
- }
- if (sizes[maxb] < max((total / 2) , BUFSIZE))
- maxb = lastb; /* otherwise pop after last bin */
- fstack[tfiles.top].lastb = lastb;
- fstack[tfiles.top].maxb = maxb;
-
- /* start refining next level. */
- get(-1, tfiles, ntfiles, crec, bufend, 0); /* rewind */
- for (i = 0; i < maxb; i++) {
- if (!sizes[i]) /* bin empty; step ahead file offset */
- get(i, tfiles, ntfiles, crec, bufend, 0);
- else
- fsort(i, depth+1, tfiles, ntfiles, outfp, ftbl);
- }
- if (lastb != maxb) {
- if (prevfp != outfp)
- tailfp[panic] = prevfp;
- prevfp = ftmp();
- for (i = maxb+1; i <= lastb; i++)
- if (!sizes[i])
- get(i, tfiles, ntfiles, crec, bufend,0);
- else
- fsort(i, depth+1, tfiles, ntfiles,
- prevfp, ftbl);
- }
-
- /* sort biggest (or last) bin at this level */
- depth++;
- panic++;
- binno = maxb;
- infiles.top = tfiles.top; /* getnext will free tfiles, */
- nfiles = ntfiles; /* so overwrite them */
}
- if (prevfp != outfp) {
- concat(outfp, prevfp);
- fclose(prevfp);
- }
- for (i = panic; i >= 0; --i)
- if (tailfp[i]) {
- concat(outfp, tailfp[i]);
- fclose(tailfp[i]);
- }
-}
-
-/*
- * This is one pass of radix exchange, dumping the bins to disk.
- */
-#define swap(a, b, t) t = a, a = b, b = t
-void
-onepass(u_char **a, int depth, long n, long sizes[], u_char *tr, FILE *fp)
-{
- size_t tsizes[NBINS+1];
- u_char **bin[257], **top[256], ***bp, ***bpmax, ***tp;
- static int histo[256];
- int *hp;
- int c;
- u_char **an, *t, **aj;
- u_char **ak, *r;
-
- memset(tsizes, 0, sizeof(tsizes));
- depth += sizeof(TRECHEADER);
- an = &a[n];
- for (ak = a; ak < an; ak++) {
- histo[c = tr[**ak]]++;
- tsizes[c] += ((RECHEADER *) (*ak -= depth))->length;
+ if (!ntfiles && !mfct) { /* everything in memory--pop */
+ if (nelem > 1 && radix_sort((const u_char **)keylist,
+ nelem, weights, REC_D))
+ err(2, NULL);
+ if (nelem > 0)
+ append(keylist, nelem, 0, outfp, putline, ftbl);
}
+ if (!ntfiles)
+ fmerge(0, mstart, mfct, geteasy, outfp, putline, ftbl);
+ else
+ fmerge(0, tfiles, ntfiles, geteasy, outfp, putline,
+ ftbl);
- bin[0] = a;
- bpmax = bin + 256;
- tp = top, hp = histo;
- for (bp = bin; bp < bpmax; bp++) {
- *tp++ = *(bp + 1) = *bp + (c = *hp);
- *hp++ = 0;
- if (c <= 1)
- continue;
- }
- for (aj = a; aj < an; *aj = r, aj = bin[c + 1])
- for (r = *aj; aj < (ak = --top[c = tr[r[depth]]]); )
- swap(*ak, r, t);
-
- for (ak = a, c = 0; c < 256; c++) {
- an = bin[c + 1];
- n = an - ak;
- tsizes[c] += n * sizeof(TRECHEADER);
- /* tell getnext how many elements in this bin, this segment. */
- EWRITE(&tsizes[c], sizeof(size_t), 1, fp);
- sizes[c] += tsizes[c];
- for (; ak < an; ++ak)
- putrec((RECHEADER *) *ak, fp);
- }
+ free(keylist);
+ keylist = NULL;
+ free(buffer);
+ buffer = NULL;
}
Index: fsort.h
===================================================================
RCS file: /cvs/src/usr.bin/sort/fsort.h,v
retrieving revision 1.10
diff -u -p -r1.10 fsort.h
--- fsort.h 13 Mar 2007 17:33:58 -0000 1.10
+++ fsort.h 2 Jul 2014 16:43:33 -0000
@@ -38,9 +38,17 @@
#define BUFSIZE (1 << POW)
#define MAXNUM (BUFSIZE/10) /* lowish guess at average record size */
#define BUFFEND (EOF-2)
-#define BUFFSMALL (EOF-3) /* buffer is too small to hold line */
#define MAXFCT 1000
#define MAXLLEN ((1 << min(POW-4, 16)) - 14)
+#define MERGE_FNUM 16
+
+/*
+ * Default (initial) and maximum size of record buffer for fsort().
+ * Note that no more than MAXNUM records are stored in the buffer,
+ * even if the buffer is not full yet.
+ */
+#define DEFBUFSIZE (1 << 20) /* 1MB */
+#define MAXBUFSIZE (8 << 20) /* 10 MB */
extern u_char *linebuf;
extern size_t linebuf_size;
Index: msort.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/msort.c,v
retrieving revision 1.24
diff -u -p -r1.24 msort.c
--- msort.c 13 Nov 2013 15:07:27 -0000 1.24
+++ msort.c 2 Jul 2014 16:43:33 -0000
@@ -65,6 +65,7 @@ fmerge(int binno, union f_handle files,
int i, j, last;
void (*put)(RECHEADER *, FILE *);
struct tempfile *l_fstack;
+ size_t tbufsiz;
wts = ftbl->weights;
if (!UNIQUE && SINGL_FLD && (ftbl->flags & F))
@@ -72,12 +73,12 @@ fmerge(int binno, union f_handle files,
if (cfilebuf == NULL) {
cfilebuf = malloc(MAXLLEN + sizeof(MFILE));
if (cfilebuf == NULL)
- errx(2, "cannot allocate memory");
+ errx(2, NULL);
}
- i = min(16, nfiles) * LALIGN(MAXLLEN + sizeof(MFILE));
- if (i > bufsize) {
- bufsize = i;
+ tbufsiz = min(16, nfiles) * LALIGN(MAXLLEN + sizeof(MFILE));
+ if (tbufsiz > bufsize) {
+ bufsize = tbufsiz;
if ((buffer = realloc(buffer, bufsize)) == NULL)
err(2, NULL);
}
@@ -93,8 +94,7 @@ fmerge(int binno, union f_handle files,
if (nfiles <= 16) {
tout = outfp;
put = fput;
- }
- else
+ } else
tout = ftmp();
last = min(16, nfiles - j);
if (binno < 0) {
@@ -104,12 +104,12 @@ fmerge(int binno, union f_handle files,
err(2, "%s", files.names[j+i]);
merge(MAXFCT-1-16, last, get, tout, put, ftbl);
} else {
- for (i = 0; i< last; i++)
+ for (i = 0; i < last; i++)
rewind(l_fstack[i+j].fp);
merge(files.top+j, last, get, tout, put, ftbl);
}
if (nfiles > 16)
- l_fstack[j/16].fp = tout;
+ l_fstack[j / 16].fp = tout;
}
nfiles = (nfiles + 15) / 16;
if (nfiles == 1)
@@ -127,22 +127,54 @@ merge(int infl0, int nfiles,
int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field
*),
FILE *outfp, void (*put)(RECHEADER *, FILE *), struct field *ftbl)
{
- int c, i, j;
+ int c, i, j, nf = nfiles;
union f_handle dummy = {0};
- struct mfile *flist[16], *cfile;
+ struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile;
+ size_t availsz = bufsize;
+ static void *bufs[MERGE_FNUM+1];
+ static size_t bufs_sz[MERGE_FNUM+1];
+
+ /*
+ * We need nfiles + 1 buffers. One is 'buffer', the
+ * rest needs to be allocated.
+ */
+ bufs[0] = buffer;
+ bufs_sz[0] = bufsize;
+ for(i=1; i < nfiles+1; i++) {
+ if (bufs[i])
+ continue;
+
+ bufs[i] = malloc(MAXLLEN);
+ if (!bufs[i])
+ err(2, NULL);
+ bufs_sz[i] = MAXLLEN;
+ }
for (i = j = 0; i < nfiles; i++) {
- cfile = (MFILE *) (buffer +
- i * LALIGN(MAXLLEN + sizeof(MFILE)));
- cfile->flno = j + infl0;
- cfile->end = cfile->rec->data + MAXLLEN;
+ cfile = (struct mfile *) bufs[j];
+ cfile->flno = infl0 + j;
+ cfile->end = (u_char *) bufs[j] + bufs_sz[j];
for (c = 1; c == 1;) {
- if (EOF == (c = get(j+infl0, dummy, nfiles,
+ if (EOF == (c = get(cfile->flno, dummy, nfiles,
cfile->rec, cfile->end, ftbl))) {
i--;
nfiles--;
break;
}
+
+ if (c == BUFFEND) {
+ cfile = realloc(bufs[j], bufs_sz[j] *= 2);
+ bufs[j] = (void *)cfile;
+
+ if (!cfile)
+ err(2, NULL);
+
+ cfile->end = (u_char *)cfile + bufs_sz[j];
+
+ c = 1;
+ continue;
+ }
+
if (i)
c = insert(flist, &cfile, i, !DELETE);
else
@@ -151,24 +183,49 @@ merge(int infl0, int nfiles,
j++;
}
if (nfiles > 0) {
- cfile = cfilebuf;
+ cfile = (struct mfile *) bufs[nf];
cfile->flno = flist[0]->flno;
- cfile->end = cfile->rec->data + MAXLLEN;
+ cfile->end = (u_char *) cfile + bufs_sz[nf];
while (nfiles) {
for (c = 1; c == 1;) {
if (EOF == (c = get(cfile->flno, dummy, nfiles,
cfile->rec, cfile->end, ftbl))) {
put(flist[0]->rec, outfp);
- memmove(flist, flist + 1,
- sizeof(MFILE *) * (--nfiles));
- cfile->flno = flist[0]->flno;
+ if (--nfiles > 0) {
+ flist++;
+ cfile->flno = flist[0]->flno;
+ }
break;
}
+ if (c == BUFFEND) {
+ char *oldbuf = (char *)cfile;
+ availsz = (char *)cfile->end - oldbuf;
+ availsz *= 2;
+ cfile = realloc(oldbuf, availsz);
+ if (!cfile)
+ err(2, NULL);
+
+ for (i = 0; i < nf + 1; i++) {
+ if (bufs[i] == oldbuf) {
+ bufs[i] = (char *)cfile;
+ bufs_sz[i] = availsz;
+ break;
+ }
+ }
+
+ cfile->end = (u_char *)cfile + availsz;
+ c = 1;
+ continue;
+ }
if (!(c = insert(flist, &cfile, nfiles,
DELETE)))
put(cfile->rec, outfp);
}
- }
- }
+ }
+ if (bufs_sz[0] > bufsize) {
+ buffer = bufs[0];
+ bufsize = bufs_sz[0];
+ }
+ }
}
/*
Index: sort.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/sort.c,v
retrieving revision 1.41
diff -u -p -r1.41 sort.c
--- sort.c 13 Nov 2013 15:07:27 -0000 1.41
+++ sort.c 2 Jul 2014 16:43:33 -0000
@@ -44,13 +44,14 @@
#include <sys/types.h>
#include <sys/stat.h>
+
+#include <err.h>
#include <locale.h>
#include <paths.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#include <err.h>
int REC_D = '\n';
u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */
@@ -71,6 +72,8 @@ struct coldesc *clist;
int ncols = 0;
int ND = 10; /* limit on number of -k options. */
+int (*radix_sort)(const u_char **, int, const u_char *, u_int) = radixsort;
+
char *devstdin = _PATH_STDIN;
char *tmpdir = _PATH_VARTMP;
char toutpath[PATH_MAX];
@@ -172,7 +175,6 @@ main(int argc, char *argv[])
mflag = 1;
break;
case 'H':
- PANIC = 0;
break;
case 'y':
/* accept -y for backwards compat. */
@@ -186,6 +188,7 @@ main(int argc, char *argv[])
break;
case 's':
STABLE = 1;
+ radix_sort = sradixsort;
break;
case '?':
default:
@@ -292,8 +295,8 @@ main(int argc, char *argv[])
act.sa_handler = onsig;
for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */
if (sigaction(sigtable[i], NULL, &oact) < 0 ||
- oact.sa_handler != SIG_IGN &&
- sigaction(sigtable[i], &act, NULL) < 0)
+ (oact.sa_handler != SIG_IGN &&
+ sigaction(sigtable[i], &act, NULL) < 0))
err(2, "sigaction");
} else
outfile = outpath;
@@ -302,7 +305,7 @@ main(int argc, char *argv[])
if (mflag)
fmerge(-1, filelist, argc-optind, get, outfp, putline, fldtab);
else
- fsort(-1, 0, filelist, argc-optind, outfp, fldtab);
+ fsort(filelist, argc-optind, outfp, fldtab);
if (outfile != outpath) {
if (access(outfile, 0))
err(2, "%s", outfile);
Index: sort.h
===================================================================
RCS file: /cvs/src/usr.bin/sort/sort.h,v
retrieving revision 1.7
diff -u -p -r1.7 sort.h
--- sort.h 21 Aug 2007 20:29:25 -0000 1.7
+++ sort.h 2 Jul 2014 16:43:33 -0000
@@ -129,13 +129,14 @@ union f_handle {
int top;
char **names;
};
-extern int PANIC; /* maximum depth of fsort before fmerge is called */
+
extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
extern u_char alltable[NBINS], dtable[NBINS], itable[NBINS];
extern u_char d_mask[NBINS];
extern int SINGL_FLD, SEP_FLAG, UNIQUE, STABLE;
extern int REC_D;
extern char *tmpdir;
+extern int (*radix_sort)(const u_char **, int, const u_char *, u_int);
extern int ND; /* limit on number of -k options. */
#include "extern.h"