On Sun, Jun 29, 2014 at 06:48:32PM -0400, Jared Yanovich wrote: > Hi, > > sort(1) does some funky things and isn't hard to break: > > $ perl -e 'print "\n"x117000,"x\n"' | sort | sort -c > > This patch contains a few changes from NetBSD to correct the behavior > regarding > ordering of appending bins to output in certain circumstances which helps pass > more of our own regress tests and improves performance (e.g. regress suite > runtime is <40% with new code compared to old/current code on my box). The > new > code is also much easier to understand.. > > NetBSD logs: > > msort.c -r 1.9 > merge(): use array of buffers instead of one big buffer for all records, > and > enlarge them as necessary to read records from merged files; the > buffers > are allocated once per program run, so there shouldn't be any > performance difference > This makes sort(1) pass also regression 40B and should make it > fully arbitrary long record capable. > XXX the buffer array could probably be freed on end of fmerge() to save > memory > > fsort.c -r 1.37 > The code that attempted to sort large files by sorting each chunk by the > first key byte and writing to a temp file, then sorting the records from > each temp file that had the same first key byte (and repeating for upto > 4 key bytes) was a nice idea, but completely doomed to failure. > Eg PR/9308 where a 70MB file has all but one record the same and short > keys. > Not only does the code not work, it is rather guaranteed to be slow. > Instead always use a merge sort for fully sorted chunk of records (each > temporary file contains one lot of sorted records). > The -H option already did this, so just rip out all the code and variables > that can't be used when -H was specified.
Hi Jared, good to see you active again ;-) This indeed solves some problems, but I have a test file on which it cores. The testfile is available at http://www.drijf.net/openbsd/test4.gz -Otto > > Index: append.c > =================================================================== > RCS file: /cvs/src/usr.bin/sort/append.c,v > retrieving revision 1.10 > diff -u -p -r1.10 append.c > --- append.c 27 Oct 2009 23:59:43 -0000 1.10 > +++ append.c 29 Jun 2014 22:17:16 -0000 > @@ -148,37 +148,3 @@ append(u_char **keylist, int nelem, int > put(crec, fp); > } > } > - > -/* > - * output the already sorted eol bin. > - */ > -void > -rd_append(int binno, union f_handle infl0, int nfiles, FILE *outfp, > - u_char *buffer, u_char *bufend) > -{ > - RECHEADER *rec; > - > - rec = (RECHEADER *) buffer; > - if (!getnext(binno, infl0, nfiles, (RECHEADER *) buffer, bufend, 0)) { > - putline(rec, outfp); > - while (getnext(binno, infl0, nfiles, (RECHEADER *) buffer, > - bufend, 0) == 0) { > - if (!UNIQUE) > - putline(rec, outfp); > - } > - } > -} > - > -/* > - * append plain text--used after sorting the biggest bin. > - */ > -void > -concat(FILE *a, FILE *b) > -{ > - int nread; > - char buffer[4096]; > - > - rewind(b); > - while ((nread = fread(buffer, 1, 4096, b)) > 0) > - EWRITE(buffer, 1, nread, a); > -} > Index: extern.h > =================================================================== > RCS file: /cvs/src/usr.bin/sort/extern.h,v > retrieving revision 1.8 > diff -u -p -r1.8 extern.h > --- extern.h 22 Dec 2009 19:47:02 -0000 1.8 > +++ extern.h 29 Jun 2014 22:17:16 -0000 > @@ -36,7 +36,6 @@ > > void append(u_char **, int, int, FILE *, void (*)(RECHEADER *, FILE *), > struct field *); > -void concat(FILE *, FILE *); > length_t enterkey(RECHEADER *, DBT *, int, struct field *); > void fixit(int *, char **); > void fldreset(struct field *); > @@ -44,11 +43,9 @@ FILE *ftmp(void); > void fmerge(int, union f_handle, int, > int (*)(int, union f_handle, int, RECHEADER *, u_char *, struct > field *), > FILE *, void (*)(RECHEADER *, FILE *), struct field *); > -void fsort(int, int, union f_handle, int, FILE *, struct field *); > +void fsort(union f_handle, int, FILE *, struct field *); > int geteasy(int, union f_handle, > int, RECHEADER *, u_char *, struct field *); > -int getnext(int, union f_handle, > - int, RECHEADER *, u_char *, struct field *); > int makekey(int, union f_handle, > int, RECHEADER *, u_char *, struct field *); > int makeline(int, union f_handle, > @@ -57,7 +54,6 @@ void merge(int, int, > int (*)(int, union f_handle, int, RECHEADER *, u_char *, struct > field *), > FILE *, void (*)(RECHEADER *, FILE *), struct field *); > void num_init(void); > -void onepass(u_char **, int, long, long *, u_char *, FILE *); > int optval(int, int); > void order(union f_handle, > int (*)(int, union f_handle, int, RECHEADER *, u_char *, struct > field *), > @@ -65,6 +61,5 @@ void order(union f_handle, > int c_warn); > void putline(RECHEADER *, FILE *); > void putrec(RECHEADER *, FILE *); > -void rd_append(int, union f_handle, int, FILE *, u_char *, u_char *); > int setfield(char *, struct field *, int); > void settables(int); > Index: files.c > =================================================================== > RCS file: /cvs/src/usr.bin/sort/files.c,v > retrieving revision 1.13 > diff -u -p -r1.13 files.c > --- files.c 27 Oct 2009 23:59:43 -0000 1.13 > +++ files.c 29 Jun 2014 22:17:16 -0000 > @@ -40,73 +40,6 @@ > static int seq(FILE *, DBT *, DBT *); > > /* > - * this is the subroutine for file management for fsort(). > - * It keeps the buffers for all temporary files. > - */ > -int > -getnext(int binno, union f_handle infl0, int nfiles, RECHEADER *pos, u_char > *end, > - struct field *dummy) > -{ > - int i; > - u_char *hp; > - static size_t nleft = 0; > - static int cnt = 0, flag = -1; > - static u_char maxb = 0; > - static FILE *fp; > - > - if (nleft == 0) { > - if (binno < 0) /* reset files. */ { > - for (i = 0; i < nfiles; i++) { > - rewind(fstack[infl0.top + i].fp); > - fstack[infl0.top + i].max_o = 0; > - } > - flag = -1; > - nleft = cnt = 0; > - return (-1); > - } > - maxb = fstack[infl0.top].maxb; > - for (; nleft == 0; cnt++) { > - if (cnt >= nfiles) { > - cnt = 0; > - return (EOF); > - } > - fp = fstack[infl0.top + cnt].fp; > - fread(&nleft, sizeof(nleft), 1, fp); > - if (binno < maxb) > - fstack[infl0.top+cnt].max_o > - += sizeof(nleft) + nleft; > - else if (binno == maxb) { > - if (binno != fstack[infl0.top].lastb) { > - fseek(fp, fstack[infl0.top+ > - cnt].max_o, SEEK_SET); > - fread(&nleft, sizeof(nleft), 1, fp); > - } > - if (nleft == 0) > - fclose(fp); > - } else if (binno == maxb + 1) { /* skip a bin */ > - fseek(fp, nleft, SEEK_CUR); > - fread(&nleft, sizeof(nleft), 1, fp); > - flag = cnt; > - } > - } > - } > - if ((u_char *) pos > end - sizeof(TRECHEADER)) > - return (BUFFEND); > - fread(pos, sizeof(TRECHEADER), 1, fp); > - if (end - pos->data < pos->length) { > - hp = ((u_char *)pos) + sizeof(TRECHEADER); > - for (i = sizeof(TRECHEADER); i ; i--) > - ungetc(*--hp, fp); > - return (BUFFEND); > - } > - fread(pos->data, pos->length, 1, fp); > - nleft -= pos->length + sizeof(TRECHEADER); > - if (nleft == 0 && binno == fstack[infl0.top].maxb) > - fclose(fp); > - return (0); > -} > - > -/* > * this is called when there is no special key. It's only called > * in the first fsort pass. > */ > Index: fsort.c > =================================================================== > RCS file: /cvs/src/usr.bin/sort/fsort.c,v > retrieving revision 1.21 > diff -u -p -r1.21 fsort.c > --- fsort.c 27 Oct 2009 23:59:43 -0000 1.21 > +++ fsort.c 29 Jun 2014 22:17:16 -0000 > @@ -33,11 +33,11 @@ > */ > > /* > - * Read in the next bin. If it fits in one segment sort it; > - * otherwise refine it by segment deeper by one character, > - * and try again on smaller bins. Sort the final bin at this level > - * of recursion to keep the head of fstack at 0. > - * After PANIC passes, abort to merge sort. > + * Read in a block of records (until 'enough'). > + * sort, write to temp file. > + * Merge sort temp files into output file > + * Small files miss out the temp file stage. > + * Large files might get multiple merges. > */ > #include "sort.h" > #include "fsort.h" > @@ -48,28 +48,26 @@ > u_char *linebuf; > size_t linebuf_size = MAXLLEN; > struct tempfile fstack[MAXFCT]; > -#define FSORTMAX 4 > -int PANIC = FSORTMAX; > + > +#define CHECKFSTACK(n) \ > + if (n >= MAXFCT) \ > + errx(2, "fstack: too many temporary files; sort in pieces") > > void > -fsort(int binno, int depth, union f_handle infiles, int nfiles, FILE *outfp, > +fsort(union f_handle infiles, int nfiles, FILE *outfp, > struct field *ftbl) > { > - u_char *weights, **keypos, *bufend, *tmpbuf; > + u_char *weights, **keypos, *bufend; > static u_char *buffer, **keylist; > static size_t bufsize; > - int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0; > + int ntfiles, mfct = 0; > int c, nelem; > - long sizes[NBINS+1]; > - union f_handle tfiles, mstart = {MAXFCT-16}; > + union f_handle tfiles, mstart = { MAXFCT - MERGE_FNUM }; > int (*get)(int, union f_handle, int, RECHEADER *, > u_char *, struct field *); > RECHEADER *crec; > struct field tfield[2]; > - FILE *prevfp, *tailfp[FSORTMAX+1]; > > - memset(tailfp, 0, sizeof(tailfp)); > - prevfp = outfp; > memset(tfield, 0, sizeof(tfield)); > if (ftbl[0].flags & R) > tfield[0].weights = Rascii; > @@ -84,205 +82,114 @@ fsort(int binno, int depth, union f_hand > err(2, NULL); > } > bufend = buffer + bufsize - 1; > - if (binno >= 0) { > - tfiles.top = infiles.top + nfiles; > - get = getnext; > - } else { > - tfiles.top = 0; > - if (SINGL_FLD) > - get = makeline; > - else > - get = makekey; > - } > - for (;;) { > - memset(sizes, 0, sizeof(sizes)); > - c = ntfiles = 0; > - if (binno == weights[REC_D] && > - !(SINGL_FLD && ftbl[0].flags & F)) { /* pop */ > - rd_append(weights[REC_D], > - infiles, nfiles, prevfp, buffer, bufend); > - break; > - } else if (binno == weights[REC_D]) { > - depth = 0; /* start over on flat weights */ > - ftbl = tfield; > - weights = ftbl[0].weights; > + > + if (SINGL_FLD) > + get = makeline; > + else > + get = makekey; > + > + c = nelem = ntfiles = 0; > + keypos = keylist; > + crec = (RECHEADER *) buffer; > + while (c != EOF) { > + keypos = keylist; > + nelem = 0; > + crec = (RECHEADER *) buffer; > + > + do_read: > + while ((c = get(-1, infiles, nfiles, crec, bufend, > + ftbl)) == 0) { > + *keypos++ = crec->data; > + if (++nelem == MAXNUM) { > + c = BUFFEND; > + break; > + } > + crec = (RECHEADER *)((char *) crec + > + SALIGN(crec->length) + sizeof(TRECHEADER)); > } > - while (c != EOF) { > - keypos = keylist; > - nelem = 0; > - crec = (RECHEADER *) buffer; > - while ((c = get(binno, infiles, nfiles, crec, bufend, > - ftbl)) == 0) { > - *keypos++ = crec->data + depth; > - if (++nelem == MAXNUM) { > - c = BUFFEND; > - break; > - } > - crec = (RECHEADER *)((char *)crec + > - SALIGN(crec->length) + sizeof(TRECHEADER)); > + if (c == BUFFEND && nelem < MAXNUM && > + bufsize < MAXBUFSIZE) { > + u_char **keyp; > + u_char *oldb = buffer; > + u_char *nbuffer; > + > + /* buffer was too small for data, allocate > + * bigger buffer */ > + nbuffer = realloc(buffer, bufsize * 2); > + if (!nbuffer) { > + err(2, "failed to realloc buffer to %lu bytes", > + (unsigned long) bufsize * 2); > } > + buffer = nbuffer; > + bufsize *= 2; > + bufend = buffer + bufsize; > + > + /* patch up keylist[] */ > + for (keyp = &keypos[-1]; keyp >= keylist; keyp--) > + *keyp = buffer + (*keyp - oldb); > + > + crec = (RECHEADER *) (buffer + > + ((u_char *)crec - oldb)); > + goto do_read; > + } > + if (c != BUFFEND && !ntfiles && !mfct) { > + /* do not push */ > + continue; > + } > + > + /* push */ > + fstack[mstart.top + mfct].fp = ftmp(); > + if (radix_sort((const u_char **)keylist, nelem, weights, > + REC_D)) > + err(2, NULL); > + append(keylist, nelem, 0, fstack[mstart.top + mfct].fp, > + putrec, ftbl); > + mfct++; > + > + /* reduce number of open files */ > + if (mfct == MERGE_FNUM || (c == EOF && ntfiles)) { > /* > - * buffer was too small for data, allocate > - * a bigger buffer. > + * Only copy extra incomplete crec > + * data if there are any. > */ > - if (c == BUFFEND && nelem == 0) { > - bufsize *= 2; > - tmpbuf = realloc(buffer, bufsize); > - if (!tmpbuf) > - err(2, "failed to realloc buffer"); > - crec = (RECHEADER *) > - (tmpbuf + ((u_char *)crec - buffer)); > - buffer = tmpbuf; > - bufend = buffer + bufsize - 1; > - continue; > + int nodata = (bufend >= (u_char *)crec && > + bufend <= crec->data); > + size_t sz = 0; > + u_char *tmpbuf = NULL; > + > + if (!nodata) { > + sz = bufend - crec->data; > + tmpbuf = malloc(sz); > + memmove(tmpbuf, crec->data, sz); > } > - if (c == BUFFEND || ntfiles || mfct) { /* push */ > - if (panic >= PANIC) { > - fstack[MAXFCT-16+mfct].fp = ftmp(); > - if (radixsort((const u_char **)keylist, > - nelem, weights, REC_D)) > - err(2, NULL); > - append(keylist, nelem, depth, fstack[ > - MAXFCT-16+mfct].fp, putrec, ftbl); > - mfct++; > - /* reduce number of open files */ > - if (mfct == 16 ||(c == EOF && ntfiles)) > { > - fstack[tfiles.top + ntfiles].fp > - = ftmp(); > - fmerge(0, mstart, mfct, geteasy, > - fstack[tfiles.top+ntfiles].fp, > - putrec, ftbl); > - ntfiles++; > - mfct = 0; > - } > - } else { > - fstack[tfiles.top + ntfiles].fp= ftmp(); > - onepass(keylist, depth, nelem, sizes, > - weights, fstack[tfiles.top+ntfiles].fp); > - ntfiles++; > - } > + CHECKFSTACK(ntfiles); > + fstack[ntfiles].fp = ftmp(); > + fmerge(0, mstart, mfct, geteasy, > + fstack[ntfiles].fp, putrec, ftbl); > + ntfiles++; > + mfct = 0; > + > + if (!nodata) { > + memmove(crec->data, tmpbuf, sz); > + free(tmpbuf); > } > } > - get = getnext; > - if (!ntfiles && !mfct) { /* everything in memory--pop */ > - if (nelem > 1) { > - if (STABLE) { > - i = sradixsort((const u_char **)keylist, > - nelem, weights, REC_D); > - } else { > - i = radixsort((const u_char **)keylist, > - nelem, weights, REC_D); > - } > - if (i) > - err(2, NULL); > - } > - append(keylist, nelem, depth, outfp, putline, ftbl); > - break; /* pop */ > - } > - if (panic >= PANIC) { > - if (!ntfiles) > - fmerge(0, mstart, mfct, geteasy, > - outfp, putline, ftbl); > - else > - fmerge(0, tfiles, ntfiles, geteasy, > - outfp, putline, ftbl); > - break; > - } > - total = maxb = lastb = 0; /* find if one bin dominates */ > - for (i = 0; i < NBINS; i++) > - if (sizes[i]) { > - if (sizes[i] > sizes[maxb]) > - maxb = i; > - lastb = i; > - total += sizes[i]; > - } > - if (sizes[maxb] < max((total / 2) , BUFSIZE)) > - maxb = lastb; /* otherwise pop after last bin */ > - fstack[tfiles.top].lastb = lastb; > - fstack[tfiles.top].maxb = maxb; > - > - /* start refining next level. */ > - get(-1, tfiles, ntfiles, crec, bufend, 0); /* rewind */ > - for (i = 0; i < maxb; i++) { > - if (!sizes[i]) /* bin empty; step ahead file offset */ > - get(i, tfiles, ntfiles, crec, bufend, 0); > - else > - fsort(i, depth+1, tfiles, ntfiles, outfp, ftbl); > - } > - if (lastb != maxb) { > - if (prevfp != outfp) > - tailfp[panic] = prevfp; > - prevfp = ftmp(); > - for (i = maxb+1; i <= lastb; i++) > - if (!sizes[i]) > - get(i, tfiles, ntfiles, crec, bufend,0); > - else > - fsort(i, depth+1, tfiles, ntfiles, > - prevfp, ftbl); > - } > - > - /* sort biggest (or last) bin at this level */ > - depth++; > - panic++; > - binno = maxb; > - infiles.top = tfiles.top; /* getnext will free tfiles, */ > - nfiles = ntfiles; /* so overwrite them */ > - } > - if (prevfp != outfp) { > - concat(outfp, prevfp); > - fclose(prevfp); > } > - for (i = panic; i >= 0; --i) > - if (tailfp[i]) { > - concat(outfp, tailfp[i]); > - fclose(tailfp[i]); > - } > -} > - > -/* > - * This is one pass of radix exchange, dumping the bins to disk. > - */ > -#define swap(a, b, t) t = a, a = b, b = t > -void > -onepass(u_char **a, int depth, long n, long sizes[], u_char *tr, FILE *fp) > -{ > - size_t tsizes[NBINS+1]; > - u_char **bin[257], **top[256], ***bp, ***bpmax, ***tp; > - static int histo[256]; > - int *hp; > - int c; > - u_char **an, *t, **aj; > - u_char **ak, *r; > - > - memset(tsizes, 0, sizeof(tsizes)); > - depth += sizeof(TRECHEADER); > - an = &a[n]; > - for (ak = a; ak < an; ak++) { > - histo[c = tr[**ak]]++; > - tsizes[c] += ((RECHEADER *) (*ak -= depth))->length; > + if (!ntfiles && !mfct) { /* everything in memory--pop */ > + if (nelem > 1 && radix_sort((const u_char **)keylist, > + nelem, weights, REC_D)) > + err(2, NULL); > + if (nelem > 0) > + append(keylist, nelem, 0, outfp, putline, ftbl); > } > + if (!ntfiles) > + fmerge(0, mstart, mfct, geteasy, outfp, putline, ftbl); > + else > + fmerge(0, tfiles, ntfiles, geteasy, outfp, putline, > + ftbl); > > - bin[0] = a; > - bpmax = bin + 256; > - tp = top, hp = histo; > - for (bp = bin; bp < bpmax; bp++) { > - *tp++ = *(bp + 1) = *bp + (c = *hp); > - *hp++ = 0; > - if (c <= 1) > - continue; > - } > - for (aj = a; aj < an; *aj = r, aj = bin[c + 1]) > - for (r = *aj; aj < (ak = --top[c = tr[r[depth]]]); ) > - swap(*ak, r, t); > - > - for (ak = a, c = 0; c < 256; c++) { > - an = bin[c + 1]; > - n = an - ak; > - tsizes[c] += n * sizeof(TRECHEADER); > - /* tell getnext how many elements in this bin, this segment. */ > - EWRITE(&tsizes[c], sizeof(size_t), 1, fp); > - sizes[c] += tsizes[c]; > - for (; ak < an; ++ak) > - putrec((RECHEADER *) *ak, fp); > - } > + free(keylist); > + keylist = NULL; > + free(buffer); > + buffer = NULL; > } > Index: fsort.h > =================================================================== > RCS file: /cvs/src/usr.bin/sort/fsort.h,v > retrieving revision 1.10 > diff -u -p -r1.10 fsort.h > --- fsort.h 13 Mar 2007 17:33:58 -0000 1.10 > +++ fsort.h 29 Jun 2014 22:17:16 -0000 > @@ -38,9 +38,17 @@ > #define BUFSIZE (1 << POW) > #define MAXNUM (BUFSIZE/10) /* lowish guess at average record size */ > #define BUFFEND (EOF-2) > -#define BUFFSMALL (EOF-3) /* buffer is too small to hold line */ > #define MAXFCT 1000 > #define MAXLLEN ((1 << min(POW-4, 16)) - 14) > +#define MERGE_FNUM 16 > + > +/* > + * Default (initial) and maximum size of record buffer for fsort(). > + * Note that no more than MAXNUM records are stored in the buffer, > + * even if the buffer is not full yet. > + */ > +#define DEFBUFSIZE (1 << 20) /* 1MB */ > +#define MAXBUFSIZE (8 << 20) /* 10 MB */ > > extern u_char *linebuf; > extern size_t linebuf_size; > Index: msort.c > =================================================================== > RCS file: /cvs/src/usr.bin/sort/msort.c,v > retrieving revision 1.24 > diff -u -p -r1.24 msort.c > --- msort.c 13 Nov 2013 15:07:27 -0000 1.24 > +++ msort.c 29 Jun 2014 22:17:16 -0000 > @@ -65,6 +65,7 @@ fmerge(int binno, union f_handle files, > int i, j, last; > void (*put)(RECHEADER *, FILE *); > struct tempfile *l_fstack; > + size_t tbufsiz; > > wts = ftbl->weights; > if (!UNIQUE && SINGL_FLD && (ftbl->flags & F)) > @@ -72,12 +73,12 @@ fmerge(int binno, union f_handle files, > if (cfilebuf == NULL) { > cfilebuf = malloc(MAXLLEN + sizeof(MFILE)); > if (cfilebuf == NULL) > - errx(2, "cannot allocate memory"); > + errx(2, NULL); > } > > - i = min(16, nfiles) * LALIGN(MAXLLEN + sizeof(MFILE)); > - if (i > bufsize) { > - bufsize = i; > + tbufsiz = min(16, nfiles) * LALIGN(MAXLLEN + sizeof(MFILE)); > + if (tbufsiz > bufsize) { > + bufsize = tbufsiz; > if ((buffer = realloc(buffer, bufsize)) == NULL) > err(2, NULL); > } > @@ -127,22 +128,54 @@ merge(int infl0, int nfiles, > int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field > *), > FILE *outfp, void (*put)(RECHEADER *, FILE *), struct field *ftbl) > { > - int c, i, j; > + int c, i, j, nf = nfiles; > union f_handle dummy = {0}; > - struct mfile *flist[16], *cfile; > + struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile; > + size_t availsz = bufsize; > + static void *bufs[MERGE_FNUM+1]; > + static size_t bufs_sz[MERGE_FNUM+1]; > + > + /* > + * We need nfiles + 1 buffers. One is 'buffer', the > + * rest needs to be allocated. > + */ > + bufs[0] = buffer; > + bufs_sz[0] = bufsize; > + for(i=1; i < nfiles+1; i++) { > + if (bufs[i]) > + continue; > + > + bufs[i] = malloc(MAXLLEN); > + if (!bufs[i]) > + err(2, NULL); > + bufs_sz[i] = MAXLLEN; > + } > > for (i = j = 0; i < nfiles; i++) { > - cfile = (MFILE *) (buffer + > - i * LALIGN(MAXLLEN + sizeof(MFILE))); > - cfile->flno = j + infl0; > - cfile->end = cfile->rec->data + MAXLLEN; > + cfile = (struct mfile *) bufs[j]; > + cfile->flno = infl0 + j; > + cfile->end = (u_char *) bufs[j] + bufs_sz[j]; > for (c = 1; c == 1;) { > - if (EOF == (c = get(j+infl0, dummy, nfiles, > + if (EOF == (c = get(cfile->flno, dummy, nfiles, > cfile->rec, cfile->end, ftbl))) { > i--; > nfiles--; > break; > } > + > + if (c == BUFFEND) { > + cfile = realloc(bufs[j], bufs_sz[j] *= 2); > + bufs[j] = (void *)cfile; > + > + if (!cfile) > + err(2, NULL); > + > + cfile->end = (u_char *)cfile + bufs_sz[j]; > + > + c = 1; > + continue; > + } > + > if (i) > c = insert(flist, &cfile, i, !DELETE); > else > @@ -151,24 +184,49 @@ merge(int infl0, int nfiles, > j++; > } > if (nfiles > 0) { > - cfile = cfilebuf; > + cfile = (struct mfile *) bufs[nf]; > cfile->flno = flist[0]->flno; > - cfile->end = cfile->rec->data + MAXLLEN; > + cfile->end = (u_char *) cfile + bufs_sz[nf]; > while (nfiles) { > for (c = 1; c == 1;) { > if (EOF == (c = get(cfile->flno, dummy, nfiles, > cfile->rec, cfile->end, ftbl))) { > put(flist[0]->rec, outfp); > - memmove(flist, flist + 1, > - sizeof(MFILE *) * (--nfiles)); > - cfile->flno = flist[0]->flno; > + if (--nfiles > 0) { > + flist++; > + cfile->flno = flist[0]->flno; > + } > break; > } > + if (c == BUFFEND) { > + char *oldbuf = (char *)cfile; > + availsz = (char *)cfile->end - oldbuf; > + availsz *= 2; > + cfile = realloc(oldbuf, availsz); > + if (!cfile) > + err(2, NULL); > + > + for (i = 0; i < nf + 1; i++) { > + if (bufs[i] == oldbuf) { > + bufs[i] = (char *)cfile; > + bufs_sz[i] = availsz; > + break; > + } > + } > + > + cfile->end = (u_char *)cfile + availsz; > + c = 1; > + continue; > + } > if (!(c = insert(flist, &cfile, nfiles, > DELETE))) > put(cfile->rec, outfp); > } > - } > - } > + } > + if (bufs_sz[0] > bufsize) { > + buffer = bufs[0]; > + bufsize = bufs_sz[0]; > + } > + } > } > > /* > Index: sort.c > =================================================================== > RCS file: /cvs/src/usr.bin/sort/sort.c,v > retrieving revision 1.41 > diff -u -p -r1.41 sort.c > --- sort.c 13 Nov 2013 15:07:27 -0000 1.41 > +++ sort.c 29 Jun 2014 22:17:16 -0000 > @@ -44,13 +44,14 @@ > > #include <sys/types.h> > #include <sys/stat.h> > + > +#include <err.h> > #include <locale.h> > #include <paths.h> > #include <signal.h> > #include <stdlib.h> > #include <string.h> > #include <unistd.h> > -#include <err.h> > > int REC_D = '\n'; > u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ > @@ -71,6 +72,8 @@ struct coldesc *clist; > int ncols = 0; > int ND = 10; /* limit on number of -k options. */ > > +int (*radix_sort)(const u_char **, int, const u_char *, u_int) = radixsort; > + > char *devstdin = _PATH_STDIN; > char *tmpdir = _PATH_VARTMP; > char toutpath[PATH_MAX]; > @@ -172,7 +175,6 @@ main(int argc, char *argv[]) > mflag = 1; > break; > case 'H': > - PANIC = 0; > break; > case 'y': > /* accept -y for backwards compat. */ > @@ -186,6 +188,7 @@ main(int argc, char *argv[]) > break; > case 's': > STABLE = 1; > + radix_sort = sradixsort; > break; > case '?': > default: > @@ -292,8 +295,8 @@ main(int argc, char *argv[]) > act.sa_handler = onsig; > for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ > if (sigaction(sigtable[i], NULL, &oact) < 0 || > - oact.sa_handler != SIG_IGN && > - sigaction(sigtable[i], &act, NULL) < 0) > + (oact.sa_handler != SIG_IGN && > + sigaction(sigtable[i], &act, NULL) < 0)) > err(2, "sigaction"); > } else > outfile = outpath; > @@ -302,7 +305,7 @@ main(int argc, char *argv[]) > if (mflag) > fmerge(-1, filelist, argc-optind, get, outfp, putline, fldtab); > else > - fsort(-1, 0, filelist, argc-optind, outfp, fldtab); > + fsort(filelist, argc-optind, outfp, fldtab); > if (outfile != outpath) { > if (access(outfile, 0)) > err(2, "%s", outfile); > Index: sort.h > =================================================================== > RCS file: /cvs/src/usr.bin/sort/sort.h,v > retrieving revision 1.7 > diff -u -p -r1.7 sort.h > --- sort.h 21 Aug 2007 20:29:25 -0000 1.7 > +++ sort.h 29 Jun 2014 22:17:16 -0000 > @@ -129,13 +129,14 @@ union f_handle { > int top; > char **names; > }; > -extern int PANIC; /* maximum depth of fsort before fmerge is called */ > + > extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS]; > extern u_char alltable[NBINS], dtable[NBINS], itable[NBINS]; > extern u_char d_mask[NBINS]; > extern int SINGL_FLD, SEP_FLAG, UNIQUE, STABLE; > extern int REC_D; > extern char *tmpdir; > +extern int (*radix_sort)(const u_char **, int, const u_char *, u_int); > extern int ND; /* limit on number of -k options. */ > > #include "extern.h"