On Wed, Jul 02, 2014 at 04:37:05PM +0200, Otto Moerbeek wrote:

> 
> > Op 2 jul. 2014 om 15:03 heeft Jared Yanovich <[email protected]> het 
> > volgende geschreven:
> > 
> >> On Wed, Jul 02, 2014 at 12:37:53PM +0200, Otto Moerbeek wrote:
> >> 
> >> On Tue, Jul 01, 2014 at 03:56:39PM -0400, Jared Yanovich wrote:
> >> 
> >> This works better indeed. But is initing the int member only safe? If
> >> sizeof(top) < sizeof(names), only init some bits of the pointer.
> > 
> > To be safe, we can switch to memset.  But upon a cursory look, names is only
> > used via that path when binno=-1, which is not the case with tfiles.
> 
> We could also swap the union members, so that the biggest comes first. 

True but memset just future proofs any kind of changes down the road...

Just completed 'make build' on amd64 with this.  There may be some useful tests
from GNU coreutils I can throw at it that I will look into later.  Any other
comments from anyone?

Index: append.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/append.c,v
retrieving revision 1.10
diff -u -p -r1.10 append.c
--- append.c    27 Oct 2009 23:59:43 -0000      1.10
+++ append.c    2 Jul 2014 16:43:33 -0000
@@ -148,37 +148,3 @@ append(u_char **keylist, int nelem, int 
                put(crec, fp);
        }
 }
-
-/*
- * output the already sorted eol bin.
- */
-void
-rd_append(int binno, union f_handle infl0, int nfiles, FILE *outfp,
-    u_char *buffer, u_char *bufend)
-{
-       RECHEADER *rec;
-
-       rec = (RECHEADER *) buffer;
-       if (!getnext(binno, infl0, nfiles, (RECHEADER *) buffer, bufend, 0)) {
-               putline(rec, outfp);
-               while (getnext(binno, infl0, nfiles, (RECHEADER *) buffer,
-                       bufend, 0) == 0) {
-                       if (!UNIQUE)
-                               putline(rec, outfp);
-               }
-       }
-}
-
-/*
- * append plain text--used after sorting the biggest bin.
- */
-void
-concat(FILE *a, FILE *b)
-{
-        int nread;
-        char buffer[4096];
-
-       rewind(b);
-        while ((nread = fread(buffer, 1, 4096, b)) > 0)
-                EWRITE(buffer, 1, nread, a);
-}
Index: extern.h
===================================================================
RCS file: /cvs/src/usr.bin/sort/extern.h,v
retrieving revision 1.8
diff -u -p -r1.8 extern.h
--- extern.h    22 Dec 2009 19:47:02 -0000      1.8
+++ extern.h    2 Jul 2014 16:43:33 -0000
@@ -36,7 +36,6 @@
 
 void    append(u_char **, int, int, FILE *, void (*)(RECHEADER *, FILE *),
            struct field *);
-void    concat(FILE *, FILE *);
 length_t enterkey(RECHEADER *, DBT *, int, struct field *);
 void    fixit(int *, char **);
 void    fldreset(struct field *);
@@ -44,11 +43,9 @@ FILE *ftmp(void);
 void    fmerge(int, union f_handle, int,
            int (*)(int, union f_handle, int, RECHEADER *, u_char *, struct 
field *),
            FILE *, void (*)(RECHEADER *, FILE *), struct field *);
-void    fsort(int, int, union f_handle, int, FILE *, struct field *);
+void    fsort(union f_handle, int, FILE *, struct field *);
 int     geteasy(int, union f_handle,
            int, RECHEADER *, u_char *, struct field *);
-int     getnext(int, union f_handle,
-           int, RECHEADER *, u_char *, struct field *);
 int     makekey(int, union f_handle,
            int, RECHEADER *, u_char *, struct field *);
 int     makeline(int, union f_handle,
@@ -57,7 +54,6 @@ void   merge(int, int,
            int (*)(int, union f_handle, int, RECHEADER *, u_char *, struct 
field *),
            FILE *, void (*)(RECHEADER *, FILE *), struct field *);
 void    num_init(void);
-void    onepass(u_char **, int, long, long *, u_char *, FILE *);
 int     optval(int, int);
 void    order(union f_handle,
            int (*)(int, union f_handle, int, RECHEADER *, u_char *, struct 
field *),
@@ -65,6 +61,5 @@ void   order(union f_handle,
            int c_warn);
 void    putline(RECHEADER *, FILE *);
 void    putrec(RECHEADER *, FILE *);
-void    rd_append(int, union f_handle, int, FILE *, u_char *, u_char *);
 int     setfield(char *, struct field *, int);
 void    settables(int);
Index: files.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/files.c,v
retrieving revision 1.13
diff -u -p -r1.13 files.c
--- files.c     27 Oct 2009 23:59:43 -0000      1.13
+++ files.c     2 Jul 2014 16:43:33 -0000
@@ -40,73 +40,6 @@
 static int     seq(FILE *, DBT *, DBT *);
 
 /*
- * this is the subroutine for file management for fsort().
- * It keeps the buffers for all temporary files.
- */
-int
-getnext(int binno, union f_handle infl0, int nfiles, RECHEADER *pos, u_char 
*end,
-    struct field *dummy)
-{
-       int i;
-       u_char *hp;
-       static size_t nleft = 0;
-       static int cnt = 0, flag = -1;
-       static u_char maxb = 0;
-       static FILE *fp;
-
-       if (nleft == 0) {
-               if (binno < 0)  /* reset files. */ {
-                       for (i = 0; i < nfiles; i++) {
-                               rewind(fstack[infl0.top + i].fp);
-                               fstack[infl0.top + i].max_o = 0;
-                       }
-                       flag = -1;
-                       nleft = cnt = 0;
-                       return (-1);
-               }
-               maxb = fstack[infl0.top].maxb;
-               for (; nleft == 0; cnt++) {
-                       if (cnt >= nfiles) {
-                               cnt = 0;
-                               return (EOF);
-                       }
-                       fp = fstack[infl0.top + cnt].fp;
-                       fread(&nleft, sizeof(nleft), 1, fp);
-                       if (binno < maxb)
-                               fstack[infl0.top+cnt].max_o
-                                       += sizeof(nleft) + nleft;
-                       else if (binno == maxb) {
-                               if (binno != fstack[infl0.top].lastb) {
-                                       fseek(fp, fstack[infl0.top+
-                                               cnt].max_o, SEEK_SET);
-                                       fread(&nleft, sizeof(nleft), 1, fp);
-                               }
-                               if (nleft == 0)
-                                       fclose(fp);
-                       } else if (binno == maxb + 1) {         /* skip a bin */
-                               fseek(fp, nleft, SEEK_CUR);
-                               fread(&nleft, sizeof(nleft), 1, fp);
-                               flag = cnt;
-                       }
-               }
-       }
-       if ((u_char *) pos > end - sizeof(TRECHEADER))
-               return (BUFFEND);
-       fread(pos, sizeof(TRECHEADER), 1, fp);
-       if (end - pos->data < pos->length) {
-               hp = ((u_char *)pos) + sizeof(TRECHEADER);
-               for (i = sizeof(TRECHEADER); i ;  i--)
-                       ungetc(*--hp, fp);
-               return (BUFFEND);
-       }
-       fread(pos->data, pos->length, 1, fp);
-       nleft -= pos->length + sizeof(TRECHEADER);
-       if (nleft == 0 && binno == fstack[infl0.top].maxb)
-               fclose(fp);
-       return (0);
-}
-
-/*
  * this is called when there is no special key. It's only called
  * in the first fsort pass.
  */
Index: fsort.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/fsort.c,v
retrieving revision 1.21
diff -u -p -r1.21 fsort.c
--- fsort.c     27 Oct 2009 23:59:43 -0000      1.21
+++ fsort.c     2 Jul 2014 16:43:33 -0000
@@ -33,11 +33,11 @@
  */
 
 /*
- * Read in the next bin.  If it fits in one segment sort it;
- * otherwise refine it by segment deeper by one character,
- * and try again on smaller bins.  Sort the final bin at this level
- * of recursion to keep the head of fstack at 0.
- * After PANIC passes, abort to merge sort.
+ * Read in a block of records (until 'enough').
+ * sort, write to temp file.
+ * Merge sort temp files into output file
+ * Small files miss out the temp file stage.
+ * Large files might get multiple merges.
  */
 #include "sort.h"
 #include "fsort.h"
@@ -48,28 +48,28 @@
 u_char *linebuf;
 size_t linebuf_size = MAXLLEN;
 struct tempfile fstack[MAXFCT];
-#define FSORTMAX 4
-int PANIC = FSORTMAX;
+
+#define CHECKFSTACK(n)                                 \
+       if (n >= MAXFCT)                                \
+               errx(2, "fstack: too many temporary files; sort in pieces")
 
 void
-fsort(int binno, int depth, union f_handle infiles, int nfiles, FILE *outfp,
+fsort(union f_handle infiles, int nfiles, FILE *outfp,
     struct field *ftbl)
 {
-       u_char *weights, **keypos, *bufend, *tmpbuf;
+       u_char *weights, **keypos, *bufend;
        static u_char *buffer, **keylist;
        static size_t bufsize;
-       int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0;
+       int ntfiles, mfct = 0;
        int c, nelem;
-       long sizes[NBINS+1];
-       union f_handle tfiles, mstart = {MAXFCT-16};
+       union f_handle tfiles, mstart = { MAXFCT - MERGE_FNUM };
        int (*get)(int, union f_handle, int, RECHEADER *,
                u_char *, struct field *);
        RECHEADER *crec;
        struct field tfield[2];
-       FILE *prevfp, *tailfp[FSORTMAX+1];
 
-       memset(tailfp, 0, sizeof(tailfp));
-       prevfp = outfp;
+       memset(&tfiles, 0, sizeof(tfiles));
+
        memset(tfield, 0, sizeof(tfield));
        if (ftbl[0].flags & R)
                tfield[0].weights = Rascii;
@@ -84,205 +84,114 @@ fsort(int binno, int depth, union f_hand
                        err(2, NULL);
        }
        bufend = buffer + bufsize - 1;
-       if (binno >= 0) {
-               tfiles.top = infiles.top + nfiles;
-               get = getnext;
-       } else {
-               tfiles.top = 0;
-               if (SINGL_FLD)
-                       get = makeline;
-               else
-                       get = makekey;
-       }                               
-       for (;;) {
-               memset(sizes, 0, sizeof(sizes));
-               c = ntfiles = 0;
-               if (binno == weights[REC_D] &&
-                   !(SINGL_FLD && ftbl[0].flags & F)) {        /* pop */
-                       rd_append(weights[REC_D],
-                           infiles, nfiles, prevfp, buffer, bufend);
-                       break;
-               } else if (binno == weights[REC_D]) {
-                       depth = 0;              /* start over on flat weights */
-                       ftbl = tfield;
-                       weights = ftbl[0].weights;
+
+       if (SINGL_FLD)
+               get = makeline;
+       else
+               get = makekey;
+
+       c = nelem = ntfiles = 0;
+       keypos = keylist;
+       crec = (RECHEADER *) buffer;
+       while (c != EOF) {
+               keypos = keylist;
+               nelem = 0;
+               crec = (RECHEADER *) buffer;
+
+ do_read:
+               while ((c = get(-1, infiles, nfiles, crec, bufend,
+                   ftbl)) == 0) {
+                       *keypos++ = crec->data;
+                       if (++nelem == MAXNUM) {
+                               c = BUFFEND;
+                               break;
+                       }
+                       crec = (RECHEADER *)((char *) crec +
+                           SALIGN(crec->length) + sizeof(TRECHEADER));
                }
-               while (c != EOF) {
-                       keypos = keylist;
-                       nelem = 0;
-                       crec = (RECHEADER *) buffer;
-                       while ((c = get(binno, infiles, nfiles, crec, bufend,
-                           ftbl)) == 0) {
-                               *keypos++ = crec->data + depth;
-                               if (++nelem == MAXNUM) {
-                                       c = BUFFEND;
-                                       break;
-                               }
-                               crec = (RECHEADER *)((char *)crec +
-                                   SALIGN(crec->length) + sizeof(TRECHEADER));
+               if (c == BUFFEND && nelem < MAXNUM &&
+                   bufsize < MAXBUFSIZE) {
+                       u_char **keyp;
+                       u_char *oldb = buffer;
+                       u_char *nbuffer;
+
+                       /* buffer was too small for data, allocate
+                        * bigger buffer */
+                       nbuffer = realloc(buffer, bufsize * 2);
+                       if (!nbuffer) {
+                               err(2, "failed to realloc buffer to %lu bytes",
+                                   (unsigned long) bufsize * 2);
                        }
+                       buffer = nbuffer;
+                       bufsize *= 2;
+                       bufend = buffer + bufsize;
+
+                       /* patch up keylist[] */
+                       for (keyp = &keypos[-1]; keyp >= keylist; keyp--)
+                               *keyp = buffer + (*keyp - oldb);
+
+                       crec = (RECHEADER *) (buffer +
+                           ((u_char *)crec - oldb));
+                       goto do_read;
+               }
+               if (c != BUFFEND && !ntfiles && !mfct) {
+                       /* do not push */
+                       continue;
+               }
+
+               /* push */
+               fstack[mstart.top + mfct].fp = ftmp();
+               if (radix_sort((const u_char **)keylist, nelem, weights,
+                   REC_D))
+                       err(2, NULL);
+               append(keylist, nelem, 0, fstack[mstart.top + mfct].fp,
+                   putrec, ftbl);
+               mfct++;
+
+               /* reduce number of open files */
+               if (mfct == MERGE_FNUM || (c == EOF && ntfiles)) {
                        /*
-                        * buffer was too small for data, allocate
-                        * a bigger buffer.
+                        * Only copy extra incomplete crec
+                        * data if there are any.
                         */
-                       if (c == BUFFEND && nelem == 0) {
-                               bufsize *= 2;
-                               tmpbuf = realloc(buffer, bufsize);
-                               if (!tmpbuf)
-                                       err(2, "failed to realloc buffer");
-                               crec = (RECHEADER *)
-                                   (tmpbuf + ((u_char *)crec - buffer));
-                               buffer = tmpbuf;
-                               bufend = buffer + bufsize - 1;
-                               continue;
+                       int nodata = (bufend >= (u_char *)crec &&
+                           bufend <= crec->data);
+                       size_t sz = 0;
+                       u_char *tmpbuf = NULL;
+
+                       if (!nodata) {
+                               sz = bufend - crec->data;
+                               tmpbuf = malloc(sz);
+                               memmove(tmpbuf, crec->data, sz);
                        }
-                       if (c == BUFFEND || ntfiles || mfct) {  /* push */
-                               if (panic >= PANIC) {
-                                       fstack[MAXFCT-16+mfct].fp = ftmp();
-                                       if (radixsort((const u_char **)keylist,
-                                           nelem, weights, REC_D))
-                                               err(2, NULL);
-                                       append(keylist, nelem, depth, fstack[
-                                        MAXFCT-16+mfct].fp, putrec, ftbl);
-                                       mfct++;
-                                       /* reduce number of open files */
-                                       if (mfct == 16 ||(c == EOF && ntfiles)) 
{
-                                               fstack[tfiles.top + ntfiles].fp
-                                                   = ftmp();
-                                               fmerge(0, mstart, mfct, geteasy,
-                                                 fstack[tfiles.top+ntfiles].fp,
-                                                 putrec, ftbl);
-                                               ntfiles++;
-                                               mfct = 0;
-                                       }
-                               } else {
-                                       fstack[tfiles.top + ntfiles].fp= ftmp();
-                                       onepass(keylist, depth, nelem, sizes,
-                                       weights, fstack[tfiles.top+ntfiles].fp);
-                                       ntfiles++;
-                               }
+                       CHECKFSTACK(ntfiles);
+                       fstack[ntfiles].fp = ftmp();
+                       fmerge(0, mstart, mfct, geteasy,
+                           fstack[ntfiles].fp, putrec, ftbl);
+                       ntfiles++;
+                       mfct = 0;
+
+                       if (!nodata) {
+                               memmove(crec->data, tmpbuf, sz);
+                               free(tmpbuf);
                        }
                }
-               get = getnext;
-               if (!ntfiles && !mfct) {        /* everything in memory--pop */
-                       if (nelem > 1) {
-                               if (STABLE) {
-                                       i = sradixsort((const u_char **)keylist,
-                                           nelem, weights, REC_D);
-                               } else {
-                                       i = radixsort((const u_char **)keylist,
-                                           nelem, weights, REC_D);
-                               }
-                               if (i)
-                                       err(2, NULL);
-                       }
-                       append(keylist, nelem, depth, outfp, putline, ftbl);
-                       break;                                  /* pop */
-               }
-               if (panic >= PANIC) {
-                       if (!ntfiles)
-                               fmerge(0, mstart, mfct, geteasy,
-                                   outfp, putline, ftbl);
-                       else
-                               fmerge(0, tfiles, ntfiles, geteasy,
-                                   outfp, putline, ftbl);
-                       break;
-               }
-               total = maxb = lastb = 0;       /* find if one bin dominates */
-               for (i = 0; i < NBINS; i++)
-                 if (sizes[i]) {
-                       if (sizes[i] > sizes[maxb])
-                               maxb = i;
-                       lastb = i;
-                       total += sizes[i];
-               }
-               if (sizes[maxb] < max((total / 2) , BUFSIZE))
-                       maxb = lastb;   /* otherwise pop after last bin */
-               fstack[tfiles.top].lastb = lastb;
-               fstack[tfiles.top].maxb = maxb;
-
-                       /* start refining next level. */
-               get(-1, tfiles, ntfiles, crec, bufend, 0);      /* rewind */
-               for (i = 0; i < maxb; i++) {
-                       if (!sizes[i])  /* bin empty; step ahead file offset */
-                               get(i, tfiles, ntfiles, crec, bufend, 0);
-                       else
-                               fsort(i, depth+1, tfiles, ntfiles, outfp, ftbl);
-               }
-               if (lastb != maxb) {
-                       if (prevfp != outfp)
-                               tailfp[panic] = prevfp;
-                       prevfp = ftmp();
-                       for (i = maxb+1; i <= lastb; i++)
-                               if (!sizes[i])
-                                       get(i, tfiles, ntfiles, crec, bufend,0);
-                               else
-                                       fsort(i, depth+1, tfiles, ntfiles,
-                                           prevfp, ftbl);
-               }
-
-               /* sort biggest (or last) bin at this level */
-               depth++;
-               panic++;
-               binno = maxb;
-               infiles.top = tfiles.top;       /* getnext will free tfiles, */
-               nfiles = ntfiles;               /* so overwrite them */
        }
-       if (prevfp != outfp) {
-               concat(outfp, prevfp);
-               fclose(prevfp);
-       }
-       for (i = panic; i >= 0; --i)
-               if (tailfp[i]) {
-                       concat(outfp, tailfp[i]);
-                       fclose(tailfp[i]);
-               }
-}
-
-/*
- * This is one pass of radix exchange, dumping the bins to disk.
- */
-#define swap(a, b, t) t = a, a = b, b = t
-void
-onepass(u_char **a, int depth, long n, long sizes[], u_char *tr, FILE *fp)
-{
-       size_t tsizes[NBINS+1];
-       u_char **bin[257], **top[256], ***bp, ***bpmax, ***tp;
-       static int histo[256];
-       int *hp;
-       int c;
-       u_char **an, *t, **aj;
-       u_char **ak, *r;
-
-       memset(tsizes, 0, sizeof(tsizes));
-       depth += sizeof(TRECHEADER);
-       an = &a[n];
-       for (ak = a; ak < an; ak++) {
-               histo[c = tr[**ak]]++;
-               tsizes[c] += ((RECHEADER *) (*ak -= depth))->length;
+       if (!ntfiles && !mfct) {        /* everything in memory--pop */
+               if (nelem > 1 && radix_sort((const u_char **)keylist,
+                   nelem, weights, REC_D))
+                       err(2, NULL);
+               if (nelem > 0)
+                       append(keylist, nelem, 0, outfp, putline, ftbl);
        }
+       if (!ntfiles)
+               fmerge(0, mstart, mfct, geteasy, outfp, putline, ftbl);
+       else
+               fmerge(0, tfiles, ntfiles, geteasy, outfp, putline,
+                   ftbl);
 
-       bin[0] = a;
-       bpmax = bin + 256;
-       tp = top, hp = histo;
-       for (bp = bin; bp < bpmax; bp++) {
-               *tp++ = *(bp + 1) = *bp + (c = *hp);
-               *hp++ = 0;
-               if (c <= 1)
-                       continue;
-       }
-       for (aj = a; aj < an; *aj = r, aj = bin[c + 1]) 
-               for (r = *aj; aj < (ak = --top[c = tr[r[depth]]]); )
-                       swap(*ak, r, t);
-
-       for (ak = a, c = 0; c < 256; c++) {
-               an = bin[c + 1];
-               n = an - ak;
-               tsizes[c] += n * sizeof(TRECHEADER);
-               /* tell getnext how many elements in this bin, this segment. */
-               EWRITE(&tsizes[c], sizeof(size_t), 1, fp);
-               sizes[c] += tsizes[c];
-               for (; ak < an; ++ak)
-                       putrec((RECHEADER *) *ak, fp);
-       }
+       free(keylist);
+       keylist = NULL;
+       free(buffer);
+       buffer = NULL;
 }
Index: fsort.h
===================================================================
RCS file: /cvs/src/usr.bin/sort/fsort.h,v
retrieving revision 1.10
diff -u -p -r1.10 fsort.h
--- fsort.h     13 Mar 2007 17:33:58 -0000      1.10
+++ fsort.h     2 Jul 2014 16:43:33 -0000
@@ -38,9 +38,17 @@
 #define BUFSIZE (1 << POW)
 #define MAXNUM (BUFSIZE/10)    /* lowish guess at average record size */
 #define BUFFEND (EOF-2)
-#define BUFFSMALL (EOF-3)      /* buffer is too small to hold line */
 #define MAXFCT 1000
 #define MAXLLEN ((1 << min(POW-4, 16)) - 14)
+#define MERGE_FNUM     16 
+
+/*
+ * Default (initial) and maximum size of record buffer for fsort().
+ * Note that no more than MAXNUM records are stored in the buffer,
+ * even if the buffer is not full yet.
+ */
+#define DEFBUFSIZE      (1 << 20)       /* 1MB */
+#define MAXBUFSIZE      (8 << 20)       /* 10 MB */
 
 extern u_char *linebuf;
 extern size_t linebuf_size;
Index: msort.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/msort.c,v
retrieving revision 1.24
diff -u -p -r1.24 msort.c
--- msort.c     13 Nov 2013 15:07:27 -0000      1.24
+++ msort.c     2 Jul 2014 16:43:33 -0000
@@ -65,6 +65,7 @@ fmerge(int binno, union f_handle files, 
        int i, j, last;
        void (*put)(RECHEADER *, FILE *);
        struct tempfile *l_fstack;
+       size_t tbufsiz;
 
        wts = ftbl->weights;
        if (!UNIQUE && SINGL_FLD && (ftbl->flags & F))
@@ -72,12 +73,12 @@ fmerge(int binno, union f_handle files, 
        if (cfilebuf == NULL) {
                cfilebuf = malloc(MAXLLEN + sizeof(MFILE));
                if (cfilebuf == NULL)
-                       errx(2, "cannot allocate memory");
+                       errx(2, NULL);
        }
 
-       i = min(16, nfiles) * LALIGN(MAXLLEN + sizeof(MFILE));
-       if (i > bufsize) {
-               bufsize = i;
+       tbufsiz = min(16, nfiles) * LALIGN(MAXLLEN + sizeof(MFILE));
+       if (tbufsiz > bufsize) {
+               bufsize = tbufsiz;
                if ((buffer = realloc(buffer, bufsize)) == NULL)
                        err(2, NULL);
        }
@@ -93,8 +94,7 @@ fmerge(int binno, union f_handle files, 
                        if (nfiles <= 16) {
                                tout = outfp;
                                put = fput;
-                       }
-                       else
+                       } else
                                tout = ftmp();
                        last = min(16, nfiles - j);
                        if (binno < 0) {
@@ -104,12 +104,12 @@ fmerge(int binno, union f_handle files, 
                                                err(2, "%s", files.names[j+i]);
                                merge(MAXFCT-1-16, last, get, tout, put, ftbl);
                        } else {
-                               for (i = 0; i< last; i++)
+                               for (i = 0; i < last; i++)
                                        rewind(l_fstack[i+j].fp);
                                merge(files.top+j, last, get, tout, put, ftbl);
                        }
                        if (nfiles > 16)
-                               l_fstack[j/16].fp = tout;
+                               l_fstack[j / 16].fp = tout;
                }
                nfiles = (nfiles + 15) / 16;
                if (nfiles == 1)
@@ -127,22 +127,54 @@ merge(int infl0, int nfiles,
     int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field 
*),
     FILE *outfp, void (*put)(RECHEADER *, FILE *), struct field *ftbl)
 {
-       int c, i, j;
+       int c, i, j, nf = nfiles;
        union f_handle dummy = {0};
-       struct mfile *flist[16], *cfile;
+       struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile;
+       size_t availsz = bufsize;
+       static void *bufs[MERGE_FNUM+1];
+       static size_t bufs_sz[MERGE_FNUM+1];
+
+       /*
+        * We need nfiles + 1 buffers. One is 'buffer', the
+        * rest needs to be allocated.
+        */
+       bufs[0] = buffer;
+       bufs_sz[0] = bufsize;
+       for(i=1; i < nfiles+1; i++) {
+               if (bufs[i])
+                       continue;
+
+               bufs[i] = malloc(MAXLLEN);
+               if (!bufs[i])
+                       err(2, NULL);
+               bufs_sz[i] = MAXLLEN;
+       }
 
        for (i = j = 0; i < nfiles; i++) {
-               cfile = (MFILE *) (buffer +
-                   i * LALIGN(MAXLLEN + sizeof(MFILE)));
-               cfile->flno = j + infl0;
-               cfile->end = cfile->rec->data + MAXLLEN;
+               cfile = (struct mfile *) bufs[j];
+               cfile->flno = infl0 + j;
+               cfile->end = (u_char *) bufs[j] + bufs_sz[j];
                for (c = 1; c == 1;) {
-                       if (EOF == (c = get(j+infl0, dummy, nfiles,
+                       if (EOF == (c = get(cfile->flno, dummy, nfiles,
                           cfile->rec, cfile->end, ftbl))) {
                                i--;
                                nfiles--;
                                break;
                        }
+
+                       if (c == BUFFEND) {
+                               cfile = realloc(bufs[j], bufs_sz[j] *= 2);
+                               bufs[j] = (void *)cfile;
+
+                               if (!cfile)
+                                       err(2, NULL);
+
+                               cfile->end = (u_char *)cfile + bufs_sz[j];
+
+                               c = 1;
+                               continue;
+                       }
+
                        if (i)
                                c = insert(flist, &cfile, i, !DELETE);
                        else
@@ -151,24 +183,49 @@ merge(int infl0, int nfiles,
                j++;
        }
        if (nfiles > 0) {
-               cfile = cfilebuf;
+               cfile = (struct mfile *) bufs[nf];
                cfile->flno = flist[0]->flno;
-               cfile->end = cfile->rec->data + MAXLLEN;
+               cfile->end = (u_char *) cfile + bufs_sz[nf];
                while (nfiles) {
                        for (c = 1; c == 1;) {
                                if (EOF == (c = get(cfile->flno, dummy, nfiles,
                                   cfile->rec, cfile->end, ftbl))) {
                                        put(flist[0]->rec, outfp);
-                                       memmove(flist, flist + 1,
-                                           sizeof(MFILE *) * (--nfiles));
-                                       cfile->flno = flist[0]->flno;
+                                       if (--nfiles > 0) {
+                                               flist++;
+                                               cfile->flno = flist[0]->flno;
+                                       }
                                        break;
                                }
+                               if (c == BUFFEND) {
+                                       char *oldbuf = (char *)cfile;
+                                       availsz = (char *)cfile->end - oldbuf;
+                                       availsz *= 2;
+                                       cfile = realloc(oldbuf, availsz);
+                                       if (!cfile)
+                                               err(2, NULL);
+
+                                       for (i = 0; i < nf + 1; i++) {
+                                               if (bufs[i] == oldbuf) {
+                                                       bufs[i] = (char *)cfile;
+                                                       bufs_sz[i] = availsz;
+                                                       break;
+                                               }
+                                       }
+
+                                       cfile->end = (u_char *)cfile + availsz;
+                                       c = 1;
+                                       continue;
+                               }
                                if (!(c = insert(flist, &cfile, nfiles, 
DELETE)))
                                        put(cfile->rec, outfp);
                        }
-               }       
-       }       
+               }
+               if (bufs_sz[0] > bufsize) {
+                       buffer = bufs[0];
+                       bufsize = bufs_sz[0];
+               }
+       }
 }
 
 /*
Index: sort.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/sort.c,v
retrieving revision 1.41
diff -u -p -r1.41 sort.c
--- sort.c      13 Nov 2013 15:07:27 -0000      1.41
+++ sort.c      2 Jul 2014 16:43:33 -0000
@@ -44,13 +44,14 @@
 
 #include <sys/types.h>
 #include <sys/stat.h>
+
+#include <err.h>
 #include <locale.h>
 #include <paths.h>
 #include <signal.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
-#include <err.h>
 
 int REC_D = '\n';
 u_char d_mask[NBINS];          /* flags for rec_d, field_d, <blank> */
@@ -71,6 +72,8 @@ struct coldesc *clist;
 int ncols = 0;
 int ND = 10;                   /* limit on number of -k options. */
 
+int (*radix_sort)(const u_char **, int, const u_char *, u_int) = radixsort;
+
 char *devstdin = _PATH_STDIN;
 char *tmpdir = _PATH_VARTMP;
 char toutpath[PATH_MAX];
@@ -172,7 +175,6 @@ main(int argc, char *argv[])
                        mflag = 1;
                        break;
                case 'H':
-                       PANIC = 0;
                        break;
                case 'y':
                        /* accept -y for backwards compat. */
@@ -186,6 +188,7 @@ main(int argc, char *argv[])
                        break;
                case 's':
                        STABLE = 1;
+                       radix_sort = sradixsort;
                        break;
                case '?':
                default:
@@ -292,8 +295,8 @@ main(int argc, char *argv[])
                act.sa_handler = onsig;
                for (i = 0; sigtable[i]; ++i)   /* always unlink toutpath */
                        if (sigaction(sigtable[i], NULL, &oact) < 0 ||
-                           oact.sa_handler != SIG_IGN &&
-                           sigaction(sigtable[i], &act, NULL) < 0)
+                           (oact.sa_handler != SIG_IGN &&
+                           sigaction(sigtable[i], &act, NULL) < 0))
                                err(2, "sigaction");
        } else
                outfile = outpath;
@@ -302,7 +305,7 @@ main(int argc, char *argv[])
        if (mflag)
                fmerge(-1, filelist, argc-optind, get, outfp, putline, fldtab);
        else
-               fsort(-1, 0, filelist, argc-optind, outfp, fldtab);
+               fsort(filelist, argc-optind, outfp, fldtab);
        if (outfile != outpath) {
                if (access(outfile, 0))
                        err(2, "%s", outfile);
Index: sort.h
===================================================================
RCS file: /cvs/src/usr.bin/sort/sort.h,v
retrieving revision 1.7
diff -u -p -r1.7 sort.h
--- sort.h      21 Aug 2007 20:29:25 -0000      1.7
+++ sort.h      2 Jul 2014 16:43:33 -0000
@@ -129,13 +129,14 @@ union f_handle {
        int top;
        char **names;
 };
-extern int PANIC;      /* maximum depth of fsort before fmerge is called */
+
 extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
 extern u_char alltable[NBINS], dtable[NBINS], itable[NBINS];
 extern u_char d_mask[NBINS];
 extern int SINGL_FLD, SEP_FLAG, UNIQUE, STABLE;
 extern int REC_D;
 extern char *tmpdir;
+extern int (*radix_sort)(const u_char **, int, const u_char *, u_int);
 extern int ND;         /* limit on number of -k options. */
 
 #include "extern.h"

Reply via email to