---
LICENSE  |   1 +
Makefile |   1 +
cut.1    |  60 +++++++++++++++++++++++
cut.c    | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 226 insertions(+)
create mode 100644 cut.1
create mode 100644 cut.c

diff --git a/LICENSE b/LICENSE
index 927f594..36f0d2c 100644
--- a/LICENSE
+++ b/LICENSE
@@ -14,6 +14,7 @@ MIT/X Consortium License
© 2012 Robert Ransom <[email protected]>
© 2013 Jakob Kramer <[email protected]>
© 2013 Anselm R Garbe <[email protected]>
+© 2013 Truls Becken <[email protected]>

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
diff --git a/Makefile b/Makefile
index 3c4840d..ce7d95a 100644
--- a/Makefile
+++ b/Makefile
@@ -36,6 +36,7 @@ SRC = \
        cmp.c      \
        comm.c     \
        cp.c       \
+       cut.c      \
        date.c     \
        dirname.c  \
        echo.c     \
diff --git a/cut.1 b/cut.1
new file mode 100644
index 0000000..0e78ddd
--- /dev/null
+++ b/cut.1
@@ -0,0 +1,60 @@
+.TH CUT 1 sbase\-VERSION
+.SH NAME
+cut \- extract columns of data
+.SH SYNOPSIS
+.B cut \-b
+.I list
+.RB [ \-n ]
+.RI [ file ...]
+.br
+.B cut \-c
+.I list
+.RI [ file ...]
+.br
+.B cut \-f
+.I list
+.RB [ \-d
+.IR delim ]
+.RB [ \-s ]
+.RI [ file ...]
+.SH DESCRIPTION
+.B cut
+extracts bytes, characters, or delimited fields from each line of the given
+files and writes them to stdout. With no file, or when file is `-', cut reads
+from stdin.
+.P
+.I list
+is a comma- or space-separated list of numbers and ranges where numbering
+starts from 1. Ranges are of the form `N-M'. If N or M is missing, the
+beginning or end of line is assumed. Numbers and ranges may be repeated,
+overlapping, and in any order. Selected input is written in the same
+order that it is read, and is written exactly once.
+.SH OPTIONS
+.TP
+.BI \-b \ list
+The
+.I list
+specifies byte positions.
+.TP
+.BI \-c \ list
+The
+.I list
+specifies character positions.
+.TP
+.BI \-d \ delim
+Use first byte of
+.I delim
+as field delimiter, instead of tab.
+.TP
+.BI \-f \ list
+The
+.I list
+specifies field numbers. Lines not containing field delimiters are
+passed through untouched.
+.TP
+.B \-n
+Do not split characters. A character is output if its last byte is
+selected.
+.TP
+.B \-s
+Suppress lines not containing field delimiters.
diff --git a/cut.c b/cut.c
new file mode 100644
index 0000000..72c20bc
--- /dev/null
+++ b/cut.c
@@ -0,0 +1,164 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "text.h"
+#include "util.h"
+
+static void
+usage(void) /* report the three accepted invocation forms through eprintf */
+{ /* NOTE(review): main() carries on as if this never returns - presumably eprintf exits; confirm in util.h */
+       eprintf("usage: cut -b list [-n] [file...]\n"
+               "       cut -c list [file...]\n"
+               "       cut -f list [-d delim] [-s] [file...]\n");
+}
+
+typedef struct Range { /* one selected span of positions, chained into list */
+       size_t min, max; /* 1-based inclusive bounds; max == 0 means "through end of line" */
+       struct Range *next; /* following span, or NULL at the tail */
+} Range;
+
+static Range *list = NULL; /* head of the span chain that insert() keeps ascending and merged */
+static char mode = 0; /* option letter 'b', 'c' or 'f' once a list was given; 0 until then */
+static char delim = '\t'; /* field separator used in mode 'f'; -d replaces it */
+static bool nflag = false; /* -n: in mode 'b', seek() backs up to a byte UTF8_POINT accepts */
+static bool sflag = false; /* -s: in mode 'f', drop lines that contain no delim */
+
+static void
+insert(Range *r) /* add span r to the global list, keeping it ascending and merging touching spans */
+{
+       Range *l, *p, *t; /* l: node under test, p: node before it, t: cursor of the merge scan */
+
+       for(p = NULL, l = list; l; p = l, l = l->next) {
+               if(r->max && r->max+1 < l->min) { /* r ends before l with a gap: r goes in front of l */
+                       r->next = l;
+                       break;
+               } else if(!l->max || r->min < l->max+2) { /* r overlaps or directly abuts l: grow l */
+                       l->min = MIN(r->min, l->min);
+                       for(p = l, t = l->next; t; p = t, t = t->next) /* walk past every node r also reaches */
+                               if(r->max && r->max+1 < t->min) break;
+                       l->max = (p->max && r->max) ? MAX(p->max, r->max) : 0; /* 0 if either side is open-ended */
+                       l->next = t; /* unlink the nodes that were absorbed */
+                       return; /* r itself is never linked in on this path */
+               }
+       }
+       if(p) p->next = r; else list = r; /* splice r after p, or make it the new head */
+}
+
+static void
+parselist(char *str) /* parse the "N", "N-M", "N-" and "-M" items of str into list; str is modified */
+{
+       char *s;
+       size_t n = 1; /* item count: one more than the number of separators */
+       Range *r;
+
+       if(!str) usage(); /* list option given without its argument */
+       for(s = str; *s; s++) {
+               if(*s == ' ') *s = ',';
+               if(*s == ',') n++;
+       }
+       if(!(r = malloc(n * sizeof(Range)))) eprintf("malloc:");
+       for(s = str; n; n--, s++) { /* a number must start with a digit: strtoul alone takes signs and blanks */
+               r->min = (*s == '-') ? 1 : (*s >= '0' && *s <= '9') ? strtoul(s, &s, 10) : 0;
+               r->max = (*s != '-') ? r->min : (*++s >= '0' && *s <= '9') ? strtoul(s, &s, 10) : 0;
+               r->next = NULL;
+               if(!r->min || (r->max && r->max < r->min) || (*s && *s != ','))
+                       eprintf("cut: bad list value\n");
+               insert(r++);
+       }
+}
+
+static size_t
+seek(const char *s, size_t pos, size_t *prev, size_t count) /* byte offset from s to position pos */
+{
+       const char *t;
+       size_t n = pos - *prev; /* bytes, characters or fields still to advance, depending on mode */
+
+       if(mode == 'b') {
+               if((t = memchr(s, 0, n))) /* line ends within n bytes: stop at its NUL, *prev left alone */
+                       return t - s;
+               if(nflag)
+                       while(n && !UTF8_POINT(s[n])) n--; /* back up until UTF8_POINT accepts s[n] - presumably a character start; confirm */
+               *prev += n;
+               return n;
+       } else if(mode == 'c') {
+               for(n++, t = s; *t; t++) /* count bytes UTF8_POINT accepts, the one at s included (hence n++) */
+                       if(UTF8_POINT(*t) && !--n) break;
+       } else {
+               for(t = (count < 2) ? s : s+1; n && *t; t++) /* NOTE(review): count >= 2 reads s[1] without testing s[0] */
+                       if(*t == delim && !--n && count) break; /* count == 0: no break, so t also steps past the n-th delim */
+       }
+       *prev = pos;
+       return t - s;
+}
+
+/* Write the parts of every line of fp that list selects to stdout. */
+static void
+cut(FILE *fp)
+{
+       static char *buf = NULL; /* line buffer kept across calls */
+       static size_t size = 0;
+       char *s;
+       size_t i, n, p;
+       Range *r;
+
+       while(afgets(&buf, &size, fp)) {
+               /* strlen can be 0 (line starting with a NUL byte): never index buf[-1] */
+               if((i = strlen(buf)) && buf[i-1] == '\n') buf[i-1] = 0;
+               if(mode == 'f' && !strchr(buf, delim)) {
+                       if(!sflag) puts(buf);
+                       continue;
+               }
+               /* "&& *s": at end of line seek() would scan from s+1, past the NUL */
+               for(i = 0, p = 1, s = buf, r = list; r && *s; r = r->next, s += n) {
+                       s += seek(s, r->min, &p, i++);
+                       if(!*s) break;
+                       if(!r->max) {
+                               fputs(s, stdout);
+                               break;
+                       }
+                       n = seek(s, r->max + 1, &p, i++);
+                       if(fwrite(s, 1, n, stdout) != n) eprintf("write error:");
+               }
+               putchar('\n');
+       }
+}
+
+/* Parse the options, then cut every named file, or stdin when none is named. */
+int
+main(int argc, char *argv[])
+{
+       FILE *fp;
+       char *arg; /* option argument; ARGF() is evaluated once and checked before use */
+
+       ARGBEGIN {
+       case 'b': case 'c': case 'f':
+               mode = ARGC();
+               if(!(arg = ARGF())) usage(); /* list argument missing */
+               parselist(arg);
+               break;
+       case 'd':
+               if(!(arg = ARGF())) usage(); /* delimiter argument missing */
+               delim = *arg;
+               break;
+       case 'n':
+               nflag = true;
+               break;
+       case 's':
+               sflag = true;
+               break;
+       default:
+               usage();
+       } ARGEND;
+
+       if(!mode) usage();
+       if(!argc) cut(stdin);
+       else for(; argc--; argv++) {
+               if(!(fp = strcmp(*argv, "-") ? fopen(*argv, "r") : stdin))
+                       eprintf("fopen %s:", *argv);
+               cut(fp);
+               if(fp != stdin) fclose(fp); /* keep stdin open: "-" may be named again */
+       }
+       return EXIT_SUCCESS;
+}
-- 
1.8.3.1


Reply via email to