[dev] [sbase] [PATCH] Add cut(1)

2013-10-08 Thread Truls Becken
---
LICENSE  |   1 +
Makefile |   1 +
cut.1|  60 +++
cut.c| 164 +++
4 files changed, 226 insertions(+)
create mode 100644 cut.1
create mode 100644 cut.c

diff --git a/LICENSE b/LICENSE
index 927f594..36f0d2c 100644
--- a/LICENSE
+++ b/LICENSE
@@ -14,6 +14,7 @@ MIT/X Consortium License
© 2012 Robert Ransom rransom.8...@gmail.com
© 2013 Jakob Kramer jakob.kra...@gmx.de
© 2013 Anselm R Garbe ans...@garbe.us
+© 2013 Truls Becken truls.bec...@gmail.com

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the Software),
diff --git a/Makefile b/Makefile
index 3c4840d..ce7d95a 100644
--- a/Makefile
+++ b/Makefile
@@ -36,6 +36,7 @@ SRC = \
cmp.c  \
comm.c \
cp.c   \
+   cut.c  \
date.c \
dirname.c  \
echo.c \
diff --git a/cut.1 b/cut.1
new file mode 100644
index 000..0e78ddd
--- /dev/null
+++ b/cut.1
@@ -0,0 +1,60 @@
+.TH CUT 1 sbase\-VERSION
+.SH NAME
+cut \- extract columns of data
+.SH SYNOPSIS
+.B cut \-b
+.I list
+.RB [ \-n ]
+.RI [ file ...]
+.br
+.B cut \-c
+.I list
+.RI [ file ...]
+.br
+.B cut \-f
+.I list
+.RB [ \-d
+.IR delim ]
+.RB [ \-s ]
+.RI [ file ...]
+.SH DESCRIPTION
+.B cut
+out bytes, characters, or delimited fields from each line of the given
+files and write to stdout. With no file, or when file is `-', cut reads
+from stdin.
+.P
+.I list
+is a comma or space separated list of numbers and ranges where numbering
+starts from 1. Ranges are of the form `N-M'. If N or M is missing, the
+beginning or end of line is assumed. Numbers and ranges may be repeated,
+overlapping, and in any order. Selected input is written in the same
+order that it is read, and is written exactly once.
+.SH OPTIONS
+.TP
+.BI \-b \ list
+The
+.I list
+specifies byte positions.
+.TP
+.BI \-c \ list
+The
+.I list
+specifies character positions.
+.TP
+.BI \-d \ delim
+Use first byte of
+.I delim
+as field delimiter, instead of tab.
+.TP
+.BI \-f \ list
+The
+.I list
+specifies field numbers. Lines not containing field delimiters are
+passed through untouched.
+.TP
+.B \-n
+Do not split characters. A character is output if its last byte is
+selected.
+.TP
+.B \-s
+Suppress lines not containing field delimiters.
diff --git a/cut.c b/cut.c
new file mode 100644
index 000..72c20bc
--- /dev/null
+++ b/cut.c
@@ -0,0 +1,164 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "text.h"
+#include "util.h"
+
+/*
+ * Report the three supported invocation forms (-b, -c, -f) via eprintf().
+ * NOTE(review): eprintf() comes from util.h and is presumably fatal
+ * (prints and exits) -- confirm against util.h.
+ */
+static void
+usage(void)
+{
+	eprintf("usage: cut -b list [-n] [file...]\n"
+	        "       cut -c list [file...]\n"
+	        "       cut -f list [-d delim] [-s] [file...]\n");
+}
+
+typedef struct Range {  /* one selected interval of positions, chained into a singly linked list */
+   size_t min, max;  /* inclusive bounds, numbered from 1; max == 0 is treated by insert() as open-ended */
+   struct Range *next;  /* following range in the list, or NULL at the tail */
+} Range;
+
+static Range *list = NULL;  /* head of the range list built by insert(); NULL while empty */
+static char mode = 0;  /* selection mode; seek() tests for 'b' and 'c', anything else takes the field path */
+static char delim = '\t';  /* field delimiter byte, tab unless changed (the man page documents -d) */
+static bool nflag = false;  /* in 'b' mode, makes seek() back up to a UTF8_POINT byte instead of splitting a character */
+static bool sflag = false;  /* presumably set by -s (suppress lines without delimiters) -- not used in the visible code */
+
+/*
+ * Add range r to the global `list`.
+ *
+ * The list is kept ordered by min, and entries never overlap or touch:
+ * a new range that overlaps or is adjacent to an existing entry is merged
+ * into that entry instead of being linked in.  A max of 0 means the range
+ * is open-ended, i.e. runs to the end of the line.
+ *
+ * r->next is expected to be NULL on entry (parselist() sets it), because
+ * r is linked as the new tail when no later entry follows it.
+ */
+static void
+insert(Range *r)
+{
+	Range *l, *p, *t;
+
+	for(p = NULL, l = list; l; p = l, l = l->next) {
+		if(r->max && r->max+1 < l->min) {
+			/* r ends before l starts, with a gap: r goes in front of l */
+			r->next = l;
+			break;
+		} else if(!l->max || r->min < l->max+2) {
+			/* r overlaps or is adjacent to l: widen l rather than linking r */
+			l->min = MIN(r->min, l->min);
+			/* absorb every following entry that r also reaches;
+			 * p ends up on the last absorbed entry, t on the first survivor */
+			for(p = l, t = l->next; t; p = t, t = t->next)
+				if(r->max && r->max+1 < t->min) break;
+			/* the merged range is open-ended if either contributor was */
+			l->max = (p->max && r->max) ? MAX(p->max, r->max) : 0;
+			l->next = t;
+			return;
+		}
+	}
+	/* no merge happened: link r after p, or make it the new head */
+	if(p) p->next = r; else list = r;
+}
+
+/*
+ * Parse a list argument such as "1,3-5,7-" and insert() each item into
+ * the global range list.
+ *
+ * str is modified in place: spaces are rewritten to commas so that both
+ * separators are handled alike.  Accepted items are "N", "N-M", "-M"
+ * (min defaults to 1) and "N-" (strtoul() converts nothing and yields 0,
+ * which insert() treats as open-ended).
+ *
+ * A zero minimum, a maximum below the minimum, or any character other
+ * than ',' or the terminating NUL after an item is reported through
+ * eprintf("cut: bad list value\n").
+ *
+ * All Range entries are carved out of one allocation sized by the number
+ * of separators plus one; it is never freed here.
+ */
+static void
+parselist(char *str)
+{
+	char *s;
+	size_t n = 1;
+	Range *r;
+
+	/* normalise separators and count the items */
+	for(s = str; *s; s++) {
+		if(*s == ' ') *s = ',';
+		if(*s == ',') n++;
+	}
+	if(!(r = malloc(n * sizeof(Range))))
+		eprintf("malloc:");
+	/* one item per iteration; the loop's s++ steps over the ',' */
+	for(s = str; n; n--, s++) {
+		r->min = (*s == '-') ? 1 : strtoul(s, &s, 10);
+		r->max = (*s == '-') ? strtoul(++s, &s, 10) : r->min;
+		r->next = NULL;
+		if(!r->min || (r->max && r->max < r->min) || (*s && *s != ','))
+			eprintf("cut: bad list value\n");
+		insert(r++);
+	}
+}
+
+static size_t
+seek(const char *s, size_t pos, size_t *prev, size_t count)
+{
+   const char *t;
+   size_t n = pos - *prev;
+
+   if(mode == 'b') {
+   if((t = memchr(s, 0, n)))
+   return t - s;
+   if(nflag)
+   while(n  !UTF8_POINT(s[n])) n--;
+   *prev += n;
+   return n;
+   } else if(mode == 'c') {
+   for(n++, t = s; *t; t++)
+   if(UTF8_POINT(*t)  !--n) break;
+   } else {
+   for(t = (count  2) ? s : s+1; n 

Re: [dev] [sbase] [PATCH] Add cut(1)

2013-10-08 Thread sin
On Tue, Oct 08, 2013 at 09:23:43PM +0200, Truls Becken wrote:
> ---
> LICENSE  |   1 +
> Makefile |   1 +
> cut.1|  60 +++
> cut.c| 164 +++
> 4 files changed, 226 insertions(+)
> create mode 100644 cut.1
> create mode 100644 cut.c

Looks good, will test this and apply it tomorrow.

Thanks,
sin