commit d63e45fc3f33cf4f7fa40d271d14c332f4191b0f
Author: FRIGN <[email protected]>
Date:   Thu Jan 22 20:19:48 2015 +0100

    Add support for arbitrary length delimiters in cut(1)
    
    Having multibyte delimiters is not enough. For full flexibility,
    the possibility of cutting input lines with arbitrary length delimiters
    is the real deal.
    Given this functionality, it only sounds reasonable to also add support
    to resolve escapes.
    Thanks to Truls Becken for making the suggestion and designing such a
    flexible cut(1)-implementation!

diff --git a/cut.1 b/cut.1
index 72654e1..bd242a0 100644
--- a/cut.1
+++ b/cut.1
@@ -67,4 +67,4 @@ utility is compliant with the
 specification.
 .Pp
 The possibility of separating numbers and ranges with a space
-and specifying multibyte delimiters is an extension to that specification.
+and specifying multibyte delimiters of arbitrary length is an extension to that specification.
diff --git a/cut.c b/cut.c
index 0e74ccd..5515f20 100644
--- a/cut.c
+++ b/cut.c
@@ -14,7 +14,7 @@ typedef struct Range {
 
 static Range *list     = NULL;
 static char   mode     = 0;
-static Rune   delim    = '\t';
+static char  *delim    = "\t";
 static size_t delimlen = 1;
 static int    nflag    = 0;
 static int    sflag    = 0;
@@ -73,7 +73,6 @@ seek(const char *s, size_t pos, size_t *prev, size_t count)
 {
        const char *t;
        size_t n = pos - *prev, i;
-       Rune r;
 
        if (mode == 'b') {
                if ((t = memchr(s, '\0', n)))
@@ -89,12 +88,13 @@ seek(const char *s, size_t pos, size_t *prev, size_t count)
                                break;
        } else {
                for (t = (count < delimlen + 1) ? s : s + delimlen; n && *t; ) {
-                       for (i = 1; t[i]; i++)
-                               if (fullrune(t, i))
+                       if (!strncmp(t, delim, delimlen)) {
+                               if (!--n && count)
                                        break;
-                       charntorune(&r, t, i);
-                       if (r == delim && !--n && count)
-                               break;
+                               t += delimlen;
+                               continue;
+                       }
+                       for (i = 1; !fullrune(t, i); i++);
                        t += i;
                }
        }
@@ -116,7 +116,7 @@ cut(FILE *fp)
        while ((len = getline(&buf, &size, fp)) != -1) {
                if (len && buf[len - 1] == '\n')
                        buf[len - 1] = '\0';
-               if (mode == 'f' && !utfrune(buf, delim)) {
+               if (mode == 'f' && !utfutf(buf, delim)) {
                        if (!sflag)
                                puts(buf);
                        continue;
@@ -139,6 +139,36 @@ cut(FILE *fp)
        }
 }
 
+static size_t
+resolveescapes(char *s, size_t len)
+{
+       size_t i, off, m;
+
+       for (i = 0; i < len - 1; i++) {
+               if (s[i] != '\\')
+                       continue;
+               off = 0;
+
+               switch (s[i + 1]) {
+                case '\\': s[i] = '\\'; off++; break;
+                case 'a':  s[i] = '\a'; off++; break;
+                case 'b':  s[i] = '\b'; off++; break;
+                case 'f':  s[i] = '\f'; off++; break;
+                case 'n':  s[i] = '\n'; off++; break;
+                case 'r':  s[i] = '\r'; off++; break;
+                case 't':  s[i] = '\t'; off++; break;
+                case 'v':  s[i] = '\v'; off++; break;
+               default:   continue;
+               }
+
+               for (m = i + 1; m <= len - off; m++)
+                       s[m] = s[m + off];
+               len -= off;
+       }
+
+       return len;
+}
+
 static void
 usage(void)
 {
@@ -151,24 +181,17 @@ int
 main(int argc, char *argv[])
 {
        FILE *fp;
-       int i;
-       char *m, *d;
 
        ARGBEGIN {
        case 'b':
        case 'c':
        case 'f':
                mode = ARGC();
-               m = EARGF(usage());
-               parselist(m);
+               parselist(EARGF(usage()));
                break;
        case 'd':
-               d = EARGF(usage());
-               for (i = 1; i <= strlen(d); i++)
-                       if (fullrune(d, i))
-                               break;
-               charntorune(&delim, d, i);
-               delimlen = i;
+               delim = EARGF(usage());
+               delimlen = resolveescapes(delim, strlen(delim));
                break;
        case 'n':
                nflag = 1;

Reply via email to