Module Name: othersrc
Committed By: dyoung
Date: Tue Sep 22 01:12:09 UTC 2015
Modified Files:
othersrc/external/bsd/arfe/dt: Makefile README dt.c hex.c hex.h ipv4.c
ipv4.h macaddr.c
othersrc/external/bsd/arfe/it: Makefile README
othersrc/external/bsd/arfe/tt: Makefile README
Added Files:
othersrc/external/bsd/arfe/dt: core.c core.h
othersrc/external/bsd/arfe/it: it.c
othersrc/external/bsd/arfe/tt: tt.c
Removed Files:
othersrc/external/bsd/arfe/dt: dt.h
Log Message:
Give dt, it, and tt custom main() routines instead of using a bunch
of conditionals. Put the core algorithms and data structures into
core.[ch]. Retire dt.h (it became core.h). Update Makefiles to suit
and alphabetize SRCS.
To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 othersrc/external/bsd/arfe/dt/Makefile \
othersrc/external/bsd/arfe/dt/hex.c othersrc/external/bsd/arfe/dt/hex.h \
othersrc/external/bsd/arfe/dt/ipv4.c othersrc/external/bsd/arfe/dt/ipv4.h
cvs rdiff -u -r1.8 -r1.9 othersrc/external/bsd/arfe/dt/README
cvs rdiff -u -r0 -r1.1 othersrc/external/bsd/arfe/dt/core.c \
othersrc/external/bsd/arfe/dt/core.h
cvs rdiff -u -r1.12 -r1.13 othersrc/external/bsd/arfe/dt/dt.c
cvs rdiff -u -r1.4 -r0 othersrc/external/bsd/arfe/dt/dt.h
cvs rdiff -u -r1.2 -r1.3 othersrc/external/bsd/arfe/dt/macaddr.c
cvs rdiff -u -r1.3 -r1.4 othersrc/external/bsd/arfe/it/Makefile
cvs rdiff -u -r1.6 -r1.7 othersrc/external/bsd/arfe/it/README
cvs rdiff -u -r0 -r1.1 othersrc/external/bsd/arfe/it/it.c
cvs rdiff -u -r1.1 -r1.2 othersrc/external/bsd/arfe/tt/Makefile
cvs rdiff -u -r1.3 -r1.4 othersrc/external/bsd/arfe/tt/README
cvs rdiff -u -r0 -r1.1 othersrc/external/bsd/arfe/tt/tt.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: othersrc/external/bsd/arfe/dt/Makefile
diff -u othersrc/external/bsd/arfe/dt/Makefile:1.4 othersrc/external/bsd/arfe/dt/Makefile:1.5
--- othersrc/external/bsd/arfe/dt/Makefile:1.4 Fri Sep 11 01:50:42 2015
+++ othersrc/external/bsd/arfe/dt/Makefile Tue Sep 22 01:12:09 2015
@@ -1,9 +1,9 @@
-# $ARFE: Makefile 239 2015-09-10 22:49:40Z dyoung $
+# $ARFE: Makefile 250 2015-09-22 01:04:13Z dyoung $
NOMAN=
.include <bsd.own.mk>
PROG=dt
-SRCS+=dt.c hex.c ipv4.c macaddr.c
+SRCS+=core.c dt.c hex.c ipv4.c macaddr.c
#CPPFLAGS+=-DHB_DEBUG
CPPFLAGS+=-DHB_ASSERT
DBG+=-g -O3
Index: othersrc/external/bsd/arfe/dt/hex.c
diff -u othersrc/external/bsd/arfe/dt/hex.c:1.4 othersrc/external/bsd/arfe/dt/hex.c:1.5
--- othersrc/external/bsd/arfe/dt/hex.c:1.4 Mon Sep 14 02:58:17 2015
+++ othersrc/external/bsd/arfe/dt/hex.c Tue Sep 22 01:12:09 2015
@@ -1,5 +1,5 @@
-/* $NetBSD: hex.c,v 1.4 2015/09/14 02:58:17 dyoung Exp $ */
-/* $ARFE: hex.c 245 2015-09-11 02:13:21Z dyoung $ */
+/* $NetBSD: hex.c,v 1.5 2015/09/22 01:12:09 dyoung Exp $ */
+/* $ARFE: hex.c 251 2015-09-22 01:09:13Z dyoung $ */
/*-
* Copyright (c) 2014,2015 David Young <[email protected]>
@@ -30,7 +30,7 @@
#include <stdlib.h>
#include <string.h>
-#include "dt.h"
+#include "core.h"
#include "hex.h"
typedef enum hex_op {
Index: othersrc/external/bsd/arfe/dt/hex.h
diff -u othersrc/external/bsd/arfe/dt/hex.h:1.4 othersrc/external/bsd/arfe/dt/hex.h:1.5
--- othersrc/external/bsd/arfe/dt/hex.h:1.4 Mon Sep 14 02:58:17 2015
+++ othersrc/external/bsd/arfe/dt/hex.h Tue Sep 22 01:12:09 2015
@@ -1,5 +1,5 @@
-/* $NetBSD: hex.h,v 1.4 2015/09/14 02:58:17 dyoung Exp $ */
-/* $ARFE: hex.h 245 2015-09-11 02:13:21Z dyoung $ */
+/* $NetBSD: hex.h,v 1.5 2015/09/22 01:12:09 dyoung Exp $ */
+/* $ARFE: hex.h 251 2015-09-22 01:09:13Z dyoung $ */
/*-
* Copyright (c) 2014,2015 David Young <[email protected]>
Index: othersrc/external/bsd/arfe/dt/ipv4.c
diff -u othersrc/external/bsd/arfe/dt/ipv4.c:1.4 othersrc/external/bsd/arfe/dt/ipv4.c:1.5
--- othersrc/external/bsd/arfe/dt/ipv4.c:1.4 Mon Sep 14 02:58:17 2015
+++ othersrc/external/bsd/arfe/dt/ipv4.c Tue Sep 22 01:12:09 2015
@@ -1,5 +1,5 @@
-/* $NetBSD: ipv4.c,v 1.4 2015/09/14 02:58:17 dyoung Exp $ */
-/* $ARFE: ipv4.c 247 2015-09-14 02:52:57Z dyoung $ */
+/* $NetBSD: ipv4.c,v 1.5 2015/09/22 01:12:09 dyoung Exp $ */
+/* $ARFE: ipv4.c 251 2015-09-22 01:09:13Z dyoung $ */
/*-
* Copyright (c) 2014,2015 David Young <[email protected]>
@@ -33,7 +33,7 @@
#include <sys/cdefs.h> /* for __predict_true() */
-#include "dt.h"
+#include "core.h"
#include "ipv4.h"
typedef enum ipv4_op {
Index: othersrc/external/bsd/arfe/dt/ipv4.h
diff -u othersrc/external/bsd/arfe/dt/ipv4.h:1.4 othersrc/external/bsd/arfe/dt/ipv4.h:1.5
--- othersrc/external/bsd/arfe/dt/ipv4.h:1.4 Mon Sep 14 02:58:17 2015
+++ othersrc/external/bsd/arfe/dt/ipv4.h Tue Sep 22 01:12:09 2015
@@ -1,5 +1,5 @@
-/* $NetBSD: ipv4.h,v 1.4 2015/09/14 02:58:17 dyoung Exp $ */
-/* $ARFE: ipv4.h 245 2015-09-11 02:13:21Z dyoung $ */
+/* $NetBSD: ipv4.h,v 1.5 2015/09/22 01:12:09 dyoung Exp $ */
+/* $ARFE: ipv4.h 251 2015-09-22 01:09:13Z dyoung $ */
/*-
* Copyright (c) 2014,2015 David Young <[email protected]>
Index: othersrc/external/bsd/arfe/dt/README
diff -u othersrc/external/bsd/arfe/dt/README:1.8 othersrc/external/bsd/arfe/dt/README:1.9
--- othersrc/external/bsd/arfe/dt/README:1.8 Mon Sep 14 02:58:17 2015
+++ othersrc/external/bsd/arfe/dt/README Tue Sep 22 01:12:09 2015
@@ -1,5 +1,5 @@
-$ARFE: README 245 2015-09-11 02:13:21Z dyoung $
-$NetBSD: README,v 1.8 2015/09/14 02:58:17 dyoung Exp $
+$ARFE: README 251 2015-09-22 01:09:13Z dyoung $
+$NetBSD: README,v 1.9 2015/09/22 01:12:09 dyoung Exp $
DT---(d)ifferentiate (t)ext---finds a longest common subsequence (LCS)
of two texts where the numbers and IPv4 addresses are "wild": a span
Index: othersrc/external/bsd/arfe/dt/dt.c
diff -u othersrc/external/bsd/arfe/dt/dt.c:1.12 othersrc/external/bsd/arfe/dt/dt.c:1.13
--- othersrc/external/bsd/arfe/dt/dt.c:1.12 Mon Sep 14 02:58:17 2015
+++ othersrc/external/bsd/arfe/dt/dt.c Tue Sep 22 01:12:09 2015
@@ -1,5 +1,5 @@
-/* $NetBSD: dt.c,v 1.12 2015/09/14 02:58:17 dyoung Exp $ */
-/* $ARFE: dt.c 247 2015-09-14 02:52:57Z dyoung $ */
+/* $NetBSD: dt.c,v 1.13 2015/09/22 01:12:09 dyoung Exp $ */
+/* $ARFE: dt.c 251 2015-09-22 01:09:13Z dyoung $ */
/*-
* Copyright (c) 2014,2015 David Young <[email protected]>
@@ -25,11 +25,9 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <assert.h>
#include <ctype.h> /* for ispunct(), isspace(), isxdigit() */
#include <err.h>
#include <errno.h>
-#include <fcntl.h>
#include <inttypes.h>
#include <stdarg.h>
#include <stdbool.h>
@@ -39,1080 +37,12 @@
#include <limits.h>
#include <string.h>
-#include <sys/socket.h> /* for inet_net_pton */
-#include <netinet/in.h> /* for inet_net_pton */
-#include <arpa/inet.h> /* for inet_net_pton */
-
#include <sys/param.h> /* for MAX() */
#include <sys/queue.h> /* for TAILQ_*() */
#include <sys/mman.h>
#include <sys/stat.h>
-#include "dt.h"
-#include "hex.h"
-#include "ipv4.h"
-#include "macaddr.h"
-
-#if defined(HB_ASSERT)
-#define dbg_assert assert
-#else
-#define dbg_assert(__x) do { } while (false) /* do nothing */
-#endif
-
-typedef struct origin {
- int i;
- int j;
-} origin_t;
-
-typedef struct slice {
- const char *first;
- const char *last;
- bool backward;
-} slice_t;
-
-typedef enum {
- KIND_NONE = 0
- , KIND_INTMAX_DEC
- , KIND_INTMAX_HEX
- , KIND_INTMAX_0xHEX
- , KIND_IPv4
- , KIND_MACADDR
- , KIND_STRING
-} clocc_kind_t;
-
-typedef enum {
- QUAL_NONE = 0
- , QUAL_ALLCAPS
-} clocc_qual_t;
-
-struct chainelt;
-typedef struct chainelt chainelt_t;
-
-typedef struct clocc {
- int column;
- int first;
- int last;
- clocc_kind_t kind;
- clocc_qual_t qual;
- union {
- intmax_t u_intmax;
- uintmax_t u_uintmax;
- uint32_t u_ipv4;
- uint8_t u_macaddr[6];
- } val_u;
- TAILQ_ENTRY(clocc) bucket;
- chainelt_t *match;
-} clocc_t;
-
-struct chainelt {
- TAILQ_ENTRY(chainelt) ce_link;
- int ce_column; /* 0: no column alignment,
- * -x: starts at column x (1 = first column)
- * x: ends at column x (1 = first column)
- */
- char *ce_content;
- int ce_match_idx;
-};
-
-typedef struct chain {
- TAILQ_HEAD(chainhead, chainelt) c_head;
-} chain_t;
-
-TAILQ_HEAD(clocc_bucket, clocc);
-
-typedef struct clocc_bucket clocc_bucket_t;
-
-enum {
- MATCH_TBL_IDX = 0
- , TRANSFORM_TBL_IDX = 1
- , NTBL = 2
-};
-
-typedef struct clocc_htbl {
- clocc_bucket_t bucket[1024];
-} clocc_htbl_t;
-
-typedef struct cloccs {
- int noccs;
- clocc_t occs[1024];
- clocc_htbl_t htbl;
-} cloccs_t;
-
-typedef struct scratch {
- size_t *L1; /* n + 1 */
- size_t *L2; /* n + 1 */
- size_t *K[2]; /* 2 x (n + 1) */
- size_t *Kclocc; /* n + 1 */
- cloccs_t Aoccs, Boccs;
- slice_t *Abasis, *Bbasis;
- size_t expected_lcs;
- char ipv4_op; /* 'p' for prefix (smallest containing subnet),
- * 'l' for copy left argument
- */
- char macaddr_op;/* 'p' for prefix (leading octets in common),
- * 'l' for copy left argument
- */
- char dec_op; /* '+' or '-' for add or subtract, 'l' for copy left
- * argument
- */
- char hex_op; /* '&' or '|' for bitwise-AND and -OR, 'l' for copy
- * left argument
- */
-} scratch_t;
-
-union freeslice;
-typedef union freeslice freeslice_t;
-
-union freeslice {
- slice_t s;
- freeslice_t *next;
-};
-
-static const char *names[] = {
- "abe"
- , "ada"
- , "daria"
-};
-
-static const size_t no_expected_lcs = SIZE_T_MAX;
-
-static bool freeslices_initialized __aligned(COHERENCY_UNIT);
-static freeslice_t *first_freeslice __aligned(COHERENCY_UNIT);
-static freeslice_t freeslices[2048];
-
-static void
-init_freeslices(void)
-{
- unsigned int i;
-
- for (i = 0; i < __arraycount(freeslices) - 1; i++)
- freeslices[i].next = &freeslices[i + 1];
- freeslices[i].next = NULL;
-
- first_freeslice = &freeslices[0];
-
- freeslices_initialized = true;
-}
-
-static slice_t *
-slice_alloc(void)
-{
- slice_t *s;
-
- if (!freeslices_initialized)
- init_freeslices();
-
- s = &first_freeslice->s;
- first_freeslice = first_freeslice->next;
- return s;
-}
-
-static void
-slice_free(slice_t *s)
-{
- freeslice_t *fs = (freeslice_t *)s;
-
- fs->next = first_freeslice;
- first_freeslice = fs;
-}
-
-static slice_t *
-newslice(char *buf, size_t buflen)
-{
- slice_t *s = slice_alloc();
-
- if (s == NULL)
- return NULL;
-
- s->backward = false;
-
- s->first = buf;
- s->last = buf + buflen - 1;
-
- return s;
-}
-
-static slice_t *
-file_to_slice(const char *fn)
-{
- const int fd = open(fn, O_RDONLY);
- struct stat st;
- void *p;
-
- if (fd == -1)
- err(EXIT_FAILURE, "%s: open", __func__);
-
- if (fstat(fd, &st) == -1)
- err(EXIT_FAILURE, "%s: stat", __func__);
-
- p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED|MAP_FILE, fd, 0);
-
- if (p == MAP_FAILED)
- err(EXIT_FAILURE, "%s: mmap", __func__);
-
- return newslice(p, st.st_size);
-}
-
-static ssize_t
-length(const slice_t *s)
-{
- if (s->first > s->last)
- return 0;
-
- return s->last - s->first + 1;
-}
-
-static bool
-ispresent(const slice_t *s, char c)
-{
- const char *p;
-
- for (p = s->first; p <= s->last; p++) {
- if (c == *p)
- return true;
- }
- return false;
-}
-
-static inline char
-get(const slice_t *s, int i)
-{
- dbg_assert(0 <= i && i < length(s));
-
- return (s->backward) ? s->last[-i] : s->first[i];
-}
-
-static inline const char *
-getp(const slice_t *s, int i)
-{
- dbg_assert(0 <= i && i < length(s));
-
- return (s->backward) ? &s->last[-i] : &s->first[i];
-}
-
-static void
-extract(const slice_t *s, unsigned int i, unsigned int j, char *o)
-{
- unsigned int k, inc;
- char *p;
-
- inc = (i < j) ? 1 : -1;
- p = o;
-
- for (k = i; ; k += inc) {
- *p++ = get(s, k);
- if (k == j)
- break;
- }
-
- *p = '\0';
-}
-
-static slice_t *
-subslice(const slice_t *s, int i, int j)
-{
- slice_t *ss;
- const char *first, *last;
-
- dbg_assert(MAX(i, j) < length(s));
-
- ss = slice_alloc();
-
- if (ss == NULL)
- return NULL;
-
- first = getp(s, i);
- last = getp(s, j);
-
- ss->backward = first > last;
-
- if (ss->backward) {
- ss->first = last;
- ss->last = first;
- } else {
- ss->first = first;
- ss->last = last;
- }
-
- return ss;
-}
-
-static bool
-clocc_starts_at(const cloccs_t *o, int first, ssize_t *wlenp,
- clocc_kind_t *kindp)
-{
- const clocc_t *occ;
- int l, m, r;
-
- l = 0;
- r = o->noccs - 1;
-
- while (l <= r) {
- m = (l + r) / 2;
- occ = &o->occs[m];
- if (first < occ->first)
- r = m - 1;
- else if (occ->first < first)
- l = m + 1;
- else
- goto found;
- }
- return false;
-
-found:
- if (kindp != NULL)
- *kindp = occ->kind;
- if (wlenp != NULL)
- *wlenp = occ->last - occ->first + 1;
- return true;
-}
-
-static bool
-clocc_ends_at(const cloccs_t *o, int last, ssize_t *wlenp, clocc_kind_t *kindp)
-{
- const clocc_t *occ;
- int l, m, r;
-
- l = 0;
- r = o->noccs - 1;
-
- while (l <= r) {
- m = (l + r) / 2;
- occ = &o->occs[m];
- if (last < occ->last)
- r = m - 1;
- else if (occ->last < last)
- l = m + 1;
- else
- goto found;
- }
- return false;
-
-found:
- if (kindp != NULL)
- *kindp = occ->kind;
- if (wlenp != NULL)
- *wlenp = occ->last - occ->first + 1;
- return true;
-}
-
-static int
-index_versus_basis(const slice_t *basis, const slice_t *s, int i)
-{
- dbg_assert(!basis->backward);
-
- return getp(s, i) - getp(basis, 0);
-}
-
-static bool
-clocc_starts_in_slice_at(const cloccs_t *o, const slice_t *basis,
- const slice_t *s, int i, size_t *wlenp)
-{
- const int iprime = index_versus_basis(basis, s, i);
-
- return (s->backward) ? clocc_ends_at(o, iprime, NULL, NULL)
- : clocc_starts_at(o, iprime, NULL, NULL);
-}
-
-static bool
-clocc_ends_in_slice_at(const cloccs_t *o, const slice_t *basis,
- const slice_t *s, int i, ssize_t *wlenp, clocc_kind_t *kindp)
-{
- const int iprime = index_versus_basis(basis, s, i);
-
- return (s->backward) ? clocc_starts_at(o, iprime, wlenp, kindp)
- : clocc_ends_at(o, iprime, wlenp, kindp);
-}
-
-static chain_t *
-newchain_with_match_idx(char *content, int column, int midx)
-{
- chain_t *c;
- chainelt_t *ce;
-
- c = malloc(sizeof(*c));
- assert(c != NULL);
-
- ce = malloc(sizeof(*ce));
- assert(c != NULL);
-
- ce->ce_column = column;
- ce->ce_content = content;
- ce->ce_match_idx = midx;
- TAILQ_INIT(&c->c_head);
- TAILQ_INSERT_TAIL(&c->c_head, ce, ce_link);
-
- return c;
-}
-
-static chain_t *
-newchain(char *content, int column)
-{
- return newchain_with_match_idx(content, column, -1);
-}
-
-/* Return 0 if the `a' and `b' are not left- or right-aligned on
- * the same column, the column they right-align on, or zero minus
- * the column they left-align on.
- */
-static int
-cloccs_to_column(const clocc_t * const a, const clocc_t * const b)
-{
- if (a->column + a->last - a->first ==
- b->column + b->last - b->first)
- return b->column + b->last - b->first;
- else if (a->column == b->column)
- return -a->column;
- else
- return 0;
-}
-
-static unsigned int
-clocc_hash(const slice_t *s, const clocc_t *c)
-{
- unsigned int hash;
- int i;
-
- assert(c->last >= c->first);
-
- for (hash = c->kind, i = c->first; i <= c->last; i++) {
- if (((hash << 1) >> 1) != hash) {
- hash <<= 1;
- hash |= 0x1;
- } else
- hash <<= 1;
-
- hash ^= get(s, i);
- }
-
- return hash;
-}
-
-/* Look up the class occurrence `occ` on slice `s` in the class occurrences
- * `occs` occurring on slice `os`.
- */
-static clocc_t *
-clocc_cross_lookup(cloccs_t *occs, const slice_t *os, const slice_t *s,
- clocc_t *occ)
-{
- clocc_htbl_t *tbl = &occs->htbl;
- unsigned int hash = clocc_hash(s, occ);
- clocc_bucket_t *bkt = &tbl->bucket[hash % __arraycount(tbl->bucket)];
- clocc_t *bocc;
- int i, n;
-
- TAILQ_FOREACH(bocc, bkt, bucket) {
- assert(bocc->first <= bocc->last);
- if (bocc->last - bocc->first != occ->last - occ->first)
- continue;
- n = bocc->last - bocc->first + 1;
- for (i = 0; i < n; i++) {
- if (get(os, bocc->first + i) != get(s, occ->first + i))
- break;
- }
- if (i == n)
- return bocc;
- }
- return NULL;
-}
-
-static chain_t *
-mac_clocc(const scratch_t *scratch, int idx_a, int idx_b)
-{
- int column, i, rc;
- char *buf;
- uint8_t combo[6];
- const clocc_t * const a = &scratch->Aoccs.occs[idx_a],
- * const b = &scratch->Boccs.occs[idx_b];
-
- column = cloccs_to_column(a, b);
-
- switch (scratch->macaddr_op) {
- case 'p':
- for (i = 0; i < 6; i++) {
- uint8_t l, r;
- l = a->val_u.u_macaddr[i];
- r = b->val_u.u_macaddr[i];
- combo[i] = (l == r) ? l : 0;
- }
- break;
- default:
- case 'l':
- memcpy(combo, a->val_u.u_macaddr, sizeof(combo));
- break;
- }
-
- rc = asprintf(&buf, "%02" PRIx8 ":%02" PRIx8 ":%02" PRIx8 ":"
- "%02" PRIx8 ":%02" PRIx8 ":%02" PRIx8,
- combo[0], combo[1], combo[2], combo[3], combo[4], combo[5]);
-
- if (rc != -1)
- return newchain_with_match_idx(buf, column, idx_b);
-
- return newchain_with_match_idx(strdup("<macaddr>"), column, idx_b);
-}
-
-static chain_t *
-ipv4_clocc(const scratch_t *scratch, int idx_a, int idx_b)
-{
- int column;
- char *erasep, *ret;
- uint32_t combo, mask, discrepancies;
- const clocc_t * const a = &scratch->Aoccs.occs[idx_a],
- * const b = &scratch->Boccs.occs[idx_b];
- char buf[sizeof("255.255.255.255/32 ")];
-
- column = cloccs_to_column(a, b);
-
- switch (scratch->ipv4_op) {
- case 'p':
- discrepancies = b->val_u.u_ipv4 ^ a->val_u.u_ipv4;
- break;
- default:
- case 'l':
- discrepancies = 0;
- break;
- }
-
- for (mask = 0xffffffff; (discrepancies & mask) != 0; mask &= (mask - 1))
- ; // do nothing
-
- combo = htonl(a->val_u.u_ipv4 & mask);
-
- ret = inet_net_ntop(AF_INET, &combo, 32, buf, sizeof(buf));
-
- if ((erasep = strchr(buf, '/')) != NULL)
- *erasep = '\0';
-
- if (ret != NULL)
- return newchain_with_match_idx(strdup(buf), column, idx_b);
-
- return newchain_with_match_idx(strdup("<ipv4>"), column, idx_b);
-}
-
-static chain_t *
-hex_clocc(const scratch_t *scratch, int idx_a, int idx_b, clocc_kind_t kind,
- clocc_qual_t qual)
-{
- char *buf;
- int column, rc;
- uintmax_t result;
- const clocc_t * const a = &scratch->Aoccs.occs[idx_a],
- * const b = &scratch->Boccs.occs[idx_b];
-
- column = cloccs_to_column(a, b);
-
- dbg2_printf("0x%jx @ %d / 0x%jx @ %d -> column %d\n",
- a->val_u.u_uintmax, a->column,
- b->val_u.u_uintmax, b->column, column);
-
- switch (scratch->hex_op) {
- case '&':
- result = b->val_u.u_uintmax & a->val_u.u_uintmax;
- break;
- case '|':
- result = b->val_u.u_uintmax | a->val_u.u_uintmax;
- break;
- case 'l':
- default:
- result = a->val_u.u_uintmax;
- break;
- }
-
- if (result == 0 && kind == KIND_INTMAX_0xHEX)
- return newchain_with_match_idx(strdup("0x0"), column, idx_b);
-
- if (qual == QUAL_ALLCAPS) {
- rc = asprintf(&buf,
- (kind == KIND_INTMAX_0xHEX) ? "0x%jX" : "%jX", result);
- } else {
- rc = asprintf(&buf,
- (kind == KIND_INTMAX_0xHEX) ? "0x%jx" : "%jx", result);
- }
-
- if (rc != -1)
- return newchain_with_match_idx(buf, column, idx_b);
-
- return newchain_with_match_idx(strdup("<hexmax>"), column, idx_b);
-}
-
-static chain_t *
-int_clocc(const scratch_t *scratch, int idx_a, int idx_b)
-{
- char *ret;
- int column;
- intmax_t result;
- const clocc_t * const a = &scratch->Aoccs.occs[idx_a],
- * const b = &scratch->Boccs.occs[idx_b];
-
- column = cloccs_to_column(a, b);
-
- dbg2_printf("%jd @ %d / %jd @ %d -> column %d\n",
- a->val_u.u_intmax, a->column,
- b->val_u.u_intmax, b->column, column);
-
- switch (scratch->dec_op) {
- case '-':
- result = b->val_u.u_intmax - a->val_u.u_intmax;
- break;
- case '+':
- result = b->val_u.u_intmax + a->val_u.u_intmax;
- break;
- case 'l':
- default:
- result = a->val_u.u_intmax;
- break;
- }
-
- if (asprintf(&ret, "%jd", result) != -1)
- return newchain_with_match_idx(ret, column, idx_b);
-
- return newchain_with_match_idx(strdup("<intmax>"), column, idx_b);
-}
-
-static chain_t *
-clocc(const scratch_t *scratch, int idx_a, int idx_b)
-{
- const clocc_t * const a = &scratch->Aoccs.occs[idx_a],
- * const b = &scratch->Boccs.occs[idx_b];
-
- if (a->kind != b->kind)
- return newchain(strdup("<clocc*>"), 0);
-
- if (b->kind == KIND_INTMAX_0xHEX || b->kind == KIND_INTMAX_HEX) {
- return hex_clocc(scratch, idx_a, idx_b, b->kind,
- (a->qual == b->qual) ? b->qual : QUAL_NONE);
- }
-
- if (b->kind == KIND_INTMAX_DEC)
- return int_clocc(scratch, idx_a, idx_b);
-
- if (b->kind == KIND_IPv4)
- return ipv4_clocc(scratch, idx_a, idx_b);
-
- if (b->kind == KIND_MACADDR)
- return mac_clocc(scratch, idx_a, idx_b);
-
- if (b->kind == KIND_STRING)
- return newchain(strdup("<word>"), 0);
-
- return newchain(strdup("<clocc>"), 0);
-}
-
-static chain_t *
-empty(void)
-{
- return newchain(strdup(""), 0);
-}
-
-static void
-algb(const slice_t *A, const slice_t *B, scratch_t *scratch, size_t *LL,
- bool do_clocc)
-{
- const ssize_t m = length(A);
- const ssize_t n = length(B);
- ssize_t i, j;
- size_t *K[2], *Kclocc, *Kcur, *Kprev, *Ktmp;
- struct {
- clocc_kind_t A, B;
- } kind;
- struct {
- ssize_t A, B;
- } wlen;
- bool inside_clocc = false;
-
- K[0] = scratch->K[0];
- K[1] = scratch->K[1];
-
- /* A copy of the K[] at which a clocc last began. */
- Kclocc = scratch->Kclocc;
-
- for (j = 0; j < n + 1; j++)
- K[0][j] = 0;
-
- /* make -1 a valid index on all arrays */
- Kprev = K[0] + 1;
- Kcur = K[1] + 1;
- Kclocc++;
-
- for (i = 0; i < m; i++) {
- const bool clocc_starts_this_row = do_clocc &&
- clocc_starts_in_slice_at(&scratch->Aoccs, scratch->Abasis,
- A, i, NULL);
-
- if (clocc_starts_this_row) {
- assert(!inside_clocc);
- dbg_printf("%s: row %zd, clocc starts\n", __func__, i);
- memcpy(&Kclocc[-1], &Kprev[-1],
- sizeof(Kclocc[0]) * (n + 1));
- inside_clocc = true;
- }
-
- const bool clocc_ends_this_row = inside_clocc &&
- clocc_ends_in_slice_at(&scratch->Aoccs, scratch->Abasis,
- A, i, &wlen.A, &kind.A);
-
- for (j = 0; j < n; j++) {
-
- size_t u = Kprev[j], ul = Kprev[j - 1], l = Kcur[j - 1];
-
- if (get(A, i) == get(B, j))
- ul++;
-
- if (clocc_ends_this_row &&
- clocc_ends_in_slice_at(&scratch->Boccs,
- scratch->Bbasis, B, j,
- &wlen.B, &kind.B) &&
- kind.A == kind.B && /* match kind */
- wlen.B <= j + 1 /* must not begin outside of B */) {
- const size_t step = wlen.A * wlen.B + 1,
- nscore = Kclocc[j - wlen.B] + step;
- dbg_printf("%s visits clocc boundary at A[%d] "
- "and B[%d] step %zu\n", __func__,
- index_versus_basis(scratch->Abasis, A, i),
- index_versus_basis(scratch->Bbasis, B, j),
- step);
- Kcur[j] = MAX(MAX(l, u),
- MAX(ul, nscore));
- } else
- Kcur[j] = MAX(MAX(u, ul), l);
- dbg_printf("%s set K[%zd][%zd] to %zu%s\n",
- __func__, i, j, Kcur[j], inside_clocc ? "*" : "");
- }
- if (clocc_ends_this_row) {
- dbg_printf("%s: row %zd, clocc ends\n", __func__, i);
- inside_clocc = false;
- }
-
- Ktmp = Kprev;
- Kprev = Kcur;
- Kcur = Ktmp;
- }
- assert(Kprev[-1] == 0 && Kcur[-1] == 0 && Kclocc[-1] == 0);
- Kprev--;
- for (j = 0; j < n + 1; j++)
- LL[j] = Kprev[j];
-}
-
-static const clocc_t *
-index_to_clocc_container(const cloccs_t *o, int i)
-{
- const clocc_t *occ;
- int l, m, r;
-
- l = 0;
- r = o->noccs - 1;
-
- while (l <= r) {
- m = (l + r) / 2;
- occ = &o->occs[m];
- if (i < occ->first)
- r = m - 1;
- else if (occ->last < i)
- l = m + 1;
- else
- goto found;
- }
- return NULL;
-
-found:
- return occ;
-}
-
-static int
-split_interval(cloccs_t *o, int origin, int len)
-{
- struct {
- int lo, mid, hi;
- } bkpt;
-
- bkpt.mid = origin + len / 2;
-
- dbg_printf("searching [%d, %d] for split\n", origin, origin + len - 1);
-
- const clocc_t *occ = index_to_clocc_container(o, bkpt.mid);
-
- if (occ == NULL ||
- occ->first < origin || origin + len < occ->last + 1 ||
- (occ->first == origin && origin + len == occ->last + 1)) {
- dbg_printf("split %d\n", bkpt.mid);
- return bkpt.mid - origin;
- }
-
- bkpt.lo = MAX(occ->first, origin);
- bkpt.hi = MIN(occ->last, origin + len - 1);
-
- dbg_printf("split %d in [%d, %d] ([%d, %d])\n", bkpt.mid,
- occ->first, occ->last, bkpt.lo, bkpt.hi);
-
- if (bkpt.lo == origin) {
- dbg_printf("split %d -> %d (l.%d)\n", bkpt.mid,
- bkpt.hi + 1, __LINE__);
- return bkpt.hi + 1 - origin;
- }
- if (bkpt.hi == origin + len - 1) {
- dbg_printf("split %d -> %d (l.%d)\n", bkpt.mid, bkpt.lo,
- __LINE__);
- return bkpt.lo - origin;
- }
-
- if (bkpt.mid - bkpt.lo < bkpt.hi - bkpt.mid) {
- dbg_printf("split %d -> %d (l.%d)\n", bkpt.mid, bkpt.lo,
- __LINE__);
- return bkpt.lo - origin;
- }
-
- dbg_printf("split %d -> %d (l.%d)\n", bkpt.mid, bkpt.hi + 1,
- __LINE__);
- return bkpt.hi + 1 - origin;
-}
-
-/* Find the widest clocc contained entirely in [origin + first, origin + last].
- * Return its offset from origin at startp, and the last character's offset from
- * origin at endp. Return the index of the clocc in `o', or
- * -1 if no clocc matches.
- */
-static int
-widest_clocc_in_interval(cloccs_t *o, int origin, int first, int last,
- int *startp, int *endp, clocc_kind_t kind)
-{
- int i, widest = -1, maxw = 0;
-
- for (i = 0; i < o->noccs; i++) {
- int w;
-
- if (origin + first > o->occs[i].first)
- continue;
- if (o->occs[i].last > origin + last)
- break;
-
- if (o->occs[i].kind != kind)
- continue;
-
- w = o->occs[i].last - o->occs[i].first + 1;
- if (w > maxw) {
- maxw = w;
- *startp = o->occs[i].first - origin;
- *endp = o->occs[i].last - origin;
- widest = i;
- }
- }
- return widest;
-}
-
-static int
-interval_to_clocc_index(cloccs_t *o, int origin, int first, int last, clocc_kind_t *kindp)
-{
- int i;
-
- for (i = 0; i < o->noccs; i++) {
- if (origin + first == o->occs[i].first &&
- origin + last == o->occs[i].last) {
- if (kindp != NULL)
- *kindp = o->occs[i].kind;
- return i;
- }
- }
- return -1;
-}
-
-static inline chain_t *
-algc_return(const size_t m, const size_t n, const origin_t origin,
- size_t *lcsp, size_t lcs, chain_t *c)
-{
- if (lcsp != NULL) {
- dbg_printf("%s([%d, %zu], [%d, %zu]) -> %zu\n", __func__,
- origin.i, origin.i + m - 1,
- origin.j, origin.j + n - 1, lcs);
- *lcsp = lcs;
- }
-
- return c;
-}
-
-static chain_t *
-joinchains(chain_t *ac, chain_t *bc)
-{
- chainelt_t *ace, *bce;
-
- ace = TAILQ_LAST(&ac->c_head, chainhead);
- bce = TAILQ_FIRST(&bc->c_head);
-
- if (ace->ce_column == 0 && bce->ce_column == 0 &&
- ace->ce_match_idx == -1 && bce->ce_match_idx == -1) {
- char *c, *c1, *c2;
- c1 = ace->ce_content;
- c2 = bce->ce_content;
- c = malloc(strlen(c1) + strlen(c2) + 1);
- assert(c != NULL);
- strcpy(c, c1);
- strcat(c, c2);
- free(c1);
- free(c2);
- ace->ce_content = c;
- TAILQ_REMOVE(&bc->c_head, bce, ce_link);
- TAILQ_CONCAT(&ac->c_head, &bc->c_head, ce_link);
- free(bc);
- } else {
- TAILQ_CONCAT(&ac->c_head, &bc->c_head, ce_link);
- free(bc);
- }
- return ac;
-}
-
-static chain_t *
-algc(const slice_t *A, const slice_t *B, scratch_t *scratch,
- const origin_t origin, const size_t expected_lcs, size_t *lcsp)
-{
- chain_t *c1, *c2, *c;
- unsigned int j, k;
- const size_t m = length(A), n = length(B);
- struct {
- int A, B;
- } ivlidx;
- struct {
- size_t topl;
- size_t botr;
- size_t tot;
- } lcs = {.tot = 0};
- const slice_t *B1n;
- slice_t *A1i, *Amip1, *Bn1;
- slice_t *B1k, *Aip1m, *Bkp1n;
-
- dbg_printf("%s([%d, %zu], [%d, %zu])\n", __func__,
- origin.i, origin.i + m - 1,
- origin.j, origin.j + n - 1);
-
- if (n == 0) {
- assert(expected_lcs == no_expected_lcs || 0 == expected_lcs);
- return algc_return(m, n, origin, lcsp, 0, empty());
- }
-
- clocc_kind_t kind;
-
- ivlidx.A = interval_to_clocc_index(&scratch->Aoccs, origin.i, 0, m - 1, &kind);
-
- /* If A is a clocc occurrence, must try to match A with a clocc in B! */
- if (ivlidx.A != -1) {
- int start = 0, end = 0;
-
- ivlidx.B = widest_clocc_in_interval(&scratch->Boccs, origin.j,
- 0, n - 1, &start, &end, kind);
-
- if (ivlidx.B != -1) {
- dbg_assert(expected_lcs == no_expected_lcs ||
- (size_t)(end - start + 1) * m + 1 == expected_lcs);
- return algc_return(m, n, origin, lcsp,
- (size_t)(end - start + 1) * m + 1,
- clocc(scratch, ivlidx.A, ivlidx.B));
- }
- }
- if (m == 1) {
- char A0 = get(A, 0);
-
- if (ispresent(B, A0)) {
- dbg_assert(expected_lcs == no_expected_lcs ||
- 1 == expected_lcs);
- return algc_return(m, n, origin, lcsp, 1,
- newchain(strndup(&A0, 1), 0));
- }
-
- assert(expected_lcs == no_expected_lcs || 0 == expected_lcs);
-
- return algc_return(m, n, origin, lcsp, 0, empty());
- }
-
- const unsigned int i = split_interval(&scratch->Aoccs, origin.i, m);
- A1i = subslice(A, 0, i - 1);
- B1n = B;
- Amip1 = subslice(A, m - 1, i);
- Bn1 = subslice(B, n - 1, 0);
- algb(A1i, B1n, scratch, scratch->L1, true);
- dbg_printf("%s called algb once\n", __func__);
- algb(Amip1, Bn1, scratch, scratch->L2, true);
- dbg_printf("%s called algb twice\n", __func__);
-
- for (k = j = 0; j < n + 1; j++) {
- if (scratch->L1[j] + scratch->L2[n - j] > lcs.tot) {
- k = j;
- lcs.topl = scratch->L1[j];
- lcs.botr = scratch->L2[n - j];
- lcs.tot = lcs.topl + lcs.botr;
- }
- }
- dbg_printf("%s chose k %u (%zu - %u = %zu) lcs %zu\n", __func__,
- k, n, k, n - k, lcs.tot);
-
- assert(k != 0 || lcs.topl == 0);
-
- if (expected_lcs != no_expected_lcs && lcs.tot != expected_lcs) {
- struct {
- char l[12], c, r[12];
- } actx = {.l = "12345.12345", .r = "12345.12345"},
- bctx = {.l = "12345.12345", .r = "12345.12345"};
-
- dbg_printf("%s expected lcs %zu, got %zu + %zu = %zu at "
- "[%d | %d + %u | %d + %zu, %d | %d + %u | %d + %zu]\n",
- __func__, expected_lcs, lcs.topl, lcs.botr, lcs.tot,
- origin.i, origin.i, i - 1, origin.i, m - 1,
- origin.j, origin.j, k, origin.j, n - 1);
-
- extract(A, i - MIN(i, 6), i - MIN(i, 2), actx.l);
- actx.c = get(A, i - 1);
- extract(A, i, MIN(i + 4, m - 1), actx.r);
-
- dbg_printf("%s A context %s [ %c ] %s",
- __func__, actx.l, actx.c, actx.r);
-
- if (k > 0) {
- extract(B, k - MIN(k, 6), k - MIN(k, 2), bctx.l);
- bctx.c = get(B, k - 1);
- extract(B, MIN(k, n - 1), MIN(k + 4, n - 1), bctx.r);
-
- dbg_printf(", B context %s [ %c ] %s\n",
- bctx.l, bctx.c, bctx.r);
- } else
- dbg_printf("\n");
-
- dbg_printf("bailing\n");
- algb(A, B, scratch, scratch->L1, true);
- const slice_t *Am1 = subslice(A, m - 1, 0);
- algb(Am1, Bn1, scratch, scratch->L2, true);
-
- dbg_printf("algb(A, B) -> %zu fwd, -> %zu bwd\n",
- scratch->L1[n], scratch->L2[n]);
- abort();
- }
-
- size_t lcs1, lcs2;
-
- if (k >= 1) {
- B1k = subslice(B, 0, k - 1);
- c1 = algc(A1i, B1k, scratch, origin, lcs.topl, &lcs1);
- slice_free(B1k);
- } else {
- lcs1 = 0;
- c1 = empty();
- }
-
- if (k < n) {
- Aip1m = subslice(A, i, m - 1);
- Bkp1n = subslice(B, k, n - 1);
- c2 = algc(Aip1m, Bkp1n, scratch,
- (origin_t){origin.i + i, origin.j + k}, lcs.botr, &lcs2);
- slice_free(Bkp1n);
- slice_free(Aip1m);
- } else {
- lcs2 = 0;
- c2 = empty();
- }
-
- slice_free(A1i);
- slice_free(Amip1);
- slice_free(Bn1);
- c = joinchains(c1, c2);
- return algc_return(m, n, origin, lcsp, lcs1 + lcs2, c);
-}
+#include "core.h"
static void
usage(void)
@@ -1121,439 +51,19 @@ usage(void)
exit(EXIT_FAILURE);
}
-static void
-scratch_init(scratch_t *scratch, size_t n, slice_t *Abasis, slice_t *Bbasis)
-{
- scratch->L1 = malloc(sizeof(scratch->L1[0]) * (n + 1));
- scratch->L2 = malloc(sizeof(scratch->L2[0]) * (n + 1));
- scratch->K[0] = malloc(sizeof(scratch->K[0][0]) * (n + 1));
- scratch->K[1] = malloc(sizeof(scratch->K[1][0]) * (n + 1));
- scratch->Kclocc = malloc(sizeof(scratch->Kclocc[0]) * (n + 1));
- scratch->Abasis = Abasis;
- scratch->Bbasis = Bbasis;
- scratch->expected_lcs = SIZE_T_MAX;
-}
-
-static int
-slicestr(const slice_t *s, const char *t)
-{
- ssize_t i;
- const char *p;
-
- p = t;
-
- for (i = 0; i < length(s); i++) {
- if (get(s, i) != *p) {
- p = t;
- continue;
- }
- if (*++p == '\0')
- return i - (p - t - 1);
- }
- return -1;
-}
-
-static void
-emit_mac_clocc(mac_detection_t *d, void *arg)
-{
- cloccs_t *o = arg;
- uint8_t addr[6];
- int rc;
-
- dbg_printf("found mac string %s @ [%d, %d]\n", d->d_string,
- d->d_idx.start, d->d_idx.stop);
-
- if (o->noccs >= (int)__arraycount(o->occs))
- return;
-
- rc = sscanf(d->d_string,
- "%" SCNx8 ":%" SCNx8 ":%" SCNx8 ":%" SCNx8 ":%" SCNx8 ":%" SCNx8 "",
- &addr[0], &addr[1], &addr[2], &addr[3], &addr[4], &addr[5]);
-
- assert(rc == 6);
-
- dbg_printf("converted ipv4 string %s @ [%d, %d] -> "
- "%02" PRIx8 ":%02" PRIx8 ":%02" PRIx8 ":"
- "%02" PRIx8 ":%02" PRIx8 ":%02" PRIx8 "\n",
- d->d_string, d->d_idx.start, d->d_idx.stop,
- addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
-
- o->occs[o->noccs].column = d->d_column.start;
- o->occs[o->noccs].first = d->d_idx.start;
- o->occs[o->noccs].last = d->d_idx.stop;
- o->occs[o->noccs].kind = KIND_MACADDR;
- o->occs[o->noccs].qual = QUAL_NONE;
- memcpy(&o->occs[o->noccs].val_u.u_macaddr[0], &addr[0],
- sizeof(o->occs[o->noccs].val_u.u_macaddr));
- o->noccs++;
-}
-
-static void
-emit_ipv4_clocc(ipv4_detection_t *d, void *arg)
-{
- cloccs_t *o = arg;
- struct {
- uint32_t net;
- uint32_t host;
- } addr;
- int rc;
-
- dbg_printf("found ipv4 string %s @ [%d, %d]\n", d->d_string,
- d->d_idx.start, d->d_idx.stop);
-
- if (o->noccs >= (int)__arraycount(o->occs))
- return;
-
- rc = inet_net_pton(AF_INET, d->d_string, &addr.net, sizeof(addr.net));
-
- assert(rc != -1);
-
- addr.host = ntohl(addr.net);
-
- dbg_printf("converted ipv4 string %s @ [%d, %d] -> %08" PRIx32 "\n",
- d->d_string, d->d_idx.start, d->d_idx.stop, addr.host);
-
- o->occs[o->noccs].column = d->d_column.start;
- o->occs[o->noccs].first = d->d_idx.start;
- o->occs[o->noccs].last = d->d_idx.stop;
- o->occs[o->noccs].kind = KIND_IPv4;
- o->occs[o->noccs].qual = QUAL_NONE;
- o->occs[o->noccs].val_u.u_ipv4 = addr.host;
- o->noccs++;
-}
-
-static void
-emit_hex_clocc(hex_detection_t *d, void *arg, bool use0x)
-{
- cloccs_t *o = arg;
- intmax_t val;
- char *end;
- bool allcaps;
-
- if (!use0x && strspn(d->d_digits, "0123456789") == strlen(d->d_digits))
- return;
-
- allcaps = strcspn(d->d_digits, "abcdef") == strlen(d->d_digits);
-
- dbg_printf("found hex string %s @ [%d, %d]%s\n", d->d_digits,
- d->d_idx.start, d->d_idx.stop, allcaps ? " all CAPS" : "");
-
- if (o->noccs >= (int)__arraycount(o->occs))
- return;
-
- val = strtoimax(d->d_digits, &end, 16);
-
- if (*end != '\0' && errno == ERANGE)
- warnx("%s: over/underflow at %d", __func__, d->d_column.start);
-
- o->occs[o->noccs].column = d->d_column.start;
- o->occs[o->noccs].first = d->d_idx.start;
- o->occs[o->noccs].last = d->d_idx.stop;
- o->occs[o->noccs].kind = use0x ? KIND_INTMAX_0xHEX : KIND_INTMAX_HEX;
- o->occs[o->noccs].qual = allcaps ? QUAL_ALLCAPS : QUAL_NONE;
- o->occs[o->noccs].val_u.u_uintmax = val;
- o->noccs++;
-}
-
-static int
-clocc_cmp(const void *l0, const void *r0)
-{
- const clocc_t *l = l0, *r = r0;
-
- return l->first - r->first;
-}
-
-static int
-clocc_width(const clocc_t *c)
-{
- assert(c->last >= c->first);
- return c->last - c->first + 1;
-}
-
-static void
-cloccs_dedup(cloccs_t *o)
-{
- int i, j;
-
- for (i = 1; i < o->noccs;) {
- clocc_t *l, *r;
- l = &o->occs[i - 1];
- r = &o->occs[i];
- if (l->last < r->first) {
- i++;
- continue;
- }
- if (clocc_width(r) > clocc_width(l))
- *l = *r;
- for (j = i + 1; j < o->noccs; j++)
- o->occs[j - 1] = o->occs[j];
- o->noccs--;
- }
-}
-
-static void
-clocc_htbl_init(clocc_htbl_t *tbl)
-{
- int i;
-
- for (i = 0; i < (int)__arraycount(tbl->bucket); i++)
- TAILQ_INIT(&tbl->bucket[i]);
-}
-
-static void
-clocc_htbl_insert(clocc_htbl_t *tbl, const slice_t *s, clocc_t *occ)
-{
- int i = clocc_hash(s, occ) % __arraycount(tbl->bucket);
- TAILQ_INSERT_HEAD(&tbl->bucket[i], occ, bucket);
-}
-
-static void
-cloccs_hash(cloccs_t *o, const slice_t *s)
-{
- int i;
-
- for (i = 0; i < o->noccs; i++)
- clocc_htbl_insert(&o->htbl, s, &o->occs[i]);
-}
-
-static void
-cloccs_init(cloccs_t *o, const slice_t *s)
-{
- hex_parser_t *hex_parser;
- ipv4_parser_t *ipv4_parser;
- mac_parser_t *mac_parser;
- size_t column, i;
- size_t n = length(s);
- unsigned int j, k;
- char digits[64 / 3 + 1]; /* XXX how many digits in an intmax_t? */
-
- clocc_htbl_init(&o->htbl);
-
- o->noccs = 0;
- for (i = 0; i < __arraycount(names); i++) {
- int loc;
-
- loc = slicestr(s, names[i]);
- if (loc == -1)
- continue;
-
- o->occs[o->noccs].first = loc;
- o->occs[o->noccs].last = loc + strlen(names[i]) - 1;
- o->occs[o->noccs].kind = KIND_STRING;
- dbg_printf("found %s at %d - %d\n",
- names[i], o->occs[o->noccs].first, o->occs[o->noccs].last);
- o->noccs++;
- }
-
- mac_parser = mac_parser_alloc(emit_mac_clocc, o);
- if (mac_parser == NULL)
- goto parse_ipv4;
-
- column = 0;
- for (j = 0; j < n; j++) {
- char c;
-
- c = get(s, j);
- ++column;
- mac_parser_drive(mac_parser, j, column, c);
- if (c == '\n')
- column = 0;
- }
- mac_parser_drive(mac_parser, n, column + 1, -1);
-
- mac_parser_free(mac_parser);
-
-parse_ipv4:
- ipv4_parser = ipv4_parser_alloc(emit_ipv4_clocc, o);
- if (ipv4_parser == NULL)
- goto parse_hexadecimal;
-
- column = 0;
- for (j = 0; j < n; j++) {
- char c;
-
- c = get(s, j);
- ++column;
- ipv4_parser_drive(ipv4_parser, j, column, c);
- if (c == '\n')
- column = 0;
- }
- ipv4_parser_drive(ipv4_parser, n, column + 1, -1);
-
- ipv4_parser_free(ipv4_parser);
-
-parse_hexadecimal:
- hex_parser = hex_parser_alloc(emit_hex_clocc, o);
- if (hex_parser == NULL)
- goto parse_decimal;
-
- column = 0;
- for (j = 0; j < n; j++) {
- char c;
-
- c = get(s, j);
- ++column;
- hex_parser_drive(hex_parser, j, column, c);
- if (c == '\n')
- column = 0;
- }
- hex_parser_drive(hex_parser, n, column + 1, -1);
-
- hex_parser_free(hex_parser);
-
-parse_decimal:
-
- j = 0;
- column = 0;
- while (o->noccs < (int)__arraycount(o->occs)) {
- intmax_t val;
- char *end;
- char c;
-
- ++column;
-
- if (j == n)
- break;
-
- c = get(s, j);
- for (k = j + 1; k < n; k++) {
- if (strchr("0123456789", get(s, k)) == NULL)
- break;
- }
- if (c == '-') {
- if (j == k - 1) {
- j++;
- continue;
- }
- } else if (strchr("0123456789", c) == NULL) {
- if (c == '\n')
- column = 0;
- j++;
- continue;
- }
-
- if (k - j > __arraycount(digits) - 1)
- goto next;
-
- extract(s, j, k - 1, digits);
-
- val = strtoimax(digits, &end, 10);
- if (*end == '\0') {
- o->occs[o->noccs].column = column;
- o->occs[o->noccs].first = j;
- o->occs[o->noccs].last = k - 1;
- o->occs[o->noccs].kind = KIND_INTMAX_DEC;
- o->occs[o->noccs].val_u.u_intmax = val;
- dbg_printf("found %jd at %d - %d\n",
- val, o->occs[o->noccs].first,
- o->occs[o->noccs].last);
- o->noccs++;
- } else if (errno == ERANGE)
- warnx("%s: over/underflow at %d", __func__, j);
-next:
- column += k - 1 - j;
- j = k;
- }
- mergesort(o->occs, o->noccs, sizeof(*o->occs), clocc_cmp);
- cloccs_dedup(o);
-}
-
-static void
-printchain(chain_t *c)
-{
- chainelt_t *ce;
- int column = 1;
-
- TAILQ_FOREACH(ce, &c->c_head, ce_link) {
- char *found, *s;
- int nextcolumn;
-
- for (nextcolumn = column, s = ce->ce_content;
- *s != '\0' && (found = strchr(s, '\n')) != NULL;
- s = found + 1)
- nextcolumn = 1;
-
- nextcolumn += strlen(s);
- while (ce->ce_column < 0 && column < -ce->ce_column) {
- fputc(' ', stdout);
- column++;
- }
- while (ce->ce_column > 0 && nextcolumn - 1 < ce->ce_column) {
- fputc(' ', stdout);
- nextcolumn++;
- }
- printf("%s", ce->ce_content);
- column = nextcolumn;
- }
-}
-
-static void
-emit_transformed_text(chain_t *O, cloccs_t *Moccs, cloccs_t *Toccs,
- const slice_t *match, const slice_t *transform)
-{
- chainelt_t *ce;
- int i, next;
-
- TAILQ_FOREACH(ce, &O->c_head, ce_link) {
- clocc_t *occ = &Moccs->occs[ce->ce_match_idx];
-
- if (ce->ce_match_idx == -1)
- continue;
-
- if (occ->match == NULL)
- occ->match = ce;
- }
-
- for (next = i = 0; i < Toccs->noccs; i++) {
- clocc_t *Tocc = &Toccs->occs[i];
- clocc_t *Mocc = clocc_cross_lookup(Moccs, match, transform,
- Tocc);
-
- for (; next < Tocc->first; next++) {
- char c = get(transform, next);
- fputc(c, stdout); // XXX check error
- }
-
- if (Mocc != NULL && Mocc->match != NULL) {
- fprintf(stdout, "%s", Mocc->match->ce_content);
- next = Tocc->last + 1;
- } else
- next = Tocc->first;
- }
-
- for (; next < (int)length(transform); next++) {
- char c = get(transform, next);
- fputc(c, stdout); // XXX check error
- }
-}
-
int
main(int argc, char **argv)
{
- slice_t *left, *match, *transform;
+ slice_t *left, *match;
size_t n;
chain_t *O;
scratch_t scratch;
size_t lcs;
- bool dotransform = false;
- cloccs_t Toccs;
setprogname(argv[0]);
- if (strcmp(getprogname(), "tt") == 0)
- dotransform = true;
-
- switch (argc) {
- case 4:
- if (!dotransform)
- usage();
- /*FALLTHROUGH*/
- case 3:
- break;
- default:
+ if (argc != 3)
usage();
- break;
- }
left = file_to_slice(argv[1]);
match = file_to_slice(argv[2]);
@@ -1564,35 +74,16 @@ main(int argc, char **argv)
cloccs_init(&scratch.Boccs, match);
cloccs_hash(&scratch.Boccs, match);
- if (dotransform) {
- scratch.macaddr_op = 'l';
- scratch.ipv4_op = 'l';
- scratch.dec_op = 'l';
- scratch.hex_op = 'l';
- transform = file_to_slice(argv[3]);
- cloccs_init(&Toccs, transform);
- } else if (strcmp(getprogname(), "dt") == 0) {
- scratch.macaddr_op = 'p';
- scratch.ipv4_op = 'p';
- scratch.dec_op = '-';
- scratch.hex_op = '&';
- } else if (strcmp(getprogname(), "it") == 0) {
- scratch.macaddr_op = 'p';
- scratch.ipv4_op = 'p';
- scratch.dec_op = '+';
- scratch.hex_op = '|';
- } else
- errx(EXIT_FAILURE, "not implemented");
+ scratch.macaddr_op = 'p';
+ scratch.ipv4_op = 'p';
+ scratch.dec_op = '-';
+ scratch.hex_op = '&';
O = algc(left, match, &scratch, (origin_t){0, 0}, no_expected_lcs,
&lcs);
dbg_printf("lcs = %zu\n", lcs);
- if (dotransform) {
- emit_transformed_text(O, &scratch.Boccs, &Toccs, match,
- transform);
- } else
- printchain(O);
+ printchain(O);
dbg_printf("\n");
return EXIT_SUCCESS;
Index: othersrc/external/bsd/arfe/dt/macaddr.c
diff -u othersrc/external/bsd/arfe/dt/macaddr.c:1.2 othersrc/external/bsd/arfe/dt/macaddr.c:1.3
--- othersrc/external/bsd/arfe/dt/macaddr.c:1.2 Mon Sep 14 02:58:17 2015
+++ othersrc/external/bsd/arfe/dt/macaddr.c Tue Sep 22 01:12:09 2015
@@ -1,4 +1,4 @@
-/* $NetBSD: macaddr.c,v 1.2 2015/09/14 02:58:17 dyoung Exp $ */
+/* $NetBSD: macaddr.c,v 1.3 2015/09/22 01:12:09 dyoung Exp $ */
/* $ARFE:47:33Z dyoung $ */
/*-
@@ -33,7 +33,7 @@
#include <sys/cdefs.h> /* for __predict_true() */
-#include "dt.h"
+#include "core.h"
#include "macaddr.h"
typedef enum mac_op {
Index: othersrc/external/bsd/arfe/it/Makefile
diff -u othersrc/external/bsd/arfe/it/Makefile:1.3 othersrc/external/bsd/arfe/it/Makefile:1.4
--- othersrc/external/bsd/arfe/it/Makefile:1.3 Fri Sep 11 01:50:43 2015
+++ othersrc/external/bsd/arfe/it/Makefile Tue Sep 22 01:12:09 2015
@@ -2,8 +2,9 @@ NOMAN=
.include <bsd.own.mk>
PROG=it
-SRCS+=dt.c hex.c ipv4.c macaddr.c
+SRCS+=core.c hex.c ipv4.c it.c macaddr.c
#CPPFLAGS+=-DHB_DEBUG
+CPPFLAGS+=-I$(.CURDIR)/../dt
DBG+=-g -O3
#DBG+=-g -O0
#COPTS+=-pg
Index: othersrc/external/bsd/arfe/it/README
diff -u othersrc/external/bsd/arfe/it/README:1.6 othersrc/external/bsd/arfe/it/README:1.7
--- othersrc/external/bsd/arfe/it/README:1.6 Mon Sep 14 02:58:17 2015
+++ othersrc/external/bsd/arfe/it/README Tue Sep 22 01:12:09 2015
@@ -1,5 +1,5 @@
-$ARFE: README 245 2015-09-11 02:13:21Z dyoung $
-$NetBSD: README,v 1.6 2015/09/14 02:58:17 dyoung Exp $
+$ARFE: README 252 2015-09-22 01:10:07Z dyoung $
+$NetBSD: README,v 1.7 2015/09/22 01:12:09 dyoung Exp $
IT---(i)ntegrate (t)ext---is a variation on DT that adds decimal numbers
instead of subtracts, and bitwise-ORs hexadecimal numbers instead of
Index: othersrc/external/bsd/arfe/tt/Makefile
diff -u othersrc/external/bsd/arfe/tt/Makefile:1.1 othersrc/external/bsd/arfe/tt/Makefile:1.2
--- othersrc/external/bsd/arfe/tt/Makefile:1.1 Fri Sep 11 01:50:43 2015
+++ othersrc/external/bsd/arfe/tt/Makefile Tue Sep 22 01:12:09 2015
@@ -2,9 +2,10 @@ NOMAN=
.include <bsd.own.mk>
PROG=tt
-SRCS+=dt.c hex.c ipv4.c macaddr.c
+SRCS+=core.c hex.c ipv4.c macaddr.c tt.c
#CPPFLAGS+=-DHB_DEBUG
CPPFLAGS+=-DHB_ASSERT
+CPPFLAGS+=-I$(.CURDIR)/../dt
DBG+=-g -O3
#DBG+=-g -O0
#COPTS+=-pg
Index: othersrc/external/bsd/arfe/tt/README
diff -u othersrc/external/bsd/arfe/tt/README:1.3 othersrc/external/bsd/arfe/tt/README:1.4
--- othersrc/external/bsd/arfe/tt/README:1.3 Mon Sep 14 02:58:17 2015
+++ othersrc/external/bsd/arfe/tt/README Tue Sep 22 01:12:09 2015
@@ -1,5 +1,5 @@
-$ARFE: README 245 2015-09-11 02:13:21Z dyoung $
-$NetBSD: README,v 1.3 2015/09/14 02:58:17 dyoung Exp $
+$ARFE: README 253 2015-09-22 01:10:15Z dyoung $
+$NetBSD: README,v 1.4 2015/09/22 01:12:09 dyoung Exp $
TT---(t)ransform (t)ext---transforms its input based on a
match/transform-template pair that exemplifies the changes that should
Added files:
Index: othersrc/external/bsd/arfe/dt/core.c
diff -u /dev/null othersrc/external/bsd/arfe/dt/core.c:1.1
--- /dev/null Tue Sep 22 01:12:09 2015
+++ othersrc/external/bsd/arfe/dt/core.c Tue Sep 22 01:12:09 2015
@@ -0,0 +1,1379 @@
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/param.h> /* for MAX() */
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include <sys/socket.h> /* for inet_net_pton */
+#include <netinet/in.h> /* for inet_net_pton */
+#include <arpa/inet.h> /* for inet_net_pton */
+
+#include "core.h"
+#include "hex.h"
+#include "ipv4.h"
+#include "macaddr.h"
+
+union freeslice;
+typedef union freeslice freeslice_t;
+
+union freeslice {
+ slice_t s;
+ freeslice_t *next;
+};
+
+static const char *names[] = {
+ "abe"
+ , "ada"
+ , "daria"
+};
+
+const size_t no_expected_lcs = SIZE_T_MAX;
+
+static bool freeslices_initialized __aligned(COHERENCY_UNIT);
+static freeslice_t *first_freeslice __aligned(COHERENCY_UNIT);
+static freeslice_t freeslices[2048];
+
+static void
+init_freeslices(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < __arraycount(freeslices) - 1; i++)
+ freeslices[i].next = &freeslices[i + 1];
+ freeslices[i].next = NULL;
+
+ first_freeslice = &freeslices[0];
+
+ freeslices_initialized = true;
+}
+
+static slice_t *
+slice_alloc(void)
+{
+ slice_t *s;
+
+ if (!freeslices_initialized)
+ init_freeslices();
+
+ s = &first_freeslice->s;
+ first_freeslice = first_freeslice->next;
+ return s;
+}
+
+static void
+slice_free(slice_t *s)
+{
+ freeslice_t *fs = (freeslice_t *)s;
+
+ fs->next = first_freeslice;
+ first_freeslice = fs;
+}
+
+static slice_t *
+newslice(char *buf, size_t buflen)
+{
+ slice_t *s = slice_alloc();
+
+ if (s == NULL)
+ return NULL;
+
+ s->backward = false;
+
+ s->first = buf;
+ s->last = buf + buflen - 1;
+
+ return s;
+}
+
+slice_t *
+file_to_slice(const char *fn)
+{
+ const int fd = open(fn, O_RDONLY);
+ struct stat st;
+ void *p;
+
+ if (fd == -1)
+ err(EXIT_FAILURE, "%s: open", __func__);
+
+ if (fstat(fd, &st) == -1)
+ err(EXIT_FAILURE, "%s: stat", __func__);
+
+ p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED|MAP_FILE, fd, 0);
+
+ if (p == MAP_FAILED)
+ err(EXIT_FAILURE, "%s: mmap", __func__);
+
+ return newslice(p, st.st_size);
+}
+
+ssize_t
+length(const slice_t *s)
+{
+ if (s->first > s->last)
+ return 0;
+
+ return s->last - s->first + 1;
+}
+
+static bool
+ispresent(const slice_t *s, char c)
+{
+ const char *p;
+
+ for (p = s->first; p <= s->last; p++) {
+ if (c == *p)
+ return true;
+ }
+ return false;
+}
+
+static inline char
+get(const slice_t *s, int i)
+{
+ dbg_assert(0 <= i && i < length(s));
+
+ return (s->backward) ? s->last[-i] : s->first[i];
+}
+
+static inline const char *
+getp(const slice_t *s, int i)
+{
+ dbg_assert(0 <= i && i < length(s));
+
+ return (s->backward) ? &s->last[-i] : &s->first[i];
+}
+
+static void
+extract(const slice_t *s, unsigned int i, unsigned int j, char *o)
+{
+ unsigned int k, inc;
+ char *p;
+
+ inc = (i < j) ? 1 : -1;
+ p = o;
+
+ for (k = i; ; k += inc) {
+ *p++ = get(s, k);
+ if (k == j)
+ break;
+ }
+
+ *p = '\0';
+}
+
+static slice_t *
+subslice(const slice_t *s, int i, int j)
+{
+ slice_t *ss;
+ const char *first, *last;
+
+ dbg_assert(MAX(i, j) < length(s));
+
+ ss = slice_alloc();
+
+ if (ss == NULL)
+ return NULL;
+
+ first = getp(s, i);
+ last = getp(s, j);
+
+ ss->backward = first > last;
+
+ if (ss->backward) {
+ ss->first = last;
+ ss->last = first;
+ } else {
+ ss->first = first;
+ ss->last = last;
+ }
+
+ return ss;
+}
+
+static bool
+clocc_starts_at(const cloccs_t *o, int first, ssize_t *wlenp,
+ clocc_kind_t *kindp)
+{
+ const clocc_t *occ;
+ int l, m, r;
+
+ l = 0;
+ r = o->noccs - 1;
+
+ while (l <= r) {
+ m = (l + r) / 2;
+ occ = &o->occs[m];
+ if (first < occ->first)
+ r = m - 1;
+ else if (occ->first < first)
+ l = m + 1;
+ else
+ goto found;
+ }
+ return false;
+
+found:
+ if (kindp != NULL)
+ *kindp = occ->kind;
+ if (wlenp != NULL)
+ *wlenp = occ->last - occ->first + 1;
+ return true;
+}
+
+static bool
+clocc_ends_at(const cloccs_t *o, int last, ssize_t *wlenp, clocc_kind_t *kindp)
+{
+ const clocc_t *occ;
+ int l, m, r;
+
+ l = 0;
+ r = o->noccs - 1;
+
+ while (l <= r) {
+ m = (l + r) / 2;
+ occ = &o->occs[m];
+ if (last < occ->last)
+ r = m - 1;
+ else if (occ->last < last)
+ l = m + 1;
+ else
+ goto found;
+ }
+ return false;
+
+found:
+ if (kindp != NULL)
+ *kindp = occ->kind;
+ if (wlenp != NULL)
+ *wlenp = occ->last - occ->first + 1;
+ return true;
+}
+
+static int
+index_versus_basis(const slice_t *basis, const slice_t *s, int i)
+{
+ dbg_assert(!basis->backward);
+
+ return getp(s, i) - getp(basis, 0);
+}
+
+static bool
+clocc_starts_in_slice_at(const cloccs_t *o, const slice_t *basis,
+ const slice_t *s, int i, size_t *wlenp)
+{
+ const int iprime = index_versus_basis(basis, s, i);
+
+ return (s->backward) ? clocc_ends_at(o, iprime, NULL, NULL)
+ : clocc_starts_at(o, iprime, NULL, NULL);
+}
+
+static bool
+clocc_ends_in_slice_at(const cloccs_t *o, const slice_t *basis,
+ const slice_t *s, int i, ssize_t *wlenp, clocc_kind_t *kindp)
+{
+ const int iprime = index_versus_basis(basis, s, i);
+
+ return (s->backward) ? clocc_starts_at(o, iprime, wlenp, kindp)
+ : clocc_ends_at(o, iprime, wlenp, kindp);
+}
+
+static chain_t *
+newchain_with_match_idx(char *content, int column, int midx)
+{
+ chain_t *c;
+ chainelt_t *ce;
+
+ c = malloc(sizeof(*c));
+ assert(c != NULL);
+
+ ce = malloc(sizeof(*ce));
+ assert(c != NULL);
+
+ ce->ce_column = column;
+ ce->ce_content = content;
+ ce->ce_match_idx = midx;
+ TAILQ_INIT(&c->c_head);
+ TAILQ_INSERT_TAIL(&c->c_head, ce, ce_link);
+
+ return c;
+}
+
+static chain_t *
+newchain(char *content, int column)
+{
+ return newchain_with_match_idx(content, column, -1);
+}
+
+/* Return 0 if the `a' and `b' are not left- or right-aligned on
+ * the same column, the column they right-align on, or zero minus
+ * the column they left-align on.
+ */
+static int
+cloccs_to_column(const clocc_t * const a, const clocc_t * const b)
+{
+ if (a->column + a->last - a->first ==
+ b->column + b->last - b->first)
+ return b->column + b->last - b->first;
+ else if (a->column == b->column)
+ return -a->column;
+ else
+ return 0;
+}
+
+static unsigned int
+clocc_hash(const slice_t *s, const clocc_t *c)
+{
+ unsigned int hash;
+ int i;
+
+ assert(c->last >= c->first);
+
+ for (hash = c->kind, i = c->first; i <= c->last; i++) {
+ if (((hash << 1) >> 1) != hash) {
+ hash <<= 1;
+ hash |= 0x1;
+ } else
+ hash <<= 1;
+
+ hash ^= get(s, i);
+ }
+
+ return hash;
+}
+
+/* Look up the class occurrence `occ` on slice `s` in the class occurrences
+ * `occs` occurring on slice `os`.
+ */
+static clocc_t *
+clocc_cross_lookup(cloccs_t *occs, const slice_t *os, const slice_t *s,
+ clocc_t *occ)
+{
+ clocc_htbl_t *tbl = &occs->htbl;
+ unsigned int hash = clocc_hash(s, occ);
+ clocc_bucket_t *bkt = &tbl->bucket[hash % __arraycount(tbl->bucket)];
+ clocc_t *bocc;
+ int i, n;
+
+ TAILQ_FOREACH(bocc, bkt, bucket) {
+ assert(bocc->first <= bocc->last);
+ if (bocc->last - bocc->first != occ->last - occ->first)
+ continue;
+ n = bocc->last - bocc->first + 1;
+ for (i = 0; i < n; i++) {
+ if (get(os, bocc->first + i) != get(s, occ->first + i))
+ break;
+ }
+ if (i == n)
+ return bocc;
+ }
+ return NULL;
+}
+
+static chain_t *
+mac_clocc(const scratch_t *scratch, int idx_a, int idx_b)
+{
+ int column, i, rc;
+ char *buf;
+ uint8_t combo[6];
+ const clocc_t * const a = &scratch->Aoccs.occs[idx_a],
+ * const b = &scratch->Boccs.occs[idx_b];
+
+ column = cloccs_to_column(a, b);
+
+ switch (scratch->macaddr_op) {
+ case 'p':
+ for (i = 0; i < 6; i++) {
+ uint8_t l, r;
+ l = a->val_u.u_macaddr[i];
+ r = b->val_u.u_macaddr[i];
+ combo[i] = (l == r) ? l : 0;
+ }
+ break;
+ default:
+ case 'l':
+ memcpy(combo, a->val_u.u_macaddr, sizeof(combo));
+ break;
+ }
+
+ rc = asprintf(&buf, "%02" PRIx8 ":%02" PRIx8 ":%02" PRIx8 ":"
+ "%02" PRIx8 ":%02" PRIx8 ":%02" PRIx8,
+ combo[0], combo[1], combo[2], combo[3], combo[4], combo[5]);
+
+ if (rc != -1)
+ return newchain_with_match_idx(buf, column, idx_b);
+
+ return newchain_with_match_idx(strdup("<macaddr>"), column, idx_b);
+}
+
+static chain_t *
+ipv4_clocc(const scratch_t *scratch, int idx_a, int idx_b)
+{
+ int column;
+ char *erasep, *ret;
+ uint32_t combo, mask, discrepancies;
+ const clocc_t * const a = &scratch->Aoccs.occs[idx_a],
+ * const b = &scratch->Boccs.occs[idx_b];
+ char buf[sizeof("255.255.255.255/32 ")];
+
+ column = cloccs_to_column(a, b);
+
+ switch (scratch->ipv4_op) {
+ case 'p':
+ discrepancies = b->val_u.u_ipv4 ^ a->val_u.u_ipv4;
+ break;
+ default:
+ case 'l':
+ discrepancies = 0;
+ break;
+ }
+
+ for (mask = 0xffffffff; (discrepancies & mask) != 0; mask &= (mask - 1))
+ ; // do nothing
+
+ combo = htonl(a->val_u.u_ipv4 & mask);
+
+ ret = inet_net_ntop(AF_INET, &combo, 32, buf, sizeof(buf));
+
+ if ((erasep = strchr(buf, '/')) != NULL)
+ *erasep = '\0';
+
+ if (ret != NULL)
+ return newchain_with_match_idx(strdup(buf), column, idx_b);
+
+ return newchain_with_match_idx(strdup("<ipv4>"), column, idx_b);
+}
+
+static chain_t *
+hex_clocc(const scratch_t *scratch, int idx_a, int idx_b, clocc_kind_t kind,
+ clocc_qual_t qual)
+{
+ char *buf;
+ int column, rc;
+ uintmax_t result;
+ const clocc_t * const a = &scratch->Aoccs.occs[idx_a],
+ * const b = &scratch->Boccs.occs[idx_b];
+
+ column = cloccs_to_column(a, b);
+
+ dbg2_printf("0x%jx @ %d / 0x%jx @ %d -> column %d\n",
+ a->val_u.u_uintmax, a->column,
+ b->val_u.u_uintmax, b->column, column);
+
+ switch (scratch->hex_op) {
+ case '&':
+ result = b->val_u.u_uintmax & a->val_u.u_uintmax;
+ break;
+ case '|':
+ result = b->val_u.u_uintmax | a->val_u.u_uintmax;
+ break;
+ case 'l':
+ default:
+ result = a->val_u.u_uintmax;
+ break;
+ }
+
+ if (result == 0 && kind == KIND_INTMAX_0xHEX)
+ return newchain_with_match_idx(strdup("0x0"), column, idx_b);
+
+ if (qual == QUAL_ALLCAPS) {
+ rc = asprintf(&buf,
+ (kind == KIND_INTMAX_0xHEX) ? "0x%jX" : "%jX", result);
+ } else {
+ rc = asprintf(&buf,
+ (kind == KIND_INTMAX_0xHEX) ? "0x%jx" : "%jx", result);
+ }
+
+ if (rc != -1)
+ return newchain_with_match_idx(buf, column, idx_b);
+
+ return newchain_with_match_idx(strdup("<hexmax>"), column, idx_b);
+}
+
+static chain_t *
+int_clocc(const scratch_t *scratch, int idx_a, int idx_b)
+{
+ char *ret;
+ int column;
+ intmax_t result;
+ const clocc_t * const a = &scratch->Aoccs.occs[idx_a],
+ * const b = &scratch->Boccs.occs[idx_b];
+
+ column = cloccs_to_column(a, b);
+
+ dbg2_printf("%jd @ %d / %jd @ %d -> column %d\n",
+ a->val_u.u_intmax, a->column,
+ b->val_u.u_intmax, b->column, column);
+
+ switch (scratch->dec_op) {
+ case '-':
+ result = b->val_u.u_intmax - a->val_u.u_intmax;
+ break;
+ case '+':
+ result = b->val_u.u_intmax + a->val_u.u_intmax;
+ break;
+ case 'l':
+ default:
+ result = a->val_u.u_intmax;
+ break;
+ }
+
+ if (asprintf(&ret, "%jd", result) != -1)
+ return newchain_with_match_idx(ret, column, idx_b);
+
+ return newchain_with_match_idx(strdup("<intmax>"), column, idx_b);
+}
+
+static chain_t *
+clocc(const scratch_t *scratch, int idx_a, int idx_b)
+{
+ const clocc_t * const a = &scratch->Aoccs.occs[idx_a],
+ * const b = &scratch->Boccs.occs[idx_b];
+
+ if (a->kind != b->kind)
+ return newchain(strdup("<clocc*>"), 0);
+
+ if (b->kind == KIND_INTMAX_0xHEX || b->kind == KIND_INTMAX_HEX) {
+ return hex_clocc(scratch, idx_a, idx_b, b->kind,
+ (a->qual == b->qual) ? b->qual : QUAL_NONE);
+ }
+
+ if (b->kind == KIND_INTMAX_DEC)
+ return int_clocc(scratch, idx_a, idx_b);
+
+ if (b->kind == KIND_IPv4)
+ return ipv4_clocc(scratch, idx_a, idx_b);
+
+ if (b->kind == KIND_MACADDR)
+ return mac_clocc(scratch, idx_a, idx_b);
+
+ if (b->kind == KIND_STRING)
+ return newchain(strdup("<word>"), 0);
+
+ return newchain(strdup("<clocc>"), 0);
+}
+
+static chain_t *
+empty(void)
+{
+ return newchain(strdup(""), 0);
+}
+
+static void
+algb(const slice_t *A, const slice_t *B, scratch_t *scratch, size_t *LL,
+ bool do_clocc)
+{
+ const ssize_t m = length(A);
+ const ssize_t n = length(B);
+ ssize_t i, j;
+ size_t *K[2], *Kclocc, *Kcur, *Kprev, *Ktmp;
+ struct {
+ clocc_kind_t A, B;
+ } kind;
+ struct {
+ ssize_t A, B;
+ } wlen;
+ bool inside_clocc = false;
+
+ K[0] = scratch->K[0];
+ K[1] = scratch->K[1];
+
+ /* A copy of the K[] at which a clocc last began. */
+ Kclocc = scratch->Kclocc;
+
+ for (j = 0; j < n + 1; j++)
+ K[0][j] = 0;
+
+ /* make -1 a valid index on all arrays */
+ Kprev = K[0] + 1;
+ Kcur = K[1] + 1;
+ Kclocc++;
+
+ for (i = 0; i < m; i++) {
+ const bool clocc_starts_this_row = do_clocc &&
+ clocc_starts_in_slice_at(&scratch->Aoccs, scratch->Abasis,
+ A, i, NULL);
+
+ if (clocc_starts_this_row) {
+ assert(!inside_clocc);
+ dbg_printf("%s: row %zd, clocc starts\n", __func__, i);
+ memcpy(&Kclocc[-1], &Kprev[-1],
+ sizeof(Kclocc[0]) * (n + 1));
+ inside_clocc = true;
+ }
+
+ const bool clocc_ends_this_row = inside_clocc &&
+ clocc_ends_in_slice_at(&scratch->Aoccs, scratch->Abasis,
+ A, i, &wlen.A, &kind.A);
+
+ for (j = 0; j < n; j++) {
+
+ size_t u = Kprev[j], ul = Kprev[j - 1], l = Kcur[j - 1];
+
+ if (get(A, i) == get(B, j))
+ ul++;
+
+ if (clocc_ends_this_row &&
+ clocc_ends_in_slice_at(&scratch->Boccs,
+ scratch->Bbasis, B, j,
+ &wlen.B, &kind.B) &&
+ kind.A == kind.B && /* match kind */
+ wlen.B <= j + 1 /* must not begin outside of B */) {
+ const size_t step = wlen.A * wlen.B + 1,
+ nscore = Kclocc[j - wlen.B] + step;
+ dbg_printf("%s visits clocc boundary at A[%d] "
+ "and B[%d] step %zu\n", __func__,
+ index_versus_basis(scratch->Abasis, A, i),
+ index_versus_basis(scratch->Bbasis, B, j),
+ step);
+ Kcur[j] = MAX(MAX(l, u),
+ MAX(ul, nscore));
+ } else
+ Kcur[j] = MAX(MAX(u, ul), l);
+ dbg_printf("%s set K[%zd][%zd] to %zu%s\n",
+ __func__, i, j, Kcur[j], inside_clocc ? "*" : "");
+ }
+ if (clocc_ends_this_row) {
+ dbg_printf("%s: row %zd, clocc ends\n", __func__, i);
+ inside_clocc = false;
+ }
+
+ Ktmp = Kprev;
+ Kprev = Kcur;
+ Kcur = Ktmp;
+ }
+ assert(Kprev[-1] == 0 && Kcur[-1] == 0 && Kclocc[-1] == 0);
+ Kprev--;
+ for (j = 0; j < n + 1; j++)
+ LL[j] = Kprev[j];
+}
+
+static const clocc_t *
+index_to_clocc_container(const cloccs_t *o, int i)
+{
+ const clocc_t *occ;
+ int l, m, r;
+
+ l = 0;
+ r = o->noccs - 1;
+
+ while (l <= r) {
+ m = (l + r) / 2;
+ occ = &o->occs[m];
+ if (i < occ->first)
+ r = m - 1;
+ else if (occ->last < i)
+ l = m + 1;
+ else
+ goto found;
+ }
+ return NULL;
+
+found:
+ return occ;
+}
+
+static int
+split_interval(cloccs_t *o, int origin, int len)
+{
+ struct {
+ int lo, mid, hi;
+ } bkpt;
+
+ bkpt.mid = origin + len / 2;
+
+ dbg_printf("searching [%d, %d] for split\n", origin, origin + len - 1);
+
+ const clocc_t *occ = index_to_clocc_container(o, bkpt.mid);
+
+ if (occ == NULL ||
+ occ->first < origin || origin + len < occ->last + 1 ||
+ (occ->first == origin && origin + len == occ->last + 1)) {
+ dbg_printf("split %d\n", bkpt.mid);
+ return bkpt.mid - origin;
+ }
+
+ bkpt.lo = MAX(occ->first, origin);
+ bkpt.hi = MIN(occ->last, origin + len - 1);
+
+ dbg_printf("split %d in [%d, %d] ([%d, %d])\n", bkpt.mid,
+ occ->first, occ->last, bkpt.lo, bkpt.hi);
+
+ if (bkpt.lo == origin) {
+ dbg_printf("split %d -> %d (l.%d)\n", bkpt.mid,
+ bkpt.hi + 1, __LINE__);
+ return bkpt.hi + 1 - origin;
+ }
+ if (bkpt.hi == origin + len - 1) {
+ dbg_printf("split %d -> %d (l.%d)\n", bkpt.mid, bkpt.lo,
+ __LINE__);
+ return bkpt.lo - origin;
+ }
+
+ if (bkpt.mid - bkpt.lo < bkpt.hi - bkpt.mid) {
+ dbg_printf("split %d -> %d (l.%d)\n", bkpt.mid, bkpt.lo,
+ __LINE__);
+ return bkpt.lo - origin;
+ }
+
+ dbg_printf("split %d -> %d (l.%d)\n", bkpt.mid, bkpt.hi + 1,
+ __LINE__);
+ return bkpt.hi + 1 - origin;
+}
+
+/* Find the widest clocc contained entirely in [origin + first, origin + last].
+ * Return its offset from origin at startp, and the last character's offset from
+ * origin at endp. Return the index of the clocc in `o', or
+ * -1 if no clocc matches.
+ */
+static int
+widest_clocc_in_interval(cloccs_t *o, int origin, int first, int last,
+ int *startp, int *endp, clocc_kind_t kind)
+{
+ int i, widest = -1, maxw = 0;
+
+ for (i = 0; i < o->noccs; i++) {
+ int w;
+
+ if (origin + first > o->occs[i].first)
+ continue;
+ if (o->occs[i].last > origin + last)
+ break;
+
+ if (o->occs[i].kind != kind)
+ continue;
+
+ w = o->occs[i].last - o->occs[i].first + 1;
+ if (w > maxw) {
+ maxw = w;
+ *startp = o->occs[i].first - origin;
+ *endp = o->occs[i].last - origin;
+ widest = i;
+ }
+ }
+ return widest;
+}
+
+static int
+interval_to_clocc_index(cloccs_t *o, int origin, int first, int last, clocc_kind_t *kindp)
+{
+ int i;
+
+ for (i = 0; i < o->noccs; i++) {
+ if (origin + first == o->occs[i].first &&
+ origin + last == o->occs[i].last) {
+ if (kindp != NULL)
+ *kindp = o->occs[i].kind;
+ return i;
+ }
+ }
+ return -1;
+}
+
+static inline chain_t *
+algc_return(const size_t m, const size_t n, const origin_t origin,
+ size_t *lcsp, size_t lcs, chain_t *c)
+{
+ if (lcsp != NULL) {
+ dbg_printf("%s([%d, %zu], [%d, %zu]) -> %zu\n", __func__,
+ origin.i, origin.i + m - 1,
+ origin.j, origin.j + n - 1, lcs);
+ *lcsp = lcs;
+ }
+
+ return c;
+}
+
+static chain_t *
+joinchains(chain_t *ac, chain_t *bc)
+{
+ chainelt_t *ace, *bce;
+
+ ace = TAILQ_LAST(&ac->c_head, chainhead);
+ bce = TAILQ_FIRST(&bc->c_head);
+
+ if (ace->ce_column == 0 && bce->ce_column == 0 &&
+ ace->ce_match_idx == -1 && bce->ce_match_idx == -1) {
+ char *c, *c1, *c2;
+ c1 = ace->ce_content;
+ c2 = bce->ce_content;
+ c = malloc(strlen(c1) + strlen(c2) + 1);
+ assert(c != NULL);
+ strcpy(c, c1);
+ strcat(c, c2);
+ free(c1);
+ free(c2);
+ ace->ce_content = c;
+ TAILQ_REMOVE(&bc->c_head, bce, ce_link);
+ TAILQ_CONCAT(&ac->c_head, &bc->c_head, ce_link);
+ free(bc);
+ } else {
+ TAILQ_CONCAT(&ac->c_head, &bc->c_head, ce_link);
+ free(bc);
+ }
+ return ac;
+}
+
+chain_t *
+algc(const slice_t *A, const slice_t *B, scratch_t *scratch,
+ const origin_t origin, const size_t expected_lcs, size_t *lcsp)
+{
+ chain_t *c1, *c2, *c;
+ unsigned int j, k;
+ const size_t m = length(A), n = length(B);
+ struct {
+ int A, B;
+ } ivlidx;
+ struct {
+ size_t topl;
+ size_t botr;
+ size_t tot;
+ } lcs = {.tot = 0};
+ const slice_t *B1n;
+ slice_t *A1i, *Amip1, *Bn1;
+ slice_t *B1k, *Aip1m, *Bkp1n;
+
+ dbg_printf("%s([%d, %zu], [%d, %zu])\n", __func__,
+ origin.i, origin.i + m - 1,
+ origin.j, origin.j + n - 1);
+
+ if (n == 0) {
+ assert(expected_lcs == no_expected_lcs || 0 == expected_lcs);
+ return algc_return(m, n, origin, lcsp, 0, empty());
+ }
+
+ clocc_kind_t kind;
+
+ ivlidx.A = interval_to_clocc_index(&scratch->Aoccs, origin.i, 0, m - 1, &kind);
+
+ /* If A is a clocc occurrence, must try to match A with a clocc in B! */
+ if (ivlidx.A != -1) {
+ int start = 0, end = 0;
+
+ ivlidx.B = widest_clocc_in_interval(&scratch->Boccs, origin.j,
+ 0, n - 1, &start, &end, kind);
+
+ if (ivlidx.B != -1) {
+ dbg_assert(expected_lcs == no_expected_lcs ||
+ (size_t)(end - start + 1) * m + 1 == expected_lcs);
+ return algc_return(m, n, origin, lcsp,
+ (size_t)(end - start + 1) * m + 1,
+ clocc(scratch, ivlidx.A, ivlidx.B));
+ }
+ }
+ if (m == 1) {
+ char A0 = get(A, 0);
+
+ if (ispresent(B, A0)) {
+ dbg_assert(expected_lcs == no_expected_lcs ||
+ 1 == expected_lcs);
+ return algc_return(m, n, origin, lcsp, 1,
+ newchain(strndup(&A0, 1), 0));
+ }
+
+ assert(expected_lcs == no_expected_lcs || 0 == expected_lcs);
+
+ return algc_return(m, n, origin, lcsp, 0, empty());
+ }
+
+ const unsigned int i = split_interval(&scratch->Aoccs, origin.i, m);
+ A1i = subslice(A, 0, i - 1);
+ B1n = B;
+ Amip1 = subslice(A, m - 1, i);
+ Bn1 = subslice(B, n - 1, 0);
+ algb(A1i, B1n, scratch, scratch->L1, true);
+ dbg_printf("%s called algb once\n", __func__);
+ algb(Amip1, Bn1, scratch, scratch->L2, true);
+ dbg_printf("%s called algb twice\n", __func__);
+
+ for (k = j = 0; j < n + 1; j++) {
+ if (scratch->L1[j] + scratch->L2[n - j] > lcs.tot) {
+ k = j;
+ lcs.topl = scratch->L1[j];
+ lcs.botr = scratch->L2[n - j];
+ lcs.tot = lcs.topl + lcs.botr;
+ }
+ }
+ dbg_printf("%s chose k %u (%zu - %u = %zu) lcs %zu\n", __func__,
+ k, n, k, n - k, lcs.tot);
+
+ assert(k != 0 || lcs.topl == 0);
+
+ if (expected_lcs != no_expected_lcs && lcs.tot != expected_lcs) {
+ struct {
+ char l[12], c, r[12];
+ } actx = {.l = "12345.12345", .r = "12345.12345"},
+ bctx = {.l = "12345.12345", .r = "12345.12345"};
+
+ dbg_printf("%s expected lcs %zu, got %zu + %zu = %zu at "
+ "[%d | %d + %u | %d + %zu, %d | %d + %u | %d + %zu]\n",
+ __func__, expected_lcs, lcs.topl, lcs.botr, lcs.tot,
+ origin.i, origin.i, i - 1, origin.i, m - 1,
+ origin.j, origin.j, k, origin.j, n - 1);
+
+ extract(A, i - MIN(i, 6), i - MIN(i, 2), actx.l);
+ actx.c = get(A, i - 1);
+ extract(A, i, MIN(i + 4, m - 1), actx.r);
+
+ dbg_printf("%s A context %s [ %c ] %s",
+ __func__, actx.l, actx.c, actx.r);
+
+ if (k > 0) {
+ extract(B, k - MIN(k, 6), k - MIN(k, 2), bctx.l);
+ bctx.c = get(B, k - 1);
+ extract(B, MIN(k, n - 1), MIN(k + 4, n - 1), bctx.r);
+
+ dbg_printf(", B context %s [ %c ] %s\n",
+ bctx.l, bctx.c, bctx.r);
+ } else
+ dbg_printf("\n");
+
+ dbg_printf("bailing\n");
+ algb(A, B, scratch, scratch->L1, true);
+ const slice_t *Am1 = subslice(A, m - 1, 0);
+ algb(Am1, Bn1, scratch, scratch->L2, true);
+
+ dbg_printf("algb(A, B) -> %zu fwd, -> %zu bwd\n",
+ scratch->L1[n], scratch->L2[n]);
+ abort();
+ }
+
+ size_t lcs1, lcs2;
+
+ if (k >= 1) {
+ B1k = subslice(B, 0, k - 1);
+ c1 = algc(A1i, B1k, scratch, origin, lcs.topl, &lcs1);
+ slice_free(B1k);
+ } else {
+ lcs1 = 0;
+ c1 = empty();
+ }
+
+ if (k < n) {
+ Aip1m = subslice(A, i, m - 1);
+ Bkp1n = subslice(B, k, n - 1);
+ c2 = algc(Aip1m, Bkp1n, scratch,
+ (origin_t){origin.i + i, origin.j + k}, lcs.botr, &lcs2);
+ slice_free(Bkp1n);
+ slice_free(Aip1m);
+ } else {
+ lcs2 = 0;
+ c2 = empty();
+ }
+
+ slice_free(A1i);
+ slice_free(Amip1);
+ slice_free(Bn1);
+ c = joinchains(c1, c2);
+ return algc_return(m, n, origin, lcsp, lcs1 + lcs2, c);
+}
+
+void
+scratch_init(scratch_t *scratch, size_t n, slice_t *Abasis, slice_t *Bbasis)
+{
+ scratch->L1 = malloc(sizeof(scratch->L1[0]) * (n + 1));
+ scratch->L2 = malloc(sizeof(scratch->L2[0]) * (n + 1));
+ scratch->K[0] = malloc(sizeof(scratch->K[0][0]) * (n + 1));
+ scratch->K[1] = malloc(sizeof(scratch->K[1][0]) * (n + 1));
+ scratch->Kclocc = malloc(sizeof(scratch->Kclocc[0]) * (n + 1));
+ scratch->Abasis = Abasis;
+ scratch->Bbasis = Bbasis;
+ scratch->expected_lcs = SIZE_T_MAX;
+}
+
+static int
+slicestr(const slice_t *s, const char *t)
+{
+ ssize_t i, n = length(s);
+ const char *p;
+
+ p = t;
+
+ for (i = 0; i < n; i++) {
+ if (get(s, i) != *p) {
+ p = t;
+ continue;
+ }
+ if (*++p == '\0')
+ return i - (p - t - 1);
+ }
+ return -1;
+}
+
+static void
+emit_mac_clocc(mac_detection_t *d, void *arg)
+{
+ cloccs_t *o = arg;
+ uint8_t addr[6];
+ int rc;
+
+ dbg_printf("found mac string %s @ [%d, %d]\n", d->d_string,
+ d->d_idx.start, d->d_idx.stop);
+
+ if (o->noccs >= (int)__arraycount(o->occs))
+ return;
+
+ rc = sscanf(d->d_string,
+ "%" SCNx8 ":%" SCNx8 ":%" SCNx8 ":%" SCNx8 ":%" SCNx8 ":%" SCNx8 "",
+ &addr[0], &addr[1], &addr[2], &addr[3], &addr[4], &addr[5]);
+
+ assert(rc == 6);
+
+ dbg_printf("converted ipv4 string %s @ [%d, %d] -> "
+ "%02" PRIx8 ":%02" PRIx8 ":%02" PRIx8 ":"
+ "%02" PRIx8 ":%02" PRIx8 ":%02" PRIx8 "\n",
+ d->d_string, d->d_idx.start, d->d_idx.stop,
+ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+
+ o->occs[o->noccs].column = d->d_column.start;
+ o->occs[o->noccs].first = d->d_idx.start;
+ o->occs[o->noccs].last = d->d_idx.stop;
+ o->occs[o->noccs].kind = KIND_MACADDR;
+ o->occs[o->noccs].qual = QUAL_NONE;
+ memcpy(&o->occs[o->noccs].val_u.u_macaddr[0], &addr[0],
+ sizeof(o->occs[o->noccs].val_u.u_macaddr));
+ o->noccs++;
+}
+
+static void
+emit_ipv4_clocc(ipv4_detection_t *d, void *arg)
+{
+ cloccs_t *o = arg;
+ struct {
+ uint32_t net;
+ uint32_t host;
+ } addr;
+ int rc;
+
+ dbg_printf("found ipv4 string %s @ [%d, %d]\n", d->d_string,
+ d->d_idx.start, d->d_idx.stop);
+
+ if (o->noccs >= (int)__arraycount(o->occs))
+ return;
+
+ rc = inet_net_pton(AF_INET, d->d_string, &addr.net, sizeof(addr.net));
+
+ assert(rc != -1);
+
+ addr.host = ntohl(addr.net);
+
+ dbg_printf("converted ipv4 string %s @ [%d, %d] -> %08" PRIx32 "\n",
+ d->d_string, d->d_idx.start, d->d_idx.stop, addr.host);
+
+ o->occs[o->noccs].column = d->d_column.start;
+ o->occs[o->noccs].first = d->d_idx.start;
+ o->occs[o->noccs].last = d->d_idx.stop;
+ o->occs[o->noccs].kind = KIND_IPv4;
+ o->occs[o->noccs].qual = QUAL_NONE;
+ o->occs[o->noccs].val_u.u_ipv4 = addr.host;
+ o->noccs++;
+}
+
+static void
+emit_hex_clocc(hex_detection_t *d, void *arg, bool use0x)
+{
+ cloccs_t *o = arg;
+ intmax_t val;
+ char *end;
+ bool allcaps;
+
+ if (!use0x && strspn(d->d_digits, "0123456789") == strlen(d->d_digits))
+ return;
+
+ allcaps = strcspn(d->d_digits, "abcdef") == strlen(d->d_digits);
+
+ dbg_printf("found hex string %s @ [%d, %d]%s\n", d->d_digits,
+ d->d_idx.start, d->d_idx.stop, allcaps ? " all CAPS" : "");
+
+ if (o->noccs >= (int)__arraycount(o->occs))
+ return;
+
+ val = strtoimax(d->d_digits, &end, 16);
+
+ if (*end != '\0' && errno == ERANGE)
+ warnx("%s: over/underflow at %d", __func__, d->d_column.start);
+
+ o->occs[o->noccs].column = d->d_column.start;
+ o->occs[o->noccs].first = d->d_idx.start;
+ o->occs[o->noccs].last = d->d_idx.stop;
+ o->occs[o->noccs].kind = use0x ? KIND_INTMAX_0xHEX : KIND_INTMAX_HEX;
+ o->occs[o->noccs].qual = allcaps ? QUAL_ALLCAPS : QUAL_NONE;
+ o->occs[o->noccs].val_u.u_uintmax = val;
+ o->noccs++;
+}
+
+static int
+clocc_cmp(const void *l0, const void *r0)
+{
+ const clocc_t *l = l0, *r = r0;
+
+ return l->first - r->first;
+}
+
+static int
+clocc_width(const clocc_t *c)
+{
+ assert(c->last >= c->first);
+ return c->last - c->first + 1;
+}
+
+static void
+cloccs_dedup(cloccs_t *o)
+{
+ int i, j;
+
+ for (i = 1; i < o->noccs;) {
+ clocc_t *l, *r;
+ l = &o->occs[i - 1];
+ r = &o->occs[i];
+ if (l->last < r->first) {
+ i++;
+ continue;
+ }
+ if (clocc_width(r) > clocc_width(l))
+ *l = *r;
+ for (j = i + 1; j < o->noccs; j++)
+ o->occs[j - 1] = o->occs[j];
+ o->noccs--;
+ }
+}
+
+static void
+clocc_htbl_init(clocc_htbl_t *tbl)
+{
+ int i;
+
+ for (i = 0; i < (int)__arraycount(tbl->bucket); i++)
+ TAILQ_INIT(&tbl->bucket[i]);
+}
+
+static void
+clocc_htbl_insert(clocc_htbl_t *tbl, const slice_t *s, clocc_t *occ)
+{
+ int i = clocc_hash(s, occ) % __arraycount(tbl->bucket);
+ TAILQ_INSERT_HEAD(&tbl->bucket[i], occ, bucket);
+}
+
+void
+cloccs_hash(cloccs_t *o, const slice_t *s)
+{
+ int i;
+
+ for (i = 0; i < o->noccs; i++)
+ clocc_htbl_insert(&o->htbl, s, &o->occs[i]);
+}
+
+void
+cloccs_init(cloccs_t *o, const slice_t *s)
+{
+ hex_parser_t *hex_parser;
+ ipv4_parser_t *ipv4_parser;
+ mac_parser_t *mac_parser;
+ size_t column, i;
+ size_t n = length(s);
+ unsigned int j, k;
+ char digits[64 / 3 + 1]; /* XXX how many digits in an intmax_t? */
+
+ clocc_htbl_init(&o->htbl);
+
+ o->noccs = 0;
+ for (i = 0; i < __arraycount(names); i++) {
+ int loc;
+
+ loc = slicestr(s, names[i]);
+ if (loc == -1)
+ continue;
+
+ o->occs[o->noccs].first = loc;
+ o->occs[o->noccs].last = loc + strlen(names[i]) - 1;
+ o->occs[o->noccs].kind = KIND_STRING;
+ dbg_printf("found %s at %d - %d\n",
+ names[i], o->occs[o->noccs].first, o->occs[o->noccs].last);
+ o->noccs++;
+ }
+
+ mac_parser = mac_parser_alloc(emit_mac_clocc, o);
+ if (mac_parser == NULL)
+ goto parse_ipv4;
+
+ column = 0;
+ for (j = 0; j < n; j++) {
+ char c;
+
+ c = get(s, j);
+ ++column;
+ mac_parser_drive(mac_parser, j, column, c);
+ if (c == '\n')
+ column = 0;
+ }
+ mac_parser_drive(mac_parser, n, column + 1, -1);
+
+ mac_parser_free(mac_parser);
+
+parse_ipv4:
+ ipv4_parser = ipv4_parser_alloc(emit_ipv4_clocc, o);
+ if (ipv4_parser == NULL)
+ goto parse_hexadecimal;
+
+ column = 0;
+ for (j = 0; j < n; j++) {
+ char c;
+
+ c = get(s, j);
+ ++column;
+ ipv4_parser_drive(ipv4_parser, j, column, c);
+ if (c == '\n')
+ column = 0;
+ }
+ ipv4_parser_drive(ipv4_parser, n, column + 1, -1);
+
+ ipv4_parser_free(ipv4_parser);
+
+parse_hexadecimal:
+ hex_parser = hex_parser_alloc(emit_hex_clocc, o);
+ if (hex_parser == NULL)
+ goto parse_decimal;
+
+ column = 0;
+ for (j = 0; j < n; j++) {
+ char c;
+
+ c = get(s, j);
+ ++column;
+ hex_parser_drive(hex_parser, j, column, c);
+ if (c == '\n')
+ column = 0;
+ }
+ hex_parser_drive(hex_parser, n, column + 1, -1);
+
+ hex_parser_free(hex_parser);
+
+parse_decimal:
+
+ j = 0;
+ column = 0;
+ while (o->noccs < (int)__arraycount(o->occs)) {
+ intmax_t val;
+ char *end;
+ char c;
+
+ ++column;
+
+ if (j == n)
+ break;
+
+ c = get(s, j);
+ for (k = j + 1; k < n; k++) {
+ if (strchr("0123456789", get(s, k)) == NULL)
+ break;
+ }
+ if (c == '-') {
+ if (j == k - 1) {
+ j++;
+ continue;
+ }
+ } else if (strchr("0123456789", c) == NULL) {
+ if (c == '\n')
+ column = 0;
+ j++;
+ continue;
+ }
+
+ if (k - j > __arraycount(digits) - 1)
+ goto next;
+
+ extract(s, j, k - 1, digits);
+
+ val = strtoimax(digits, &end, 10);
+ if (*end == '\0') {
+ o->occs[o->noccs].column = column;
+ o->occs[o->noccs].first = j;
+ o->occs[o->noccs].last = k - 1;
+ o->occs[o->noccs].kind = KIND_INTMAX_DEC;
+ o->occs[o->noccs].val_u.u_intmax = val;
+ dbg_printf("found %jd at %d - %d\n",
+ val, o->occs[o->noccs].first,
+ o->occs[o->noccs].last);
+ o->noccs++;
+ } else if (errno == ERANGE)
+ warnx("%s: over/underflow at %d", __func__, j);
+next:
+ column += k - 1 - j;
+ j = k;
+ }
+ mergesort(o->occs, o->noccs, sizeof(*o->occs), clocc_cmp);
+ cloccs_dedup(o);
+}
+
+void
+printchain(chain_t *c)
+{
+ chainelt_t *ce;
+ int column = 1;
+
+ TAILQ_FOREACH(ce, &c->c_head, ce_link) {
+ char *found, *s;
+ int nextcolumn;
+
+ for (nextcolumn = column, s = ce->ce_content;
+ *s != '\0' && (found = strchr(s, '\n')) != NULL;
+ s = found + 1)
+ nextcolumn = 1;
+
+ nextcolumn += strlen(s);
+ while (ce->ce_column < 0 && column < -ce->ce_column) {
+ fputc(' ', stdout);
+ column++;
+ }
+ while (ce->ce_column > 0 && nextcolumn - 1 < ce->ce_column) {
+ fputc(' ', stdout);
+ nextcolumn++;
+ }
+ printf("%s", ce->ce_content);
+ column = nextcolumn;
+ }
+}
+
+void
+emit_transformed_text(chain_t *O, cloccs_t *Moccs, cloccs_t *Toccs,
+ const slice_t *match, const slice_t *transform)
+{
+ chainelt_t *ce;
+ int i, next;
+
+ TAILQ_FOREACH(ce, &O->c_head, ce_link) {
+ clocc_t *occ = &Moccs->occs[ce->ce_match_idx];
+
+ if (ce->ce_match_idx == -1)
+ continue;
+
+ if (occ->match == NULL)
+ occ->match = ce;
+ }
+
+ for (next = i = 0; i < Toccs->noccs; i++) {
+ clocc_t *Tocc = &Toccs->occs[i];
+ clocc_t *Mocc = clocc_cross_lookup(Moccs, match, transform,
+ Tocc);
+
+ for (; next < Tocc->first; next++) {
+ char c = get(transform, next);
+ fputc(c, stdout); // XXX check error
+ }
+
+ if (Mocc != NULL && Mocc->match != NULL) {
+ fprintf(stdout, "%s", Mocc->match->ce_content);
+ next = Tocc->last + 1;
+ } else
+ next = Tocc->first;
+ }
+
+ for (; next < (int)length(transform); next++) {
+ char c = get(transform, next);
+ fputc(c, stdout); // XXX check error
+ }
+}
Index: othersrc/external/bsd/arfe/dt/core.h
diff -u /dev/null othersrc/external/bsd/arfe/dt/core.h:1.1
--- /dev/null Tue Sep 22 01:12:09 2015
+++ othersrc/external/bsd/arfe/dt/core.h Tue Sep 22 01:12:09 2015
@@ -0,0 +1,201 @@
+/* $NetBSD: core.h,v 1.1 2015/09/22 01:12:09 dyoung Exp $ */
+/* $ARFE: core.h 250 2015-09-22 01:04:13Z dyoung $ */
+
+/*-
+ * Copyright (c) 2014,2015 David Young <[email protected]>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _CORE_H_
+#define _CORE_H_
+
+#include <sys/cdefs.h> /* for __printf_like */
+#include <assert.h>
+#include <stdarg.h> /* for va_list, va_start, va_end */
+#include <stdbool.h>
+#include <stdio.h>
+#include <inttypes.h>
+
+#include <sys/queue.h> /* for TAILQ_*() */
+
+#include "portability.h"
+
+static int dbg_printf(const char *, ...) __printflike(1, 2);
+static int dbg2_printf(const char *, ...) __printflike(1, 2);
+
+#if defined(HB_ASSERT)
+#define dbg_assert assert
+#else
+#define dbg_assert(__x) do { } while (false) /* do nothing */
+#endif
+
+#if defined(HB_DEBUG)
+static inline int
+dbg_printf(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ const int rc = vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ return rc;
+}
+
+static inline int
+dbg2_printf(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ const int rc = vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ return rc;
+}
+#else
+static inline int
+dbg_printf(const char *fmt, ...)
+{
+ return 0;
+}
+
+static inline int
+dbg2_printf(const char *fmt, ...)
+{
+ return 0;
+}
+#endif
+
+typedef struct origin {
+ int i;
+ int j;
+} origin_t;
+
+typedef struct slice {
+ const char *first;
+ const char *last;
+ bool backward;
+} slice_t;
+
+typedef enum {
+ KIND_NONE = 0
+ , KIND_INTMAX_DEC
+ , KIND_INTMAX_HEX
+ , KIND_INTMAX_0xHEX
+ , KIND_IPv4
+ , KIND_MACADDR
+ , KIND_STRING
+} clocc_kind_t;
+
+typedef enum {
+ QUAL_NONE = 0
+ , QUAL_ALLCAPS
+} clocc_qual_t;
+
+struct chainelt;
+typedef struct chainelt chainelt_t;
+
+typedef struct clocc {
+ int column;
+ int first;
+ int last;
+ clocc_kind_t kind;
+ clocc_qual_t qual;
+ union {
+ intmax_t u_intmax;
+ uintmax_t u_uintmax;
+ uint32_t u_ipv4;
+ uint8_t u_macaddr[6];
+ } val_u;
+ TAILQ_ENTRY(clocc) bucket;
+ chainelt_t *match;
+} clocc_t;
+
+struct chainelt {
+ TAILQ_ENTRY(chainelt) ce_link;
+ int ce_column; /* 0: no column alignment,
+ * -x: starts at column x (1 = first column)
+ * x: ends at column x (1 = first column)
+ */
+ char *ce_content;
+ int ce_match_idx;
+};
+
+typedef struct chain {
+ TAILQ_HEAD(chainhead, chainelt) c_head;
+} chain_t;
+
+TAILQ_HEAD(clocc_bucket, clocc);
+
+typedef struct clocc_bucket clocc_bucket_t;
+
+enum {
+ MATCH_TBL_IDX = 0
+ , TRANSFORM_TBL_IDX = 1
+ , NTBL = 2
+};
+
+typedef struct clocc_htbl {
+ clocc_bucket_t bucket[1024];
+} clocc_htbl_t;
+
+typedef struct cloccs {
+ int noccs;
+ clocc_t occs[1024];
+ clocc_htbl_t htbl;
+} cloccs_t;
+
+typedef struct scratch {
+ size_t *L1; /* n + 1 */
+ size_t *L2; /* n + 1 */
+ size_t *K[2]; /* 2 x (n + 1) */
+ size_t *Kclocc; /* n + 1 */
+ cloccs_t Aoccs, Boccs;
+ slice_t *Abasis, *Bbasis;
+ size_t expected_lcs;
+ char ipv4_op; /* 'p' for prefix (smallest containing subnet),
+ * 'l' for copy left argument
+ */
+ char macaddr_op;/* 'p' for prefix (leading octets in common),
+ * 'l' for copy left argument
+ */
+ char dec_op; /* '+' or '-' for add or subtract, 'l' for copy left
+ * argument
+ */
+ char hex_op; /* '&' or '|' for bitwise-AND and -OR, 'l' for copy
+ * left argument
+ */
+} scratch_t;
+
+extern const size_t no_expected_lcs;
+
+slice_t *file_to_slice(const char *);
+void emit_transformed_text(chain_t *, cloccs_t *, cloccs_t *,
+ const slice_t *, const slice_t *);
+void printchain(chain_t *);
+void cloccs_init(cloccs_t *, const slice_t *);
+void cloccs_hash(cloccs_t *, const slice_t *);
+chain_t *algc(const slice_t *, const slice_t *, scratch_t *, const origin_t,
+ const size_t, size_t *);
+void scratch_init(scratch_t *, size_t, slice_t *, slice_t *);
+ssize_t length(const slice_t *);
+
+#endif /* _CORE_H_ */
Index: othersrc/external/bsd/arfe/it/it.c
diff -u /dev/null othersrc/external/bsd/arfe/it/it.c:1.1
--- /dev/null Tue Sep 22 01:12:09 2015
+++ othersrc/external/bsd/arfe/it/it.c Tue Sep 22 01:12:09 2015
@@ -0,0 +1,90 @@
+/* $NetBSD: it.c,v 1.1 2015/09/22 01:12:09 dyoung Exp $ */
+/* $ARFE: dt.c 247 2015-09-14 02:52:57Z dyoung $ */
+
+/*-
+ * Copyright (c) 2014,2015 David Young <[email protected]>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <ctype.h> /* for ispunct(), isspace(), isxdigit() */
+#include <err.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+
+#include <sys/param.h> /* for MAX() */
+#include <sys/queue.h> /* for TAILQ_*() */
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include "core.h"
+
+static void
+usage(void)
+{
+ fprintf(stderr, "usage: %s string-a string-b\n", getprogname());
+ exit(EXIT_FAILURE);
+}
+
+int
+main(int argc, char **argv)
+{
+ slice_t *left, *match;
+ size_t n;
+ chain_t *O;
+ scratch_t scratch;
+ size_t lcs;
+
+ setprogname(argv[0]);
+
+ if (argc != 3)
+ usage();
+
+ left = file_to_slice(argv[1]);
+ match = file_to_slice(argv[2]);
+
+ n = length(match);
+ scratch_init(&scratch, n, left, match);
+ cloccs_init(&scratch.Aoccs, left);
+ cloccs_init(&scratch.Boccs, match);
+ cloccs_hash(&scratch.Boccs, match);
+
+ scratch.macaddr_op = 'p';
+ scratch.ipv4_op = 'p';
+ scratch.dec_op = '+';
+ scratch.hex_op = '|';
+
+ O = algc(left, match, &scratch, (origin_t){0, 0}, no_expected_lcs,
+ &lcs);
+
+ dbg_printf("lcs = %zu\n", lcs);
+ printchain(O);
+ dbg_printf("\n");
+
+ return EXIT_SUCCESS;
+}
Index: othersrc/external/bsd/arfe/tt/tt.c
diff -u /dev/null othersrc/external/bsd/arfe/tt/tt.c:1.1
--- /dev/null Tue Sep 22 01:12:09 2015
+++ othersrc/external/bsd/arfe/tt/tt.c Tue Sep 22 01:12:09 2015
@@ -0,0 +1,94 @@
+/* $NetBSD: tt.c,v 1.1 2015/09/22 01:12:09 dyoung Exp $ */
+/* $ARFE$ */
+
+/*-
+ * Copyright (c) 2014,2015 David Young <[email protected]>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <ctype.h> /* for ispunct(), isspace(), isxdigit() */
+#include <err.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+
+#include <sys/param.h> /* for MAX() */
+#include <sys/queue.h> /* for TAILQ_*() */
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include "core.h"
+
+static void
+usage(void)
+{
+ fprintf(stderr, "usage: %s string-a string-b\n", getprogname());
+ exit(EXIT_FAILURE);
+}
+
+int
+main(int argc, char **argv)
+{
+ slice_t *left, *match, *transform;
+ size_t n;
+ chain_t *O;
+ scratch_t scratch;
+ size_t lcs;
+ cloccs_t Toccs;
+
+ setprogname(argv[0]);
+
+ if (argc != 4)
+ usage();
+ /*FALLTHROUGH*/
+
+ left = file_to_slice(argv[1]);
+ match = file_to_slice(argv[2]);
+
+ n = length(match);
+ scratch_init(&scratch, n, left, match);
+ cloccs_init(&scratch.Aoccs, left);
+ cloccs_init(&scratch.Boccs, match);
+ cloccs_hash(&scratch.Boccs, match);
+
+ scratch.macaddr_op = 'l';
+ scratch.ipv4_op = 'l';
+ scratch.dec_op = 'l';
+ scratch.hex_op = 'l';
+ transform = file_to_slice(argv[3]);
+ cloccs_init(&Toccs, transform);
+
+ O = algc(left, match, &scratch, (origin_t){0, 0}, no_expected_lcs,
+ &lcs);
+
+ dbg_printf("lcs = %zu\n", lcs);
+ emit_transformed_text(O, &scratch.Boccs, &Toccs, match, transform);
+ dbg_printf("\n");
+
+ return EXIT_SUCCESS;
+}