[Toybox] [New Toy] hexdump/hd

Moritz Röhrich via Toybox Sat, 25 Sep 2021 16:34:14 -0700

Dear Mr Landley,

attached is a patch for a naive hexdump implementation. It can do hexadecimal,
octal, decimal and printable character output and will concatenate the input
if given multiple files. It also comes with some tests. I hope it meets quality
expectations, and I'd greatly appreciate feedback.


On a different note, I have been playing with lcov and toybox lately and it does
not seem to pick up coverage data correctly. For some commands it seems to work
ok, but for most commands it thinks the code never gets executed. Is this a
known issue? Are there linker or compiler knobs that need to be turned to give
correct coverage data?

Best regards, Moritz Röhrich

From 8ababc17054689d63d0feb7291215f3e433b1851 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moritz=20R=C3=B6hrich?= <mor...@ildefons.de>
Date: Thu, 19 Aug 2021 22:47:37 +0200
Subject: [PATCH] new toy: Simple hexdump implementation

- Add simple hexdump implementation
- Add tests for hexdump
---
 tests/hexdump.test     | 135 ++++++++++++++++++++++++++++++++++++
 toys/pending/hexdump.c | 153 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 288 insertions(+)
 create mode 100755 tests/hexdump.test
 create mode 100644 toys/pending/hexdump.c

diff --git a/tests/hexdump.test b/tests/hexdump.test
new file mode 100755
index 00000000..e319957c
--- /dev/null
+++ b/tests/hexdump.test
@@ -0,0 +1,135 @@
+#!/bin/bash
+# Tests for the hexdump toy: output formats, -n/-s limits, line squeezing.
+[ -f testing.sh ] && . testing.sh
+# Each output format applied to the same 7-byte input file.
+testcmd "simple file" "input" "0000000 6973 706d 656c 000a\n0000007\n" "simple\\n" ""
+testcmd "simple file -b" "-b input" "0000000 163 151 155 160 154 145 012\n0000007\n" "simple\\n" ""
+testcmd "simple file -c" "-c input" "0000000   s   i   m   p   l   e  \\\\n\n0000007\n" "simple\\n" ""
+testcmd "simple file -d" "-d input" "0000000 26995 28781 25964 00010\n0000007\n" "simple\\n" ""
+testcmd "simple file -o" "-o input" "0000000 064563 070155 062554 000012\n0000007\n" "simple\\n" ""
+testcmd "simple file -x" "-x input" "0000000 6973 706d 656c 000a\n0000007\n" "simple\\n" ""
+# -C: hex bytes plus an ASCII column; a short last line is padded with spaces.
+testcmd \
+  "simple file canonical output -C" \
+  "-C input" \
+  "\
+00000000  73 69 6d 70 6c 65 0a                              |simple.|\n\
+00000007\n" \
+  "simple\n" \
+  ""
+testcmd \
+  "simple file canonical output -C multiline" \
+  "-C input" \
+  "\
+00000000  73 69 6d 70 6c 65 0a 62  61 72 66 6f 6f 62 61 72  |simple.barfoobar|\n\
+00000010  66 6f 6f 62 61 72 0a                              |foobar.|\n\
+00000017\n" \
+  "\
+simple\n\
+barfoobarfoobar\n" \
+  ""
+# -n stops after N bytes; -s starts N bytes in and the offsets reflect that.
+testcmd \
+  "head of file -n 10" \
+  "-n 10 input" \
+  "\
+0000000 6973 706d 656c 730a 6d69\n\
+000000a\n" \
+  "simple\nsimple\n" \
+  ""
+testcmd \
+  "skip head of file -s 10" \
+  "-s 10 input" \
+  "\
+000000a 6c70 0a65\n\
+000000e\n" \
+  "simple\nsimple\n" \
+  ""
+# Identical consecutive full lines collapse to one "*" unless -v is given.
+testcmd \
+  "squeeze repeating lines" \
+  "input" \
+  "\
+0000000 6161 6161 6161 6161 6161 6161 6161 0a61\n\
+*\n\
+0000070 6f66 006f\n\
+0000073\n" \
+  "\
+aaaaaaaaaaaaaaa\n\
+aaaaaaaaaaaaaaa\n\
+aaaaaaaaaaaaaaa\n\
+aaaaaaaaaaaaaaa\n\
+aaaaaaaaaaaaaaa\n\
+aaaaaaaaaaaaaaa\n\
+aaaaaaaaaaaaaaa\n\
+foo" \
+  ""
+testcmd \
+  "squeeze repeating lines in separate runs" \
+  "input" \
+  "\
+0000000 6161 6161 6161 6161 6161 6161 6161 0a61\n\
+*\n\
+0000030 6262 6262 6262 6262 6262 6262 6262 0a62\n\
+0000040 6161 6161 6161 6161 6161 6161 6161 0a61\n\
+*\n\
+0000070 6262 6262 6262 6262 6262 6262 6262 0a62\n\
+0000080\n" \
+  "\
+aaaaaaaaaaaaaaa\n\
+aaaaaaaaaaaaaaa\n\
+aaaaaaaaaaaaaaa\n\
+bbbbbbbbbbbbbbb\n\
+aaaaaaaaaaaaaaa\n\
+aaaaaaaaaaaaaaa\n\
+aaaaaaaaaaaaaaa\n\
+bbbbbbbbbbbbbbb\n" \
+  ""
+testcmd \
+  "don't squeeze repeating lines" \
+  "-v input" \
+  "\
+0000000 6161 6161 6161 6161 6161 6161 6161 0a61\n\
+0000010 6161 6161 6161 6161 6161 6161 6161 0a61\n\
+0000020 6161 6161 6161 6161 6161 6161 6161 0a61\n\
+0000030 6161 6161 6161 6161 6161 6161 6161 0a61\n\
+0000040 6161 6161 6161 6161 6161 6161 6161 0a61\n\
+0000050 6161 6161 6161 6161 6161 6161 6161 0a61\n\
+0000060 6161 6161 6161 6161 6161 6161 6161 0a61\n\
+0000070 6f66 006f\n\
+0000073\n" \
+  "\
+aaaaaaaaaaaaaaa\n\
+aaaaaaaaaaaaaaa\n\
+aaaaaaaaaaaaaaa\n\
+aaaaaaaaaaaaaaa\n\
+aaaaaaaaaaaaaaa\n\
+aaaaaaaaaaaaaaa\n\
+aaaaaaaaaaaaaaa\n\
+foo" \
+  ""
+# Offsets and squeezing must carry on across a file boundary.
+for _ in {1..25}; do echo "foobar"; done > file1
+for _ in {1..25}; do echo "buzzbar"; done > file2
+# file1 is 175 (0xaf) bytes, so file2 starts in the middle of an output line.
+testcmd \
+  "accumulate offset across files" \
+  "file1 file2" \
+  "0000000 6f66 626f 7261 660a 6f6f 6162 0a72 6f66\n\
+0000010 626f 7261 660a 6f6f 6162 0a72 6f66 626f\n\
+0000020 7261 660a 6f6f 6162 0a72 6f66 626f 7261\n\
+0000030 660a 6f6f 6162 0a72 6f66 626f 7261 660a\n\
+0000040 6f6f 6162 0a72 6f66 626f 7261 660a 6f6f\n\
+0000050 6162 0a72 6f66 626f 7261 660a 6f6f 6162\n\
+0000060 0a72 6f66 626f 7261 660a 6f6f 6162 0a72\n\
+0000070 6f66 626f 7261 660a 6f6f 6162 0a72 6f66\n\
+0000080 626f 7261 660a 6f6f 6162 0a72 6f66 626f\n\
+0000090 7261 660a 6f6f 6162 0a72 6f66 626f 7261\n\
+00000a0 660a 6f6f 6162 0a72 6f66 626f 7261 620a\n\
+00000b0 7a75 627a 7261 620a 7a75 627a 7261 620a\n\
+*\n\
+0000170 7a75 627a 7261 000a\n\
+0000177\n" \
+  "" \
+  ""
+rm file1 file2
diff --git a/toys/pending/hexdump.c b/toys/pending/hexdump.c
new file mode 100644
index 00000000..53bee903
--- /dev/null
+++ b/toys/pending/hexdump.c
@@ -0,0 +1,153 @@
+/* hexdump.c - Dump file content in hexadecimal format to stdout
+ *
+ * Copyright 2021 Moritz Röhrich <mor...@ildefons.de>
+ *
+ * No standard
+ *
+ * TODO:
+ *  - Implement format strings (see man (1) hexdump)
+
+USE_HEXDUMP(NEWTOY(hexdump, "bcCdn#<0os#<0vx[!bcCdox]", TOYFLAG_USR|TOYFLAG_BIN))
+USE_HD(OLDTOY(hd, hexdump, TOYFLAG_USR|TOYFLAG_BIN))
+
+config HEXDUMP
+  bool "hexdump"
+  default n
+  help
+    usage: hexdump [-b|-c|-C|-d|-o|-x] [-v] [-n bytes] [-s bytes] FILES
+
+    Dump file content in hexadecimal format to stdout.
+
+    -b            One-byte octal display
+    -c            One-byte character display
+    -C            Canonical (hex + ASCII) display
+    -d            Two-bytes decimal display
+    -n <bytes>    Dump only <bytes> bytes of the input
+    -o            Two-bytes octal display
+    -s <bytes>    Skip <bytes> bytes from the beginning of the input
+    -v            Do not squeeze identical lines in the output together
+    -x            Two-bytes hexadecimal display (default)
+
+config HD
+  bool "hd"
+  default HEXDUMP
+  help
+    See hexdump
+*/
+
+#define FOR_hexdump
+#include "toys.h"
+
+GLOBALS(
+    long s, n;  // byte counts given to -s (skip) and -n (dump limit)
+    long long len, pos, ppos;  // last read size, input offset, printed offset
+    const char *fmt;  // printf format for one numeric cell, set in hexdump_main
+    unsigned int fn, bc;  // files seen so far and bytes held in linebuf
+    char linebuf[16];  // line buffer - serves double duty for squeezing repeated
+                       // lines and for accumulating full lines across file
+                       // boundaries if necessary.
+)
+
+// Text shown by -c for a non-printable byte: its C escape sequence if it has
+// one (NUL, \a \b \t \n \v \f \r), otherwise "??". Returned strings are static.
+const char *make_printable(unsigned char byte) {
+  static const char raw[] = "\a\b\t\n\v\f\r";
+  static const char *const esc[] = {"\\a","\\b","\\t","\\n","\\v","\\f","\\r"};
+  const char *hit;
+
+  if (!byte) return "\\0";  // handled first: strchr() would match raw's NUL
+  hit = strchr(raw, byte);
+
+  return hit ? esc[hit-raw] : "??";  // "??" stands for every other byte
+}
+
+// Dump one input file (loopfiles callback). State lives in TT so offsets and
+// a partly filled line carry over from file to file. name is unused.
+void do_hexdump(int fd, char *name) {
+  unsigned char *line = (unsigned char *)TT.linebuf;  // avoid sign extension
+  long long fs, want, left;  // file size, read size, room before -n limit
+  unsigned short block, adv, i;
+  int sl = 0;  // nonzero once "*" was printed for the current run of repeats
+
+  TT.fn++;  // keep track of how many files have been seen.
+  // -s: seek past the skip; a file shorter than what remains is skipped whole.
+  if (FLAG(s) && (TT.s-TT.pos>0)) {
+    fs = xlseek(fd, 0L, SEEK_END);
+    if (fs < TT.s-TT.pos) {
+      TT.pos += fs;
+      TT.ppos += fs;
+    } else {
+      xlseek(fd, TT.s-TT.pos, SEEK_SET);
+      TT.ppos = TT.s;
+      TT.pos = TT.s;
+    }
+  }
+
+  for (;; TT.pos += TT.len) {
+    // Read up to the end of the current 16-byte line, or less at the -n limit.
+    want = 16-(TT.bc%16);
+    left = TT.s+TT.n-TT.pos;
+    if (TT.n && left < want) want = left;
+    if ((TT.len = readall(fd, toybuf, want)) <= 0) break;
+    // Squeeze: a full line equal to the previous full line (TT.bc == 16 means
+    // linebuf holds one) is replaced by a single "*" for the whole run.
+    if (!FLAG(v) && TT.len == 16 && TT.bc == 16 && !memcmp(toybuf, line, 16)) {
+      if (!sl) printf("*\n");
+      sl = 1;
+      TT.ppos += TT.len;
+      continue;
+    }
+
+    // memcpy, not strncpy: the data is binary and may contain NUL bytes.
+    memcpy(line+(TT.bc%16), toybuf, TT.len);
+    TT.bc = TT.bc % 16 + TT.len;
+    sl = 0;
+    // Print complete lines; print a partial one only when nothing can follow:
+    // last file (or toys.optc == 0, no file arguments) or -n limit reached.
+    if (TT.bc == 16 || TT.fn >= toys.optc
+        || (TT.n && TT.pos+TT.len == TT.s+TT.n)) {
+      if (FLAG(C)) {
+        printf("%08llx", TT.ppos);
+        for (i = 0; i < 16; i++) {
+          if (!(i % 8)) putchar(' ');
+          if (i < TT.bc) printf(" %02x", line[i]);
+          else printf("   ");
+        }
+        printf("  |");
+        for (i = 0; i < TT.bc; i++)
+          putchar((line[i] < ' ' || line[i] > '~') ? '.' : line[i]);
+        putchar('|');
+      } else if (FLAG(c)) {
+        printf("%07llx", TT.ppos);
+        for (i = 0; i < TT.bc; i++) {
+          if (line[i] >= ' ' && line[i] <= '~') printf("%4c", line[i]);
+          else printf("%4s", make_printable(line[i]));
+        }
+      } else {
+        printf("%07llx", TT.ppos);
+        adv = FLAG(b) ? 1 : 2;
+        for (i = 0; i < TT.bc; i += adv) {
+          // A trailing odd byte is printed alone; pairs are little-endian.
+          block = (FLAG(b) || i == TT.bc-1)
+            ? line[i] : (line[i] | line[i+1] << 8);
+          printf(TT.fmt, block);
+        }
+      }
+      putchar('\n');
+      TT.ppos += TT.bc;
+    }
+  }
+
+  if (TT.len < 0) perror_exit("read");
+}
+
+// Entry point: choose the numeric cell format, dump every input, print trailer.
+void hexdump_main(void) {
+  TT.fn = 0;
+  TT.fmt = FLAG(b) ? " %03o" : FLAG(d) ? " %05d" : FLAG(o) ? " %06o" : " %04x";
+
+  loopfiles(toys.optargs, do_hexdump);
+  // The final offset line is 8 digits wide for -C and 7 digits otherwise.
+  if (FLAG(C)) printf("%08llx\n", TT.pos);
+  else printf("%07llx\n", TT.pos);
+}
-- 
2.30.2

_______________________________________________
Toybox mailing list
Toybox@lists.landley.net
http://lists.landley.net/listinfo.cgi/toybox-landley.net

[Toybox] [New Toy] hexdump/hd

Reply via email to