lloda pushed a commit to branch main
in repository guile.
commit db74281dcdd4aa4051f0615b3a783f67eb95a8e6
Author: Rob Browning <[email protected]>
AuthorDate: Fri Apr 11 17:43:57 2025 -0500
Write srfi-207 bytestrings when (print-enable 'bytestrings)
* doc/ref/api-evaluation.texi: add bytestrings print option.
* doc/ref/srfi-modules.texi: mention bytestrings print option.
* libguile/bytevectors.c (scm_i_print_bytevector): print bytestrings
when bytestrings print option is enabled.
* libguile/print.c: add bytestrings print option.
* libguile/private-options.h: : add bytestrings print option.
* test-suite/tests/srfi-207.test: Add bytestring write tests.
---
doc/ref/api-evaluation.texi | 1 +
doc/ref/srfi-modules.texi | 8 ++--
libguile/bytevectors.c | 86 +++++++++++++++++++++++++++++++++++++++++-
libguile/print.c | 3 ++
libguile/private-options.h | 3 +-
test-suite/tests/srfi-207.test | 22 ++++++++++-
6 files changed, 116 insertions(+), 7 deletions(-)
diff --git a/doc/ref/api-evaluation.texi b/doc/ref/api-evaluation.texi
index 1b764ba53..3071cdce7 100644
--- a/doc/ref/api-evaluation.texi
+++ b/doc/ref/api-evaluation.texi
@@ -499,6 +499,7 @@ escape-newlines yes Render newlines as \n
when printing
using `write'.
r7rs-symbols no Escape symbols using R7RS |...| symbol
notation.
+bytestrings no Render bytevectors as bytestrings (SRFI-207)
@end smalllisp
These options may be modified with the print-set! syntax.
diff --git a/doc/ref/srfi-modules.texi b/doc/ref/srfi-modules.texi
index 6a07510bf..1cbb0c030 100644
--- a/doc/ref/srfi-modules.texi
+++ b/doc/ref/srfi-modules.texi
@@ -7425,9 +7425,11 @@ bytestrings and bytevectors are exactly the same type.
@subsubsection External Notation
@cindex bytestring notation
-The basic form of a string-notated bytevector is @code{#u8"CONTENT"}.
-The Scheme reader will read them if bytestrings are enabled via
-@code{(read-enable 'bytestrings)}.
+The basic form of a string-notated bytevector is
+@code{#u8"CONTENT"}. The Scheme reader will read them if bytestrings are
+enabled via @code{(read-enable 'bytestrings)}, and the Scheme writer
+will write them if they are enabled via @code{(print-enable
+'bytestrings)}.
To avoid character encoding issues within string-notated bytevectors,
only printable ASCII characters (that is, Unicode codepoints in the
diff --git a/libguile/bytevectors.c b/libguile/bytevectors.c
index 6b14c7246..3b984c6cd 100644
--- a/libguile/bytevectors.c
+++ b/libguile/bytevectors.c
@@ -26,6 +26,7 @@
#include <byteswap.h>
#include <intprops.h>
#include <errno.h>
+#include <stdio.h>
#include <striconveh.h>
#include <uniconv.h>
#include <unistr.h>
@@ -52,6 +53,7 @@
#include "numbers.h"
#include "pairs.h"
#include "ports.h"
+#include "private-options.h"
#include "srfi-4.h"
#include "strings.h"
#include "symbols.h"
@@ -488,8 +490,8 @@ scm_c_bytevector_set_x (SCM bv, size_t index, uint8_t value)
-int
-scm_i_print_bytevector (SCM bv, SCM port, scm_print_state *pstate SCM_UNUSED)
+static int
+print_bytevector (SCM bv, SCM port, scm_print_state *pstate SCM_UNUSED)
{
ssize_t ubnd, inc, i;
scm_t_array_handle h;
@@ -511,6 +513,86 @@ scm_i_print_bytevector (SCM bv, SCM port, scm_print_state
*pstate SCM_UNUSED)
return 1;
}
+
+
+static int
+print_bytestring (SCM bv, SCM port, scm_print_state *pstate SCM_UNUSED)
+{
+ assert (scm_is_bytevector (bv));
+
+ const size_t c_len = SCM_BYTEVECTOR_LENGTH (bv);
+ const uint8_t * const c_bv = (uint8_t *) SCM_BYTEVECTOR_CONTENTS (bv);
+
+ scm_puts ("#u8\"", port);
+ for (size_t i = 0; i < c_len; i++)
+ {
+ switch (c_bv[i])
+ {
+ case '\n': scm_puts("\\n", port); break;
+ case '\r': scm_puts("\\r", port); break;
+ case '"': scm_puts("\\\"", port); break;
+ case '\\': scm_puts("\\\\", port); break;
+ case '\t': scm_puts("\\t", port); break;
+ case '|': scm_puts("\\|", port); break;
+ case '\a': scm_puts("\\a", port); break;
+ case '\b': scm_puts("\\b", port); break;
+ case '\0': scm_puts("\\x0;", port); break;
+ case 0x01: scm_puts("\\x1;", port); break;
+ case 0x02: scm_puts("\\x2;", port); break;
+ case 0x03: scm_puts("\\x3;", port); break;
+ case 0x04: scm_puts("\\x4;", port); break;
+ case 0x05: scm_puts("\\x5;", port); break;
+ case 0x06: scm_puts("\\x6;", port); break;
+ case 0x0b: scm_puts("\\xb;", port); break;
+ case 0x0c: scm_puts("\\xc;", port); break;
+ case 0x0e: scm_puts("\\xe;", port); break;
+ case 0x0f: scm_puts("\\xf;", port); break;
+ case 0x10: scm_puts("\\x10;", port); break;
+ case 0x11: scm_puts("\\x11;", port); break;
+ case 0x12: scm_puts("\\x12;", port); break;
+ case 0x13: scm_puts("\\x13;", port); break;
+ case 0x14: scm_puts("\\x14;", port); break;
+ case 0x15: scm_puts("\\x15;", port); break;
+ case 0x16: scm_puts("\\x16;", port); break;
+ case 0x17: scm_puts("\\x17;", port); break;
+ case 0x18: scm_puts("\\x18;", port); break;
+ case 0x19: scm_puts("\\x19;", port); break;
+ case 0x1a: scm_puts("\\x1a;", port); break;
+ case 0x1b: scm_puts("\\x1b;", port); break;
+ case 0x1c: scm_puts("\\x1c;", port); break;
+ case 0x1d: scm_puts("\\x1d;", port); break;
+ case 0x1e: scm_puts("\\x1e;", port); break;
+ case 0x1f: scm_puts("\\x1f;", port); break;
+ default:
+ if (c_bv[i] <= 0x7e)
+ scm_putc (c_bv[i], port);
+ else
+ {
+ char digits[3];
+ const int n = snprintf(digits, 3, "%2x", c_bv[i]);
+ assert (n == 2);
+ scm_puts ("\\x", port);
+ scm_puts (digits, port);
+ scm_puts (";", port);
+ }
+ }
+ }
+ scm_putc ('"', port);
+ return 1;
+}
+
+
+
+int
+scm_i_print_bytevector (SCM bv, SCM port, scm_print_state *pstate SCM_UNUSED)
+{
+ if (!SCM_PRINT_BYTESTRINGS_P)
+ return print_bytevector(bv, port, pstate);
+ else
+ return print_bytestring(bv, port, pstate);
+}
+
+
/* General operations. */
diff --git a/libguile/print.c b/libguile/print.c
index b10f0f8a8..c85c3a66f 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -121,6 +121,9 @@ scm_t_option scm_print_opts[] = {
"Render newlines as \\n when printing using `write'." },
{ SCM_OPTION_BOOLEAN, "r7rs-symbols", 0,
"Escape symbols using R7RS |...| symbol notation." },
+ { SCM_OPTION_BOOLEAN, "bytestrings", 0,
+ "Render bytevectors as bytestrings (SRFI 207), "
+ "e.g. #u8\"\\xe2;\\x88;\\x9e; Improbability\"." },
{ 0 },
};
diff --git a/libguile/private-options.h b/libguile/private-options.h
index 9018532c0..e2d237fb3 100644
--- a/libguile/private-options.h
+++ b/libguile/private-options.h
@@ -48,7 +48,8 @@ SCM_INTERNAL scm_t_option scm_print_opts[];
#define SCM_PRINT_KEYWORD_STYLE (SCM_PACK (scm_print_opts[2].val))
#define SCM_PRINT_ESCAPE_NEWLINES_P scm_print_opts[3].val
#define SCM_PRINT_R7RS_SYMBOLS_P scm_print_opts[4].val
-#define SCM_N_PRINT_OPTIONS 5
+#define SCM_PRINT_BYTESTRINGS_P scm_print_opts[5].val
+#define SCM_N_PRINT_OPTIONS 6
/*
diff --git a/test-suite/tests/srfi-207.test b/test-suite/tests/srfi-207.test
index 735e92cd7..c7973b720 100644
--- a/test-suite/tests/srfi-207.test
+++ b/test-suite/tests/srfi-207.test
@@ -479,6 +479,13 @@
(dynamic-wind (λ () (read-enable 'bytestrings))
(λ () (read port))
(λ () (unless keep (read-disable 'bytestrings))))))))
+ (define (captured-bytestring-output f)
+ (call-with-output-string
+ (λ (port)
+ (let ((keep (memq 'bytestrings (print-options))))
+ (dynamic-wind (λ () (print-enable 'bytestrings))
+ (λ () (f port))
+ (λ () (unless keep (print-disable 'bytestrings))))))))
(pass-if-exception "reading when not enabled" exception:read-error
(call-with-input-string "#u8\"\\xe2;\\x88;\\x9e; Improbability\"" read))
@@ -487,4 +494,17 @@
(read-with-bytestrings-enabled "#u8\"\\xe2;\\x88;\\x9e; Improbability\""))
(pass-if-equal "reading all encodings"
all-decoded
- (read-with-bytestrings-enabled all-encoded)))
+ (read-with-bytestrings-enabled all-encoded))
+
+ (pass-if-equal "writing when not enabled"
+ "#vu8(226 136 158 32 73 109 112 114 111 98 97 98 105 108 105 116 121)"
+ (call-with-output-string
+ (λ (port) (write (string->utf8 "∞ Improbability") port))))
+ (pass-if-equal "writing when enabled"
+ "#u8\"\\xe2;\\x88;\\x9e; Improbability\""
+ (captured-bytestring-output
+ (λ (port) (write (string->utf8 "∞ Improbability") port))))
+ (pass-if-equal "writing all encodings"
+ all-encoded
+ (captured-bytestring-output
+ (λ (port) (write (u8-list->bytevector (iota 256)) port)))))