lloda pushed a commit to branch main
in repository guile.
commit 79c22b16752e70a9928c58626bd81e6b2f90d365
Author: Rob Browning <[email protected]>
AuthorDate: Thu Apr 10 14:08:22 2025 -0500
Add (srfi srfi-207) with read-textual-bytestring; enable tests
https://srfi.schemers.org/srfi-207/srfi-207.html
Add a (srfi srfi-207) module to integrate the upstream code and provide
the remaining elements like the parser which will eventually be moved to
and shared with the reader in (ice-9 read). Rewrite the relevant
functions to avoid needing list->generator, u8vector-for-each, and
u8vector-unfold.
* am/bootstrap.am: Add srfi-207 include-related deps.
(SOURCES): Add srfi-207.scm.
* module/srfi/srfi-207.scm: Add (srfi srfi-207) module.
* module/srfi/srfi-207/upstream/bytestrings-impl.scm
(hex-string->bytevector): Replace u8vector-unfold with loop.
(make-bytestring-generator): Drop list->generator.
(write-textual-bytestring): drop u8vector-for-each.
* test-suite/Makefile.am: Add srfi-207.test.
* test-suite/tests/srfi-207.test: port to (test-suite lib).
---
am/bootstrap.am | 6 +
module/srfi/srfi-207.scm | 220 +++++++++++++++++++++
module/srfi/srfi-207/upstream/bytestrings-impl.scm | 99 +++++-----
test-suite/Makefile.am | 1 +
test-suite/tests/srfi-207.test | 155 ++++++++-------
5 files changed, 361 insertions(+), 120 deletions(-)
diff --git a/am/bootstrap.am b/am/bootstrap.am
index 8faed0934..bec34ee1f 100644
--- a/am/bootstrap.am
+++ b/am/bootstrap.am
@@ -59,6 +59,11 @@ ice-9/match.go: ice-9/match.scm ice-9/match.upstream.scm
ice-9/psyntax-pp.go: ice-9/psyntax.scm ice-9/psyntax-pp.scm
$(COMPILE) -o "$@" "$<"
+srfi/srfi-207.go: \
+ srfi/srfi-207.scm \
+ srfi/srfi-207/upstream/base64.scm \
+ srfi/srfi-207/upstream/bytestrings-impl.scm
+
# All sources. We can compile these in any order; the order below is
# designed to hopefully result in the lowest total compile time.
SOURCES = \
@@ -352,6 +357,7 @@ SOURCES = \
srfi/srfi-171/gnu.scm \
srfi/srfi-171/meta.scm \
srfi/srfi-197.scm \
+ srfi/srfi-207.scm \
\
statprof.scm \
\
diff --git a/module/srfi/srfi-207.scm b/module/srfi/srfi-207.scm
new file mode 100644
index 000000000..b77016f3c
--- /dev/null
+++ b/module/srfi/srfi-207.scm
@@ -0,0 +1,220 @@
+;;;; SRFI 207: String-notated bytevectors
+;;;;
+;;;; Copyright (C) 2025 Free Software Foundation, Inc.
+;;;;
+;;;; This library is free software: you can redistribute it and/or
+;;;; modify it under the terms of the GNU Lesser General Public License
+;;;; as published by the Free Software Foundation, either version 3 of
+;;;; the License, or (at your option) any later version.
+;;;;
+;;;; This library is distributed in the hope that it will be useful, but
+;;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;;; Lesser General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this program. If not, see
+;;;; <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+;;;
+;;; This is an implementation of SRFI 207: String-notated bytevectors.
+;;;
+;;; Code:
+
+(define-module (srfi srfi-207)
+ #:use-module ((ice-9 exceptions)
+ #:select (&error
+ define-exception-type
+ make-exception-with-message
+ make-exception-with-irritants))
+ #:use-module ((rnrs arithmetic bitwise) #:select (bitwise-and bitwise-ior))
+ #:use-module ((rnrs bytevectors)
+ #:select (bytevector->u8-list u8-list->bytevector))
+ #:use-module ((rnrs io ports) #:select (string->bytevector))
+ #:use-module ((scheme base)
+ #:select (binary-port?
+ bytevector
+ bytevector-copy
+ bytevector-copy!
+ bytevector-length
+ bytevector-u8-ref
+ bytevector-u8-set!
+ bytevector?
+ define-record-type
+ eof-object
+ get-output-bytevector
+ let-values
+ make-bytevector
+ open-output-bytevector
+ read-string
+ utf8->string
+ write-bytevector
+ write-string
+ write-u8))
+ #:use-module ((srfi srfi-1)
+ #:select (fold list-tabulate fold-right unfold unfold-right))
+ #:use-module ((srfi srfi-43) #:select (vector-unfold))
+ #:use-module ((srfi srfi-60) #:select (arithmetic-shift bit-field))
+ #:export (base64->bytevector
+ bytestring
+ bytestring->list
+ bytestring-break
+ bytestring-error?
+ bytestring-index
+ bytestring-index-right
+ bytestring-join
+ bytestring-pad
+ bytestring-pad-right
+ bytestring-replace
+ bytestring-span
+ bytestring-split
+ bytestring-trim
+ bytestring-trim-both
+ bytestring-trim-right
+ bytestring<=?
+ bytestring<?
+ bytestring>=?
+ bytestring>?
+ bytevector->base64
+ bytevector->hex-string
+ hex-string->bytevector
+ make-bytestring
+ make-bytestring!
+ make-bytestring-generator
+ read-textual-bytestring
+ write-binary-bytestring
+ write-textual-bytestring))
+
+(cond-expand-provide (current-module) '(srfi-207))
+
+;; From the upstream 207.sld library definition
+(define-syntax assume
+ (syntax-rules ()
+ ((_ pred) (unless pred (error "invalid assumption:" (quote pred))))
+ ((_ pred msg ...) (unless pred (error msg ...)))))
+
+(define-exception-type &bytestring-error &error
+ make-bytestring-error bytestring-error?)
+
+(define (bytestring-error message . irritants)
+ (raise-exception (make-exception (make-bytestring-error)
+ (make-exception-with-message message)
+ (make-exception-with-irritants irritants))))
+
+(include-from-path "srfi/srfi-207/upstream/base64.scm")
+(include-from-path "srfi/srfi-207/upstream/bytestrings-impl.scm")
+
+(define (read-bytestring-content port)
+ ;; Must use port, not (peek)/(next).
+ (let ((ch (read-char port)))
+ (when (eof-object? ch)
+ (bytestring-error "end of input instead of bytestring opening #\\\""))
+ (unless (eqv? ch #\")
+ (bytestring-error "expected bytestring opening #\\\"" ch)))
+ (let lp ((out '()))
+ (let ((ch (read-char port)))
+ (cond
+ ((eof-object? ch)
+ (bytestring-error "unexpected end of input while reading bytestring"))
+ ((eqv? ch #\")
+ (list->typed-array 'vu8 1 (reverse! out)))
+ ((eqv? ch #\\)
+ (let* ((ch (read-char port)))
+ (when (eof-object? ch)
+ (bytestring-error "unexpected end of input within escape
sequence"))
+ (case ch
+ ((#\a) (lp (cons 7 out)))
+ ((#\b) (lp (cons 8 out)))
+ ((#\t) (lp (cons 9 out)))
+ ((#\n) (lp (cons 10 out)))
+ ((#\r) (lp (cons 13 out)))
+ ((#\") (lp (cons 34 out)))
+ ((#\\) (lp (cons 92 out)))
+ ((#\|) (lp (cons 124 out)))
+ ((#\x)
+ (define (skip-prefix-zeros)
+ ;; Leave one zero before a ; to handle \x0;
+ (let ((ch (peek-char port)))
+ (cond
+ ((eof-object? ch) ch)
+ ((char=? ch #\0)
+ (let ((zero (read-char port)))
+ (if (char=? (peek-char port) #\;)
+ (unread-char zero port)
+ (skip-prefix-zeros)))))))
+ (define (read-hex which)
+ (let* ((h (read-char port)))
+ (when (eof-object? h)
+ (bytestring-error
+ (format #f "end of input at ~s bytestring hex escape char"
which)))
+ (case h
+ ((#\;) h)
+ ((#\0) 0)
+ ((#\1) 1)
+ ((#\2) 2)
+ ((#\3) 3)
+ ((#\4) 4)
+ ((#\5) 5)
+ ((#\6) 6)
+ ((#\7) 7)
+ ((#\8) 8)
+ ((#\9) 9)
+ ((#\a #\A) 10)
+ ((#\b #\B) 11)
+ ((#\c #\C) 12)
+ ((#\d #\D) 13)
+ ((#\e #\E) 14)
+ ((#\f #\F) 15)
+ (else
+ (bytestring-error
+ (format #f "non-hex ~a character in bytestring hex
escape" which)
+ h)))))
+ (skip-prefix-zeros)
+ (let* ((h1 (read-hex "first"))
+ (h2 (read-hex "second")))
+ (if (eqv? h2 #\;)
+ (lp (cons h1 out))
+ (let ((term (read-char port)))
+
+ (unless (char=? term #\;)
+ (bytestring-error "not bytestring hex escape semicolon"
term))
+ (lp (cons (+ (* 16 h1) h2) out))))))
+ (else ;; newline surrounded by optional interline blanks
+ (define (intraline? ch)
+ (and (char-whitespace? ch) (not (char=? ch #\newline))))
+ (define (skip-intraline)
+ (let ((ch (peek-char port)))
+ (when (and (not (eof-object? ch)) (intraline? ch))
+ (read-char port)
+ (skip-intraline))))
+ (cond
+ ((char=? ch #\newline) (skip-intraline) (lp out))
+ ((char-whitespace? ch)
+ (skip-intraline)
+ (unless (char=? (read-char port) #\newline)
+ (bytestring-error "expected newline after backslash and
optional spaces" ch))
+ (skip-intraline)
+ (lp out))
+ (else
+ (bytestring-error "unexpected character after bytesstring
backslash" ch)))))))
+ (else
+ (let ((i (char->integer ch)))
+ (unless (<= 20 i 127)
+ (bytestring-error "bytestring char not in valid ASCII range" ch))
+ (lp (cons i out))))))))
+
+(define read-textual-bytestring
+ (case-lambda
+ ((prefix) (read-textual-bytestring prefix (current-input-port)))
+ ((prefix in)
+ (unless (boolean? prefix)
+ (scm-error 'wrong-type-arg "read-textual-bytestring"
+ "Non-boolean prefix argument: ~s" (list prefix) (list
prefix)))
+ (when prefix
+ (let ((s (read-string 3 in)))
+ (cond ((eof-object? s)
+ (bytestring-error "end of input within bytestring content"))
+ ((string=? s "#u8") #t)
+ (else (bytestring-error "invalid bytestring prefix" s)))))
+ (read-bytestring-content in))))
diff --git a/module/srfi/srfi-207/upstream/bytestrings-impl.scm
b/module/srfi/srfi-207/upstream/bytestrings-impl.scm
index d30424867..fb8908b60 100644
--- a/module/srfi/srfi-207/upstream/bytestrings-impl.scm
+++ b/module/srfi/srfi-207/upstream/bytestrings-impl.scm
@@ -120,20 +120,21 @@
(integer->hex-string (bytevector-u8-ref bv i))))))
(define (hex-string->bytevector hex-str)
- (assume (string? hex-str))
- (let ((len (string-length hex-str)))
- (unless (even? len)
+ (unless (string? hex-str)
+ (bytestring-error "invalid hex-str argument" hex-str))
+ (let ((sn (string-length hex-str)))
+ (unless (even? sn)
(bytestring-error "incomplete hexadecimal string" hex-str))
- (u8vector-unfold
- (lambda (_ i)
- (let* ((end (+ i 2))
- (s (substring hex-str i end))
- (n (string->number s 16)))
- (if n
- (values n end)
- (bytestring-error "invalid hexadecimal sequence" s))))
- (truncate-quotient len 2)
- 0)))
+ (let* ((result (make-bytevector (/ sn 2))))
+ (do ((si 0 (+ si 2))
+ (vi 0 (1+ vi)))
+ ((= si sn) result)
+ (let* ((s (substring hex-str si (+ si 2)))
+ (n (string->number s 16)))
+ (unless n
+ (bytestring-error "invalid hexadecimal sequence in hex-str"
+ s hex-str))
+ (bytevector-u8-set! result vi n))))))
(define bytevector->base64
(case-lambda
@@ -176,26 +177,36 @@
(lambda (i) (+ i 1))
start))))
-;; Lazily generate the bytestring constructed from objs.
-(define (make-bytestring-generator . objs)
- (list->generator (flatten-bytestring-segments objs)))
-
-;; Convert and flatten chars and strings, and flatten bytevectors
-;; to yield a flat list of bytes.
-(define (flatten-bytestring-segments objs)
- (fold-right
- (lambda (x res)
- (cond ((and (exact-natural? x) (< x 256)) (cons x res))
- ((and (char? x) (char<=? x #\delete))
- (cons (char->integer x) res))
+(define (make-bytestring-generator . args)
+ "Return a thunk that returns the consecutive bytes, one per
+invocation, of the bytevector that (apply bytestring args) would
+produce. The elements of args are validated before
+make-bytestring-generator returns, and if invalid, an error satisfying
+bytestring-error? is raised."
+ (define (generate)
+ (if (null? args)
+ (eof-object)
+ (let ((x (car args)))
+ (cond
+ ((integer? x)
+ (set! args (cdr args))
+ x)
+ ((char? x)
+ (set! args (cdr args))
+ (char->integer x))
((bytevector? x)
- (append (bytevector->u8-list x) res))
- ((string-ascii? x)
- (append (map char->integer (string->list x)) res))
+ (set! args (append! (bytevector->u8-list x) (cdr args)))
+ (generate))
+ ((string? x)
+ (set! args (append! (string->list x) (cdr args)))
+ (generate))
(else
- (bytestring-error "invalid bytestring segment" x))))
- '()
- objs))
+ (bytestring-error "invalid bytestring segment" x))))))
+ (for-each (λ (arg)
+ (or (valid-bytestring-segment? arg)
+ (bytestring-error "invalid bytestring segment" arg)))
+ args)
+ generate)
;;;; Selection
@@ -475,19 +486,19 @@
((bstring port)
(parameterize ((current-output-port port))
(write-string "#u8\"")
- (u8vector-for-each
- (lambda (b)
- (cond ((assv b backslash-codepoints) =>
- (lambda (p)
- (write-char #\\)
- (write-char (cdr p))))
- ((and (>= b #x20) (<= b #x7e))
- (write-char (integer->char b)))
- (else
- (write-string "\\x")
- (write-string (number->string b 16))
- (write-char #\;))))
- bstring)
+ (do ((i 0 (1+ i)))
+ ((= i (bytevector-length bstring)))
+ (let ((b (bytevector-u8-ref bstring i)))
+ (cond ((assv b backslash-codepoints) =>
+ (lambda (p)
+ (write-char #\\)
+ (write-char (cdr p))))
+ ((and (>= b #x20) (<= b #x7e))
+ (write-char (integer->char b)))
+ (else
+ (write-string "\\x")
+ (write-string (number->string b 16))
+ (write-char #\;)))))
(write-char #\")))))
(define (write-binary-bytestring port . args)
diff --git a/test-suite/Makefile.am b/test-suite/Makefile.am
index ecf4a3175..492214845 100644
--- a/test-suite/Makefile.am
+++ b/test-suite/Makefile.am
@@ -162,6 +162,7 @@ SCM_TESTS = tests/00-initial-env.test \
tests/srfi-119.test \
tests/srfi-171.test \
tests/srfi-197.sr64 \
+ tests/srfi-207.test \
tests/srfi-4.test \
tests/srfi-9.test \
tests/statprof.test \
diff --git a/test-suite/tests/srfi-207.test b/test-suite/tests/srfi-207.test
index b5c55cbf7..8f21b1d73 100644
--- a/test-suite/tests/srfi-207.test
+++ b/test-suite/tests/srfi-207.test
@@ -1,4 +1,7 @@
+;;;; srfi-207.test --- SRFI 207 test suite -*- scheme -*-
+
;;; Copyright (C) 2020 Wolfgang Corcoran-Mathe
+;;; Copyright (C) 2025 Free Software Foundation, Inc.
;;;
;;; Permission is hereby granted, free of charge, to any person obtaining a
;;; copy of this software and associated documentation files (the
@@ -19,63 +22,65 @@
;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-(import (scheme base))
-(import (scheme write))
-(import (srfi 207))
-(import (only (srfi 1) list-tabulate every))
-
-(cond-expand
- ((library (srfi 158))
- (import (only (srfi 158) generator->list)))
- (else
- (begin
- (define (generator->list gen)
- (let rec ((x (gen)))
- (if (eof-object? x)
- '()
- (cons x (rec (gen)))))))))
-
-(cond-expand
- ((library (srfi 78))
- (import (srfi 78)))
- (else
- (begin
- (define *tests-failed* 0)
- (define-syntax check
- (syntax-rules (=>)
- ((check expr => expected)
- (if (equal? expr expected)
- (begin
- (display 'expr)
- (display " => ")
- (display expected)
- (display " ; correct")
- (newline))
- (begin
- (set! *tests-failed* (+ *tests-failed* 1))
- (display "FAILED: for ")
- (display 'expr)
- (display " expected ")
- (display expected)
- (display " but got ")
- (display expr)
- (newline))))))
- (define (check-report)
- (if (zero? *tests-failed*)
- (begin
- (display "All tests passed.")
- (newline))
- (begin
- (display "TESTS FAILED: ")
- (display *tests-failed*)
- (newline)))))))
+(define-module (srfi-207-test)
+ #:use-module ((srfi srfi-1) #:select (every list-tabulate))
+ #:use-module ((srfi srfi-207)
+ #:select (base64->bytevector
+ bytestring
+ bytestring->list
+ bytestring-break
+ bytestring-error?
+ bytestring-index
+ bytestring-index-right
+ bytestring-join
+ bytestring-pad
+ bytestring-pad-right
+ bytestring-replace
+ bytestring-span
+ bytestring-split
+ bytestring-trim
+ bytestring-trim-both
+ bytestring-trim-right
+ bytestring<=?
+ bytestring<?
+ bytestring>=?
+ bytestring>?
+ bytevector->base64
+ bytevector->hex-string
+ hex-string->bytevector
+ make-bytestring
+ make-bytestring!
+ make-bytestring-generator
+ read-textual-bytestring
+ write-binary-bytestring
+ write-textual-bytestring))
+ #:use-module ((srfi srfi-34) #:select (guard))
+ #:use-module ((test-suite lib)
+ #:select (pass-if-equal pass-if-exception with-test-prefix))
+ #:use-module ((scheme base)
+ #:select (bytevector
+ bytevector-length
+ bytevector-u8-ref
+ get-output-bytevector
+ make-bytevector
+ open-output-bytevector
+ utf8->string)))
+
+;; No srfi-158
+(define (generator->list gen)
+ (let rec ((x (gen)))
+ (if (eof-object? x)
+ '()
+ (cons x (rec (gen))))))
+
+(define-syntax check
+ (syntax-rules (=>)
+ ((check expr => expected)
+ (pass-if-equal expected expr))))
;;;; Utility
-(define (print-header message)
- (newline)
- (display (string-append ";;; " message))
- (newline))
+(define (print-header message) #t)
(define-syntax constantly
(syntax-rules ()
@@ -96,7 +101,9 @@
(syntax-rules ()
((_ expr)
(guard (condition ((bytestring-error? condition) 'bytestring-error)
- (else #f))
+ (else
+ (format (current-error-port) "exception: ~s\n"
condition)
+ #f))
expr))))
;; Testing shorthand for write-binary-bytestring.
@@ -128,16 +135,19 @@
(define test-bstring (bytestring "lorem"))
(define homer
- (bytestring "The Man, O Muse, informe, who many a way / \
- Wound in his wisedome to his wished stay;"))
+ (bytestring
+ (string-append "The Man, O Muse, informe, who many a way / "
+ "Wound in his wisedome to his wished stay;")))
(define homer64
- "VGhlIE1hbiwgTyBNdXNlLCBpbmZvcm1lLCB3aG8gbWFueSBhIHdheSAvIFdvd\
- W5kIGluIGhpcyB3aXNlZG9tZSB0byBoaXMgd2lzaGVkIHN0YXk7")
+ (string-append
+ "VGhlIE1hbiwgTyBNdXNlLCBpbmZvcm1lLCB3aG8gbWFueSBhIHdheSAvIFdvd"
+ "W5kIGluIGhpcyB3aXNlZG9tZSB0byBoaXMgd2lzaGVkIHN0YXk7"))
(define homer64-w
- "VGhlIE1hb iwgTyBNdXNlL CBpbmZvcm1lL\nCB3aG8gbWF\tueSBhIH\rdheSAvIFdvd\
- W5kIGluI GhpcyB 3aXNlZ\t\t\nG9tZSB0b yBoaXMgd\t2lzaGVkIHN0YXk7")
+ (string-append
+ "VGhlIE1hb iwgTyBNdXNlL CBpbmZvcm1lL\nCB3aG8gbWF\tueSBhIH\rdheSAvIFdvd"
+ "W5kIGluI GhpcyB 3aXNlZ\t\t\nG9tZSB0b yBoaXMgd\t2lzaGVkIHN0YXk7"))
;;;; Constructors
@@ -169,14 +179,14 @@
(check (bytevector->base64 test-bstring) => "bG9yZW0=")
(check (bytevector->base64 #u8(#xff #xef #xff)) => "/+//")
(check (bytevector->base64 #u8(#xff #xef #xff) "*@") => "@*@@")
- (check (equal? (bytevector->base64 homer) homer64) => #t)
+ (check (bytevector->base64 homer) => homer64)
(check (bytevector->base64 #u8(1)) => "AQ==")
(check (bytevector->base64 #u8()) => "")
(check (base64->bytevector "bG9yZW0=") => test-bstring)
(check (base64->bytevector "/+//") => #u8(#xff #xef #xff))
(check (base64->bytevector "@*@@" "*@") => #u8(#xff #xef #xff))
- (check (equal? (base64->bytevector homer64) homer) => #t)
- (check (equal? (base64->bytevector homer64-w) homer) => #t)
+ (check (base64->bytevector homer64) => homer)
+ (check (base64->bytevector homer64-w) => homer)
(check (base64->bytevector "AQ==") => #u8(1))
(check (base64->bytevector "") => #u8())
(check (base64->bytevector "\n\n\n==\t\r\n") => #u8())
@@ -215,8 +225,7 @@
(check (catch-bytestring-error (make-bytestring-generator "λ" #\m #\u))
=> 'bytestring-error)
(check (catch-bytestring-error (make-bytestring-generator 89 90 300))
- => 'bytestring-error)
-)
+ => 'bytestring-error))
(define (check-selection)
(print-header "Running selection tests...")
@@ -268,8 +277,7 @@
(check (bytestring-replace bv1 bv2 2 2 0 5) => (bytestring "food food")))
(let ((bv1 (bytestring "food food")))
(check (bytestring-replace bv1 (bytevector) 2 7 0 0)
- => (bytestring "food")))
-)
+ => (bytestring "food"))))
(define (check-comparison)
(define short-bstring (bytestring "lore"))
@@ -292,8 +300,7 @@
(check (bytestring>=? test-bstring short-bstring) => #t)
(check (bytestring>=? test-bstring mixed-case-bstring) => #t)
(check (bytestring>=? mixed-case-bstring test-bstring) => #f)
- (check (bytestring>=? short-bstring test-bstring) => #f)
-)
+ (check (bytestring>=? short-bstring test-bstring) => #f))
(define (check-searching)
(define (eq-r? b) (= b #x72))
@@ -425,8 +432,7 @@
(every (lambda (bvec)
(equal? bvec (parse-SNB/prefix (%bytestring->SNB bvec))))
test-bstrings)
- => #t))
-)
+ => #t)))
(define (check-all)
(check-constructor)
@@ -436,9 +442,6 @@
(check-comparison)
(check-searching)
(check-join-and-split)
- (check-io)
-
- (newline)
- (check-report))
+ (check-io))
(check-all)