On 2018-03-02 00:56, Mark H Weaver wrote:

I would not consider Guile's 'read' to be trustworthy when processing
potentially malicious inputs.

      Mark

Thanks for the input.

FWIW, I've written a procedure 'read' that is AFAIK safe but can
_fail_ on malicious input.

It can read:

- strings,
- booleans,
- simple numbers like 123456,
- rationals like 1/4 and
- simple symbols .ie unlike what is generated by 'gensym'

I attached to this mail all the files. It based on the stream
library I've been working on and a simple parser combinator
library based on it.

The main issue I see is that combinatorix doesn't take a port
as input. I could probably turn a port into a functional stream
but right now I don't need it. Patch welcome :)

Also I greatly improved error reporting compared to my last
attempt at building a parser combinator. Now the library
will tell you the failing parser with the argument that were
passed to it and the char line and column that triggered the
error.

For instance, given the following definitions:

  (define parse-a (parse-xchar #\a))
  (define parse-b (parse-xchar #\b))
  (define parse-c (parse-xchar #\c))

  (define parse-abc (each parse-a parse-b parse-c))

If you run the parser like that:

  (pk (parse parse-abc "ab©"))

You get the following exception:

neon/read.scm:75:4: Throw to key `combinatorix' with args `(#<<error> value: <xchar #\© [1,3] @ 2> parser: #<procedure parse-xchar (char)> args: #\c>)'.

That is parse-xchar #\c that is failing. Things could be improved
but its encouraging I think.

For info on parser combinators see https://epsil.github.io/gll/


Happy hacking!
(define-module (neon read))

(use-modules ((ice-9 match)))
(use-modules ((ice-9 rdelim)))
(use-modules ((neon combinatorix)))


(define char-set:lisp-delimiters
  (char-set-union char-set:whitespace
                  (char-set #\( #\) #\[ #\] #\{ #\})))

(define char-set:number-digits (char-set #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 
#\9))

(define char-set:lisp-symbol
  (char-set-complement char-set:lisp-delimiters))

(define %space #(ws))

(define (not-space? v)
  (not (eq? v %space)))

(define parse-whitespace (lift (const %space)
                               (one-or-more (parse-char-set 
char-set:whitespace))))

(define (xchars->string xchars)
  (list->string (map xchar-char xchars)))

(define parse-string (lift (match-lambda ((dq1 xchars dq2) (xchars->string 
xchars)))
                           (each (parse-xchar #\")
                                 (zero-or-more (either (lift (lambda (x) (cadr 
x))
                                                               (each 
(parse-xchar #\\) (parse-xchar #\")))
                                                       (otherwise (parse-xchar 
#\") any)))
                                 (parse-xchar #\"))))

(define parse-boolean (either (lift (const #f) (each (parse-xchar #\#) 
(parse-xchar #\f)))
                              (lift (const #t) (each (parse-xchar #\#) 
(parse-xchar #\t)))))

(define parse-symbol (lift
                      (compose string->symbol xchars->string)
                      (one-or-more (parse-char-set char-set:lisp-symbol))))


(define parse-rational (lift (match-lambda ((a b c) (string->number
                                                     (string-append 
(xchars->string a)
                                                                    "/"
                                                                    
(xchars->string c)))))
                             (each (one-or-more (parse-char-set 
char-set:number-digits))
                                   (parse-xchar #\/)
                                   (one-or-more (parse-char-set 
char-set:number-digits)))))

(define parse-number (lift (compose string->number xchars->string)
                                   (one-or-more (parse-char-set 
char-set:number-digits))))

(define parse-open-paren (lift (const #f) (parse-xchar #\()))
(define parse-close-paren (lift (const #f) (parse-xchar #\))))

(define parse-exp (lift cadr
                        (each parse-open-paren
                              (lift (lambda (x) (filter not-space? x))
                                    (zero-or-more (either parse-exp
                                                          parse-boolean
                                                          parse-rational
                                                          parse-number
                                                          parse-string
                                                          parse-symbol
                                                          parse-whitespace)))
                              parse-close-paren)))

(define exp '(proc (string-append "héllo" "world" "with a \"") 123 #t #f 1/4))

;; (pk (equal? (parse parse-exp (pk (call-with-output-string (lambda (port) 
(write exp port))))) exp))

(define-public (string->scm string)
  (parse parse-exp string))

(define-public read (compose string->scm read-string))
(define-module (neon streams))

(use-modules (ice-9 match))
(use-modules (srfi srfi-26))

;;; Comments:
;;
;; - 2018/02/21: imported from guile-wiredtiger grf3 library, changed
;; the name to 'streams'
;;
;; - 2018/02/25: replace the use of 'throw' and 'cons' with 'values'
;; because it is faster
;;

(define-public (list->stream lst)
  (let loop ((lst lst))
    (lambda ()
      (if (null? lst)
          (values #f #f)
          (values (car lst) (loop (cdr lst)))))))

(define-public (stream->list stream)
  (let loop ((stream stream)
             (out '()))
    (call-with-values stream
      (lambda (value next)
        (if next
            (loop next (cons value out))
            (reverse! out))))))

(define-public stream-null
  (lambda ()
    (values #f #f)))

(define-public (stream-null? stream)
  (call-with-values stream
    (lambda (value next)
      (eq? next #f))))

(define-public (stream-car stream)
  (call-with-values stream
    (lambda (value next)
      value)))

(define-public (stream-cdr stream)
  (call-with-values stream
    (lambda (value next)
      next)))

(define-public (stream-map proc stream)
  (let loop ((stream stream))
    (lambda ()
      (call-with-values stream
        (lambda (value next)
          (if next
              (values (proc value) (loop next))
              (values #f #f)))))))

(define-public (stream-for-each proc stream)
  (let loop ((stream stream))
    (call-with-values stream
      (lambda (value next)
        (when next
          (proc value)
          (loop next))))))

(define-public (stream-filter predicate? stream)
  (let loop1 ((stream stream))
    (lambda ()
      (let loop2 ((stream stream))
        (call-with-values stream
          (lambda (value next)
            (if next
                (if (predicate? value)
                    (values value (loop1 next))
                    (loop2 next))
                (values #f #f))))))))

(define-public (stream-append . streams)
  (cond
   ((null? streams) (lambda () (values #f #f)))
   ;; wanna be fast path for the common case, if there is single
   ;; stream, return the first stream
   ((null? (cdr streams)) (car streams))
   ;; otherwise, unroll each stream...
   (else (let loop1 ((streams streams))
           (if (null? streams)
               (lambda () (values #f #f))
               (let loop2 ((stream (car streams)))
                 (call-with-values stream
                   (lambda (value next)
                     (if next
                         (lambda () (values value (loop2 next)))
                         (loop1 (cdr streams)))))))))))


;; (define-public (stream-take count stream)
;;   (let loop ((stream stream)
;;              (count count))
;;     (lambda ()
;;       (if (eq? count 0)
;;           '()
;;           (match (stream)
;;             ('() '())
;;             ((item . next) (cons item (loop next (1- count)))))))))

;; (define-public (stream-drop count stream)
;;   (let loop ((stream stream)
;;              (count count))
;;     (lambda ()
;;       (match (stream)
;;         ('() '())
;;         ((item . next) (if (eq? count 0)
;;                            (cons item (loop next 0))
;;                            ((loop next (1- count)))))))))


;; (define-public (stream-paginator count stream)
;;   (throw 'stream "not implemented error"))

(define-public (stream-length stream)
  (let loop ((stream stream)
             (count 0))
    (call-with-values stream
      (lambda (value next)
        (if next
            (loop next (+ 1 count))
            count)))))

;; (define-public (stream-scatter stream)
;;   "Take a stream of lists and returns a stream made of all the
;;    elements of all the lists. parents are inherited."
;;   (let loop ((stream stream)
;;              (lst '())
;;              (parents '()))
;;     (lambda ()
;;       (if (null? lst)
;;           (match (stream)
;;             ('() '())
;;             ((item . next)
;;              (let ((lst (car item))
;;                    (parents (cdr item)))
;;                (if (null? lst)
;;                    ((loop next '() '()))
;;                    (cons (cons (car lst) parents)
;;                          (loop next (cdr lst) parents))))))
;;           (cons (cons (car lst) parents)
;;                 (loop stream (cdr lst) parents))))))

;; (define-public (stream-unique stream)
;;   (let ((seen '()))  ;; TODO: replace with a hash table
;;     (let loop1 ((stream stream))
;;       (lambda ()
;;         (let loop2 ((stream stream))
;;           (match (stream)
;;             ('() '())
;;             ((item . next) (if (list-index (cut equal? <> (car item)) seen)
;;                                (loop2 next)
;;                                (begin (set! seen (cons (car item) seen))
;;                                       (cons item (loop1 next)))))))))))

(define-public (stream-group predicate? proc stream)
  "Return a new stream of stream values from STREAM. STREAM must be sorted.
Values from STREAM are grouped according to PROC. The value returned by
PROC must be comparable with PREDICATE?."
  (define (%stream-next stream key)
    ;; TODO: maybe memoize that procedure, because if the underlying
    ;; stream is a cursor stream (see cursor->stream) it leads to
    ;; multiple cursor-key-set + cursor-search which can be expensive.

    ;; XXX: This only called in the case where the previous stream was
    ;; not fully consumed ie. next-group-callback is replaced in most
    ;; cases by a lambda returning a value without computation, see
    ;; %stream-group procedure.
    (let loop ((stream stream))
      (call-with-values stream
        (lambda (value next)
          (if next
              (if (predicate? (proc value) key)
                  (loop next)
                  (lambda () (values value next))) ;; next-group
              (lambda () (values #f #f))))))) ;; end of stream

  (define (%stream-group stream key)
    ;; worst case scenario, stream was not consumed, but the user
    ;; request the next group

    ;; TODO: use make-paramater instead of set!
    (let ((next-group-callback (lambda () (%stream-next stream key))))
      (values (let loop ((stream stream))
                (lambda ()
                  (call-with-values stream
                    (lambda (value next)
                      (if next
                          (if (predicate? (proc value) key)
                              ;; save advance stream
                              (begin (set! next-group-callback (lambda () 
(%stream-next next key)))
                                     (values value (loop next)))
                              (and (set! next-group-callback (lambda () 
stream)) ;; next group
                                   (values #f #f))) ;; end of group stream
                          (and (set! next-group-callback (lambda () (lambda () 
(values #f #f)))) ;; end of stream
                               (values #f #f))))))) ;; end of group
              (lambda () (next-group-callback)))))



  (let loop ((stream (lambda () stream)))
    (lambda ()
      ;; the whole thing must appear pure, but depending on whether a
      ;; group is consumed, the next stream code path changes. That's
      ;; why, the loop's STREAM is wrapped in lambda as a thunk, to
      ;; allow 'next-group-stream' callback returned by %stream-group,
      ;; to return the correct/current next-group-callback depending
      ;; on whether the next group stream was computed or not and
      ;; compute it if it wasn't computed. TBH I am not sure this is
      ;; the right level of lambda nesting. It seems like there is too
      ;; much callback.
      (call-with-values (stream)
        (lambda (value next)
          (if next
              (call-with-values (lambda () (%stream-group (stream) (proc 
value)))
                (lambda (group next-group-stream)
                  (values group (loop next-group-stream))))
              (values #f #f)))))))


;; (define (hash-increment ht key)
;;   (let ((value (hash-ref ht key)))
;;     (if (not value)
;;         (hash-set! ht key 1)
;;         (hash-set! ht key (1+ value)))))

;; (define-public (stream-group-count stream)
;;   (let ((groups (make-hash-table)))
;;     (let loop ((stream stream))
;;       (match (stream)
;;         ('() (sort (hash-map->list cons groups) (lambda (a b) (> (cdr a) 
(cdr b)))))
;;         ((item . next)
;;          (hash-increment groups (car item))
;;          (loop next))))))

(define-public (stream-sort stream less?)
  (list->stream (sort! (stream->list stream) less?))) ;; TODO: improve 
preformance with a binary tree
;;; combinatorix
;;;
;;; Copyright © 2018 Amirouche Boubekki <amirou...@hypermove.net>
;;;
;;; This module is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public License
;;; as published by the Free Software Foundation; either version 3 of
;;; the License, or (at your option) any later version.
;;;
;;; This module is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Lesser General Public License for more details.
;;;
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this module.  If not, see
;;; <http://www.gnu.org/licenses/>.

;;; Commentary:
;;
;; Parser combinators.
;;
;; TODO:
;;
;;  - improve error handling
;;
;; Also see:
;;
;; - https://epsil.github.io/gll/
;; - https://docs.racket-lang.org/parsack/index.html
;; - https://docs.racket-lang.org/megaparsack/
;; - https://git.dthompson.us/guile-parser-combinators.git
;; - https://gitlab.com/tampe/stis-parser
;;
;;; Code:
(define-module (neon combinatorix))

(use-modules ((srfi srfi-9)))
(use-modules ((srfi srfi-9 gnu)))
(use-modules ((ice-9 match)))

(use-modules ((neon streams)))


;; test macro
(define-syntax-rule (test-check test-name expr expected)
  (when (getenv "DEBUG")
    (format #t "* ~a: " test-name)
    (let ((expr* expr)
          (expected* expected))
      (if (equal? expr* expected*)
          (format #t "PASS :)\n")
          (begin
            (format #t "FAILED :(\n")
            (format #t "** expected: ~s\n" expected*)
            (format #t "** found: ~s\n" expr*))))))

(define-record-type <result>
  (make-result value stream)
  result?
  (value result-value)
  (stream result-stream))

(define-record-type <error>
  (make-error value parser args)
  error?
  (value error-value)
  (parser error-parser)
  (args error-args))

(define continue make-result)
(define (fail stream parser args)
  (make-error (stream-car stream) parser args))

(define-record-type <xchar>
  (make-xchar char line column offset)
  xchar?
  (char xchar-char)
  (line xchar-line)
  (column xchar-column)
  (offset xchar-offset))

(define-public (lift proc parser)
  "Apply PROC to the result of PARSER"
  (lambda (stream)
    (match (parser stream)
      (($ <result> value stream) (continue (proc value) stream))
      (else else))))

(export xchar-char)

(define (xchar-format xchar port)
  (format port "<xchar ~s [~a,~a] @ ~a>"
          (xchar-char xchar)
          (xchar-line xchar)
          (xchar-column xchar)
          (xchar-offset xchar)))

(set-record-type-printer! <xchar> xchar-format)

(define (string->xchar-stream string)
  ;; TODO: optimize
  (let loop ((chars (string->list string))
             (line 1)
             (column 1)
             (offset 0)
             (out '()))
    (if (null? chars)
        (list->stream (reverse! out))
        (if (eq? (car chars) #\newline)
            (loop (cdr chars)
                  (+ 1 line)
                  1
                  (+ 1 offset)
                  (cons (make-xchar #\newline line column offset) out))
            (loop (cdr chars)
                  line
                  (+ 1 column)
                  (+ 1 offset)
                  (cons (make-xchar (car chars) line column offset) out))))))

(define-public (parse parser string)
  (match (parser (string->xchar-stream string))
    (($ <result> value (? stream-null? stream)) value)
    (else (throw 'combinatorix else))))

(define-public (parse-xchar char)
  (lambda (stream)
    (call-with-values stream
      (lambda (value next)
        (if next
            (if (char=? (xchar-char value) char)
                (continue value next)
                (fail stream parse-xchar char))
            (fail stream parse-xchar char))))))

(test-check "parse-xchar"
  (xchar-char (parse (parse-xchar #\c) "c"))
  #\c)

(define (either2 one two)
  (lambda (stream)
    (let ((result (one stream)))
      (if (result? result)
          result
          (two stream)))))

(test-check "either2 az 1"
  (xchar-char (parse (either2 (parse-xchar #\a) (parse-xchar #\z)) "a"))
  #\a)

(test-check "either2 az 2"
  (xchar-char (parse (either2 (parse-xchar #\a) (parse-xchar #\z)) "z"))
  #\z)

(define (each2 one two)
  (lambda (stream)
    (match (one stream)
      (($ <result> a next0)
       (match (two next0)
         (($ <result> b next1)
          (continue (cons a b) next1))
         (else else)))
      (else else))))

(test-check "each2 az"
  ((match-lambda ((a . b) (cons (xchar-char a) (xchar-char b))))
   (parse (each2 (parse-xchar #\a) (parse-xchar #\z)) "az"))
  (cons #\a #\z))

(test-check "each2+either2 ae"
  ((match-lambda ((a . b) (cons (xchar-char a) (xchar-char b))))
   (parse (each2 (either2 (parse-xchar #\a) (parse-xchar #\z))
                (parse-xchar #\e))
          "ae"))
  (cons #\a #\e))

(test-check "each2+either2 ze"
  ((match-lambda ((a . b) (cons (xchar-char a) (xchar-char b))))
   (parse (each2 (either2 (parse-xchar #\a) (parse-xchar #\z))
                (parse-xchar #\e))
          "ze"))
  (cons #\z #\e))

(define (%either . parsers)
  (lambda (stream)
    (let loop ((parsers parsers))
      (if (null? parsers)
          (fail stream %either (map (lambda (x) (x)) parsers))
          (let ((continue (((car parsers)) stream)))
            (if (result? continue)
                continue
                (loop (cdr parsers))))))))

(define-syntax-rule (either parser ...)
  (%either (lambda () parser) ...))

(export either)

(test-check "either abc 1"
  (xchar-char (parse (either (parse-xchar #\a) (parse-xchar #\b) (parse-xchar 
#\c))
                     "a"))
  #\a)

(test-check "either abc 2"
  (xchar-char (parse (either (parse-xchar #\a) (parse-xchar #\b) (parse-xchar 
#\c))
                     "b"))
  #\b)

(test-check "either abc 3"
  (xchar-char (parse (either (parse-xchar #\a) (parse-xchar #\b) (parse-xchar 
#\c))
                     "c"))
  #\c)

(define (%each . parsers)
  (lambda (stream)
    (let loop ((parsers parsers)
               (stream stream)
               (out '()))
      (if (null? parsers)
          (continue (reverse! out) stream)
          (match (((car parsers)) stream)
            (($ <result> value stream) (loop (cdr parsers) stream (cons value 
out)))
            (else else))))))

(define-syntax-rule (each parser ...)
  (%each (lambda () parser) ...))

(export each)

(test-check "each abc"
  (list->string (map xchar-char (parse (each (parse-xchar #\a)
                                             (parse-xchar #\b)
                                             (parse-xchar #\c)
                                             (parse-xchar #\d))
                                       "abcd")))
  "abcd")

(define-public (zero-or-more parser)
  (lambda (stream)
    (let loop ((stream stream)
               (out '()))
      (match (parser stream)
        (($ <result> value next)
         (loop next (cons value out)))
        (else (continue (reverse! out) stream))))))

(test-check "zero or more 1"
  (list->string (map xchar-char (parse (zero-or-more (parse-xchar #\a)) "aaa")))
  "aaa")

(test-check "zero or more 2"
  (list->string (map xchar-char (parse (zero-or-more (parse-xchar #\a)) "")))
  "")

(define-public (one-or-more parser)
  (lift (lambda (x) (apply cons x)) (each parser (zero-or-more parser))))

(test-check "one or more"
  (list->string (map xchar-char (parse (one-or-more (parse-xchar #\a)) "aaa")))
  "aaa")

(define-public (otherwise predicate parser)
  (lambda (stream)
    (if (error? (predicate stream))
        (parser stream)
        (fail stream predicate parser))))

(test-check "recursive lift..."
    (letrec ((recursive (lift (lambda (a) (if (pair? a) a (list a)))
                              (either
                               (lift (lambda (a) (apply cons* a))
                                     (each (parse-xchar #\a) (parse-xchar #\b) 
(parse-xchar #\c) recursive))
                               (parse-xchar #\x)))))
      (list->string (map xchar-char (parse recursive "abcabcx"))))
  "abcabcx")

(define-public (parse-char-set char-set)
  (lambda (stream)
    (call-with-values stream
      (lambda (value next)
        (if next
            (if (char-set-contains? char-set (xchar-char value))
                (continue value next)
                (fail stream parse-char-set char-set))
            (fail stream parse-char-set char-set))))))

(define-public any
  (lambda (stream)
    (call-with-values stream
      (lambda (value next)
        (if next
            (continue value next)
            (fail stream any '()))))))

Reply via email to