From: Rutger van Beusekom <[email protected]>
This allows production of incomplete parse trees, without errors, e.g.,
for code completion.
* module/ice-9/peg/codegen.scm (%peg:fall-back?): New exported
parameter.
(%enable-expect, %continuation, %final-continuation): New parameter.
(final-continuation): New function.
(cg-or-rest): New function.
(cg-and-int): Recover from expectation failures, fall-back by skipping
forward or escalating upward.
(cg-*): Prepare fall-back %continuation.
* test-suite/tests/peg.test ("Fall-back parser"): Test it.
* doc/ref/api-peg.texi (PEG Internals): Document it.
Co-authored-by: Janneke Nieuwenhuizen <[email protected]>
fall-back
fall-back
fallback
---
doc/ref/api-peg.texi | 14 ++++
module/ice-9/peg.scm | 5 +-
module/ice-9/peg/codegen.scm | 146 +++++++++++++++++++++++++++--------
test-suite/tests/peg.test | 24 +++++-
4 files changed, 151 insertions(+), 38 deletions(-)
diff --git a/doc/ref/api-peg.texi b/doc/ref/api-peg.texi
index 733cb1c6d..4c96b2acf 100644
--- a/doc/ref/api-peg.texi
+++ b/doc/ref/api-peg.texi
@@ -1116,3 +1116,17 @@ language. Putting a @code{#} behind a terminal or
non-terminal
indicates that its parsing must succeed, otherwise an exception is
thrown containing the current parser state providing a hook to produce
informative parse errors.
+
+@subsubheading Fallback parsing
+
+A natural extension to expect parsing is fallback parsing. It is
+enabled by setting parameter @var{%peg:fall-back?} to @code{#t}.
+Fallback parsing is implemented by catching the exception thrown by the
+expect operator. At this point the parser attempts to recover its state
+by eating away at the input until the input runs out or until one of the
+grammar continuations matches and parsing continues regularly.
+
+When error occurs, @var{%peg:error} is invoked.
+
+@deffn {Scheme Procedure} %peg:error str line-number column-number error-type
error
+@end deffn
diff --git a/module/ice-9/peg.scm b/module/ice-9/peg.scm
index fd9dce54c..aa7ddc743 100644
--- a/module/ice-9/peg.scm
+++ b/module/ice-9/peg.scm
@@ -25,13 +25,15 @@
;; peg-sexp-compile.
#:use-module (ice-9 peg simplify-tree)
#:use-module (ice-9 peg using-parsers)
- #:use-module (ice-9 peg cache)
+
#:re-export (define-peg-pattern
define-peg-string-patterns
define-skip-parser
%peg:debug?
+ %peg:fall-back?
%peg:locations?
%peg:skip?
+ %peg:error
match-pattern
search-for-pattern
compile-peg-pattern
@@ -43,4 +45,3 @@
peg:tree
peg:substring
peg-record?))
-
diff --git a/module/ice-9/peg/codegen.scm b/module/ice-9/peg/codegen.scm
index 458a7e3ab..642f31c63 100644
--- a/module/ice-9/peg/codegen.scm
+++ b/module/ice-9/peg/codegen.scm
@@ -23,9 +23,12 @@
add-peg-compiler!
define-skip-parser
%peg:debug?
+ %peg:error
+ %peg:fall-back?
%peg:locations?
%peg:skip?)
+ #:use-module (srfi srfi-1)
#:use-module (ice-9 pretty-print)
#:use-module (system base pmatch))
@@ -60,6 +63,8 @@ return EXP."
(set! lst (cons obj lst)))))
+(define %peg:fall-back? (make-parameter #f)) ;; public interface, enable
fall-back parsing
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;; CODE GENERATORS
;; These functions generate scheme code for parsing PEGs.
@@ -169,6 +174,71 @@ return EXP."
((eq? accum 'none) 'none)))
(define baf builtin-accum-filter)
+(define (final-continuation str strlen at) #f)
+
+(define %continuation (make-parameter final-continuation))
+
+(define %fall-back-skip-at (make-parameter #f))
+
+;;Fallback parsing is triggered by a syntax-error exception
+;;the 'at' parameter is then pointing to "incomplete or erroneous" input
+;;and moves ahead in the input until one of the continuations
+;;of the production rules in the current callstack matches the input at that
point.
+;;At this point parsing continues regularly, but with an incomplete or
erroneous parse tree.
+;;If none of the continuations match then parsing fails without a result.
+;;The operators involved for determining a continuation are: '(+ * and)
+;;operator / is naturally not combined with the use of #
+;;operators '(! &) may be considered later, since they may prove useful as
asserts
+
+(define (format-error error str)
+ "Return procedure with two parameters (FROM TO) that formats parser
+exception ERROR (offset . error) according using the source text in STR
+and collects it using procedure (%peg:error)."
+ (define (get-error-type from to)
+ (if (< from to)
+ 'expected
+ 'error))
+ (lambda (from to)
+ (let* ((error-type (get-error-type from to))
+ (error-pos (caar error))
+ (line-number (1+ (string-count str #\newline 0 error-pos)))
+ (col-number (- error-pos
+ (or (string-rindex str #\newline 0 error-pos) -1))))
+ ((%peg:error) str line-number col-number error-type error))))
+
+(define* (fall-back-skip kernel #:optional sequence?)
+ (if (not (%peg:fall-back?)) kernel
+ (lambda (str strlen start)
+ (catch 'syntax-error
+ (lambda _
+ (kernel str strlen start))
+ (lambda (key . args)
+ (let* ((expected (cadar args))
+ (format-error (format-error args str)))
+ (let loop ((at start))
+ (cond ((or (= at strlen)
+ ;; TODO: decide what to do; inspecting at might not
be enough?!!
+ (unless (and (%fall-back-skip-at)
+ (eq? (%fall-back-skip-at) at))
+ (parameterize ((%fall-back-skip-at at))
+ ((%continuation) str strlen at))))
+ (format-error start at)
+ (if sequence? `(,at ()) `(,at (,expected))))
+ (else
+ (let ((res (false-if-exception (kernel str strlen (1+
at)))))
+ (if res
+ (begin
+ (format-error (or (string-index str
(char-set-complement char-set:whitespace) start at) start) at)
+ res)
+ (loop (1+ at)))))))))))))
+
+
+(define (partial-match kernel sym)
+ (lambda (str strlen at)
+ (catch 'syntax-error
+ (lambda _ (kernel str strlen at))
+ (lambda (key . args) (and (< at (caar args)) (car args))))))
+
;; Top-level function builder for AND. Reduces to a call to CG-AND-INT.
(define (cg-and clauses accum)
#`(lambda (str len pos)
@@ -181,8 +251,17 @@ return EXP."
(()
(cggr accum 'cg-and #`(reverse #,body) at))
((first rest ...)
- #`(let ((res (#,(compile-peg-pattern #'first accum) #,str #,strlen #,at)))
- (and res
+ #`(let* ((next #,(cg-or #'(rest ...) 'body))
+ (kernel #,(compile-peg-pattern #'first accum))
+ (res (parameterize
+ ((%continuation
+ (let ((after-that (%continuation)))
+ (lambda (str strlen at)
+ (or ((partial-match next 'next) str strlen at)
+ ((partial-match after-that 'after-that)
+ str strlen at))))))
+ ((fall-back-skip kernel) #,str #,strlen #,at))))
+ (and res
;; update AT and BODY then recurse
(let ((newat (car res))
(newbody (cadr res)))
@@ -207,42 +286,40 @@ return EXP."
(define (cg-* args accum)
(syntax-case args ()
((pat)
- #`(lambda (str strlen at)
- (let ((body '()))
- (let lp ((end at) (count 0))
- (let* ((match (#,(compile-peg-pattern #'pat (baf accum))
- str strlen end))
- (new-end (if match (car match) end))
- (count (if (> new-end end) (1+ count) count)))
- (if (> new-end end)
- (push-not-null! body (single-filter (cadr match))))
- (if (and (> new-end end)
- #,#t)
- (lp new-end count)
- (let ((success #,#t))
- #,#`(and success
- #,(cggr (baf accum) 'cg-body
- #'(reverse body) #'new-end)))))))))))
+ #`(let* ((kernel #,(compile-peg-pattern #'pat (baf accum)))
+ (kleene (lambda (str strlen at)
+ (let ((body '()))
+ (let lp ((end at) (count 0))
+ (let* ((match ((fall-back-skip kernel #t) str
strlen end))
+ (new-end (if match (car match) end))
+ (count (if (> new-end end) (1+ count)
count)))
+ (when (> new-end end)
+ (push-not-null! body (single-filter (cadr
match))))
+ (if (and (> new-end end) #,#t) (lp new-end count)
+ (let ((success #,#t))
+ #,#`(and success
+ #,(cggr (baf accum) 'cg-body
+ #'(reverse body)
#'new-end))))))))))
+ kleene))))
(define (cg-+ args accum)
(syntax-case args ()
((pat)
- #`(lambda (str strlen at)
- (let ((body '()))
- (let lp ((end at) (count 0))
- (let* ((match (#,(compile-peg-pattern #'pat (baf accum))
- str strlen end))
- (new-end (if match (car match) end))
- (count (if (> new-end end) (1+ count) count)))
- (if (> new-end end)
- (push-not-null! body (single-filter (cadr match))))
- (if (and (> new-end end)
- #,#t)
- (lp new-end count)
- (let ((success #,#'(>= count 1)))
- #,#`(and success
- #,(cggr (baf accum) 'cg-body
- #'(reverse body) #'new-end)))))))))))
+ #`(let* ((kernel #,(compile-peg-pattern #'pat (baf accum)))
+ (multiple (lambda (str strlen at)
+ (let ((body '()))
+ (let lp ((end at) (count 0))
+ (let* ((match ((fall-back-skip kernel #t) str
strlen end))
+ (new-end (if match (car match) end))
+ (count (if (> new-end end) (1+ count)
count)))
+ (when (> new-end end)
+ (push-not-null! body (single-filter (cadr
match))))
+ (if (and (> new-end end) #,#t) (lp new-end
count)
+ (let ((success #,#'(>= count 1)))
+ #,#`(and success
+ #,(cggr (baf accum) 'cg-body
+ #'(reverse body)
#'new-end))))))))))
+ multiple))))
(define (cg-? args accum)
(syntax-case args ()
@@ -351,6 +428,7 @@ return EXP."
;; Packages the results of a parser
+(define %peg:error (make-parameter (const #f)))
(define %peg:debug? (make-parameter #f))
(define %peg:locations? (make-parameter #f))
(define %peg:skip? (make-parameter (lambda (str strlen at) `(,at ()))))
diff --git a/test-suite/tests/peg.test b/test-suite/tests/peg.test
index 4f267f561..8a20cda41 100644
--- a/test-suite/tests/peg.test
+++ b/test-suite/tests/peg.test
@@ -6,6 +6,7 @@
(define-module (test-suite test-peg)
#:use-module (srfi srfi-1)
+ #:use-module (srfi srfi-26)
#:use-module (test-suite lib)
#:use-module (ice-9 peg)
#:use-module (ice-9 pretty-print))
@@ -310,8 +311,7 @@ trace-grammar := \"foobarbarbaz\" next: \"\"
"expect-grammar <-- one two three / .*
one <-- 'one'#
two <-- 'two'#
-three <-- 'three'"
-)
+three <-- 'three'")
(with-test-prefix "Parsing expect"
(pass-if-equal "expect okay"
@@ -379,3 +379,23 @@ baz
(%peg:locations? #t))
(match-pattern trace-grammar program-text))
peg:tree)))
+
+(with-test-prefix "Fall-back parser"
+ (pass-if-equal "only one"
+ '(expect-grammar "one")
+ (and=> (parameterize ((%peg:skip? peg-skip)
+ (%peg:fall-back? #t))
+ (match-pattern expect-grammar "one"))
+ peg:tree))
+ (pass-if-equal "no two"
+ '(expect-grammar (one "one") (three "three"))
+ (and=> (parameterize ((%peg:skip? peg-skip)
+ (%peg:fall-back? #t))
+ (match-pattern expect-grammar "one three"))
+ (compose (cute remove string? <>) peg:tree)))
+ (pass-if-equal "missing one"
+ '(expect-grammar (two "two") (three "three"))
+ (and=> (parameterize ((%peg:skip? peg-skip)
+ (%peg:fall-back? #t))
+ (match-pattern expect-grammar "two three"))
+ (compose (cute remove string? <>) peg:tree))))
--
2.46.0