rekado pushed a commit to branch python-team in repository guix. commit fa9f3680f54635a1ab40fc99d841e52dc748469c Author: Lars-Dominik Braun <l...@6xq.net> AuthorDate: Sun Jul 23 11:20:03 2023 +0200
guix: toml: Add TOML parser. * guix/build/toml.scm: New file. * tests/toml.scm: New file. * Makefile.am: Register new files. --- Makefile.am | 2 + guix/build/toml.scm | 478 ++++++++++++++++++++++++++++++++++++++++++++++++++++ tests/toml.scm | 442 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 922 insertions(+) diff --git a/Makefile.am b/Makefile.am index cef972880c..a1c79ba71f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -283,6 +283,7 @@ MODULES = \ guix/build/qt-utils.scm \ guix/build/zig-build-system.scm \ guix/build/make-bootstrap.scm \ + guix/build/toml.scm \ guix/search-paths.scm \ guix/packages.scm \ guix/import/cabal.scm \ @@ -589,6 +590,7 @@ SCM_TESTS = \ tests/system.scm \ tests/style.scm \ tests/texlive.scm \ + tests/toml.scm \ tests/transformations.scm \ tests/ui.scm \ tests/union.scm \ diff --git a/guix/build/toml.scm b/guix/build/toml.scm new file mode 100644 index 0000000000..d5ea01d001 --- /dev/null +++ b/guix/build/toml.scm @@ -0,0 +1,478 @@ +;;; GNU Guix --- Functional package management for GNU +;;; Copyright © 2023 Lars-Dominik Braun <l...@6xq.net> +;;; +;;; This file is part of GNU Guix. +;;; +;;; GNU Guix is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; GNU Guix is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. + +;; This is a TOML parser adapted from the ABNF for v1.0.0 from +;; https://github.com/toml-lang/toml/blob/1.0.0/toml.abnf +;; The PEG grammar tries to follow the ABNF as closely as possible with +;; few deviations commented. +;; +;; The semantics are defined in https://toml.io/en/v1.0.0 +;; Currently unimplemented: +;; - Array of Tables + +(define-module (guix build toml) + #:use-module (ice-9 match) + #:use-module (ice-9 peg) + #:use-module (ice-9 textual-ports) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-19) + #:use-module (srfi srfi-35) + #:export (parse-toml parse-toml-file recursive-assoc-ref &file-not-consumed &already-defined)) + +(define-condition-type &toml-error &error toml-error?) +(define-condition-type &file-not-consumed &toml-error file-not-consumed?) +(define-condition-type &already-defined &toml-error already-defined?) + +;; Overall Structure +(define-peg-pattern toml-file body (and expression + (* (and ignore-newline expression)))) +(define-peg-pattern expression body (or + (and ws keyval ws (? comment)) + (and ws table ws (? comment)) + (and ws (? comment)))) + +;; Whitespace +(define-peg-pattern ws none (* wschar)) +(define-peg-pattern wschar body (or " " "\t")) + +;; Newline +(define-peg-pattern newline body (or "\n" "\r\n")) +;; This newline’s content is ignored, so we don’t need a bunch of (ignore newline). +(define-peg-pattern ignore-newline none newline) + +;; Comment +(define-peg-pattern non-ascii body (or (range #\x80 #\xd7ff) + (range #\xe000 #\x10ffff))) +(define-peg-pattern non-eol body (or "\t" (range #\x20 #\x7f) non-ascii)) + +(define-peg-pattern comment none (and "#" (* non-eol))) + +;; Key-Value pairs +(define-peg-pattern keyval all (and key keyval-sep val)) + +(define-peg-pattern key body (or dotted-key + simple-key)) +(define-peg-pattern simple-key all (or quoted-key + unquoted-key)) +(define-peg-pattern unquoted-key body (+ (or (range #\A #\Z) + (range #\a #\z) + (range #\0 #\9) + "-" + "_"))) +(define-peg-pattern quoted-key all (or basic-string + literal-string)) +(define-peg-pattern dotted-key body (and simple-key + (+ (and dot-sep simple-key)))) +(define-peg-pattern dot-sep none (and ws "." ws)) +(define-peg-pattern keyval-sep none (and ws "=" ws)) + +(define-peg-pattern val body (or string + boolean + array + inline-table + date-time + float + integer)) + +;; String +(define-peg-pattern string all (or ml-basic-string + basic-string + ml-literal-string + literal-string)) + +;; Basic String +(define-peg-pattern basic-string body (and (ignore "\"") + (* basic-char) + (ignore "\""))) +(define-peg-pattern basic-char body (or basic-unescaped escaped)) +(define-peg-pattern basic-unescaped body (or wschar + "\x21" + (range #\x23 #\x5B) + (range #\x5D #\x7E) + non-ascii)) +(define-peg-pattern escaped all (and + (ignore "\\") + (or "\"" "\\" "b" "f" "n" "r" "t" + (and (ignore "u") + HEXDIG HEXDIG HEXDIG HEXDIG) + (and (ignore "U") + HEXDIG HEXDIG HEXDIG HEXDIG + HEXDIG HEXDIG HEXDIG HEXDIG)))) + +;; Multiline Basic String +(define-peg-pattern ml-basic-string body (and + ml-basic-string-delim + (? ignore-newline) + ml-basic-body + ml-basic-string-delim)) +(define-peg-pattern ml-basic-string-delim none "\"\"\"") +(define-peg-pattern ml-basic-body body (and + (* mlb-content) + (* (and mlb-quotes (+ mlb-content))) + (? mlb-quotes-final))) + +(define-peg-pattern mlb-content body (or mlb-char newline mlb-escaped-nl)) +(define-peg-pattern mlb-char body (or mlb-unescaped escaped)) +(define-peg-pattern mlb-quotes body (or "\"\"" "\"")) +;; We need to convince the parser to backtrack here, thus the additional followed-by rule. +(define-peg-pattern mlb-quotes-final body (or (and "\"\"" (followed-by + ml-basic-string-delim)) + (and "\"" (followed-by + ml-basic-string-delim)))) +(define-peg-pattern mlb-unescaped body (or wschar + "\x21" + (range #\x23 #\x5B) + (range #\x5D #\x7E) + non-ascii)) +;; Escaped newlines and following whitespace are removed from the output. +(define-peg-pattern mlb-escaped-nl none (and "\\" ws newline + (* (or wschar newline)))) + +;; Literal String +(define-peg-pattern literal-string body (and (ignore "'") + (* literal-char) + (ignore "'"))) +(define-peg-pattern literal-char body (or "\x09" + (range #\x20 #\x26) + (range #\x28 #\x7E) + non-ascii)) + +;; Multiline Literal String +(define-peg-pattern ml-literal-string body (and + ml-literal-string-delim + (? ignore-newline) + ml-literal-body + ml-literal-string-delim)) +(define-peg-pattern ml-literal-string-delim none "'''") +(define-peg-pattern ml-literal-body body (and + (* mll-content) + (* (and mll-quotes (+ mll-content))) + (? mll-quotes-final))) + +(define-peg-pattern mll-content body (or mll-char newline)) +(define-peg-pattern mll-char body (or "\x09" + (range #\x20 #\x26) + (range #\x28 #\x7E) + non-ascii)) +(define-peg-pattern mll-quotes body (or "''" "'")) +;; We need to convince the parser to backtrack here, thus the additional followed-by rule. +(define-peg-pattern mll-quotes-final body (or (and "''" (followed-by + ml-literal-string-delim)) + (and "'" (followed-by + ml-literal-string-delim)))) + +;; Integer +(define-peg-pattern integer all (or hex-int oct-int bin-int dec-int)) + +(define-peg-pattern digit1-9 body (range #\1 #\9)) +(define-peg-pattern digit0-7 body (range #\0 #\7)) +(define-peg-pattern digit0-1 body (range #\0 #\1)) +(define-peg-pattern DIGIT body (range #\0 #\9)) +(define-peg-pattern HEXDIG body (or DIGIT + (range #\a #\f) + (range #\A #\F))) + +(define-peg-pattern dec-int all (and (? (or "-" "+")) unsigned-dec-int)) +(define-peg-pattern unsigned-dec-int body (or (and digit1-9 (+ (or DIGIT (and (ignore "_") DIGIT)))) + DIGIT)) + +(define-peg-pattern hex-int all (and (ignore "0x") + HEXDIG + (* (or HEXDIG (and (ignore "_") HEXDIG))))) +(define-peg-pattern oct-int all (and (ignore "0o") + digit0-7 + (* (or digit0-7 (and (ignore "_") digit0-7))))) +(define-peg-pattern bin-int all (and (ignore "0b") + digit0-1 + (* (or digit0-1 (and (ignore "_") digit0-1))))) + +;; Float +(define-peg-pattern float all (or + (and float-int-part (or exp (and frac (? exp)))) + special-float)) +(define-peg-pattern float-int-part body dec-int) +(define-peg-pattern frac body (and "." zero-prefixable-int)) +(define-peg-pattern zero-prefixable-int body (and DIGIT (* (or DIGIT (and (ignore "_") DIGIT))))) + +(define-peg-pattern exp body (and (or "e" "E") float-exp-part)) +(define-peg-pattern float-exp-part body (and (? (or "-" "+")) zero-prefixable-int)) +(define-peg-pattern special-float body (and (? (or "-" "+")) (or "inf" "nan"))) + +;; Boolean +(define-peg-pattern boolean all (or "true" "false")) + +;; Date and Time (as defined in RFC 3339) + +(define-peg-pattern date-time body (or offset-date-time + local-date-time + local-date + local-time)) + +(define-peg-pattern date-fullyear all (and DIGIT DIGIT DIGIT DIGIT)) +(define-peg-pattern date-month all (and DIGIT DIGIT)) ; 01-12 +(define-peg-pattern date-mday all (and DIGIT DIGIT)) ; 01-28, 01-29, 01-30, 01-31 based on month/year +(define-peg-pattern time-delim none (or "T" "t" " ")) ; T, t, or space +(define-peg-pattern time-hour all (and DIGIT DIGIT)) ; 00-23 +(define-peg-pattern time-minute all (and DIGIT DIGIT)) ; 00-59 +(define-peg-pattern time-second all (and DIGIT DIGIT)) ; 00-58, 00-59, 00-60 based on leap second rules +(define-peg-pattern time-secfrac all (and (ignore ".") (+ DIGIT))) +(define-peg-pattern time-numoffset body (and (or "+" "-") + time-hour + (ignore ":") + time-minute)) +(define-peg-pattern time-offset all (or "Z" time-numoffset)) + +(define-peg-pattern partial-time body (and time-hour + (ignore ":") + time-minute + (ignore ":") + time-second + (? time-secfrac))) +(define-peg-pattern full-date body (and date-fullyear + (ignore "-") + date-month + (ignore "-") + date-mday)) +(define-peg-pattern full-time body (and partial-time time-offset)) + +;; Offset Date-Time +(define-peg-pattern offset-date-time all (and full-date time-delim full-time)) + +;; Local Date-Time +(define-peg-pattern local-date-time all (and full-date time-delim partial-time)) + +;; Local Date +(define-peg-pattern local-date all full-date) + +;; Local Time +(define-peg-pattern local-time all partial-time) + +;; Array +(define-peg-pattern array all (and (ignore "[") + (? array-values) + (ignore ws-comment-newline) + (ignore "]"))) + +(define-peg-pattern array-values body (or + (and ws-comment-newline + val + ws-comment-newline + (ignore ",") + array-values) + (and ws-comment-newline + val + ws-comment-newline + (ignore (? ","))))) + +(define-peg-pattern ws-comment-newline none (* (or wschar (and (? comment) ignore-newline)))) + +;; Table +(define-peg-pattern table all (or array-table + std-table)) + +;; Standard Table +(define-peg-pattern std-table all (and (ignore "[") ws key ws (ignore "]"))) +(define-peg-pattern array-table all (and (ignore "[[") ws key ws (ignore "]]"))) + +;; Inline Table +(define-peg-pattern inline-table all (and (ignore "{") + (* ws) + (? inline-table-keyvals) + (* ws) + (ignore "}"))) +(define-peg-pattern inline-table-sep none (and ws "," ws)) +(define-peg-pattern inline-table-keyvals body (and keyval + (? (and inline-table-sep inline-table-keyvals)))) + + +;; Parsing + +(define (recursive-acons key value alist) + "Add a VALUE to ALIST of alists descending into keys according to the +list in KEY. For instance of KEY is (a b) this would create +alist[a][b] = value." + (match key + (((? string? key)) + (if (assoc-ref alist key) + (raise (condition (&already-defined))) + (alist-cons key value alist))) + ((elem rest ...) (match (assoc-ref alist elem) + (#f + (acons elem (recursive-acons rest value '()) alist)) + (old-value + (acons elem (recursive-acons rest value old-value) (alist-delete elem alist))))) + (() alist))) + +(define (recursive-assoc-ref alist key) + "Retrieve a value from ALIST of alists, descending into each value of +the list KEY. For instance a KEY (a b) would retrieve alist[a][b]." + (match key + (((? string? key)) (assoc-ref alist key)) + ((elem rest ...) (recursive-assoc-ref (assoc-ref alist elem) rest)))) + +(define (eval-toml-file parse-tree) + "Convert toml parse tree to alist." + + (define (assoc-ref->number alist key) + (and=> (and=> (assq-ref alist key) car) string->number)) + + (define (eval-date rest) + (let ((args (keyword-flatten '(date-fullyear + date-month + date-mday + time-hour + time-minute + time-second + time-secfrac + time-offset) + rest))) + (make-date + (assoc-ref->number args 'time-secfrac) + (assoc-ref->number args 'time-second) + (assoc-ref->number args 'time-minute) + (assoc-ref->number args 'time-hour) + (assoc-ref->number args 'date-mday) + (assoc-ref->number args 'date-month) + (assoc-ref->number args 'date-fullyear) + (match (assq-ref args 'time-offset) + (("Z") 0) + ((sign ('time-hour hour) ('time-minute minute)) + (* (+ + (* (string->number (string-append sign hour)) 60) + (string->number minute)) 60)) + (#f #f))))) + + (define (eval-value value) + "Evaluate right-hand-side of 'keyval token (i.e., a value)." + (match value + (('boolean "true") + #t) + (('boolean "false") + #f) + (('integer ('dec-int int)) + (string->number int 10)) + (('integer ('hex-int int)) + (string->number int 16)) + (('integer ('oct-int int)) + (string->number int 8)) + (('integer ('bin-int int)) + (string->number int 2)) + (('float ('dec-int int) b) + (string->number (string-append int b) 10)) + (('float other) + (match other + ("inf" +inf.0) + ("+inf" +inf.0) + ("-inf" -inf.0) + ("nan" +nan.0) + ("+nan" +nan.0) + ("-nan" -nan.0))) + (('offset-date-time rest ...) + (eval-date rest)) + (('local-date-time rest ...) + (eval-date rest)) + (('local-date rest ...) + (eval-date rest)) + (('local-time rest ...) + (eval-date rest)) + (('string str ...) + (apply string-append + (map (match-lambda + (('escaped "\"") "\"") + (('escaped "\\") "\\") + (('escaped "b") "\b") + (('escaped "t") "\t") + (('escaped "n") "\n") + (('escaped (? (lambda (x) (>= (string-length x) 4)) u)) + (list->string (list (integer->char (string->number u 16))))) + ((? string? s) s)) + (keyword-flatten '(escaped) str)))) + ('string "") + (('array tails ...) + (map eval-value (keyword-flatten '(boolean integer float string array + inline-table offset-date-time + local-date-time local-date + local-time) + tails))) + ('array (list)) + (('inline-table tails ...) + (eval (keyword-flatten '(keyval) tails) '() '())))) + + (define (ensure-list value) + (if (list? value) + value + (list value))) + + (define (simple-key->list keys) + (map + (match-lambda + (('simple-key 'quoted-key) "") + (('simple-key ('quoted-key k)) k) + (('simple-key (? string? k)) k) + (other (raise-exception `(invalid-simple-key ,other)))) + (keyword-flatten '(simple-key) keys))) + + (define (skip-keyval tails) + "Skip key-value pairs in tails until the next table." + (match tails + ((('keyval key val) tails ...) + (skip-keyval tails)) + (('keyval keyval) + '()) + (other other))) + + (define (eval parse-tree current-table result) + "Evaluate toml file body." + + (match parse-tree + ((('table ('std-table names ...)) tails ...) + (eval tails (simple-key->list names) result)) + ((('table ('array-table names ...)) tails ...) + ;; Not implemented. + (eval (skip-keyval tails) '() result)) + ((('keyval key val) tails ...) + (recursive-acons + (append current-table (ensure-list (simple-key->list key))) + (eval-value val) + (eval tails current-table result))) + (('keyval key val) + (recursive-acons + (append current-table (ensure-list (simple-key->list key))) + (eval-value val) + result)) + (() + '()))) + + (eval parse-tree '() '())) + +(define (parse-toml str) + "Parse and evaluate toml document from string STR." + + (let* ((match (match-pattern toml-file str)) + (end (peg:end match)) + (tree (peg:tree match)) + (flat-tree (keyword-flatten '(table keyval) tree))) + (if (eq? end (string-length str)) + (eval-toml-file flat-tree) + (raise (condition (&file-not-consumed)))))) + +(define (parse-toml-file file) + "Parse and evaluate toml document from file FILE." + + (parse-toml (call-with-input-file file get-string-all))) + diff --git a/tests/toml.scm b/tests/toml.scm new file mode 100644 index 0000000000..cd731cd2f0 --- /dev/null +++ b/tests/toml.scm @@ -0,0 +1,442 @@ +;;; GNU Guix --- Functional package management for GNU +;;; Copyright © 2023 Lars-Dominik Braun <l...@6xq.net> +;;; +;;; This file is part of GNU Guix. +;;; +;;; GNU Guix is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; GNU Guix is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. + +(define-module (test-toml) + #:use-module (guix build toml) + #:use-module (guix tests) + #:use-module (srfi srfi-19) ; For datetime. + #:use-module (srfi srfi-64) + #:use-module (ice-9 match)) + +(test-begin "toml") + +;; Tests taken from https://toml.io/en/v1.0.0 + +(test-error "parse-toml: Unspecified key" + &file-not-consumed + (parse-toml "key = # INVALID")) + +(test-error "parse-toml: Missing EOL" + &file-not-consumed + (parse-toml "first = \"Tom\" last = \"Preston-Werner\" # INVALID")) + +(test-equal "parse-toml: Bare keys" + '(("key" . "value") ("bare_key" . "value") ("bare-key" . "value") ("1234" . "value")) + (parse-toml "key = \"value\" +bare_key = \"value\" +bare-key = \"value\" +1234 = \"value\"")) + +(test-equal "parse-toml: Quoted keys" + '(("127.0.0.1" . "value") + ("character encoding" . "value") + ("ʎǝʞ" . "value") + ("key2" . "value") + ("quoted \"value\"" . "value")) + (parse-toml "\"127.0.0.1\" = \"value\" +\"character encoding\" = \"value\" +\"ʎǝʞ\" = \"value\" +'key2' = \"value\" +'quoted \"value\"' = \"value\"")) + +(test-equal "parse-toml: No key" + #f + (parse-toml "= \"no key name\"")) + +(test-equal "parse-toml: Empty key" + '(("" . "blank")) + (parse-toml "\"\" = \"blank\"")) + +(test-equal "parse-toml: Dotted keys" + '(("name" . "Orange") + ("physical" ("color" . "orange") + ("shape" . "round")) + ("site" ("google.com" . #t))) + (parse-toml "name = \"Orange\" +physical.color = \"orange\" +physical.shape = \"round\" +site.\"google.com\" = true")) + +(test-equal "parse-toml: Dotted keys with whitespace" + '(("fruit" ("name" . "banana") ("color" . "yellow") ("flavor" . "banana"))) + (parse-toml "fruit.name = \"banana\" # this is best practice +fruit. color = \"yellow\" # same as fruit.color +fruit . flavor = \"banana\" # same as fruit.flavor")) + +(test-error "parse-toml: Multiple keys" + &already-defined + (parse-toml "name = \"Tom\" +name = \"Pradyun\"")) + +(test-equal "parse-toml: Implicit tables" + '(("fruit" ("apple" ("smooth" . #t)) ("orange" . 2))) + (parse-toml "fruit.apple.smooth = true +fruit.orange = 2")) + +(test-error "parse-toml: Write to value" + &already-defined + (parse-toml "fruit.apple = 1 +fruit.apple.smooth = true")) + +(test-equal "parse-toml: String" + '(("str" . "I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF.")) + (parse-toml "str = \"I'm a string. \\\"You can quote me\\\". Name\\tJos\\u00E9\\nLocation\\tSF.\"")) + +(test-equal "parse-toml: Empty string" + '(("str1" . "") + ("str2" . "") + ("str3" . "") + ("str4" . "")) + (parse-toml "str1 = \"\" +str2 = '' +str3 = \"\"\"\"\"\" +str4 = ''''''")) + +(test-equal "parse-toml: Multi-line basic strings" + '(("str1" . "Roses are red\nViolets are blue") + ("str2" . "The quick brown fox jumps over the lazy dog.") + ("str3" . "The quick brown fox jumps over the lazy dog.") + ("str4" . "Here are two quotation marks: \"\". Simple enough.") + ("str5" . "Here are three quotation marks: \"\"\".") + ("str6" . "Here are fifteen quotation marks: \"\"\"\"\"\"\"\"\"\"\"\"\"\"\".") + ("str7" . "\"This,\" she said, \"is just a pointless statement.\"")) + (parse-toml "str1 = \"\"\" +Roses are red +Violets are blue\"\"\" + +str2 = \"\"\" +The quick brown \\ + + + fox jumps over \\ + the lazy dog.\"\"\" + +str3 = \"\"\"\\ + The quick brown \\ + fox jumps over \\ + the lazy dog.\\ + \"\"\" + +str4 = \"\"\"Here are two quotation marks: \"\". Simple enough.\"\"\" +# str5 = \"\"\"Here are three quotation marks: \"\"\".\"\"\" # INVALID +str5 = \"\"\"Here are three quotation marks: \"\"\\\".\"\"\" +str6 = \"\"\"Here are fifteen quotation marks: \"\"\\\"\"\"\\\"\"\"\\\"\"\"\\\"\"\"\\\".\"\"\" + +# \"This,\" she said, \"is just a pointless statement.\" +str7 = \"\"\"\"This,\" she said, \"is just a pointless statement.\"\"\"\"")) + +(test-equal "parse-toml: Literal string" + '(("winpath" . "C:\\Users\\nodejs\\templates") + ("winpath2" . "\\\\ServerX\\admin$\\system32\\") + ("quoted" . "Tom \"Dubs\" Preston-Werner") + ("regex" . "<\\i\\c*\\s*>")) + (parse-toml "winpath = 'C:\\Users\\nodejs\\templates' +winpath2 = '\\\\ServerX\\admin$\\system32\\' +quoted = 'Tom \"Dubs\" Preston-Werner' +regex = '<\\i\\c*\\s*>'")) + +(test-equal "parse-toml: Multi-line literal strings" + '(("regex2" . "I [dw]on't need \\d{2} apples") + ("lines" . "The first newline is\ntrimmed in raw strings.\n All other whitespace\n is preserved.\n") + ("quot15" . "Here are fifteen quotation marks: \"\"\"\"\"\"\"\"\"\"\"\"\"\"\"") + ("apos15" . "Here are fifteen apostrophes: '''''''''''''''") + ("str" . "'That,' she said, 'is still pointless.'")) + (parse-toml "regex2 = '''I [dw]on't need \\d{2} apples''' +lines = ''' +The first newline is +trimmed in raw strings. + All other whitespace + is preserved. +''' +quot15 = '''Here are fifteen quotation marks: \"\"\"\"\"\"\"\"\"\"\"\"\"\"\"''' + +# apos15 = '''Here are fifteen apostrophes: '''''''''''''''''' # INVALID +apos15 = \"Here are fifteen apostrophes: '''''''''''''''\" + +# 'That,' she said, 'is still pointless.' +str = ''''That,' she said, 'is still pointless.''''")) + +(test-equal "parse-toml: Decimal integer" + '(("int1" . 99) ("int2" . 42) ("int3" . 0) ("int4" . -17)) + (parse-toml "int1 = +99 +int2 = 42 +int3 = 0 +int4 = -17")) + +(test-equal "parse-toml: Decimal integer underscores" + '(("int5" . 1000) ("int6" . 5349221) ("int7" . 5349221) ("int8" . 12345)) + (parse-toml "int5 = 1_000 +int6 = 5_349_221 +int7 = 53_49_221 # Indian number system grouping +int8 = 1_2_3_4_5 # VALID but discouraged")) + +(test-equal "parse-toml: Hexadecimal" + `(("hex1" . ,#xdeadbeef) ("hex2" . ,#xdeadbeef) ("hex3" . ,#xdeadbeef)) + (parse-toml "hex1 = 0xDEADBEEF +hex2 = 0xdeadbeef +hex3 = 0xdead_beef")) + +(test-equal "parse-toml: Octal" + `(("oct1" . ,#o01234567) ("oct2" . #o755)) + (parse-toml "oct1 = 0o01234567 +oct2 = 0o755")) + +(test-equal "parse-toml: Binary" + `(("bin1" . ,#b11010110)) + (parse-toml "bin1 = 0b11010110")) + +(test-equal "parse-toml: Float" + '(("flt1" . 1.0) + ("flt2" . 3.1415) + ("flt3" . -0.01) + ("flt4" . 5e+22) + ("flt5" . 1e06) + ("flt6" . -2e-2) + ("flt7" . 6.626e-34) + ("flt8" . 224617.445991228)) + (parse-toml "# fractional +flt1 = +1.0 +flt2 = 3.1415 +flt3 = -0.01 + +# exponent +flt4 = 5e+22 +flt5 = 1e06 +flt6 = -2E-2 + +# both +flt7 = 6.626e-34 + +flt8 = 224_617.445_991_228")) + +(test-equal "parse-toml: Float" + '(("sf1" . +inf.0) + ("sf2" . +inf.0) + ("sf3" . -inf.0) + ("sf4" . +nan.0) + ("sf5" . +nan.0) + ("sf6" . -nan.0)) + (parse-toml "# infinity +sf1 = inf # positive infinity +sf2 = +inf # positive infinity +sf3 = -inf # negative infinity + +# not a number +sf4 = nan # actual sNaN/qNaN encoding is implementation-specific +sf5 = +nan # same as `nan` +sf6 = -nan # valid, actual encoding is implementation-specific")) + +(test-equal "parse-toml: Boolean" + '(("bool1" . #t) + ("bool2" . #f)) + (parse-toml "bool1 = true +bool2 = false")) + +(test-equal "parse-toml: Offset date-time" + `(("odt1" . ,(make-date #f 0 32 7 27 5 1979 0)) + ("odt2" . ,(make-date #f 0 32 0 27 5 1979 (* -7 60 60))) + ("odt3" . ,(make-date 999999 0 32 0 27 5 1979 (* 7 60 60))) + ("odt4" . ,(make-date #f 0 32 7 27 5 1979 0))) + (parse-toml "odt1 = 1979-05-27T07:32:00Z +odt2 = 1979-05-27T00:32:00-07:00 +odt3 = 1979-05-27T00:32:00.999999+07:00 +odt4 = 1979-05-27 07:32:00Z")) + +(test-equal "parse-toml: Local date-time" + `(("ldt1" . ,(make-date #f 0 32 7 27 5 1979 #f)) + ("ldt2" . ,(make-date 999999 0 32 0 27 5 1979 #f))) + (parse-toml "ldt1 = 1979-05-27T07:32:00 +ldt2 = 1979-05-27T00:32:00.999999")) + +(test-equal "parse-toml: Local date" + `(("ld1" . ,(make-date #f #f #f #f 27 5 1979 #f))) + (parse-toml "ld1 = 1979-05-27")) + +(test-equal "parse-toml: Local time" + `(("lt1" . ,(make-date #f 0 32 7 #f #f #f #f)) + ("lt2" . ,(make-date 999999 0 32 0 #f #f #f #f))) + (parse-toml "lt1 = 07:32:00 +lt2 = 00:32:00.999999")) + +(test-equal "parse-toml: Arrays" + '(("integers" 1 2 3) + ("colors" "red" "yellow" "green") + ("nested_arrays_of_ints" (1 2) (3 4 5)) + ("nested_mixed_array" (1 2) ("a" "b" "c")) + ("string_array" "all" "strings") + ("numbers" 0.1 0.2 0.5 1 2 5) + ("contributors" "Foo Bar <f...@example.com>" (("name" . "Baz Qux") ("email" . "baz...@example.com") ("url" . "https://example.com/bazqux"))) + ("integers2" 1 2 3) + ("integers3" 1 2)) + (parse-toml "integers = [ 1, 2, 3 ] +colors = [ \"red\", \"yellow\", \"green\" ] +nested_arrays_of_ints = [ [ 1, 2 ], [3, 4, 5] ] +nested_mixed_array = [ [ 1, 2 ], [\"a\", \"b\", \"c\"] ] +string_array = [ \"all\", 'strings' ] + +# Mixed-type arrays are allowed +numbers = [ 0.1, 0.2, 0.5, 1, 2, 5 ] +contributors = [ + \"Foo Bar <f...@example.com>\", + { name = \"Baz Qux\", email = \"baz...@example.com\", url = \"https://example.com/bazqux\" } +] + +integers2 = [ + 1, 2, 3 +] + +integers3 = [ + 1, + 2, # this is ok +]")) + +(test-equal "parse-toml: Tables" + '(("table-1" ("key1" . "some string") + ("key2" . 123)) + ("table-2" ("key1" . "another string") + ("key2" . 456))) + (parse-toml "[table-1] +key1 = \"some string\" +key2 = 123 + +[table-2] +key1 = \"another string\" +key2 = 456")) + + +(test-equal "parse-toml: Dotted table" + '(("dog" ("tater.man" ("type" ("name" . "pug"))))) + (parse-toml "[dog.\"tater.man\"] +type.name = \"pug\"")) + + +(test-equal "parse-toml: Dotted table with whitespace" + '(("a" ("b" ("c" ("x" . 1)))) + ("d" ("e" ("f" ("x" . 1)))) + ("g" ("h" ("i" ("x" . 1)))) + ("j" ("ʞ" ("l" ("x" . 1))))) + (parse-toml "[a.b.c] # this is best practice +x=1 +[ d.e.f ] # same as [d.e.f] +x=1 +[ g . h . i ] # same as [g.h.i] +x=1 +[ j . \"ʞ\" . 'l' ] # same as [j.\"ʞ\".'l'] +x=1")) + +;; XXX: technically this is not allowed, but we permit it. +(test-equal "parse-toml: Multiple tables" + '(("fruit" ("apple" . "red") ("orange" . "orange"))) + (parse-toml "[fruit] +apple = \"red\" + +[fruit] +orange = \"orange\"")) + +(test-equal "parse-toml: Assignment to non-table" + #f + (parse-toml "[fruit] +apple = \"red\" + +[fruit.apple] +texture = \"smooth\"")) + +(test-equal "parse-toml: Dotted keys create tables" + '(("fruit" ("apple" ("color" . "red") ("taste" ("sweet" . #t))))) + (parse-toml "fruit.apple.color = \"red\" +fruit.apple.taste.sweet = true")) + +(test-equal "parse-toml: Inline tables" + '(("name" ("first" . "Tom") ("last" . "Preston-Werner")) + ("point" ("x" . 1) ("y" . 2)) + ("animal" ("type" ("name" . "pug")))) + (parse-toml "name = { first = \"Tom\", last = \"Preston-Werner\" } +point = { x = 1, y = 2 } +animal = { type.name = \"pug\" }")) + +(test-error "parse-toml: Invalid assignment to inline table" + #t + (parse-toml "[product] +type = { name = \"Nail\" } +type.edible = false # INVALID")) + +;; We do not catch this semantic error yet. +(test-expect-fail 1) +(test-error "parse-toml: Invalid assignment to implicit table" + #f + (parse-toml "[product] +type.name = \"Nail\" +type = { edible = false } # INVALID")) + +;; Not implemented. +(test-expect-fail 1) +(test-equal "parse-toml: Array of tables" + '(("products" (("name" . "Hammer") ("sku" . 738594937)) + () + (("name" . "Nail") ("sku" . 284758393) ("color" . "gray")))) + (parse-toml "[[products]] +name = \"Hammer\" +sku = 738594937 + +[[products]] # empty table within the array + +[[products]] +name = \"Nail\" +sku = 284758393 + +color = \"gray\"")) + +;; Not implemented. +(test-expect-fail 1) +(test-equal "parse-toml: Array of tables" + '(("fruits" ((("name" . "apple") + ("physical" (("color" . "red") ("shape" . "round"))) + ("varieties" ((("name" . "red delicious")) (("name" . "granny smith"))))) + (("name" . "banana") + ("varieties" (((("name" . "plantain"))))))))) + (parse-toml "[[fruits]] +name = \"apple\" + +[fruits.physical] # subtable +color = \"red\" +shape = \"round\" + +[[fruits.varieties]] # nested array of tables +name = \"red delicious\" + +[[fruits.varieties]] +name = \"granny smith\" + + +[[fruits]] +name = \"banana\" + +[[fruits.varieties]] +name = \"plantain\"")) + +;; Not implemented. +(test-expect-fail 1) +(test-error "parse-toml: Assignment to statically defined array" + #f + (parse-toml "fruits = [] + +[[fruits]] +x=1")) + +(test-end "toml") +