Hi Philip, > Since Cabal 2.2 conditional blocks support the `elif` construct [0], but our > hackage importer does not currently support such expressions. This causes > importing packages like `raaz` [1] to fail. attached patch series fixes the import for `raaz` by adding support for elif and other minor adjustments. There still is syntax we do not support, most importantly mixed indentation, which is already documented as xfail via a testcase. I’m adding a few more.
Could you have a look please if these make sense? Cheers, Lars
>From 6b47c1d399922b60dafa01105daa1b7ea3da3935 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun <l...@6xq.net> Date: Sat, 30 Apr 2022 15:38:44 +0200 Subject: [PATCH 1/5] import: cabal: Support elif statement. * guix/import/cabal.scm (make-cabal-parser): Replace if-then-else grammar case with elif-else, modify if-then accordingly. (is-elif): New procedure. (lex-elif): Likewise. (is-id): Add elif keyword. (lex-word): Add test for elif. * tests/hackage.scm (test-cabal-if): New variale. (test-cabal-else): Likewise. (test-cabal-elif): Likewise. (test-cabal-elif-brackets): Likewise. (match-ghc-elif): Likewise. ("hackage->guix-package test lonely if statement", "hackage->guix-package test else statement", "hackage->guix-package test elif statement", "hackage->guix-package test elif statement with brackets"): New tests. --- guix/import/cabal.scm | 63 ++++++++++++++------------ tests/hackage.scm | 102 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 29 deletions(-) diff --git a/guix/import/cabal.scm b/guix/import/cabal.scm index 98d7234098..e1a082a31a 100644 --- a/guix/import/cabal.scm +++ b/guix/import/cabal.scm @@ -149,7 +149,7 @@ (define (make-cabal-parser) (right: IF FLAG EXEC TEST-SUITE CUSTOM-SETUP SOURCE-REPO BENCHMARK LIB COMMON OCURLY) (left: OR) (left: PROPERTY AND) - (right: ELSE NOT)) + (right: ELIF ELSE NOT)) ;; --- rules (body (properties sections) : (append $1 $2)) (sections (sections flags) : (append $1 $2) @@ -193,32 +193,32 @@ (define (make-cabal-parser) (LIB open exprs close) : `(section library ,$3)) (exprs (exprs PROPERTY) : (append $1 (list $2)) (PROPERTY) : (list $1) - (exprs if-then-else) : (append $1 (list $2)) - (if-then-else) : (list $1) - (exprs if-then) : (append $1 (list $2)) - (if-then) : (list $1)) - (if-then-else (IF tests OCURLY exprs CCURLY ELSE OCURLY exprs CCURLY) - : `(if ,$2 ,$4 ,$8) - (IF tests open exprs close ELSE OCURLY exprs CCURLY) - : `(if ,$2 ,$4 ,$8) - ;; The 'open' token after 'tests' is shifted after an 'exprs' - ;; is found. This is because, instead of 'exprs' a 'OCURLY' - ;; token is a valid alternative. For this reason, 'open' - ;; pushes a <parse-context> with a line indentation equal to - ;; the indentation of 'exprs'. - ;; - ;; Differently from this, without the rule above this - ;; comment, when an 'ELSE' token is found, the 'open' token - ;; following the 'ELSE' would be shifted immediately, before - ;; the 'exprs' is found (because there are no other valid - ;; tokens). The 'open' would therefore create a - ;; <parse-context> with the indentation of 'ELSE' and not - ;; 'exprs', creating an inconsistency. We therefore allow - ;; mixed style conditionals. - (IF tests open exprs close ELSE open exprs close) - : `(if ,$2 ,$4 ,$8)) - (if-then (IF tests OCURLY exprs CCURLY) : `(if ,$2 ,$4 ()) - (IF tests open exprs close) : `(if ,$2 ,$4 ())) + (exprs elif-else) : (append $1 (list ($2 '(())))) + (elif-else) : (list ($1 '(())))) + ;; LALR(1) parsers prefer to be left-recursive, which make if-statements slightly involved. + ;; XXX: This technically allows multiple else statements. + (elif-else (elif-else ELIF tests OCURLY exprs CCURLY) : (lambda (y) ($1 (list (append (list 'if $3 $5) y)))) + (elif-else ELIF tests open exprs close) : (lambda (y) ($1 (list (append (list 'if $3 $5) y)))) + (elif-else ELSE OCURLY exprs CCURLY) : (lambda (y) ($1 (list $4))) + ;; The 'open' token after 'tests' is shifted after an 'exprs' + ;; is found. This is because, instead of 'exprs' a 'OCURLY' + ;; token is a valid alternative. For this reason, 'open' + ;; pushes a <parse-context> with a line indentation equal to + ;; the indentation of 'exprs'. + ;; + ;; Differently from this, without the rule above this + ;; comment, when an 'ELSE' token is found, the 'open' token + ;; following the 'ELSE' would be shifted immediately, before + ;; the 'exprs' is found (because there are no other valid + ;; tokens). The 'open' would therefore create a + ;; <parse-context> with the indentation of 'ELSE' and not + ;; 'exprs', creating an inconsistency. We therefore allow + ;; mixed style conditionals. + (elif-else ELSE open exprs close) : (lambda (y) ($1 (list $4))) + ;; Terminating rule. + (if-then) : (lambda (y) (append $1 y))) + (if-then (IF tests OCURLY exprs CCURLY) : (list 'if $2 $4) + (IF tests open exprs close) : (list 'if $2 $4)) (tests (TEST OPAREN ID CPAREN) : `(,$1 ,$3) (TRUE) : 'true (FALSE) : 'false @@ -386,6 +386,8 @@ (define is-lib (make-rx-matcher "^library *" regexp/icase)) (define is-else (make-rx-matcher "^else" regexp/icase)) +(define (is-elif s) (string-ci=? s "elif")) + (define (is-if s) (string-ci=? s "if")) (define (is-true s) (string-ci=? s "true")) @@ -402,8 +404,8 @@ (define (is-or s) (string=? s "||")) (define (is-id s port loc) (let ((cabal-reserved-words - '("if" "else" "library" "flag" "executable" "test-suite" "custom-setup" - "source-repository" "benchmark" "common")) + '("if" "else" "elif" "library" "flag" "executable" "test-suite" + "custom-setup" "source-repository" "benchmark" "common")) (spaces (read-while (cut char-set-contains? char-set:blank <>) port)) (c (peek-char port))) (unread-string spaces port) @@ -494,6 +496,8 @@ (define (lex-lib loc) (make-lexical-token 'LIB loc #f)) (define (lex-else loc) (make-lexical-token 'ELSE loc #f)) +(define (lex-elif loc) (make-lexical-token 'ELIF loc #f)) + (define (lex-if loc) (make-lexical-token 'IF loc #f)) (define (lex-true loc) (make-lexical-token 'TRUE loc #t)) @@ -568,6 +572,7 @@ (define (lex-word port loc) LOC is the current port location." (let* ((w (read-delimited " <>=()\t\n" port 'peek))) (cond ((is-if w) (lex-if loc)) + ((is-elif w) (lex-elif loc)) ((is-test w port) (lex-test w loc)) ((is-true w) (lex-true loc)) ((is-false w) (lex-false loc)) diff --git a/tests/hackage.scm b/tests/hackage.scm index 189b9af173..38f75b268e 100644 --- a/tests/hackage.scm +++ b/tests/hackage.scm @@ -309,6 +309,108 @@ (define test-cabal-flag-executable (test-assert "hackage->guix-package test flag executable" (eval-test-with-cabal test-cabal-flag-executable match-ghc-foo)) +;; Check if-elif-else statements +(define test-cabal-if + "name: foo +version: 1.0.0 +homepage: http://test.org +synopsis: synopsis +description: description +license: BSD3 +library + if os(first) + Build-depends: ghc-c +") + +(define test-cabal-else + "name: foo +version: 1.0.0 +homepage: http://test.org +synopsis: synopsis +description: description +license: BSD3 +library + if os(first) + Build-depends: ghc-a + else + Build-depends: ghc-c +") + +(define test-cabal-elif + "name: foo +version: 1.0.0 +homepage: http://test.org +synopsis: synopsis +description: description +license: BSD3 +library + if os(first) + Build-depends: ghc-a + elif os(second) + Build-depends: ghc-b + elif os(guix) + Build-depends: ghc-c + elif os(third) + Build-depends: ghc-d + else + Build-depends: ghc-e +") + +;; Try the same with different bracket styles +(define test-cabal-elif-brackets + "name: foo +version: 1.0.0 +homepage: http://test.org +synopsis: synopsis +description: description +license: BSD3 +library + if os(first) { + Build-depends: ghc-a + } + elif os(second) + Build-depends: ghc-b + elif os(guix) { Build-depends: ghc-c } + elif os(third) { + Build-depends: ghc-d } + else + Build-depends: ghc-e +") + +(define-package-matcher match-ghc-elif + ('package + ('name "ghc-foo") + ('version "1.0.0") + ('source + ('origin + ('method 'url-fetch) + ('uri ('hackage-uri "foo" 'version)) + ('sha256 + ('base32 + (? string? hash))))) + ('build-system 'haskell-build-system) + ('inputs ('list 'ghc-c)) + ('home-page "http://test.org") + ('synopsis (? string?)) + ('description (? string?)) + ('license 'license:bsd-3))) + +(test-assert "hackage->guix-package test lonely if statement" + (eval-test-with-cabal test-cabal-else match-ghc-elif + #:cabal-environment '(("os" . "guix")))) + +(test-assert "hackage->guix-package test else statement" + (eval-test-with-cabal test-cabal-else match-ghc-elif + #:cabal-environment '(("os" . "guix")))) + +(test-assert "hackage->guix-package test elif statement" + (eval-test-with-cabal test-cabal-elif match-ghc-elif + #:cabal-environment '(("os" . "guix")))) + +(test-assert "hackage->guix-package test elif statement with brackets" + (eval-test-with-cabal test-cabal-elif-brackets match-ghc-elif + #:cabal-environment '(("os" . "guix")))) + ;; Check Hackage Cabal revisions. (define test-cabal-revision "name: foo -- 2.35.1
>From dad8dbb8dde18716921523b5db722a168410740a Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun <l...@6xq.net> Date: Sat, 30 Apr 2022 15:39:34 +0200 Subject: [PATCH 2/5] import: cabal: Allow curly brackets in more positions. * guix/import/cabal.scm (is-layout-property): Do not expect end of line. (lex-layout-property): Check for newline. (lex-property): Stop reading on closing curly bracket. * tests/hackage.scm (test-read-cabal-2): New variable. ("read-cabal test: if brackets on the same line"): New test. --- guix/import/cabal.scm | 11 ++++++++--- tests/hackage.scm | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/guix/import/cabal.scm b/guix/import/cabal.scm index e1a082a31a..364fcc3176 100644 --- a/guix/import/cabal.scm +++ b/guix/import/cabal.scm @@ -354,7 +354,7 @@ (define* (make-rx-matcher pat #:optional (flag #f)) (make-regexp pat)))) (cut regexp-exec rx <>))) -(define is-layout-property (make-rx-matcher "([a-z0-9-]+)[ \t]*:[ \t]*(\\w?[^{}]*)$" +(define is-layout-property (make-rx-matcher "([a-z0-9-]+)[ \t]*:[ \t]*(\\w?[^{}]*)" regexp/icase)) (define is-braced-property (make-rx-matcher "([a-z0-9-]+)[ \t]*:[ \t]*\\{[ \t]*$" @@ -465,7 +465,10 @@ (define (lex-layout-property k-v-rx-res loc port) (value (match:substring k-v-rx-res 2))) (make-lexical-token 'PROPERTY loc - (list key `(,(read-value port value (current-indentation))))))) + (list key `(,(if (eqv? (peek-char port) #\newline) ; The next character + ; is not necessarily a newline if a bracket follows the property. + (read-value port value (current-indentation)) + value)))))) (define (lex-braced-property k-rx-res loc port) (let ((key (string-downcase (match:substring k-rx-res 1)))) @@ -600,7 +603,9 @@ (define (lex-line port loc) (else (unread-string s port) #f)))) (define (lex-property port loc) - (let* ((s (read-delimited "\n" port 'peek))) + ;; Stop reading on a }, so closing brackets (for example during + ;; if-clauses) work properly. + (let* ((s (read-delimited "\n}" port 'peek))) (cond ((is-braced-property s) => (cut lex-braced-property <> loc port)) ((is-layout-property s) => (cut lex-layout-property <> loc port)) diff --git a/tests/hackage.scm b/tests/hackage.scm index 38f75b268e..15309a3381 100644 --- a/tests/hackage.scm +++ b/tests/hackage.scm @@ -156,6 +156,12 @@ (define test-read-cabal-1 Exposed-Modules: Test.QuickCheck.Exception") +(define test-read-cabal-2 + "name: test-me +common defaults + if os(foobar) { cc-options: -DBARBAZ } +") ; Intentional newline. + (test-begin "hackage") (define-syntax-rule (define-package-matcher name pattern) @@ -471,6 +477,16 @@ (define-package-matcher match-ghc-foo-revision #t) (x (pk 'fail x #f)))) +(test-assert "read-cabal test: if brackets on the same line" + (match (call-with-input-string test-read-cabal-2 read-cabal) + ((("name" ("test-me")) + ('section 'common "defaults" + (('if ('os "foobar") + (("cc-options" ("-DBARBAZ "))) + ())))) + #t) + (x (pk 'fail x #f)))) + (define test-cabal-import "name: foo version: 1.0.0 -- 2.35.1
>From ba96d2e9af8f0b952bfa90f548e4e06dbdec4777 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun <l...@6xq.net> Date: Sat, 30 Apr 2022 15:39:51 +0200 Subject: [PATCH 3/5] import: cabal: Allow properties without space between key and value. * guix/import/cabal.scm (lex-word): Add colon to delimiters. * tests/hackage.scm (test-cabal-property-no-space): New variable. ("hackage->guix-package test properties without space"): New test. --- guix/import/cabal.scm | 2 +- tests/hackage.scm | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/guix/import/cabal.scm b/guix/import/cabal.scm index 364fcc3176..9f3862fa14 100644 --- a/guix/import/cabal.scm +++ b/guix/import/cabal.scm @@ -573,7 +573,7 @@ (define (lex-single-char port loc) (define (lex-word port loc) "Process tokens which can be recognized by reading the next word form PORT. LOC is the current port location." - (let* ((w (read-delimited " <>=()\t\n" port 'peek))) + (let* ((w (read-delimited " <>=():\t\n" port 'peek))) (cond ((is-if w) (lex-if loc)) ((is-elif w) (lex-elif loc)) ((is-test w port) (lex-test w loc)) diff --git a/tests/hackage.scm b/tests/hackage.scm index 15309a3381..4ce48b6baf 100644 --- a/tests/hackage.scm +++ b/tests/hackage.scm @@ -315,6 +315,25 @@ (define test-cabal-flag-executable (test-assert "hackage->guix-package test flag executable" (eval-test-with-cabal test-cabal-flag-executable match-ghc-foo)) +;; There is no mandatory space between property name and value. +(define test-cabal-property-no-space + "name:foo +version:1.0.0 +homepage:http://test.org +synopsis:synopsis +description:description +license:BSD3 +common bench-defaults + ghc-options:-Wall +executable cabal + build-depends: + HTTP >= 4000.2.5 && < 4000.3, + mtl >= 2.0 && < 3 +") + +(test-assert "hackage->guix-package test properties without space" + (eval-test-with-cabal test-cabal-property-no-space match-ghc-foo)) + ;; Check if-elif-else statements (define test-cabal-if "name: foo -- 2.35.1
>From 5f4e1c75a34744f04916e1db8056669e20708ef0 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun <l...@6xq.net> Date: Sun, 1 May 2022 08:34:42 +0200 Subject: [PATCH 4/5] import: cabal: Allow curly bracket before else statement. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * guix/import/cabal.scm (is-else): Turn into procedure. (lex-line): Move IS-ELSE… (lex-word): …here. * tests/hackage.scm (test-cabal-elif-brackets): Extend testcase. --- guix/import/cabal.scm | 4 ++-- tests/hackage.scm | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/guix/import/cabal.scm b/guix/import/cabal.scm index 9f3862fa14..8f59a63cb9 100644 --- a/guix/import/cabal.scm +++ b/guix/import/cabal.scm @@ -384,7 +384,7 @@ (define is-benchmark (make-rx-matcher "^benchmark +([a-z0-9_-]+)" (define is-lib (make-rx-matcher "^library *" regexp/icase)) -(define is-else (make-rx-matcher "^else" regexp/icase)) +(define (is-else s) (string-ci=? s "else")) (define (is-elif s) (string-ci=? s "elif")) @@ -576,6 +576,7 @@ (define (lex-word port loc) (let* ((w (read-delimited " <>=():\t\n" port 'peek))) (cond ((is-if w) (lex-if loc)) ((is-elif w) (lex-elif loc)) + ((is-else w) (lex-else loc)) ((is-test w port) (lex-test w loc)) ((is-true w) (lex-true loc)) ((is-false w) (lex-false loc)) @@ -599,7 +600,6 @@ (define (lex-line port loc) ((is-custom-setup s) => (cut lex-custom-setup <> loc)) ((is-benchmark s) => (cut lex-benchmark <> loc)) ((is-lib s) (lex-lib loc)) - ((is-else s) (lex-else loc)) (else (unread-string s port) #f)))) (define (lex-property port loc) diff --git a/tests/hackage.scm b/tests/hackage.scm index 4ce48b6baf..98f9c34fd2 100644 --- a/tests/hackage.scm +++ b/tests/hackage.scm @@ -398,7 +398,10 @@ (define test-cabal-elif-brackets elif os(guix) { Build-depends: ghc-c } elif os(third) { Build-depends: ghc-d } - else + elif os(fourth) + { + Build-depends: ghc-d + } else Build-depends: ghc-e ") -- 2.35.1
>From bd67403967e86d41235ef3af2e142869bc6c5c48 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun <l...@6xq.net> Date: Sat, 14 May 2022 15:38:14 +0200 Subject: [PATCH 5/5] import: cabal: Document failing syntax through tests. * tests/hackage.scm (test-read-cabal-brackets-newline): New variable. (test-cabal-no-final-newline): Likewise. ("hackage->guix-package test without final newline", "read-cabal test: property brackets on new line"): New tests. --- tests/hackage.scm | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/hackage.scm b/tests/hackage.scm index 98f9c34fd2..d7ecd0cc21 100644 --- a/tests/hackage.scm +++ b/tests/hackage.scm @@ -162,6 +162,16 @@ (define test-read-cabal-2 if os(foobar) { cc-options: -DBARBAZ } ") ; Intentional newline. +;; Test opening bracket on new line. +(define test-read-cabal-brackets-newline + "name: test-me +common defaults + build-depends: + { foobar + , barbaz + } +") + (test-begin "hackage") (define-syntax-rule (define-package-matcher name pattern) @@ -334,6 +344,21 @@ (define test-cabal-property-no-space (test-assert "hackage->guix-package test properties without space" (eval-test-with-cabal test-cabal-property-no-space match-ghc-foo)) +;; There may be no final newline terminating a property. +(define test-cabal-no-final-newline +"name: foo +version: 1.0.0 +homepage: http://test.org +synopsis: synopsis +description: description +license: BSD3 +executable cabal + build-depends: HTTP >= 4000.2.5 && < 4000.3, mtl >= 2.0 && < 3") + +(test-expect-fail 1) +(test-assert "hackage->guix-package test without final newline" + (eval-test-with-cabal test-cabal-no-final-newline match-ghc-foo)) + ;; Check if-elif-else statements (define test-cabal-if "name: foo @@ -509,6 +534,15 @@ (define-package-matcher match-ghc-foo-revision #t) (x (pk 'fail x #f)))) +(test-expect-fail 1) +(test-assert "read-cabal test: property brackets on new line" + (match (call-with-input-string test-read-cabal-brackets-newline read-cabal) + ((("name" ("test-me")) + ('section 'common "defaults" + (("build-depends" ("foobar , barbaz"))))) + #t) + (x (pk 'fail x #f)))) + (define test-cabal-import "name: foo version: 1.0.0 -- 2.35.1