z572 pushed a commit to branch core-packages-team
in repository guix.
commit fa71e36aa23599f85d3603e0e76db43a54451cb0
Author: Tomas Volf <[email protected]>
AuthorDate: Thu Jan 23 23:57:06 2025 +0100
gexp: Improve support of Unicode characters.
Support for non-ASCII characters was mixed: some gexp forms supported them,
while others did not. Combined with the current value of
%default-port-conversion-strategy, that sometimes led to unpleasant
surprises.
For example:
(scheme-file "utf8" #~(with-output-to-file #$output
(λ _ (display "猫"))))
Was written to the store as:
((? _ (display "\u732b")))
No, that is not a font issue on your part; that is an actual #\? instead of
the lambda character, which, surprisingly, does not do what it should when
executed.
The solution is to switch LC_CTYPE to C.UTF-8 where possible, since that
locale is now always available, or otherwise to set the port encoding
explicitly.
No tests are provided, since the majority of the tests in tests/gexp.scm use
Guile version 2, where this tends to work. The issues occur mostly with
Guile 3.
I did test it locally using:
#!/bin/sh
# Abort on the first failing command or on use of an unset variable.
set -eu
# Trace every command as it is executed.
set -x
# Both files must exist in the current directory; otherwise print a hint on
# stderr and exit with status 1.
[ -f guix.scm ] || { echo >&2 Run from root of Guix repo.; exit 1; }
[ -f gnu.scm ] || { echo >&2 Run from root of Guix repo.; exit 1; }
cat >猫.scm <<'EOF'
(define-module (猫)
#:export (say))
;; Return the fixed greeting string exported by this module.
(define say
  (lambda ()
    "nyaaaa~~~~!"))
EOF
mkdir -p dir-with-utf8-file
cp 猫.scm dir-with-utf8-file/
cat >repro.scm <<'EOF'
(use-modules (guix build utils)
(guix derivations)
(guix gexp)
(guix store)
(ice-9 ftw)
(ice-9 textual-ports))
(define cat "猫")
;; Return, as a single string, the content of the file at the output path
;; of derivation DRV.
(define (drv-content drv)
  (let ((output (derivation->output-path drv)))
    (call-with-input-file output
      (lambda (port)
        (get-string-all port)))))
;; Return, as a single string, the content of the file named OUT.
(define (out-content out)
  (call-with-input-file out
    (lambda (port)
      (get-string-all port))))
;; Return the 'scandir' listing of the directory at the output path of
;; derivation DRV.
(define (drv-listing drv)
  (let ((output (derivation->output-path drv)))
    (scandir output)))
;; Return the 'scandir' listing of directory DIR.
(define dir-listing
  (lambda (dir)
    (scandir dir)))
;; Expand to code that evaluates the form EXP, turns it into a derivation,
;; prints the head symbol of EXP as a label, builds the derivation and, when
;; 'build-derivations' returns true, prints the result of (REPORT drv).
;;
;; LOWER? is consulted at macro-expansion time: when true, the value of EXP
;; is passed through 'lower-object', otherwise through 'identity', before
;; being handed to 'run-with-store'.
;;
;; The whole expansion is wrapped in 'false-if-exception', so an exception in
;; one case yields #f instead of propagating.
(define-macro (test exp lower? report)
;; The first element of EXP (e.g. computed-file) serves as the printed label.
(let ((type (car exp)))
`(false-if-exception
(let ((drv (with-store %store
(run-with-store %store
(,(if lower? lower-object identity) ,exp)))))
(format #t "~%~a:~%" ',type)
;; Only report when the build succeeded.
(when (with-store %store
(build-derivations %store (list drv)))
(format #t "~a~%" (,report drv)))))))
;;; Forms whose output was wrong before the fix:
(test (computed-file "utf8"
#~(with-output-to-file #$output
(λ _ (display #$cat))))
#t drv-content)
(test (program-file "utf8"
#~((λ _ (display #$cat))))
#t drv-content)
(test (scheme-file "utf8"
#~((λ _ (display #$cat))))
#t drv-content)
(test (text-file* "utf8" cat cat cat)
#f drv-content)
(test (compiled-modules '((猫)))
#f drv-listing)
(test (file-union "utf8" `((,cat ,(plain-file "utf8" cat))))
#t drv-listing)
;;; No fix needed:
(test (imported-modules '((猫)))
#f dir-listing)
(test (local-file "dir-with-utf8-file" #:recursive? #t)
#t dir-listing)
(test (plain-file "utf8" cat)
#t out-content)
(test (mixed-text-file "utf8" cat cat cat)
#t drv-content)
(test (directory-union "utf8" (list (local-file "dir-with-utf8-file"
#:recursive? #t)))
#t dir-listing)
EOF
guix shell -CWN -D guix glibc-locales -- \
env LANG=C.UTF-8 ./pre-inst-env guix repl -- ./repro.scm
Before this series, the output is:
+ '[' -f guix.scm ']'
+ '[' -f gnu.scm ']'
+ cat
+ mkdir -p dir-with-utf8-file
+ cp 猫.scm dir-with-utf8-file/
+ cat
+ guix shell -CWN -D guix glibc-locales -- env LANG=C.UTF-8
./pre-inst-env guix repl -- ./repro.scm
computed-file:
?
program-file:
#!/gnu/store/mfkz7fvlfpv3ppwbkv0imb19nrf95akf-guile-3.0.9/bin/guile
--no-auto-compile
!#
((? _ (display "\u732b")))
scheme-file:
((? _ (display "\u732b")))
text-file*:
???
compiled-modules:
building path(s)
`/gnu/store/ay3jifyvliigfgnz67jf0kgngzpya5a5-module-import-compiled'
Backtrace:
5 (primitive-load
"/gnu/store/rn7b0dq6iqfmmqyqzamix2mjmfy?")
In ice-9/eval.scm:
619:8 4 (_ #f)
In srfi/srfi-1.scm:
460:18 3 (fold #<procedure 7ffff79245e0 at ice-9/eval.scm:336:1?>
?)
In ice-9/eval.scm:
245:16 2 (_ #(#(#<directory (guix build utils) 7ffff779f320>) #
?))
In ice-9/boot-9.scm:
1982:24 1 (_ _)
In unknown file:
0 (stat "./???.scm" #<undefined>)
ERROR: In procedure stat:
In procedure stat: No such file or directory: "./???.scm"
builder for
`/gnu/store/dxg87135zcd6a1c92dlrkyvxlbhfwfld-module-import-compiled.drv' failed
with exit code 1
file-union:
(. .. ?)
imported-modules:
(. .. 猫.scm)
local-file:
(. .. 猫.scm)
plain-file:
猫
mixed-text-file:
猫猫猫
directory-union:
(. .. 猫.scm)
Which I think you will agree is far from optimal. After these fixes the
output changes to:
+ '[' -f guix.scm ']'
+ '[' -f gnu.scm ']'
+ cat
+ mkdir -p dir-with-utf8-file
+ cp 猫.scm dir-with-utf8-file/
+ cat
+ guix shell -CWN -D guix glibc-locales -- env LANG=C.UTF-8
./pre-inst-env guix repl -- ./repro.scm
computed-file:
猫
program-file:
#!/gnu/store/8kbmn359jqkgsbqgqxnmiryvd9ynz8w7-guile-3.0.9/bin/guile
--no-auto-compile
!#
((λ _ (display "猫")))
scheme-file:
((λ _ (display "猫")))
text-file*:
猫猫猫
compiled-modules:
(. .. 猫.go)
file-union:
(. .. 猫)
imported-modules:
(. .. 猫.scm)
local-file:
(. .. 猫.scm)
plain-file:
猫
mixed-text-file:
猫猫猫
directory-union:
(. .. 猫.scm)
Which is actually what the user would expect.
* guix/gexp.scm (gexp->derivation): Default LC_CTYPE to C.UTF-8.
(gexp->script, text-file*): Set port encoding to UTF-8.
Change-Id: Ie92a57fe1c3b45d1c7a5e8865fcf291c5f590c11
Signed-off-by: Janneke Nieuwenhuizen <[email protected]>
---
guix/gexp.scm | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/guix/gexp.scm b/guix/gexp.scm
index ad51bc55b7..15cf13addd 100644
--- a/guix/gexp.scm
+++ b/guix/gexp.scm
@@ -5,6 +5,7 @@
;;; Copyright © 2019, 2020 Mathieu Othacehe <[email protected]>
;;; Copyright © 2020 Maxim Cournoyer <[email protected]>
;;; Copyright © 2021, 2022 Maxime Devos <[email protected]>
+;;; Copyright © 2025 Tomas Volf <[email protected]>
;;;
;;; This file is part of GNU Guix.
;;;
@@ -1149,7 +1150,7 @@ derivations--e.g., code evaluated for its side effects."
#:key
system (target 'current)
hash hash-algo recursive?
- (env-vars '())
+ (env-vars '(("LC_CTYPE" . "C.UTF-8")))
(modules '())
(module-path %load-path)
(guile-for-build (%guile-for-build))
@@ -2018,6 +2019,8 @@ imported modules in its search path. Look up EXP's
modules in MODULE-PATH."
(gexp
(call-with-output-file (ungexp output)
(lambda (port)
+ (set-port-encoding! port "UTF-8")
+
;; Note: that makes a long shebang. When the store
;; is /gnu/store, that fits within the 128-byte
;; limit imposed by Linux, but that may go beyond
@@ -2116,6 +2119,7 @@ resulting store file holds references to all these."
(define builder
(gexp (call-with-output-file (ungexp output "out")
(lambda (port)
+ (set-port-encoding! port "UTF-8")
(display (string-append (ungexp-splicing text)) port)))))
(gexp->derivation name builder