pelzflorian pushed a commit to branch wip-i18n in repository guix-artwork. commit 77675f1778bbc8602e16f6b52ce66524cc892710 Author: Florian Pelz <pelzflor...@pelzflorian.de> Date: Wed Oct 30 08:22:47 2019 +0000
website: Add custom xgettext to extract from nested sexps for i18n. * website/po/POTFILES: New file. List apps files here. * website/po/LINGUAS: New file. List en_US lingua. * website/po/ietf-tags.scm: New file. Add association for en_US lingua. * website/scripts/sexp-xgettext.scm: New file for generating a POT file. (<keyword-spec>, <po-entry>, <construct-fold-state>): New record types. (combine-duplicate-po-entries, complex-keyword-spec?, parse-scheme-file, po-equal?, write-po-entry, update-ecomments-string!, update-file-name!, update-old-line-number!, update-line-number!, incr-line-number!, incr-line-number-for-each-nl!, current-ref, make-simple-po-entry, matching-keyword, nth-exp, more-than-one-exp?, token->string-symbol-or-keyw, complex-marked-list->po-entries, construct-po-entries, tag, construct-msgid-and-po-entries, scheme-file->po-entries): New procedures. (%keyword-specs, %options, %comments-line, %ecomments-string, %file-name, %old-line-number, %line-number, %files-from-port, %source-files, %output-po-entries, %output-port): New variables. * website/sexp-xgettext.scm: New file with module for looking up translations. (%complex-keywords, %simple-keywords, %plural-numbers, %linguas): New variables. (<construct-fold-state>, <deconstruct-fold-state>): New record types. (set-complex-keywords!, set-simple-keywords!, gettext-keyword?, tag, sexp->msgid, deconstruct): New procedures. (sgettext, spgettext, sngettext, snpgettext): New macro helpers. * website/apps/i18n.scm: New file. (G_, N_, C_, NC_, ietf-tags-file-contents): New syntax to use for i18n. (%current-ietf-tag, %current-lang, %current-lingua): New variables. (builder->localized-builder, builders->localized-builders, localized-root-path, first-value): New utility procedures. (<asset>, <page>): New imports from Haunt. * website/haunt.scm: Wrap each builder to build the locale set in LC_ALL. * website/.guix.scm: Make Haunt build directory writable so Haunt can overwrite duplicate assets. Convert PO files to MO files and build for each lingua. * website/README: Adapt build instructions for i18n. * website/i18n-howto: New file with usage instructions. --- website/.guix.scm | 80 +++- website/README | 8 +- website/apps/i18n.scm | 132 ++++++ website/haunt.scm | 19 +- website/i18n-howto.txt | 86 ++++ website/po/LINGUAS | 3 + website/po/POTFILES | 36 ++ website/po/ietf-tags.scm | 9 + website/scripts/sexp-xgettext.scm | 830 ++++++++++++++++++++++++++++++++++++++ website/sexp-xgettext.scm | 530 ++++++++++++++++++++++++ 10 files changed, 1707 insertions(+), 26 deletions(-) diff --git a/website/.guix.scm b/website/.guix.scm index 8f44c90..9510779 100644 --- a/website/.guix.scm +++ b/website/.guix.scm @@ -1,5 +1,6 @@ ;;; GNU Guix web site ;;; Copyright © 2017, 2019 Ludovic Courtès <l...@gnu.org> +;;; Copyright © 2019 Florian Pelz <pelzflor...@pelzflorian.de> ;;; ;;; This file is part of the GNU Guix web site. ;;; @@ -18,16 +19,27 @@ ;; Run 'guix build -f .guix.scm' to build the web site. +(define this-directory + (dirname (current-filename))) + +;; Make sure po/LINGUAS will be found in the current working +;; directory. +(chdir this-directory) + +;; We need %linguas from the (sexp-xgettext) module. +;; Therefore, we add its path to the load path. +(set! %load-path (cons this-directory %load-path)) + (use-modules (guix) (gnu) (guix modules) (guix git-download) (guix gexp) (guix channels) (srfi srfi-9) - (ice-9 match)) - -(define this-directory - (dirname (current-filename))) + (ice-9 match) + (ice-9 rdelim) + (ice-9 regex) + (sexp-xgettext)) (define source (local-file this-directory "guix-web-site" @@ -73,9 +85,7 @@ (setvbuf (current-output-port) 'line) (setvbuf (current-error-port) 'line) - (copy-recursively #$source ".") - ;; Set 'GUILE_LOAD_PATH' so that Haunt find the Guix modules and ;; its dependencies. To find out the load path of Guix and its ;; dependencies, fetch its value over 'guix repl'. @@ -96,24 +106,62 @@ ":")))) (close-pipe pipe)) + ;; Make the copy writable so Haunt can overwrite duplicate assets. + (invoke #+(file-append (specification->package "coreutils") + "/bin/chmod") + "--recursive" "u+w" ".") + + ;; For translations, create MO files from PO files. + (for-each + (lambda (lingua) + (let* ((msgfmt #+(file-append + (specification->package "gettext-minimal") + "/bin/msgfmt")) + (lingua-file (string-append "po/" lingua ".po")) + (lang (car (string-split lingua #\_))) + (lang-file (string-append "po/" lang ".po"))) + (define (create-mo filename) + (begin + (invoke msgfmt filename) + (mkdir-p (string-append lingua "/LC_MESSAGES")) + (rename-file "messages.mo" + (string-append lingua "/LC_MESSAGES/" + "guix-website.mo")))) + (cond + ((file-exists? lingua-file) + (create-mo lingua-file)) + ((file-exists? lang-file) + (create-mo lang-file)) + (else #t)))) + (list #$@%linguas)) + ;; So we can read/write UTF-8 files. (setenv "GUIX_LOCPATH" #+(file-append (specification->package "glibc-utf8-locales") "/lib/locale")) - (setenv "LC_ALL" "en_US.utf8") ;; Use a sane default. (setenv "XDG_CACHE_HOME" "/tmp/.cache") - (format #t "Running 'haunt build'...~%") - (invoke #+(file-append (specification->package "haunt") - "/bin/haunt") - "build") - - (mkdir-p #$output) - (copy-recursively "/tmp/gnu.org/software/guix" #$output - #:log (%make-void-port "w")) - (symlink "guix.html" (string-append #$output "/index.html")))))) + ;; Build the website for each translation. + (for-each + (lambda (lingua) + (begin + (setenv "LC_ALL" (string-append lingua ".utf8")) + (format #t "Running 'haunt build' for lingua ~a...~%" lingua) + (invoke #+(file-append (specification->package "haunt") + "/bin/haunt") + "build") + (mkdir-p #$output) + (copy-recursively "/tmp/gnu.org/software/guix" #$output + #:log (%make-void-port "w")) + (let ((tag (assoc-ref + (call-with-input-file "po/ietf-tags.scm" + (lambda (port) (read port))) + lingua))) + (symlink "guix.html" + (string-append #$output "/" tag "/index.html"))))) + (list #$@%linguas)))))) (computed-file "guix-web-site" build) diff --git a/website/README b/website/README index d3a3a78..ff54053 100644 --- a/website/README +++ b/website/README @@ -24,14 +24,18 @@ commands: #+BEGIN_EXAMPLE $ cd path/to/guix-artwork/website -$ GUIX_WEB_SITE_LOCAL=yes haunt build +$ export GUILE_LOAD_PATH=$(guix build guile-syntax-highlight)/share/guile/site/2.2:$GUILE_LOAD_PATH +$ LC_ALL=en_US.utf8 GUIX_WEB_SITE_LOCAL=yes haunt build $ haunt serve #+END_EXAMPLE -Then, visit http://localhost:8080/guix.html in a web browser. +Then, visit http://localhost:8080/en/guix.html in a web browser. You can stop the server pressing ~Ctrl + C~ twice. +See also the file i18n-howto.txt for information on working with +translations. + * Deploying Like the pages of many GNU websites, this website is managed through diff --git a/website/apps/i18n.scm b/website/apps/i18n.scm new file mode 100644 index 0000000..d88333a --- /dev/null +++ b/website/apps/i18n.scm @@ -0,0 +1,132 @@ +;;; GNU Guix web site +;;; Copyright © 2019 Florian Pelz <pelzflor...@pelzflorian.de> +;;; +;;; This file is part of the GNU Guix web site. +;;; +;;; The GNU Guix web site is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU Affero General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; The GNU Guix web site is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU Affero General Public License for more details. +;;; +;;; You should have received a copy of the GNU Affero General Public License +;;; along with the GNU Guix web site. If not, see <http://www.gnu.org/licenses/>. + +(define-module (apps i18n) + #:use-module (haunt asset) + #:use-module (haunt page) + #:use-module (haunt utils) + #:use-module (ice-9 match) + #:use-module (sexp-xgettext) + #:use-module (srfi srfi-1) + #:export (G_ + N_ + C_ + NC_ + %current-ietf-tag + %current-lang + %current-lingua + builder->localized-builder + builders->localized-builders + ietf-tags-file-contents + localized-root-path)) + +(define %gettext-domain + "guix-website") + +(bindtextdomain %gettext-domain (getcwd)) +(bind-textdomain-codeset %gettext-domain "UTF-8") +(textdomain %gettext-domain) + +;; NOTE: The sgettext macros have no hygiene because they use +;; datum->syntax and do not preserve the semantics of anything looking +;; like an sgettext macro. This is an exceptional use case; do not +;; try this at home. + +(define-syntax G_ + sgettext) + +(set-simple-keywords! '(G_)) + +(define-syntax N_ ;like ngettext + sngettext) + +(define-syntax C_ ;like pgettext + spgettext) + +(define-syntax NC_ ;like npgettext + snpgettext) + +(set-complex-keywords! '(N_ C_ NC_)) + +(define %current-lingua + ;; strip the character encoding: + (car (string-split (setlocale LC_ALL) #\.))) + +(define-syntax ietf-tags-file-contents + (identifier-syntax + (force (delay (call-with-input-file + "po/ietf-tags.scm" + (lambda (port) (read port))))))) + + +(define %current-ietf-tag + (or (assoc-ref ietf-tags-file-contents %current-lingua) + "en")) + +(define %current-lang + (car (string-split %current-ietf-tag #\-))) + +(define* (localized-root-path url #:key (lingua %current-ietf-tag)) + "Given a URL as used in a href attribute, return the URL prefix +'builder->localized-builder' would use for the URL when called with +LINGUA." + (if (or (string-suffix? ".html" url) + (string-suffix? "/" url)) + (string-append lingua "/") + "")) + +(define (first-value arg) + "For some reason the builder returned by static-directory returns +multiple values. This procedure is used to retain only the first +return value. TODO: This should not be necessary." + arg) + +(define <asset> + (@@ (haunt asset) <asset>)) + +(define <page> + (@@ (haunt page) <page>)) + +(define (builder->localized-builder builder) + "Return a Haunt builder procedure generated from an existing BUILDER +with translations for the current system locale coming from +sexp-xgettext." + (compose + (lambda (pages-and-assets) + (map + (lambda (page-or-asset) + (match page-or-asset + (($ <page> file-name contents writer) + (let ((new-name (string-append (localized-root-path file-name) + file-name))) + (make-page new-name contents writer))) + (($ <asset> source target) + (let ((new-name (string-append (localized-root-path target) target))) + (make-asset source new-name))))) + pages-and-assets)) + (lambda (site posts) + (first-value (builder site posts))))) + +(define (builders->localized-builders builders) + "Return a list of new Haunt builder procedures generated from +BUILDERS and localized via sexp-xgettext for the current system +locale." + (flatten + (map-in-order + builder->localized-builder + builders))) diff --git a/website/haunt.scm b/website/haunt.scm index 0cb7177..01e2af7 100644 --- a/website/haunt.scm +++ b/website/haunt.scm @@ -5,22 +5,25 @@ (use-modules ((apps base builder) #:prefix base:) ((apps blog builder) #:prefix blog:) ((apps download builder) #:prefix download:) + (apps i18n) ((apps media builder) #:prefix media:) ((apps packages builder) #:prefix packages:) (haunt asset) (haunt builder assets) (haunt reader) (haunt reader commonmark) - (haunt site)) - + (haunt site) + (ice-9 rdelim) + (srfi srfi-1)) (site #:title "GNU Guix" #:domain "https://guix.gnu.org" #:build-directory "/tmp/gnu.org/software/guix" #:readers (list sxml-reader html-reader commonmark-reader) - #:builders (list base:builder - blog:builder - download:builder - media:builder - packages:builder - (static-directory "static"))) + #:builders (builders->localized-builders + (list base:builder + blog:builder + download:builder + media:builder + packages:builder + (static-directory "static")))) diff --git a/website/i18n-howto.txt b/website/i18n-howto.txt new file mode 100644 index 0000000..54f85f0 --- /dev/null +++ b/website/i18n-howto.txt @@ -0,0 +1,86 @@ +With sexp-xgettext, arbitrary s-expressions can be marked for +translation (not only strings like with normal xgettext). + +S-expressions can be marked with G_ (simple marking for translation), +N_ (“complex” marking with different forms depending on number like +ngettext), C_ (“complex” marking distinguished from other markings by +a msgctxt like pgettext) or NC_ (mix of both). + +Marking a string for translation behaves like normal gettext. Marking +a parenthesized expression (i.e. a list or procedure call) extracts +each string from the parenthesized expression. If a symbol, keyword +or other parenthesized expression occurs between the strings, it is +extracted as an XML element. Expressions before or after all strings +are not extracted. If strings from a parenthesized sub-expression +shall be extracted too, the sub-expression must again be marked with +G_ unless it is the only sub-expression or it follows a quote, +unquote, quasiquote or unquote-splicing. The order of XML elements +can be changed in the translation to produce a different ordering +inside a parenthesized expression. If a string shall not be extracted +from a marked expression, it must be wrapped, for example by a call to +the identity procedure. Be careful when marking non-SHTML content +such as procedure calls for translation: Additional strings will be +inserted between non-string elements. + +Known issues: + +* Line numbers are sometimes off. + +* Some less important other TODOs in the comments. + +===== + +The following commands are an example of the translation for locale +de_DE. Adapt as necessary. We assume the software requirements +mentioned in the README are installed. + +To create a pot file: + +guile scripts/sexp-xgettext.scm -f po/POTFILES \ + -o po/guix-website.pot \ + --from-code=UTF-8 \ + --copyright-holder="Ludovic Courtès" \ + --package-name="guix-website" \ + --msgid-bugs-address="l...@gnu.org" \ + --keyword=G_ \ + --keyword=N_:1,2 \ + --keyword=C_:1c,2 \ + --keyword=NC_:1c,2,3 + +To create a po file from a pot file, do the usual: + +cd po +msginit -l de --no-translator + +To merge an existing po file with a new pot file: + +cd po +msgmerge --previous -U de.po guix-website.pot + +To update mo files: + +mkdir -p de/LC_MESSAGES +cd po +msgfmt de.po +cd .. +mv po/messages.mo de/LC_MESSAGES/guix-website.mo + +To build all languages: + +guix build -f .guix.scm + +To test the de_DE translation, update its mo file as above, then: + +guix environment --ad-hoc haunt +LC_ALL=de_DE.utf8 \ + GUILE_LOAD_PATH=$(guix build guile-syntax-highlight)/share/guile/site/2.2:$GUILE_LOAD_PATH \ + GUIX_WEB_SITE_LOCAL=yes \ + haunt build +GUILE_LOAD_PATH=$(guix build guile-syntax-highlight)/share/guile/site/2.2:$GUILE_LOAD_PATH \ + haunt serve + +For checking for errors / debugging newly marked files you can try: + +GUILE_LOAD_PATH=.:$(guix build haunt)/share/guile/site/2.2:\ +$(guix build guile-syntax-highlight)/share/guile/site/2.2:$GUILE_LOAD_PATH \ + guile apps/base/templates/about.scm # an example for debugging about.scm diff --git a/website/po/LINGUAS b/website/po/LINGUAS new file mode 100644 index 0000000..d4dd759 --- /dev/null +++ b/website/po/LINGUAS @@ -0,0 +1,3 @@ +# Translation with sexp-xgettext requires the full LL_CC locale name +# to be specified. +en_US diff --git a/website/po/POTFILES b/website/po/POTFILES new file mode 100644 index 0000000..e538f84 --- /dev/null +++ b/website/po/POTFILES @@ -0,0 +1,36 @@ +# high-priority files that should come first in the PO file +apps/base/utils.scm +apps/base/templates/home.scm +apps/base/templates/theme.scm +apps/base/templates/components.scm +apps/base/templates/about.scm +apps/base/data.scm +apps/base/templates/help.scm +# other files +apps/base/templates/contact.scm +apps/base/templates/contribute.scm +apps/base/templates/donate.scm +apps/base/templates/graphics.scm +apps/base/templates/irc.scm +apps/base/templates/menu.scm +apps/base/templates/security.scm +apps/blog/templates/components.scm +apps/blog/templates/feed.scm +apps/blog/templates/post-list.scm +apps/blog/templates/post.scm +apps/blog/templates/tag.scm +apps/download/data.scm +apps/download/templates/components.scm +apps/download/templates/download.scm +apps/media/data.scm +apps/media/templates/components.scm +apps/media/templates/screenshot.scm +apps/media/templates/screenshots-overview.scm +apps/media/templates/video.scm +apps/media/templates/video-list.scm +apps/packages/templates/components.scm +apps/packages/templates/detailed-index.scm +apps/packages/templates/detailed-package-list.scm +apps/packages/templates/index.scm +apps/packages/templates/package-list.scm +apps/packages/templates/package.scm diff --git a/website/po/ietf-tags.scm b/website/po/ietf-tags.scm new file mode 100644 index 0000000..8102a49 --- /dev/null +++ b/website/po/ietf-tags.scm @@ -0,0 +1,9 @@ +;;; This file contains an association list for each translation from +;;; the locale to an IETF language tag to be used in the URL path of +;;; translated pages. The language tag results from the translation +;;; team’s language code from +;;; <https://translationproject.org/team/index.html>. The underscore +;;; in the team’s code is replaced by a hyphen. For example, az would +;;; be used for the Azerbaijani language (not az-Latn) and zh-CN would +;;; be used for mainland Chinese (not zh-Hans-CN). +(("en_US" . "en")) diff --git a/website/scripts/sexp-xgettext.scm b/website/scripts/sexp-xgettext.scm new file mode 100644 index 0000000..aba527f --- /dev/null +++ b/website/scripts/sexp-xgettext.scm @@ -0,0 +1,830 @@ +;;; GNU Guix web site +;;; Copyright © 2019 Florian Pelz <pelzflor...@pelzflorian.de> +;;; +;;; This file is part of the GNU Guix web site. +;;; +;;; The GNU Guix web site is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU Affero General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; The GNU Guix web site is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU Affero General Public License for more details. +;;; +;;; You should have received a copy of the GNU Affero General Public License +;;; along with the GNU Guix web site. If not, see <http://www.gnu.org/licenses/>. + +(use-modules (ice-9 getopt-long) + (ice-9 match) + (ice-9 peg) + (ice-9 receive) + (ice-9 regex) + (ice-9 textual-ports) + (srfi srfi-1) ;lists + (srfi srfi-9) ;records + (srfi srfi-19) ;date + (srfi srfi-26)) ;cut + +;;; This script imitates xgettext, but combines nested s-expressions +;;; in the input Scheme files to a single msgstr in the PO file. It +;;; works by first reading the keywords specified on the command-line, +;;; then dealing with the remaining options using (ice-9 getopt-long). +;;; Then, it parses each Scheme file in the POTFILES file specified +;;; with --files-from and constructs po entries from it. For parsing, +;;; a PEG is used instead of Scheme’s read, because we can extract +;;; comments with it. The po entries are written to the PO file +;;; specified with the --output option. Scheme code can then use the +;;; (sexp-xgettext) module to deconstruct the msgids looked up in the +;;; PO file via gettext. + +(define-record-type <keyword-spec> + (make-keyword-spec id sg pl c total xcomment) + keyword-spec? + (id keyword-spec-id) ;identifier + (sg keyword-spec-sg) ;arg with singular + (pl keyword-spec-pl) ;arg with plural + (c keyword-spec-c) ;arg with msgctxt or 'mixed if sg is mixed msgctxt|singular + (total keyword-spec-total) ;total number of args + (xcomment keyword-spec-xcomment)) + +(define (complex-keyword-spec? keyword-spec) + "Return for a keyword passed on the command-line whether it is +complex, i.e. whether occurrences inside another marked expression may +be part of that other expression. See i18n-howto.txt." + (match keyword-spec + (($ <keyword-spec> _ _ #f #f _ #f) #f) + (else #t))) + +(define %keyword-specs + ;; List of valid xgettext keyword options. + ;; Read keywords from command-line options. + (let loop ((opts (cdr (command-line)));command-line options from + ;which to extract --keyword + ;options + (remaining-opts '()) ;unhandled opts + (specs '())) + (define (string->integer str) + (if (string-match "[0-9]+" str) + (string->number str) + (error "Not a decimal integer."))) + (define* (argnums->spec id #:optional (argnums '())) + (let loop ((sg #f) + (pl #f) + (c #f) + (total #f) + (xcomment #f) + (argnums argnums)) + (match argnums + (() (make-keyword-spec id + (if sg sg 1) + pl + c + total + xcomment)) + ((arg . argnums) + (cond + ((string-suffix? "c" arg) + (cond (c (error "c suffix clashes")) + (else + (let* ((number-str (string-drop-right arg 1)) + (number (string->integer number-str))) + (loop sg pl number total xcomment argnums))))) + ((string-suffix? "g" arg) + (cond + (sg (error "Only first argnum can have g suffix.")) + (c (error "g suffix clashes.")) + (else + (let* ((number-str (string-drop-right arg 1)) + (number (string->integer number-str))) + (loop number #f 'mixed total xcomment argnums))))) + ((string-suffix? "t" arg) + (cond (total (error "t suffix clashes")) + (else + (let* ((number-str (string-drop-right arg 1)) + (number (string->integer number-str))) + (loop sg pl c number xcomment argnums))))) + ((string-suffix? "\"" arg) + (cond (xcomment (error "xcomment clashes")) + (else + (let* ((comment (substring arg + 1 + (- (string-length arg) 1)))) + (loop sg pl c total comment argnums))))) + (else + (let* ((number (string->integer arg))) + (if sg + (if pl + (error "Too many argnums.") + (loop sg number c total xcomment argnums)) + (loop number #f c total xcomment argnums))))))))) + + (define (string->spec str) ;see `info xgettext` + (match (string-split str #\:) + ((id) (argnums->spec id)) + ((id argnums) + (argnums->spec id (string-split argnums #\,))))) + (match opts + (() (begin + ;; remove recognized --keyword command-line options: + (set-program-arguments (cons (car (command-line)) + (reverse remaining-opts))) + specs)) + ((current-opt . rest) + (cond + ((string=? "--" current-opt) specs) + ((string-prefix? "--keyword=" current-opt) + (let ((keyword (string-drop current-opt (string-length "--keyword=")))) + (loop rest remaining-opts (cons (string->spec keyword) specs)))) + ((or (string=? "--keyword" current-opt) + (string=? "-k" current-opt)) + (let ((next-opt (car rest))) + (loop (cdr rest) + remaining-opts + (cons (string->spec next-opt) specs)))) + (else (loop rest (cons current-opt remaining-opts) specs))))))) + +;;; Other options are not repeated, so we can use getopt-long: + +(define %options ;; Corresponds to what is documented at `info xgettext`. + (let ((option-spec + `((files (single-char #\f) (value #t)) + (directory (single-char #\D) (value #t)) + (default-domain (single-char #\d) (value #t)) + (output (single-char #\o) (value #t)) + (output-dir (single-char #\p) (value #t)) + (from-code (value #t)) + (join-existing (single-char #\j) (value #f)) + (exclude-file (single-char #\x) (value #t)) + (add-comments (single-char #\c) (value #t)) + + ;; Because getopt-long does not support repeated options, + ;; we took care of --keyword options further up. + ;; (keyword (single-char #\k) (value #t)) + + (flag (value #t)) + (force-po (value #f)) + (indent (single-char #\i) (value #f)) + (no-location (value #f)) + (add-location (single-char #\n) (value #t)) + (width (single-char #\w) (value #t)) + (no-wrap (value #f)) + (sort-output (single-char #\s) (value #f)) + (sort-by-file (single-char #\F) (value #f)) + (omit-header (value #f)) + (copyright-holder (value #t)) + (foreign-user (value #f)) + (package-name (value #t)) + (package-version (value #t)) + (msgid-bugs-address (value #t)) + (msgstr-prefix (single-char #\m) (value #t)) + (msgstr-suffix (single-char #\m) (value #t)) + (help (value #f)) + (pack (value #f))))) + (getopt-long (command-line) option-spec))) + + +(define parse-scheme-file + ;; This procedure parses FILE and returns a parse tree. + (let () + ;;TODO: Optionally ignore case. + (define-peg-pattern NL all "\n") + (define-peg-pattern comment all (and ";" + (* (and peg-any + (not-followed-by NL))) + (and peg-any (followed-by NL)))) + (define-peg-pattern empty none (or " " "\t")) + (define-peg-pattern whitespace body (or empty NL)) + (define-peg-pattern quotation body (or "'" "`" "," ",@")) + ;TODO: Allow user to specify + ;other quote reader macros to + ;be ignored and also ignore + ;quote spelled out without + ;reader macro. + (define-peg-pattern open body (and (? quotation) + (or "(" "[" "{"))) + (define-peg-pattern close body (or ")" "]" "}")) + (define-peg-pattern string body (and (followed-by "\"") + (* (or "\\\"" + (and (or NL peg-any) + (not-followed-by "\"")))) + (and (or NL peg-any) + (followed-by "\"")) + "\"")) + (define-peg-pattern token all (or string + (and + (not-followed-by open) + (not-followed-by close) + (not-followed-by comment) + (* (and peg-any + (not-followed-by open) + (not-followed-by close) + (not-followed-by comment) + (not-followed-by string) + (not-followed-by whitespace))) + (or + (and peg-any (followed-by open)) + (and peg-any (followed-by close)) + (and peg-any (followed-by comment)) + (and peg-any (followed-by string)) + (and peg-any (followed-by whitespace)) + (not-followed-by peg-any))))) + (define-peg-pattern list all (or (and (? quotation) "(" program ")") + (and (? quotation) "[" program "]") + (and (? quotation) "{" program "}"))) + (define-peg-pattern t-or-s body (or token list)) + (define-peg-pattern program all (* (or whitespace + comment + t-or-s))) + (lambda (file) + (call-with-input-file file + (lambda (port) + ;; It would be nice to match port directly without + ;; converting to a string first, but apparently guile cannot + ;; do that yet. + (let ((string (get-string-all port))) + (peg:tree (match-pattern program string)))))))) + + +(define-record-type <po-entry> + (make-po-entry ecomments ref flags ctxt id idpl) + po-entry? +;;; irrelevant: (tcomments po-entry-tcomments) ;translator-comments + (ecomments po-entry-ecomments) ;extracted-comments + (ref po-entry-ref) ;reference + (flags po-entry-flags) +;;; irrelevant: (prevctxt po-entry-prevctxt) ;previous-ctxt +;;; irrelevant: (prev po-entry-prev) ;previous-translation + (ctxt po-entry-ctxt) ;msgctxt + (id po-entry-id) ;msgid + (idpl po-entry-idpl) ;msgid-plural +;;; irrelevant: (str po-entry-str) ;msgstr string or association list +;;; ;integer to string + ) + +(define (po-equal? po1 po2) + "Return whether PO1 and PO2 have equal ctxt, id and idpl." + (and (equal? (po-entry-ctxt po1) (po-entry-ctxt po2)) + (equal? (po-entry-id po1) (po-entry-id po2)) + (equal? (po-entry-idpl po1) (po-entry-idpl po2)))) + +(define (combine-duplicate-po-entries list) + "Return LIST with duplicate po entries replaced by a single PO entry +with both refs." + (let loop ((remaining list)) + (match remaining + (() '()) + ((head . tail) + (receive (before from) + (break (cut po-equal? head <>) tail) + (cond + ((null? from) (cons head (loop tail))) + (else + (loop + (cons + (match head + (($ <po-entry> ecomments1 ref1 flags ctxt id idpl) + (match (car from) + (($ <po-entry> ecomments2 ref2 _ _ _ _) + (let ((ecomments (if (or ecomments1 ecomments2) + (append (or ecomments1 '()) + (or ecomments2 '())) + #f)) + (ref (if (or ref1 ref2) + (string-join + (cons + (or ref1 "") + (cons + (or ref2 "") + '()))) + #f))) + (make-po-entry ecomments ref flags ctxt id idpl)))))) + (append before (cdr from))))))))))) + +(define (write-po-entry po-entry) + (define (prepare-text text) + "If TEXT is false, return #f. Otherwise correct the formatting of +TEXT by escaping backslashes and newlines and enclosing TEXT in +quotes. Note that Scheme’s write is insufficient because it would +escape far more. TODO: Strings should be wrappable to a maximum line +width." + (and text + (string-append "\"" + (with-output-to-string + (lambda () + (call-with-input-string text + (lambda (port) + (let loop ((c (get-char port))) + (unless (eof-object? c) + (case c + ((#\\) (display "\\")) + ((#\newline) (display "\\n")) + (else (write-char c))) + (loop (get-char port)))))))) + "\""))) + (define (write-component c prefix) + (when c + (begin (display prefix) + (display " ") + (display c) + (newline)))) + (match po-entry + (($ <po-entry> ecomments ref flags ctxt id idpl) + (let ((prepared-ctxt (prepare-text ctxt)) + (prepared-id (prepare-text id)) + (prepared-idpl (prepare-text idpl))) + (when ecomments + (for-each + (lambda (line) + (write-component line "#.")) + (reverse ecomments))) + (write-component ref "#:") + (write-component (and flags (string-join flags ", ")) "#,") + (write-component prepared-ctxt "msgctxt") + (write-component prepared-id "msgid") + (write-component prepared-idpl "msgid_plural") + (if idpl + (begin + (display "msgstr[0] \"\"") + (newline) + (display "msgstr[1] \"\"")) + (display "msgstr \"\"")) + (newline))))) + +;; Extraction of TRANSLATORS comments: + +(define %comments-line + (make-parameter #f)) + +(define %ecomments-string + (make-parameter #f)) + +(define (update-ecomments-string! str) + "Sets the value of the parameter object %ecomments-string if str is +an ecomments string. An ecomments string is extracted from a comment +because it starts with TRANSLATORS or a key specified with +--add-comments." ;TODO: Support for other keys is missing. + (cond + ((not str) (%ecomments-string #f)) + ((= (1+ (or (%comments-line) + -42)) ;arbitrary unequal initial value + (or (%line-number) 0)) + (let ((m (string-match ";+[ \t]*(.*)" str))) + (when m + (%comments-line (%line-number)) + (%ecomments-string + (if (%ecomments-string) + (cons (match:substring m 1) (%ecomments-string)) + (list (match:substring m 1))))))) + (else + (let ((m (string-match ";+[ \t]*(TRANSLATORS:.*)" str))) + (if m + (begin + (%comments-line (%line-number)) + (%ecomments-string + (if (%ecomments-string) + (cons (match:substring m 1) (%ecomments-string)) + (list (match:substring m 1))))) + (%ecomments-string '#f)))))) + +(define %file-name + (make-parameter #f)) + +(define (update-file-name! name) + "Sets the value of the parameter object %file-name to NAME." + (%file-name name)) + +(define %old-line-number + (make-parameter #f)) + +(define (update-old-line-number! number) + "Sets the value of the parameter object %old-line-number to NUMBER." + (%old-line-number number)) + +(define %line-number + (make-parameter #f)) + +(define (update-line-number! number) + "Sets the value of the parameter object %line-number to NUMBER." + (%line-number number)) + +(define (incr-line-number!) + "Increments the value of the parameter object %line-number by 1." + (%line-number (1+ (%line-number)))) + +(define (incr-line-number-for-each-nl! list) + "Increments %line-number once for each NL recursively in LIST. Does +nothing if LIST is no list but e.g. an empty 'program." + (when (list? list) + (for-each + (lambda (part) + (match part + ('NL (incr-line-number!)) + ((? list?) (incr-line-number-for-each-nl! part)) + (else #f))) + list))) + +(define (current-ref) + "Return the location field for a PO entry." + (let ((add (option-ref %options 'add-location 'full))) + (cond + ((option-ref %options 'no-location #f) #f) + ((eq? add 'full) + (string-append (%file-name) ":" (number->string (%line-number)))) + ((eq? add 'file) + (%file-name)) + ((eq? add 'never) + #f)))) + +(define (make-simple-po-entry msgid) + (let ((po (make-po-entry + (%ecomments-string) + (current-ref) + #f ;TODO: Use scheme-format for format strings? + #f ;no ctxt + msgid + #f))) + (update-ecomments-string! #f) + po)) + + +(define (matching-keyword id) + "Return the keyword-spec whose identifier is the same as ID, or #f +if ID is no string or no such keyword-spec exists." + (and (symbol? id) + (let ((found (member (symbol->string id) + %keyword-specs + (lambda (id spec) + (string=? id (keyword-spec-id spec)))))) + (and found (car found))))) + +(define (nth-exp program n) + "Return the Nth 'token or 'list inside the PROGRAM parse tree or #f +if no tokens or lists exist." + (let loop ((i 0) + (rest program)) + (define (on-hit exp) + (if (= i n) exp + ;; else: + (loop (1+ i) (cdr rest)))) + (match rest + (() #f) + ((('token . _) . _) (on-hit (car rest))) + ((('list open-paren exp close-paren) . _) (on-hit (car rest))) + ((_ . _) (loop i (cdr rest))) + (else #f)))) + +(define (more-than-one-exp? program) + "Return true if PROGRAM consiste of more than one expression." + (if (matching-keyword (token->string-symbol-or-keyw (nth-exp program 0))) + (nth-exp program 2) ;if there is third element, keyword does not count + (nth-exp program 1))) + +(define (token->string-symbol-or-keyw tok) + "For a parse tree TOK, if it is a 'token parse tree, return its +value as a string, symbol or #:-keyword, otherwise return #f." + (match tok + (('token (parts ...) . remaining) + ;; This is a string with line breaks in it. + (with-input-from-string + (string-append + (apply string-append + (map-in-order + (lambda (part) + (match part + (('NL _) + (begin (incr-line-number!) + "\n")) + (else part))) + parts)) + (car remaining)) + (lambda () + (read)))) + (('token exp) + (with-input-from-string exp + (lambda () + (read)))) + (else #f))) + +(define (complex-marked-list->po-entries parse-tree) + "Check if PARSE-TREE is marked by a keyword. If yes, for a complex +keyword spec, return a list of po-entries for it. For a simple +keyword spec, return the argument number of its singular form. +Otherwise return #f." + (let* ((first (nth-exp parse-tree 0)) + (spec (matching-keyword (token->string-symbol-or-keyw first)))) + (if spec + (if ;if the identifier of a complex keyword occurs first + (complex-keyword-spec? spec) + ;; then make po entries for it + (match spec + (($ <keyword-spec> id sg pl c total xcomment) + (if (eq? c 'mixed) ; if msgctxt and singular msgid are in one string + (let* ((exp (nth-exp parse-tree sg)) + (val (token->string-symbol-or-keyw exp)) + (idx (if (string? val) (string-rindex val #\|)))) + (list + (let ((po (make-po-entry + (%ecomments-string) + (current-ref) + #f ;TODO: Use scheme-format for format strings? + (string-take val idx) + (string-drop val (1+ idx)) + #f))) ;plural forms are unsupported here + (update-ecomments-string! #f) + po))) + ;; else construct msgids + (receive (pl-id pl-entries) + (match pl + (#f (values #f '())) + (else (construct-msgid-and-po-entries + (nth-exp parse-tree pl)))) + (receive (sg-id sg-entries) + (construct-msgid-and-po-entries + (nth-exp parse-tree sg)) + (cons + (let ((po (make-po-entry + (%ecomments-string) + (current-ref) + #f ;TODO: Use scheme-format for format strings? + (and c (token->string-symbol-or-keyw + (nth-exp parse-tree c))) + sg-id + pl-id))) + (update-ecomments-string! #f) + po) + (append sg-entries pl-entries))))))) + ;; else if it is a simple keyword, return the argnum: + (keyword-spec-sg spec)) + ;; if no keyword occurs, then false + #f))) + +(define (construct-po-entries parse-tree) + "Converts a PARSE-TREE resulting from a call to parse-scheme-file to +a list of po-entry records. Unlike construct-msgid-and-po-entries, +strings are not collected to a msgid. The list of po-entry records is +the return value." + (let ((entries (complex-marked-list->po-entries parse-tree))) + (cond + ((list? entries) entries) + ((number? entries) ;parse-tree yields a single, simple po entry + (update-old-line-number! (%line-number)) + (receive (id entries) + (construct-msgid-and-po-entries + (nth-exp parse-tree entries)) + (update-line-number! (%old-line-number)) + (let ((po (make-simple-po-entry id))) + (incr-line-number-for-each-nl! parse-tree) + (cons po entries)))) + (else ;search for marked translations in parse-tree + (match parse-tree + (() '()) + (('comment str) (begin + (update-ecomments-string! str) + '())) + (('NL _) (begin (incr-line-number!) '())) + (('token . _) (begin (incr-line-number-for-each-nl! parse-tree) '())) + (('list open-paren program close-paren) + (construct-po-entries program)) + (('program . components) + (append-map construct-po-entries components)) + ;; Note: PEG compresses empty programs to non-lists: + ('program + '())))))) + +(define* (tag counter prefix #:key (flavor 'start)) + "Formats the number COUNTER as a tag according to FLAVOR, which is +either 'start, 'end or 'empty for a start, end or empty tag, +respectively." + (string-append "<" + (if (eq? flavor 'end) "/" "") + prefix + (number->string counter) + (if (eq? flavor 'empty) "/" "") + ">")) + +(define-record-type <construct-fold-state> + (make-construct-fold-state msgid-string maybe-part counter po-entries) + construct-fold-state? + ;; msgid constructed so far; #f if none, "" if only empty string: + (msgid-string construct-fold-state-msgid-string) + ;; only append this if string follows: + (maybe-part construct-fold-state-maybe-part) + ;; counter for next tag: + (counter construct-fold-state-counter) + ;; complete po entries from marked sub-expressions: + (po-entries construct-fold-state-po-entries)) + +(define* (construct-msgid-and-po-entries parse-tree + #:optional + (prefix "")) + "Like construct-po-entries, but with two return values. The first +is an accumulated msgid constructed from all components in PARSE-TREE +for use in make-po-entry. Non-strings are replaced by tags containing +PREFIX. The second return value is a list of po entries for +sub-expressions marked with a complex keyword spec." + (match parse-tree + (() (values "" '())) + ;; Note: PEG compresses empty programs to non-lists: + ('program (values "" '())) + (('comment str) (begin + (update-ecomments-string! str) + (values "" '()))) + (('NL _) (begin (incr-line-number!) + (error "Program consists only of line break." + `(,(%file-name) ,(%line-number))))) + (('token . _) + (let ((maybe-string (token->string-symbol-or-keyw parse-tree))) + (if (string? maybe-string) + (values maybe-string '()) + (error "Single symbol marked for translation." + `(,maybe-string ,(%file-name) ,(%line-number)))))) + (('list open-paren program close-paren) + ;; parse program instead + (construct-msgid-and-po-entries program prefix)) + (('program (? matching-keyword)) + (error "Double-marked for translation." + `(,parse-tree ,(%file-name) ,(%line-number)))) + (('program . components) + ;; Concatenate strings in parse-tree to a new msgid and add an + ;; <x> tag for each list in between. + (match + (fold + (lambda (component prev-state) + (match prev-state + (($ <construct-fold-state> msgid-string maybe-part + counter po-entries) + (match component + (('comment str) (begin (update-ecomments-string! str) + prev-state)) + (('NL _) (begin (incr-line-number!) + prev-state)) + (('token . _) + (let ((maybe-string (token->string-symbol-or-keyw component))) + (cond + ((string? maybe-string) + ;; if string, append maybe-string to previous msgid + (make-construct-fold-state + (string-append (or msgid-string "") + maybe-part maybe-string) + "" + counter + po-entries)) + ((and (more-than-one-exp? components) ;not the only symbol + (or (not msgid-string) ;no string so far + (string-suffix? ">" msgid-string))) ;tag before + prev-state) ;then ignore + (else ;append tag representing the token + (make-construct-fold-state + msgid-string + (string-append + maybe-part + (tag counter prefix #:flavor 'empty)) + (1+ counter) + po-entries))))) + (('list open-paren program close-paren) + (let ((first (nth-exp program 0))) + (incr-line-number-for-each-nl! list) + (match (complex-marked-list->po-entries program) + ((? list? result) + (make-construct-fold-state + msgid-string + (string-append + maybe-part + (tag counter prefix #:flavor 'empty)) + (1+ counter) + (append result po-entries))) + (result + (cond + ((number? result) + (receive (id entries) + (construct-msgid-and-po-entries + (nth-exp program result) + (string-append prefix + (number->string counter) + ".")) + (make-construct-fold-state + (string-append (or msgid-string "") + maybe-part + (tag counter prefix + #:flavor 'start) + id + (tag counter prefix + #:flavor 'end)) + "" + (1+ counter) + (append entries po-entries)))) + ((not (more-than-one-exp? components)) + ;; Singletons do not need to be marked. + (receive (id entries) + (construct-msgid-and-po-entries + program + prefix) + (make-construct-fold-state + id + "" + counter + (append entries po-entries)))) + (else ;unmarked list + (if (not msgid-string) + ;; then ignore + prev-state + ;; else: + (make-construct-fold-state + msgid-string + (string-append + maybe-part + (tag counter prefix #:flavor 'empty)) + (1+ counter) + po-entries)))))))))))) + (make-construct-fold-state #f "" 1 '()) + components) + (($ <construct-fold-state> msgid-string maybe-part counter po-entries) + (values (or msgid-string + (error "Marking for translation yields empty msgid." + %file-name %line-number)) + po-entries)))))) + +(define scheme-file->po-entries + (compose construct-po-entries + parse-scheme-file)) + +(define %files-from-port + (let ((files-from (option-ref %options 'files #f))) + (if files-from + (open-input-file files-from) + (current-input-port)))) + +(define %source-files + (let loop ((line (get-line %files-from-port)) + (source-files '())) + (if (eof-object? line) + (begin + (close-port %files-from-port) + source-files) + ;; else read file names before comment + (let ((before-comment (car (string-split line #\#)))) + (loop (get-line %files-from-port) + (append + (map match:substring (list-matches "[^ \t]+" before-comment)) + source-files)))))) + +(define %output-po-entries + (fold (lambda (scheme-file po-entries) + (begin + (update-file-name! scheme-file) + (update-line-number! 1) + (update-old-line-number! #f) + (%comments-line #f) + (append (scheme-file->po-entries scheme-file) + po-entries))) + '() + %source-files)) + +(define %output-port + (let ((output (option-ref %options 'output #f)) + (domain (option-ref %options 'default-domain #f))) + (cond + (output (open-output-file output)) + (domain (open-output-file (string-append domain ".po"))) + (else (open-output-file "messages.po"))))) + +(with-output-to-port %output-port + (lambda () + (let ((copyright (option-ref %options 'copyright-holder + "THE PACKAGE'S COPYRIGHT HOLDER")) + (package (option-ref %options 'package-name "PACKAGE")) + (version (option-ref %options 'package-version #f)) + (bugs-email (option-ref %options 'msgid-bugs-address ""))) + (display "# SOME DESCRIPTIVE TITLE.\n") + (display (string-append "# Copyright (C) YEAR " copyright "\n")) + (display (string-append "# This file is distributed under the same \ +license as the " package " package.\n")) + (display "# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n") + (display "#\n") + (write-po-entry (make-po-entry #f #f '("fuzzy") #f "" #f)) + (display (string-append "\"Project-Id-Version: " + package + (if version + (string-append " " version) + "") + "\\n\"\n")) + (display (string-append "\"Report-Msgid-Bugs-To: " + bugs-email + "\\n\"\n")) + (display (string-append "\"POT-Creation-Date: " + (date->string (current-date) "~1 ~H:~M~z") + "\\n\"\n")) + (display "\"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n\"\n") + (display "\"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n\"\n") + (display "\"Language-Team: LANGUAGE <l...@li.org>\\n\"\n") + (display "\"Language: \\n\"\n") + (display "\"MIME-Version: 1.0\\n\"\n") + (display "\"Content-Type: text/plain; charset=UTF-8\\n\"\n") + (display "\"Content-Transfer-Encoding: 8bit\\n\"\n") + (for-each (lambda (po-entry) + (begin + (newline) + (write-po-entry po-entry))) + (combine-duplicate-po-entries %output-po-entries))))) diff --git a/website/sexp-xgettext.scm b/website/sexp-xgettext.scm new file mode 100644 index 0000000..71ef4a9 --- /dev/null +++ b/website/sexp-xgettext.scm @@ -0,0 +1,530 @@ +;;; GNU Guix web site +;;; Copyright © 2019 Florian Pelz <pelzflor...@pelzflorian.de> +;;; +;;; This file is part of the GNU Guix web site. +;;; +;;; The GNU Guix web site is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU Affero General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; The GNU Guix web site is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU Affero General Public License for more details. +;;; +;;; You should have received a copy of the GNU Affero General Public License +;;; along with the GNU Guix web site. If not, see <http://www.gnu.org/licenses/>. + +(define-module (sexp-xgettext) + #:use-module (ice-9 match) + #:use-module (ice-9 peg) + #:use-module (ice-9 rdelim) + #:use-module (ice-9 receive) + #:use-module (ice-9 regex) + #:use-module (srfi srfi-1) ;lists + #:use-module (srfi srfi-9) ;records + #:export (set-complex-keywords! + set-simple-keywords! + sgettext + sngettext + spgettext + snpgettext + %linguas)) + +(define %complex-keywords + ;; Use set-complex-keywords! to change this to a list of keywords + ;; for sexp-xgettext functions other than sgettext. + (make-parameter '())) + +(define (set-complex-keywords! kw) + (%complex-keywords kw)) + +(define %simple-keywords + ;; Use set-simple-keywords! to change this to a list of keywords + ;; for sgettext. + (make-parameter '())) + +(define (set-simple-keywords! kw) + (%simple-keywords kw)) + +(define (gettext-keyword? id) + (or (member id (%complex-keywords)) + (member id (%simple-keywords)))) + +;;COPIED FROM scripts/sexp-xgettext.scm: +(define* (tag counter prefix #:key (flavor 'start)) + "Formats the number COUNTER as a tag according to FLAVOR, which is +either 'start, 'end or 'empty for a start, end or empty tag, +respectively." + (string-append "<" + (if (eq? flavor 'end) "/" "") + prefix + (number->string counter) + (if (eq? flavor 'empty) "/" "") + ">")) +;;END COPIED FROM scripts/sexp-xgettext.scm + +;;ADAPTED FROM scripts/sexp-xgettext.scm +(define-record-type <construct-fold-state> + (make-construct-fold-state msgid-string maybe-part counter) + construct-fold-state? + ;; msgid constructed so far; #f if none, "" if only empty string + (msgid-string construct-fold-state-msgid-string) + ;; only append this if string follows: + (maybe-part construct-fold-state-maybe-part) + ;; counter for next tag: + (counter construct-fold-state-counter)) +;;END ADAPTED FROM scripts/sexp-xgettext.scm + +(define (sexp->msgid exp) + "Return the msgid as constructed by construct-msgid-and-po-entries +in scripts/sexp-xgettext.scm from the expression EXP." + (let loop ((exp exp) + (prefix "")) + (match exp + (() "") + ((or ('quote inner-exp) + ('quasiquote inner-exp) + ('unquote inner-exp) + ('unquote-splicing inner-exp)) + (loop inner-exp prefix)) + ((first-component . components) + (cond + ((gettext-keyword? first-component) + (error "Double-marked for translation." exp)) + (else + (or + (construct-fold-state-msgid-string + (fold + (lambda (component prev-state) + (match prev-state + (($ <construct-fold-state> msgid-string maybe-part counter) + (let inner-loop ((exp component)) + (match exp + ((or (? symbol?) (? keyword?)) + (if (not msgid-string) + ;; ignore symbols at the beginning + prev-state + ;; else make a tag for the symbol + (make-construct-fold-state + msgid-string + (string-append maybe-part + (tag counter prefix #:flavor 'empty)) + (1+ counter)))) + ((? string?) + (make-construct-fold-state + (string-append (or msgid-string "") + maybe-part exp) + "" counter)) + ((? list?) + (match exp + (() ;ignore empty list + prev-state) + ((or (singleton) + ('quote singleton) + ('quasiquote singleton) + ('unquote singleton) + ('unquote-splicing singleton)) + (inner-loop singleton)) + ((components ...) + (cond + ((and (not (null? components)) + (member (car components) (%simple-keywords))) + ;; if marked for translation, insert inside tag + (make-construct-fold-state + (string-append (or msgid-string "") + maybe-part + (tag counter prefix #:flavor 'start) + (loop (cadr components) + (string-append + prefix + (number->string counter) + ".")) + (tag counter prefix #:flavor 'end)) + "" + (1+ counter))) + ;; else ignore if first + ((not msgid-string) + prev-state) + ;; else make empty tag + (else (make-construct-fold-state + msgid-string + (string-append + maybe-part + (tag counter prefix #:flavor 'empty)) + (1+ counter)))))))))))) + (make-construct-fold-state #f "" 1) + exp)) + (error "Marking for translation yields empty msgid." exp))))) + ((? string?) exp) + (else (error "Single symbol marked for translation." exp))))) + +(define-record-type <deconstruct-fold-state> + (make-deconstruct-fold-state tagged maybe-tagged counter) + deconstruct-fold-state? + ;; XML-tagged expressions as an association list name->expression: + (tagged deconstruct-fold-state-tagged) + ;; associate this not-yet-tagged expression with pre if string + ;; follows, with post if not: + (maybe-tagged deconstruct-fold-state-maybe-tagged) + ;; counter for next tag: + (counter deconstruct-fold-state-counter)) + +(define (deconstruct exp msgstr) + "Return an s-expression like EXP, but filled with the content from +MSGSTR." + (define (find-empty-element msgstr name) + "Return the regex match structure for the empty tag for XML +element of type NAME inside MSGSTR. If the element does not exist or +is more than the empty tag, #f is returned." + (string-match (string-append "<" (regexp-quote name) "/>") msgstr)) + (define (find-element-with-content msgstr name) + "Return the regex match structure for the non-empty XML element of +type NAME inside MSGSTR. Submatch 1 is its content. If the element +does not exist or is just the empty tag, #f is returned." + (string-match (string-append "<" (regexp-quote name) ">" + "(.*)" + "</" (regexp-quote name) ">") + msgstr)) + (define (get-first-element-name prefix msgstr) + "Return the name of the first XML element in MSGSTR whose name +begins with PREFIX, or #f if there is none." + (let ((m (string-match + (string-append "<(" (regexp-quote prefix) "[^>/.]+)/?>") msgstr))) + (and m (match:substring m 1)))) + (define (prefix+counter prefix counter) + "Return PREFIX with the number COUNTER appended." + (string-append prefix (number->string counter))) + (let loop ((exp exp) + (msgstr msgstr) + (prefix "")) + (define (unwrap-marked-expression exp) + "Return two values for an expression EXP containing a (possibly +quoted/unquoted) marking for translation with a simple keyword at its +root. The first return value is a list with the inner expression, the +second is a procedure to wrap the processed inner expression in the +same quotes or unquotes again." + (match exp + (('quote inner-exp) + (receive (unwrapped quotation) + (unwrap-marked-expression inner-exp) + (values unwrapped + (lambda (res) + (list 'quote (quotation res)))))) + (('quasiquote inner-exp) + (receive (unwrapped quotation) + (unwrap-marked-expression inner-exp) + (values unwrapped + (lambda (res) + (list 'quasiquote (quotation res)))))) + (('unquote inner-exp) + (receive (unwrapped quotation) + (unwrap-marked-expression inner-exp) + (values unwrapped + (lambda (res) + (list 'unquote (quotation res)))))) + (('unquote-splicing inner-exp) + (receive (unwrapped quotation) + (unwrap-marked-expression inner-exp) + (values unwrapped + (lambda (res) + (list 'unquote-splicing (quotation res)))))) + ((marking . rest) ;list with marking as car + ;; assume arg to translate is first argument to marking: + (values (list-ref rest 0) identity)))) + (define (assemble-parenthesized-expression prefix tagged) + "Return a parenthesized expression deconstructed from MSGSTR +with the meaning of XML elements taken from the name->expression +association list TAGGED. The special tags [prefix]pre and +[prefix]post are associated with a list of expressions before or after +all others in the parenthesized expression with the prefix, +respectively, in reverse order." + (append ;prepend pre elements to what is in msgstr + (reverse (or (assoc-ref tagged (string-append prefix "pre")) '())) + (let assemble ((rest msgstr)) + (let ((name (get-first-element-name prefix rest))) + (cond + ((and name (find-empty-element rest name)) => + ;; first XML element in rest is empty element + (lambda (m) + (cons* + (match:prefix m) ;prepend string before name + (assoc-ref tagged name) ;and expression for name + (assemble (match:suffix m))))) + ((and name (find-element-with-content rest name)) => + ;; first XML element in rest has content + (lambda (m) + (receive (unwrapped quotation) + (unwrap-marked-expression (assoc-ref tagged name)) + (cons* + (match:prefix m) ;prepend string before name + ;; and the deconstructed element with the content as msgstr: + (quotation + (loop + unwrapped + (match:substring m 1) + (string-append name "."))) + (assemble (match:suffix m)))))) + (else + ;; there is no first element + (cons + rest ;return remaining string + (reverse ;and post expressions + (or (assoc-ref tagged (string-append prefix "post")) '()))))))))) + (match exp + (() '()) + (('quote singleton) + (cons 'quote (list (loop singleton msgstr prefix)))) + (('quasiquote singleton) + (cons 'quasiquote (list (loop singleton msgstr prefix)))) + (('unquote singleton) + (cons 'unquote (list (loop singleton msgstr prefix)))) + (('unquote-splicing singleton) + (cons 'unquote-splicing (list (loop singleton msgstr prefix)))) + ((singleton) + (list (loop singleton msgstr prefix))) + ((first-component . components) + (cond + ((gettext-keyword? first-component) + ;; another marking for translation + ;; -> should be an error anyway; just retain exp + exp) + (else + ;; This handles a single level of a parenthesized expression. + ;; assemble-parenthesized-expression will call loop to + ;; recurse to deeper levels. + (let ((tagged-state + (fold + (lambda (component prev-state) + (match prev-state + (($ <deconstruct-fold-state> tagged maybe-tagged counter) + (let inner-loop ((exp component) ;sexp to handle + (quoting identity)) ;for wrapping state + (define (tagged-with-maybes) + "Return the value of tagged after adding all +maybe-tagged expressions. This should be used as the base value for +tagged when a string or marked expression is seen." + (match counter + (#f + (alist-cons (string-append prefix "pre") + maybe-tagged + tagged)) + ((? number?) + (let accumulate ((prev-counter counter) + (maybes (reverse maybe-tagged))) + (match maybes + (() tagged) + ((head . tail) + (alist-cons + (prefix+counter prefix prev-counter) + head + (accumulate (1+ prev-counter) tail)))))))) + (define (add-maybe exp) + "Return a deconstruct-fold-state with EXP +added to maybe-tagged. This should be used for expressions that are +neither strings nor marked for translation with a simple keyword." + (make-deconstruct-fold-state + tagged + (cons (quoting exp) maybe-tagged) + counter)) + (define (counter-with-maybes) + "Return the old counter value incremented by +one for each expression in maybe-tagged. This should be used together +with tagged-with-maybes." + (match counter + ((? number?) + (+ counter (length maybe-tagged))) + (#f + 1))) + (define (add-tagged exp) + "Return a deconstruct-fold-state with an +added association in tagged from the current counter to EXP. If +MAYBE-TAGGED is not empty, associations for its expressions are added +to pre or their respective counter. This should be used for +expressions marked for translation with a simple keyword." + (let ((c (counter-with-maybes))) + (make-deconstruct-fold-state + (alist-cons + (prefix+counter prefix c) + (quoting exp) + (tagged-with-maybes)) + '() + (1+ c)))) + (match exp + (('quote inner-exp) + (inner-loop inner-exp + (lambda (res) + (list 'quote res)))) + (('quasiquote inner-exp) + (inner-loop inner-exp + (lambda (res) + (list 'quasiquote res)))) + (('unquote inner-exp) + (inner-loop inner-exp + (lambda (res) + (list 'unquote res)))) + (('unquote-splicing inner-exp) + (inner-loop inner-exp + (lambda (res) + (list 'unquote-splicing res)))) + (((? gettext-keyword?) . rest) + (add-tagged exp)) + ((or (? symbol?) (? keyword?) (? list?)) + (add-maybe exp)) + ((? string?) + ;; elements in maybe-tagged appear between strings + (let ((c (counter-with-maybes))) + (make-deconstruct-fold-state + (tagged-with-maybes) + '() + c)))))))) + (make-deconstruct-fold-state '() '() #f) + exp))) + (match tagged-state + (($ <deconstruct-fold-state> tagged maybe-tagged counter) + (assemble-parenthesized-expression + prefix + (match maybe-tagged + (() tagged) + (else ;associate maybe-tagged with pre or post + (alist-cons + (cond ;if there already is a pre, use post + ((assoc-ref tagged (string-append prefix "pre")) + (string-append prefix "post")) + (else (string-append prefix "pre"))) + maybe-tagged + tagged)))))))))) + ((? string?) msgstr) + (else (error "Single symbol marked for translation." exp))))) + +;; NOTE: The sgettext macros have no hygiene because they use +;; datum->syntax and do not preserve the semantics of anything looking +;; like an sgettext macro. This is an exceptional use case; do not +;; try this at home. + +(define (sgettext x) + "After choosing an identifier for marking s-expressions for +translation, make it usable by defining a macro with it calling +sgettext. If for example the chosen identifier is G_, +use (define-syntax G_ sgettext)." + (syntax-case x () + ((id exp) + (let* ((msgid (sexp->msgid (syntax->datum #'exp))) + (new-exp (deconstruct (syntax->datum #'exp) + (gettext msgid)))) + (datum->syntax #'id new-exp))))) + +;; gettext’s share/gettext/gettext.h tells us we can prepend a msgctxt +;; and #\eot before a msgid in a gettext call. + +(define (spgettext x) + "After choosing an identifier for behavior similar to pgettext:1c,2, +make it usable like (define-syntax C_ spgettext)." + (syntax-case x () + ((id msgctxt exp) + (let* ((gettext-context-glue #\eot) ;as defined in gettext.h + (lookup (string-append (syntax->datum #'msgctxt) + (string gettext-context-glue) + (sexp->msgid (syntax->datum #'exp)))) + (msgstr (car (reverse (string-split (gettext lookup) + gettext-context-glue)))) + (new-exp (deconstruct (syntax->datum #'exp) + msgstr))) + (datum->syntax #'id new-exp))))) + +(define %plural-numbers + ;; Hard-coded list of input numbers such that for each language’s + ;; plural formula, for each possible output grammatical number, + ;; there is an n among %plural-numbers that yields this output (for + ;; any language documented when running “info "(gettext) Plural + ;; forms"”), except 1 is omitted from this list because it is a + ;; special case for sngettext. That is, calling ngettext with each + ;; number from %plural-numbers and with 1 in any locale is + ;; guaranteed to return each plural form at least once. It would be + ;; more resilient towards new languages if instead of hard-coding we + ;; computed this from the Plural-Forms in the MO file header entry, + ;; but that is not worth the incurred code complexity. + '(0 2 3 11 100)) + +(define (sngettext x) + "After choosing an identifier for behavior similar to ngettext:1,2, +make it usable like (define-syntax N_ sngettext). sngettext takes +into account that not all languages have only singular and plural +forms." + (syntax-case x () + ((id exp1 exp2 n) + (let* ((msgid1 (sexp->msgid (syntax->datum #'exp1))) + (msgid2 (sexp->msgid (syntax->datum #'exp2))) + (msgstr1 (ngettext msgid1 msgid2 1)) + (result (acons ;return an association list msgstr->deconstructed + ;; msgstr for n=1: + msgstr1 + `(,'unquote ,(deconstruct (syntax->datum #'exp1) + msgstr1)) + ;; other msgstr for n of each plural form: + (map + (lambda (n) + (let ((msgstr (ngettext msgid1 msgid2 n))) + (cons msgstr `(,'unquote + ,(deconstruct (syntax->datum #'exp2) + msgstr))))) + %plural-numbers)))) + (datum->syntax + #'id + `(,assoc-ref (,'quasiquote ,result) + (,ngettext ,msgid1 ,msgid2 ,(syntax->datum #'n)))))))) + +(define (snpgettext x) + "After choosing an identifier for behavior similar to npgettext:1c,2,3, +make it usable like (define-syntax NC_ snpgettext)." + (syntax-case x () + ((id msgctxt exp1 exp2 n) + (let* ((gettext-context-glue #\eot) ;as defined in gettext.h + (msgid1 (string-append (syntax->datum #'msgctxt) + (string gettext-context-glue) + (sexp->msgid (syntax->datum #'exp1)))) + ;; gettext.h implementation shows: msgctxt is only part of msgid1. + (msgid2 (sexp->msgid (syntax->datum #'exp2))) + (msgstr1 (car + (reverse + (string-split + (ngettext msgid1 msgid2 1) + gettext-context-glue)))) + (result (acons ;return an association list msgstr->deconstructed + ;; msgstr for n=1: + msgstr1 + `(,'unquote ,(deconstruct (syntax->datum #'exp1) + msgstr1)) + ;; other msgstr for n of each plural form: + (map + (lambda (n) + (let ((msgstr (car + (reverse + (string-split + (ngettext msgid1 msgid2 n) + gettext-context-glue))))) + (cons msgstr `(,'unquote + ,(deconstruct (syntax->datum #'exp2) + msgstr))))) + %plural-numbers)))) + (datum->syntax + #'id + `(,assoc-ref (,'quasiquote ,result) + (,car + (,reverse + (,string-split + (,ngettext ,msgid1 ,msgid2 ,(syntax->datum #'n)) + ,gettext-context-glue))))))))) + +(define %linguas + (with-input-from-file "po/LINGUAS" + (lambda _ + (let loop ((line (read-line))) + (if (eof-object? line) + '() + ;; else read linguas before comment + (let ((before-comment (car (string-split line #\#)))) + (append + (map match:substring (list-matches "[^ \t]+" before-comment)) + (loop (read-line)))))))))