civodul pushed a commit to branch master in repository maintenance. commit 8e152a0e188549c0091620ab53b4620498a3b62d Author: Ludovic Courtès <l...@gnu.org> Date: Thu Jul 6 18:56:20 2017 +0200
gpce-2017: Write some more. --- doc/gpce-2017/categories.tex | 23 +++ doc/gpce-2017/code/gexp-expansion.scm | 65 ++++--- doc/gpce-2017/code/system-test.scm | 19 ++ doc/gpce-2017/gpce.skb | 350 ++++++++++++++++++++++++---------- doc/gpce-2017/staging.sbib | 35 +++- 5 files changed, 355 insertions(+), 137 deletions(-) diff --git a/doc/gpce-2017/categories.tex b/doc/gpce-2017/categories.tex new file mode 100644 index 0000000..5988a97 --- /dev/null +++ b/doc/gpce-2017/categories.tex @@ -0,0 +1,23 @@ +\begin{CCSXML} +<ccs2012> +<concept> +<concept_id>10011007.10011006.10011041.10011047</concept_id> +<concept_desc>Software and its engineering~Source code generation</concept_desc> +<concept_significance>500</concept_significance> +</concept> +<concept> +<concept_id>10011007.10011006.10011008.10011009.10011012</concept_id> +<concept_desc>Software and its engineering~Functional languages</concept_desc> +<concept_significance>300</concept_significance> +</concept> +<concept> +<concept_id>10011007.10011074.10011111.10011697</concept_id> +<concept_desc>Software and its engineering~System administration</concept_desc> +<concept_significance>300</concept_significance> +</concept> +</ccs2012> +\end{CCSXML} + +\ccsdesc[500]{Software and its engineering~Source code generation} +\ccsdesc[300]{Software and its engineering~Functional languages} +\ccsdesc[300]{Software and its engineering~System administration} diff --git a/doc/gpce-2017/code/gexp-expansion.scm b/doc/gpce-2017/code/gexp-expansion.scm index 003b767..111fa81 100644 --- a/doc/gpce-2017/code/gexp-expansion.scm +++ b/doc/gpce-2017/code/gexp-expansion.scm @@ -1,33 +1,40 @@ (use-modules (guix)) -;;!begin-condensed +;;!begin-gexp-expansion #~(list (string-append #$imagemagick "/bin/convert") (string-append #$emacs "/bin/emacs")) -;;!end-condensed - -;;!begin-gexp-without-reader-macro -(gexp (list (string-append (ungexp imagemagick) - "/bin/convert") - (string-append (ungexp emacs) - "/bin/emacs"))) 
-;;!end-gexp-without-reader-macro - -;;!begin-expansion -(let ((references - (list (gexp-input imagemagick) - (gexp-input emacs))) - (proc (lambda (a b) - (list 'list - (list 'string-append a - "/bin/convert") - (list 'string-append b - "/bin/emacs"))))) - (make-gexp references proc)) -;;!end-expansion - -;;!begin-staged-sexp -(list (string-append "/gnu/store/65qrc…-imagemagick-6.9" - "/bin/convert") - (string-append "/gnu/store/825n3…-emacs-25.2" - "/bin/emacs")) -;;!end-staged-sexp + + +⇒ (gexp (list (string-append (ungexp imagemagick) + "/bin/convert") + (string-append (ungexp emacs) + "/bin/emacs"))) + +⇒ (let ((references + (list (gexp-input imagemagick) + (gexp-input emacs))) + (proc (lambda (a b) + (list 'list + (list 'string-append a + "/bin/convert") + (list 'string-append b + "/bin/emacs"))))) + (make-gexp references proc)) + +⇝ (list (string-append "/gnu/store/65qrc…-imagemagick-6.9" + "/bin/convert") + (string-append "/gnu/store/825n3…-emacs-25.2" + "/bin/emacs")) +;;!end-gexp-expansion + +;;!begin-gexp-hygiene +(let ((gen-body (lambda (x) + #~(let ((x 40)) + (+ x #$x))))) + #~(let ((x 2)) + #$(gen-body #~x)) + +⇒ (let ((x0 2)) + (let ((x1 40)) (+ x1 x0))) +;;!end-gexp-hygiene + diff --git a/doc/gpce-2017/code/system-test.scm b/doc/gpce-2017/code/system-test.scm new file mode 100644 index 0000000..6a086b7 --- /dev/null +++ b/doc/gpce-2017/code/system-test.scm @@ -0,0 +1,19 @@ +#~(begin + (use-modules (gnu build marionette) + (srfi srfi-64) (ice-9 match)) + + ;; Spawn the VM that runs the declared OS. + (define marionette (make-marionette (list #$run))) + + (test-begin "basic") + (test-assert "uname" + (match (marionette-eval '(uname) marionette) + (#("Linux" host-name version _ architecture) + (and (string=? host-name + #$(operating-system-host-name os)) + (string-prefix? #$(package-version + (operating-system-kernel os)) + version) + (string-prefix? 
architecture %host-type))))) + (test-end) + (exit (= (test-runner-fail-count (test-runner-current)) 0)))) diff --git a/doc/gpce-2017/gpce.skb b/doc/gpce-2017/gpce.skb index 48b2733..8101a93 100644 --- a/doc/gpce-2017/gpce.skb +++ b/doc/gpce-2017/gpce.skb @@ -79,14 +79,37 @@ "\\usepackage{microtype}\n" ;; "\\usepackage[hypcap]{caption}\n" ;; "\\DeclareCaptionType{copyrightbox}\n" - "\\usepackage{balance}\n" - ))) - -(engine-custom-set! (find-engine 'latex) 'documentclass - "\\documentclass[sigplan, anonymous, review]{acmart}") + ;; "\\usepackage{balance}\n" + + ;; Recognize ‘⇒’ and ‘⇝’ in LaTeX input. + "\\DeclareUnicodeCharacter{21D2}{$\\Rightarrow$}\n" + "\\DeclareUnicodeCharacter{21DD}{$\\rightsquigarrow$}\n" + + ;; Trick so that ‘…’ is properly + ;; typeset inside teletype text. + "\\DeclareUnicodeCharacter{2026}{\\textrm{\\ldots}}\n" + + "\ +\\acmConference[GPCE'17]{International Conference on Generative Programming: Concepts \\& Experience}{October 2017}{Vancouver, Canada} +\\acmYear{2017} +\\copyrightyear{2017}\n"))) + +(let ((latex (find-engine 'latex))) + (engine-custom-set! latex 'documentclass + "\\documentclass[sigplan, anonymous, review]{acmart}") + + (engine-custom-set! latex 'maketitle #f) + + (markup-writer '&acm-category latex + :options '(:index :section :subsection) + :action (lambda (n e) + (display "\\ccsdesc[") + (display (markup-option n :index)) + (display "]") + (display "{") + (display (markup-option n :section)) + (display "}\n"))) -(let* ((latex (find-engine 'latex)) - (usep (engine-custom latex 'usepackage))) (markup-writer 'figure latex :options '(:legend :number :multicolumns) :action (lambda (n e) @@ -118,6 +141,9 @@ (engine-custom-set! latex 'source-comment-color "#776600")) + +(define (acmart-abstract . 
body) + (!latex "\\begin{abstract}\n$1\n\\end{abstract}\n" body)) (bibliography "../els-2013/guix.sbib") (bibliography "../reppar-2015/reppar.sbib") @@ -126,8 +152,7 @@ (document :title [Code Staging in GNU Guix] - (abstract - ;chapter :title [Abstract] + (acmart-abstract (p [GNU Guix is a “functional” package manager that builds upon earlier work on Nix. Guix implements high-level abstractions such as @@ -140,14 +165,22 @@ eventual execution.]) (p [In this paper we present ,(emph [G-expressions]) or “,(emph [gexps])”, the staging mechanism we devised for Guix. We explain our journey from traditional Lisp S-expressions to G-expressions, which -augment the former with contextual information, and we discuss the -implementation of gexps. We report on our experience using gexps in a -variety of operating system use cases—from package build processes, to -initial RAM disk code, to system services. To our knowledge, gexps +augment the former with contextual information and ensure hygienic code staging. +We discuss the +implementation of gexps and report on our experience using them in a +variety of operating system use cases—from package build processes +to system services. To our knowledge, gexps provide a unique way to cover many aspects of OS configuration in a single, multi-tier language, and to facilitate code reuse and code -sharing. Finally we compare to related work on staging.])) +sharing.])) + ;; See <http://dl.acm.org/ccs/ccs_flat.cfm>. + (!latex "\\input{categories.tex}\n") + + (acm-keywords [Code staging, Scheme, Functional package management]) + + (!latex "\\maketitle\n") + (chapter :title [Introduction] (p [Users of free operating systems such as GNU/Linux are used @@ -199,7 +232,7 @@ interface.] :ident "fig-package-def" :multicolumns #t - (prog + (prog :line #f (source :language guix [ (define hello (package @@ -209,7 +242,7 @@ interface.] 
(method url-fetch) (uri (string-append "mirror://gnu/hello/hello-" version ".tar.gz")) ;!package-uri - (sha256 (base32 "0wqd8...")))) ;!base32 + (sha256 (base32 "0wqd8…")))) ;!base32 (build-system gnu-build-system) ;!build-system (arguments '(#:configure-flags ;!config-flags @@ -227,11 +260,11 @@ interface.] in Figure ,(ref :figure "fig-package-def") are compiled to ,(emph [derivations]), the low-level representation of build actions inherited from Nix. A derivation specifies: a command to run to -perform the build (the “build program”), environment variables to be +perform the build (the ,(emph [build program])), environment variables to be defined, and derivations whose build result it depends on. Derivations are sent to a privileged daemon, which is responsible for building them on behalf of clients. The build daemon creates isolated -environments (isolated “containers” in a chroot) in which it spawns +environments (isolated ,(emph [containers]) in a chroot) in which it spawns the build program; since build environments are isolated, this ensures that build programs do not depend on undeclared inputs.]) (p [The second way in which Guix departs from Nix is by using @@ -368,7 +401,10 @@ code.]) of Scheme’s ,(tt [unquote]): it allows quoted to refer to values in the host language. These values can be of any of Scheme’s primitive data types, but we are specifically interested in values such as -package objects that can be “compiled” to elements in the store.])) +package objects that can be “compiled” to elements in the store.]) + (item [,(tt [ungexp-splicing]), abbreviated ,(tt [#$@]), allows a +list of elements to be “spliced” in the surrounding list, similar to +Scheme’s ,(tt [unquote-splicing]).])) The example in Figure ,(ref :figure "fig-build-sexp"), rewritten as a gexp, is shown in Figure ,(ref :figure "fig-build-gexp"). 
We have all @@ -406,39 +442,38 @@ is a function of the system type, ,(tt [gexp->derivation]) must make sure to use the file name of ImageMagick corresponding to its ,(tt [#:system]) argument. Therefore, this substitution must happen when ,(tt [gexp->derivation]) is invoked, and ,(emph [not]) when the gexp -is created.])) +is created.]) + (p [G-expressions are “hygienic”: ,(emph [they preserve +lexical scope across stages]) ,(ref :bib '(rhiger2012:hygienic +kiselyov2008:metascheme kohlbecker1986:hygienic)).] + + (figure :legend [Lexical scope preservation across stages.] + :ident "fig-gexp-hygiene" + + (prog :line #f + (source :language guix :file "code/gexp-expansion.scm" + :start ";;!begin-gexp-hygiene" + :stop ";;!end-gexp-hygiene"))) + + [Figure ,(ref :figure "fig-gexp-hygiene") illustrates two +well-known properties of hygienic multi-stage programs: first, binding +,(tt [x]) in one stage (outside the gexp) is distinguished from +binding ,(tt [x]) in another stage (inside the gexp); second, binding +,(tt [x]) introduced inside ,(tt [gen-body]) does not shadow binding +,(tt [x]) in the outer gexp thanks to the renaming of these variables.])) (section :title [Implementation] + :ident "implementation" (figure - :legend [Macro expansion of a G-expression.] + :legend [Macro expansion (⇒) of a G-expression and code +generation (⇝).] 
:ident "fig-gexp-expansion" (prog :line #f (source :language guix :file "code/gexp-expansion.scm" - :start ";;!begin-condensed" - :stop ";;!end-condensed")) - - (p [… is equivalent to:]) - - (prog :line #f - (source :language guix :file "code/gexp-expansion.scm" - :start ";;!begin-gexp-without-reader-macro" - :stop ";;!end-gexp-without-reader-macro")) - - (p [… which expands to:]) - - (prog :line #f - (source :language guix :file "code/gexp-expansion.scm" - :start ";;!begin-expansion" - :stop ";;!end-expansion")) - - (p [Once staged, this generates an sexp along these lines:]) - - (prog :line #f - (source :language guix :file "code/gexp-expansion.scm" - :start ";;!begin-staged-sexp" - :stop ";;!end-staged-sexp"))) + :start ";;!begin-gexp-expansion" + :stop ";;!end-gexp-expansion"))) (p [As can be seen from the example above, gexps are first-class Scheme values: a variable can be bound to a gexp, and @@ -447,9 +482,10 @@ consists of two parts: a syntactic layer that turns ,(tt [#~]) forms into code that instantiates gexp records, and run-time support procedures to serialize gexps and to “lower” their inputs.]) (p [Scheme is extensible through macros, so ,(tt [gexp]) is a -“hygienic” (referentially-transparent) macro (,(tt [#~]) is a “reader -macro” that expands to a ,(tt [gexp]) form, nothing more.) This is -implemented as a library for GNU,(~)Guile, an R5RS/R6RS Scheme +“hygienic” ,(tt [syntax-case]) macro ,(ref :bib +'dybvig1992:syntax-case); ,(tt [#~]) and ,(tt [#$]) are ,(it [reader +macros]) that expand to a ,(tt [gexp]) or ,(tt [ungexp]) sexps. This +is implemented as a library for GNU,(~)Guile, an R5RS/R6RS Scheme implementation, ,(emph [without any modification to its compiler]). Figure ,(ref :figure "fig-gexp-expansion") shows what our ,(tt [gexp]) macro expands to. 
In the expanded code, ,(tt [gexp-input]) returns a @@ -462,7 +498,7 @@ Intenally, ,(tt [gexp->sexp]), the function that converts gexps to sexps, calls this two-argument procedure passing it the store file names of ImageMagick and Emacs. This strategy gives us constant-time substitutions.]) - (p [The internal ,(tt [gexp-inputs]) function returns, for a + (p [The internal ,(tt [gexp-input]) function returns, for a given gexp, store, and system type, the derivations that the gexp depends on. In this example, it returns the derivations for ImageMagick and Emacs, as computed by the ,(tt [package-derivation]) @@ -470,6 +506,33 @@ function seen earlier. Gexps can be nested, as in ,(tt [#~#$#~(string-append #$emacs "/bin/emacs")]). The input list returned by ,(tt [gexp-inputs]) for the outermost gexp is the sum of the inputs of outermost gexp and the inputs nested gexps.]) + (p [The ,(tt [gexp]) macro performs several passes on its body: + +,(enumerate + (item [The first pass ,(emph [,(symbol "alpha")-renames lexical +bindings]) introduced by the gexp in order to preserve lexical scope, +as illustrated by Figure ,(ref :figure "fig-gexp-hygiene"). The +implementation is similar to MetaScheme ,(ref :bib +'kiselyov2008:metascheme) and to that described by Rhiger ,(ref :bib +'rhiger2012:hygienic), with caveats discussed in ,(numref :text +[Section] :ident "limitations"). Unlike the examples usually given in +the literature, identifiers must be generated in a ,(emph +[deterministic]) fashion: if they were not, we would produce different +derivations at each run, which in turn would trigger full rebuilds of +the package graph. Thus, instead of relying on ,(tt [gensym]) and +,(tt [generate-temporaries]), we generate identifiers using a hash of +the input expression as a stem, along with the lexical nesting level of +the identifier.]) + (item [The second pass ,(emph [collects the escape forms]) (,(tt +[ungexp]) variants) in the input source. 
The list of escape forms is +needed to construct the list of inputs recorded in the ,(tt [<gexp>]) +record, and to construct the formal argument list of the gexp’s code +generation function shown in Figure ,(ref :figure +"fig-gexp-expansion").]) + (item [The third pass ,(emph [substitutes escape forms]) with +references to the corresponding formal arguments of the code +generation function. This leads to the sexp-construction expression +shown in Figure ,(ref :figure "fig-gexp-expansion").]))]) (figure :legend [The gexp compilers for package objects and for @@ -502,17 +565,17 @@ assuming the variable ,(tt [emacs]) is bound to a package object, ,(tt [(string-append "/gnu/store/…-emacs-25.2" "/bin/emacs")]), as we have seen earlier. We defined a ,(tt [file-append]) function that returns objects with a custom expander: one that performs string concatenation -when generating the sexp. We can now write gexps like: - -,(prog :line #f - (source :language guix [#~(list #$(file-name emacs "/bin/emacs"))])) +when generating the sexp. We can now write gexps like:] -which expands to this sexp: + (!latex "\\\\[0.3cm]\n") + (prog :line #f + (source :language guix [ +#~(execl #$(file-append emacs "/bin/emacs")) -,(prog :line #f - (source :language guix [(list "/gnu/store/…-emacs-25.2/bin/emacs")])) +⇝ (execl "/gnu/store/…-emacs-25.2/bin/emacs")])) + (!latex "\\\\[0.3cm]\n") -This is convenient in situations where we do not want or cannot impose +[This is convenient in situations where we do not want or cannot impose a build-side ,(tt [string-append]) code.])) (section :title [Extensions] @@ -597,10 +660,6 @@ package. 
If we instead allow for direct use of ,(tt [#$]) in package be seen how we can allow ,(tt [#$]) forms while not sacrificing this flexibility.)])) - (section :title [Program Generation] - - (p [])) - (section :title [System Services] (p [GuixSD, the Guix-based GNU/Linux distribution, was one of @@ -618,7 +677,9 @@ the kernel Linux.]) :ident "fig-initrd" (prog :line #f - (source :language guix :file "code/initrd.scm"))) + (source :language guix :file "code/initrd.scm" + :start ";;!begin-initrd" + :stop ";;!end-initrd"))) (p [The initrd is a small file system image that the kernel Linux mounts as its initial file system. It then runs the ,(tt @@ -638,8 +699,8 @@ module, and the ,(tt [kodir]) store item which contains kernel modules (drivers). That all the relevant store items referred to by the gexp, directly or indirectly, are “pulled” in the initrd comes for free.]) (p [Once the root file system is mounted, the initrd passes -control to the Shepherd, our daemon-managing daemon,(ref :url (url -"https://gnu.org/s/shepherd/")). The Shepherd is responsible for +control to the Shepherd, our daemon-managing daemon,(footnote (ref :url (url +"https://gnu.org/s/shepherd/"))). The Shepherd is responsible for starting system services—from the email or SSH daemon to the X graphical display server—and for doing other initialization operations such as mounting additional file systems. The Shepherd is written in @@ -659,35 +720,99 @@ to do to achieve this was to (1) wrap our ,(tt [start]) gexp in ,(tt functionality, and (2) use our start-process-in-container function lieu of the Shepherd’s own start-process function. This is a good example of cross-stage code sharing, where the second stage in this -case is the operating system’s run-time environment.]))) +case is the operating system’s run-time environment.])) + + (section :title [System Tests] + + (figure :legend [Core of a whole-system test.] 
+ :ident "fig-system-test" + + (prog :line #f + (source :language guix :file "code/system-test.scm"))) + + (p [GuixSD comes with a set of ,(emph [whole-system tests]). +Each of them takes an ,(tt [operating-system]) definition, which defines +the OS configuration, instantiates it in a virtual machine (VM), and +verifies that the system running in the VM matches some of the settings. The +guest OS is instrumented with a Scheme interpreter that evaluates +expressions sent by the host OS (we call it “marionette”).]) + (p [Whole-system tests are derivations whose build programs are +gexps that resemble that of Figure ,(ref :figure "fig-system-test"). +The build program passes ,(tt [run]), the script to spawn the VM, to the +instrumentation tool. The test then uses ,(tt [marionette-eval]) to +call the ,(tt [uname]) function: an ,(emph [additional code stage]) is +introduced here, this time using ,(tt [quote]). The test matches the +return value of ,(tt [uname]) against the expected vector, and makes +sure the information corresponds to the various bits declared in ,(tt +[os]), our OS definition.]))) + (chapter :title [Limitations] :ident "limitations" - (p [,(bold [Hygiene.]) A limitation of gexp that should be -visible to anyone used to the wonders of Scheme macros is the lack of -“hygiene”. Namely, our implementation of gexps does not preserve -scope in its output. For example, in ,(tt [(let ((inner #~x)) -#~(lambda (x) #$inner))]), the ,(tt [inner]) staged expression refers -to the argument of the staged lambda even though it is not in scope. -This example illustrated ,(emph [unintended variable capture]): ,(tt -[x]) in ,(tt [inner]) should be seen as a free variable, but it -instead ends up referring to the ,(tt [x]) that is in scope in the -staged code.]) - (p [A well-documented approach to the problem is ,(symbol -"alpha")-renaming binding that are introduced ,(ref :bib -'(dybvig1992:syntax-case kiselyov2008:metascheme)). 
However, to do -that, we must first be able to identify binding constructs. This -turns out to be hard to achieve in Scheme because macros can define -new bindings constructs. The macro expander, of course, does this and -more already. However, we do not want to macro-expand staged code; -instead, macro expansion should be performed “the normal way”, by the -Guile program that compiles or evaluate the staged code. Again, this -ensures reproducibility across Guix installations since we control -precisely the Guile variant used in derivations whereas we do not -control the Guile variant used to evaluate “host-side” code.]) - (p [,(bold [Modules in scope.])]) - (p [,(bold [Cross-stage debugging.])])) + (p [,(bold [Hygiene.]) Our implementation of hygiene, discussed +in ,(numref :text [Section] :ident "implementation"), follows the +well-documented approach to the problem ,(ref :bib +'(rhiger2012:hygienic kiselyov2008:metascheme)). Rhiger’s +implementation handles a single binding construct (,(tt [lambda])) and +MetaScheme handles a couple more constructs, but of course, ours had +to deal with many more binding constructs: R6RS defines around ten +binding constructs (including binding constructs for syntactic +keywords such as ,(tt [let-syntax])), and Guile adds a couple more.]) + (p [Fundamentally, this is all about identifying binding +constructs. This turns out to be hard to achieve in Scheme because +macros can define ,(emph [new]) binding constructs. +Our ,(symbol "alpha")-renaming pass is oblivious to those so it will +not properly rename bindings introduced by user-defined macros. The +macro expander, of course, does this and more already, so it would be +tempting to reuse it rather than duplicate part of its work. However, +we do not want to macro-expand staged code; instead, macro expansion +should be performed “the normal way”, by the Guile program that +compiles or evaluates the staged code. 
Again, this ensures +reproducibility across Guix installations since we control precisely +the Guile variant used in derivations whereas we do not control the +Guile variant used to evaluate “host-side” code. How we could hook +into Guile’s macro expander, based on ,(tt [psyntax]) ,(ref :bib +'dybvig1992:syntax-case), is still an open question. To our +knowledge, this problem of hygienic staging of a language with macros +has not been addressed in the literature.]) + (p [On top of that, ,(tt [gexp]) must track the ,(emph +[quotation level]) of several types of quotation: ,(tt [gexp]), ,(tt +[quote]), ,(tt [quasiquote]), and ,(tt [syntax]) (though our +implementation currently leaves out ,(tt [syntax]) handling). For +each quotation type, ,(symbol "alpha")-renaming must be skipped when +the quotation level is greater than zero. For example, in ,(tt +[#~(lambda (x) `(x ,x))]), the first ,(tt [x]) must ,(emph [not]) be +renamed, while the second one must be renamed. Needless to say, the +resulting implementation lacks the conciseness of those found in the +literature. This is another area that could use help from the macro +expander.]) + (p [,(bold [Modules in scope.]) The ,(tt +[with-imported-modules]) form allows one to specify which modules a gexp +expects in its execution environment, but we currently lack a way to +specify ,(emph [which modules should be in scope]), which could be +useful in some situations. Part of the reason is that in Guile ,(tt +[use-modules]) clauses must appear at the top level, and thus they +cannot be used in a gexp that ends up being inserted in a +non-top-level position. Scoped ,(tt [use-modules]) clauses would help +to some extent, but there are still open questions +regarding potential name clashes.]) + (p [,(bold [Cross-stage debugging.]) ,(tt [gexp->derivation]) +emits build programs as sexps in a file in ,(tt [/gnu/store]), using +Scheme ,(tt [write]), which writes the whole sexp as one line. 
When +an error occurs during the execution of these programs, Guile prints a +backtrace that refers to source code locations ,(emph [inside the +generated code]). What we would like, instead, is for the backtrace +to refer to the location ,(emph [of the gexp itself]). C has ,(tt +[#line]) directives, which code generators insert in generated code to +,(emph [map]) generated code to its source. Even if a similar +feature were available in Scheme, it would be unsuitable: moving the +source code where a gexp appears would lead to a different derivation, +in turn triggering a rebuild of everything that depends on it, which +is undesirable. Instead we would need a way to pass source code +mapping information ,(emph [out-of-band]), in a way that does not affect +the derivation that is produced. We are still investigating ways to +achieve that.])) (chapter :title [Related Work] :ident "related" @@ -720,25 +845,28 @@ introduced “hygienic” macros in Scheme—i.e., macros that generate well-scoped code, without unintended capture of variables—which later made it into the Sixth Report on Scheme (R6RS). MacroML achieves something similar in the context of ML, which is statically-typed -,(ref :bib 'ganz2001:macroml). Both tools allow uses to define new +,(ref :bib 'ganz2001:macroml). Both tools allow users to define new binding constructs; the macro expander recognizes those bindings constructs, which allows it to track bindings and preserve hygiene, notably by ,(symbol "alpha")-renaming introduced bindings.]) - (p [Kiselyov wrote MetaScheme as a translation of MetaOCaml’s -staging primitives, ,(tt [bracket]), ,(tt [escape]), and ,(tt [lift]) -,(ref :bib 'kiselyov2008:metascheme). The beauty of MetaScheme is -that it extends Scheme through a set of macros and does not -necessitate any modification to the host Scheme implementation. 
-MetaScheme demonstrates an implementation of hygience through ,(symbol -"alpha")-renaming; however, it only considers a few core binding -constructs and does not address hygiene in the presence of -user-defined binding constructs (macros). This strategy is + (p [MetaScheme is a translation of MetaOCaml’s staging +primitives, ,(tt [bracket]), ,(tt [escape]), and ,(tt [lift]) ,(ref +:bib 'kiselyov2008:metascheme). The beauty of MetaScheme is that it +extends Scheme through a set of macros and does not necessitate any +modification to the host Scheme implementation. MetaScheme inspired +the ,(symbol "alpha")-renaming pass described in ,(numref :text +[Section] :ident "implementation"). However, it only considers a few +core binding constructs and does not address hygiene in the presence +of user-defined binding constructs (macros). This strategy is appropriate in a macro-less language with a fixed set of binding -constructs like OCaml, but unsuitable for Scheme programs that use -macros. MetaScheme supports cross-stage persistence (CSP) but the +constructs like OCaml, but we have seen that languages such as Scheme +that support user-defined binding constructs create additional +challenges. MetaScheme supports cross-stage persistence (CSP) but the implementation provided assumes that an array of cross-stage values is -shared among stages, which is hard to transpose to concrete -settings.]) +shared among stages, which is hard to transpose to concrete settings. +Rhiger’s work ,(ref :bib 'rhiger2012:hygienic) follows a similar +approach but chooses to redefine Scheme’s quasiquotation rather than +introduce new constructs.]) (p [Staged Scheme, or S,(sup [2]), also improved on Lisp quasiquotations by providing bracket, escape, and lift forms separate from ,(tt [quasiquote]) and ,(tt [unquote]) ,(ref :bib 'wang2002:s2). @@ -752,15 +880,15 @@ two-stage programs. 
The article discusses ,(emph [code regeneration]) at run time; gexps have a similar requirement here: at run time a given gexp may be instantiated for different system types, for instance ,(tt [x86_64-linux]) and ,(tt [i686-linux]).]) - (p [While Guix uses “homogeneous” staging, where the source and -staged language are the same, Hop instead performs ,(emph + (p [While Guix uses ,(emph [homogeneous]) staging, where the +source and staged language are the same, Hop instead performs ,(emph [heterogenous staging]): the source language is Scheme, but the generated code is JavaScript ,(ref :bib 'serrano2010:multitier). Hop has a ,(tt [~]) (tilde) form to introduce staged expressions, and a ,(tt [$]) (dollar) form to escape to unstaged code. Hop involves two code stages: server-side code and client-side code. Unlike G-expressions, support for tilde forms is built in the Hop compiler, -and tild forms are not first-class objects. Hop comes with useful +and tilde forms are not first-class objects. Hop comes with useful multi-stage debugging facilities not found in Guix, such as the ability to display cross-stage stack traces with correct source location information.]) @@ -775,7 +903,17 @@ location information.]) ) (chapter :title [Conclusion] - :ident "conclusion") + :ident "conclusion" + + (p [G-expressions are a novel application of hygienic code +staging techniques from the literature to functional software +deployment. They extend common staging constructs (,(tt [bracket]) +and ,(tt [escape])) with additional tooling: cross-compilation-aware +escapes, and imported-module annotations. Gexps are used in +production to express package build procedures in Guix as well as all +the assembly of operating system components in GuixSD. 
Using a +single-language framework with staging has proved to enable new ways +of code reuse and composition.])) (references)) diff --git a/doc/gpce-2017/staging.sbib b/doc/gpce-2017/staging.sbib index 328f733..c12b605 100644 --- a/doc/gpce-2017/staging.sbib +++ b/doc/gpce-2017/staging.sbib @@ -7,6 +7,22 @@ (year "1992") (url "http://www.cs.indiana.edu/~dyb/pubs/tr356.pdf")) +(inproceedings kohlbecker1986:hygienic + (author "Kohlbecker, Eugene and Friedman, Daniel P. and Felleisen, Matthias and Duba, Bruce") + (title "Hygienic Macro Expansion") + (booktitle "Proceedings of the 1986 ACM Conference on LISP and Functional Programming") + (series "LFP '86") + (year "1986") + (isbn "0-89791-200-4") + (location "Cambridge, Massachusetts, USA") + (pages "151--161") + (numpages "11") + (url "http://doi.acm.org/10.1145/319838.319859") + (doi "10.1145/319838.319859") + (acmid "319859") + (publisher "ACM") + (address "New York, NY, USA")) + (article serrano2010:multitier (author "Manuel Serrano and Christian Queinnec") (title "A Multi-tier Semantics for Hop") @@ -22,7 +38,7 @@ (address "Hingham, MA, USA")) (misc kiselyov2008:metascheme - (author "Oleg Kiselyov") + (author "Oleg Kiselyov and Chung-chieh Shan") (year "2008") (month "August") (url "http://okmij.org/ftp/meta-programming/#meta-scheme") @@ -78,7 +94,22 @@ Evaluation and Semantics-Based Program Manipulation (PEPM 1999)") (address "San Antonio, Texas, USA") (url "http://repository.readscheme.org/ftp/papers/pepm99/bawden.pdf")) -;; http://dl.acm.org/citation.cfm?doid=2661103.2661109 +(inproceedings rhiger2012:hygienic + (author "Rhiger, Morten") + (title "Hygienic Quasiquotation in Scheme") + (booktitle "Proceedings of the 2012 Annual Workshop on Scheme and Functional Programming") + (series "Scheme '12") + (year "2012") + (isbn "978-1-4503-1895-2") + (location "Copenhagen, Denmark") + (pages "58--64") + (numpages "7") + (url "http://doi.acm.org/10.1145/2661103.2661109") + (doi "10.1145/2661103.2661109") + (acmid 
"2661109") + (publisher "ACM") + (address "New York, NY, USA") + (keywords "hygiene, lexical scope, program generation, quasiquotation, types")) #| (defun skr-from-bibtex ()