implementing functional languages: a tutorial

Kragen Javier Sitaker Sat, 23 Sep 2006 00:37:09 -0700

In 1991, Simon Peyton Jones and David Lester wrote a 300-page book on
how to implement functional programming languges, including full
source code in Miranda.  Miranda is a dead language, and I don't have
a Haskell system handy on this machine, so here's my effort at
transliterating the first 30 pages (so far) into JavaScript.


<html><head><title>The Core Language</title>
<script type="text/javascript">
// First, JavaScript's toString behavior is totally fucked.  So:

function map(func, list) {
    var rv = []
    for (var ii = 0; ii < list.length; ii++) {
        rv.push(func(list[ii]))
    }
    return rv
}

function repr(obj) {
    var undefined
    if (obj === undefined) { return "undefined" }
    else if (obj == null) { return "null" }
    else if (obj.__repr__) { return obj.__repr__() }
    else if (obj.constructor == String) { 
        return '"' + obj.replace(/"/g, '\\"').replace(/\n/g, '\\n') + '"'
    } else if (obj.constructor == Array) {
        return '[' + map(repr, obj).join(', ') + ']'
    } else {
        return obj.toString()
    }
}

// Miranda has built in the ability to make 'constructors' very
// easily.  Here we add code to emulate that ability somewhat, so that
// where the book has 'EVar "x"', we can write 'EVar("x")', and where
// the book has 'data Expr a = EVar Name', we can write 
// 'EVar = Constructor("EVar", ["Name"])'.  However, I think
// pattern-matching in JavaScript is a lost cause, so if you have an
// EVar x, to get its name, you say x.Name.
function ConstructorObject(name, argnames, argvals) {
    this.tagname = name
    for (var ii = 0; ii < argnames.length; ii++) {
        this[argnames[ii]] = argvals[ii]
    }
    this.argnames = argnames
}
ConstructorObject.prototype = {
    toString: function() { 
        var rv = this.tagname
        for (var ii = 0; ii < this.argnames.length; ii++) {
            rv = rv + ' ' + this[this.argnames[ii]]
        }
        return rv
    },
    __repr__: function() {
        var self = this
        return this.tagname + '(' + map(repr, this.argvals).join(', ') + 
            ')'
    },
    argvals: function() {
        var self = this
        return map(function(name) { return self[name] }, self.argnames)
    }
}
function ArgumentsMismatch(argvals, argnames) {
    this.argvals = argvals
    this.argnames = argnames
    this.toString = function () { 
        return ('ArgumentsMismatch: got: ' + repr(this.argvals) + 
                ' expected: ' + repr(this.argnames))
    }
}
function Constructor(name, argnames) {
    return function() {
        if (arguments.length != argnames.length) {
            throw new ArgumentsMismatch(arguments, argnames)
        }
        return new ConstructorObject(name, argnames, arguments)
    }
}

// Maybe pattern-matching isn't a lost cause.  I added this around
// section 1.5.5 so only the later code uses it.  Basically I got sick
// of not getting error messages when I forgot a case or passed a
// value of the wrong type to a case analysis.

function case_analysis(value, cases) {
    var closure = cases['on_' + value.tagname]
    if (!closure) closure = cases['default']
    if (!closure) throw "Missing case for " + value.tagname
    return closure.apply(this, value.argvals())
}

// So here's the first bit of code from the book, in section 1.3,
// which defines the type Expr a as an algebraic type.

EVar = Constructor("EVar", ["Name"])
ENum = Constructor("ENum", ["Value"])
EConstr = Constructor("EConstr", ["Tag", "Arity"])
EAp = Constructor("EAp", ["Function", "Argument"])
ELet = Constructor("ELet", ["IsRec", "Definitions", "Body"])
ECase = Constructor("ECase", ["Expr", "Alternatives"])
ELam = Constructor("ELam", ["Arguments", "Body"])

// There's a little more code in the section:

function bindersOf(defns) { return map(function(x) { x[0] }, defns) }
function rhssOf(defns) { return map(function(x) { x[1] }, defns) }

// I'm making Alter a constructor rather than using tuples (or, in
// JavaScript, Arrays):
Alter = Constructor("Alter", ["Tag", "Variables", "Body"])

function isAtomicExpr(expr) {
    return (expr.tagname == 'EVar' || expr.tagname == 'ENum')
}

// ScDefn too gets to be a constructor
ScDefn = Constructor("ScDefn", ["Name", "Arguments", "Body"])

small_program_on_p_20 = (
[ScDefn("main", [], EAp(EVar("double"), ENum(21))),
 ScDefn("double", ["x"], EAp(EAp(EVar("+"), EVar("x")), EVar("x")))]
)

PreludeDefs = [
    ScDefn("I", ["x"], EVar("x")),
    ScDefn("K", ["x", "y"], EVar("x")),
    ScDefn("K1", ["x", "y"], EVar("y")),
    ScDefn("S", ["f", "g", "x"], EAp(EAp(EVar("f"), EVar("x")),
                                     EAp(EVar("g"), EVar("x")))),
    ScDefn("compose", ["f", "g", "x"], EAp(EVar("f"),
                                           EAp(EVar("g"), EVar("x")))),
    ScDefn("twice", ["f"], EAp(EAp(EVar("compose"), EVar("f")), EVar("f"))),
] 

function pprExpr_section_1_5_1(expr) {
    if (expr.tagname == 'ENum') { return expr.Value.toString() }
    else if (expr.tagname == 'EVar') { return expr.Name }
    else if (expr.tagname == 'EAp') { 
        return pprExpr_section_1_5_1(expr.Function) + ' ' + 
            pprExpr_section_1_5_1(expr.Argument)
    }
}

function pprAExpr(e) {
    if (isAtomicExpr(e)) return pprExpr(e)
    else return "(" + pprExpr(e) + ")"
}

function repeat(n, x) {
    var rv = []
    for (var ii = 0; ii < n; ii++) rv.push(x)
    return rv
}

function foldll(func, start, list) {
    var rv = start
    for (var ii = 0; ii < list.length; ii++) rv = func(rv, list[ii])
    return rv
}

function mkMultiAp(n, e1, e2) {
    // the Miranda version of this starts out by constructing an
    // infinite list of e2s, which is more difficult to do in JavaScript
    return foldll(EAp, e1, repeat(n, e2))
}

// at this point they justify introducing section 1.5.2 by saying,
// A pretty-printer whose cost is quadratic in the size of the program to
// be printed is clearly unacceptable, so we had better find a way around
// it.

// which may be true in the abstract, but mkMultiAp(990, EVar("f"),
// EVar("x")) still pprExprs quite quickly enough for me with the
// version in the last section, and my JavaScript interpreter seems to
// have a thousand-level stack limit.

function pprExpr(expr) {
    // they forgot ENum in this version...
    if (expr.tagname == 'ENum') { return iStr(expr.Value.toString()) }
    else if (expr.tagname == 'EVar') { return iStr(expr.Name) }
    else if (expr.tagname == 'EAp') { 
        return iAppend(pprExpr(expr.Function), 
                       iAppend(iStr(" "), pprAExpr(expr.Argument)))
    } else if (expr.tagname == 'ELet') {
        var keyword = (expr.IsRec ? "letrec" : "let")
        return iConcat([iStr(keyword), iNewline,
                        iStr("  "), iIndent(pprDefns(expr.Definitions)),
                        iNewline,
                        iStr("in "), pprExpr(expr.Body)])
    } else throw expr.tagname
}

function pprDefns(defns) {
    var sep = iConcat([iStr(";"), iNewline])
    return iInterleave(sep, map(pprDefn, defns))
}

function pprDefn(definition) {
    return iConcat([iStr(definition[0]), iStr(" = "), 
                   iIndent(pprExpr(definition[1]))])
}

// exercise 1.2
function iConcat(iseqs) {
    return foldll(iAppend, iNil, iseqs)
}
function iInterleave(separator, iseqs) {
    if (!iseqs.length) return iNil
    var rv = iseqs.shift()
    while (iseqs.length) rv = iAppend(rv, iAppend(separator, iseqs.shift()))
    return rv
}

function pprint(prog) {
    return iDisplay(pprProgram(prog))
}

// Section 1.5.3.  At this point none of the code in the previous
// section is executable because all the iCrap hasn't been defined yet.

// Note that there is an errro here in the text: it describes a type
// "iseqRep" which isn't mentioned in the code.

INil = Constructor("INil", [])
IStr = Constructor("IStr", ["String"])
IAppend = Constructor("IAppend", ["Seq1", "Seq2"])

iNil = INil()
iStr = IStr
function iIndent(seq) { return seq }
iNewline = IStr("\n")
iAppend = IAppend
function iDisplay(seq) { return flatten([seq]) }

// This is iterative rather than recursive because JavaScript
// implementations don't deal well with deep recursion.
function flatten(iseqlist) {
    var rv = ""
    while (iseqlist.length > 0) {
        var front = iseqlist.shift()
        if (front.tagname == 'INil') continue
        if (front.tagname == 'IStr') { rv = rv + front.String; continue }
        if (front.tagname == 'IAppend') {
            iseqlist.unshift(front.Seq2)
            iseqlist.unshift(front.Seq1)
            continue
        }
    }
    return rv
}

// Section 1.5.4

IIndent = Constructor("IIndent", ["Contents"])
INewline = Constructor("INewline", [])
iIndent = IIndent
iNewline = INewline()

function iDisplay(seq) { return flatten(0, [[seq, 0]]) }
function flatten(column, iseqlist) {
    var rv = ""
    while (iseqlist.length > 0) {
        var front = iseqlist.shift()
        var indent = front[1]
        // the cascading-if version of this was 2 lines longer
        case_analysis(front[0], {
            on_INil: function() { }, // exercise 1.6
            on_IStr: function(string) {  // exercise 1.6
                rv += string; column += string.length
            },
            on_IAppend: function(seq1, seq2) { // exercise 1.6
                iseqlist.unshift([seq2, indent])
                iseqlist.unshift([seq1, indent])
            },
            on_INewline: function() {
                rv += '\n' + repeat(indent, ' ').join('')
                column = indent  // one of the very few ways JS beats Python
            },
            on_IIndent: function(contents) {
                iseqlist.unshift([contents, column])
            },
        })
    }
    return rv
}

// exercises 1.7, 1.8: not done because not necessary

// section 1.5.6
iNum = iStr
function iFWNum(width, n) {
    var digits = n.toString()
    return iStr(repeat(width - digits.length, ' ').join('') + digits)
}

function iLayn(seqs) { // I lay in sex!
    var rv = []
    for (var ii = 0; ii < seqs.length; ii++) {
        var seq = seqs[ii]
        var n = ii + 1
        rv.append(iConcat([iFWNum(4, n), iStr(") "), iIndent(seq), iNewline]))
    }
    return iConcat(rv)
}

</script>
</head>
<body>
<pre>
<script type="text/javascript">
xpy = EAp(EAp(EVar("+"), EVar("x")), EVar("y"))
;(pprExpr_section_1_5_1(xpy) + "\n")
f5 = mkMultiAp(5, EVar("f"), EVar("x"))
;(pprExpr_section_1_5_1(f5) + "\n")
;(iDisplay(pprExpr(f5)) + "\n")
;(iDisplay(pprExpr(ELet(true, 
[["x", ENum(3)],
 ["y", EAp(EAp(EVar("+"), EVar("x")), ENum("4"))]],
EVar("y")
))))
</script>
</pre>
<h1>The core language</h1>

In 1991, Simon Peyton Jones and David R. Lester wrote "Implementing
Functional Languages: A Tutorial", in which they implement a simple
functional language (suitable for use as the intermediate language in
a functional language compiler) as an interpreter.  They call this
language "the core language".  This page contains a JavaScript
transliteration of their code, which is in Miranda.  So far I've
finished everything up to section 1.6.  Use Jesse Ruderman's
JavaScript Shell bookmarklet to access it, or the crappy JavaScript
line below:

<pre id="results"></pre>
<form onsubmit="var toprint; try { toprint = repr(eval(this.js.value)) } 
catch(e) { toprint = 'Exception: ' + e }; 
document.getElementById('results').appendChild(document.createTextNode(toprint 
+ '\n')); return false">
<input size="80" name="js" /><input type="submit" />
</form>

</body></html>

implementing functional languages: a tutorial

Reply via email to