FYI: The reformatter, which I plan to soon rename to "sweetener", is working much better now. Its output also serves as an interesting demo of sweet-expressions. Like any reformatter, it won't necessarily choose the representation a human would choose, but it's often reasonable.
I've also been using it to test round-trips (sweeten|unsweeten|prettyprint should produce the same thing as prettyprint if given S-expressions), which helps wring out bugs in both tools. Below is an example of a program in S-expression format, followed by the result of the "sweetener" converting it to sweet-expressions. The program I chose to reformat is the sweetener itself :-). --- David A. Wheeler ===== sweetener-as-s-expressions.scm =========== ; Filter to read S-expressions and output indented sweet-expression. ; ; Copyright (C) 2006-2012 David A. Wheeler. ; ; This software is released as open source software under the "MIT" license: ; ; Permission is hereby granted, free of charge, to any person obtaining a ; copy of this software and associated documentation files (the "Software"), ; to deal in the Software without restriction, including without limitation ; the rights to use, copy, modify, merge, publish, distribute, sublicense, ; and/or sell copies of the Software, and to permit persons to whom the ; Software is furnished to do so, subject to the following conditions: ; ; The above copyright notice and this permission notice shall be included ; in all copies or substantial portions of the Software. ; ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR ; OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ; OTHER DEALINGS IN THE SOFTWARE. ; TODO: Handle arbitrary end-of-line. Currently this assumes that ; lines end with just #\newline. ; Note: The maxwidth may be violated if, at a current indent, there is a ; long non-pair that exceeds it. But other than long atoms, it's respected, ; so it's unlikely to exceed this width in practice: (define maxwidth 78) (define indent-increment '(#\space #\space)) (define max-unit-character-length 60) (define max-unit-list-length 8) (define group-string "\\\\ ") (define infix-operators '(and or xor + - * / ^ ++ -- ** // ^^ < <= > >= = <> != ==)) ; Lists with these symbols as first parameter, and aren't shown as 1 line, ; are be shown as a line with SYMBOL FIRST-PARAMETER and *then* indents. ; This is used when in typical uses the first parameter is *special* and ; has a different semantic meaning from later parameters. ; This refinement isn't *necessary* but I think it looks better. (define cuddle-first-parameter '(define lambda if when unless case set! let let* letrec let1 do define-module library export import defun block typecase let-syntax letcrec-syntax define-syntax syntax-rules)) (define tab (integer->char 9)) (define LISTLP (list #\()) (define LISTRP (list #\))) (define LISTLBRACE (list (integer->char 123))) (define LISTRBRACE (list (integer->char 125))) ; Return length of x, which may be an improper list. ; If improper, count the two sides as two, so "(a . b)" is length 2. (define (general-length x) (general-length-inner x 0)) (define (general-length-inner x count-so-far) (cond ((null? x) count-so-far) ((not (pair? x)) (+ count-so-far 1)) (#t (general-length-inner (cdr x) (+ count-so-far 1))))) ; Return list x's *contents* represented as a list of characters. ; Each one must use modern-expressions, space-separated; ; it will be surrounded by (...) so no indentation processing is relevant. (define (unit-list x) (cond ((null? x) (quote ())) ((pair? x) (if (null? (cdr x)) (unit (car x)) (append (unit (car x)) '(#\space) (unit-list (cdr x))))) (#t (append (quote (#\space #\. #\space)) (unit x))))) ; Return #t if x should be represented using curly-infix notation {...}. (define (represent-as-infix? x) (and (pair? x) (symbol? (car x)) (memq (car x) infix-operators) (list? x) (>= (length x) 3) (<= (length x) 6))) ; Return tail of an infix expression, as list of chars (define (infix-tail op x) (cond ((null? x) LISTRBRACE) ((pair? x) (append '(#\space) op '(#\space) (unit (car x)) (infix-tail op (cdr x)))) (#t (append '(#\space #\. #\space) (unit x) LISTRBRACE)))) ; Define an association list of Lisp abbreviations. ; Eventually use "group" to define this, but currently the group ; symbol is under discussion, so best not to use it yet: (define abbreviations '('(#\') `(#\`) ,(#\,) ,@(#\, #\@))) ; return #t if we should as a traditional abbreviation, e.g., ' (define (represent-as-abbreviation? x) (and (assq (car x) abbreviations) (pair? (cdr x)) (null? (cddr x)))) ; Given list of characters, return the characters that would REPRESENT ; those characters inside a string AFTER the initial double-quote, ; then return the double-quote. This translates newline to \n, etc. (define (unit-string-tail x) (if (null? x) '(#\") (append (let ((c (car x))) (cond ((eq? c #\\) (quote (#\\ #\\))) ((eq? c #\") (quote (#\\ #\"))) ((eq? c #\newline) (quote (#\\ #\n))) ((eq? c tab) (quote (#\\ #\t))) (#t (list (car x))))) (unit-string-tail (cdr x))))) ; Return x represented as a modern-expression unit, as a list of characters. ; Indentation processing *may* be active, but the character sequence ; returned must not depend on that. ; This is widely-used; may want to memoize this. (define (unit x) (cond ((null? x) (string->list "()")) ((number? x) (string->list (number->string x))) ((boolean? x) (if x (quote (#\# #\t)) (quote (#\# #\f)))) ((symbol? x) (string->list (symbol->string x))) ((string? x) (append '(#\") (unit-string-tail (string->list x)))) ((pair? x) (cond ((represent-as-abbreviation? x) (append (cadr (assq (car x) abbreviations)) (unit (cadr x)))) ((symbol? (car x)) (if (represent-as-infix? x) (append LISTLBRACE (unit (cadr x)) (infix-tail (unit (car x)) (cddr x))) (append (unit (car x)) LISTLP (unit-list (cdr x)) LISTRP))) (#t (append LISTLP (unit-list x) LISTRP)))) (#t (string->list (object->string x))))) ; Return x, the rest of the list, as a list of characters. See line(). (define (line-tail x) (cond ((null? x) (quote ())) ((pair? x) (append (unit (car x)) '(#\space) (line-tail (cdr x)))) (#t (append (quote (#\. #\space)) (unit x))))) ; Return x represented as a line of space-separated modern-expression units, ; as a list of characters. ; Indentation processing *MUST* be active. (define (line x) (cond ((not (pair? x)) (unit x)) ((represent-as-abbreviation? x) (unit x)) ((represent-as-infix? x) (unit x)) ((and (pair? x) (null? (cdr x))) (if (symbol? (car x)) (append (unit (car x)) LISTLP LISTRP) (append LISTLP (unit (car x)) LISTRP))) (#t (append (unit (car x)) '(#\space) (line-tail (cdr x)))))) ; Input: expression "m" with indentation string "indent". ; Output: List of characters representing it (#\newline for new line). ; This tail handles the body of a list (after its first entry) when ; indentation processing is active, each of these is its own initial line. (define (iformat-body m indent) (if (null? m) '() (append (iformat-top (car m) indent) (iformat-body (cdr m) indent)))) ; Return list m represented in the typical indent style. (define (iformat-top-normal-indent m indent) (append (iformat-top (car m) indent) (iformat-body (cdr m) (append indent indent-increment)))) ; Input: expression "m" with indentation string "indent". ; Output: List of characters representing it (#\newline for new line). ; At this point, we're at the beginning of a possibly-indented ; line with sweet-expression (indentation) processing active. (define (iformat-top m indent) (if (not (pair? m)) (append indent (unit m) (quote (#\newline))) (let* ((asline (line m)) (length-asline (length asline))) (if (and (< length-asline max-unit-character-length) (< (+ length-asline (length indent)) maxwidth) (< (general-length m) max-unit-list-length)) (append indent asline (quote (#\newline))) (if (pair? (car m)) (let* ((asunit-car (unit (car m)))) (if (< (+ (length asunit-car) (length indent)) maxwidth) (append indent asunit-car '(#\newline) (iformat-body (cdr m) (append indent indent-increment))) (append indent (string->list group-string) (iformat-top (car m) (append indent indent-increment)) (iformat-body (cdr m) (append indent indent-increment))))) (if (and (memq (car m) cuddle-first-parameter) (>= (length m) 3)) (let* ((asline-cuddled (line (list (car m) (cadr m))))) (if (< (+ (length asline-cuddled) (length indent)) maxwidth) (append indent asline-cuddled '(#\newline) (iformat-body (cddr m) (append indent indent-increment))) (iformat-top-normal-indent m indent))) (iformat-top-normal-indent m indent))))))) ; Display x (a list of chars and strings), but do NOT display ; the last EOL. (define (display-skip-last-eol x) (cond ((null? x) #t) ((string? (car x)) (display (car x)) (display-skip-last-eol (cdr x))) ((and (char? (car x)) (null? (cdr x)) (eq? (car x) #\newline)) #t) ((char? (car x)) (write-char (car x)) (display-skip-last-eol (cdr x))) (#t (display "BUG!!! in display-skip-last-eof!")))) ; Display formatted expression "m" using indentation string "indent". ; Do NOT display the final end-of-line, due to an interesting subtlety: ; read() doesn't consume the end-of-line, so if we are translating ; what read() has read in, then we need to NOT generate the last end-of-line ; so that they will match up. (define (iformat m) (display-skip-last-eol (iformat-top m (string->list "")))) ; Copy one line from stdin to stdout, end on EOF or newline. (define (copy-line port) (let ((c (peek-char port))) (cond ((eof-object? c) c) ((char=? c #\newline) (read-char) (newline)) (#t (write-char (read-char port)) (copy-line port))))) ; filter stdin to stdout, reading in traditional s-expressions and outputting ; sweet-expressions. Preserve the comments outside an s-expression. (define (iformat-filter) (let ((c (peek-char (current-input-port)))) (cond ((eof-object? c) c) ((char=? c #\newline) (read-char) (newline) (iformat-filter)) ((char=? c #\space) (read-char) (iformat-filter)) ((char=? c #\ht) (read-char) (iformat-filter)) ((char=? c #\;) (copy-line (current-input-port)) (iformat-filter)) (#t (let ((result (read (current-input-port)))) (if (eof-object? result) result (begin (iformat result) (iformat-filter)))))))) (iformat-filter) ===== sweetener-as-sweet-expressions.sscm =========== ; Filter to read S-expressions and output indented sweet-expression. ; ; Copyright (C) 2006-2012 David A. Wheeler. ; ; This software is released as open source software under the "MIT" license: ; ; Permission is hereby granted, free of charge, to any person obtaining a ; copy of this software and associated documentation files (the "Software"), ; to deal in the Software without restriction, including without limitation ; the rights to use, copy, modify, merge, publish, distribute, sublicense, ; and/or sell copies of the Software, and to permit persons to whom the ; Software is furnished to do so, subject to the following conditions: ; ; The above copyright notice and this permission notice shall be included ; in all copies or substantial portions of the Software. ; ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR ; OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ; OTHER DEALINGS IN THE SOFTWARE. ; TODO: Handle arbitrary end-of-line. Currently this assumes that ; lines end with just #\newline. ; Note: The maxwidth may be violated if, at a current indent, there is a ; long non-pair that exceeds it. But other than long atoms, it's respected, ; so it's unlikely to exceed this width in practice: define maxwidth 78 define indent-increment '(#\space #\space) define max-unit-character-length 60 define max-unit-list-length 8 define group-string "\\\\\n" define infix-operators 'and(or xor + - * / ^ ++ -- ** // ^^ < <= > >= = <> != ==) ; Lists with these symbols as first parameter, and aren't shown as 1 line, ; are be shown as a line with SYMBOL FIRST-PARAMETER and *then* indents. ; This is used when in typical uses the first parameter is *special* and ; has a different semantic meaning from later parameters. ; This refinement isn't *necessary* but I think it looks better. define cuddle-first-parameter quote define lambda if when unless case set! let let* letrec let1 do define-module library export import defun block typecase let-syntax letcrec-syntax define-syntax syntax-rules define tab integer->char(9) define LISTLP list(#\() define LISTRP list(#\)) define LISTLBRACE list(integer->char(123)) define LISTRBRACE list(integer->char(125)) ; Return length of x, which may be an improper list. ; If improper, count the two sides as two, so "(a . b)" is length 2. define general-length(x) general-length-inner(x 0) define general-length-inner(x count-so-far) cond null?(x) count-so-far not(pair?(x)) {count-so-far + 1} #t general-length-inner(cdr(x) {count-so-far + 1}) ; Return list x's *contents* represented as a list of characters. ; Each one must use modern-expressions, space-separated; ; it will be surrounded by (...) so no indentation processing is relevant. define unit-list(x) cond null?(x) '() pair?(x) if null?(cdr(x)) unit car(x) append unit(car(x)) '(#\space) unit-list(cdr(x)) #t append('(#\space #\. #\space) unit(x)) ; Return #t if x should be represented using curly-infix notation {...}. define represent-as-infix?(x) and pair? x symbol? car(x) memq car(x) infix-operators list? x {length(x) >= 3} {length(x) <= 6} ; Return tail of an infix expression, as list of chars define infix-tail(op x) cond null?(x) LISTRBRACE pair?(x) append '(#\space) op '(#\space) unit car(x) infix-tail op cdr(x) #t append('(#\space #\. #\space) unit(x) LISTRBRACE) ; Define an association list of Lisp abbreviations. ; Eventually use "group" to define this, but currently the group ; symbol is under discussion, so best not to use it yet: define abbreviations '('(#\') `(#\`) ,(#\,) ,@(#\, #\@)) ; return #t if we should as a traditional abbreviation, e.g., ' define represent-as-abbreviation?(x) and assq car(x) abbreviations pair? cdr(x) null? cddr(x) ; Given list of characters, return the characters that would REPRESENT ; those characters inside a string AFTER the initial double-quote, ; then return the double-quote. This translates newline to \n, etc. define unit-string-tail(x) if null?(x) '(#\") append let (c(car(x))) cond eq?(c #\\) '(#\\ #\\) eq?(c #\") '(#\\ #\") eq?(c #\newline) '(#\\ #\n) eq?(c tab) '(#\\ #\t) #t list(car(x)) unit-string-tail cdr(x) ; Return x represented as a modern-expression unit, as a list of characters. ; Indentation processing *may* be active, but the character sequence ; returned must not depend on that. ; This is widely-used; may want to memoize this. define unit(x) cond null?(x) string->list("()") number?(x) string->list(number->string(x)) boolean?(x) if(x '(#\# #\t) '(#\# #\f)) symbol?(x) string->list(symbol->string(x)) string?(x) append '(#\") unit-string-tail(string->list(x)) pair?(x) cond represent-as-abbreviation?(x) append cadr(assq(car(x) abbreviations)) unit(cadr(x)) symbol?(car(x)) if represent-as-infix?(x) append LISTLBRACE unit cadr(x) infix-tail unit(car(x)) cddr(x) append unit(car(x)) LISTLP unit-list(cdr(x)) LISTRP #t append(LISTLP unit-list(x) LISTRP) #t string->list(object->string(x)) ; Return x, the rest of the list, as a list of characters. See line(). define line-tail(x) cond null?(x) '() pair?(x) append(unit(car(x)) '(#\space) line-tail(cdr(x))) #t append('(#\. #\space) unit(x)) ; Return x represented as a line of space-separated modern-expression units, ; as a list of characters. ; Indentation processing *MUST* be active. define line(x) cond not(pair?(x)) unit(x) represent-as-abbreviation?(x) unit(x) represent-as-infix?(x) unit(x) {pair?(x) and null?(cdr(x))} if symbol?(car(x)) append unit(car(x)) LISTLP LISTRP append LISTLP unit(car(x)) LISTRP #t append(unit(car(x)) '(#\space) line-tail(cdr(x))) ; Input: expression "m" with indentation string "indent". ; Output: List of characters representing it (#\newline for new line). ; This tail handles the body of a list (after its first entry) when ; indentation processing is active, each of these is its own initial line. define iformat-body(m indent) if null?(m) '() append iformat-top car(m) indent iformat-body cdr(m) indent ; Return list m represented in the typical indent style. define iformat-top-normal-indent(m indent) append iformat-top car(m) indent iformat-body cdr(m) append(indent indent-increment) ; Input: expression "m" with indentation string "indent". ; Output: List of characters representing it (#\newline for new line). ; At this point, we're at the beginning of a possibly-indented ; line with sweet-expression (indentation) processing active. define iformat-top(m indent) if not(pair?(m)) append indent unit(m) '(#\newline) let* (asline(line(m)) length-asline(length(asline))) if and {length-asline < max-unit-character-length} {{length-asline + length(indent)} < maxwidth} {general-length(m) < max-unit-list-length} append indent asline '(#\newline) if pair?(car(m)) let* (asunit-car(unit(car(m)))) if {{length(asunit-car) + length(indent)} < maxwidth} append indent asunit-car '(#\newline) iformat-body cdr(m) append(indent indent-increment) append indent string->list group-string iformat-top car(m) append(indent indent-increment) iformat-body cdr(m) append(indent indent-increment) if {memq(car(m) cuddle-first-parameter) and {length(m) >= 3}} let* (asline-cuddled(line(list(car(m) cadr(m))))) if {{length(asline-cuddled) + length(indent)} < maxwidth} append indent asline-cuddled '(#\newline) iformat-body cddr(m) append(indent indent-increment) iformat-top-normal-indent m indent iformat-top-normal-indent m indent ; Display x (a list of chars and strings), but do NOT display ; the last EOL. define display-skip-last-eol(x) cond null?(x) #t string?(car(x)) display car(x) display-skip-last-eol cdr(x) {char?(car(x)) and null?(cdr(x)) and eq?(car(x) #\newline)} #t char?(car(x)) write-char car(x) display-skip-last-eol cdr(x) #t display("BUG!!! in display-skip-last-eof!") ; Display formatted expression "m" using indentation string "indent". ; Do NOT display the final end-of-line, due to an interesting subtlety: ; read() doesn't consume the end-of-line, so if we are translating ; what read() has read in, then we need to NOT generate the last end-of-line ; so that they will match up. define iformat(m) display-skip-last-eol iformat-top(m string->list("")) ; Copy one line from stdin to stdout, end on EOF or newline. define copy-line(port) let (c(peek-char(port))) cond eof-object?(c) c char=?(c #\newline) read-char() newline() #t write-char(read-char(port)) copy-line(port) ; filter stdin to stdout, reading in traditional s-expressions and outputting ; sweet-expressions. Preserve the comments outside an s-expression. define iformat-filter() let (c(peek-char(current-input-port()))) cond eof-object?(c) c char=?(c #\newline) read-char() newline() iformat-filter() char=?(c #\space) read-char() iformat-filter() char=?(c #\ht) read-char() iformat-filter() char=?(c #\;) copy-line current-input-port() iformat-filter() #t let (result(read(current-input-port()))) if eof-object?(result) result begin iformat(result) iformat-filter() iformat-filter() ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/ _______________________________________________ Readable-discuss mailing list Readable-discuss@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/readable-discuss