FYI: The reformatter, which I plan to soon rename to "sweetener", is working 
much better now.  Its output also serves as an interesting demo of 
sweet-expressions.  Like any reformatter, it won't necessarily choose the 
representation a human would choose, but it's often reasonable.

I've also been using it to test round-trips (sweeten|unsweeten|prettyprint 
should produce the same thing as prettyprint if given S-expressions), which 
helps wring out bugs in both tools.

Below is an example of a program in S-expression format, followed by the result 
of the "sweetener" converting it to sweet-expressions.  The program I chose to 
reformat is the sweetener itself :-).

--- David A. Wheeler



===== sweetener-as-s-expressions.scm ===========


; Filter to read S-expressions and output indented sweet-expression.
;
; Copyright (C) 2006-2012 David A. Wheeler.
;
; This software is released as open source software under the "MIT" license:
;
; Permission is hereby granted, free of charge, to any person obtaining a
; copy of this software and associated documentation files (the "Software"),
; to deal in the Software without restriction, including without limitation
; the rights to use, copy, modify, merge, publish, distribute, sublicense,
; and/or sell copies of the Software, and to permit persons to whom the
; Software is furnished to do so, subject to the following conditions:
;
; The above copyright notice and this permission notice shall be included
; in all copies or substantial portions of the Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
; OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
; OTHER DEALINGS IN THE SOFTWARE.

; TODO: Handle arbitrary end-of-line. Currently this assumes that
;       lines end with just #\newline.

; Note: The maxwidth may be violated if, at a current indent, there is a
; long non-pair that exceeds it.  But other than long atoms, it's respected,
; so it's unlikely to exceed this width in practice:
(define maxwidth 78)

(define indent-increment
  '(#\space #\space))
(define max-unit-character-length 60)
(define max-unit-list-length 8)

(define group-string "\\\\
")
(define infix-operators
  '(and or
        xor
        +
        -
        *
        /
        ^
        ++
        --
        **
        //
        ^^
        <
        <=
        >
        >=
        =
        <>
        !=
        ==))

; Lists with these symbols as first parameter, and aren't shown as 1 line,
; are be shown as a line with SYMBOL FIRST-PARAMETER and *then* indents.
; This is used when in typical uses the first parameter is *special* and
; has a different semantic meaning from later parameters.
; This refinement isn't *necessary* but I think it looks better.
(define cuddle-first-parameter
  '(define lambda
     if
     when
     unless
     case
     set!
     let
     let*
     letrec
     let1
     do
     define-module
     library
     export
     import
     defun
     block
     typecase
     let-syntax
     letcrec-syntax
     define-syntax
     syntax-rules))


(define tab (integer->char 9))
(define LISTLP (list #\())
(define LISTRP (list #\)))
(define LISTLBRACE (list (integer->char 123)))
(define LISTRBRACE (list (integer->char 125)))


; Return length of x, which may be an improper list.
; If improper, count the two sides as two, so "(a . b)" is length 2.
(define (general-length x)
  (general-length-inner x 0))

(define (general-length-inner x count-so-far)
  (cond ((null? x) count-so-far)
        ((not (pair? x)) (+ count-so-far 1))
        (#t
         (general-length-inner (cdr x) (+ count-so-far 1)))))

; Return list x's *contents* represented as a list of characters.
; Each one must use modern-expressions, space-separated;
; it will be surrounded by (...) so no indentation processing is relevant.
(define (unit-list x)
  (cond ((null? x) (quote ()))
        ((pair? x)
         (if (null? (cdr x))
           (unit (car x))
           (append
             (unit (car x))
             '(#\space)
             (unit-list (cdr x)))))
        (#t
         (append (quote (#\space #\. #\space)) (unit x)))))


; Return #t if x should be represented using curly-infix notation {...}.
(define (represent-as-infix? x)
  (and (pair? x)
       (symbol? (car x))
       (memq (car x) infix-operators)
       (list? x)
       (>= (length x) 3)
       (<= (length x) 6)))

; Return tail of an infix expression, as list of chars
(define (infix-tail op x)
  (cond ((null? x) LISTRBRACE)
        ((pair? x)
         (append
           '(#\space)
           op
           '(#\space)
           (unit (car x))
           (infix-tail op (cdr x))))
        (#t
         (append
           '(#\space #\. #\space)
           (unit x)
           LISTRBRACE))))

; Define an association list of Lisp abbreviations.
; Eventually use "group" to define this, but currently the group
; symbol is under discussion, so best not to use it yet:
(define abbreviations
  '('(#\')
    `(#\`)
    ,(#\,)
    ,@(#\, #\@)))


; return #t if we should as a traditional abbreviation, e.g., '
(define (represent-as-abbreviation? x)
  (and (assq (car x) abbreviations)
       (pair? (cdr x))
       (null? (cddr x))))

; Given list of characters, return the characters that would REPRESENT
; those characters inside a string AFTER the initial double-quote,
; then return the double-quote.  This translates newline to \n, etc.
(define (unit-string-tail x)
  (if (null? x)
    '(#\")
    (append
      (let ((c (car x)))
        (cond ((eq? c #\\) (quote (#\\ #\\)))
              ((eq? c #\") (quote (#\\ #\")))
              ((eq? c #\newline) (quote (#\\ #\n)))
              ((eq? c tab) (quote (#\\ #\t)))
              (#t (list (car x)))))
      (unit-string-tail (cdr x)))))

; Return x represented as a modern-expression unit, as a list of characters.
; Indentation processing *may* be active, but the character sequence
; returned must not depend on that.
; This is widely-used; may want to memoize this.
(define (unit x)
  (cond ((null? x) (string->list "()"))
        ((number? x) (string->list (number->string x)))
        ((boolean? x)
         (if x (quote (#\# #\t)) (quote (#\# #\f))))
        ((symbol? x) (string->list (symbol->string x)))
        ((string? x)
         (append
           '(#\")
           (unit-string-tail (string->list x))))
        ((pair? x)
         (cond ((represent-as-abbreviation? x)
                (append
                  (cadr (assq (car x) abbreviations))
                  (unit (cadr x))))
               ((symbol? (car x))
                (if (represent-as-infix? x)
                  (append
                    LISTLBRACE
                    (unit (cadr x))
                    (infix-tail (unit (car x)) (cddr x)))
                  (append
                    (unit (car x))
                    LISTLP
                    (unit-list (cdr x))
                    LISTRP)))
               (#t (append LISTLP (unit-list x) LISTRP))))
        (#t (string->list (object->string x)))))


; Return x, the rest of the list, as a list of characters. See line().
(define (line-tail x)
  (cond ((null? x) (quote ()))
        ((pair? x)
         (append
           (unit (car x))
           '(#\space)
           (line-tail (cdr x))))
        (#t (append (quote (#\. #\space)) (unit x)))))

; Return x represented as a line of space-separated modern-expression units,
; as a list of characters.
; Indentation processing *MUST* be active.
(define (line x)
  (cond ((not (pair? x)) (unit x))
        ((represent-as-abbreviation? x) (unit x))
        ((represent-as-infix? x) (unit x))
        ((and (pair? x) (null? (cdr x)))
         (if (symbol? (car x))
           (append (unit (car x)) LISTLP LISTRP)
           (append LISTLP (unit (car x)) LISTRP)))
        (#t
         (append
           (unit (car x))
           '(#\space)
           (line-tail (cdr x))))))

; Input: expression "m" with indentation string "indent".
; Output: List of characters representing it (#\newline for new line).
; This tail handles the body of a list (after its first entry) when
; indentation processing is active, each of these is its own initial line.
(define (iformat-body m indent)
  (if (null? m)
    '()
    (append
      (iformat-top (car m) indent)
      (iformat-body (cdr m) indent))))

; Return list m represented in the typical indent style.
(define (iformat-top-normal-indent m indent)
  (append
    (iformat-top (car m) indent)
    (iformat-body
      (cdr m)
      (append indent indent-increment))))

; Input: expression "m" with indentation string "indent".
; Output: List of characters representing it (#\newline for new line).
; At this point, we're at the beginning of a possibly-indented
; line with sweet-expression (indentation) processing active.
(define (iformat-top m indent)
  (if (not (pair? m))
    (append indent (unit m) (quote (#\newline)))
    (let* ((asline (line m))
           (length-asline (length asline)))
      (if (and (< length-asline max-unit-character-length)
               (< (+ length-asline (length indent)) maxwidth)
               (< (general-length m) max-unit-list-length))
        (append indent asline (quote (#\newline)))
        (if (pair? (car m))
          (let* ((asunit-car (unit (car m))))
            (if (< (+ (length asunit-car) (length indent))
                   maxwidth)
              (append
                indent
                asunit-car
                '(#\newline)
                (iformat-body
                  (cdr m)
                  (append indent indent-increment)))
              (append
                indent
                (string->list group-string)
                (iformat-top
                  (car m)
                  (append indent indent-increment))
                (iformat-body
                  (cdr m)
                  (append indent indent-increment)))))
          (if (and (memq (car m) cuddle-first-parameter)
                   (>= (length m) 3))
            (let* ((asline-cuddled (line (list (car m) (cadr m)))))
              (if (< (+ (length asline-cuddled) (length indent))
                     maxwidth)
                (append
                  indent
                  asline-cuddled
                  '(#\newline)
                  (iformat-body
                    (cddr m)
                    (append indent indent-increment)))
                (iformat-top-normal-indent m indent)))
            (iformat-top-normal-indent m indent)))))))

; Display x (a list of chars and strings), but do NOT display
; the last EOL.
(define (display-skip-last-eol x)
  (cond ((null? x) #t)
        ((string? (car x))
         (display (car x))
         (display-skip-last-eol (cdr x)))
        ((and (char? (car x))
              (null? (cdr x))
              (eq? (car x) #\newline))
         #t)
        ((char? (car x))
         (write-char (car x))
         (display-skip-last-eol (cdr x)))
        (#t (display "BUG!!! in display-skip-last-eof!"))))

; Display formatted expression "m" using indentation string "indent".
; Do NOT display the final end-of-line, due to an interesting subtlety:
; read() doesn't consume the end-of-line, so if we are translating
; what read() has read in, then we need to NOT generate the last end-of-line
; so that they will match up.
(define (iformat m)
  (display-skip-last-eol
    (iformat-top m (string->list ""))))



; Copy one line from stdin to stdout, end on EOF or newline.
(define (copy-line port)
  (let ((c (peek-char port)))
    (cond ((eof-object? c) c)
          ((char=? c #\newline) (read-char) (newline))
          (#t
           (write-char (read-char port))
           (copy-line port)))))

; filter stdin to stdout, reading in traditional s-expressions and outputting
; sweet-expressions.  Preserve the comments outside an s-expression.
(define (iformat-filter)
  (let ((c (peek-char (current-input-port))))
    (cond ((eof-object? c) c)
          ((char=? c #\newline)
           (read-char)
           (newline)
           (iformat-filter))
          ((char=? c #\space) (read-char) (iformat-filter))
          ((char=? c #\ht) (read-char) (iformat-filter))
          ((char=? c #\;)
           (copy-line (current-input-port))
           (iformat-filter))
          (#t
           (let ((result (read (current-input-port))))
             (if (eof-object? result)
               result
               (begin (iformat result) (iformat-filter))))))))

(iformat-filter)




===== sweetener-as-sweet-expressions.sscm ===========


; Filter to read S-expressions and output indented sweet-expression.
;
; Copyright (C) 2006-2012 David A. Wheeler.
;
; This software is released as open source software under the "MIT" license:
;
; Permission is hereby granted, free of charge, to any person obtaining a
; copy of this software and associated documentation files (the "Software"),
; to deal in the Software without restriction, including without limitation
; the rights to use, copy, modify, merge, publish, distribute, sublicense,
; and/or sell copies of the Software, and to permit persons to whom the
; Software is furnished to do so, subject to the following conditions:
;
; The above copyright notice and this permission notice shall be included
; in all copies or substantial portions of the Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
; OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
; OTHER DEALINGS IN THE SOFTWARE.

; TODO: Handle arbitrary end-of-line. Currently this assumes that
;       lines end with just #\newline.

; Note: The maxwidth may be violated if, at a current indent, there is a
; long non-pair that exceeds it.  But other than long atoms, it's respected,
; so it's unlikely to exceed this width in practice:
define maxwidth 78 

define indent-increment '(#\space #\space) 
define max-unit-character-length 60 
define max-unit-list-length 8 

define group-string "\\\\\n" 
define infix-operators 
  'and(or xor + - * / ^ ++ -- ** // ^^ < <= > >= = <> != ==)

; Lists with these symbols as first parameter, and aren't shown as 1 line,
; are be shown as a line with SYMBOL FIRST-PARAMETER and *then* indents.
; This is used when in typical uses the first parameter is *special* and
; has a different semantic meaning from later parameters.
; This refinement isn't *necessary* but I think it looks better.
define cuddle-first-parameter 
  quote
    define lambda 
      if
      when
      unless
      case
      set!
      let
      let*
      letrec
      let1
      do
      define-module
      library
      export
      import
      defun
      block
      typecase
      let-syntax
      letcrec-syntax
      define-syntax
      syntax-rules


define tab integer->char(9) 
define LISTLP list(#\() 
define LISTRP list(#\)) 
define LISTLBRACE list(integer->char(123)) 
define LISTRBRACE list(integer->char(125)) 


; Return length of x, which may be an improper list.
; If improper, count the two sides as two, so "(a . b)" is length 2.
define general-length(x) general-length-inner(x 0) 

define general-length-inner(x count-so-far) 
  cond
    null?(x) count-so-far 
    not(pair?(x)) {count-so-far + 1} 
    #t general-length-inner(cdr(x) {count-so-far + 1}) 

; Return list x's *contents* represented as a list of characters.
; Each one must use modern-expressions, space-separated;
; it will be surrounded by (...) so no indentation processing is relevant.
define unit-list(x) 
  cond
    null?(x) '() 
    pair?(x)
      if null?(cdr(x)) 
        unit car(x) 
        append unit(car(x)) '(#\space) unit-list(cdr(x)) 
    #t append('(#\space #\. #\space) unit(x)) 


; Return #t if x should be represented using curly-infix notation {...}.
define represent-as-infix?(x) 
  and
    pair? x 
    symbol? car(x) 
    memq car(x) infix-operators 
    list? x 
    {length(x) >= 3}
    {length(x) <= 6}

; Return tail of an infix expression, as list of chars
define infix-tail(op x) 
  cond
    null?(x) LISTRBRACE 
    pair?(x)
      append
        '(#\space)
        op
        '(#\space)
        unit car(x) 
        infix-tail op cdr(x) 
    #t append('(#\space #\. #\space) unit(x) LISTRBRACE) 

; Define an association list of Lisp abbreviations.
; Eventually use "group" to define this, but currently the group
; symbol is under discussion, so best not to use it yet:
define abbreviations '('(#\') `(#\`) ,(#\,) ,@(#\, #\@)) 


; return #t if we should as a traditional abbreviation, e.g., '
define represent-as-abbreviation?(x) 
  and
    assq car(x) abbreviations 
    pair? cdr(x) 
    null? cddr(x) 

; Given list of characters, return the characters that would REPRESENT
; those characters inside a string AFTER the initial double-quote,
; then return the double-quote.  This translates newline to \n, etc.
define unit-string-tail(x) 
  if null?(x) 
    '(#\")
    append
      let (c(car(x))) 
        cond
          eq?(c #\\) '(#\\ #\\) 
          eq?(c #\") '(#\\ #\") 
          eq?(c #\newline) '(#\\ #\n) 
          eq?(c tab) '(#\\ #\t) 
          #t list(car(x)) 
      unit-string-tail cdr(x) 

; Return x represented as a modern-expression unit, as a list of characters.
; Indentation processing *may* be active, but the character sequence
; returned must not depend on that.
; This is widely-used; may want to memoize this.
define unit(x) 
  cond
    null?(x) string->list("()") 
    number?(x) string->list(number->string(x)) 
    boolean?(x) if(x '(#\# #\t) '(#\# #\f)) 
    symbol?(x) string->list(symbol->string(x)) 
    string?(x)
      append '(#\") unit-string-tail(string->list(x)) 
    pair?(x)
      cond
        represent-as-abbreviation?(x)
          append cadr(assq(car(x) abbreviations)) unit(cadr(x)) 
        symbol?(car(x))
          if represent-as-infix?(x) 
            append
              LISTLBRACE
              unit cadr(x) 
              infix-tail unit(car(x)) cddr(x) 
            append unit(car(x)) LISTLP unit-list(cdr(x)) LISTRP 
        #t append(LISTLP unit-list(x) LISTRP) 
    #t string->list(object->string(x)) 


; Return x, the rest of the list, as a list of characters. See line().
define line-tail(x) 
  cond
    null?(x) '() 
    pair?(x) append(unit(car(x)) '(#\space) line-tail(cdr(x))) 
    #t append('(#\. #\space) unit(x)) 

; Return x represented as a line of space-separated modern-expression units,
; as a list of characters.
; Indentation processing *MUST* be active.
define line(x) 
  cond
    not(pair?(x)) unit(x) 
    represent-as-abbreviation?(x) unit(x) 
    represent-as-infix?(x) unit(x) 
    {pair?(x) and null?(cdr(x))}
      if symbol?(car(x)) 
        append unit(car(x)) LISTLP LISTRP 
        append LISTLP unit(car(x)) LISTRP 
    #t append(unit(car(x)) '(#\space) line-tail(cdr(x))) 

; Input: expression "m" with indentation string "indent".
; Output: List of characters representing it (#\newline for new line).
; This tail handles the body of a list (after its first entry) when
; indentation processing is active, each of these is its own initial line.
define iformat-body(m indent) 
  if null?(m) 
    '()
    append
      iformat-top car(m) indent 
      iformat-body cdr(m) indent 

; Return list m represented in the typical indent style.
define iformat-top-normal-indent(m indent) 
  append
    iformat-top car(m) indent 
    iformat-body cdr(m) append(indent indent-increment) 

; Input: expression "m" with indentation string "indent".
; Output: List of characters representing it (#\newline for new line).
; At this point, we're at the beginning of a possibly-indented
; line with sweet-expression (indentation) processing active.
define iformat-top(m indent) 
  if not(pair?(m)) 
    append indent unit(m) '(#\newline) 
    let* (asline(line(m)) length-asline(length(asline))) 
      if
        and
          {length-asline < max-unit-character-length}
          {{length-asline + length(indent)} < maxwidth}
          {general-length(m) < max-unit-list-length}
        append indent asline '(#\newline) 
        if pair?(car(m)) 
          let* (asunit-car(unit(car(m)))) 
            if {{length(asunit-car) + length(indent)} < maxwidth} 
              append
                indent
                asunit-car
                '(#\newline)
                iformat-body cdr(m) append(indent indent-increment) 
              append
                indent
                string->list group-string 
                iformat-top car(m) append(indent indent-increment) 
                iformat-body cdr(m) append(indent indent-increment) 
          if {memq(car(m) cuddle-first-parameter) and {length(m) >= 3}} 
            let* (asline-cuddled(line(list(car(m) cadr(m))))) 
              if {{length(asline-cuddled) + length(indent)} < maxwidth} 
                append
                  indent
                  asline-cuddled
                  '(#\newline)
                  iformat-body cddr(m) append(indent indent-increment) 
                iformat-top-normal-indent m indent 
            iformat-top-normal-indent m indent 

; Display x (a list of chars and strings), but do NOT display
; the last EOL.
define display-skip-last-eol(x) 
  cond
    null?(x) #t 
    string?(car(x))
      display car(x) 
      display-skip-last-eol cdr(x) 
    {char?(car(x)) and null?(cdr(x)) and eq?(car(x) #\newline)}
      #t
    char?(car(x))
      write-char car(x) 
      display-skip-last-eol cdr(x) 
    #t display("BUG!!! in display-skip-last-eof!") 

; Display formatted expression "m" using indentation string "indent".
; Do NOT display the final end-of-line, due to an interesting subtlety:
; read() doesn't consume the end-of-line, so if we are translating
; what read() has read in, then we need to NOT generate the last end-of-line
; so that they will match up.
define iformat(m) 
  display-skip-last-eol iformat-top(m string->list("")) 



; Copy one line from stdin to stdout, end on EOF or newline.
define copy-line(port) 
  let (c(peek-char(port))) 
    cond
      eof-object?(c) c 
      char=?(c #\newline) read-char() newline() 
      #t write-char(read-char(port)) copy-line(port) 

; filter stdin to stdout, reading in traditional s-expressions and outputting
; sweet-expressions.  Preserve the comments outside an s-expression.
define iformat-filter() 
  let (c(peek-char(current-input-port()))) 
    cond
      eof-object?(c) c 
      char=?(c #\newline) read-char() newline() iformat-filter() 
      char=?(c #\space) read-char() iformat-filter() 
      char=?(c #\ht) read-char() iformat-filter() 
      char=?(c #\;)
        copy-line current-input-port() 
        iformat-filter()
      #t
        let (result(read(current-input-port()))) 
          if eof-object?(result) 
            result
            begin iformat(result) iformat-filter() 

iformat-filter()


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Readable-discuss mailing list
Readable-discuss@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/readable-discuss

Reply via email to