On Tue, 25 May 2004, Tom Jackson wrote:

> On Tue, 2004-05-25 at 08:20, Daniël Mantione wrote:
>
> > To those unknown to the OpenACS compiler; OpenACS has its own adp parser
> > which is way more powerfull than the AOLserver adp parser.
>
> Actually the OpenACS parser doesn't exist, ATS uses ns_register_adptag
> and ns_adp_parse. One big problem with this setup is that recusion is
> difficult, making it difficult to extend the templating system, but it
> works great for tags which do not need to parse their content.

Hmmm. Are we looking at the same code? I'm quite sure it converts the
.adp & .tcl into a Tcl script, which it then evaluates. That would be
contradictory with ns_adp_parse. Well, time to take a closer look at it,

> > What's the idea? Well, such a compiler would not be a very large piece of
> > Tcl code as far as I can see,

> Tcl is very limited in data structures. You would need to be able to map
> each PHP data structure to tcl.

Yes, that's the hard part. I think it should be possible to map most
datastructures, but for classes I don't know yet if that'll be possible.

> > Well, I was that enthousiastic of the idea that I started doing
> > programming, and within 3 hours (!) I programmed a working tokenizer for
> > PHP. A parser would be a bit more work of course, but it seems the idea is
> > very feasible.
>
> I've done a template tokenizer/parser/compiler for a tcl-like templating
> language to tcl code, I'd like to do the same thing with PHP. It would
> be interesting to see what a tokenizer for PHP looks like.

Here 'ya go :) It tokenizes already a lot of php-files correctly, but
there will of course certainly be bugs.

Daniël


#!/usr/bin/tclsh
#
#This script should become a Php -> Tcl compiler

array set keywords {
    __CLASS__           _CLASS
    __FILE__            _FILE
    __FUNCTION          _FUNCTION
    __LINE__            _LINE
    __METHOD            _METHOD
    exception           EXCEPTION
    and                 AND
    array               ARRAY
    as                  AS
    break               BREAK
    case                CASE
    cfunction           CFUNCTION
    class               CLASS
    const               CONST
    continue            CONTINUE
    declare             DECLARE
    default             DEFAULT
    die                 DIE
    do                  DO
    else                ELSE
    elseif              ELSEIF
    empty               EMPTY
    enddeclare          ENDDECLARE
    endfor              ENDFOR
    endforeach          ENDFOREACH
    endif               ENDIF
    endswitch           ENDSWITCH
    endwhile            ENDWHILE
    eval                EVAL
    exit                EXIT
    extends             EXTENDS
    for                 FOR
    foreach             FOREACH
    function            FUNCTION
    global              GLOBAL
    if                  IF
    include             INCLUDE
    include_once        INCLUDEONCE
    isset               ISSET
    list                LIST
    new                 NEW
    old_function        OLDFUNCTION
    or                  OR
    php_user_filter     PHPUSERFILTER
    print               PRINT
    require             REQUIRE
    require_once        REQUIRE_ONCE
    return              RETURN
    static              STATIC
    switch              SWITCH
    unset               UNSET
    use                 USE
    var                 VAR
    while               WHILE
    xor                 XOR
}

proc php_skip_whitespace {} {
    global php ptr tokenendptr token

    set c [string index $php $ptr]
    while {[string match "\[ \t\n\r\]" $c]} {
        incr ptr
        set c [string index $php $ptr]
    }
}

proc php_get_token {} {
    global php ptr tokenendptr token endtoken keywords

    php_skip_whitespace

    switch -glob -- [string range $php $ptr [expr $ptr + 1]] {
        "\\?>" {
          # End of chunk
          if {$endtoken == "?>"} then {
            set token END_OF_CHUNK
            set tokenendptr [expr $ptr + 2]
          }
        }
        "%>" {
          # End of chunk
          if {$endtoken == "%>"} then {
            set token END_OF_CHUNK
            set tokenendptr [expr $ptr + 2]
          }
        }
        "//" {
           # Comment, skip until end of line
           incr ptr 2
           set c [string index $php $ptr]
           while {$c != "\n"} {
              incr ptr
              set c [string index $php $ptr]
           }
           incr ptr
           # Recurse to get really token (might be whitespace & comment again).
           php_get_token
        }
        "/\\*" {
           # Comment, skip until "*/"
           incr ptr 2
           set s [string range $php $ptr [expr $ptr + 1]]
           while {$s != "*/"} {
              incr ptr
              set s [string range $php $ptr [expr $ptr + 1]]
           }
           incr ptr 2
           # Recurse to get really token (might be whitespace & comment again).
           php_get_token
        }
        "'*" {
           # Literal string
           set tokenendptr $ptr
           incr tokenendptr
           set c [string index $php $tokenendptr]
           set ident ""
           while {$c != "'"} {
              append ident $c
              incr tokenendptr
              set c [string index $php $tokenendptr]
           }
           incr tokenendptr
           set token [list LSTRING $ident]
        }
        "\"*" {
            # Expandable string
            set tokenendptr $ptr
            incr tokenendptr
            set c [string index $php $tokenendptr]
            if {$c == "\\"} then {
               incr tokenendptr
               set c [string index $php $tokenendptr]
               append ident $c
               incr tokenendptr
               set c [string index $php $tokenendptr]
            }
            set ident ""
            while {$c != "\""} {
               append ident $c
               incr tokenendptr
               set c [string index $php $tokenendptr]
               if {$c == "\\"} then {
                  incr tokenendptr
                  set c [string index $php $tokenendptr]
                  append ident $c
                  incr tokenendptr
                  set c [string index $php $tokenendptr]
               }
            }
            incr tokenendptr
            set token [list ESTRING $ident]
        }
        "(*" {
            set token LPAR
            set tokenendptr [expr $ptr + 1]
        }
        ")*" {
            set token RPAR
            set tokenendptr [expr $ptr + 1]
        }
        ";*" {
            set token SEMICOLON
            set tokenendptr [expr $ptr + 1]
        }
        ",*" {
            set token COMMA
            set tokenendptr [expr $ptr + 1]
        }
        "=*" {
            set token ASSIGN
            set tokenendptr [expr $ptr + 1]
        }
        "+*" {
            set token PLUS
            set tokenendptr [expr $ptr + 1]
        }
        "-*" {
            set token MINUS
            set tokenendptr [expr $ptr + 1]
        }
        "\\**" {
            set token TIMES
            set tokenendptr [expr $ptr + 1]
        }
        "/*" {
            set token DIV
            set tokenendptr [expr $ptr + 1]
        }
        ".*" {
            set token CONCAT
            set tokenendptr [expr $ptr + 1]
        }
        "==" {
            set token EQUAL
            set tokenendptr [expr $ptr + 2]
        }
        "!=" {
            set token UNEQUAL
            set tokenendptr [expr $ptr + 2]
        }
        "<*" {
            set token LT
            set tokenendptr [expr $ptr + 1]
        }
        ">*" {
            set token GT
            set tokenendptr [expr $ptr + 1]
        }
        "<=" {
            set token LTE
            set tokenendptr [expr $ptr + 2]
        }
        ">=" {
            set token GTE
            set tokenendptr [expr $ptr + 2]
        }
        "\\?*" {
            set token IFEXPR
            set tokenendptr [expr $ptr + 1]
        }
        "{*" {
            set token LBRACE
            set tokenendptr [expr $ptr + 1]
        }
        "}*" {
            set token RBRACE
            set tokenendptr [expr $ptr + 1]
        }
        "\\\[*" {
            set token LBRACKET
            set tokenendptr [expr $ptr + 1]
        }
        "\\\]*" {
            set token RBRACKET
            set tokenendptr [expr $ptr + 1]

        }
        "$*" {
            #Variable
            set ident ""
            set tokenendptr $ptr
            incr tokenendptr
            set c [string index $php $tokenendptr]
            while {![regexp "\[\\\[\\\];(){},. \t\n\r\]" $c]} {
                append ident $c
                incr tokenendptr
                set c [string index $php $tokenendptr]
            }
            set token [list VARIABLE $ident]
        }
        default {
            #Identifier or keyword
            set ident ""
            set tokenendptr $ptr
            set c [string index $php $tokenendptr]
            while {![regexp "\[\\\[\\\];(){},. \t\n\r\]" $c]} {
                append ident $c
                incr tokenendptr
                set c [string index $php $tokenendptr]
            }
            if {$ident == ""} then {
                set token [list ERROR "Invalid character '$c'."]
                return
            }
            if [string is integer $ident] then {
                # PHP integer notation is exactly the same as for TCL
                set token [list INTEGER $ident]
            } else {
                if [info exists keywords($ident)] then {
                    set token [list $keywords($ident)]
                } else {
                    set token [list IDENT $ident]
                }
            }
        }
    }
}

proc php_init_tokenizer {} {
    global php ptr tokenendptr token

    set tokenendptr $ptr

    php_get_token
}

proc php_tokenize_chunk {} {
    global php ptr token tokenendptr

    php_init_tokenizer

    set parsed ""

    while {$token != "END_OF_CHUNK" && $token != "ERROR"} {
        puts $token
        lappend parsed $token
        set ptr $tokenendptr
        php_get_token
    }
    set ptr $tokenendptr
    return $parsed
}

proc php_php2chunks {phpcode} {
    global php ptr endtoken

    set php $phpcode
    set result ""
    set ptr 0
    set chunk ""
    set state "notag"
    while {$ptr < [string length $php]} {
        switch -glob -- [string range $php $ptr [expr $ptr + 4]] {
            "<\\?php" {
                lappend result [list html $chunk]
                set chunk ""
                set endtoken "?>"
                set ptr [expr $ptr + 5]
                        lappend result [list code [php_tokenize_chunk]]
                }
            "<\\?*" {
                lappend result [list html $chunk]
                set chunk ""
                set endtoken "?>"
                set ptr [expr $ptr + 2]
                lappend result [list code [php_tokenize_chunk]]
            }
            "<%=*" {
                    lappend result [list html $chunk]
                set chunk ""
                set endtoken "%>"
                set ptr [expr $ptr + 3]
                lappend result [list expr [php_tokenize_chunk]]
                }
            "<%*" {
                    lappend result [list html $chunk]
                set chunk ""
                set endtoken "%>"
                set ptr [expr $ptr + 2]
                lappend result [list code [php_tokenize_chunk]]
                }
            "<*" {
                incr ptr
                append chunk "<"
                set c [string index $php $ptr]
                while {$c != ">"} {
                  if {$c == "\""} then {
                      append chunk $c
                      incr ptr
                      set c [string index $php $ptr]
                      while {$c != "\""} {
                         append chunk $c
                         incr ptr
                         set c [string index $php $ptr]
                      }
                  }
                  if {$c == "'"} then {
                      append chunk $c
                      incr ptr
                      set c [string index $php $ptr]
                      while {$c != "'"} {
                         append chunk $c
                         incr ptr
                         set c [string index $php $ptr]
                      }
                  }
                  append chunk $c
                  incr ptr
                  set c [string index $php $ptr]
               }
            }
        }
        set c [string index $php $ptr]
        append chunk $c
        incr ptr
    }
    lappend result [list html $chunk]
}

proc process_chunks {chunks} {
    foreach chunk $chunks {
        foreach {type data} $chunk {}
        switch $type {
            "html" {
                puts "ns_puts {[string map "{ \\{ } \\}" $data]}"
             }
            "code" {
                puts "code chunk"
            }
            "expr" {
                puts "expr chunk"
            }
        }
    }
}

proc php_compile {php} {
    set chunks [php_php2chunks $php]
    puts $chunks
    process_chunks $chunks
}

if {$argc != 1} then {
    puts file2 "Usage: phptcl <filename.php>"
} else {
    set f [open [lindex $argv 0] r]
    set php [read $f]
    close $f
    php_compile $php
}


--
AOLserver - http://www.aolserver.com/

To Remove yourself from this list, simply send an email to <[EMAIL PROTECTED]> with the
body of "SIGNOFF AOLSERVER" in the email message. You can leave the Subject: field of 
your email blank.

Reply via email to