Hi Fabio,
On Sun, Oct 25, 2009 at 10:15 PM, Fabio Mascarenhas <[email protected]> wrote:
> Hi Ryan,
>
> Can you send me the Lua code you mention, with the proper attribution?
>
I attached the file to the message. If that does not work let me know. The
developer and the licence are at the top of the file.
--
Regards,
Ryan
--[[
File: autoformat.lua
Copyright (C) 2000-2008 Christopher Moore ([email protected])
This software is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write the Free Software Foundation, Inc., 51
Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
--]]
-- Declares this file as the module "orbit.beautifier" using the Lua 5.1
-- `module` function; package.seeall lets the module body read existing globals.
-- NOTE(review): under `module`, the undeclared globals assigned further down
-- (class, Lex, Node, lex, ...) should become fields of the module table rather
-- than of _G -- confirm against the Lua version this is deployed on.
module( "orbit.beautifier", package.seeall )
-- HELPER FUNCTIONS ------------------------------------------------------------
--
-- Shared metatable: a table carrying it can call the `table` library
-- functions as methods (t:insert(v), t:remove(i), ...).
tablemeta = { __index = table }

-- Adds a `new` entry to the `table` library so that lists are created with
-- the same :new() spelling as the classes in this file.
-- @return a fresh empty table whose metatable is `tablemeta`
function table.new(self)
  return setmetatable({}, tablemeta)
end
-- Creates a new, empty class table.
-- The class carries `metaobject` (a metatable whose __index is the class) and
-- a `new` constructor. `new` builds an instance, attaches the metatable and,
-- if a `ctor` method can be found through it, calls ctor with the arguments
-- given to `new`.
-- @return the class table
function class()
  local proto = {}
  proto.metaobject = { __index = proto }

  proto.new = function(self, ...)
    local instance = setmetatable({}, self.metaobject)
    if instance.ctor then
      instance:ctor(...)
    end
    return instance
  end

  return proto
end
-- LEX -------------------------------------------------------------------------
--
-- Tokenizer class for the markup buffer.
Lex = class()

-- The two lexing modes. Each is a distinct empty table that is only ever
-- compared by identity: TEXT outside of <...>, TAG inside of it.
Lex.STATE = {}
Lex.STATE.TEXT = {}
Lex.STATE.TAG = {}

-- Constructor (invoked through Lex:new).
-- @param str the buffer to tokenize
function Lex:ctor( str )
  self.str = str
  self.state = self.STATE.TEXT
  -- position of the next character to read; nil once the buffer is exhausted
  self.index = 1
  -- prime the one-character lookahead, then the first token
  self:getNextChar()
  self:getToken()
end
-- Advances the one-character lookahead.
-- Stores the character at self.index in self.nextchar (nil once the buffer is
-- exhausted) and moves self.index forward, setting it to nil when it runs
-- past the end of the buffer.
-- @return the lookahead character that was current before this call
function Lex:getNextChar()
  local previous = self.nextchar
  local pos = self.index
  if pos then
    self.nextchar = self.str:sub(pos, pos)
    pos = pos + 1
    if pos > #self.str then
      pos = nil -- end of buffer reached
    end
    self.index = pos
  else
    self.nextchar = nil
  end
  return previous
end
-- Appends lookahead characters to lexer.token for as long as they match
-- `pattern`. Stops at end of input instead of indexing a nil lookahead.
local function takeWhile(lexer, pattern)
  while lexer.nextchar and lexer.nextchar:find(pattern) do
    lexer.token = lexer.token .. lexer:getNextChar()
  end
end

-- Reads the next token into self.token / self.tokenType.
--
-- Token types produced:
--   TEXT state: 'comment' (a whole <!-- ... --> run), 'text' (everything up to
--               the next '<'); a bare '<', '<!' or '<!-' has no type and
--               switches the lexer to the TAG state.
--   TAG state:  'name', 'number', 'string'; any other single character has no
--               type, and '>' switches the lexer back to the TEXT state.
-- self.token is nil once the input is exhausted.
--
-- Unterminated comments/strings and names/numbers that run into the end of
-- the buffer end the token at end of input instead of raising a Lua error.
--
-- @return the previous token and the previous token type
function Lex:getToken()
  local lasttoken = self.token
  local lasttokentype = self.tokenType

  -- skip leading whitespace (the buffer may end while skipping)
  while self.nextchar and self.nextchar:find('%s') do
    self:getNextChar()
  end

  self.token = self:getNextChar()
  self.tokenType = nil -- nil means "arbitrary single character"
  if not self.token then
    return lasttoken, lasttokentype -- end of input
  end

  if self.state == self.STATE.TEXT then
    if self.token == '<' then
      -- comment detection; as a side effect '<!', '<!-' and '<!--' become
      -- distinct tokens
      if self.nextchar == '!' then
        self.token = self.token .. self:getNextChar()
        if self.nextchar == '-' then
          self.token = self.token .. self:getNextChar()
          if self.nextchar == '-' then
            self.token = self.token .. self:getNextChar()
            self.tokenType = 'comment'
            -- swallow the whole comment body up to and including '-->'
            while self.nextchar and self.token:sub(-3) ~= '-->' do
              self.token = self.token .. self:getNextChar()
            end
          end
        end
      end
      if self.tokenType ~= 'comment' then
        -- anything else that starts with '<' opens a tag
        self.state = self.STATE.TAG
      end
    else
      self.tokenType = 'text'
      while self.nextchar and self.nextchar ~= '<' do
        self.token = self.token .. self:getNextChar()
      end
    end
  elseif self.state == self.STATE.TAG then
    if self.token:find('%a') then
      -- names: a letter followed by letters, digits, '_' or '-'
      self.tokenType = 'name'
      takeWhile(self, '[%w_%-]')
    elseif self.token == '-' or self.token == '.' or self.token:find('%d') then
      -- numbers: [-] ###.####
      self.tokenType = 'number'
      if self.token ~= '.' then
        takeWhile(self, '%d')
      end
      if self.nextchar == '.' then
        self.token = self.token .. self:getNextChar()
        takeWhile(self, '%d')
      end
    elseif self.token == '\'' or self.token == '"' then
      -- quoted strings run up to the matching quote character. The original
      -- backslash check compared the whole token with '\\' and so never
      -- fired; it is dropped, which keeps the effective behavior.
      self.tokenType = 'string'
      local quotetype = self.token
      while self.nextchar and self.nextchar ~= quotetype do
        self.token = self.token .. self:getNextChar()
      end
      if self.nextchar then
        self.token = self.token .. self:getNextChar() -- closing quote
      end
    elseif self.token == '>' then
      self.state = self.STATE.TEXT
    end
  end

  return lasttoken, lasttokentype
end
-- Raises a Lua error describing the lexer state.
-- The message holds the caller's text, the current token and token type, the
-- buffer position and up to 30 characters of the buffer on either side of it.
-- Token, type and position may all be nil (punctuation tokens have no type;
-- token and index are nil at end of input), so they are passed through
-- tostring and the position falls back to the end of the buffer.
-- @param msg description of what went wrong
function Lex:error(msg)
  local index = self.index or #self.str
  local from = index - 30
  if from < 1 then from = 1 end
  local to = index + 30
  if to > #self.str then to = #self.str end

  error(table.concat{
    msg, '\n',
    'current token is ', tostring(self.token), '\n',
    'current type is ', tostring(self.tokenType), '\n',
    'current buffer location: ', tostring(index), '\n',
    'surrounding content:\n', self.str:sub(from, to), '\n',
  })
end
-- Consumes the current token if it equals `token`.
-- @param token the exact token text to look for
-- @return true when the token matched and was consumed, nothing otherwise
function Lex:maybe(token)
  if self.token ~= token then
    return
  end
  self:getToken()
  return true
end
-- Consumes the current token if its type is one of the given types.
-- @param ... candidate token types
-- @return true when a type matched and the token was consumed, nothing otherwise
function Lex:maybeType(...)
  local wanted = {...}
  local idx = 1
  while wanted[idx] ~= nil do
    if self.tokenType == wanted[idx] then
      self:getToken()
      return true
    end
    idx = idx + 1
  end
end
-- Consumes the current token and requires it to equal `token`.
-- Errors are reported through this lexer (self) rather than through the
-- global `lex`, so the method works on any Lex instance.
-- @param token the exact token text that must be present
-- @return the consumed token
function Lex:expect(token)
  local found = self:getToken()
  if found ~= token then
    self:error('expected '..token)
  end
  return found
end
-- Consumes the current token and requires its type to be one of the given
-- types. Errors are reported through this lexer (self) rather than through
-- the global `lex`, so the method works on any Lex instance.
-- @param ... acceptable token types
-- @return what getToken returns for the consumed token (its text and type)
function Lex:expectType(...)
  local tried = {}
  for i, t in ipairs{...} do
    if self.tokenType == t then
      return self:getToken() -- hand the matched token back to the caller
    end
    tried[i] = ' ' .. t
  end
  self:error("didn't find a token from the list we were given:\n"..table.concat(tried))
end
-- NODE ------------------------------------------------------------------------
--
-- Parse-tree node class.
Node = class()

-- Constructor (invoked through Node:new): copies every key/value pair of the
-- optional `args` table onto the new node.
-- @param args optional table of initial fields
function Node:ctor(args)
  for field, value in pairs(args or {}) do
    self[field] = value
  end
end
-- Writes this node and its subtree to the default output as indented markup.
--
-- Changes from the previous version:
--   * text and comment nodes are indented like their sibling tags;
--   * a stray closing tag (closer without a matching opener) is written as
--     </name> instead of <name>;
--   * an opening tag that was never paired with a closer (no children table,
--     not standalone) now ends its line, so the next sibling no longer
--     continues on the same line;
--   * the line break before nested children goes through io.write like the
--     rest of the output, so it follows io.output() redirection.
--
-- @param tabs     indentation prefix for this node (defaults to '')
-- @param sameline when true, write without indentation or trailing newline
--                 (used for a single leaf child printed inside its parent)
function Node:print(tabs, sameline)
  tabs = tabs or ''
  local endline = '\n'
  if sameline then
    endline = ''
    tabs = ''
  end

  if self.type == 'comment' or self.type == 'text' then
    io.write(tabs, self.text, endline)
  elseif self.type == 'tag' then
    io.write(tabs, '<')
    if self.doctype then io.write('!') end
    if self.closer then io.write('/') end
    io.write(self.name)
    if self.args then
      for _, arg in ipairs(self.args) do
        local value = ''
        if arg.value then value = '='..arg.value end
        io.write(' '..arg.key..value)
      end
    end

    if self.standalone then
      io.write('/>', endline)
    else
      io.write('>')
      -- no children table means this tag was never paired with a closer:
      -- nothing more will be written for it, so finish the line here
      if not self.children then io.write(endline) end
    end

    if self.children then
      local count = #self.children
      local only = self.children[1]
      if count == 1 and (not only.children or #only.children == 0) then
        -- a single leaf child stays on the same line as its parent
        only:print('', true)
      elseif count > 0 then
        io.write('\n')
        for _, child in ipairs(self.children) do
          child:print(tabs..'\t')
        end
        -- the closing tag goes on its own, indented line
        io.write(tabs)
      end
      io.write('</'..self.name..'>'..endline)
    end
  elseif self.type == 'document' then
    for _, child in ipairs(self.children) do
      child:print()
    end
  else
    assert(false,'forgot to make a Node:print for type '..tostring(self.type))
  end
end
-- Debug dump: prints this node's type, name, arguments and, recursively, its
-- children, one item per line.
function Node:detailedPrint()
  local kind, name = self.type, self.name
  if kind then print('type: '..kind) end
  if name then print('name: '..name) end

  local args = self.args
  if args then
    print('args:')
    for _, arg in ipairs(args) do
      local suffix = arg.value and ('='..arg.value) or ''
      print('\t' .. arg.key .. suffix)
    end
  end

  local kids = self.children
  if kids then
    print('children:')
    for _, child in ipairs(kids) do
      print('begin child -- ')
      child:detailedPrint()
      print('-- end child')
    end
  end
end
-- Reads an attribute key from the global lexer `lex`.
-- A 'string' token is consumed and returned as is. A 'name' token is consumed
-- and, when followed by ':', extended with the ':' and the name after it
-- (namespace form, e.g. xml:lang).
-- @return the key, or nothing when the current token is neither a string nor
--         a name (in which case nothing is consumed)
function readkey()
  local kind = lex.tokenType
  if kind == 'string' then
    return lex:getToken()
  elseif kind ~= 'name' then
    return
  end

  local key = lex:getToken() -- getToken hands back the token it just consumed
  if lex.token == ':' then
    local colon = lex:getToken()
    key = key .. colon
    local suffix = lex:expectType('name')
    key = key .. suffix
  end
  return key
end
-- Parses one flat node from the global lexer `lex`: a text run, a comment, or
-- a single tag (opener, closer, standalone or doctype). Nesting is not built
-- here; treeize does that afterwards.
--
-- The DOCTYPE keyword is matched case-insensitively (HTML allows
-- <!doctype html>); the node keeps the spelling found in the input.
--
-- @return a Node, or nothing when the input is exhausted
function node()
  if not lex.token then return end

  local n = Node:new()

  -- text and comments arrive from the lexer as complete tokens
  if lex.tokenType == 'text' or lex.tokenType == 'comment' then
    n.type = lex.tokenType
    n.text = lex.token
    lex:getToken()
    return n
  end

  -- otherwise expect '<' or '<!' and read up to the closing '>'
  if lex:maybe('<!') then --doctype
    n.doctype = true
    local keyword = lex:getToken()
    if type(keyword) ~= 'string' or keyword:upper() ~= 'DOCTYPE' then
      lex:error('expected DOCTYPE')
    end
    n.name = keyword
  elseif lex:maybe('<') then
    if lex:maybe('/') then
      n.closer = true
    end
    n.name = lex:expectType('name')
  else
    lex:error('expected a < or a <!')
  end
  n.type = 'tag'

  if not n.closer then -- closers carry no arguments
    n.args = table:new()
    local key = readkey()
    while key do
      -- stored as a list of {key=.., value=..} records so that the original
      -- attribute order is preserved
      local arg = { key = key }
      if lex:maybe('=') then
        arg.value = lex:expectType('string','name','number')
      end
      n.args:insert(arg)
      key = readkey()
    end
    if lex:maybe('/') then
      n.standalone = true -- <name/>: no children
    end
  end

  lex:expect('>')
  return n
end
--[[
next algorithm for building a tree from quirksmode:
1) run forward to the first closing tag
2) run backwards to the first matching opening tag
3) build a subtree from that.
--]]
-- Turns the flat node list n.children[first..last] into a tree, in place.
--
-- The list is scanned left to right. For every closing tag, the nearest
-- preceding opener with the same name that is not standalone and not yet
-- paired becomes its parent: the nodes between the two are moved into the
-- opener's `children` list, and those nodes plus the closer are removed from
-- n.children. A non-nil `children` field is the marker for "already paired".
-- Closers without a matching opener are left where they are.
--
-- `last` is reduced by the number of nodes removed, so an explicit range
-- stays a range: nodes beyond the original `last` are never processed.
--
-- @param n     node whose `children` list is restructured
-- @param first first index to process (defaults to 1)
-- @param last  last index to process (defaults to #n.children)
function treeize(n,first,last)
  first = first or 1
  last = last or #n.children

  local i = first
  while i <= last do
    local v = n.children[i]
    if v.type == 'tag' and v.closer then
      assert(not v.standalone) -- closers aren't standalones: </asdf> ~= <asdf/>
      assert(not v.args) -- closers never get args
      for j = i-1, first, -1 do
        local u = n.children[j]
        if u.type == 'tag' and u.name == v.name and not u.standalone and not u.closer and not u.children then
          -- match: move everything from j+1 to i-1 into u's children
          u.children = table:new()
          for k = j+1, i-1 do
            u.children:insert(n.children[k])
          end
          -- drop the moved nodes and the closer (index i) from the flat list,
          -- back to front so the remaining indices stay valid
          for k = i, j+1, -1 do
            n.children:remove(k)
          end
          -- i-j entries were removed: shrink the range by the same amount and
          -- resume right after u (the increment below moves past it)
          last = last - (i - j)
          i = j
          break
        end
      end
    end
    i = i + 1
  end
end
-- Removes redundant content from the tree rooted at n, in place:
--   * target="_self" arguments on <a> tags (bare, double- or single-quoted);
--   * comment nodes;
--   * <span> tags that have no children.
-- Children are trimmed before they are considered for removal.
-- @param n the node to clean up
function trim(n)
  local args = n.args
  if args then
    local isAnchor = n.type == 'tag' and n.name == 'a'
    -- walk backwards so removals do not disturb the indices still to visit
    for idx = #args, 1, -1 do
      local attr = args[idx]
      if isAnchor and attr.key == 'target' then
        local target = attr.value
        if target == '_self' or target == '"_self"' or target == "'_self'" then
          args:remove(idx) -- target="_self" is redundant
        end
      end
    end
  end

  local kids = n.children
  if kids then
    -- backwards again, for the same reason
    for idx = #kids, 1, -1 do
      local child = kids[idx]
      trim(child)

      local drop = false
      if child.type == 'comment' then
        drop = true
      elseif child.type == 'tag' and child.name == 'span' then
        drop = not child.children or #child.children == 0
      end

      if drop then
        kids:remove(idx)
      end
    end
  end
end
--[[
page:
expect a tag up front
it might be a doctype (<!DOCTYPE ... >)
otherwise it better be a <html> tag
--]]
-- Parses a markup buffer into a cleaned-up document tree.
-- A lexer for the buffer is stored in the global `lex` (which node() and
-- readkey() read), every flat node is collected under a 'document' node, and
-- the list is then nested with treeize and cleaned with trim.
-- @param buffer the markup source text
-- @return the 'document' Node at the root of the tree
function beautify( buffer )
  lex = Lex:new( buffer )

  local document = Node:new{ type = 'document', children = table:new() }

  -- node() yields nothing once the input is exhausted
  local parsed = node()
  while parsed do
    document.children:insert(parsed)
    parsed = node()
  end

  treeize(document)
  trim(document)
  return document
end
-- Test (disabled)
-- The block is wrapped in level-2 long brackets because the sample buffer is
-- itself a [[...]] long string: with a plain --[[ comment the first ]] would
-- end the comment early (or the nested [[ is rejected outright), leaving the
-- last lines of the block outside of the comment.
--[==[
local buf = [[<html ><head ><title>VisionDB - Capture File Vision Database</title><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></meta><link rel="stylesheet" type="text/css" href="/visiondb/css/default.css" media="screen"></link></head><body ><div id="header"><h1 id="logo"><a href="/visiondb/visiondb.ws/">VisionDB - Capture File Vision Database</a></h1><div id="version">v0.56 Alpha</div></div><div id="mainnavcontainer"><div id="mainnav"><ul ><li><a href="/visiondb/visiondb.ws/">Home Page</a></li><li><a href="/visiondb/visiondb.ws/search">Search</a></li><li><a href="/visiondb/visiondb.ws/clip/add">Add Clip</a></li><li><a href="/visiondb/visiondb.ws/admin">Administration</a></li></ul></div><div id="userdetails"><ul ><li style="border-left: 1px solid #F0F0F0;"><a href="/visiondb/visiondb.ws/logout">Sign out</a></li><li >Logged in as <a href="/visiondb/visiondb.ws/user/1">admin</a></li></ul></div></div><div id="menu"><h3>About this Site</h3><ul><li>This is a place to post Capture clips that have been clipped and
graded. All of the clips details, or meta-data, are recoded here and you can use
this application to generate reports. Then from these reports you can retrieve
a queue file for automated testing of SmartBeam.</li></ul><h3>Recent Clips</h3><ul ><li><a href="/visiondb/visiondb.ws/clip/1">20090715182008A_Auto.SB2</a></li></ul><h3>Links</h3><ul ><li><a href="http://eeweb.gentex.com/redmine/projects/perfdatabase/issues/new">Report Issue</a></li><li><a href="http://eeweb.gentex.com/redmine/projects/metacapture">MetaCapture</a></li></ul></div><div id="content"><h2>Recent Clips</h2><p>This is a list of the recently added clips in VisionDB. To view the details of a particular clip click on it's ID.</p><br/><table class="list"><thead ><tr ><td class="">Column Class Gsub Column</td><td class="">Column Class Gsub Column</td><td class="">Column Class Gsub Column</td><td class="">Column Class Gsub Column</td><td class="">Column Class Gsub Column</td><td class="">Column Class Gsub Column</td></tr></thead><tbody ><tr class="clip odd"><td class="id"><a href="/visiondb/visiondb.ws/clip/1">1</a></td><td >BMW</td><td >Bixenon</td><td >Blue Sign</td><td >20090715182008A_Auto.SB2</td><td >admin</td></tr></tbody></table><br clear="left"></br><h2>General Statistics</h2><p>Gives a quick overview of what is contained in VisionDB.</p><br/><table class="list"><thead ><tr ><th class="center" colspan="2">Overall Details</th></tr></thead><tbody ><tr class="details" ><td class="odd">Total Clips Found</td><td class="totals">1</td></tr></tbody></table><br clear="left"></br><h2>Usage</h2><p>VisionDB is a simple way to search and query for known clips that can be used to test and simulate driving conditions for Gentex's vision products.</p><br/><ul ><li >To get started you can just click the 'Search' button.<ol ><li>Select the filter to add it.</li><li>Select the search type. (e.g. is, is not, contains, doesn't contain)</li><li>Select or fill in the actual search criteria. (e.g. 
Green signs, Night, etc.)</li></ol></li><li >To save searches and manipulate the default view of the output you need to sign in first.<ul ><li>The login link can be found on the right hand side. It is called 'Sign In'.</li><li>If you don't have a login yet just click the 'Register a New User Account' link and fill in the details.</li></ul></li></ul></div><br clear="left"></br><div id="footer">Copyright © Gentex Corporation 2009</div></body></html>]]
local p = beautify( buf )
print( p:print() )
]==]
return _M
_______________________________________________
Kepler-Project mailing list
[email protected]
http://lists.luaforge.net/cgi-bin/mailman/listinfo/kepler-project
http://www.keplerproject.org/