Author: fperrad Date: Sun Feb 18 23:15:40 2007 New Revision: 17052 Modified: trunk/languages/lua/lib/luaregex.pir trunk/languages/lua/lib/luastring.pir trunk/languages/lua/t/shootout.t trunk/languages/lua/t/string.t
Log: [Lua] - add string.gsub() - and tests Modified: trunk/languages/lua/lib/luaregex.pir ============================================================================== --- trunk/languages/lua/lib/luaregex.pir (original) +++ trunk/languages/lua/lib/luaregex.pir Sun Feb 18 23:15:40 2007 @@ -342,8 +342,16 @@ initchar_ok: if initchar == ')' goto end inc pos - if initchar != "\\" goto term_literal + term_percent: + if initchar != '%' goto term_backslash + initchar = substr target, pos, 1 + inc pos + if pos <= lastpos goto term_percent_ok + parse_error(mob, pos, "Search pattern not terminated") + term_percent_ok: + goto term_literal term_backslash: + if initchar != "\\" goto term_literal initchar = substr target, pos, 1 inc pos if pos <= lastpos goto term_backslash_ok @@ -382,7 +390,6 @@ .const int PGE_INF = 2147483647 .const int PGE_BACKTRACK_GREEDY = 1 .const int PGE_BACKTRACK_EAGER = 2 -.const int PGE_BACKTRACK_NONE = 3 .sub "parse_quant" .param pmc mob @@ -398,7 +405,7 @@ lastpos = length target min = 0 max = PGE_INF - backtrack = PGE_BACKTRACK_NONE + backtrack = PGE_BACKTRACK_GREEDY if key != '+' goto quant_max min = 1 quant_max: Modified: trunk/languages/lua/lib/luastring.pir ============================================================================== --- trunk/languages/lua/lib/luastring.pir (original) +++ trunk/languages/lua/lib/luastring.pir Sun Feb 18 23:15:40 2007 @@ -131,110 +131,6 @@ .end -.sub 'str_find_aux' :anon - .param int find - .param pmc s :optional - .param pmc pattern :optional - .param pmc init :optional - .param pmc plain :optional - $S1 = checkstring(s) - $I1 = length $S1 - $S2 = checkstring(pattern) - $I2 = length $S2 - $I3 = optint(init, 1) - $I3 = posrelat($I3, $I1) - dec $I3 - unless $I3 < 0 goto L1 - $I3 = 0 - goto L2 -L1: - unless $I3 > $I1 goto L2 - $I3 = $I1 -L2: - $S1 = substr $S1, $I3 - unless find goto L3 - if_null plain, L4 - $I0 = istrue plain - if $I0 goto L5 -L4: - .const string specials = "^$*+?.([%-" - $P0 = split '', specials -L6: - $I0 = $P0 - unless $I0 goto L5 - $S0 = shift $P0 - $I0 = index $S2, $S0 - if $I0 >= 0 goto L3 - goto L6 -L5: - # do a plain search - .local int idx - idx = index $S1, $S2 - if idx < 0 goto L7 - .local pmc start - .local pmc end - new start, .LuaNumber - $I0 = $I3 + idx - inc $I0 - set start, $I0 - new end, .LuaNumber - $I0 = $I3 + idx - $I0 += $I2 - set end, $I0 - .return (start, end) -L3: - .local pmc regex_comp - regex_comp = global '_REGEX_COMP' - .local pmc rulesub - rulesub = regex_comp($S2) - .local pmc match - match = rulesub($S1) - unless match goto L7 - new $P0, .Array - .local pmc capts - capts = match.'get_array'() - if_null capts, L10 - $I1 = capts - set $P0, $I1 - $I0 = 0 -L9: - unless $I0 < $I1 goto L10 - $P1 = capts[$I0] - $S0 = $P1 - new $P2, .LuaString - set $P2, $S0 - $P0[$I0] = $P2 - inc $I0 - goto L9 -L10: - unless find goto L8 - .local pmc start - .local pmc end - new start, .LuaNumber - $I0 = match.'from'() - $I0 += $I3 - inc $I0 - set start, $I0 - new end, .LuaNumber - $I0 = match.'to'() - $I0 += $I3 - set end, $I0 - .return (start, end, $P0 :flat) -L8: - if $P0 goto L11 - .local pmc ret - $S0 = match.'text'() - new ret, .LuaString - set ret, $S0 - .return (ret) -L11: - .return ($P0 :flat) -L7: - # not found - new ret, .LuaNil - .return (ret) -.end - =item C<string.byte (s [, i [, j]])> Returns the internal numerical codes of the characters C<s[i]>, C<s[i+1]>,..., @@ -366,6 +262,121 @@ .return str_find_aux(1, argv :flat) .end +.sub 'str_find_aux' :anon + .param int find + .param pmc s :optional + .param pmc pattern :optional + .param pmc init :optional + .param pmc plain :optional + $S1 = checkstring(s) + $I1 = length $S1 + $S2 = checkstring(pattern) + $I2 = length $S2 + $I3 = optint(init, 1) + $I3 = posrelat($I3, $I1) + dec $I3 + unless $I3 < 0 goto L1 + $I3 = 0 + goto L2 +L1: + unless $I3 > $I1 goto L2 + $I3 = $I1 +L2: + $S1 = substr $S1, $I3 + unless find goto L3 + if_null plain, L4 + $I0 = istrue plain + if $I0 goto L5 +L4: + .const string specials = "^$*+?.([%-" + $P0 = split '', specials +L6: + $I0 = $P0 + unless $I0 goto L5 + $S0 = shift $P0 + $I0 = index $S2, $S0 + if $I0 >= 0 goto L3 + goto L6 +L5: + # do a plain search + .local int idx + idx = index $S1, $S2 + if idx < 0 goto L7 + .local pmc start + .local pmc end + new start, .LuaNumber + $I0 = $I3 + idx + inc $I0 + set start, $I0 + new end, .LuaNumber + $I0 = $I3 + idx + $I0 += $I2 + set end, $I0 + .return (start, end) +L3: + .local pmc regex_comp + regex_comp = global '_REGEX_COMP' + .local pmc rulesub + rulesub = regex_comp($S2) + .local pmc match + match = rulesub($S1) + unless match goto L7 + unless find goto L8 + .local pmc start + .local pmc end + new start, .LuaNumber + $I0 = match.'from'() + $I0 += $I3 + inc $I0 + set start, $I0 + new end, .LuaNumber + $I0 = match.'to'() + $I0 += $I3 + set end, $I0 + $P0 = captures(match, 0) + .return (start, end, $P0 :flat) +L8: + $P0 = captures(match, 1) + .return ($P0 :flat) +L7: + # not found + .local pmc ret + new ret, .LuaNil + .return (ret) +.end + +.sub 'captures' :anon + .param pmc match + .param int whole + .local pmc ret + new ret, .Array + .local pmc capts + capts = match.'get_array'() + if_null capts, L1 + $I1 = capts + set ret, $I1 + $I0 = 0 +L2: + unless $I0 < $I1 goto L3 + $P0 = capts[$I0] + $S0 = $P0.'text'() + new $P1, .LuaString + set $P1, $S0 + ret[$I0] = $P1 + inc $I0 + goto L2 +L3: + .return (ret) +L1: + unless whole == 1 goto L4 + set ret, 1 + $S0 = match.'text'() + new $P1, .LuaString + set $P1, $S0 + ret[0] = $P1 +L4: + .return (ret) +.end =item C<string.format (formatstring, e1, e2, ...)> @@ -478,11 +489,12 @@ .return (ret) .end +.const string digits = '0123456789' + .sub 'scanformat' :anon .param string strfrmt .param int start .const string flags = '-+ #0' - .const string digits = '0123456789' .local int idx $I1 = length strfrmt idx = start @@ -624,29 +636,8 @@ $I0 = match.'to'() $S1 = substr $S1, $I0 set s, $S1 - .local pmc capts - capts = match.'get_array'() - if_null capts, L2 - $I1 = capts - new $P0, .Array - set $P0, $I1 - $I0 = 0 -L3: - unless $I0 < $I1 goto L4 - $P1 = capts[$I0] - $S0 = $P1 - new $P2, .LuaString - set $P2, $S0 - $P0[$I0] = $P2 - inc $I0 - goto L3 -L4: + $P0 = captures(match, 1) .return ($P0 :flat) -L2: - $S0 = match.'text'() - new ret, .LuaString - set ret, $S0 - .return (ret) L1: .local pmc ret new ret, .LuaNil @@ -674,7 +665,7 @@ to occur. For instance, when C<n> is 1 only the first occurrence of C<pat> is replaced. -NOT YET IMPLEMENTED. +STILL INCOMPLETE (see F<languages/lua/lib/luaregex.pir>). =cut @@ -682,15 +673,156 @@ .param pmc s :optional .param pmc pat :optional .param pmc repl :optional - .param pmc n :optional - $S0 = checkstring(s) - $I0 = length $S0 - $S1 = checkstring(pat) - $I1 = $I0 + 1 - $I2 = optint(n, $I1) - not_implemented() + .param pmc max :optional + .local string src + src = checkstring(s) + $I1 = length src + $S2 = checkstring(pat) + $I0 = $I1 + 1 + $I4 = optint(max, $I0) + .local int anchor + anchor = 0 + $S0 = substr $S2, 0, 1 + unless $S0 == '^' goto L1 + anchor = 1 +L1: + .local int n + n = 0 + .local pmc regex_comp + regex_comp = global '_REGEX_COMP' + .local pmc rulesub + rulesub = regex_comp($S2) + .local pmc b + new b, .LuaString +L2: + unless n < $I4 goto L3 + .local pmc match + match = rulesub(src) + unless match goto L3 + inc n + add_value(b, src, match, repl) + $I0 = match.'to'() + src = substr src, $I0 + if anchor goto L3 + goto L2 +L3: + $S0 = b + $S0 .= src + set b, $S0 + new $P0, .LuaNumber + set $P0, n + .return (b, $P0) .end +.sub 'add_value' :anon + .param pmc b + .param string s + .param pmc match + .param pmc repl + $I0 = isa repl, 'LuaNumber' + unless $I0 goto L1 + $P0 = repl.'tostring'() + .return add_s(b, s, match, $P0) +L1: + $I0 = isa repl, 'LuaString' + unless $I0 goto L2 + .return add_s(b, s, match, repl) +L2: + $I0 = isa repl, 'LuaClosure' + if $I0 goto L3 + $I0 = isa repl, 'LuaFunction' + if $I0 goto L3 + goto L4 +L3: + $P0 = captures(match, 1) + ($P1) = repl($P0 :flat) + goto L5 +L4: + $I0 = isa repl, 'LuaTable' + unless $I0 goto L6 + $S0 = onecapture(match, 0) + new $P0, .LuaString + set $P0, $S0 + $P1 = repl[$P0] + goto L5 +L6: + error("string/function/table expected") +L5: + if $P1 goto L7 # nil or false? + # keep original text + $S1 = b + $I0 = match.'to'() + $S0 = substr s, 0, $I0 + $S1 .= $S0 + set b, $S1 + .return () +L7: + $I0 = isa $P1, 'LuaString' + if $I0 goto L8 + $I0 = isa $P1, 'LuaNumber' + if $I0 goto L8 + $S0 = "invalid replacement value (a " + $S1 = typeof $P1 + $S0 .= $S1 + $S0 .= ")" + error($S0) +L8: + $S1 = b + $I0 = match.'from'() + $S0 = substr s, 0, $I0 + $S1 .= $S0 + $S0 = $P1 + $S1 .= $S0 + set b, $S1 + .return () +.end + +.sub 'add_s' :anon + .param pmc b + .param string s + .param pmc match + .param pmc repl + $S1 = b + $I0 = match.'from'() + $S0 = substr s, 0, $I0 + $S1 .= $S0 + $S4 = repl + $I4 = length $S4 + .local int i + i = 0 +L1: + unless i < $I4 goto L2 + $S0 = substr $S4, i, 1 + if $S0 != '%' goto L3 + inc i + $S0 = substr $S4, i, 1 + $I0 = index digits, $S0 + if $I0 < 0 goto L3 + unless $S0 == '0' goto L4 + $S0 = match.'text'() + goto L3 +L4: + dec $I0 + $S0 = onecapture(match, $I0) +L3: + $S1 .= $S0 + inc i + goto L1 +L2: + set b, $S1 +.end + +.sub 'onecapture' :anon + .param pmc match + .param int i + push_eh _handler + $P0 = match.'get_array'() + $P1 = $P0[i] + $S0 = $P1.'text'() + .return ($S0) +_handler: + error("invalid capture index") +.end =item C<string.len (s)> Modified: trunk/languages/lua/t/shootout.t ============================================================================== --- trunk/languages/lua/t/shootout.t (original) +++ trunk/languages/lua/t/shootout.t Sun Feb 18 23:15:40 2007 @@ -69,7 +69,7 @@ TODO: { - local $TODO = 'string.gsub is not implemented'; + local $TODO = 'loadstring is not implemented'; $code = Parrot::Test::slurp_file(File::Spec->catfile( @dir, 'pidigits.lua-2.lua' )); $out = Parrot::Test::slurp_file(File::Spec->catfile( @dir, 'pidigits-output.txt' )); @@ -92,7 +92,7 @@ TODO: { - local $TODO = 'string.gsub is not implemented'; + local $TODO = 'string.gsub is still incomplete'; $code = Parrot::Test::slurp_file(File::Spec->catfile( @dir, 'regexdna.lua-3.lua' )); $out = Parrot::Test::slurp_file(File::Spec->catfile( @dir, 'regexdna-output.txt' )); Modified: trunk/languages/lua/t/string.t ============================================================================== --- trunk/languages/lua/t/string.t (original) +++ trunk/languages/lua/t/string.t Sun Feb 18 23:15:40 2007 @@ -27,7 +27,7 @@ use FindBin; use lib "$FindBin::Bin"; -use Parrot::Test tests => 25; +use Parrot::Test tests => 33; use Test::More; language_output_is( 'lua', << 'CODE', << 'OUTPUT', 'function string.byte' ); @@ -211,6 +211,125 @@ to Lua OUTPUT +language_output_is( 'lua', << 'CODE', << 'OUTPUT', 'function string.gsub' ); +x = string.gsub("hello world", "(%w+)", "%1 %1") +print(x) +x = string.gsub("hello world", "%w+", "%0 %0", 1) +print(x) +x = string.gsub("hello world from Lua", "(%w+)%s*(%w+)", "%2 %1") +print(x) +x = string.gsub("home = $HOME, user = $USER", "%$(%w+)", string.reverse) +print(x) +-- x = string.gsub("4+5 = $return 4+5$", "%$(.-)%$", function (s) return loadstring(s)() end) +-- print(x) +print "4+5 = 9" +local t = {name="lua", version="5.1"} +x = string.gsub("$name-$version.tar.gz", "%$(%w+)", t) +print(x) +CODE +hello hello world world +hello hello world +world hello Lua from +home = EMOH, user = RESU +4+5 = 9 +lua-5.1.tar.gz +OUTPUT + +language_output_is( 'lua', << 'CODE', << 'OUTPUT', 'function string.gsub' ); +s = string.gsub("Lua is cute", "cute", "great") +print(s) +s = string.gsub("all lii", "l", "x") +print(s) +s = string.gsub("Lua is great", "Sol", "Sun") +print(s) +s = string.gsub("all lii", "l", "x", 1) +print(s) +s = string.gsub("all lii", "l", "x", 2) +print(s) +count = select(2, string.gsub("string with 3 spaces", " ", " ")) +print(count) +CODE +Lua is great +axx xii +Lua is great +axl lii +axx lii +3 +OUTPUT + +language_output_is( 'lua', << 'CODE', << 'OUTPUT', 'function string.gsub' ); +print(string.gsub("hello, up-down!", "%A", ".")) +text = "hello world" +nvow = select(2, string.gsub(text, "[AEIOUaeiou]", "")) +print(nvow) +print(string.gsub("one, and two; and three", "%a+", "word")) +test = "int x; /* x */ int y; /* y */" +print(string.gsub(test, "/%*.*%*/", "<COMMENT>")) +print(string.gsub(test, "/%*.-%*/", "<COMMENT>")) +s = "a (enclosed (in) parentheses) line" +-- print(string.gsub(s, "%b()", "")) +print("a line", 1) +print(string.gsub("hello Lua!", "%a", "%0-%0")) +print(string.gsub("hello Lua", "(.)(.)", "%2%1")) +CODE +hello..up.down. 4 +3 +word, word word; word word 5 +int x; <COMMENT> 1 +int x; <COMMENT> int y; <COMMENT> 2 +a line 1 +h-he-el-ll-lo-o L-Lu-ua-a! 8 +ehll ouLa 4 +OUTPUT + +language_output_is( 'lua', << 'CODE', << 'OUTPUT', 'function string.gsub' ); +function expand (s) + return (string.gsub(s, "%$(%w+)", _G)) +end + +name = "Lua"; status= "great" +print(expand("$name is $status, isn't it?")) +print(expand("$othername is $status, isn't it?")) +CODE +Lua is great, isn't it? +$othername is great, isn't it? +OUTPUT + +language_output_like( 'lua', << 'CODE', << 'OUTPUT', 'function string.gsub' ); +function expand (s) + return (string.gsub(s, "%$(%w+)", function (n) + return tostring(_G[n]), 1 + end)) +end + +print(expand("print = $print; a = $a")) +CODE +/print = function: (0[Xx])?[0-9A-Fa-f]+; a = nil/ +OUTPUT + +language_output_like( 'lua', << 'CODE', << 'OUTPUT', 'function string.gsub (invalid index)' ); +x = string.gsub("hello world", "(%w+)", "%2 %2") +CODE +/invalid capture index/ +OUTPUT + +language_output_like( 'lua', << 'CODE', << 'OUTPUT', 'function string.gsub (bad type)' ); +x = string.gsub("hello world", "(%w+)", true) +CODE +/string\/function\/table expected/ +OUTPUT + +language_output_like( 'lua', << 'CODE', << 'OUTPUT', 'function string.gsub (invalid value)' ); +function expand (s) + return (string.gsub(s, "%$(%w+)", _G)) +end + +name = "Lua"; status= true +print(expand("$name is $status, isn't it?")) +CODE +/invalid replacement value \(a boolean\)/ +OUTPUT + language_output_is( 'lua', << 'CODE', << 'OUTPUT', 'function string.len' ); print(string.len("")) print(string.len("test"))