Hello Kaspar et al,
First of all, many thanks for Parslet; it's a wonderful tool.
I've run into a case where I'd like a single transform rule to match a value that may be either a "simple" or a "sequence".
---8<---
require 'rubygems'
require 'parslet'
# Grammar for newline-terminated lines, each split into its leading spaces
# (:indentation) and the remaining characters (:stuff).
class Parser < Parslet::Parser
  # A single line. Both repetitions accept zero matches, so either capture
  # can come back empty (shown as [] in the sample output).
  rule(:line) do
    leading_spaces = str(' ').repeat(0).as(:indentation)
    rest_of_line = match('[^\n]').repeat(0).as(:stuff)
    leading_spaces >> rest_of_line >> str("\n")
  end

  # Any number of lines.
  rule(:lines) do
    line.repeat
  end

  root(:lines)
end
# Turns each parsed line hash into an [indentation_width, text] pair.
class Transformer < Parslet::Transform
  # Usual case: both captures are plain values.
  rule(:indentation => simple(:spaces), :stuff => simple(:rest)) do
    [spaces.to_s.length, rest.to_s]
  end

  # :indentation came back as [] (no leading spaces), :stuff is a plain value.
  rule(:indentation => sequence(:spaces), :stuff => simple(:rest)) do
    [0, rest.to_s]
  end

  # Both :indentation and :stuff came back as [] (blank line).
  rule(:indentation => sequence(:spaces), :stuff => sequence(:rest)) do
    [0, '']
  end
end
# Sample input: a leading blank line, then three lines indented by 0, 2 and
# 4 spaces. The leading spaces are significant: they are what the parser
# captures as :indentation and what the output below reports as 2 and 4.
s = %{
alpha
  bravo
    charly
}

parser = Parser.new
transformer = Transformer.new

# Parse the text into the intermediate tree, then run the transform over it.
tree = parser.parse(s)
out = transformer.apply(tree)

# Print the raw tree and the transformed result, separated by blank lines.
puts
p tree
puts
p out
puts
# output :
#
# [{:indentation=>[], :stuff=>[]}, {:indentation=>[], :stuff=>"alpha"@1},
#  {:indentation=>"  "@7, :stuff=>"bravo"@9}, {:indentation=>"    "@15,
#  :stuff=>"charly"@19}]
#
# [[0, ""], [0, "alpha"], [2, "bravo"], [4, "charly"]]
--->8---
I'm probably just a fool running into a trap I built myself.
I've tried tweaking the parser rules a bit:
---8<---
require 'rubygems'
require 'parslet'
# Grammar for newline-terminated lines in which the leading spaces
# (:indentation) and the remaining characters (:stuff) are each optional.
class Parser < Parslet::Parser
  # A single line. Each part needs at least one character to be captured
  # and is wrapped in `maybe`, so it may be absent altogether.
  rule(:line) do
    optional_indent = str(' ').repeat(1).as(:indentation).maybe
    optional_text = match('[^\n]').repeat(1).as(:stuff).maybe
    optional_indent >> optional_text >> str("\n")
  end

  # Any number of lines.
  rule(:lines) do
    line.repeat
  end

  root(:lines)
end
# Turns each parsed line hash into an [indentation_width, text] pair.
class Transformer < Parslet::Transform
  # Usual case: the line has both leading spaces and text.
  rule(:indentation => simple(:spaces), :stuff => simple(:rest)) do
    [spaces.to_s.length, rest.to_s]
  end

  # The line has no :indentation key at all, only text.
  rule(:stuff => simple(:rest)) do
    [0, rest.to_s]
  end
end
# Sample input: a leading blank line, then three lines indented by 0, 2 and
# 4 spaces. The leading spaces are significant: they are what the parser
# captures as :indentation and what the output below reports as 2 and 4.
s = %{
alpha
  bravo
    charly
}

parser = Parser.new
transformer = Transformer.new

# Parse the text into the intermediate tree, then run the transform over it.
tree = parser.parse(s)
out = transformer.apply(tree)

# Print the raw tree and the transformed result, separated by blank lines.
puts
p tree
puts
p out
puts
# output :
#
# [{:stuff=>"alpha"@1}, {:indentation=>"  "@7, :stuff=>"bravo"@9},
#  {:indentation=>"    "@15, :stuff=>"charly"@19}]
#
# [[0, "alpha"], [2, "bravo"], [4, "charly"]]
--->8---
But I still need separate rules for when :indentation or :stuff is not set.
Is there a better way? Would captures such as simple_or_sequence(:x) or
whatever(:x) make sense?
Many thanks again,
--
John Mettraux - http://jmettraux.wordpress.com