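Your rules are missing `.as(:name)` captures. In Parslet, calling `.as(:name)`
on a parslet labels what it matched, and those labels are what give the parse
output its hash structure. For example: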
# Matches a number literal: an optional leading minus, an integer part that
# is either a lone 0 or a non-zero digit followed by more digits, an optional
# fractional part, and an optional exponent.
#
# The trailing .as(:number) labels the whole match, so it shows up in the
# parse output under the :number key.
#
# NOTE(review): relies on a `digit` rule defined elsewhere in the parser.
rule(:number) {
  (
    str('-').maybe >> (
      str('0') | (match('[1-9]') >> digit.repeat)
    ) >> (
      str('.') >> digit.repeat(1)
    ).maybe >> (
      match('[eE]') >> (str('+') | str('-')).maybe >> digit.repeat(1)
    ).maybe
  ).as(:number)
}
---
"No man is an island... except Philip"
On Thu, Nov 1, 2012 at 6:55 AM, Peter Harkins <[email protected]> wrote:
> I'm trying to use Parslet to parse some TeX (definitely not all of TeX,
> just a small set of documents that only use a small subset of the
> language) and I'm not having any luck. I've written rules that consume the
> markup without error, but when I puts the result I don't see a nice
> structure of hashes like I do with other scripts like my_json:
> https://gist.github.com/966020
>
> So either I'm correctly consuming it and getting odd output, or I'm not
> correctly consuming it and need to fix my rules. I think the latter is
> more likely, because I have a couple rules that need to consume arbitrary
> text between the markup commands.
>
> My code is below. If you save and run this as 'ruby tex.rb' it'll run one
> example that shows the odd behavior I've described; 'rspec tex.rb' will
> run my little test suite. I'd appreciate any suggestions.
>
> Thanks in advance!
>
>
> require 'parslet'
> require 'parslet/convenience'
> require 'pp'
> require 'rspec'
> require 'parslet/rig/rspec'
> require 'yaml'
>
> class Tex < Parslet::Parser
> rule(:backslash) { str '\\' }
> rule(:command) { backslash >> match('[a-zA-Z]').repeat(1) >> option? }
> rule(:command?) { command.maybe }
>
> rule(:option) { str('{') >> match('[a-zA-Z0-9 ,]').repeat() >>
> str('}') }
> rule(:option?) { option.maybe }
>
> rule(:comment) { str('%') >> any.repeat }
> rule(:comment?) { comment.maybe }
>
> rule(:texspace) { str '\\/' }
>
> #rule(:text) { match('[a-ZA-Z ]').repeat(1) }
> rule(:text) { any.repeat(1) }
>
> rule(:thing) { command | texspace | text }
> rule(:line) { thing.repeat }
> rule(:body_or_comment) do
> (comment | line).as(:body_x_comment)
> end
>
> root(:body_or_comment)
>
> #rule(:space) { match('\s').repeat(1) }
> #rule(:space?) { space.maybe }
> end
>
> class Foo < Parslet::Parser
> rule(:backslash) { str '\\' }
> rule(:command) { backslash >> match('[a-zA-Z]').repeat(1) >> option? }
> rule(:command?) { command.maybe }
>
> rule(:option) { str('{') >> match('[a-zA-Z0-9 ,]').repeat() >>
> str('}') }
> rule(:option?) { option.maybe }
> end
>
> describe Tex do
> subject { Tex.new }
>
> it('parses comments') { should parse("% foo") }
> it('parses comments after text') { should parse("text % foo") }
> it('parses comments to EOL') { should parse("% foo\ntext") }
>
> it('parses commands without options') { should parse("\\rm") }
> it('parses commands with options braces') { should parse("\\rm{}") }
> it('parses commands with options') { should parse("\\rm{a}") }
>
> describe 'real data' do
> it('parses commands') { should parse('\\SmallCommand{Option}') }
> it('parses multiple commands') { should
> parse('\\SmallCommand{Option}\\BigCommand{Big long text, with puncuation}')
> }
> it('parses options with text') { should
> parse('\\SmallCommand{Option}\\BigCommand{Big long text, with
> puncuation}\\BFont') }
> it('parses commands in front of text') { should
> parse('\\SmallCommand{1}\\OtherCommand{}Lorem ipsum sic dolor amet ') }
> it('parses text and commands alternating') { should parse('Lorem.
> \\Command{2}ipsum \\it sic\\/\\rm ') }
>
> it('parses multiple lines') do
> should parse <<-TEXT
> \\SmallCommand{Option}\\BigCommand{Big long text, with
> puncuation}\\Something
> \\Command{1}\\rmLorem ipsum sic dolor amet.
> Lorem. \\Command{2}ipsum \\it sic\\/\\rm
> TEXT
> end
> end
>
> end
>
> begin
> puts Tex.new.command.parse('\\SmallCommand{Option}\\Othercommand{text}
> regular text')
> rescue Parslet::ParseFailed => failure
> puts failure.cause.ascii_tree
> end
>
> --
> Peter Harkins - http://push.cx - http://NearbyGamers.com
>