I'm trying to use Parslet to parse some TeX (definitely not all of TeX,
just a small set of documents that only use a small subset of the
language) and I'm not having any luck. I've written rules that consume the
markup without error, but when I puts the result I don't see a nice
structure of hashes like I do with other scripts like my_json:
https://gist.github.com/966020
So either I'm correctly consuming it and getting odd output, or I'm not
correctly consuming it and need to fix my rules. I think the latter is
more likely, because I have a couple rules that need to consume arbitrary
text between the markup commands.
My code is below. If you save and run this as 'ruby tex.rb' it'll run one
example that shows the odd behavior I've described; 'rspec tex.rb' will
run my little test suite. I'd appreciate any suggestions.
Thanks in advance!
require 'parslet'
require 'parslet/convenience'
require 'pp'
require 'rspec'
require 'parslet/rig/rspec'
require 'yaml'
class Tex < Parslet::Parser
rule(:backslash) { str '\\' }
rule(:command) { backslash >> match('[a-zA-Z]').repeat(1) >> option? }
rule(:command?) { command.maybe }
rule(:option) { str('{') >> match('[a-zA-Z0-9 ,]').repeat() >> str('}') }
rule(:option?) { option.maybe }
rule(:comment) { str('%') >> any.repeat }
rule(:comment?) { comment.maybe }
rule(:texspace) { str '\\/' }
#rule(:text) { match('[a-ZA-Z ]').repeat(1) }
rule(:text) { any.repeat(1) }
rule(:thing) { command | texspace | text }
rule(:line) { thing.repeat }
rule(:body_or_comment) do
(comment | line).as(:body_x_comment)
end
root(:body_or_comment)
#rule(:space) { match('\s').repeat(1) }
#rule(:space?) { space.maybe }
end
class Foo < Parslet::Parser
rule(:backslash) { str '\\' }
rule(:command) { backslash >> match('[a-zA-Z]').repeat(1) >> option? }
rule(:command?) { command.maybe }
rule(:option) { str('{') >> match('[a-zA-Z0-9 ,]').repeat() >> str('}') }
rule(:option?) { option.maybe }
end
describe Tex do
subject { Tex.new }
it('parses comments') { should parse("% foo") }
it('parses comments after text') { should parse("text % foo") }
it('parses comments to EOL') { should parse("% foo\ntext") }
it('parses commands without options') { should parse("\\rm") }
it('parses commands with options braces') { should parse("\\rm{}") }
it('parses commands with options') { should parse("\\rm{a}") }
describe 'real data' do
it('parses commands') { should parse('\\SmallCommand{Option}') }
it('parses multiple commands') { should
parse('\\SmallCommand{Option}\\BigCommand{Big long text, with puncuation}') }
it('parses options with text') { should
parse('\\SmallCommand{Option}\\BigCommand{Big long text, with
puncuation}\\BFont') }
it('parses commands in front of text') { should
parse('\\SmallCommand{1}\\OtherCommand{}Lorem ipsum sic dolor amet ') }
it('parses text and commands alternating') { should parse('Lorem.
\\Command{2}ipsum \\it sic\\/\\rm ') }
it('parses multiple lines') do
should parse <<-TEXT
\\SmallCommand{Option}\\BigCommand{Big long text, with
puncuation}\\Something
\\Command{1}\\rmLorem ipsum sic dolor amet.
Lorem. \\Command{2}ipsum \\it sic\\/\\rm
TEXT
end
end
end
begin
puts Tex.new.command.parse('\\SmallCommand{Option}\\Othercommand{text}
regular text')
rescue Parslet::ParseFailed => failure
puts failure.cause.ascii_tree
end
--
Peter Harkins - http://push.cx - http://NearbyGamers.com