Hi. Glad you are enjoying Parslet.
There are a couple of solutions to your problem.
Your rule is not matching because a transform rule has to match the whole
hash. If it matched only part of a hash, it would have to throw away the rest
of the data for you, and it won't do that (unless you tell it to). Your
pattern also needs to match the content of the hash. There is probably a way
to do this using subtree, but I avoid it.
You can simplify your generated tree by removing a bunch of the "as"
statements from the parser; then a rule on :day, :month and :year will match
the whole hash.
#### like this ####
#!/usr/bin/env ruby
require 'parslet'
require 'date'
# Parser for the leading fields of an Apache access-log line:
# the client IP address, two "-" placeholder fields, and the bracketed
# timestamp, e.g.
#
#   137.207.74.55 - - [08/Feb/2013:19:28:10 -0500]
#
# Only the values the transform needs are captured with +as+: the address
# under :IP, and :day, :month (holding :wordmonth) and :year under :rawdate.
# The time-of-day and timezone are consumed but not captured, so the
# :rawdate hash contains exactly the keys the transform rule matches on.
class WebLog < Parslet::Parser
  rule(:integer) { match('[0-9]').repeat(1) }
  rule(:space)   { match('\s').repeat(1) }
  rule(:space?)  { space.maybe }
  # A literal dot. match('.') would be a regexp atom accepting any character.
  rule(:dot)     { str('.') }

  # Three-letter English month abbreviation, captured as :wordmonth.
  rule(:month) {
    (str('Jan') | str('Feb') |
     str('Mar') | str('Apr') |
     str('May') | str('Jun') |
     str('Jul') | str('Aug') |
     str('Sep') | str('Oct') |
     str('Nov') | str('Dec')).as(:wordmonth) >> space?
  }

  # Sign followed by digits, e.g. "-0500". Consumed, not captured.
  rule(:timezone) { match('[+-]') >> integer >> space? }

  # "[day/month/year:hour:minute:second timezone]".
  rule(:date) {
    str('[') >> integer.as(:day) >>
      str('/') >> month.as(:month) >>
      str('/') >> integer.as(:year) >>
      str(':') >> integer >>
      str(':') >> integer >>
      str(':') >> integer >>
      space? >> timezone >>
      str(']')
  }

  # Four dot-separated runs of digits. Octet ranges are not validated.
  rule(:ipaddr) {
    integer >> dot >>
      integer >> dot >>
      integer >> dot >>
      integer
  }

  # `date.as(:rawdate)` must stay on one line: if the argument list wraps to
  # the next line, Ruby calls `as` without an argument.
  rule(:weblog) {
    ipaddr.as(:IP) >> space? >>
      str('-') >> space? >> str('-') >> space? >>
      date.as(:rawdate)
  }

  root :weblog
end
# Turns the parse tree produced by WebLog into Ruby values.
#
# Rules are applied to the innermost nodes first, so by the time the
# day/month/year hash is matched its :month entry has already been replaced
# by the month number.
class WebLogTransform < Parslet::Transform
  # Month abbreviations in calendar order; position + 1 is the month number.
  MONTH_NAMES = %w[Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec].freeze

  # {:wordmonth => "Feb"} becomes 2.
  rule(wordmonth: simple(:month)) do
    MONTH_NAMES.index(month) + 1
  end

  # A hash holding exactly :day, :month and :year becomes a DateTime.
  rule(day: simple(:day), month: simple(:month), year: simple(:year)) do
    DateTime.new(year.to_i, month, day.to_i)
  end
end
# Parses one log line with WebLog, runs the tree through WebLogTransform and
# prints the result. If the line does not parse, prints the ASCII tree of the
# failure cause instead.
def parse(str)
  begin
    parser = WebLog.new
    transformer = WebLogTransform.new
    tree = parser.parse(str)
    puts transformer.apply(tree)
  rescue Parslet::ParseFailed => failure
    puts failure.cause.ascii_tree
  end
end
# Run the parser and transform on a sample access-log line.
parse("137.207.74.55 - - [08/Feb/2013:19:28:10 -0500]")
###########
Actually, you can match with a subtree if you add another "as" in your
parser for date:
# Replacement for the :weblog rule: the :rawdate capture is wrapped in a
# second capture, :date (the trailing `.as(:date)` is the only addition).
rule(:weblog) {
  ipaddr.as(:IP) >> space? >>
    str('-') >> space? >> str('-') >> space? >>
    date.as(:rawdate).as(:date)
}
This lets you match the rawdate hash and replace it as a whole, storing
the result under "date", like this:
# Transform variant that matches the whole :rawdate hash with +subtree+ and
# builds a DateTime from its :year, :month and :day entries.
class WebLogTransform < Parslet::Transform
  # {:wordmonth => "Feb"} becomes 2 (position in the calendar, 1-based).
  rule(:wordmonth => simple(:month)) do
    names = %w[Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec]
    names.index(month) + 1
  end

  # {:rawdate => {...}} becomes a DateTime. The :month entry is passed to
  # DateTime.new as-is, while :year and :day are converted with to_i.
  rule(:rawdate => subtree(:date)) do
    year, month_number, day = date.values_at(:year, :month, :day)
    DateTime.new(year.to_i, month_number, day.to_i)
  end
end
Hope this helps.
Nigel
---
---
"No man is an island... except Philip"
On Sat, Feb 9, 2013 at 4:42 PM, Jeffrey Drake <[email protected]> wrote:
> I have attempted to write myself a parser for the Apache Log formats, and
> starting off, I seem to have a parser that works for the two main parts I
> am looking at now: ip and date. The problem is the transform.
>
> My transform looks like
>
>
> class WebLogTransform < Parslet::Transform
> rule(:wordmonth => simple(:month)) {
> ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
> 'Nov', 'Dec'].index(month) + 1
> }
> rule(:rawdate => simple(:date)) {
> DateTime.new(Integer(Date.year), Integer(Date.month), Integer(Date.day))
> }
> end
>
> (* note I just added the conversion for wordmonth, and rawdate depends on
> it, not sure best way to handle that).
>
> This thing doesn't affect anything. I would like to think that I have
> gotten a handle on everything else.
>
> Any suggestions?
>
> The output is like this:
>
> => {:IP=>"137.207.74.55"@0, :rawdate=>{:day=>"08"@19,
> :month=>{:month=>"Feb"@22}, :year=>"2013"@26, :hour=>"19"@31,
> :minute=>"28"@34, :second=>"10"@37, :timezone=>{:tzpm=>"-"@40,
> :tz=>"0500"@41}}}
>
> The full parser file is below. I am amazed at how clean this is, much
> easier to read than Boost::Spirit.
>
> With thanks,
>
> Jeffrey Drake.
>
>
>
> #!/usr/bin/env ruby
>
> require 'parslet'
> require 'date'
>
> class WebLog < Parslet::Parser
> rule(:integer) { match('[0-9]').repeat(1) }
> rule(:space) { match('\s').repeat(1) }
> rule(:space?) { space.maybe }
> rule(:dot) { match('.') }
>
> rule(:month) { (str('Jan') | str('Feb') |
> str('Mar') | str('Apr') |
> str('May') | str('Jun') |
> str('Jul') | str('Aug') |
> str('Sep') | str('Oct') |
> str('Nov') | str('Dec')).as(:wordmonth) >> space?
> }
>
> rule(:timezone) { match('[+-]').as(:tzpm) >> integer.as(:tz) >> space?
> }
>
> rule(:date) { str('[') >> integer.as(:day) >>
> str('/') >> month.as(:month) >>
> str('/') >> integer.as(:year) >>
> str(':') >> integer.as(:hour) >>
> str(':') >> integer.as(:minute) >>
> str(':') >> integer.as(:second) >>
> space? >> timezone.as(:timezone) >>
> str(']')
> }
>
>
> rule(:ipaddr) { integer >> dot >>
> integer >> dot >>
> integer >> dot >>
> integer }
>
> rule(:weblog) { ipaddr.as(:IP) >> space? >>
> str('-') >> space? >> str('-') >> space? >> date.as
> (:rawdate)
> }
> root :weblog
> end
>
>
>
> class WebLogTransform < Parslet::Transform
> rule(:wordmonth => simple(:month)) {
> ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
> 'Nov', 'Dec'].index(month) + 1
> }
> rule(:rawdate => simple(:date)) {
> DateTime.new(Integer(Date.year), Integer(Date.month), Integer(Date.day))
> }
> end
>
>
>
> def parse(str)
> log = WebLog.new
> trans = WebLogTransform.new
>
> puts trans.apply(log.parse(str))
> rescue Parslet::ParseFailed => failure
> puts failure.cause.ascii_tree
> end
>
> parse "137.207.74.55 - - [08/Feb/2013:19:28:10 -0500]"
>