This fixes #2666 and #2664. It is admittedly a gross hack: the lexer now tracks when a regex literal can legally appear in the token stream and when it cannot, so that, for example, the slashes in "$var = 4096 / 2 / 2" are lexed as division rather than as the start of a regex.
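
To make the idea concrete for reviewers, here is a minimal standalone sketch of the mechanism. The SketchLexer class, its RULES table and its NO_REGEX_AFTER table are illustrative names only, not code from this patch: each rule records whether a regex literal may legally follow the token it produces, and the scanner skips the regex rule whenever the previous token forbids one.

require 'strscan'

# Toy lexer: rules are tried in order (the real lexer picks the longest
# match); only the no_regex bookkeeping matters for this illustration.
class SketchLexer
  REGEX = %r{/[^/\n]*/}

  RULES = [
    [/\$\w+/, :VARIABLE],
    [/\d+/,   :NUMBER],
    [/=~/,    :MATCH],   # listed before '=' so '=~' is not split in two
    [/=/,     :EQUALS],
    [%r{/},   :DIV],
  ]

  # true  => a regex literal can never follow this token (after '=' or an
  #          operand, a '/' has to be something else, e.g. division)
  # false => a regex literal may follow (e.g. after '=~')
  # Tokens that are absent leave the flag unchanged.
  NO_REGEX_AFTER = { :EQUALS => true, :NUMBER => true, :VARIABLE => true, :MATCH => false }

  def initialize(input)
    @scanner  = StringScanner.new(input)
    @no_regex = false
  end

  def tokens
    result = []
    loop do
      @scanner.skip(/\s+/)
      break if @scanner.eos?

      # Only try the regex rule when the previous token allows a regex here.
      if !@no_regex && (value = @scanner.scan(REGEX))
        result << [:REGEX, value]
        next
      end

      pattern, name = RULES.find { |pat, _| @scanner.match?(pat) }
      raise "unexpected input: #{@scanner.rest.inspect}" unless pattern

      result << [name, @scanner.scan(pattern)]
      flag = NO_REGEX_AFTER[name]
      @no_regex = flag unless flag.nil?
    end
    result
  end
end

# SketchLexer.new("$var = 4096 / 2 / 2").tokens lexes every '/' as :DIV,
# while SketchLexer.new("$var =~ /4096/").tokens still yields a :REGEX token.

The patch below wires the same flag into the real lexer: tokens may carry a :no_regex_after option, munge_token updates @no_regex from it, and find_regex_token skips the :REGEX rule while the flag is set.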
Signed-off-by: Brice Figureau <[email protected]>
---
 lib/puppet/parser/lexer.rb |   32 ++++++++++++-----
 spec/unit/parser/lexer.rb  |   82 ++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 102 insertions(+), 12 deletions(-)

diff --git a/lib/puppet/parser/lexer.rb b/lib/puppet/parser/lexer.rb
index e027a69..7043f30 100644
--- a/lib/puppet/parser/lexer.rb
+++ b/lib/puppet/parser/lexer.rb
@@ -13,11 +13,11 @@ module Puppet::Parser; end
 class Puppet::Parser::Lexer
     attr_reader :last, :file
 
-    attr_accessor :line, :indefine
+    attr_accessor :line, :indefine, :no_regex
 
     # Our base token class.
     class Token
-        attr_accessor :regex, :name, :string, :skip, :incr_line, :skip_text, :accumulate
+        attr_accessor :regex, :name, :string, :skip, :incr_line, :skip_text, :accumulate, :no_regex_after
 
         def initialize(regex, name)
             if regex.is_a?(String)
@@ -87,7 +87,13 @@ class Puppet::Parser::Lexer
         # Define more tokens.
         def add_tokens(hash)
             hash.each do |regex, name|
-                add_token(name, regex)
+                options = {}
+                if name.is_a?(Hash)
+                    options = name
+                    name = options[:name]
+                    options.delete(:name)
+                end
+                add_token(name, regex, options)
             end
         end
 
@@ -106,7 +112,7 @@ class Puppet::Parser::Lexer
         '}' => :RBRACE,
         '(' => :LPAREN,
         ')' => :RPAREN,
-        '=' => :EQUALS,
+        '=' => { :name => :EQUALS, :no_regex_after => true },
         '+=' => :APPENDS,
         '==' => :ISEQUAL,
         '>=' => :GREATEREQUAL,
@@ -124,7 +130,7 @@ class Puppet::Parser::Lexer
         '<|' => :LCOLLECT,
         '|>' => :RCOLLECT,
         ';' => :SEMIC,
-        '?' => :QMARK,
+        '?' => { :name => :QMARK, :no_regex_after => false },
         '\\' => :BACKSLASH,
         '=>' => :FARROW,
         '+>' => :PARROW,
@@ -134,8 +140,8 @@ class Puppet::Parser::Lexer
         '*' => :TIMES,
         '<<' => :LSHIFT,
         '>>' => :RSHIFT,
-        '=~' => :MATCH,
-        '!~' => :NOMATCH,
+        '=~' => { :name => :MATCH, :no_regex_after => false },
+        '!~' => { :name => :NOMATCH, :no_regex_after => false },
         %r{([a-z][-\w]*)?(::[a-z][-\w]*)+} => :CLASSNAME, # Require '::' in the class name, else we'd compete with NAME
         %r{((::){0,1}[A-Z][-\w]*)+} => :CLASSREF
     )
@@ -211,7 +217,7 @@ class Puppet::Parser::Lexer
     KEYWORDS = TokenList.new
 
     KEYWORDS.add_tokens(
-        "case" => :CASE,
+        "case" => { :name => :CASE, :no_regex_after => false },
         "class" => :CLASS,
         "default" => :DEFAULT,
         "define" => :DEFINE,
@@ -220,7 +226,7 @@ class Puppet::Parser::Lexer
         "elsif" => :ELSIF,
         "else" => :ELSE,
         "inherits" => :INHERITS,
-        "node" => :NODE,
+        "node" => { :name => :NODE, :no_regex_after => false },
         "and" => :AND,
         "or" => :OR,
         "undef" => :UNDEF,
@@ -294,6 +300,7 @@ class Puppet::Parser::Lexer
         # a slightly negative affect and was a good bit more complicated.
         TOKENS.regex_tokens.each do |token|
             next unless match_length = @scanner.match?(token.regex)
+            next if token.name == :REGEX and @no_regex
 
             # We've found a longer match
             if match_length > length
@@ -345,6 +352,7 @@ class Puppet::Parser::Lexer
         @indefine = false
         @expected = []
         @commentstack = [ ['', @line] ]
+        @no_regex = false
     end
 
     # Make any necessary changes to the token and/or value.
@@ -365,6 +373,12 @@ class Puppet::Parser::Lexer
             @commentstack.push(comment)
         end
 
+        if token.no_regex_after
+            @no_regex = true
+        elsif not token.no_regex_after.nil?
+            @no_regex = false
+        end
+
         return if token.skip
 
         return token, { :value => value, :line => @line }
diff --git a/spec/unit/parser/lexer.rb b/spec/unit/parser/lexer.rb
index 1c3e91b..17bf7d1 100755
--- a/spec/unit/parser/lexer.rb
+++ b/spec/unit/parser/lexer.rb
@@ -44,7 +44,7 @@ describe Puppet::Parser::Lexer::Token do
         @token = Puppet::Parser::Lexer::Token.new(%r{something}, :NAME)
     end
 
-    [:regex, :name, :string, :skip, :incr_line, :skip_text, :accumulate].each do |param|
+    [:regex, :name, :string, :skip, :incr_line, :skip_text, :accumulate, :no_regex_after].each do |param|
         it "should have a #{param.to_s} reader" do
             @token.should be_respond_to(param)
         end
@@ -95,6 +95,12 @@ describe Puppet::Parser::Lexer::TokenList do
         @list[:bar].should_not be_nil
     end
 
+    it "should init token options if called with an hash" do
+        @list.expects(:add_token).with(:name, "whatever", { :option => "options" })
+
+        @list.add_tokens "whatever" => { :name => :name, :option => "options" }
+    end
+
     it "should fail to add tokens sharing a name with an existing token" do
         @list.add_token :name, "whatever"
         lambda { @list.add_token :name, "whatever" }.should raise_error(ArgumentError)
@@ -191,6 +197,17 @@ describe Puppet::Parser::Lexer::TOKENS do
     end
 
     {
+        :EQUALS => true,
+        :QMARK => false,
+        :MATCH => false,
+        :NOMATCH => false,
+    }.each do |name, no_regex_after|
+        it "should have have a token #{name} with no_regex_after = #{no_regex_after} " do
+            Puppet::Parser::Lexer::TOKENS[name].no_regex_after.should == no_regex_after
+        end
+    end
+
+    {
         "case" => :CASE,
         "class" => :CLASS,
         "default" => :DEFAULT,
@@ -216,6 +233,15 @@ describe Puppet::Parser::Lexer::TOKENS do
         end
     end
 
+    {
+        :CASE => false,
+        :NODE => false,
+    }.each do |name, no_regex_after|
+        it "should have have a keyword #{name} with no_regex_after = #{no_regex_after} " do
+            Puppet::Parser::Lexer::KEYWORDS[name].no_regex_after.should == no_regex_after
+        end
+    end
+
     # These tokens' strings don't matter, just that the tokens exist.
     [:DQTEXT, :SQTEXT, :BOOLEAN, :NAME, :NUMBER, :COMMENT, :MLCOMMENT, :RETURN, :SQUOTE, :DQUOTE, :VARIABLE].each do |name|
         it "should have a token named #{name.to_s}" do
@@ -460,6 +486,23 @@ describe Puppet::Parser::Lexer::TOKENS[:REGEX] do
         @token.regex.should =~ '/this is a regex/'
     end
 
+    it 'should not match if there is \n in the regex' do
+        @token.regex.should_not =~ "/this is \n a regex/"
+    end
+
+    describe "when parsing the divide operator" do
+        before { @lexer = Puppet::Parser::Lexer.new }
+
+        it "should not mis-lex it as a regex start" do
+            @lexer.string = "$var = 4096 / 2 / 2"
+            tokens = []
+            @lexer.scan do |name, value|
+                tokens << value
+            end
+            tokens[4][:value].should_not be_a(Regexp)
+        end
+    end
+
     describe "when including escaped slashes" do
         before { @lexer = Puppet::Parser::Lexer.new }
 
@@ -473,17 +516,50 @@ describe Puppet::Parser::Lexer::TOKENS[:REGEX] do
         end
     end
 
-
     it "should return the REGEX token and a Regexp" do
         @token.convert(stub("lexer"), "/myregex/").should == [Puppet::Parser::Lexer::TOKENS[:REGEX], Regexp.new(/myregex/)]
     end
 end
 
+describe Puppet::Parser::Lexer, "when lexing lexeme" do
+    before { @lexer = Puppet::Parser::Lexer.new }
+
+    it "should set no_regex if a no_regex_after lexeme is found" do
+        @lexer.string = "$var = 4096"
+        @lexer.fullscan
+
+        @lexer.no_regex.should be_true
+    end
+
+    it "should not match regex anymore when no_regex is true" do
+        @lexer.string = "/regex/"
+        @lexer.no_regex = true
+
+        @lexer.find_regex_token.should == [nil, ""]
+    end
+
+    it "should match regex when no_regex is false" do
+        @lexer.string = "/regex/"
+        @lexer.no_regex = false
+
+        @lexer.find_regex_token.should == [ Puppet::Parser::Lexer::TOKENS[:REGEX], "/regex/"]
+    end
+
+    it "should set no_regex back to false when a false no_regex_after lexeme is lexed" do
+        @lexer.no_regex = true
+        @lexer.string = "node"
+
+        @lexer.fullscan
+
+        @lexer.no_regex.should be_false
+    end
+end
+
 describe Puppet::Parser::Lexer, "when lexing comments" do
     before { @lexer = Puppet::Parser::Lexer.new }
 
     it "should accumulate token in munge_token" do
-        token = stub 'token', :skip => true, :accumulate? => true, :incr_line => nil, :skip_text => false
+        token = stub 'token', :skip => true, :accumulate? => true, :incr_line => nil, :skip_text => false, :no_regex_after => nil
         token.stubs(:convert).with(@lexer, "# this is a comment").returns([token, " this is a comment"])
 
         @lexer.munge_token(token, "# this is a comment")
-- 
1.6.4
