This fixes #2666 and #2664. It is admittedly a gross hack: the lexer now tracks when a regex literal can legally appear in the token stream and when it cannot, so that, for example, the slashes in "$var = 4096 / 2 / 2" are lexed as division rather than as the start of a regex.
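
To make the idea concrete for reviewers, here is a minimal standalone sketch of the mechanism. The SketchLexer class, its RULES table and its NO_REGEX_AFTER table are illustrative names only, not code from this patch: each rule records whether a regex literal may legally follow the token it produces, and the scanner skips the regex rule whenever the previous token forbids one.

require 'strscan'

# Toy lexer: rules are tried in order (the real lexer picks the longest
# match); only the no_regex bookkeeping matters for this illustration.
class SketchLexer
  REGEX = %r{/[^/\n]*/}

  RULES = [
    [/\$\w+/, :VARIABLE],
    [/\d+/,   :NUMBER],
    [/=~/,    :MATCH],   # listed before '=' so '=~' is not split in two
    [/=/,     :EQUALS],
    [%r{/},   :DIV],
  ]

  # true  => a regex literal can never follow this token (after '=' or an
  #          operand, a '/' has to be something else, e.g. division)
  # false => a regex literal may follow (e.g. after '=~')
  # Tokens that are absent leave the flag unchanged.
  NO_REGEX_AFTER = { :EQUALS => true, :NUMBER => true, :VARIABLE => true, :MATCH => false }

  def initialize(input)
    @scanner  = StringScanner.new(input)
    @no_regex = false
  end

  def tokens
    result = []
    loop do
      @scanner.skip(/\s+/)
      break if @scanner.eos?

      # Only try the regex rule when the previous token allows a regex here.
      if !@no_regex && (value = @scanner.scan(REGEX))
        result << [:REGEX, value]
        next
      end

      pattern, name = RULES.find { |pat, _| @scanner.match?(pat) }
      raise "unexpected input: #{@scanner.rest.inspect}" unless pattern

      result << [name, @scanner.scan(pattern)]
      flag = NO_REGEX_AFTER[name]
      @no_regex = flag unless flag.nil?
    end
    result
  end
end

# SketchLexer.new("$var = 4096 / 2 / 2").tokens lexes every '/' as :DIV,
# while SketchLexer.new("$var =~ /4096/").tokens still yields a :REGEX token.

The patch below wires the same flag into the real lexer: tokens may carry a :no_regex_after option, munge_token updates @no_regex from it, and find_regex_token skips the :REGEX rule while the flag is set.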
Signed-off-by: Brice Figureau <[email protected]>
---
 lib/puppet/parser/lexer.rb |   32 ++++++++++++-----
 spec/unit/parser/lexer.rb  |   82 ++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 102 insertions(+), 12 deletions(-)

diff --git a/lib/puppet/parser/lexer.rb b/lib/puppet/parser/lexer.rb
index e027a69..7043f30 100644
--- a/lib/puppet/parser/lexer.rb
+++ b/lib/puppet/parser/lexer.rb
@@ -13,11 +13,11 @@ module Puppet::Parser; end
 class Puppet::Parser::Lexer
     attr_reader :last, :file
 
-    attr_accessor :line, :indefine
+    attr_accessor :line, :indefine, :no_regex
 
     # Our base token class.
     class Token
-        attr_accessor :regex, :name, :string, :skip, :incr_line, :skip_text, :accumulate
+        attr_accessor :regex, :name, :string, :skip, :incr_line, :skip_text, :accumulate, :no_regex_after
 
         def initialize(regex, name)
             if regex.is_a?(String)
@@ -87,7 +87,13 @@ class Puppet::Parser::Lexer
         # Define more tokens.
         def add_tokens(hash)
             hash.each do |regex, name|
-                add_token(name, regex)
+                options = {}
+                if name.is_a?(Hash)
+                    options = name
+                    name = options[:name]
+                    options.delete(:name)
+                end
+                add_token(name, regex, options)
             end
         end
 
@@ -106,7 +112,7 @@ class Puppet::Parser::Lexer
         '}' => :RBRACE,
         '(' => :LPAREN,
         ')' => :RPAREN,
-        '=' => :EQUALS,
+        '=' => { :name => :EQUALS, :no_regex_after => true },
         '+=' => :APPENDS,
         '==' => :ISEQUAL,
         '>=' => :GREATEREQUAL,
@@ -124,7 +130,7 @@ class Puppet::Parser::Lexer
         '<|' => :LCOLLECT,
         '|>' => :RCOLLECT,
         ';' => :SEMIC,
-        '?' => :QMARK,
+        '?' => { :name => :QMARK, :no_regex_after => false },
         '\\' => :BACKSLASH,
         '=>' => :FARROW,
         '+>' => :PARROW,
@@ -134,8 +140,8 @@ class Puppet::Parser::Lexer
         '*' => :TIMES,
         '<<' => :LSHIFT,
         '>>' => :RSHIFT,
-        '=~' => :MATCH,
-        '!~' => :NOMATCH,
+        '=~' => { :name => :MATCH, :no_regex_after => false },
+        '!~' => { :name => :NOMATCH, :no_regex_after => false },
         %r{([a-z][-\w]*)?(::[a-z][-\w]*)+} => :CLASSNAME, # Require '::' in the class name, else we'd compete with NAME
         %r{((::){0,1}[A-Z][-\w]*)+} => :CLASSREF
     )
@@ -211,7 +217,7 @@ class Puppet::Parser::Lexer
     KEYWORDS = TokenList.new
 
     KEYWORDS.add_tokens(
-        "case" => :CASE,
+        "case" => { :name => :CASE, :no_regex_after => false },
         "class" => :CLASS,
         "default" => :DEFAULT,
         "define" => :DEFINE,
@@ -220,7 +226,7 @@ class Puppet::Parser::Lexer
         "elsif" => :ELSIF,
         "else" => :ELSE,
         "inherits" => :INHERITS,
-        "node" => :NODE,
+        "node" => { :name => :NODE, :no_regex_after => false },
         "and" => :AND,
         "or" => :OR,
         "undef" => :UNDEF,
@@ -294,6 +300,7 @@ class Puppet::Parser::Lexer
         # a slightly negative affect and was a good bit more complicated.
         TOKENS.regex_tokens.each do |token|
             next unless match_length = @scanner.match?(token.regex)
+            next if token.name == :REGEX and @no_regex
 
             # We've found a longer match
             if match_length > length
@@ -345,6 +352,7 @@ class Puppet::Parser::Lexer
         @indefine = false
         @expected = []
         @commentstack = [ ['', @line] ]
+        @no_regex = false
     end
 
     # Make any necessary changes to the token and/or value.
@@ -365,6 +373,12 @@ class Puppet::Parser::Lexer
             @commentstack.push(comment)
         end
 
+        if token.no_regex_after
+            @no_regex = true
+        elsif not token.no_regex_after.nil?
+            @no_regex = false
+        end
+
         return if token.skip
 
         return token, { :value => value, :line => @line }
diff --git a/spec/unit/parser/lexer.rb b/spec/unit/parser/lexer.rb
index 1c3e91b..17bf7d1 100755
--- a/spec/unit/parser/lexer.rb
+++ b/spec/unit/parser/lexer.rb
@@ -44,7 +44,7 @@ describe Puppet::Parser::Lexer::Token do
         @token = Puppet::Parser::Lexer::Token.new(%r{something}, :NAME)
     end
 
-    [:regex, :name, :string, :skip, :incr_line, :skip_text, :accumulate].each do |param|
+    [:regex, :name, :string, :skip, :incr_line, :skip_text, :accumulate, :no_regex_after].each do |param|
         it "should have a #{param.to_s} reader" do
             @token.should be_respond_to(param)
         end
@@ -95,6 +95,12 @@ describe Puppet::Parser::Lexer::TokenList do
         @list[:bar].should_not be_nil
     end
 
+    it "should init token options if called with an hash" do
+        @list.expects(:add_token).with(:name, "whatever", { :option => "options" })
+
+        @list.add_tokens "whatever" => { :name => :name, :option => "options" }
+    end
+
     it "should fail to add tokens sharing a name with an existing token" do
         @list.add_token :name, "whatever"
         lambda { @list.add_token :name, "whatever" }.should raise_error(ArgumentError)
@@ -191,6 +197,17 @@ describe Puppet::Parser::Lexer::TOKENS do
     end
 
     {
+        :EQUALS => true,
+        :QMARK => false,
+        :MATCH => false,
+        :NOMATCH => false,
+    }.each do |name, no_regex_after|
+        it "should have have a token #{name} with no_regex_after = #{no_regex_after} " do
+            Puppet::Parser::Lexer::TOKENS[name].no_regex_after.should == no_regex_after
+        end
+    end
+
+    {
         "case" => :CASE,
         "class" => :CLASS,
         "default" => :DEFAULT,
@@ -216,6 +233,15 @@ describe Puppet::Parser::Lexer::TOKENS do
         end
     end
 
+    {
+        :CASE => false,
+        :NODE => false,
+    }.each do |name, no_regex_after|
+        it "should have have a keyword #{name} with no_regex_after = #{no_regex_after} " do
+            Puppet::Parser::Lexer::KEYWORDS[name].no_regex_after.should == no_regex_after
+        end
+    end
+
     # These tokens' strings don't matter, just that the tokens exist.
     [:DQTEXT, :SQTEXT, :BOOLEAN, :NAME, :NUMBER, :COMMENT, :MLCOMMENT, :RETURN, :SQUOTE, :DQUOTE, :VARIABLE].each do |name|
         it "should have a token named #{name.to_s}" do
@@ -460,6 +486,23 @@ describe Puppet::Parser::Lexer::TOKENS[:REGEX] do
         @token.regex.should =~ '/this is a regex/'
     end
 
+    it 'should not match if there is \n in the regex' do
+        @token.regex.should_not =~ "/this is \n a regex/"
+    end
+
+    describe "when parsing the divide operator" do
+        before { @lexer = Puppet::Parser::Lexer.new }
+
+        it "should not mis-lex it as a regex start" do
+            @lexer.string = "$var = 4096 / 2 / 2"
+            tokens = []
+            @lexer.scan do |name, value|
+                tokens << value
+            end
+            tokens[4][:value].should_not be_a(Regexp)
+        end
+    end
+
     describe "when including escaped slashes" do
         before { @lexer = Puppet::Parser::Lexer.new }
 
@@ -473,17 +516,50 @@ describe Puppet::Parser::Lexer::TOKENS[:REGEX] do
         end
     end
 
-
     it "should return the REGEX token and a Regexp" do
         @token.convert(stub("lexer"), "/myregex/").should == [Puppet::Parser::Lexer::TOKENS[:REGEX], Regexp.new(/myregex/)]
     end
 end
 
+describe Puppet::Parser::Lexer, "when lexing lexeme" do
+    before { @lexer = Puppet::Parser::Lexer.new }
+
+    it "should set no_regex if a no_regex_after lexeme is found" do
+        @lexer.string = "$var = 4096"
+        @lexer.fullscan
+
+        @lexer.no_regex.should be_true
+    end
+
+    it "should not match regex anymore when no_regex is true" do
+        @lexer.string = "/regex/"
+        @lexer.no_regex = true
+
+        @lexer.find_regex_token.should == [nil, ""]
+    end
+
+    it "should match regex when no_regex is false" do
+        @lexer.string = "/regex/"
+        @lexer.no_regex = false
+
+        @lexer.find_regex_token.should == [ Puppet::Parser::Lexer::TOKENS[:REGEX], "/regex/"]
+    end
+
+    it "should set no_regex back to false when a false no_regex_after lexeme is lexed" do
+        @lexer.no_regex = true
+        @lexer.string = "node"
+
+        @lexer.fullscan
+
+        @lexer.no_regex.should be_false
+    end
+end
+
 describe Puppet::Parser::Lexer, "when lexing comments" do
     before { @lexer = Puppet::Parser::Lexer.new }
 
     it "should accumulate token in munge_token" do
-        token = stub 'token', :skip => true, :accumulate? => true, :incr_line => nil, :skip_text => false
+        token = stub 'token', :skip => true, :accumulate? => true, :incr_line => nil, :skip_text => false, :no_regex_after => nil
         token.stubs(:convert).with(@lexer, "# this is a comment").returns([token, " this is a comment"])
 
         @lexer.munge_token(token, "# this is a comment")
-- 
1.6.4
