edit: $/Dev10/feature/vs_langs01_s/Merlin/Main/Languages/Ruby/IronRuby.Tests/RubyTests.cs;C1696776
File: RubyTests.cs
===================================================================
--- $/Dev10/feature/vs_langs01_s/Merlin/Main/Languages/Ruby/IronRuby.Tests/RubyTests.cs;C1696776  (server)    3/25/2010 10:10 AM
+++ Shelved Change: $/Dev10/feature/vs_langs01_s/Merlin/Main/Languages/Ruby/IronRuby.Tests/RubyTests.cs;Tokens.1
@@ -68,6 +68,11 @@
                 ParseEscapes1,
                 ParseEolns1,
                 Scenario_ParseRegex1,
+                StringLiterals1,
+                Escapes1,
+                Heredoc1,
+                LexicalState1,
+                ParsingSymbols1,
 
                 Scenario_RubyCategorizer1,
                 NameMangling1,
@@ -120,13 +125,8 @@
 
                 NumericLiterals1,
                 NumericOps1,
-                StringLiterals1,
-                Escapes1,
                 UnicodeEscapes1,
                 UnicodeEscapes2,
-
-                Heredoc1,
-                ParsingSymbols1,
                 
                 KCode1,
                 KCode2,
===================================================================
edit: $/Dev10/feature/vs_langs01_s/Merlin/Main/Languages/Ruby/IronRuby.Tests/Parser/AssertTokenizer.cs;C1162930
File: AssertTokenizer.cs
===================================================================
--- $/Dev10/feature/vs_langs01_s/Merlin/Main/Languages/Ruby/IronRuby.Tests/Parser/AssertTokenizer.cs;C1162930  (server)    3/25/2010 10:07 AM
+++ Shelved Change: $/Dev10/feature/vs_langs01_s/Merlin/Main/Languages/Ruby/IronRuby.Tests/Parser/AssertTokenizer.cs;Tokens.1
@@ -196,8 +196,13 @@
             return this;
         }
 
-        public AssertTokenizer/*!*/ this[Tokens token] {
-            get { return Read(token); }
+        public AssertTokenizer/*!*/ State(LexicalState expected) {
+            _tests.Assert(Tokenizer.LexicalState == expected);
+            return this;
+        }
+
+        public AssertTokenizer/*!*/ this[Tokens expected] {
+            get { return Read(expected); }
         }
 
         public AssertTokenizer/*!*/ this[string/*!*/ expected] {
===================================================================
edit: $/Dev10/feature/vs_langs01_s/Merlin/Main/Languages/Ruby/IronRuby.Tests/Parser/ParserTests.cs;C1696776
File: ParserTests.cs
===================================================================
--- $/Dev10/feature/vs_langs01_s/Merlin/Main/Languages/Ruby/IronRuby.Tests/Parser/ParserTests.cs;C1696776  (server)    3/24/2010 10:06 AM
+++ Shelved Change: $/Dev10/feature/vs_langs01_s/Merlin/Main/Languages/Ruby/IronRuby.Tests/Parser/ParserTests.cs;Tokens.1
@@ -703,6 +703,26 @@
             t.Expect();
         }
 
+        private void LexicalState1() {
+            AssertTokenizer t = NewAssertTokenizer();
+
+            // command mode:
+            t.Load("a")[Tokens.Identifier, "a"].State(LexicalState.EXPR_CMDARG).EOF();
+            t.Load("1;a")[1][Tokens.Semicolon][Tokens.Identifier, "a"].State(LexicalState.EXPR_CMDARG).EOF();
+
+            t.Load("a(b c)")
+                [Tokens.Identifier, "a"].State(LexicalState.EXPR_CMDARG)
+                [Tokens.LeftParenthesis].State(LexicalState.EXPR_BEG)       // switches to command mode for the next non-whitespace token
+                [Tokens.Identifier, "b"].State(LexicalState.EXPR_CMDARG)
+                [Tokens.Identifier, "c"].State(LexicalState.EXPR_ARG)       // command mode switched off
+                [Tokens.RightParenthesis].State(LexicalState.EXPR_END).EOF();
+
+            t.Load("a\nb")
+                [Tokens.Identifier, "a"].State(LexicalState.EXPR_CMDARG)
+                [(Tokens)'\n'].State(LexicalState.EXPR_BEG)                 // switches to command mode for the next non-whitespace token
+                [Tokens.Identifier, "b"].State(LexicalState.EXPR_CMDARG).EOF();
+        }
+
         private void Heredoc1() {
             AssertTokenizer t = NewAssertTokenizer();
 
@@ -721,7 +741,47 @@
             t.Load("<<LABEL\nLABEL123\nLABEL")
                 [Tokens.StringBegin]["LABEL123\n"][Tokens.StringEnd][(Tokens)'\n'].EOF();
 
+            t.Load("puts <<L1, 1, <<L2, 2\naaa\nL1\nbbb\nL2\n3")
+                [Tokens.Identifier, "puts"]
+                [Tokens.StringBegin]["aaa\n"][Tokens.StringEnd]
+                [Tokens.Comma][1][Tokens.Comma]
+                [Tokens.StringBegin]["bbb\n"][Tokens.StringEnd]
+                [Tokens.Comma][2]
+                [(Tokens)'\n']
+                [3].EOF();
+
             t.Expect();
+
+            //// index:                          111111111122 2222 222 2333 333 3
+            ////                       0123456789012345678901 2345 678 9012 345 6
+            //TestCategorizer(Engine, "puts <<L1, 1, <<L2, 2\naaa\nL1\nbbb\nL2\n3", -1, new TokenInfo[] {
+            //// column:               1234567890123456789012 1234 123 1234 123 1
+            //// line:                 1111111111111111111111 2222 333 4444 555 6
+            //// 
+            //    new TokenInfo(new SourceSpan(new SourceLocation(0, 1, 1), new SourceLocation(4, 1, 5)), TokenCategory.Identifier, TokenTriggers.None),
+            //    // <<L1
+            //    new TokenInfo(new SourceSpan(new SourceLocation(5, 1, 6), new SourceLocation(9, 1, 10)), TokenCategory.StringLiteral, TokenTriggers.None),
+            //    // aaa
+            //    new TokenInfo(new SourceSpan(new SourceLocation(22, 2, 1), new SourceLocation(26, 3, 1)), TokenCategory.StringLiteral, TokenTriggers.None),
+            //    // L1
+            //    new TokenInfo(new SourceSpan(new SourceLocation(9, 1, 10), new SourceLocation(9, 1, 10)), TokenCategory.StringLiteral, TokenTriggers.None),
+            //    new TokenInfo(new SourceSpan(new SourceLocation(9, 1, 10), new SourceLocation(10, 1, 11)), TokenCategory.Delimiter, TokenTriggers.ParameterNext),
+            //    // 1
+            //    new TokenInfo(new SourceSpan(new SourceLocation(11, 1, 12), new SourceLocation(12, 1, 13)), TokenCategory.NumericLiteral, TokenTriggers.None),
+            //    new TokenInfo(new SourceSpan(new SourceLocation(12, 1, 13), new SourceLocation(13, 1, 14)), TokenCategory.Delimiter, TokenTriggers.ParameterNext),
+            //    // <<L2
+            //    new TokenInfo(new SourceSpan(new SourceLocation(14, 1, 15), new SourceLocation(18, 1, 19)), TokenCategory.StringLiteral, TokenTriggers.None),
+            //    // bbb
+            //    new TokenInfo(new SourceSpan(new SourceLocation(29, 4, 1), new SourceLocation(33, 5, 1)), TokenCategory.StringLiteral, TokenTriggers.None),
+            //    // L2
+            //    new TokenInfo(new SourceSpan(new SourceLocation(18, 1, 19), new SourceLocation(18, 1, 19)), TokenCategory.StringLiteral, TokenTriggers.None),
+            //    new TokenInfo(new SourceSpan(new SourceLocation(18, 1, 19), new SourceLocation(19, 1, 20)), TokenCategory.Delimiter, TokenTriggers.ParameterNext),
+            //    // 2
+            //    new TokenInfo(new SourceSpan(new SourceLocation(20, 1, 21), new SourceLocation(21, 1, 22)), TokenCategory.NumericLiteral, TokenTriggers.None),
+            //    // 3
+            //    new TokenInfo(new SourceSpan(new SourceLocation(36, 6, 1), new SourceLocation(37, 6, 2)), TokenCategory.NumericLiteral, TokenTriggers.None),
+                
+            //});
         }
 
         public void ParsingSymbols1() {
===================================================================
edit: $/Dev10/feature/vs_langs01_s/Merlin/Main/Languages/Ruby/IronRuby.Tests/Parser/TokenizerTestDriver.cs;C966724
File: TokenizerTestDriver.cs
===================================================================
--- $/Dev10/feature/vs_langs01_s/Merlin/Main/Languages/Ruby/IronRuby.Tests/Parser/TokenizerTestDriver.cs;C966724  (server)    3/25/2010 11:37 AM
+++ Shelved Change: $/Dev10/feature/vs_langs01_s/Merlin/Main/Languages/Ruby/IronRuby.Tests/Parser/TokenizerTestDriver.cs;Tokens.1
@@ -302,7 +302,11 @@
                     break;
 
                 case Tokens.StringContent:
-                    output.Write("String(\"{0}\")", Parser.EscapeString(value.String));
+                    if (value.StringContent is string) {
+                        output.Write("String(\"{0}\")", Parser.EscapeString((string)value.StringContent));
+                    } else {
+                        output.Write("String({0})", BitConverter.ToString((byte[])value.StringContent));
+                    }
                     break;
 
                 case Tokens.StringBegin:
===================================================================
edit: $/Dev10/feature/vs_langs01_s/Merlin/Main/Languages/Ruby/Ruby/Compiler/Parser/Tokenizer.cs;C1696776
File: Tokenizer.cs
===================================================================
--- $/Dev10/feature/vs_langs01_s/Merlin/Main/Languages/Ruby/Ruby/Compiler/Parser/Tokenizer.cs;C1696776  (server)    3/24/2010 9:51 AM
+++ Shelved Change: $/Dev10/feature/vs_langs01_s/Merlin/Main/Languages/Ruby/Ruby/Compiler/Parser/Tokenizer.cs;Tokens.1
@@ -33,7 +33,7 @@
         EXPR_BEG,			// ignore newline, +/- is a sign.
         EXPR_END,			// newline significant, +/- is an operator.
         EXPR_ARG,			// newline significant, +/- is an operator.
-        EXPR_CMDARG,		// newline significant, +/- is an operator.
+        EXPR_CMDARG,		// newline significant, +/- is an operator, an identifier is a command name, special behavior of do keyword and left parenthesis
         EXPR_ENDARG,		// newline significant, +/- is an operator.
         EXPR_MID,			// newline significant, +/- is an operator.
         EXPR_FNAME,			// ignore newline, no reserved words.
@@ -42,6 +42,9 @@
     };
 
     public class Tokenizer : TokenizerService {
+        private readonly ILexicalVariableResolver/*!*/ _localVariableResolver;
+        private BignumParser _bigIntParser;
+
         private const int InitialBufferSize = 80;
 
         public bool ForceBinaryMultiByte { get; set; }
@@ -73,22 +76,23 @@
 
         private SourceUnit _sourceUnit;
         private ErrorSink/*!*/ _errorSink;
-        private BignumParser _bigIntParser;
-        private ILexicalVariableResolver/*!*/ _localVariableResolver;
         
         #region State
 
         private LexicalState _lexicalState;
-        private bool _commaStart = true;
-        private StringTokenizer _currentString = null;
-        private int _cmdArgStack = 0;
-        private int _condStack = 0;
+        
+        // true if the following identifier is treated as a command name (sets LexicalState.EXPR_CMDARG):
+        private bool _commandMode;
 
+        private StringTokenizer _currentString;
+        private int _cmdArgStack;
+        private int _condStack;
+
         // Non-zero => End of the last heredoc that finished reading content.
         // While non-zero the current stream position doesn't correspond the current line and line index 
         // (the stream is ahead, we are reading from a buffer restored by the last heredoc).
         private int _heredocEndLine;
-        private int _heredocEndLineIndex = -1;
+        private int _heredocEndLineIndex;
 
         #endregion
 
@@ -105,6 +109,7 @@
         // current line no:
         private int _currentLine;
         private int _currentLineIndex;
+
         
         // out: whether the last token terminated
         private bool _unterminatedToken;
@@ -163,7 +168,7 @@
 
             SetState(LexicalState.EXPR_BEG);
 
-            _commaStart = true;
+            _commandMode = true;
             _currentString = null;
             _cmdArgStack = 0;
             _condStack = 0;
@@ -258,47 +263,47 @@
         }
         
         internal void CMDARG_PUSH(int n) { 
-            BITSTACK_PUSH(ref _cmdArgStack, n); 
+            BitStackPush(ref _cmdArgStack, n); 
         }
 
         internal int CMDARG_POP() { 
-            return BITSTACK_POP(ref _cmdArgStack); 
+            return BitStackPop(ref _cmdArgStack); 
         }
 
         internal void CMDARG_LEXPOP() { 
-            BITSTACK_LEXPOP(ref _cmdArgStack); 
+            BitStackOrPop(ref _cmdArgStack); 
         }
 
         internal bool CMDARG_P() { 
-            return BITSTACK_SET_P(_cmdArgStack); 
+            return BitStackPeek(_cmdArgStack); 
         }
 
         // Push(n)
-        private void BITSTACK_PUSH(ref int stack, int n) {
+        private void BitStackPush(ref int stack, int n) {
             stack = (stack << 1) | ((n) & 1);
         }
 
         // Pop()
-        private int BITSTACK_POP(ref int stack) {
+        private int BitStackPop(ref int stack) {
             return (stack >>= 1);
         }
 
         // x = Pop(), Top |= x
-        private void BITSTACK_LEXPOP(ref int stack) {
+        private void BitStackOrPop(ref int stack) {
             stack = (stack >> 1) | (stack & 1);
         }
 
         // Peek() != 0
-        private bool BITSTACK_SET_P(int stack) {
+        private bool BitStackPeek(int stack) {
             return (stack & 1) != 0;
         }
 
         internal void COND_PUSH(int n) {
-            BITSTACK_PUSH(ref _condStack, n);
+            BitStackPush(ref _condStack, n);
         }
 
         internal int COND_POP() {
-            return BITSTACK_POP(ref _condStack);
+            return BitStackPop(ref _condStack);
         }
 
         internal void COND_LEXPOP() {
@@ -310,7 +315,7 @@
         }
 
         internal bool COND_P() {
-            return BITSTACK_SET_P(_condStack);
+            return BitStackPeek(_condStack);
         }
 
         // Stores the current string tokenizer into the StringEmbeddedVariableBegin token.
@@ -624,15 +629,13 @@
             }
 
             bool whitespaceSeen = false;
-            bool cmdState = _commaStart;
-            _commaStart = false;
-
+            
             while (true) {
                 // TODO:
                 RefillBuffer();
 
-                Tokens token = Tokenize(whitespaceSeen, cmdState);
-            
+                Tokens token = Tokenize(whitespaceSeen);
+                
                 _tokenSpan = new SourceSpan(_currentTokenStart, _currentTokenEnd);
                 DumpToken(token);
                 
@@ -646,14 +649,24 @@
                         whitespaceSeen = true;
                         break;
 
-                    case Tokens.EndOfLine: // not considered whitespace
+                    case Tokens.EndOfLine: // not considered whitespace, just ignored
                         break;
 
                     case Tokens.EndOfFile:
                         _eofReached = true;
+                        _commandMode = false;
+                        return token;
+
+                    case (Tokens)'\n':
+                    case Tokens.Semicolon:
+                    case Tokens.LeftParenthesis:
+                    case Tokens.LeftArgParenthesis:
+                    case Tokens.LeftExprParenthesis:
+                        _commandMode = true;
                         return token;
 
                     default:
+                        _commandMode = false;
                         return token;
                 }
 
@@ -663,7 +676,7 @@
             }
         }
 
-        private Tokens Tokenize(bool whitespaceSeen, bool cmdState) {
+        private Tokens Tokenize(bool whitespaceSeen) {
             MarkTokenStart();
             int c = Read();
 
@@ -727,7 +740,7 @@
                     return MarkSingleLineTokenEnd(ReadSingleQuote());
 
                 case '`':
-                    return MarkSingleLineTokenEnd(ReadBacktick(cmdState));
+                    return MarkSingleLineTokenEnd(ReadBacktick());
 
                 case '?':
                     return TokenizeQuestionmark();
@@ -769,7 +782,6 @@
                     return MarkSingleLineTokenEnd(ReadCaret());
 
                 case ';':
-                    _commaStart = true;
                     _lexicalState = LexicalState.EXPR_BEG;
                     MarkSingleLineTokenEnd();
                     return Tokens.Semicolon;
@@ -783,7 +795,6 @@
                     return MarkSingleLineTokenEnd(ReadTilde());
 
                 case '(':
-                    _commaStart = true;
                     return MarkSingleLineTokenEnd(ReadLeftParenthesis(whitespaceSeen));
 
                 case '[':
@@ -818,7 +829,7 @@
                         _dataOffset = _currentLineIndex + _lineLength;
                         return Tokens.EndOfFile;
                     }
-                    return MarkSingleLineTokenEnd(ReadIdentifier(c, cmdState));
+                    return MarkSingleLineTokenEnd(ReadIdentifier(c));
 
                 default:
                     if (!IsIdentifierInitial(c, _multiByteIdentifier)) {
@@ -843,7 +854,7 @@
                         return Tokens.Whitespace;
                     }
 
-                    return MarkSingleLineTokenEnd(ReadIdentifier(c, cmdState));
+                    return MarkSingleLineTokenEnd(ReadIdentifier(c));
             }
         }
 
@@ -877,9 +888,8 @@
                 return Tokens.EndOfLine;
             }
 
-            _commaStart = true;
             _lexicalState = LexicalState.EXPR_BEG;
-            return (Tokens)'\n';
+            return (Tokens)'\n';
         }
 
         private Tokens TokenizeBackslash() {
@@ -956,7 +966,7 @@
         //   [:alpha:_][:identifier:]+[=][^=~>]
         //   [:alpha:_][:identifier:]+[=] immediately followed by =>
         // Keywords
-        private Tokens ReadIdentifier(int firstCharacter, bool cmdState) {
+        private Tokens ReadIdentifier(int firstCharacter) {
             // the first character already read:
             int start = _bufferPos - 1;
             SkipVariableName();
@@ -986,7 +996,7 @@
 
                 if (_localVariableResolver.IsLocalVariable(identifier)) {
                     _lexicalState = LexicalState.EXPR_END;
-                } else if (cmdState) {
+                } else if (_commandMode) {
                     _lexicalState = LexicalState.EXPR_CMDARG;
                 } else {
                     _lexicalState = LexicalState.EXPR_ARG;
@@ -1804,14 +1814,19 @@
 
         // String: `...
         // Operator: `
-        private Tokens ReadBacktick(bool cmdState) {
+        private Tokens ReadBacktick() {
             if (_lexicalState == LexicalState.EXPR_FNAME) {
                 _lexicalState = LexicalState.EXPR_END;
                 return (Tokens)'`';
             }
 
             if (_lexicalState == LexicalState.EXPR_DOT) {
-                _lexicalState = (cmdState) ? LexicalState.EXPR_CMDARG : LexicalState.EXPR_ARG;
+                // This used to check whether we are in command mode; there seems to be no way we could get here with it set.
+                // The lexical state is EXPR_BEG after _commandMode is set for the next non-whitespace token, and whitespace tokens don't change the state.
+                // The lexical state is EXPR_DOT after Tokens.SeparatingDoubleColon and Tokens.Dot, neither of which changes the command mode.
+                // _lexicalState = (_commandMode) ? LexicalState.EXPR_CMDARG : LexicalState.EXPR_ARG;
+                Debug.Assert(!_commandMode);
+                _lexicalState = LexicalState.EXPR_ARG;
                 return (Tokens)'`';
             }
 
@@ -2936,7 +2951,7 @@
             private int _position;
 
             public int Position { get { return _position; } set { _position = value; } }
-            public char[] Buffer { get { return _buffer; } set { _buffer = value; } } // TODO: remove
+            public char[] Buffer { get { return _buffer; } set { _buffer = value; } }
 
             public BignumParser() {
             }
===================================================================
