Hello,

Thanks to Kornél's msg I noticed that I shouldn't reset cached token
on identifier "partial". Thus I created fixed patch.

Now I blame SeekableStreamReader on bringing UTF8 related bug
(without proof ;-) so I made a patch to eliminate this class.

Additionally I made tiny modification for 'MZ' executable check
(even with a test case bom-mz.cs that differentiates csc and mcs).

I'm not sure if it really is the culprit, but now we don't have
tricky stream usage, so now the code should be a bit healthy.

In general adding more tests to the xtoken() routine hurts performance,
that is why we have avoided it.  I also believe that the new code is a
bit more difficult to follow: just how many saved variables are we going
to have?

I agree that xtoken() could be performance sensitive, but on the
other hand, current code moves back the stream, which results in
extraneous parse on every parenthesized expressions. Actually
as mentioned later, the patched code seems faster. Note that
SeekableStreamReader has to call encoding GetCharCount() when
we reposition the stream.

For complexity, SeekableStreamReader is much more difficult to
debug; it requires us to debug not only tokenizer but also
Stream repositioning, Encoding conversion sanity, relationship
between Stream repositioning and conversion in-process bytes
(incomplete blocks).

I needed to introduce just one cached token and one boolean flag
to support putback from parser (the latter one is required because
there could be another putback_char).

Before we can do something about this, I would like two tests to be
done:

        time mcs --parse sourcefiles

Where sourcefiles means a lot of source code, measure before and after.

Then we should measure the impact of the allocation of SavedToken with
mono --profile before and after.

Actually the resulting performance is better than existing code:

existing code at *best*:
real    0m2.711s
user    0m0.080s
sys     0m0.050s

Total memory allocated: 34835 KB

patched code at *worst*:
real    0m2.635s
user    0m0.020s
sys     0m0.030s
Total memory allocated: 34083 KB

Most of the memory consumption difference comes from
SeekableStreamReader:

########################
    731 KB Mono.CSharp.SeekableStreamReader::.ctor(StreamReader)
        731 KB      363 System.Char[]
 Callers (with count) that contribute at least for 1%:
        363  100 % Mono.CSharp.SeekableStreamReader::.ctor(Stream,Encoding)

Atsushi Eno
Index: support.cs
===================================================================
--- support.cs  (revision 49144)
+++ support.cs  (working copy)
@@ -344,6 +344,7 @@
                }
        }
 
+/*
        /// <summary>
        ///   This is a wrapper around StreamReader which is seekable.
        /// </summary>
@@ -442,6 +443,7 @@
                        return buffer [pos++];
                }
        }
+*/
 
        public class DoubleHash {
                const int DEFAULT_INITIAL_BUCKETS = 100;
Index: cs-tokenizer.cs
===================================================================
--- cs-tokenizer.cs     (revision 49144)
+++ cs-tokenizer.cs     (working copy)
@@ -31,7 +31,7 @@
 
        public class Tokenizer : yyParser.yyInput
        {
-               SeekableStreamReader reader;
+               StreamReader reader;
                SourceFile ref_name;
                SourceFile file_name;
                int ref_line = 1;
@@ -45,6 +45,8 @@
                Location current_location;
                Location current_comment_location = Location.Null;
                ArrayList escapedIdentifiers = new ArrayList ();
+               SavedToken saved_token = SavedToken.Null;
+               bool putback_ambiguous_close_parens = false;
 
                //
                // XML documentation buffer. The save point is used to divide
@@ -385,7 +387,7 @@
                        defines [def] = true;
                }
                
-               public Tokenizer (SeekableStreamReader input, SourceFile file, 
ArrayList defs)
+               public Tokenizer (StreamReader input, SourceFile file, 
ArrayList defs)
                {
                        this.ref_name = file;
                        this.file_name = file;
@@ -467,19 +469,11 @@
 
                                --deambiguate_close_parens;
 
-                               // Save current position and parse next token.
-                               int old = reader.Position;
-                               int old_ref_line = ref_line;
-                               int old_col = col;
-
-                               // disable preprocessing directives when peeking
-                               process_directives = false;
+                               // Save next token.
+                               Location cur_loc = current_location;
                                int new_token = token ();
-                               process_directives = true;
-                               reader.Position = old;
-                               ref_line = old_ref_line;
-                               col = old_col;
-                               putback_char = -1;
+                               saved_token = new SavedToken (new_token, val, 
Location);
+                               current_location = cur_loc;
 
                                if (new_token == Token.OPEN_PARENS)
                                        return Token.CLOSE_PARENS_OPEN_PARENS;
@@ -658,7 +652,7 @@
 
                public void Deambiguate_CloseParens ()
                {
-                       putback (')');
+                       putback_ambiguous_close_parens = true;
                        deambiguate_close_parens++;
                }
 
@@ -1087,6 +1081,10 @@
                int getChar ()
                {
                        int x;
+                       if (putback_ambiguous_close_parens) {
+                               putback_ambiguous_close_parens = false;
+                               return ')';
+                       }
                        if (putback_char != -1) {
                                x = putback_char;
                                putback_char = -1;
@@ -1106,6 +1104,8 @@
 
                int peekChar ()
                {
+                       if (putback_ambiguous_close_parens)
+                               return ')';
                        if (putback_char != -1)
                                return putback_char;
                        putback_char = reader.Read ();
@@ -1114,6 +1114,8 @@
 
                int peekChar2 ()
                {
+                       if (putback_ambiguous_close_parens)
+                               return ')';
                        if (putback_char != -1)
                                return putback_char;
                        return reader.Peek ();
@@ -1202,7 +1204,14 @@
 
                public int token ()
                 {
-                       current_token = xtoken ();
+                       if (!saved_token.Location.IsNull) {
+                               current_token = saved_token.Token;
+                               val = saved_token.Value;
+                               current_location = saved_token.Location;
+                               saved_token = SavedToken.Null;
+                       }
+                       else
+                               current_token = xtoken ();
                         return current_token;
                 }
 
@@ -1844,25 +1853,16 @@
                        }
 
                        if (res == Token.PARTIAL) {
-                               // Save current position and parse next token.
-                               int old = reader.Position;
-                               int old_putback = putback_char;
-                               int old_ref_line = ref_line;
-                               int old_col = col;
-
-                               putback_char = -1;
-
+                               // Save next token.
+                               Location cur_loc = Location;
                                int next_token = token ();
+                               saved_token = new SavedToken (next_token, val, 
Location);
+                               current_location = cur_loc;
                                bool ok = (next_token == Token.CLASS) ||
                                        (next_token == Token.STRUCT) ||
                                        (next_token == Token.INTERFACE) ||
                                        (next_token == Token.ENUM); // 
"partial" is a keyword in 'partial enum', even though it's not valid
 
-                               reader.Position = old;
-                               ref_line = old_ref_line;
-                               col = old_col;
-                               putback_char = old_putback;
-
                                if (ok)
                                        return res;
                                else {
@@ -2309,6 +2309,23 @@
                        }
                                
                }
+
+               public struct SavedToken
+               {
+                       public static readonly SavedToken Null =
+                               new SavedToken (0, null, Location.Null);
+
+                       public readonly int Token;
+                       public readonly object Value;
+                       public readonly Location Location;
+
+                       public SavedToken (int token, object value, Location 
loc)
+                       {
+                               Token = token;
+                               Value = value;
+                               Location = loc;
+                       }
+               }
        }
 
        //
Index: cs-parser.jay
===================================================================
--- cs-parser.jay       (revision 49144)
+++ cs-parser.jay       (working copy)
@@ -4856,7 +4856,7 @@
        }
 }                 
 
-public CSharpParser (SeekableStreamReader reader, SourceFile file, ArrayList 
defines)
+public CSharpParser (StreamReader reader, SourceFile file, ArrayList defines)
 {
        current_namespace = new NamespaceEntry (null, file, null, 
Location.Null);
        this.name = file.Name;
Index: driver.cs
===================================================================
--- driver.cs   (revision 49144)
+++ driver.cs   (working copy)
@@ -153,7 +153,7 @@
                        }
 
                        using (input){
-                               SeekableStreamReader reader = new 
SeekableStreamReader (input, encoding);
+                               StreamReader reader = new StreamReader (input, 
encoding, true);
                                Tokenizer lexer = new Tokenizer (reader, file, 
defines);
                                int token, tokens = 0, errors = 0;
 
@@ -181,16 +181,16 @@
                                return;
                        }
 
-                       SeekableStreamReader reader = new SeekableStreamReader 
(input, encoding);
-
                        // Check 'MZ' header
-                       if (reader.Read () == 77 && reader.Read () == 90) {
+                       if (input.ReadByte () == 77 && input.ReadByte () == 90) 
{
                                Report.Error (2015, "Source file `{0}' is a 
binary file and not a text file", file.Name);
                                input.Close ();
                                return;
                        }
+                       input.Seek (0, SeekOrigin.Begin);
 
-                       reader.Position = 0;
+                       StreamReader reader = new StreamReader (input, 
encoding, true);
+
                        parser = new CSharpParser (reader, file, defines);
                        parser.ErrorOutput = Report.Stderr;
                        try {
_______________________________________________
Mono-devel-list mailing list
Mono-devel-list@lists.ximian.com
http://lists.ximian.com/mailman/listinfo/mono-devel-list

Reply via email to