Author: bernhard
Date: Sun Nov  4 11:18:16 2007
New Revision: 22708

Modified:
   trunk/languages/scheme/lib/Scheme.pm
   trunk/languages/scheme/lib/Scheme/Tokenizer.pm

Log:
[Scheme]
Start to convert to regex based lexing, stealing from HOP


Modified: trunk/languages/scheme/lib/Scheme.pm
==============================================================================
--- trunk/languages/scheme/lib/Scheme.pm        (original)
+++ trunk/languages/scheme/lib/Scheme.pm        Sun Nov  4 11:18:16 2007
@@ -93,14 +93,12 @@
 This is called in schemec.
 
 =cut
-
 sub compile {
     my $self = shift;
 
-    my $tokens = Scheme::Tokenizer::tokenize( $self->{file} );
-    # die Dumper( $tokens );
-    my $tree   = Scheme::Parser::parse( $tokens );
-    # die Dumper( $tree );
+    my $tokens_char_by_char = Scheme::Tokenizer::tokenize_char_by_char( $self->{file} );
+    my $tokens_hop          = Scheme::Tokenizer::tokenize_hop( $self->{file} ); # not used yet
+    my $tree                = Scheme::Parser::parse( $tokens_char_by_char );
 
     return link_functions( Scheme::Generator::generate( $tree ) );
 }

Modified: trunk/languages/scheme/lib/Scheme/Tokenizer.pm
==============================================================================
--- trunk/languages/scheme/lib/Scheme/Tokenizer.pm      (original)
+++ trunk/languages/scheme/lib/Scheme/Tokenizer.pm      Sun Nov  4 11:18:16 2007
@@ -12,7 +12,42 @@
 
 use Data::Dumper;
 
-sub tokenize {
+sub tokenize_hop {
+    my $file = shift;
+
+    # read file and throw away comments
+    # XXX probably broken WRT to strings with embedded comments
+    my $target;
+    {
+        open my $source, '<', $file;
+        while (<$source>) {
+            next if m/ \A \s* ; /xms;
+            s/ ; .* \z //xms;
+            $target .= $_;
+        }
+        close $source;
+    }
+
+    my $lexer = sub {
+         TOKEN:
+         {
+             return [ 'INTEGER', $1 ] if $target =~ m/\G (\d+)            /gcx;
+             redo TOKEN               if $target =~ m/\G \s+              /gcx;
+             return [ 'UNKNOWN', $1 ] if $target =~ m/\G (.)              /gcx;
+             return;
+         }
+    };
+
+    my @tokens;
+    while ( my $token = $lexer->() ) {
+        push @tokens, $token->[1];
+    }
+
+    return \@tokens;
+}
+
+
+sub tokenize_char_by_char {
     my $file = shift;
 
     # read file and throw away comments

Reply via email to