Author: kn
Date: Fri Feb  8 14:59:49 2008
New Revision: 7325

Log:
- Added support for bullet lists in tokenizer

Added:
    experimental/Document/tests/files/rst/tokenizer/bullet_list.tokens
    experimental/Document/tests/files/rst/tokenizer/definition_list.tokens
Modified:
    experimental/Document/src/document/rst/token.php
    experimental/Document/src/document/rst/tokenizer.php
    experimental/Document/tests/document_rst_tokenizer_tests.php
    experimental/Document/tests/files/rst/tokenizer/   (props changed)

Modified: experimental/Document/src/document/rst/token.php
==============================================================================
--- experimental/Document/src/document/rst/token.php [iso-8859-1] (original)
+++ experimental/Document/src/document/rst/token.php [iso-8859-1] Fri Feb  8 14:59:49 2008
@@ -23,6 +23,8 @@
     const NEWLINE       = 2;
 
     const HEADLINE      = 11;
+
+    const BULLET_POINT  = 21;
 
     const TEXT_LINE     = 99;
 

Modified: experimental/Document/src/document/rst/tokenizer.php
==============================================================================
--- experimental/Document/src/document/rst/tokenizer.php [iso-8859-1] (original)
+++ experimental/Document/src/document/rst/tokenizer.php [iso-8859-1] Fri Feb  8 14:59:49 2008
@@ -19,6 +19,12 @@
 class ezcDocumentRstTokenizer
 {
     /**
+     * Common whitespace characters. The vertical tab is excluded, because it
+     * causes strange problems with PCRE.
+     */
+    const WHITESPACE_CHARS  = ' \\t';
+
+    /**
      * Allowed character sets for headlines.
      *
      * @see http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#sections
@@ -76,12 +82,15 @@
     {
         $this->tokens = array(
             ezcDocumentRstToken::WHITESPACE =>
-                '(\\A(?P<value>[ \\t]+))',
+                '(\\A(?P<value>[' . self::WHITESPACE_CHARS . ']+))',
             ezcDocumentRstToken::NEWLINE =>
                 '(\\A(?P<value>\\r\\n|\\r|\\n))',
 
             ezcDocumentRstToken::HEADLINE =>
                 '(\\A(?P<value>([' . self::HEADLINE_CHARS . '])\\2*)$)m',
+
+            ezcDocumentRstToken::BULLET_POINT =>
+                '(\\A(?P<value>[' . self::BULLET_LIST_CHARS . '][' . self::WHITESPACE_CHARS . ']))u',
 
             // This should be last match
             ezcDocumentRstToken::TEXT_LINE =>
@@ -166,6 +175,8 @@
 
                 if ( preg_match( $expression, $string, $matches ) )
                 {
+//                    echo "- Matched token $token (" . $matches['value'] . ":" . strlen( $matches['value'] ) . ")\n";
+
                     // A token matched, so add the matched token to the token
                     // list and update all variables.
                     $tokens[] = new ezcDocumentRstToken(

Modified: experimental/Document/tests/document_rst_tokenizer_tests.php
==============================================================================
--- experimental/Document/tests/document_rst_tokenizer_tests.php [iso-8859-1] (original)
+++ experimental/Document/tests/document_rst_tokenizer_tests.php [iso-8859-1] Fri Feb  8 14:59:49 2008
@@ -59,13 +59,18 @@
 
         $expected = include $to;
 
-        file_put_contents( $to . '.test', "<?php\n\nreturn " . var_export( $tokens, true ) . ";\n\n" );
+        // Store test file, to have something to compare on failure
+        $tempDir = $this->createTempDir( 'rst_tokenizer' ) . '/';
+        file_put_contents( $tempDir . basename( $to ), "<?php\n\nreturn " . var_export( $tokens, true ) . ";\n\n" );
 
         $this->assertEquals(
             $expected,
             $tokens,
             'Extracted tokens do not match expected tokens.'
         );
+
+        // Remove tempdir, when nothing failed.
+        $this->removeTempDir();
     }
 
     public function testNotExistantFile()

Propchange: experimental/Document/tests/files/rst/tokenizer/
            ('svn:ignore' removed)

Added: experimental/Document/tests/files/rst/tokenizer/bullet_list.tokens
==============================================================================
--- experimental/Document/tests/files/rst/tokenizer/bullet_list.tokens (added)
+++ experimental/Document/tests/files/rst/tokenizer/bullet_list.tokens [iso-8859-1] Fri Feb  8 14:59:49 2008
@@ -1,0 +1,197 @@
+<?php
+
+return array (
+  0 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 21,
+     'content' => '- ',
+     'line' => 1,
+     'position' => 1,
+  )),
+  1 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'This',
+     'line' => 1,
+     'position' => 3,
+  )),
+  2 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 1,
+     'position' => 7,
+  )),
+  3 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'is',
+     'line' => 1,
+     'position' => 8,
+  )),
+  4 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 1,
+     'position' => 10,
+  )),
+  5 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'a',
+     'line' => 1,
+     'position' => 11,
+  )),
+  6 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 1,
+     'position' => 12,
+  )),
+  7 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'bullet',
+     'line' => 1,
+     'position' => 13,
+  )),
+  8 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 1,
+     'position' => 19,
+  )),
+  9 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'list.',
+     'line' => 1,
+     'position' => 20,
+  )),
+  10 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 2,
+     'content' => '
+',
+     'line' => 1,
+     'position' => 25,
+  )),
+  11 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 2,
+     'content' => '
+',
+     'line' => 2,
+     'position' => 1,
+  )),
+  12 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 21,
+     'content' => '- ',
+     'line' => 3,
+     'position' => 1,
+  )),
+  13 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'Bullets',
+     'line' => 3,
+     'position' => 3,
+  )),
+  14 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 3,
+     'position' => 10,
+  )),
+  15 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'can',
+     'line' => 3,
+     'position' => 11,
+  )),
+  16 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 3,
+     'position' => 14,
+  )),
+  17 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'be',
+     'line' => 3,
+     'position' => 15,
+  )),
+  18 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 3,
+     'position' => 17,
+  )),
+  19 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => '"*",',
+     'line' => 3,
+     'position' => 18,
+  )),
+  20 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 3,
+     'position' => 22,
+  )),
+  21 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => '"+",',
+     'line' => 3,
+     'position' => 23,
+  )),
+  22 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 3,
+     'position' => 27,
+  )),
+  23 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'or',
+     'line' => 3,
+     'position' => 28,
+  )),
+  24 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 3,
+     'position' => 30,
+  )),
+  25 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => '"-".',
+     'line' => 3,
+     'position' => 31,
+  )),
+  26 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 2,
+     'content' => '
+',
+     'line' => 3,
+     'position' => 35,
+  )),
+);
+

Added: experimental/Document/tests/files/rst/tokenizer/definition_list.tokens
==============================================================================
--- experimental/Document/tests/files/rst/tokenizer/definition_list.tokens (added)
+++ experimental/Document/tests/files/rst/tokenizer/definition_list.tokens [iso-8859-1] Fri Feb  8 14:59:49 2008
@@ -1,0 +1,495 @@
+<?php
+
+return array (
+  0 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'what',
+     'line' => 1,
+     'position' => 1,
+  )),
+  1 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 2,
+     'content' => '
+',
+     'line' => 1,
+     'position' => 5,
+  )),
+  2 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => '    ',
+     'line' => 2,
+     'position' => 1,
+  )),
+  3 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'Definition',
+     'line' => 2,
+     'position' => 5,
+  )),
+  4 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 2,
+     'position' => 15,
+  )),
+  5 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'lists',
+     'line' => 2,
+     'position' => 16,
+  )),
+  6 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 2,
+     'position' => 21,
+  )),
+  7 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'associate',
+     'line' => 2,
+     'position' => 22,
+  )),
+  8 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 2,
+     'position' => 31,
+  )),
+  9 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'a',
+     'line' => 2,
+     'position' => 32,
+  )),
+  10 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 2,
+     'position' => 33,
+  )),
+  11 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'term',
+     'line' => 2,
+     'position' => 34,
+  )),
+  12 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 2,
+     'position' => 38,
+  )),
+  13 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'with',
+     'line' => 2,
+     'position' => 39,
+  )),
+  14 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 2,
+     'position' => 43,
+  )),
+  15 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'a',
+     'line' => 2,
+     'position' => 44,
+  )),
+  16 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 2,
+     'position' => 45,
+  )),
+  17 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'definition.',
+     'line' => 2,
+     'position' => 46,
+  )),
+  18 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 2,
+     'content' => '
+',
+     'line' => 2,
+     'position' => 57,
+  )),
+  19 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 2,
+     'content' => '
+',
+     'line' => 3,
+     'position' => 1,
+  )),
+  20 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'how',
+     'line' => 4,
+     'position' => 1,
+  )),
+  21 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 2,
+     'content' => '
+',
+     'line' => 4,
+     'position' => 4,
+  )),
+  22 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => '    ',
+     'line' => 5,
+     'position' => 1,
+  )),
+  23 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'The',
+     'line' => 5,
+     'position' => 5,
+  )),
+  24 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 5,
+     'position' => 8,
+  )),
+  25 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'term',
+     'line' => 5,
+     'position' => 9,
+  )),
+  26 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 5,
+     'position' => 13,
+  )),
+  27 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'is',
+     'line' => 5,
+     'position' => 14,
+  )),
+  28 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 5,
+     'position' => 16,
+  )),
+  29 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'a',
+     'line' => 5,
+     'position' => 17,
+  )),
+  30 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 5,
+     'position' => 18,
+  )),
+  31 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'one-line',
+     'line' => 5,
+     'position' => 19,
+  )),
+  32 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 5,
+     'position' => 27,
+  )),
+  33 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'phrase,',
+     'line' => 5,
+     'position' => 28,
+  )),
+  34 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 5,
+     'position' => 35,
+  )),
+  35 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'and',
+     'line' => 5,
+     'position' => 36,
+  )),
+  36 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 5,
+     'position' => 39,
+  )),
+  37 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'the',
+     'line' => 5,
+     'position' => 40,
+  )),
+  38 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 5,
+     'position' => 43,
+  )),
+  39 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'definition',
+     'line' => 5,
+     'position' => 44,
+  )),
+  40 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 5,
+     'position' => 54,
+  )),
+  41 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'is',
+     'line' => 5,
+     'position' => 55,
+  )),
+  42 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 5,
+     'position' => 57,
+  )),
+  43 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'one',
+     'line' => 5,
+     'position' => 58,
+  )),
+  44 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 2,
+     'content' => '
+',
+     'line' => 5,
+     'position' => 61,
+  )),
+  45 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => '    ',
+     'line' => 6,
+     'position' => 1,
+  )),
+  46 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'or',
+     'line' => 6,
+     'position' => 5,
+  )),
+  47 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 6,
+     'position' => 7,
+  )),
+  48 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'more',
+     'line' => 6,
+     'position' => 8,
+  )),
+  49 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 6,
+     'position' => 12,
+  )),
+  50 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'paragraphs',
+     'line' => 6,
+     'position' => 13,
+  )),
+  51 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 6,
+     'position' => 23,
+  )),
+  52 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'or',
+     'line' => 6,
+     'position' => 24,
+  )),
+  53 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 6,
+     'position' => 26,
+  )),
+  54 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'body',
+     'line' => 6,
+     'position' => 27,
+  )),
+  55 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 6,
+     'position' => 31,
+  )),
+  56 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'elements,',
+     'line' => 6,
+     'position' => 32,
+  )),
+  57 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 6,
+     'position' => 41,
+  )),
+  58 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'indented',
+     'line' => 6,
+     'position' => 42,
+  )),
+  59 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 6,
+     'position' => 50,
+  )),
+  60 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'relative',
+     'line' => 6,
+     'position' => 51,
+  )),
+  61 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 6,
+     'position' => 59,
+  )),
+  62 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'to',
+     'line' => 6,
+     'position' => 60,
+  )),
+  63 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 2,
+     'content' => '
+',
+     'line' => 6,
+     'position' => 62,
+  )),
+  64 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => '    ',
+     'line' => 7,
+     'position' => 1,
+  )),
+  65 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'the',
+     'line' => 7,
+     'position' => 5,
+  )),
+  66 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 1,
+     'content' => ' ',
+     'line' => 7,
+     'position' => 8,
+  )),
+  67 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 99,
+     'content' => 'term.',
+     'line' => 7,
+     'position' => 9,
+  )),
+  68 => 
+  ezcDocumentRstToken::__set_state(array(
+     'type' => 2,
+     'content' => '
+',
+     'line' => 7,
+     'position' => 14,
+  )),
+);
+


-- 
svn-components mailing list
svn-components@lists.ez.no
http://lists.ez.no/mailman/listinfo/svn-components

Reply via email to