Author: kn
Date: Fri Feb 8 14:59:49 2008
New Revision: 7325
Log:
- Added support for bullet lists in tokenizer
Added:
experimental/Document/tests/files/rst/tokenizer/bullet_list.tokens
experimental/Document/tests/files/rst/tokenizer/definition_list.tokens
Modified:
experimental/Document/src/document/rst/token.php
experimental/Document/src/document/rst/tokenizer.php
experimental/Document/tests/document_rst_tokenizer_tests.php
experimental/Document/tests/files/rst/tokenizer/ (props changed)
Modified: experimental/Document/src/document/rst/token.php
==============================================================================
--- experimental/Document/src/document/rst/token.php [iso-8859-1] (original)
+++ experimental/Document/src/document/rst/token.php [iso-8859-1] Fri Feb 8 14:59:49 2008
@@ -23,6 +23,8 @@
const NEWLINE = 2;
const HEADLINE = 11;
+
+ const BULLET_POINT = 21;
const TEXT_LINE = 99;
Modified: experimental/Document/src/document/rst/tokenizer.php
==============================================================================
--- experimental/Document/src/document/rst/tokenizer.php [iso-8859-1] (original)
+++ experimental/Document/src/document/rst/tokenizer.php [iso-8859-1] Fri Feb 8 14:59:49 2008
@@ -19,6 +19,12 @@
class ezcDocumentRstTokenizer
{
/**
+ * Common whitespace characters. The vertical tab is excluded, because it
+ * causes strange problems with PCRE.
+ */
+ const WHITESPACE_CHARS = ' \\t';
+
+ /**
* Allowed character sets for headlines.
*
* @see
http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#sections
@@ -76,12 +82,15 @@
{
$this->tokens = array(
ezcDocumentRstToken::WHITESPACE =>
- '(\\A(?P<value>[ \\t]+))',
+ '(\\A(?P<value>[' . self::WHITESPACE_CHARS . ']+))',
ezcDocumentRstToken::NEWLINE =>
'(\\A(?P<value>\\r\\n|\\r|\\n))',
ezcDocumentRstToken::HEADLINE =>
'(\\A(?P<value>([' . self::HEADLINE_CHARS . '])\\2*)$)m',
+
+ ezcDocumentRstToken::BULLET_POINT =>
+ '(\\A(?P<value>[' . self::BULLET_LIST_CHARS . '][' . self::WHITESPACE_CHARS . ']))u',
// This should be last match
ezcDocumentRstToken::TEXT_LINE =>
@@ -166,6 +175,8 @@
if ( preg_match( $expression, $string, $matches ) )
{
+// echo "- Matched token $token (" . $matches['value'] . ":" . strlen( $matches['value'] ) . ")\n";
+
// A token matched, so add the matched token to the token
// list and update all variables.
$tokens[] = new ezcDocumentRstToken(
Modified: experimental/Document/tests/document_rst_tokenizer_tests.php
==============================================================================
--- experimental/Document/tests/document_rst_tokenizer_tests.php [iso-8859-1] (original)
+++ experimental/Document/tests/document_rst_tokenizer_tests.php [iso-8859-1] Fri Feb 8 14:59:49 2008
@@ -59,13 +59,18 @@
$expected = include $to;
- file_put_contents( $to . '.test', "<?php\n\nreturn " . var_export( $tokens, true ) . ";\n\n" );
+ // Store test file, to have something to compare on failure
+ $tempDir = $this->createTempDir( 'rst_tokenizer' ) . '/';
+ file_put_contents( $tempDir . basename( $to ), "<?php\n\nreturn " . var_export( $tokens, true ) . ";\n\n" );
$this->assertEquals(
$expected,
$tokens,
'Extracted tokens do not match expected tokens.'
);
+
+ // Remove tempdir, when nothing failed.
+ $this->removeTempDir();
}
public function testNotExistantFile()
Propchange: experimental/Document/tests/files/rst/tokenizer/
('svn:ignore' removed)
Added: experimental/Document/tests/files/rst/tokenizer/bullet_list.tokens
==============================================================================
--- experimental/Document/tests/files/rst/tokenizer/bullet_list.tokens (added)
+++ experimental/Document/tests/files/rst/tokenizer/bullet_list.tokens [iso-8859-1] Fri Feb 8 14:59:49 2008
@@ -1,0 +1,197 @@
+<?php
+
+return array (
+ 0 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 21,
+ 'content' => '- ',
+ 'line' => 1,
+ 'position' => 1,
+ )),
+ 1 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'This',
+ 'line' => 1,
+ 'position' => 3,
+ )),
+ 2 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 1,
+ 'position' => 7,
+ )),
+ 3 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'is',
+ 'line' => 1,
+ 'position' => 8,
+ )),
+ 4 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 1,
+ 'position' => 10,
+ )),
+ 5 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'a',
+ 'line' => 1,
+ 'position' => 11,
+ )),
+ 6 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 1,
+ 'position' => 12,
+ )),
+ 7 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'bullet',
+ 'line' => 1,
+ 'position' => 13,
+ )),
+ 8 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 1,
+ 'position' => 19,
+ )),
+ 9 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'list.',
+ 'line' => 1,
+ 'position' => 20,
+ )),
+ 10 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 2,
+ 'content' => '
+',
+ 'line' => 1,
+ 'position' => 25,
+ )),
+ 11 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 2,
+ 'content' => '
+',
+ 'line' => 2,
+ 'position' => 1,
+ )),
+ 12 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 21,
+ 'content' => '- ',
+ 'line' => 3,
+ 'position' => 1,
+ )),
+ 13 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'Bullets',
+ 'line' => 3,
+ 'position' => 3,
+ )),
+ 14 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 3,
+ 'position' => 10,
+ )),
+ 15 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'can',
+ 'line' => 3,
+ 'position' => 11,
+ )),
+ 16 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 3,
+ 'position' => 14,
+ )),
+ 17 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'be',
+ 'line' => 3,
+ 'position' => 15,
+ )),
+ 18 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 3,
+ 'position' => 17,
+ )),
+ 19 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => '"*",',
+ 'line' => 3,
+ 'position' => 18,
+ )),
+ 20 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 3,
+ 'position' => 22,
+ )),
+ 21 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => '"+",',
+ 'line' => 3,
+ 'position' => 23,
+ )),
+ 22 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 3,
+ 'position' => 27,
+ )),
+ 23 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'or',
+ 'line' => 3,
+ 'position' => 28,
+ )),
+ 24 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 3,
+ 'position' => 30,
+ )),
+ 25 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => '"-".',
+ 'line' => 3,
+ 'position' => 31,
+ )),
+ 26 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 2,
+ 'content' => '
+',
+ 'line' => 3,
+ 'position' => 35,
+ )),
+);
+
Added: experimental/Document/tests/files/rst/tokenizer/definition_list.tokens
==============================================================================
--- experimental/Document/tests/files/rst/tokenizer/definition_list.tokens (added)
+++ experimental/Document/tests/files/rst/tokenizer/definition_list.tokens [iso-8859-1] Fri Feb 8 14:59:49 2008
@@ -1,0 +1,495 @@
+<?php
+
+return array (
+ 0 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'what',
+ 'line' => 1,
+ 'position' => 1,
+ )),
+ 1 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 2,
+ 'content' => '
+',
+ 'line' => 1,
+ 'position' => 5,
+ )),
+ 2 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 2,
+ 'position' => 1,
+ )),
+ 3 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'Definition',
+ 'line' => 2,
+ 'position' => 5,
+ )),
+ 4 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 2,
+ 'position' => 15,
+ )),
+ 5 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'lists',
+ 'line' => 2,
+ 'position' => 16,
+ )),
+ 6 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 2,
+ 'position' => 21,
+ )),
+ 7 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'associate',
+ 'line' => 2,
+ 'position' => 22,
+ )),
+ 8 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 2,
+ 'position' => 31,
+ )),
+ 9 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'a',
+ 'line' => 2,
+ 'position' => 32,
+ )),
+ 10 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 2,
+ 'position' => 33,
+ )),
+ 11 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'term',
+ 'line' => 2,
+ 'position' => 34,
+ )),
+ 12 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 2,
+ 'position' => 38,
+ )),
+ 13 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'with',
+ 'line' => 2,
+ 'position' => 39,
+ )),
+ 14 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 2,
+ 'position' => 43,
+ )),
+ 15 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'a',
+ 'line' => 2,
+ 'position' => 44,
+ )),
+ 16 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 2,
+ 'position' => 45,
+ )),
+ 17 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'definition.',
+ 'line' => 2,
+ 'position' => 46,
+ )),
+ 18 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 2,
+ 'content' => '
+',
+ 'line' => 2,
+ 'position' => 57,
+ )),
+ 19 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 2,
+ 'content' => '
+',
+ 'line' => 3,
+ 'position' => 1,
+ )),
+ 20 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'how',
+ 'line' => 4,
+ 'position' => 1,
+ )),
+ 21 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 2,
+ 'content' => '
+',
+ 'line' => 4,
+ 'position' => 4,
+ )),
+ 22 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 5,
+ 'position' => 1,
+ )),
+ 23 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'The',
+ 'line' => 5,
+ 'position' => 5,
+ )),
+ 24 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 5,
+ 'position' => 8,
+ )),
+ 25 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'term',
+ 'line' => 5,
+ 'position' => 9,
+ )),
+ 26 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 5,
+ 'position' => 13,
+ )),
+ 27 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'is',
+ 'line' => 5,
+ 'position' => 14,
+ )),
+ 28 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 5,
+ 'position' => 16,
+ )),
+ 29 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'a',
+ 'line' => 5,
+ 'position' => 17,
+ )),
+ 30 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 5,
+ 'position' => 18,
+ )),
+ 31 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'one-line',
+ 'line' => 5,
+ 'position' => 19,
+ )),
+ 32 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 5,
+ 'position' => 27,
+ )),
+ 33 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'phrase,',
+ 'line' => 5,
+ 'position' => 28,
+ )),
+ 34 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 5,
+ 'position' => 35,
+ )),
+ 35 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'and',
+ 'line' => 5,
+ 'position' => 36,
+ )),
+ 36 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 5,
+ 'position' => 39,
+ )),
+ 37 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'the',
+ 'line' => 5,
+ 'position' => 40,
+ )),
+ 38 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 5,
+ 'position' => 43,
+ )),
+ 39 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'definition',
+ 'line' => 5,
+ 'position' => 44,
+ )),
+ 40 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 5,
+ 'position' => 54,
+ )),
+ 41 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'is',
+ 'line' => 5,
+ 'position' => 55,
+ )),
+ 42 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 5,
+ 'position' => 57,
+ )),
+ 43 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'one',
+ 'line' => 5,
+ 'position' => 58,
+ )),
+ 44 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 2,
+ 'content' => '
+',
+ 'line' => 5,
+ 'position' => 61,
+ )),
+ 45 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 6,
+ 'position' => 1,
+ )),
+ 46 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'or',
+ 'line' => 6,
+ 'position' => 5,
+ )),
+ 47 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 6,
+ 'position' => 7,
+ )),
+ 48 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'more',
+ 'line' => 6,
+ 'position' => 8,
+ )),
+ 49 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 6,
+ 'position' => 12,
+ )),
+ 50 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'paragraphs',
+ 'line' => 6,
+ 'position' => 13,
+ )),
+ 51 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 6,
+ 'position' => 23,
+ )),
+ 52 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'or',
+ 'line' => 6,
+ 'position' => 24,
+ )),
+ 53 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 6,
+ 'position' => 26,
+ )),
+ 54 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'body',
+ 'line' => 6,
+ 'position' => 27,
+ )),
+ 55 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 6,
+ 'position' => 31,
+ )),
+ 56 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'elements,',
+ 'line' => 6,
+ 'position' => 32,
+ )),
+ 57 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 6,
+ 'position' => 41,
+ )),
+ 58 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'indented',
+ 'line' => 6,
+ 'position' => 42,
+ )),
+ 59 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 6,
+ 'position' => 50,
+ )),
+ 60 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'relative',
+ 'line' => 6,
+ 'position' => 51,
+ )),
+ 61 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 6,
+ 'position' => 59,
+ )),
+ 62 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'to',
+ 'line' => 6,
+ 'position' => 60,
+ )),
+ 63 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 2,
+ 'content' => '
+',
+ 'line' => 6,
+ 'position' => 62,
+ )),
+ 64 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 7,
+ 'position' => 1,
+ )),
+ 65 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'the',
+ 'line' => 7,
+ 'position' => 5,
+ )),
+ 66 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 1,
+ 'content' => ' ',
+ 'line' => 7,
+ 'position' => 8,
+ )),
+ 67 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 99,
+ 'content' => 'term.',
+ 'line' => 7,
+ 'position' => 9,
+ )),
+ 68 =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 2,
+ 'content' => '
+',
+ 'line' => 7,
+ 'position' => 14,
+ )),
+);
+
--
svn-components mailing list
[email protected]
http://lists.ez.no/mailman/listinfo/svn-components