Author: kn Date: Mon Feb 11 11:04:38 2008 New Revision: 7331 Log: - Simplified tokenizer, everything else is now left to the parser # Visually checked the created token lists for more files...
Added: experimental/Document/tests/files/rst/tokenizer/bullet_list_deep.tokens (with props) experimental/Document/tests/files/rst/tokenizer/bullet_list_incorrect.tokens (with props) experimental/Document/tests/files/rst/tokenizer/deep_block_quote.tokens (with props) experimental/Document/tests/files/rst/tokenizer/definition_list_alignements.tokens (with props) experimental/Document/tests/files/rst/tokenizer/definition_list_classifier.tokens (with props) experimental/Document/tests/files/rst/tokenizer/embedded_uris.tokens (with props) experimental/Document/tests/files/rst/tokenizer/field_list.tokens (with props) experimental/Document/tests/files/rst/tokenizer/field_list_intended.tokens (with props) experimental/Document/tests/files/rst/tokenizer/hyperlinks.tokens (with props) experimental/Document/tests/files/rst/tokenizer/inline_formatting.tokens (with props) experimental/Document/tests/files/rst/tokenizer/inline_internal_targets.tokens (with props) experimental/Document/tests/files/rst/tokenizer/line_block.tokens (with props) experimental/Document/tests/files/rst/tokenizer/line_block_indented.tokens (with props) experimental/Document/tests/files/rst/tokenizer/literal_block.tokens (with props) experimental/Document/tests/files/rst/tokenizer/literal_block_notations.tokens (with props) Modified: experimental/Document/src/document/rst/token.php experimental/Document/src/document/rst/tokenizer.php experimental/Document/tests/files/rst/tokenizer/bullet_list.tokens (contents, props changed) experimental/Document/tests/files/rst/tokenizer/colons.txt experimental/Document/tests/files/rst/tokenizer/definition_list.tokens (contents, props changed) experimental/Document/tests/files/rst/tokenizer/empty.tokens (props changed) experimental/Document/tests/files/rst/tokenizer/paragraph.tokens (contents, props changed) experimental/Document/tests/files/rst/tokenizer/titles.tokens (contents, props changed) Modified: experimental/Document/src/document/rst/token.php 
============================================================================== --- experimental/Document/src/document/rst/token.php [iso-8859-1] (original) +++ experimental/Document/src/document/rst/token.php [iso-8859-1] Mon Feb 11 11:04:38 2008 @@ -22,22 +22,11 @@ const WHITESPACE = 1; const NEWLINE = 2; - const HEADLINE = 11; + const BACKSLASH = 3; - const BULLET_POINT = 21; + const SPECIAL_CHARS = 4; - const QUOTE = 50; - const SINGLE_QUOTE = 51; - const DOUBLE_QUOTE = 52; - const ASTERISK = 53; - const UNDERSCORE = 54; - const ROUND_BRACKET_OPEN = 55; - const ROUND_BRACKET_CLOSE = 56; - const SQUARE_BRACKET_OPEN = 57; - const SQUARE_BRACKET_CLOSE = 58; - const PIPE = 59; - - const TEXT_LINE = 99; + const TEXT_LINE = 5; /** * Token type Modified: experimental/Document/src/document/rst/tokenizer.php ============================================================================== --- experimental/Document/src/document/rst/tokenizer.php [iso-8859-1] (original) +++ experimental/Document/src/document/rst/tokenizer.php [iso-8859-1] Mon Feb 11 11:04:38 2008 @@ -29,37 +29,14 @@ * * @see http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#sections */ - const HEADLINE_CHARS = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'; - - /** - * Allowed character sets for table lines. - * - * @see http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#sections - */ - const TABLE_CHARS = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ '; - - /** - * Characters to start bullet lists. Prepared for inclusion in regular - * expression character groups. - * - * @see http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#bullet-lists - */ - const BULLET_LIST_CHARS = '*+\\x{e280a2}\\x{e280a3}\\x{e28183}-'; - - /** - * Characters to start enumerated lists. Prepared for inclusion in regular - * expressions. 
- * - * @see http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#enumerated-lists - */ - const ENUM_LIST_CHARS = '(?P<enum>\\d+|[A-Z]|[a-z]|[IVXLCDM]+|[ivxlcdm]+|#)'; + const SPECIAL_CHARS = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'; /** * Characters ending a pure text section. * * @see http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#enumerated-lists */ - const TEXT_END_CHARS = '`*_[\\]|()"\'\r\n'; + const TEXT_END_CHARS = '`*_\\\\[\\]|()"\':\r\n'; /** * List with tokens and a regular expression matching the given token. @@ -87,35 +64,11 @@ ezcDocumentRstToken::NEWLINE => '(\\A(?P<value>\\r\\n|\\r|\\n))', - // Document structure - ezcDocumentRstToken::HEADLINE => - '(\\A(?P<value>([' . self::HEADLINE_CHARS . '])\\2*)$)m', - - // Lists - ezcDocumentRstToken::BULLET_POINT => - '(\\A(?P<value>[' . self::BULLET_LIST_CHARS . '][' . self::WHITESPACE_CHARS . ']))u', - - // Special characters in text - ezcDocumentRstToken::QUOTE => - '(\\A(?P<value>`))', - ezcDocumentRstToken::SINGLE_QUOTE => - '(\\A(?P<value>\'))', - ezcDocumentRstToken::DOUBLE_QUOTE => - '(\\A(?P<value>\"))', - ezcDocumentRstToken::ASTERISK => - '(\\A(?P<value>\\*))', - ezcDocumentRstToken::UNDERSCORE => - '(\\A(?P<value>_))', - ezcDocumentRstToken::ROUND_BRACKET_OPEN => - '(\\A(?P<value>\\())', - ezcDocumentRstToken::ROUND_BRACKET_CLOSE => - '(\\A(?P<value>\\)))', - ezcDocumentRstToken::SQUARE_BRACKET_OPEN => - '(\\A(?P<value>\\[))', - ezcDocumentRstToken::SQUARE_BRACKET_CLOSE => - '(\\A(?P<value>]))', - ezcDocumentRstToken::PIPE => - '(\\A(?P<value>\\|))', + // Sequences of special characters + ezcDocumentRstToken::SPECIAL_CHARS => + '(\\A(?P<value>([' . self::SPECIAL_CHARS . 
'])\\2*))', + ezcDocumentRstToken::BACKSLASH => + '(\\A(?P<value>\\\\))', // This should be last match ezcDocumentRstToken::TEXT_LINE => Modified: experimental/Document/tests/files/rst/tokenizer/bullet_list.tokens ============================================================================== Binary files - no diff available. Propchange: experimental/Document/tests/files/rst/tokenizer/bullet_list.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: experimental/Document/tests/files/rst/tokenizer/bullet_list_deep.tokens ============================================================================== Binary file - no diff available. Propchange: experimental/Document/tests/files/rst/tokenizer/bullet_list_deep.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: experimental/Document/tests/files/rst/tokenizer/bullet_list_incorrect.tokens ============================================================================== Binary file - no diff available. Propchange: experimental/Document/tests/files/rst/tokenizer/bullet_list_incorrect.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Modified: experimental/Document/tests/files/rst/tokenizer/colons.txt ============================================================================== --- experimental/Document/tests/files/rst/tokenizer/colons.txt [iso-8859-1] (original) +++ experimental/Document/tests/files/rst/tokenizer/colons.txt [iso-8859-1] Mon Feb 11 11:04:38 2008 @@ -24,7 +24,7 @@ Ho :: -:: +:: foo bar:: Ho Added: experimental/Document/tests/files/rst/tokenizer/deep_block_quote.tokens ============================================================================== Binary file - no diff available. 
Propchange: experimental/Document/tests/files/rst/tokenizer/deep_block_quote.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Modified: experimental/Document/tests/files/rst/tokenizer/definition_list.tokens ============================================================================== Binary files - no diff available. Propchange: experimental/Document/tests/files/rst/tokenizer/definition_list.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: experimental/Document/tests/files/rst/tokenizer/definition_list_alignements.tokens ============================================================================== Binary file - no diff available. Propchange: experimental/Document/tests/files/rst/tokenizer/definition_list_alignements.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: experimental/Document/tests/files/rst/tokenizer/definition_list_classifier.tokens ============================================================================== Binary file - no diff available. Propchange: experimental/Document/tests/files/rst/tokenizer/definition_list_classifier.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: experimental/Document/tests/files/rst/tokenizer/embedded_uris.tokens ============================================================================== Binary file - no diff available. 
Propchange: experimental/Document/tests/files/rst/tokenizer/embedded_uris.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Propchange: experimental/Document/tests/files/rst/tokenizer/empty.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: experimental/Document/tests/files/rst/tokenizer/field_list.tokens ============================================================================== Binary file - no diff available. Propchange: experimental/Document/tests/files/rst/tokenizer/field_list.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: experimental/Document/tests/files/rst/tokenizer/field_list_intended.tokens ============================================================================== Binary file - no diff available. Propchange: experimental/Document/tests/files/rst/tokenizer/field_list_intended.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: experimental/Document/tests/files/rst/tokenizer/hyperlinks.tokens ============================================================================== Binary file - no diff available. Propchange: experimental/Document/tests/files/rst/tokenizer/hyperlinks.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: experimental/Document/tests/files/rst/tokenizer/inline_formatting.tokens ============================================================================== Binary file - no diff available. 
Propchange: experimental/Document/tests/files/rst/tokenizer/inline_formatting.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: experimental/Document/tests/files/rst/tokenizer/inline_internal_targets.tokens ============================================================================== Binary file - no diff available. Propchange: experimental/Document/tests/files/rst/tokenizer/inline_internal_targets.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: experimental/Document/tests/files/rst/tokenizer/line_block.tokens ============================================================================== Binary file - no diff available. Propchange: experimental/Document/tests/files/rst/tokenizer/line_block.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: experimental/Document/tests/files/rst/tokenizer/line_block_indented.tokens ============================================================================== Binary file - no diff available. Propchange: experimental/Document/tests/files/rst/tokenizer/line_block_indented.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: experimental/Document/tests/files/rst/tokenizer/literal_block.tokens ============================================================================== Binary file - no diff available. Propchange: experimental/Document/tests/files/rst/tokenizer/literal_block.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: experimental/Document/tests/files/rst/tokenizer/literal_block_notations.tokens ============================================================================== Binary file - no diff available. 
Propchange: experimental/Document/tests/files/rst/tokenizer/literal_block_notations.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Modified: experimental/Document/tests/files/rst/tokenizer/paragraph.tokens ============================================================================== Binary files - no diff available. Propchange: experimental/Document/tests/files/rst/tokenizer/paragraph.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Modified: experimental/Document/tests/files/rst/tokenizer/titles.tokens ============================================================================== Binary files - no diff available. Propchange: experimental/Document/tests/files/rst/tokenizer/titles.tokens ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream -- svn-components mailing list [EMAIL PROTECTED] http://lists.ez.no/mailman/listinfo/svn-components