This is an automated email from the git hooks/post-receive script. js pushed a commit to tag PEVANS in repository libparser-mgc-perl.
commit 53bba00cba91efd6bc452fb5d68e0af30fa72f68 Author: Paul Evans <leon...@leonerd.org.uk> Date: Sat Mar 19 14:21:25 2011 +0000 Import of PEVANS/Parser-MGC-0.07 from CPAN. gitpan-cpan-distribution: Parser-MGC gitpan-cpan-version: 0.07 gitpan-cpan-path: PEVANS/Parser-MGC-0.07.tar.gz gitpan-cpan-author: PEVANS gitpan-cpan-maturity: released --- Changes | 8 ++++++++ META.yml | 4 ++-- README | 47 +++++++++++++++++++++++++++----------------- examples/eval-expr.pl | 4 +--- examples/parse-dict.pl | 4 ++-- examples/parse-pod.pl | 7 +++---- examples/synopsis.pl | 8 ++++---- lib/Parser/MGC.pm | 53 ++++++++++++++++++++++++++++++++------------------ t/02expect.t | 16 ++++++++++++++- 9 files changed, 98 insertions(+), 53 deletions(-) diff --git a/Changes b/Changes index 0259a77..4ef1d8e 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,13 @@ Revision history for Parser-MGC +0.07 CHANGES: + * Allow ->expect to return subgroup captures in list context + * Documentation improvements + + BUGFIXES: + * Use Data::Dumper rather than Data::Dump in examples, as the latter + is not core; no point pulling in non-core deps simply for examples + 0.06 CHANGES: * Renamed ->one_of to ->any_of * Added ->substring_before diff --git a/META.yml b/META.yml index 41e1e73..b3e7f51 100644 --- a/META.yml +++ b/META.yml @@ -15,9 +15,9 @@ name: Parser-MGC provides: Parser::MGC: file: lib/Parser/MGC.pm - version: 0.06 + version: 0.07 requires: File::Slurp: 0 resources: license: http://dev.perl.org/licenses/ -version: 0.06 +version: 0.07 diff --git a/README b/README index efe5982..20d6c24 100644 --- a/README +++ b/README @@ -31,12 +31,13 @@ DESCRIPTION right, returning a parse structure. It takes its name from the "m//gc" regexps used to implement the token parsing behaviour. - It provides a number of token-parsing methods, which each atomically - extract a grammatical token from the string. It also provides wrapping - methods that can be used to build up a possibly-recursive grammar - structure. Each method, both token and structural, atomically either - consumes a prefix of the string and returns its result, or fails and - consumes nothing. + It provides a number of token-parsing methods, which each extract a + grammatical token from the string. It also provides wrapping methods + that can be used to build up a possibly-recursive grammar structure, by + applying a structure around other parts of parsing code. Each method, + both token and structural, atomically either consumes a prefix of the + string and returns its result, or fails and consumes nothing. This makes + it simple to implement grammars that require backtracking. CONSTRUCTOR $parser = Parser::MGC->new( %args ) @@ -109,7 +110,7 @@ METHODS input interractively from a user. It cannot be used in all cases (for example, reading fixed-size buffers from a file) because two successive invocations may split a single token across the buffer boundaries, and - cause parse failures parse failures. + cause parse failures. ( $lineno, $col, $text ) = $parser->where Returns the current parse position, as a line and column number, and the @@ -270,8 +271,17 @@ TOKEN PARSING METHODS $str = $parser->expect( $literal ) $str = $parser->expect( qr/pattern/ ) + @groups = $parser->expect( qr/pattern/ ) Expects to find a literal string or regexp pattern match, and consumes - it. This method returns the string that was captured. + it. In scalar context, this method returns the string that was captured. + In list context it returns the matching substring and the contents of + any subgroups contained in the pattern. + + This method will raise a parse error (by calling "fail") if the regexp + fails to match. Note that if the pattern could match an empty string + (such as for example "qr/\d*/"), the pattern will always match, even if + it has to match an empty string. This method will not consider a failure + if the regexp matches with zero-width. $str = $parser->substring_before( $literal ) $str = $parser->substring_before( qr/pattern/ ) @@ -306,10 +316,11 @@ TOKEN PARSING METHODS and consumes it. Negative integers, preceeded by "-", are also recognised. - $int = $parser->token_float + $float = $parser->token_float Expects to find a number expressed in floating-point notation; a sequence of digits possibly prefixed by "-", possibly containing a - decimal point. + decimal point, possibly followed by an exponent specified by "e" + followed by an integer. The numerical value is then returned. $str = $parser->token_string Expects to find a quoted string, and consumes it. The string should be @@ -319,14 +330,14 @@ TOKEN PARSING METHODS to those accepted by C or Perl. Specifically, the following forms are recognised: - \a Bell ("alert") - \b Backspace - \e Escape - \f Form feed - \n Newline - \r Return - \t Horizontal Tab - \0, \012 Octal character + \a Bell ("alert") + \b Backspace + \e Escape + \f Form feed + \n Newline + \r Return + \t Horizontal Tab + \0, \012 Octal character \x34, \x{5678} Hexadecimal character C's "\v" for vertical tab is not supported as it is rarely used in diff --git a/examples/eval-expr.pl b/examples/eval-expr.pl index ab9a1f2..b5bebc2 100755 --- a/examples/eval-expr.pl +++ b/examples/eval-expr.pl @@ -53,8 +53,6 @@ sub parse_atom ); } -use Data::Dump qw( pp ); - if( !caller ) { my $parser = __PACKAGE__->new; @@ -62,7 +60,7 @@ if( !caller ) { my $ret = eval { $parser->from_string( $line ) }; print $@ and next if $@; - print pp( $ret ) . "\n"; + print "$ret\n"; } } diff --git a/examples/parse-dict.pl b/examples/parse-dict.pl index 178132d..225a122 100644 --- a/examples/parse-dict.pl +++ b/examples/parse-dict.pl @@ -38,7 +38,7 @@ sub parse_dict return \%ret } -use Data::Dump qw( pp ); +use Data::Dumper; if( !caller ) { my $parser = __PACKAGE__->new; @@ -47,7 +47,7 @@ if( !caller ) { my $ret = eval { $parser->from_string( $line ) }; print $@ and next if $@; - print pp( $ret ) . "\n"; + print Dumper( $ret ); } } diff --git a/examples/parse-pod.pl b/examples/parse-pod.pl index 918a6c7..222d40c 100755 --- a/examples/parse-pod.pl +++ b/examples/parse-pod.pl @@ -13,9 +13,8 @@ sub parse $self->sequence_of( sub { $self->any_of( - sub { my $tag = $self->expect( qr/[A-Z](?=<)/ ); + sub { my ( undef, $tag, $delim ) = $self->expect( qr/([A-Z])(<+)/ ); $self->commit; - my $delim = $self->expect( qr/<+/ ); +{ $tag => $self->scope_of( undef, \&parse, ">" x length $delim ) }; }, sub { $self->substring_before( qr/[A-Z]</ ) }, @@ -23,7 +22,7 @@ sub parse ); } -use Data::Dump qw( pp ); +use Data::Dumper; if( !caller ) { my $parser = __PACKAGE__->new; @@ -32,7 +31,7 @@ if( !caller ) { my $ret = eval { $parser->from_string( $line ) }; print $@ and next if $@; - print pp( $ret ) . "\n"; + print Dumper( $ret ); } } diff --git a/examples/synopsis.pl b/examples/synopsis.pl index 51a8006..b7791dd 100755 --- a/examples/synopsis.pl +++ b/examples/synopsis.pl @@ -20,12 +20,12 @@ sub parse } ); } -my $parser = LispParser->new; - -use Data::Dump qw( pp ); +use Data::Dumper; if( !caller ) { - print pp( $parser->from_file( $ARGV[0] ) ); + my $parser = __PACKAGE__->new; + + print Dumper( $parser->from_file( $ARGV[0] ) ); } 1; diff --git a/lib/Parser/MGC.pm b/lib/Parser/MGC.pm index b58897f..2a90c14 100644 --- a/lib/Parser/MGC.pm +++ b/lib/Parser/MGC.pm @@ -8,7 +8,7 @@ package Parser::MGC; use strict; use warnings; -our $VERSION = '0.06'; +our $VERSION = '0.07'; use Carp; @@ -50,11 +50,13 @@ parsers that consume a given input string from left to right, returning a parse structure. It takes its name from the C<m//gc> regexps used to implement the token parsing behaviour. -It provides a number of token-parsing methods, which each atomically extract a +It provides a number of token-parsing methods, which each extract a grammatical token from the string. It also provides wrapping methods that can -be used to build up a possibly-recursive grammar structure. Each method, both -token and structural, atomically either consumes a prefix of the string and -returns its result, or fails and consumes nothing. +be used to build up a possibly-recursive grammar structure, by applying a +structure around other parts of parsing code. Each method, both token and +structural, atomically either consumes a prefix of the string and returns its +result, or fails and consumes nothing. This makes it simple to implement +grammars that require backtracking. =cut @@ -218,7 +220,7 @@ for reading lines of a file in the common case where lines are considered as skippable whitespace, or for reading lines of input interractively from a user. It cannot be used in all cases (for example, reading fixed-size buffers from a file) because two successive invocations may split a single token -across the buffer boundaries, and cause parse failures parse failures. +across the buffer boundaries, and cause parse failures. =cut @@ -629,8 +631,18 @@ sub skip_ws =head2 $str = $parser->expect( qr/pattern/ ) +=head2 @groups = $parser->expect( qr/pattern/ ) + Expects to find a literal string or regexp pattern match, and consumes it. -This method returns the string that was captured. +In scalar context, this method returns the string that was captured. In list +context it returns the matching substring and the contents of any subgroups +contained in the pattern. + +This method will raise a parse error (by calling C<fail>) if the regexp fails +to match. Note that if the pattern could match an empty string (such as for +example C<qr/\d*/>), the pattern will always match, even if it has to match an +empty string. This method will not consider a failure if the regexp matches +with zero-width. =cut @@ -642,10 +654,11 @@ sub expect ref $expect or $expect = qr/\Q$expect/; $self->skip_ws; - $self->{str} =~ m/\G($expect)/gc or + $self->{str} =~ m/\G$expect/gc or $self->fail( "Expected $expect" ); - return $1; + return substr( $self->{str}, $-[0], $+[0]-$-[0] ) if !wantarray; + return map { substr( $self->{str}, $-[$_], $+[$_]-$-[$_] ) } 0 .. $#+; } =head2 $str = $parser->substring_before( $literal ) @@ -730,10 +743,12 @@ sub token_int return $sign * $int; } -=head2 $int = $parser->token_float +=head2 $float = $parser->token_float Expects to find a number expressed in floating-point notation; a sequence of -digits possibly prefixed by C<->, possibly containing a decimal point. +digits possibly prefixed by C<->, possibly containing a decimal point, +possibly followed by an exponent specified by C<e> followed by an integer. The +numerical value is then returned. =cut @@ -758,14 +773,14 @@ using C<"> or C<'> quote marks. The content of the quoted string can contain character escapes similar to those accepted by C or Perl. Specifically, the following forms are recognised: - \a Bell ("alert") - \b Backspace - \e Escape - \f Form feed - \n Newline - \r Return - \t Horizontal Tab - \0, \012 Octal character + \a Bell ("alert") + \b Backspace + \e Escape + \f Form feed + \n Newline + \r Return + \t Horizontal Tab + \0, \012 Octal character \x34, \x{5678} Hexadecimal character C's C<\v> for vertical tab is not supported as it is rarely used in practice diff --git a/t/02expect.t b/t/02expect.t index 6826c73..d40d394 100644 --- a/t/02expect.t +++ b/t/02expect.t @@ -2,7 +2,7 @@ use strict; -use Test::More tests => 4; +use Test::More tests => 5; package TestParser; use base qw( Parser::MGC ); @@ -14,6 +14,16 @@ sub parse [ $self->expect( "hello" ), $self->expect( qr/world/ ) ]; } +package HexParser; +use base qw( Parser::MGC ); + +sub parse +{ + my $self = shift; + + return hex +( $self->expect( qr/0x([0-9A-F]+)/i ) )[1]; +} + package main; my $parser = TestParser->new; @@ -36,3 +46,7 @@ is( $@, qq[goodbye world\n] . qq[^\n], 'Exception from "goodbye world" failure' ); + +$parser = HexParser->new; + +is( $parser->from_string( "0x123" ), 0x123, "Hex parser captures substring" ); -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libparser-mgc-perl.git _______________________________________________ Pkg-perl-cvs-commits mailing list Pkg-perl-cvs-commits@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-perl-cvs-commits