Author: allison Date: Sun Jan 25 19:21:17 2009 New Revision: 36012 Added: trunk/languages/pod/ trunk/languages/pod/README trunk/languages/pod/config/ trunk/languages/pod/config/makefiles/ trunk/languages/pod/config/makefiles/root.in trunk/languages/pod/pod.pir trunk/languages/pod/src/ trunk/languages/pod/src/parser/ trunk/languages/pod/src/parser/actions.pm trunk/languages/pod/src/parser/grammar.pg trunk/languages/pod/t/ trunk/languages/pod/t/harness trunk/languages/pod/test.pod
Log: [languages] Adding a PGE-based POD parser. Added: trunk/languages/pod/README ============================================================================== --- (empty file) +++ trunk/languages/pod/README Sun Jan 25 19:21:17 2009 @@ -0,0 +1,30 @@ +This is a liberal Pod parser, intended to have the semantics checked and +document structure built by an appropriate actions file. See the file test.pod +for the syntax it can handle. It can be run with the '--target=parse' option, +to dump the parse tree: + + ../../parrot pod.pir --target=parse test.pod + + +This parser mostly follows the specification in 'perlpodspec', with the +following exceptions: + + - It doesn't allow formatting codes to span multiple lines. + + - It doesn't allow headers or item names to span multiple lines. + + - It requires =cut at the end of a section of Pod, even at the end of the + file. + + - It allows any Pod formatting within a =begin/=end section, with no special + treatment. (The semantic handling may impose constraints.) + + - It doesn't truncate whitespace within a formatting code (leaving that up to + the semantic handling). + + - No special parsing is done for L<> codes (leaving that up to the formatter). + +The current actions file is a no-op, simply returning the parse tree (but left +in place because HLLCompiler apparently can't handle a null actions file). + +The 'make test' target does nothing. 
Added: trunk/languages/pod/config/makefiles/root.in ============================================================================== --- (empty file) +++ trunk/languages/pod/config/makefiles/root.in Sun Jan 25 19:21:17 2009 @@ -0,0 +1,118 @@ +## $Id$ + +## arguments we want to run parrot with +PARROT_ARGS = + +## configuration settings +BUILD_DIR = @build_dir@ +LOAD_EXT = @load_ext@ +O = @o@ + +## Setup some commands +LN_S = @lns@ +PERL = @perl@ +RM_RF = @rm_rf@ +CP = @cp@ +PARROT = ../../parrot@exe@ +CAT = $(PERL) -MExtUtils::Command -e cat +BUILD_DYNPMC = $(PERL) $(BUILD_DIR)/tools/build/dynpmc.pl +RECONFIGURE = $(PERL) $(BUILD_DIR)/tools/dev/reconfigure.pl +#CONDITIONED_LINE(darwin): +#CONDITIONED_LINE(darwin):# MACOSX_DEPLOYMENT_TARGET must be defined for OS X compilation/linking +#CONDITIONED_LINE(darwin):export MACOSX_DEPLOYMENT_TARGET := @osx_version@ + +## places to look for things +PARROT_DYNEXT = $(BUILD_DIR)/runtime/parrot/dynext +PGE_LIBRARY = $(BUILD_DIR)/runtime/parrot/library/PGE +PERL6GRAMMAR = $(PGE_LIBRARY)/Perl6Grammar.pbc +NQP = $(BUILD_DIR)/compilers/nqp/nqp.pbc +PCT = $(BUILD_DIR)/runtime/parrot/library/PCT.pbc + +PMC_DIR = src/pmc + +all: pod.pbc + +POD_GROUP = $(PMC_DIR)/pod_group$(LOAD_EXT) + +SOURCES = pod.pir \ + src/gen_grammar.pir \ + src/gen_actions.pir \ +# $(POD_GROUP) + +# PMCS = pod +# PMC_SOURCES = $(PMC_DIR)/pod.pmc + +# the default target +pod.pbc: $(PARROT) $(SOURCES) + $(PARROT) $(PARROT_ARGS) -o pod.pbc pod.pir + +src/gen_grammar.pir: $(PERL6GRAMMAR) src/parser/grammar.pg + $(PARROT) $(PARROT_ARGS) $(PERL6GRAMMAR) \ + --output=src/gen_grammar.pir \ + src/parser/grammar.pg \ + +src/gen_actions.pir: $(NQP) $(PCT) src/parser/actions.pm + $(PARROT) $(PARROT_ARGS) $(NQP) --output=src/gen_actions.pir \ + --target=pir src/parser/actions.pm + +$(POD_GROUP): $(PARROT) $(PMC_SOURCES) + cd $(PMC_DIR) && $(BUILD_DYNPMC) generate $(PMCS) + cd $(PMC_DIR) && $(BUILD_DYNPMC) compile $(PMCS) + cd $(PMC_DIR) && $(BUILD_DYNPMC) linklibs $(PMCS) + 
cd $(PMC_DIR) && $(BUILD_DYNPMC) copy --destination=$(PARROT_DYNEXT) $(PMCS) + +# regenerate the Makefile +Makefile: config/makefiles/root.in + cd $(BUILD_DIR) && $(RECONFIGURE) --step=gen::languages --languages=pod + +# This is a listing of all targets, that are meant to be called by users +help: + @echo "" + @echo "Following targets are available for the user:" + @echo "" + @echo " all: pod.pbc" + @echo " This is the default." + @echo "Testing:" + @echo " test: Run the test suite." + @echo " testclean: Clean up test results." + @echo "" + @echo "Cleaning:" + @echo " clean: Basic cleaning up." + @echo " realclean: Removes also files generated by 'Configure.pl'" + @echo " distclean: Removes also anything built, in theory" + @echo "" + @echo "Misc:" + @echo " help: Print this help message." + @echo "" + +test: all + $(PERL) t/harness + +# this target has nothing to do +testclean: + +CLEANUPS = \ + pod.pbc \ + src/gen_grammar.pir \ + src/gen_actions.pir \ + $(PMC_DIR)/*.h \ + $(PMC_DIR)/*.c \ + $(PMC_DIR)/*.dump \ + $(PMC_DIR)/*$(O) \ + $(PMC_DIR)/*$(LOAD_EXT) \ + $(PMC_DIR)/*.exp \ + $(PMC_DIR)/*.ilk \ + $(PMC_DIR)/*.manifest \ + $(PMC_DIR)/*.pdb \ + $(PMC_DIR)/*.lib \ + + +clean: testclean + $(RM_RF) $(CLEANUPS) + +realclean: clean + $(RM_RF) Makefile + +distclean: realclean + + Added: trunk/languages/pod/pod.pir ============================================================================== --- (empty file) +++ trunk/languages/pod/pod.pir Sun Jan 25 19:21:17 2009 @@ -0,0 +1,77 @@ +=head1 TITLE + +pod.pir - A Pod compiler. + +=head2 Description + +This is the base file for the Pod compiler. + +This file includes the parsing and grammar rules from +the src/ directory, loads the relevant PGE libraries, +and registers the compiler under the name 'Pod'. + +=head2 Functions + +=over 4 + +=item onload() + +Creates the Pod compiler using a C<PCT::HLLCompiler> +object. 
+ +=cut + +.HLL 'pod' + +.namespace [ 'Pod';'Compiler' ] + +.loadlib 'pod_group' + +.sub '' :anon :load :init + load_bytecode 'PCT.pbc' + .local pmc parrotns, hllns, exports + parrotns = get_root_namespace ['parrot'] + hllns = get_hll_namespace + exports = split ' ', 'PAST PCT PGE' + parrotns.'export_to'(hllns, exports) +.end + +.include 'src/gen_grammar.pir' +.include 'src/gen_actions.pir' + +.sub 'onload' :anon :load :init + $P0 = get_hll_global ['PCT'], 'HLLCompiler' + $P1 = $P0.'new'() + $P1.'language'('pod') + $P0 = get_hll_namespace ['Pod';'Grammar'] + $P1.'parsegrammar'($P0) + $P0 = get_hll_namespace ['Pod';'Grammar';'Actions'] + $P1.'parseactions'($P0) +.end + +=item main(args :slurpy) :main + +Start compilation by passing any command line C<args> +to the Pod compiler. + +=cut + +.sub 'main' :main + .param pmc args + + $P0 = compreg 'pod' + $P1 = $P0.'command_line'(args) +.end + +.include 'src/gen_builtins.pir' + +=back + +=cut + +# Local Variables: +# mode: pir +# fill-column: 100 +# End: +# vim: expandtab shiftwidth=4 ft=pir: + Added: trunk/languages/pod/src/parser/actions.pm ============================================================================== --- (empty file) +++ trunk/languages/pod/src/parser/actions.pm Sun Jan 25 19:21:17 2009 @@ -0,0 +1,29 @@ +# $Id$ + +=begin comments + +Pod::Grammar::Actions - ast transformations for Pod + +This file contains the methods that are used by the parse grammar +to build the PAST representation of a Pod program. +Each method below corresponds to a rule in F<src/parser/grammar.pg>, +and is invoked at the point where C<{*}> appears in the rule, +with the current match object as the first argument. If the +line containing C<{*}> also has a C<#= key> comment, then the +value of the comment is passed as the second argument to the method. 
+ +=end comments + +class Pod::Grammar::Actions; + +method TOP($/) { + make $( $/ ); +} + +# Local Variables: +# mode: cperl +# cperl-indent-level: 4 +# fill-column: 100 +# End: +# vim: expandtab shiftwidth=4: + Added: trunk/languages/pod/src/parser/grammar.pg ============================================================================== --- (empty file) +++ trunk/languages/pod/src/parser/grammar.pg Sun Jan 25 19:21:17 2009 @@ -0,0 +1,180 @@ +# $Id$ + +=begin overview + +This is the grammar for Pod written as a sequence of Perl 6 rules. + +=end overview + +grammar Pod::Grammar is PCT::Grammar; + +rule TOP { + [ <skipped> + | <pod_section> + ]* + [ $ || <panic: 'Pod syntax error'> ] + {*} +} + +rule skipped { + ^^ <![=]> \N* + {*} +} + +rule pod_section { + [ <pod_directive> | <?before '='> ] + <pod_sequence>* + <cut_directive> + {*} +} + +rule pod_sequence { + <back_directive> {*} + | <item_directive> {*} + | <over_directive> {*} + | <heading> + | <begin_directive> + | <end_directive> + | <for_directive> + | <encoding_directive> +# | <literal_paragraph> + | <paragraph> +} + +token pod_directive { + ^^ '=pod' + <.blank_line> + {*} +} + +token cut_directive { + ^^ '=cut' + <.blank_line>? + {*} +} + +token heading { + ^^ + '=head' + <digit> + <block_title>? + <.blank_line> + {*} +} + +token begin_directive { + ^^ + '=begin' + <block_name> + <block_title>? + <.blank_line> + {*} +} + +token end_directive { + ^^ '=end' + <block_name> + <.blank_line> + {*} +} + +token for_directive { + ^^ + '=for' + <block_name> + <block_title>? + <.blank_line> + {*} +} + +token over_directive { + ^^ + '=over' + [ <.pod_ws> + <digit>* + [ '.' <digit>+ ]? + ]? + <.blank_line> + {*} +} + +token back_directive { + ^^ '=back' + <.blank_line> + {*} +} + +token item_directive { + ^^ + '=item' + [ <.pod_ws> + [ '*' + | <digit>* '.'? + ] + ]? 
+ <.blank_line> + {*} +} + +token encoding_directive { + ^^ + '=encoding' + <block_name> + <.blank_line> + {*} +} + +regex paragraph { + ^^ + <!before '='> # Not a directive + <formatted_text> + [ \n <formatted_text> ]* + <.blank_line> + {*} +} + +token literal_paragraph { + [ <.pod_ws> <formatted_text> \n ]+ + <.blank_line> + {*} +} + +token block_name { + <.pod_ws> + <[_]+alpha+digit>+ + {*} +} + +token block_title { + <.pod_ws> + <formatted_text> + {*} +} + +regex formatted_text { + [ <format_code> + | <alpha> + | <digit> + | <punct> + | <pod_ws> + ]+ + {*} +} + +regex format_code { + <[BCEFILSXZ]> + [ '<<<' <formatted_text> '>>>' + | '<<' <formatted_text> '>>' + | '<' <formatted_text> '>' + ] + {*} +} + +#token ws { <!ww> \s+ } + +token pod_ws { + [ ' ' | \t ]+ # Literal spaces or tabs, no newlines or other whitespace + {*} +} + +token blank_line { \n <.pod_ws>? \n } Added: trunk/languages/pod/t/harness ============================================================================== --- (empty file) +++ trunk/languages/pod/t/harness Sun Jan 25 19:21:17 2009 @@ -0,0 +1,13 @@ +#! perl + +# $Id$ + +# pragmata +use strict; +use warnings; +use 5.008; + +use lib qw( . lib ../lib ../../lib ../../lib ); +use Parrot::Test::Harness language => 'Pod', + compiler => 'pod.pbc'; + Added: trunk/languages/pod/test.pod ============================================================================== --- (empty file) +++ trunk/languages/pod/test.pod Sun Jan 25 19:21:17 2009 @@ -0,0 +1,87 @@ +Foo +Bar +Baz + +=pod + +=over + +=item + +=back + +=cut + +Some blob of text. + +=over 4.5 + +=item * + +=back + +=cut + +Another blob of text. + +=over 4 + +=item 45 + +=back + +=cut + +More text. + +=head1 Foo + +=cut + + + + +=head2 + +=cut + +=begin foo + +=end foo + +=cut + +=begin bar Some Text + +=end bar + +=cut + +=for baz + +=cut + +=head1 a paragraph + +Random paragraph. + +=cut + +=head2 more paragraphs + +Another random paragraph. +Including a newline. 
+ +=cut + +=head3 formatting codes + +Text containing E<formatting B<codes>>. + +A multi-bracket formating code containing C<<special -> characters>>. + +=cut + +=encoding utf8 + +=cut