Author: allison
Date: Sun Jan 25 19:21:17 2009
New Revision: 36012

Added:
   trunk/languages/pod/
   trunk/languages/pod/README
   trunk/languages/pod/config/
   trunk/languages/pod/config/makefiles/
   trunk/languages/pod/config/makefiles/root.in
   trunk/languages/pod/pod.pir
   trunk/languages/pod/src/
   trunk/languages/pod/src/parser/
   trunk/languages/pod/src/parser/actions.pm
   trunk/languages/pod/src/parser/grammar.pg
   trunk/languages/pod/t/
   trunk/languages/pod/t/harness
   trunk/languages/pod/test.pod

Log:
[languages] Adding a PGE-based POD parser.


Added: trunk/languages/pod/README
==============================================================================
--- (empty file)
+++ trunk/languages/pod/README  Sun Jan 25 19:21:17 2009
@@ -0,0 +1,30 @@
+This is a liberal Pod parser, intended to have the semantics checked and
+document structure built by an appropriate actions file. See the file test.pod
+for the syntax it can handle. It can be run with the '--target=parse' option,
+to dump the parse tree:
+
+  ../../parrot pod.pir --target=parse test.pod 
+
+
+This parser mostly follows the specification in 'perlpodspec', with the
+following exceptions:
+
+  - It doesn't allow formatting codes to span multiple lines.
+
+  - It doesn't allow headers or item names to span multiple lines.
+
+  - It requires =cut at the end of a section of Pod, even at the end of the
+    file.
+
+  - It allows any Pod formatting within a =begin/=end section, with no special
+    treatment. (The semantic handling may impose constraints.)
+
+  - It doesn't truncate whitespace within a formatting code (leaving that up to
+    the semantic handling).
+
+  - No special parsing is done for L<> codes (leaving that up to the formatter).
+
+The current actions file is a no-op, simply returning the parse tree (but left
+in place because HLLCompiler apparently can't handle a null actions file).
+
+The 'make test' target does nothing.

Added: trunk/languages/pod/config/makefiles/root.in
==============================================================================
--- (empty file)
+++ trunk/languages/pod/config/makefiles/root.in        Sun Jan 25 19:21:17 2009
@@ -0,0 +1,118 @@
+## $Id$
+
+## arguments we want to run parrot with
+PARROT_ARGS =
+
+## configuration settings
+## (@name@ values are template placeholders, presumably substituted when the
+## Makefile is generated from this file)
+BUILD_DIR     = @build_dir@
+LOAD_EXT      = @load_ext@
+O             = @o@
+
+## Setup some commands
+LN_S          = @lns@
+PERL          = @perl@
+RM_RF         = @rm_rf@
+CP            = @cp@
+## the parrot executable, two directories up from this language directory
+PARROT        = ../../parrot@exe@
+CAT           = $(PERL) -MExtUtils::Command -e cat
+BUILD_DYNPMC  = $(PERL) $(BUILD_DIR)/tools/build/dynpmc.pl
+RECONFIGURE   = $(PERL) $(BUILD_DIR)/tools/dev/reconfigure.pl
+#CONDITIONED_LINE(darwin):
+#CONDITIONED_LINE(darwin):# MACOSX_DEPLOYMENT_TARGET must be defined for OS X compilation/linking
+#CONDITIONED_LINE(darwin):export MACOSX_DEPLOYMENT_TARGET := @osx_version@
+
+## places to look for things
+PARROT_DYNEXT = $(BUILD_DIR)/runtime/parrot/dynext
+PGE_LIBRARY   = $(BUILD_DIR)/runtime/parrot/library/PGE
+PERL6GRAMMAR  = $(PGE_LIBRARY)/Perl6Grammar.pbc
+NQP           = $(BUILD_DIR)/compilers/nqp/nqp.pbc
+PCT           = $(BUILD_DIR)/runtime/parrot/library/PCT.pbc
+
+## directory holding this language's dynamic PMC sources and build products
+PMC_DIR       = src/pmc
+
+# First rule in the file, so 'all' is what a bare 'make' builds.
+all: pod.pbc
+
+# Path of the dynamic PMC group library for this language.
+POD_GROUP = $(PMC_DIR)/pod_group$(LOAD_EXT)
+
+# Prerequisites of pod.pbc.  The backslash after the last entry deliberately
+# continues into the commented-out $(POD_GROUP) line, so the PMC group can be
+# switched back on just by removing the leading '#'.
+SOURCES = pod.pir \
+  src/gen_grammar.pir \
+  src/gen_actions.pir \
+#  $(POD_GROUP)
+
+# PMC list and sources used by the $(POD_GROUP) rule; currently disabled.
+# PMCS = pod
+# PMC_SOURCES = $(PMC_DIR)/pod.pmc
+
+# the default target
+# pod.pir pulls in the two generated files via .include, so they are listed
+# in $(SOURCES) and get regenerated before pod.pbc is compiled.
+pod.pbc: $(PARROT) $(SOURCES)
+       $(PARROT) $(PARROT_ARGS) -o pod.pbc pod.pir
+
+# Generate the grammar PIR from the grammar source with Perl6Grammar.pbc.
+# (The last recipe line must not end in a backslash: a dangling continuation
+# would swallow whatever line follows it.)
+src/gen_grammar.pir: $(PERL6GRAMMAR) src/parser/grammar.pg
+       $(PARROT) $(PARROT_ARGS) $(PERL6GRAMMAR) \
+           --output=src/gen_grammar.pir \
+           src/parser/grammar.pg
+
+# Generate the actions PIR from the NQP source with nqp.pbc.
+src/gen_actions.pir: $(NQP) $(PCT) src/parser/actions.pm
+       $(PARROT) $(PARROT_ARGS) $(NQP) --output=src/gen_actions.pir \
+           --target=pir src/parser/actions.pm
+
+# Build the dynamic PMC group in $(PMC_DIR) in four dynpmc.pl steps
+# (generate, compile, linklibs, copy) and place it in $(PARROT_DYNEXT).
+# Each line runs in its own shell, hence the repeated 'cd'.
+# NOTE(review): PMCS and PMC_SOURCES are commented out in this file, so the
+# rule currently expands with empty lists -- confirm before re-enabling.
+$(POD_GROUP): $(PARROT) $(PMC_SOURCES)
+       cd $(PMC_DIR) && $(BUILD_DYNPMC) generate $(PMCS)
+       cd $(PMC_DIR) && $(BUILD_DYNPMC) compile $(PMCS)
+       cd $(PMC_DIR) && $(BUILD_DYNPMC) linklibs $(PMCS)
+       cd $(PMC_DIR) && $(BUILD_DYNPMC) copy --destination=$(PARROT_DYNEXT) $(PMCS)
+
+# regenerate the Makefile
+# Triggered when the template changes: runs reconfigure.pl from the Parrot
+# build directory, limited to the gen::languages step for the pod language.
+Makefile: config/makefiles/root.in
+       cd $(BUILD_DIR) && $(RECONFIGURE) --step=gen::languages --languages=pod
+
+# This is a listing of all targets, that are meant to be called by users
+# Each line is a separate echo; '@' keeps make from printing the command
+# itself, so only the help text appears.
+help:
+       @echo ""
+       @echo "Following targets are available for the user:"
+       @echo ""
+       @echo "  all:               pod.pbc"
+       @echo "                     This is the default."
+       @echo "Testing:"
+       @echo "  test:              Run the test suite."
+       @echo "  testclean:         Clean up test results."
+       @echo ""
+       @echo "Cleaning:"
+       @echo "  clean:             Basic cleaning up."
+       @echo "  realclean:         Removes also files generated by 'Configure.pl'"
+       @echo "  distclean:         Removes also anything built, in theory"
+       @echo ""
+       @echo "Misc:"
+       @echo "  help:              Print this help message."
+       @echo ""
+
+# Build everything first, then run the Perl test harness.
+test: all
+       $(PERL) t/harness
+
+# this target has nothing to do
+# (kept so that 'clean' can depend on it)
+testclean:
+
+# Files removed by 'clean': the compiled bytecode, the generated PIR files
+# and anything the dynamic PMC build may leave behind in $(PMC_DIR).
+# The last entry carries no trailing backslash, so the list cannot run on
+# into whatever line happens to follow it.
+CLEANUPS = \
+  pod.pbc \
+  src/gen_grammar.pir \
+  src/gen_actions.pir \
+  $(PMC_DIR)/*.h \
+  $(PMC_DIR)/*.c \
+  $(PMC_DIR)/*.dump \
+  $(PMC_DIR)/*$(O) \
+  $(PMC_DIR)/*$(LOAD_EXT) \
+  $(PMC_DIR)/*.exp \
+  $(PMC_DIR)/*.ilk \
+  $(PMC_DIR)/*.manifest \
+  $(PMC_DIR)/*.pdb \
+  $(PMC_DIR)/*.lib
+
+
+# Remove build products (after testclean).
+clean: testclean
+       $(RM_RF) $(CLEANUPS)
+
+# clean, plus the generated Makefile itself.
+realclean: clean
+       $(RM_RF) Makefile
+
+# Currently does nothing beyond realclean.
+distclean: realclean
+
+

Added: trunk/languages/pod/pod.pir
==============================================================================
--- (empty file)
+++ trunk/languages/pod/pod.pir Sun Jan 25 19:21:17 2009
@@ -0,0 +1,77 @@
+=head1 TITLE
+
+pod.pir - A Pod compiler.
+
+=head2 Description
+
+This is the base file for the Pod compiler.
+
+This file includes the parsing and grammar rules from
+the src/ directory, loads the relevant PGE libraries,
+and registers the compiler under the name 'Pod'.
+
+=head2 Functions
+
+=over 4
+
+=item onload()
+
+Creates the Pod compiler using a C<PCT::HLLCompiler>
+object.
+
+=cut
+
+# Compile the rest of this file under the 'pod' HLL.
+.HLL 'pod'
+
+.namespace [ 'Pod';'Compiler' ]
+
+# NOTE(review): config/makefiles/root.in has $(POD_GROUP) commented out of
+# SOURCES, so this library does not appear to be built -- confirm the
+# loadlib is still wanted.
+.loadlib 'pod_group'
+
+# Load/init hook: loads PCT.pbc, then asks the 'parrot' root namespace to
+# export the names PAST, PCT and PGE into the current HLL namespace.
+.sub '' :anon :load :init
+    load_bytecode 'PCT.pbc'
+
+    .local pmc parrot_root, pod_hll, ns_names
+    ns_names    = split ' ', 'PAST PCT PGE'
+    pod_hll     = get_hll_namespace
+    parrot_root = get_root_namespace ['parrot']
+    parrot_root.'export_to'(pod_hll, ns_names)
+.end
+
+.include 'src/gen_grammar.pir'
+.include 'src/gen_actions.pir'
+
+# Load/init hook: creates a PCT::HLLCompiler instance, sets its language to
+# 'pod', and gives it the Pod::Grammar and Pod::Grammar::Actions namespaces
+# as parse grammar and parse actions.
+.sub 'onload' :anon :load :init
+    .local pmc compiler_class, pod_compiler, target_ns
+
+    compiler_class = get_hll_global ['PCT'], 'HLLCompiler'
+    pod_compiler   = compiler_class.'new'()
+    pod_compiler.'language'('pod')
+
+    target_ns = get_hll_namespace ['Pod';'Grammar']
+    pod_compiler.'parsegrammar'(target_ns)
+
+    target_ns = get_hll_namespace ['Pod';'Grammar';'Actions']
+    pod_compiler.'parseactions'(target_ns)
+.end
+
+=item main(args)  :main
+
+Program entry point. Looks up the compiler registered under the name
+'pod' and hands it the command line C<args>.
+
+=cut
+
+.sub 'main' :main
+    .param pmc args
+
+    .local pmc pod_compiler, outcome
+    pod_compiler = compreg 'pod'
+    outcome      = pod_compiler.'command_line'(args)
+.end
+
+# NOTE(review): no rule in config/makefiles/root.in generates
+# src/gen_builtins.pir and it is not listed in SOURCES -- confirm the file
+# exists, or that this include is still intended.
+.include 'src/gen_builtins.pir'
+
+=back
+
+=cut
+
+# Local Variables:
+#   mode: pir
+#   fill-column: 100
+# End:
+# vim: expandtab shiftwidth=4 ft=pir:
+

Added: trunk/languages/pod/src/parser/actions.pm
==============================================================================
--- (empty file)
+++ trunk/languages/pod/src/parser/actions.pm   Sun Jan 25 19:21:17 2009
@@ -0,0 +1,29 @@
+# $Id$
+
+=begin comments
+
+Pod::Grammar::Actions - ast transformations for Pod
+
+This file contains the methods that are used by the parse grammar
+to build the PAST representation of a Pod program.
+Each method below corresponds to a rule in F<src/parser/grammar.pg>,
+and is invoked at the point where C<{*}> appears in the rule,
+with the current match object as the first argument.  If the
+line containing C<{*}> also has a C<#= key> comment, then the
+value of the comment is passed as the second argument to the method.
+
+=end comments
+
+class Pod::Grammar::Actions;
+
+# Action for the grammar's TOP rule.  It builds no new tree: it just makes
+# the value of the match itself the result (the README describes this as a
+# no-op that returns the parse tree).
+method TOP($/) {
+    make $( $/ );
+}
+
+# Local Variables:
+#   mode: cperl
+#   cperl-indent-level: 4
+#   fill-column: 100
+# End:
+# vim: expandtab shiftwidth=4:
+

Added: trunk/languages/pod/src/parser/grammar.pg
==============================================================================
--- (empty file)
+++ trunk/languages/pod/src/parser/grammar.pg   Sun Jan 25 19:21:17 2009
@@ -0,0 +1,180 @@
+# $Id$
+
+=begin overview
+
+This is the grammar for Pod written as a sequence of Perl 6 rules.
+
+=end overview
+
+# Grammar for Pod documents.  '{*}' marks the point where the action method
+# of the same name is invoked; '<.name>' calls a subrule without capturing.
+grammar Pod::Grammar is PCT::Grammar;
+
+# Entry point: any mix of skipped (non-Pod) lines and Pod sections, which
+# must reach end of input; otherwise panics with 'Pod syntax error'.
+rule TOP {
+    [ <skipped>
+    | <pod_section>
+    ]*
+    [ $ || <panic: 'Pod syntax error'> ]
+    {*}
+}
+
+# A line outside any Pod section: starts at a line boundary, does not begin
+# with '=', and runs to the end of that line.
+rule skipped {
+    ^^ <![=]> \N*
+    {*}
+}
+
+# One Pod section: either an explicit '=pod' directive or simply a position
+# where '=' comes next, then any number of pod_sequence elements, then a
+# mandatory '=cut'.
+rule pod_section {
+    [ <pod_directive> | <?before '='> ]
+    <pod_sequence>*
+    <cut_directive>
+    {*}
+}
+
+# A single element inside a Pod section.  Alternatives are tried in the
+# order written; only the first three carry an action hook.  The
+# literal_paragraph alternative is disabled.
+rule pod_sequence {
+     <back_directive> {*}
+    | <item_directive> {*}
+    | <over_directive> {*}
+    | <heading>
+    | <begin_directive>
+    | <end_directive>
+    | <for_directive>
+    | <encoding_directive>
+#    | <literal_paragraph>
+    | <paragraph>
+}
+
+# '=pod' at the start of a line, followed by a blank line.
+token pod_directive {
+    ^^ '=pod'
+    <.blank_line>
+    {*}
+}
+
+# '=cut' at the start of a line; the blank line after it is optional.
+token cut_directive {
+    ^^ '=cut'
+    <.blank_line>?
+    {*}
+}
+
+# '=head' plus one digit, an optional title on the same line, then a blank
+# line.
+token heading {
+    ^^
+    '=head'
+    <digit>
+    <block_title>?
+    <.blank_line>
+    {*}
+}
+
+# '=begin NAME' with an optional title, then a blank line.
+token begin_directive {
+    ^^
+    '=begin'
+    <block_name>
+    <block_title>?
+    <.blank_line>
+    {*}
+}
+
+# '=end NAME', then a blank line.
+token end_directive {
+    ^^ '=end'
+    <block_name>
+    <.blank_line>
+    {*}
+}
+
+# '=for NAME' with an optional title, then a blank line.
+token for_directive {
+    ^^
+    '=for'
+    <block_name>
+    <block_title>?
+    <.blank_line>
+    {*}
+}
+
+# '=over', optionally followed by whitespace and an indent level written as
+# digits with an optional '.digits' fraction, then a blank line.
+token over_directive {
+    ^^
+    '=over'
+    [   <.pod_ws>
+        <digit>*
+        [ '.' <digit>+ ]?
+    ]?
+    <.blank_line>
+    {*}
+}
+
+# '=back' at the start of a line, followed by a blank line.
+token back_directive {
+    ^^ '=back'
+    <.blank_line>
+    {*}
+}
+
+# '=item', optionally followed by whitespace and either '*' or digits with
+# an optional trailing '.', then a blank line.
+# NOTE(review): no alternative here matches free-form item text (for
+# example '=item foo'), although the README mentions item names -- confirm
+# whether that is intended.
+token item_directive {
+    ^^
+    '=item'
+    [ <.pod_ws>
+        [ '*'
+        | <digit>* '.'?
+        ]
+    ]?
+    <.blank_line>
+    {*}
+}
+
+# '=encoding NAME', then a blank line.
+token encoding_directive {
+    ^^
+    '=encoding'
+    <block_name>
+    <.blank_line>
+    {*}
+}
+
+# An ordinary paragraph: one or more lines of formatted text, separated by
+# single newlines and ended by a blank line.  Declared as 'regex' rather
+# than 'token', so backtracking is allowed.
+regex paragraph {
+    ^^
+    <!before '='>           # Not a directive
+    <formatted_text>
+    [ \n <formatted_text> ]*
+    <.blank_line>
+    {*}
+}
+
+# A paragraph whose lines each begin with whitespace.
+# NOTE(review): not reachable at present -- its alternative in pod_sequence
+# is commented out.
+token literal_paragraph {
+    [ <.pod_ws> <formatted_text> \n ]+
+    <.blank_line>
+    {*}
+}
+
+# Leading whitespace, then a name made of letters, digits and underscores.
+token block_name {
+    <.pod_ws>
+    <[_]+alpha+digit>+
+    {*}
+}
+
+# Leading whitespace, then formatted text up to the end of the line.
+token block_title {
+    <.pod_ws>
+    <formatted_text>
+    {*}
+}
+
+# A run of formatting codes, letters, digits, punctuation and spaces/tabs.
+# No alternative matches a newline, so one formatted_text stays on one line.
+regex formatted_text {
+    [ <format_code>
+    | <alpha>
+    | <digit>
+    | <punct>
+    | <pod_ws>
+    ]+ 
+    {*}
+}
+
+# A formatting code: one of the letters B C E F I L S X Z followed by text
+# in one, two or three pairs of angle brackets.  The longest delimiter is
+# tried first.
+regex format_code {
+    <[BCEFILSXZ]>
+    [ '<<<' <formatted_text> '>>>'
+    | '<<'  <formatted_text> '>>'
+    | '<'   <formatted_text> '>'
+    ]
+    {*}
+}
+
+#token ws { <!ww> \s+ }
+
+# Horizontal whitespace only.
+token pod_ws {
+    [ ' ' | \t ]+ # Literal spaces or tabs, no newlines or other whitespace
+    {*}
+}
+
+# End of the current line followed by a line that is empty or holds only
+# spaces/tabs.
+token blank_line { \n <.pod_ws>? \n }

Added: trunk/languages/pod/t/harness
==============================================================================
--- (empty file)
+++ trunk/languages/pod/t/harness       Sun Jan 25 19:21:17 2009
@@ -0,0 +1,13 @@
+#! perl
+
+# $Id$
+
+# Test harness for the Pod language.  All the work is delegated to
+# Parrot::Test::Harness, which is given the language name and the compiler
+# bytecode file as import arguments.
+
+# pragmata
+use strict;
+use warnings;
+use 5.008;
+
+# Library search path; each directory is listed once (the original repeated
+# ../../lib).
+use lib qw( . lib ../lib ../../lib );
+use Parrot::Test::Harness language => 'Pod',
+                          compiler => 'pod.pbc';
+

Added: trunk/languages/pod/test.pod
==============================================================================
--- (empty file)
+++ trunk/languages/pod/test.pod        Sun Jan 25 19:21:17 2009
@@ -0,0 +1,87 @@
+Foo
+Bar
+Baz
+
+=pod
+
+=over
+
+=item
+
+=back
+
+=cut
+
+Some blob of text.
+
+=over 4.5
+
+=item *
+
+=back
+
+=cut
+
+Another blob of text.
+
+=over 4
+
+=item 45
+
+=back
+
+=cut
+
+More text.
+
+=head1 Foo
+
+=cut
+
+
+
+
+=head2
+
+=cut
+
+=begin foo
+
+=end foo
+
+=cut
+
+=begin bar Some Text
+
+=end bar
+
+=cut
+
+=for baz
+
+=cut
+
+=head1 a paragraph
+
+Random paragraph.
+
+=cut
+
+=head2 more paragraphs
+
+Another random paragraph.
+Including a newline.
+
+=cut
+
+=head3 formatting codes
+
+Text containing E<formatting B<codes>>.
+
+A multi-bracket formating code containing C<<special -> characters>>.
+
+=cut
+
+=encoding utf8
+
+=cut

Reply via email to