hi,
Attached is an updated grammar for pynie/python. The PAST stuff is still
broken; I have to work a bit on that.
This grammar does part of the expression parsing by recursive descent, and
everything from comparisons (==, != etc.) to the power operator (**) through
the operator table.
regards,
kjs
## $Id: grammar_rules.pg 17096 2007-02-20 20:20:48Z paultcochrane $
## TITLE
## Pynie::Grammar -- a grammar for parsing Python
##
## DESCRIPTION
##
## These are a set of rules for parsing programs written in Python.
## Many of the rules are derived from the "Python Language Reference
## Manual", version 2.5 by Guido van Rossum (Fred L. Drake, Jr., Editor).
## Available online at http://docs.python.org/ref/ and
## http://docs.python.org/ref/grammar.txt .
grammar Pynie::Grammar ;

## TOP is the start rule given to HLLCompiler.  It invokes <indent_zero>
## (not defined in this file) and then parses the input as <file_input>.
token TOP {
    <?indent_zero>
    <file_input>
}

## One or more line ends, each optionally preceded by <ws>.
token newline {
    [ <?ws> \n ]+
}

## Whitespace inside a line: horizontal blanks plus an optional '#' comment
## running up to, but not including, the end of the line.
token ws {
    \h*
    [ <'#'> \N* ]?
}

## A whole source file: any mix of blank lines and statements, anchored at
## the start.  Anything left over before end-of-string dies with
## syntax_error.
token file_input {
    ^
    [ <?newline> | <statement> ]*
    [ $ | <?die: syntax_error> ]
}
## A suite is the body of a compound statement: either simple statements on
## the same line as the header, or a newline followed by one or more
## statements at a deeper indentation level.  <indent>, <indent_same> and
## <dedent> are not defined in this file.
##
## The <?die: ...> message is kept on one line on purpose: everything up to
## the closing '>' is the message, so a line break inside it would become
## part of the reported error text.
token suite {
    | <stmt_list> <?newline>
    | <?newline> <?indent> <statement>
      [ <?indent_same> <statement> ]*
      [ <?dedent> | <?die: IndentationError: unindent does not match any outer indentation level> ]
}
## A statement is a compound statement, or a line of simple statements
## terminated by a newline.
token statement {
    | <compound_stmt>
    | <stmt_list> <?newline>
}

## Simple statements separated by ';', with an optional trailing ';'.
token stmt_list {
    <simple_stmt>
    [ <';'> <simple_stmt> ]*
    <';'>?
}
## Statements that carry a suite.  Alternatives are listed in the order in
## which they are tried.
token compound_stmt {
    | <if_stmt>  | <while_stmt> | <for_stmt>
    | <try_stmt> | <with_stmt>
    | <funcdef>  | <classdef>
}
## if: a condition and suite, any number of elif arms, an optional else arm.
rule if_stmt {
    <'if'> <expression> <':'> <suite>
    [ <'elif'> <expression> <':'> <suite> ]*
    [ <'else'> <':'> <suite> ]?
}

## while: a condition and suite, with an optional else arm.
rule while_stmt {
    <'while'> <expression> <':'> <suite>
    [ <'else'> <':'> <suite> ]?
}

## for: loop targets, the iterated expression list, a suite, and an optional
## else arm.
rule for_stmt {
    <'for'> <target_list> <'in'> <expression_list> <':'> <suite>
    [ <'else'> <':'> <suite> ]?
}
## try comes in two shapes; the except form is attempted first.
rule try_stmt {
    <try1_stmt> | <try2_stmt>
}

## try with at least one except clause.  Each except may name an exception
## expression and, after a comma, a target; else and finally are optional.
rule try1_stmt {
    <'try'> <':'> <suite>
    [ <'except'> [ <expression> [ <','> <target> ]? ]? <':'> <suite> ]+
    [ <'else'> <':'> <suite> ]?
    [ <'finally'> <':'> <suite> ]?
}

## try with only a finally clause.
rule try2_stmt {
    <'try'> <':'> <suite>
    <'finally'> <':'> <suite>
}
## with: a context expression, an optional 'as' target, and a suite.
rule with_stmt {
    <'with'> <expression>
    [ <'as'> <target> ]?
    <':'> <suite>
}
## Function definition: optional decorators, 'def', the name, a parenthesised
## (possibly empty) parameter list, and the body suite.
rule funcdef {
    <decorators>?
    <'def'> <funcname>
    <'('> <parameter_list>? <')'> <':'>
    <suite>
}

## One or more decorator lines.
rule decorators {
    <decorator>+
}

## A single '@' line: a dotted name, optionally followed by a parenthesised
## argument list (which may end in a comma), then the end of the line.
rule decorator {
    <'@'> <dotted_name>
    [ <'('> [ <argument_list> <','>? ]? <')'> ]?
    <?newline>
}
## Call arguments.  Four shapes, tried in this order:
##   positional [, keyword] [, *seq] [, **map]
##   keyword [, *seq] [, **map]
##   *seq [, **map]
##   **map
rule argument_list {
    | <positional_arguments>
      [ <','> <keyword_arguments> ]?
      [ <','> <'*'> <expression> ]?
      [ <','> <'**'> <expression> ]?
    | <keyword_arguments>
      [ <','> <'*'> <expression> ]?
      [ <','> <'**'> <expression> ]?
    | <'*'> <expression>
      [ <','> <'**'> <expression> ]?
    | <'**'> <expression>
}
## Comma-separated positional arguments.
##
## Each item is guarded by <!keyword_item>: without the guard the loop would
## consume the name of a following keyword argument as an expression
## ("b" in "f(a, b=1)") and, since rules do not backtrack into the loop, the
## enclosing <argument_list> could never see "b=1" as a keyword item.
rule positional_arguments {
    <!keyword_item> <expression>
    [ <','> <!keyword_item> <expression> ]*
}
## Comma-separated keyword items.
rule keyword_arguments {
    <keyword_item>
    [ <','> <keyword_item> ]*
}

## A single name=value pair.
rule keyword_item {
    <identifier> <'='> <expression>
}
## Identifiers joined by '.', as used after '@' in a decorator.
rule dotted_name {
    <identifier>
    [ <'.'> <identifier> ]*
}

## The name being bound by a 'def'.
rule funcname {
    <identifier>
}
## Formal parameters: zero or more "param ," items followed by exactly one
## closing item, which is *name (optionally followed by , **name), **name,
## or a last parameter with an optional trailing comma.
rule parameter_list {
    [ <defparameter> <','> ]*
    [ <'*'> <identifier> [ <','> <'**'> <identifier> ]?
    | <'**'> <identifier>
    | <defparameter> <','>?
    ]
}

## A parameter with an optional default value.
rule defparameter {
    <parameter>
    [ <'='> <expression> ]?
}

## The contents of a parenthesised (tuple) parameter.
rule sublist {
    <parameter>
    [ <','> <parameter> ]*
    <','>?
}

## A plain name, or a parenthesised sublist of parameters.
rule parameter {
    <identifier> | <'('> <sublist> <')'>
}
## Class definition: 'class', the name, an optional base list, and the body.
rule classdef {
    <'class'> <classname>
    <inheritance>?
    <':'> <suite>
}

## The name being bound by a 'class'.
rule classname {
    <identifier>
}

## Parenthesised, possibly empty, list of base class expressions.
rule inheritance {
    <'('> <expression_list>? <')'>
}
## Anonymous function: optional parameters, ':' and a single body expression.
rule lambda_form {
    <'lambda'>
    <parameter_list>?
    <':'> <expression>
}
## Statements that fit on one line.  Alternatives are tried left to right,
## top to bottom; the order is the same as it has always been.
token simple_stmt {
    | <assert_stmt>   | <assignment_stmt> | <augmented_assignment_stmt>
    | <expression_stmt>
    | <pass_stmt>     | <del_stmt>        | <print_stmt>
    | <return_stmt>   | <yield_stmt>      | <raise_stmt>
    | <break_stmt>    | <continue_stmt>
    | <import_stmt>   | <global_stmt>     | <exec_stmt>
}
## An expression list used as a statement.
rule expression_stmt {
    <expression_list>
}

## assert with a condition and an optional second expression.
rule assert_stmt {
    <'assert'> <expression>
    [ <','> <expression> ]?
}

## One or more "targets =" prefixes followed by the assigned value.
rule assignment_stmt {
    [ <target_list> <'='> ]+
    <expression_list>
}

## A single target, an augmented assignment operator, and the value.
rule augmented_assignment_stmt {
    <target> <augop> <expression_list>
}

## Comma-separated targets with an optional trailing comma.
rule target_list {
    <target>
    [ <','> <target> ]*
    <','>?
}
## Something that can be assigned to: a plain name, a parenthesised or
## bracketed target list, or a primary followed by an attribute reference,
## subscription or slicing.
##
## NOTE(review): <identifier> is tried first, and <primary> already consumes
## every trailing <postop> (attributeref, subscription, slicing).  That makes
## the last three alternatives look unreachable for input such as "a.b" or
## "a[0]" -- confirm against PGE's ordered-alternation / ratchet behaviour
## before relying on them.
rule target {
| <identifier>
| <'('> <target_list> <')'>
| <'['> <target_list> <']'>
| <primary> <attributeref>
| <primary> <subscription>
| <primary> <slicing>
}
## The parenthesised part of a call.  The parentheses may be empty, hold a
## single generator argument (test followed by genexpr_for), or hold an
## argument list with an optional trailing comma.
##
## The generator form is tried first: an argument list would also match the
## leading expression of "f(x for x in y)", and a rule does not go back to
## try another alternative once one has matched.  A lone comma, as in
## "f(,)", is not accepted.
rule call {
    <'('>
    [ <test> <genexpr_for> | <argument_list> <','>? ]?
    <')'>
}
## ".name" after a primary.
rule attributeref {
    <'.'> <identifier>
}

## "[ expressions ]" after a primary.
rule subscription {
    <'['> <expression_list> <']'>
}

## A slice in either of its two forms; the simple form is tried first.
rule slicing {
    <simple_slicing> | <extended_slicing>
}

## "[ lower : upper ]" with both bounds optional.
rule simple_slicing {
    <'['> <short_slice> <']'>
}

## "[ item , item ... ]" where the items are slice items.
rule extended_slicing {
    <'['> <slice_list> <']'>
}
## Comma-separated slice items with an optional trailing comma.
rule slice_list {
    <slice_item>
    [ <','> <slice_item> ]*
    <','>?
}
## One element of an extended slicing: a proper slice, a plain expression,
## or the ellipsis.
##
## <proper_slice> has to be tried before <expression>: the lower bound of a
## slice is itself an expression, so trying <expression> first would match
## just "1" of "1:2" and leave the ':' behind.
rule slice_item {
    <proper_slice> | <expression> | <'...'>
}

## A slice with two or three parts.  The three-part form is tried first
## because it starts with a complete <short_slice>; the other way round,
## "1:2:3" would stop after "1:2".
rule proper_slice {
    <long_slice> | <short_slice>
}
## "lower : upper", both bounds optional.
rule short_slice {
    <expression>? <':'> <expression>?
}

## A short slice followed by ": stride", the stride being optional.
rule long_slice {
    <short_slice> <':'> <expression>?
}
## A name that is not a reserved word: a letter or '_' followed by any
## number of word characters.
token identifier {
    <!reserved>
    [ <?alpha> | <'_'> ]
    \w*
}

## A restricted name (used after 'as' in imports): not a reserved word,
## starts with a lower-case ASCII letter, continues with letters or '_'.
token name {
    <!reserved>
    <[a..z]>
    [ <alpha> | <'_'> ]*
}
## print with an optional comma-separated list of expressions.  The optional
## trailing comma is captured positionally.
rule print_stmt {
    <'print'>
    [ <expression> [ <','> <expression> ]* (<','>?) ]?
}

## The no-op statement.
rule pass_stmt {
    <'pass'>
}

## del followed by the targets to delete.
rule del_stmt {
    <'del'> <target_list>
}

## return with an optional value.
rule return_stmt {
    <'return'> <expression_list>?
}

## yield with a mandatory value.
rule yield_stmt {
    <'yield'> <expression_list>
}

## break, on its own.
rule break_stmt {
    <'break'>
}

## continue, on its own.
rule continue_stmt {
    <'continue'>
}
## raise with zero to three comma-separated expressions.
rule raise_stmt {
    <'raise'>
    [ <expression>
      [ <','> <expression>
        [ <','> <expression> ]?
      ]?
    ]?
}

## global followed by one or more comma-separated names.
rule global_stmt {
    <'global'> <identifier>
    [ <','> <identifier> ]*
}

## exec of an expression, optionally "in" one or two further expressions.
rule exec_stmt {
    <'exec'> <expression>
    [ <'in'> <expression> [ <','> <expression> ]? ]?
}
## The import forms, tried in this order:
##   import mod [as x] , mod [as x] ...
##   from mod import name [as x] , name [as x] ...
##   from mod import ( name [as x] , ... [,] )
##   from mod import *
rule import_stmt {
    | <'import'> <module> <import_alias>?
      [ <','> <module> <import_alias>? ]*
    | <import_module> <identifier> <import_alias>?
      [ <','> <identifier> <import_alias>? ]*
    | <import_module> <'('> <identifier> <import_alias>?
      [ <','> <identifier> <import_alias>? ]* <','>? <')'>
    | <import_module> <'*'>
}

## The shared "from mod import" prefix.
rule import_module {
    <'from'> <module> <'import'>
}

## "as" followed by the local name.
rule import_alias {
    <'as'> <name>
}

## A possibly dotted module path.
rule module {
    [ <identifier> <'.'> ]*
    <identifier>
}
## String and numeric literals.
##
## The numeric alternatives are ordered from the longest form to the
## shortest.  Every imaginary, float and long literal starts with something
## that <integer> also accepts, and once an alternative has matched no other
## one is tried, so <integer> has to come last or "1.5", "3j" and "10L"
## would all stop after their leading digits.
token literal {
    | <stringliteral>
    | <imagnumber>
    | <floatnumber>
    | <longinteger>
    | <integer>
}
## Integer literals: hexadecimal, octal, decimal, or a lone zero.
##
## The forms that start with '0' and go on (hex, octal) are tried before the
## bare '0'; otherwise "0x1F" and "017" would match only their first digit.
token integer {
    | <'0'> <[xX]> [ \d | <[a..f]> | <[A..F]> ]+
    | <'0'> <[0..7]>+
    | <[1..9]> \d*
    | <'0'>
}
## A run of decimal digits.
token intpart {
    \d+
}

## An integer literal with an 'l' or 'L' suffix.
token longinteger {
    <integer>
    <[lL]>
}

## A float or digit run with a 'j' or 'J' suffix.
token imagnumber {
    [ <floatnumber> | <intpart> ]
    <[jJ]>
}
## Floating point literals:
##   - a number with a decimal point ("1.5", ".5", "1."), optionally
##     followed by an exponent ("1.5e3", "1.e-3");
##   - a digit run followed by a mandatory exponent ("1e5").
## The sign of the exponent is optional, as in Python ("1e5", "1e+5",
## "1e-5").
token floatnumber {
    | [ \d* <'.'> \d+ | \d+ <'.'> ]
      [ <[eE]> <[+\-]>? \d+ ]?
    | \d+ <[eE]> <[+\-]>? \d+
}
## A string literal: optional prefix, then a triple-quoted or single-quoted
## body.  The triple-quoted form is tried first.
token stringliteral {
    <stringprefix>? [ <longstring> | <shortstring> ]
}

## A string in double or single quotes.  The body, captured positionally,
## is made of backslash escapes and of characters other than the quote and
## the line end.
token shortstring {
    | " ( [ \\ . | <-["\n]> ]* ) "
    | ' ( [ \\ . | <-['\n]> ]* ) '
}
## A triple-quoted string; the body is captured positionally.
##
## The body is matched frugally ("*?") so that the string ends at the first
## closing triple quote rather than at the last one in the input.  Body
## characters are either a backslash escape or anything that is not a
## backslash, so an escaped quote can never be taken for part of the closing
## delimiter.
regex longstring {
    | """ ( [ \\ . | <-[\\]> ]*? ) """
    | ''' ( [ \\ . | <-[\\]> ]*? ) '''
}
## Prefix letters that may precede a string literal.  The two-letter
## prefixes are listed first: 'u' and 'U' would otherwise match the start of
## "ur" and leave the 'r' in front of the opening quote.
token stringprefix {
    | <'ur'> | <'UR'> | <'Ur'> | <'uR'>
    | <'r'>  | <'u'>  | <'R'>  | <'U'>
}
## Comma-separated expressions with an optional trailing comma.  The
## separator is written as the quoted literal <','>, like every other comma
## in this grammar.
rule expression_list {
    <expression>
    [ <','> <expression> ]*
    <','>?
}
## "[ ... ]" with optional contents.
rule list_display {
    <'['> <listmaker>? <']'>
}

## Contents of a list display: a first expression followed either by a
## comprehension clause or by further comma-separated expressions.
rule listmaker {
    <expression>
    [ <list_for>
    | [ <','> <expression> ]* <','>?
    ]
}

## A further 'for' or 'if' clause inside a list comprehension.
rule list_iter {
    <list_for> | <list_if>
}
## The 'for' clause of a list comprehension.
##
## The loop variables are parsed as <target_list>, as in <for_stmt>.  An
## expression list cannot be used here: "x in y" is itself a valid
## expression, so it would swallow the 'in' keyword and the iterable.
rule list_for {
    <'for'> <target_list> <'in'> <testlist>
    <list_iter>?
}
## An expression without the conditional ("x if c else y") form.
rule test {
    <or_test> | <lambda_form>
}

## Comma-separated tests with an optional trailing comma.
rule testlist {
    <test>
    [ <','> <test> ]*
    <','>?
}

## The 'if' clause of a list comprehension.
rule list_if {
    <'if'> <test>
    <list_iter>?
}
## The augmented assignment operators.
token augop {
    | <'+='>  | <'-='>  | <'*='> | <'/='> | <'\%='> | <'**='>
    | <'>>='> | <'<<='> | <'&='> | <'^='> | <'|='>
}
## An atom followed by any number of postfix operations.
rule primary {
    <atom>
    <postop>*
}

## One postfix operation: attribute access, subscription, slicing or call,
## tried in that order.
rule postop {
    | <attributeref>
    | <subscription>
    | <slicing>
    | <call>
}
## The smallest expression units: literals, names and bracketed forms.
##
## <literal> is tried before <identifier>: the string prefixes (r, u, ur,
## ...) are valid identifiers too, so with the name first the 'r' of r"..."
## would be taken as a variable and the string itself left unparsed.
rule atom {
    <literal> | <identifier> | <enclosure>
}
## "( ... )" around an optional expression list.
rule parenth_form {
    <'('> <expression_list>? <')'>
}

## The bracketed atoms, tried in this order.
rule enclosure {
    | <parenth_form>
    | <list_display>
    | <generator_expression>
    | <dict_display>
    | <string_conversion>
}
## "( test for ... )".
rule generator_expression {
    <'('>
    <test> <genexpr_for>
    <')'>
}
## The 'for' clause of a generator expression.
##
## The loop variables are parsed as <target_list>, as in <for_stmt>.  An
## expression list cannot be used here: "x in y" is itself a valid
## expression, so it would swallow the 'in' keyword and the iterable.
rule genexpr_for {
    <'for'> <target_list> <'in'> <test>
    <genexpr_iter>?
}
## A further 'for' or 'if' clause inside a generator expression.
rule genexpr_iter {
    <genexpr_for> | <genexpr_if>
}

## The 'if' clause of a generator expression.
rule genexpr_if {
    <'if'> <test>
    <genexpr_iter>?
}
## "{ ... }" with optional key/value pairs.
rule dict_display {
    <'{'> <key_datum_list>? <'}'>
}

## Comma-separated key/value pairs with an optional trailing comma.
rule key_datum_list {
    <key_datum>
    [ <','> <key_datum> ]*
    <','>?
}

## A single "key : value" pair.
rule key_datum {
    <expression> <':'> <expression>
}

## An expression list between backquotes.
rule string_conversion {
    <'`'> <expression_list> <'`'>
}
# Evaluation order of expressions is based on the table as provided in
# http://docs.python.org/ref/summary.html
#
# An expression is a conditional expression ("x if c else y", where the
# "if ... else ..." part is optional) or a lambda.  The else branch is a
# full <expression>, as in the Python reference grammar, so conditional
# expressions nest to the right: "a if b else c if d else e".
#
rule expression {
    | <or_test> [ <'if'> <or_test> <'else'> <expression> ]?
    | <lambda_form>
}
## The boolean and membership/identity operators are parsed by recursive
## descent, from loosest to tightest: or, and, not, [not] in, is [not].
## Everything below that is handed to the <comparison> operator table.

## and-tests joined by 'or'.
rule or_test {
    <and_test>
    [ <'or'> <and_test> ]*
}

## not-tests joined by 'and'.
rule and_test {
    <not_test>
    [ <'and'> <not_test> ]*
}

## Any number of leading 'not' keywords in front of a membership test.
rule not_test {
    | <'not'> <not_test>
    | <not_in_test>
}

## Identity tests joined by 'in' or 'not in'.
rule not_in_test {
    <is_not_test>
    [ <'not'>? <'in'> <is_not_test> ]*
}

## Comparisons joined by 'is' or 'is not'.
rule is_not_test {
    <comparison>
    [ <'is'> <'not'>? <comparison> ]*
}
## <comparison> is an operator-precedence parser.  The protos below fill its
## table from the tightest binding (terms) to the loosest (comparisons);
## each level is declared relative to an earlier one.
rule 'comparison' is optable { ... }

## Terms are parsed by <primary>.
proto 'term:' is precedence('=') is parsed(&primary) { ... }

## Exponentiation groups from right to left: 2**3**2 is 2**(3**2).
proto 'infix:**' is looser('term:') is assoc('right') { ... }

## Unary operators.
proto 'prefix:~' is looser('infix:**') { ... }
proto 'prefix:+' is looser('prefix:~') { ... }
proto 'prefix:-' is equiv('prefix:+') { ... }

## Multiplicative operators.
proto 'infix:*' is looser('prefix:+') is pirop('mul') { ... }
proto 'infix:/' is equiv('infix:*') is pirop('div') { ... }
proto 'infix://' is equiv('infix:*') { ... }
proto 'infix:%' is equiv('infix:*') is pirop('mod') { ... }

## Additive operators.
proto 'infix:+' is looser('infix:*') is pirop('add') { ... }
proto 'infix:-' is equiv('infix:+') is pirop('sub') { ... }

## Shifts.
proto 'infix:<<' is looser('infix:+') { ... }
proto 'infix:>>' is equiv('infix:<<') { ... }

## Bitwise and, xor, or -- each looser than the previous one.
proto 'infix:&' is looser('infix:<<') { ... }
proto 'infix:^' is looser('infix:&') { ... }
proto 'infix:|' is looser('infix:^') { ... }

## Comparisons, all at the same level.
proto 'infix:==' is looser('infix:|') { ... }
proto 'infix:!=' is equiv('infix:==') { ... }
proto 'infix:<=' is equiv('infix:==') { ... }
proto 'infix:>=' is equiv('infix:==') { ... }
proto 'infix:<' is equiv('infix:==') { ... }
proto 'infix:>' is equiv('infix:==') { ... }
## Python reserved words and keywords

## Words that may not be used as identifiers: the keywords, plus None.
## None is followed by a word boundary so that only the whole word is
## reserved; names that merely start with it (NoneType, Nonesuch) remain
## valid identifiers.
token reserved {
    <keyword> | <'None'> \b
}
## The Python keywords.  The trailing \b makes sure that only whole words
## match, so "define" or "international" are not mistaken for keywords.
## ("nor" is not a Python keyword and is therefore not listed.)
token keyword {
    [ <'and'>   | <'assert'> | <'break'>  | <'class'>  | <'continue'> | <'def'>
    | <'del'>   | <'elif'>   | <'else'>   | <'except'> | <'exec'>     | <'finally'>
    | <'for'>   | <'from'>   | <'global'> | <'if'>     | <'import'>   | <'in'>
    | <'is'>    | <'lambda'> | <'not'>    | <'or'>     | <'pass'>     | <'print'>
    | <'raise'> | <'return'> | <'try'>    | <'while'>  | <'with'>     | <'yield'>
    ] \b
}
## vim: expandtab sw=4