hi,

I've done some more work on the grammar of Pynie.

This patch also includes the other patch I sent yesterday (so that one can be skipped).

This grammar is ALMOST done, but I'm stuck for now because it runs into infinite recursion :-( It also needs some cleanup and, of course, testing. It looks like some of the rules will have to be rewritten.

Another thing: Python allows for:

x = a not in b

and

x = a is not c

I handled "not in" by adding an "infix:not" rule and a "prefix:in" rule ("is not" is handled by combining "infix:is" with "prefix:not"). However, this also allows for:

x = a not b

and

x = in c

Both of these are, of course, wrong. Maybe we need an extra operator type (neither infix nor prefix, but "multiop" or whatever) to allow for multi-token operators?

regards,
kjs



Index: languages/pynie/src/parser/Grammar.pg
===================================================================
--- languages/pynie/src/parser/Grammar.pg	(revision 17189)
+++ languages/pynie/src/parser/Grammar.pg	(working copy)
@@ -37,6 +37,11 @@
 token compound_stmt {
     | <if_stmt>
     | <while_stmt>
+    | <for_stmt>
+    | <try_stmt>
+    | <with_stmt>
+    | <funcdef>
+    | <classdef>
 }
 
 rule if_stmt {
@@ -50,32 +55,296 @@
     [ <'else'> <':'> <suite> ]?
 }
 
+rule for_stmt {
+    <'for'> <target_list> <'in'> <expression_list> <':'> <suite>
+    [ <'else'> <':'> <suite> ]?
+}
+
+rule try_stmt {
+    <try1_stmt> | <try2_stmt>
+}
+
+rule try1_stmt {
+    <'try'> <':'> <suite>
+    [ <'except'> [ <expression> [ <','> <target> ]? ]? <':'> <suite> ]+
+    [ <'else'> <':'> <suite> ]?
+    [ <'finally'> <':'> <suite> ]?
+}
+
+rule try2_stmt {
+    <'try'> <':'> <suite>
+    <'finally'> <':'> <suite>
+}
+
+rule with_stmt {
+    <'with'> <expression> [ <'as'> <target> ]? <':'> <suite>
+}
+
+rule funcdef {
+    <decorators>? <'def'> <funcname> <'('> <parameter_list>? <')'>
+    <':'> <suite>
+}
+
+
+rule decorators {
+    <decorator>+
+}
+
+rule decorator {
+    <'@'> <dotted_name> [ <'('> [ <argument_list> <','>? ]? <')'> ]? <?newline>
+}
+
+rule argument_list {
+    <positional_arguments> [ <','> <keyword_arguments> ]?
+      [ <','> <'*'> <expression> ]?
+      [ <','> <'**'> <expression> ]?
+    | <keyword_arguments>
+        [ <','> <'*'> <expression> ]?
+        [ <','> <'**'> <expression> ]?
+    | <','> <'*'> <expression>
+      [ <','> <'**'> <expression> ]?
+    | <','> <'**'> <expression>
+}
+
+rule positional_arguments {
+    <expression> [ <','> <expression> ]*
+}
+
+rule keyword_arguments {
+    <keyword_item> [ <','> <keyword_item> ]*
+}
+
+rule keyword_item {
+    <identifier> <'='> <expression>
+}
+
+rule dotted_name {
+    <identifier> [ <'.'> <identifier> ]*
+}
+
+rule funcname {
+    <identifier>
+}
+
+rule parameter_list {
+    [ <defparameter> <','> ]*
+    [
+      <'*'> <identifier> [ <','> <'**'> <identifier> ]?
+    | <'**'> <identifier>
+    | <defparameter> <','>?
+    ]
+}
+
+rule defparameter {
+    <parameter> [ <'='> <expression> ]?
+}
+
+rule sublist {
+    <parameter> [ <','> <parameter> ]* <','>?
+}
+
+rule parameter {
+    <identifier> | <'('> <sublist> <')'>
+}
+
+
+rule classdef {
+    <'class'> <classname> <inheritance>? <':'> <suite>
+}
+
+rule classname {
+    <identifier>
+}
+
+rule inheritance {
+    <'('> <expression_list>? <')'>
+}
+
+rule or_test {
+    <and_test> | <or_test> <'or'> <and_test>
+}
+
+rule and_test {
+    <not_test> | <and_test> <'and'> <not_test>
+}
+
+rule not_test {
+    <comparison> | <'not'> <not_test>
+}
+
+rule lambda_form {
+    <'lambda'> <parameter_list>? <':'> <expression>
+}
+
 token simple_stmt {
+    | <assert_stmt>
+    | <assignment_stmt>
+    | <augmented_assignment_stmt>
+    | <expression_stmt>
+    | <pass_stmt>
+    | <del_stmt>
     | <print_stmt>
-    | <assignment_stmt>
-    | <expression>
+    | <return_stmt>
+    | <yield_stmt>
+    | <raise_stmt>
+    | <break_stmt>
+    | <continue_stmt>
+    | <import_stmt>
+    | <global_stmt>
+    | <exec_stmt>
 }
 
+rule expression_stmt {
+    <expression_list>
+}
+
+rule assert_stmt {
+    <'assert'> <expression> [ <','> <expression> ]?
+}
+
 rule assignment_stmt { [ <target_list> <'='> ]+ <expression_list> }
 
-rule target_list { <target> [ <','> <target> ]* (<','>)? }
+rule augmented_assignment_stmt { <target> <augop> <expression_list> }
 
-token target { <identifier> }
+rule target_list { <target> [ <','> <target> ]* <','>? }
 
-token identifier { [ <?alpha> | <'_'> ] \w* }
+rule target {
+  <identifier>
+  | <'('> <target_list> <')'>
+  | <'['> <target_list> <']'>
+  | <attributeref>
+  | <subscription>
+  | <slicing>
+}
 
+rule attributeref {
+		<primary> <'.'> <identifier>
+}
+
+rule subscription {
+		<primary> <'['> <expression_list> <']'>
+}
+
+rule slicing {
+		<simple_slicing> | <extended_slicing>
+}
+
+rule simple_slicing {
+		<primary> <'['> <short_slice> <']'>
+}
+
+rule extended_slicing {
+		<primary> <'['> <slice_list> <']'>
+}
+
+rule slice_list {
+		<slice_item> [ <','> <slice_item> ]* <','>?
+}
+
+rule slice_item {
+		<expression> | <proper_slice> | <'...'>
+}
+
+rule proper_slice {
+		<short_slice> | <long_slice>
+}
+
+rule short_slice {
+		<expression>? <':'> <expression>?		
+}
+
+rule long_slice {
+		<short_slice> <':'> <expression>?
+}
+
+token identifier { <!reserved> [ <?alpha> | <'_'> ] \w* }
+
+token name { <!reserved> <[a..z]> [ <alpha> | <'_'> ]* }
+
 rule print_stmt {
     <'print'> [ <expression> [ <','> <expression> ]* (<','>?) ]?
 }
 
+rule pass_stmt {
+    <'pass'>
+}
 
+rule del_stmt {
+    <'del'> <target_list>
+}
+
+rule return_stmt {
+    <'return'> <expression_list>?
+}
+
+rule yield_stmt {
+    <'yield'> <expression_list>
+}
+
+rule break_stmt {
+    <'break'>
+}
+
+rule continue_stmt {
+    <'continue'>
+}
+
+rule raise_stmt {
+    <'raise'> [ <expression> [ <','> <expression>
+    [ <','> <expression> ]? ]? ]?
+}
+
+rule global_stmt {
+    <'global'> <identifier> [ <','> <identifier> ]*
+}
+
+rule exec_stmt {
+    <'exec'> <expression>
+    [ <'in'> <expression> [ <','> <expression> ]? ]?
+}
+
+rule import_stmt {
+    | <'import'> <module> <import_alias>?
+      [ <','> <module> <import_alias>? ]*
+    | <import_module> <identifier> <import_alias>?
+      [ <','> <identifier> <import_alias>? ]*
+    | <import_module> <'('> <identifier> <import_alias>?
+      [ <','> <identifier> <import_alias>? ]* <','>? <')'>
+    | <import_module> <'*'>
+}
+
+rule import_module {
+    <'from'> <module> <'import'>
+}
+
+rule import_alias {
+    <'as'> <name>
+}
+
+rule module {
+    [ <identifier> <'.'> ]* <identifier>
+}
+
 token literal {
     | <stringliteral>
     | <integer>
+    | <longinteger>
+    | <floatnumber>
+    | <imagnumber>
 }
 
-token integer { \d+ }
+token integer { 
+		| <[1..9]> \d* | <'0'>
+		| <'0'> <[0..7]>+
+		| <'0'> <[xX]> [ \d | <[a..f]> | <[A..F]> ]+
+}
 
+token intpart { \d+ }
+
+token longinteger { <integer> <[lL]> }
+
+token imagnumber { [ <floatnumber> | <intpart> ] <[jJ]> }
+
 token floatnumber {
     | \d* <'.'> \d+
     | \d+ <'.'>
@@ -83,25 +352,133 @@
 }
 
 token stringliteral {
-    | " ( [ \\ . | <-["]> ]* ) "
-    | ' ( [ \\ . | <-[']> ]* ) '
+    <stringprefix>?
+    [ <longstring> | <shortstring> ]
 }
 
-rule expression_list { <expression> [ , <expression> ]* }
+token shortstring {
+    | " ( [ \\ . | <-["\n]> ]* ) "
+    | ' ( [ \\ . | <-['\n]> ]* ) '
+}
 
+regex longstring {
+    | """ ( [ \\ . | <-["""]> ]* ) """
+    | ''' ( [ \\ . | <-[''']> ]* ) '''
+}
+
+
+token stringprefix {
+    <'r'> | <'u'> | <'ur'> | <'R'> | <'U'> | <'UR'> | <'Ur'> | <'uR'>
+}
+
+rule expression_list { <expression> [ , <expression> ]* <','>? }
+
 rule list_display { <'['> <listmaker>? <']'> }
 
-rule listmaker { <expression> [ <','> <expression> ]* (<','>)? }
+rule listmaker { 
+		<expression> [ <list_for> 
+		| [ <','> <expression> ]* <','>? ] 
+}
 
+rule list_iter {
+		<list_for> | <list_if>
+}
 
-##  This identifies operators for the bottom-up parser
+rule list_for {
+		<'for'> <expression_list> <'in'> <testlist>
+		<list_iter>?
+}
 
+rule test {
+		<or_test> | <lambda_form>
+}
+
+rule testlist {
+		<test> [ <','> <test> ]* <','>?
+}
+
+rule list_if {
+		<'if'> <test> <list_iter>?
+}
+
+token augop {
+    | <'+='>  | <'-='>  | <'*='> | <'/='> | <'\%='> | <'**='>
+    | <'>>='> | <'<<='> | <'&='> | <'^='> | <'|='>
+}
+
+
+rule call {
+    <primary> <'('> [ <argument_list>? <','>? ]? <')'>
+    # XXX
+}
+
+rule primary {
+    | <atom>
+    #| <attributeref>
+    #| <subscription>
+    #| <slicing>
+    #| <call>
+}
+
+rule atom {
+		<identifier> | <literal> | <enclosure>
+}
+
+rule parenth_form {
+		<'('> <expression_list>? <')'>
+}
+
+rule enclosure {
+		| <parenth_form> 
+		| <list_display> 
+		| <generator_expression> 
+		| <dict_display>
+		| <string_conversion>
+}
+
+rule generator_expression {
+		<'('> <test> <genexpr_for> <')'>
+}
+
+rule genexpr_for {
+		<'for'> <expression_list> <'in'> <test> 
+		<genexpr_iter>?
+}
+
+rule genexpr_iter {
+		<genexpr_for> | <genexpr_if>
+}
+
+rule genexpr_if {
+		<'if'> <test> <genexpr_iter>?
+}
+
+rule dict_display {
+		<'{'> <key_datum_list>? <'}'>
+}
+
+rule key_datum_list {
+		<key_datum> [ <','> <key_datum> ]* <','>?
+}
+
+rule key_datum {
+		<expression> <':'> <expression>
+}
+
+rule string_conversion {
+		<'`'> <expression_list> <'`'>
+}
+
+## This identifies operators for the bottom-up parser
+## See operator precedence table at http://docs.python.org/ref/summary.html
+
 token 'expression' is optable { ... }
 
 proto 'term:'      is precedence('=')
     is parsed(&term)
     { ... }
 
+# should this be primary??
 token term {
     | <literal>
     | <identifier>
@@ -110,7 +487,11 @@
 
 proto 'infix:**'   is looser('term:')  {...}
 
-proto 'infix:*'    is looser('infix:**')
+proto 'prefix:~'    is looser('infix:**') { ... }
+proto 'prefix:+'    is looser('prefix:~') { ... }
+proto 'prefix:-'    is equiv('prefix:+') { ... }
+
+proto 'infix:*'    is looser('prefix:+')
     is pirop('mul')
     { ... }
 
@@ -130,11 +511,52 @@
     is pirop('sub')
     { ... }
 
-proto 'infix:=='   is looser('infix:+')  { ... }
+
+
+proto 'infix:<<' is looser('infix:+') { ... }
+proto 'infix:>>' is equiv('infix:<<') { ... }
+proto 'infix:&' is looser('infix:<<') { ... }
+proto 'infix:^' is looser('infix:&') { ... }
+proto 'infix:|' is looser('infix:^') { ... }
+
+proto 'infix:=='   is looser('infix:|')  { ... }
 proto 'infix:!='   is equiv('infix:==')  { ... }
 proto 'infix:<='   is equiv('infix:==')  { ... }
 proto 'infix:>='   is equiv('infix:==')  { ... }
 proto 'infix:<'    is equiv('infix:==')  { ... }
 proto 'infix:>'    is equiv('infix:==')  { ... }
 
+# "is not" infix operator is handled by combining 'prefix:not' and 'infix:is'
+proto 'infix:is' is looser('infix:==') { ... }
+proto 'infix:in' is looser('infix:is') { ... }
+
+proto 'prefix:not' is looser('infix:in') { ... }
+
+# to handle "not in" as infix op, combine 'prefix:in' and 'infix:not'
+# this also allows for "x = 1 not 2", which is of course Wrong.
+# Fix this later (new category of operator??)
+#
+proto 'infix:not' is looser('infix:in') { ... }
+proto 'prefix:in' is looser('infix:is') { ... }
+
+proto 'infix:and' is looser('prefix:not') { ... }
+proto 'infix:or' is looser('infix:and') { ... }
+
+
+
+
+## Python reserved words and keywords
+
+token reserved {
+    <keyword> | <'None'>
+}
+
+token keyword {
+    [ <'and'>   | <'assert'> | <'break'>  | <'class'>  | <'continue'> | <'def'>
+    | <'del'>   | <'elif'>   | <'else'>   | <'except'> | <'exec'>     | <'finally'>
+    | <'for'>   | <'from'>   | <'global'> | <'if'>     | <'import'>   | <'in'>
+    | <'is'>    | <'lambda'> | <'not'>    | <'or'>     | <'pass'>     | <'print'>
+    | <'raise'> | <'return'> | <'try'>    | <'while'>  | <'with'>     | <'yield'> ] \b
+}
+
 ## vim: expandtab sw=4

Reply via email to