[Pynie] Grammar mostly done, but help needed

Klaas-Jan Stol Wed, 28 Feb 2007 02:10:48 -0800

hi,

attached my current version of languages/pynie's grammar.

I can't get the binary operators "is not" and "not in" working correctly. The problem seems to be that "is parsed()" does not work. When I specify a rule in the "is parsed()" annotation, and in that rule I emit some message (just calling a sub that prints something), the rule is never called.


what works:
x = 1 is 2
x = 1 in 2 # does not make sense, but just for parsing


what does not work (correctly)

x = 1 is not 2 ***** This DOES parse, but not correctly: on inspecting the parse tree, you can see that the "prefix:not" operator is used, instead of the "isnot" rule.

x = 1 not in 2 This does not parse at all. This probably also has to do with "prefix:not"; after that 'not' it expects a primary, not "in".

I'm not sure about how "is parsed()" should be working. It is ok for "term:", but I can't understand why the other "is parsed()" annotations do not work. Also, if I specify an "is parsed()" for the "in" or "is" operator, those operators don't work anymore. This is another hint that "is parsed" does not work.


Any new insights would be appreciated.
regards,
kjs

## $Id: grammar_rules.pg 17096 2007-02-20 20:20:48Z paultcochrane $

## TITLE
##     Pynie::Grammar -- a grammar for parsing Python
##
## DESCRIPTION
##
## These are a set of rules for parsing programs written in Python.
## Many of the rules are derived from the "Python Language Reference
## Manual", version 2.5 by Guido van Rossum (Fred L. Drake, Jr., Editor).
## Available online at http://docs.python.org/ref/ and
## http://docs.python.org/ref/grammar.txt .

grammar Pynie::Grammar ;

## C<TOP> defines the top level rule to the HLLCompiler.
## It first calls <?indent_zero> (not defined in the part of the grammar
## shown here) and then parses the input as a <file_input>.
token TOP { <?indent_zero> <file_input> }

## C<newline> matches one or more line ends, each of which may be preceded
## by whatever C<ws> accepts (horizontal whitespace and a comment).
token newline { [ <?ws> \n ]+ }

## C<ws> matches optional horizontal whitespace, optionally followed by a
## '#' comment running up to (but not including) the end of the line.
token ws { \h* [ <'#'> \N* ]? }

## C<file_input> is anchored at the start of the input and matches any mix
## of blank/comment lines and statements; if the end of input is not
## reached after that, <?die> is called with the message "syntax_error".
token file_input { ^ [ <?newline> | <statement> ]* [ $ | <?die: syntax_error> ] 
}

## C<suite> is the body of a compound statement.  It is either a statement
## list on the same line as the header, or a newline followed by a block of
## statements.  <?indent>, <?indent_same> and <?dedent> are helper
## subrules that are not defined in the part of the grammar shown here.
## If the block is not closed by <?dedent>, <?die> is called with an
## IndentationError message.  The message is kept on a single line so that
## no line break ends up inside the error text.
token suite {
    | <stmt_list> <?newline>
    | <?newline> <?indent> <statement>
        [ <?indent_same> <statement> ]*
        [ <?dedent> | <?die: IndentationError: unindent does not match any outer indentation level> ]
}

## C<statement> is either a compound statement or a list of simple
## statements terminated by a newline.
token statement {
    | <compound_stmt>
    | <stmt_list> <?newline>
}

## C<stmt_list> is one or more simple statements separated by ';', with an
## optional trailing ';'.
token stmt_list { <simple_stmt> [ <';'> <simple_stmt> ]* <';'>? }

## C<compound_stmt> dispatches to the individual compound statement rules;
## the alternatives are tried in the order listed.
token compound_stmt {
    | <if_stmt>
    | <while_stmt>
    | <for_stmt>
    | <try_stmt>
    | <with_stmt>
    | <funcdef>
    | <classdef>
}

## 'if' expression ':' suite, followed by any number of 'elif' clauses and
## an optional 'else' clause.
rule if_stmt {
    <'if'> <expression> <':'> <suite>
    [ <'elif'> <expression> <':'> <suite> ]*
    [ <'else'> <':'> <suite> ]?
}

## 'while' expression ':' suite, with an optional 'else' clause.
rule while_stmt {
    <'while'> <expression> <':'> <suite>
    [ <'else'> <':'> <suite> ]?
}

## 'for' target_list 'in' expression_list ':' suite, with an optional
## 'else' clause.
rule for_stmt {
    <'for'> <target_list> <'in'> <expression_list> <':'> <suite>
    [ <'else'> <':'> <suite> ]?
}

## A try statement is either the 'except' form (try1_stmt) or the plain
## 'finally' form (try2_stmt); the 'except' form is tried first.
rule try_stmt {
    <try1_stmt> | <try2_stmt>
}

## 'try' ':' suite followed by one or more 'except' clauses (each with an
## optional expression and optional ',' target), then optional 'else' and
## 'finally' clauses.
rule try1_stmt {
    <'try'> <':'> <suite>
    [ <'except'> [ <expression> [ <','> <target> ]? ]? <':'> <suite> ]+
    [ <'else'> <':'> <suite> ]?
    [ <'finally'> <':'> <suite> ]?
}

## 'try' ':' suite 'finally' ':' suite, without any 'except' clause.
rule try2_stmt {
    <'try'> <':'> <suite>
    <'finally'> <':'> <suite>
}

## 'with' expression, an optional 'as' target, then ':' suite.
rule with_stmt {
    <'with'> <expression> [ <'as'> <target> ]? <':'> <suite>
}

## A function definition: optional decorators, 'def', the function name, a
## parenthesised optional parameter list, ':' and the body suite.
rule funcdef {
    <decorators>? <'def'> <funcname> <'('> <parameter_list>? <')'>
    <':'> <suite>
}


## One or more decorators.
rule decorators {
    <decorator>+
}

## A single decorator: '@' followed by a dotted name, optionally followed
## by a parenthesised argument list (which may be empty and may have a
## trailing comma), and terminated by a newline.
rule decorator {
    <'@'> <dotted_name> [ <'('> [ <argument_list> <','>? ]? <')'> ]? <?newline>
}

## The arguments of a call.  Four shapes are accepted, tried in order:
## positional arguments (optionally followed by keyword arguments),
## keyword arguments only, a '*' expression, or a '**' expression.  The
## first two may be followed by ',' '*' expression and ',' '**' expression;
## the third may be followed by ',' '**' expression.
rule argument_list {
    | <positional_arguments> [ <','> <keyword_arguments> ]?
        [ <','> <'*'> <expression> ]?
        [ <','> <'**'> <expression> ]?
    | <keyword_arguments>
        [ <','> <'*'> <expression> ]?
        [ <','> <'**'> <expression> ]?
    | <'*'> <expression>
        [ <','> <'**'> <expression> ]?
    | <'**'> <expression>
}

## One or more expressions separated by ','.
rule positional_arguments {
    <expression> [ <','> <expression> ]*
}

## One or more keyword items separated by ','.
rule keyword_arguments {
    <keyword_item> [ <','> <keyword_item> ]*
}

## A single keyword argument: identifier '=' expression.
rule keyword_item {
    <identifier> <'='> <expression>
}

## One or more identifiers separated by '.'.
rule dotted_name {
    <identifier> [ <'.'> <identifier> ]*
}

## The name of a function being defined; simply an identifier.
rule funcname {
    <identifier>
}

## The formal parameters of a function or lambda: zero or more
## "defparameter ','" pairs, followed by one required final item, which is
## '*' identifier (optionally followed by ',' '**' identifier),
## '**' identifier, or a last defparameter with an optional trailing ','.
## NOTE(review): for input such as "a, b," the leading repetition appears
## to consume every parameter together with its comma, leaving nothing for
## the required final group -- confirm whether a trailing comma after the
## last plain parameter is accepted.
rule parameter_list {
    [ <defparameter> <','> ]*
    [ <'*'> <identifier> [ <','> <'**'> <identifier> ]?
    | <'**'> <identifier>
    | <defparameter> <','>?
    ]
}

## A parameter with an optional '=' default expression.
rule defparameter {
    <parameter> [ <'='> <expression> ]?
}

## A comma-separated list of parameters with an optional trailing ','; used
## for parenthesised (nested) parameters.
rule sublist {
    <parameter> [ <','> <parameter> ]* <','>?
}

## A parameter is an identifier or a parenthesised sublist of parameters.
rule parameter {
    <identifier> | <'('> <sublist> <')'>
}


## A class definition: 'class', the class name, an optional inheritance
## list, ':' and the body suite.
rule classdef {
    <'class'> <classname> <inheritance>? <':'> <suite>
}

## The name of a class being defined; simply an identifier.
rule classname {
    <identifier>
}

## The base-class list: a parenthesised, optional expression list.
rule inheritance {
    <'('> <expression_list>? <')'>
}


## A lambda: 'lambda', an optional parameter list, ':' and an expression.
rule lambda_form {
    <'lambda'> <parameter_list>? <':'> <expression>
}

## C<simple_stmt> dispatches to the individual simple statement rules; the
## alternatives are tried in the order listed.
token simple_stmt {
    | <assert_stmt>
    | <assignment_stmt>
    | <augmented_assignment_stmt>
    | <expression_stmt>
    | <pass_stmt>
    | <del_stmt>
    | <print_stmt>
    | <return_stmt>
    | <yield_stmt>
    | <raise_stmt>
    | <break_stmt>
    | <continue_stmt>
    | <import_stmt>
    | <global_stmt>
    | <exec_stmt>
}

## An expression list used as a statement.
rule expression_stmt {    <expression_list>
}

## 'assert' expression, with an optional second expression after ','.
rule assert_stmt {
    <'assert'> <expression> [ <','> <expression> ]?
}

## One or more "target_list '='" prefixes followed by an expression list,
## which allows chained assignment such as a = b = value.
rule assignment_stmt { [ <target_list> <'='> ]+ <expression_list> }

## A single target, an augmented assignment operator (see C<augop>) and an
## expression list.
rule augmented_assignment_stmt { <target> <augop> <expression_list> }

## One or more targets separated by ',', with an optional trailing ','.
rule target_list { <target> [ <','> <target> ]* <','>? }

## An assignment target: an identifier, a parenthesised or bracketed
## target list, or a primary followed by an attribute reference,
## subscription or slicing.
## NOTE(review): <identifier> is tried first, and <primary> already
## consumes any trailing <postop>s (attribute references, subscriptions,
## slicings) itself, so the three "<primary> ..." alternatives look
## unreachable for targets such as a.b or a[0] -- confirm against the
## parser's backtracking behaviour before relying on them.
rule target {
    | <identifier>
    | <'('> <target_list> <')'>
    | <'['> <target_list> <']'>
    | <primary> <attributeref>
    | <primary> <subscription>
    | <primary> <slicing>
}

## The argument part of a call expression.  Two forms are accepted:
##   * a single generator expression:  ( test genexpr_for )
##   * an optional argument list with an optional trailing comma
## The generator form must be tried first: <argument_list> also matches the
## leading expression of a generator expression, and once it has succeeded
## it is not re-entered, so the generator form would otherwise never be
## reached.
rule call {
    | <'('> <test> <genexpr_for> <')'>
    | <'('> [ <argument_list> <','>? ]? <')'>
}

## An attribute reference suffix: '.' identifier.
rule attributeref {
    <'.'> <identifier>
}

## A subscription suffix: an expression list in square brackets.
rule subscription {
    <'['> <expression_list> <']'>
}

## A slicing suffix: the simple form is tried before the extended form.
rule slicing {
    <simple_slicing> | <extended_slicing>
}

## A simple slicing: a single short slice in square brackets.
rule simple_slicing {
    <'['> <short_slice> <']'>
}

## An extended slicing: a slice list in square brackets.
rule extended_slicing {
    <'['> <slice_list> <']'>
}

## One or more slice items separated by ',', with an optional trailing ','.
rule slice_list {
    <slice_item> [ <','> <slice_item> ]* <','>?
}

## One item of an extended slicing: a proper slice, a plain expression, or
## the ellipsis '...'.  <proper_slice> is tried before <expression>
## because a slice such as "1:2" starts with an expression; with ordered
## alternatives, trying <expression> first would match only the "1" and
## the slice form would never be attempted.
rule slice_item {
    <proper_slice> | <expression> | <'...'>
}

## A proper slice is a long slice (lower:upper:stride) or a short slice
## (lower:upper).  <long_slice> is tried first because every long slice
## begins with a short slice; with ordered alternatives, trying
## <short_slice> first would stop before the stride part.
rule proper_slice {
    <long_slice> | <short_slice>
}

## A short slice: optional lower bound, ':', optional upper bound.
rule short_slice {
    <expression>? <':'> <expression>?
}

## A long slice: a short slice followed by ':' and an optional stride
## expression.
rule long_slice {
    <short_slice> <':'> <expression>?
}

## An identifier: not a reserved word (see C<reserved>), starting with an
## alphabetic character or '_', followed by any number of word characters.
token identifier { <!reserved> [ <?alpha> | <'_'> ] \w* }

## A name as used after 'as' in import statements.  It follows the same
## lexical shape as C<identifier>: not a reserved word, starting with an
## alphabetic character or '_', followed by any number of word characters.
## (Previously only a lowercase first character and no digits were
## accepted, so aliases such as "Foo" or "np2" could not be parsed; every
## name accepted before is still accepted.)
token name { <!reserved> [ <?alpha> | <'_'> ] \w* }

## 'print' followed by an optional comma-separated list of expressions.
## The optional trailing ',' is captured in a positional group.
rule print_stmt {
    <'print'> [ <expression> [ <','> <expression> ]* (<','>?) ]?
}

## The 'pass' statement.
rule pass_stmt {
    <'pass'>
}

## 'del' followed by a target list.
rule del_stmt {
    <'del'> <target_list>
}

## 'return' with an optional expression list.
rule return_stmt {
    <'return'> <expression_list>?
}

## 'yield' followed by an expression list.
rule yield_stmt {
    <'yield'> <expression_list>
}

## The 'break' statement.
rule break_stmt {
    <'break'>
}

## The 'continue' statement.
rule continue_stmt {
    <'continue'>
}

## 'raise' followed by up to three comma-separated expressions, all
## optional.
rule raise_stmt {
    <'raise'> [ <expression> [ <','> <expression>
    [ <','> <expression> ]? ]? ]?
}

## 'global' followed by one or more identifiers separated by ','.
rule global_stmt {
    <'global'> <identifier> [ <','> <identifier> ]*
}

## 'exec' expression, optionally followed by 'in' expression and a further
## optional ',' expression.
rule exec_stmt {
    <'exec'> <expression>
    [ <'in'> <expression> [ <','> <expression> ]? ]?
}

## The import statement, in four forms tried in order:
##   * 'import' module [alias] (',' module [alias])*
##   * 'from' module 'import' identifier [alias] (',' identifier [alias])*
##   * the same list wrapped in parentheses, with an optional trailing ','
##   * 'from' module 'import' '*'
rule import_stmt {
    | <'import'> <module> <import_alias>?
      [ <','> <module> <import_alias>? ]*
    | <import_module> <identifier> <import_alias>?
      [ <','> <identifier> <import_alias>? ]*
    | <import_module> <'('> <identifier> <import_alias>?
      [ <','> <identifier> <import_alias>? ]* <','>? <')'>
    | <import_module> <'*'>
}

## The common "'from' module 'import'" prefix of the from-import forms.
rule import_module {
    <'from'> <module> <'import'>
}

## An import alias: 'as' followed by a name.
rule import_alias {
    <'as'> <name>
}

## A module path: identifiers separated by '.'.
rule module {
    [ <identifier> <'.'> ]* <identifier>
}

## A literal: a string or one of the numeric forms.  The numeric
## alternatives are ordered from the most specific to the least specific
## (imaginary, float, long integer, plain integer).  Alternatives are tried
## in order and a successful one is not re-entered, so if <integer> came
## first it would match the leading digits of "1.5", "10L" or "3j" and the
## longer forms would never be tried.
token literal {
    | <stringliteral>
    | <imagnumber>
    | <floatnumber>
    | <longinteger>
    | <integer>
}

## An integer literal: hexadecimal (0x...), octal (0 followed by octal
## digits), decimal (non-zero digit followed by digits), or a lone '0'.
## The lone '0' must come last: alternatives are tried in order, and if it
## came before the octal and hexadecimal forms it would match just the
## leading zero of "017" or "0x1F".
token integer {
    | <'0'> <[xX]> [ \d | <[a..f]> | <[A..F]> ]+
    | <'0'> <[0..7]>+
    | <[1..9]> \d*
    | <'0'>
}

## One or more decimal digits.
token intpart { \d+ }

## A long integer literal: an integer followed by 'l' or 'L'.
token longinteger { <integer> <[lL]> }

## An imaginary literal: a float or a run of digits followed by 'j' or 'J'.
token imagnumber { [ <floatnumber> | <intpart> ] <[jJ]> }

## A floating point literal.  Three forms, tried in order:
##   * exponent float: a point float or a run of digits, then 'e' or 'E',
##     an optional sign and one or more digits (1e10, 1.5e-3, .5E+2, 1.e5)
##   * point float with a fraction: optional digits, '.', digits (1.5, .5)
##   * point float without a fraction: digits followed by '.' (1.)
## The exponent form comes first so that "1.5e10" is not cut short at
## "1.5".  The sign of the exponent is optional.
token floatnumber {
    | [ \d* <'.'> \d+ | \d+ <'.'>? ] <[eE]> <[+\-]>? \d+
    | \d* <'.'> \d+
    | \d+ <'.'>
}

## A string literal: an optional prefix followed by a long (triple-quoted)
## or short string.  The long form is tried first.
token stringliteral {
    <stringprefix>?
    [ <longstring> | <shortstring> ]
}

## A short string delimited by double or single quotes.  The body is
## captured; it consists of backslash escapes (a backslash followed by any
## character) and any characters other than the delimiter and newline.
token shortstring {
    | " ( [ \\ . | <-["\n]> ]* ) "
    | ' ( [ \\ . | <-['\n]> ]* ) '
}

## A long (triple-quoted) string; the body is captured and may span lines.
## The body is matched non-greedily so that the string ends at the first
## closing triple quote; a greedy body would run on to the last triple
## quote in the input and merge separate strings.  A backslash always
## consumes the character after it, and the second alternative excludes
## the backslash so that an escape sequence cannot be split when the
## non-greedy match is extended.
regex longstring {
    | """ ( [ \\ . | <-[\\]> ]*? ) """
    | ''' ( [ \\ . | <-[\\]> ]*? ) '''
}


## The optional prefix of a string literal.  The two-letter prefixes are
## listed before the one-letter ones: alternatives are tried in order, so
## with 'u' listed first the prefix of ur"..." would match only the 'u'
## and the string would then fail to parse.
token stringprefix {
    <'ur'> | <'UR'> | <'Ur'> | <'uR'> | <'r'> | <'u'> | <'R'> | <'U'>
}

## One or more expressions separated by ',', with an optional trailing ','.
## The separator is written as the quoted literal <','>, as in every other
## rule of this grammar, rather than as a bare ','.
rule expression_list { <expression> [ <','> <expression> ]* <','>? }

## A list display: an optional listmaker in square brackets.
rule list_display { <'['> <listmaker>? <']'> }

## The contents of a list display: an expression followed either by a
## list comprehension 'for' clause, or by further comma-separated
## expressions with an optional trailing ','.
rule listmaker {
    <expression>
    [ <list_for>
    | [ <','> <expression> ]* <','>?
    ]
}

## A further clause of a list comprehension: a 'for' or an 'if' clause.
rule list_iter {
    <list_for> | <list_if>
}

## 'for' expression_list 'in' testlist, optionally followed by further
## comprehension clauses.
rule list_for {
    <'for'> <expression_list> <'in'> <testlist>
    <list_iter>?
}

## A test: an or_test (the operator-precedence expression) or a lambda.
rule test {
    <or_test> | <lambda_form>
}

## One or more tests separated by ',', with an optional trailing ','.
rule testlist {
    <test> [ <','> <test> ]* <','>?
}

## 'if' test, optionally followed by further comprehension clauses.
rule list_if {
    <'if'> <test> <list_iter>?
}

## The augmented assignment operators.  '//=' (floor division) is included
## alongside '/='; the two cannot be confused with each other because the
## second character differs.
token augop {
    | <'+='>  | <'-='>  | <'*='> | <'/='> | <'//='> | <'\%='> | <'**='>
    | <'>>='> | <'<<='> | <'&='> | <'^='> | <'|='>
}

## A primary: an atom followed by any number of postfix operations.
rule primary {
    <atom> <postop>*
}

## A postfix operation on a primary: attribute reference, subscription,
## slicing or call, tried in that order.
rule postop {
    | <attributeref>
    | <subscription>
    | <slicing>
    | <call>
}


## An atom: a literal, an identifier, or an enclosure.  <literal> is tried
## before <identifier> because a prefixed string such as r"..." or u'...'
## starts with a letter; with <identifier> first, the prefix letter alone
## would be taken as an identifier and the string would fail to parse.
## Trying <literal> first cannot steal ordinary identifiers, because every
## literal needs a quote, a digit or a '.' right after the optional prefix.
rule atom {
    <literal> | <identifier> | <enclosure>
}

## A parenthesised form: an optional expression list in parentheses.
rule parenth_form {
    <'('> <expression_list>? <')'>
}

## An enclosure: a parenthesised form, list display, generator expression,
## dict display or string conversion, tried in that order.
rule enclosure {
    | <parenth_form>
    | <list_display>
    | <generator_expression>
    | <dict_display>
    | <string_conversion>
}

## A generator expression: '(' test genexpr_for ')'.
rule generator_expression {
    <'('> <test> <genexpr_for> <')'>
}

## 'for' expression_list 'in' test, optionally followed by further
## generator clauses.
rule genexpr_for {
    <'for'> <expression_list> <'in'> <test>
    <genexpr_iter>?
}

## A further clause of a generator expression: a 'for' or an 'if' clause.
rule genexpr_iter {
    <genexpr_for> | <genexpr_if>
}

## 'if' test, optionally followed by further generator clauses.
rule genexpr_if {
    <'if'> <test> <genexpr_iter>?
}

## A dictionary display: an optional key/datum list in braces.
rule dict_display {
    <'{'> <key_datum_list>? <'}'>
}

## One or more key/datum pairs separated by ',', with an optional
## trailing ','.
rule key_datum_list {
    <key_datum> [ <','> <key_datum> ]* <','>?
}

## A single dictionary entry: expression ':' expression.
rule key_datum {
    <expression> <':'> <expression>
}

## A string conversion: an expression list enclosed in backquotes.
rule string_conversion {
    <'`'> <expression_list> <'`'>
}

## An expression: an or_test optionally followed by a conditional
## ('if' or_test 'else' test), or a lambda form.
rule expression {
    | <or_test> [ <'if'> <or_test> <'else'> <test> ]?
    | <lambda_form>
}

## The two-word operator "is not"; referenced by the is parsed(&isnot)
## trait of 'infix:isnot'.
rule isnot {
    <'is'> <'not'>
}

## The two-word operator "not in"; referenced by the is parsed(&notin)
## trait of 'infix:notin'.
rule notin {
    <'not'> <'in'>
}


## C<or_test> is declared with the "is optable" trait; the proto
## declarations that follow define its terms and operators.  Each operator
## states its precedence relative to a previously declared one with
## is looser() or is equiv().
rule 'or_test' is optable { ... }


## Terms are parsed with the C<primary> rule.  The arithmetic operators
## that carry an is pirop() trait name the opcode given there (mul, div,
## mod, add, sub).  Floor division ('infix://') is currently commented out.
proto 'term:'      is precedence('=')    is parsed(&primary) { ... }
proto 'infix:**'   is looser('term:')        { ... }
proto 'prefix:~'   is looser('infix:**')     { ... }
proto 'prefix:+'   is looser('prefix:~')     { ... }
proto 'prefix:-'   is equiv('prefix:+')      { ... }
proto 'infix:*'    is looser('prefix:+') is pirop('mul')     { ... }
proto 'infix:/'    is equiv('infix:*')   is pirop('div')     { ... }
#proto 'infix://'   is equiv('infix:*')       { ... }
proto 'infix:%'    is equiv('infix:*')   is pirop('mod')     { ... }
proto 'infix:+'    is looser('infix:*')  is pirop('add')     { ... }
proto 'infix:-'    is equiv('infix:+')   is pirop('sub')     { ... }

## Shift and bitwise operators, each level looser than the previous one.
proto 'infix:<<'   is looser('infix:+')      { ... }
proto 'infix:>>'   is equiv('infix:<<')      { ... }
proto 'infix:&'    is looser('infix:<<')     { ... }
proto 'infix:^'    is looser('infix:&')      { ... }
proto 'infix:|'    is looser('infix:^')      { ... }

## Comparison operators; all share the precedence of 'infix:=='.
proto 'infix:=='   is looser('infix:|')      { ... }
proto 'infix:!='   is equiv('infix:==')      { ... }
proto 'infix:<='   is equiv('infix:==')      { ... }
proto 'infix:>='   is equiv('infix:==')      { ... }
proto 'infix:<'    is equiv('infix:==')      { ... }
proto 'infix:>'    is equiv('infix:==')      { ... }


## Identity and membership operators.  'is' is looser than the comparison
## operators and 'in' is looser than 'is'.  The two-word forms "is not" and
## "not in" are declared as 'infix:isnot' and 'infix:notin' and point at
## the C<isnot> and C<notin> rules through is parsed().
## NOTE(review): according to the message accompanying this grammar, the
## rules named in these is parsed() traits are never invoked: "1 is not 2"
## ends up using the prefix not operator and "1 not in 2" does not parse.
## The cause is not established here.
proto 'infix:is'    is looser('infix:==')
{ ... }

proto 'infix:isnot' is parsed(&isnot) is equiv('infix:is')
{ ... }

proto 'infix:in'    is looser('infix:is')
{ ... }

proto 'infix:notin' is parsed(&notin)  is equiv('infix:in')
{ ... }


# <not_test>
## NOTE: this operator is deliberately spelled 'nor' instead of 'not'.
## It was renamed just for testing, to see whether "is not" is using
## "prefix:not".  When it is renamed back, the is looser() reference in
## 'infix:and' and the 'nor' entry in the keyword token must follow.
proto 'prefix:nor'
is looser('infix:in')
{ ... }

# <and_test>
## Logical and; looser than the (renamed) prefix not operator.  The
## explanatory remark about the rename lives in the comment block above,
## so that no comment text sits inside the declaration itself.
proto 'infix:and'
is looser('prefix:nor')
{ ... }

# <or_test>
## Logical or; the loosest operator in the table.
proto 'infix:or'   is looser('infix:and')    { ... }


## Python reserved words and keywords

## Words that may not be used as identifiers: every keyword, plus 'None'.
## 'None' is followed by a word boundary, as the keywords are, so that
## identifiers which merely start with "None" (for example "Nonesuch")
## are not rejected.
token reserved {
    <keyword> | <'None'> \b
}

## The keywords of the language.  The whole group is followed by a word
## boundary, so a keyword only matches as a complete word.
## NOTE(review): 'nor' is not a Python keyword; it appears to be listed
## only because the prefix not operator is temporarily declared as
## 'prefix:nor' in the operator table -- confirm and remove it together
## with that experiment.
token keyword {
    [ <'and'>   | <'assert'> | <'break'>  | <'class'>  | <'continue'> | <'def'>
    | <'del'>   | <'elif'>   | <'else'>   | <'except'> | <'exec'>     | 
<'finally'>
    | <'for'>   | <'from'>   | <'global'> | <'if'>     | <'import'>   | <'in'>
    | <'is'>    | <'lambda'> | <'not'>    | <'or'>     | <'pass'>     | 
<'print'>
    | <'raise'> | <'return'> | <'try'>    | <'while'>  | <'with'>     | 
<'yield'> | <'nor'> ] \b
}

## vim: expandtab sw=4

[Pynie] Grammar mostly done, but help needed

Reply via email to