[pypy-dev] Internship and bug report

Leonard de HARO Mon, 18 Jun 2012 17:26:08 -0700

Hello,

I'm a french undergraduated CS. student currently doing an internship in
Santigo's University under the supervision of Eric Tanter. I'm working on
the meta JIT compiler provided in the pypy project, trying to document
it's performance on different kind of interpreters. I started two weeks
ago.


I have a problem with the translation toolchain. I've been writing an
interpreter for a small language inspired by Shriram Krishnamurthi's F1WAE
from his book "Programming Languages: Application and Interpretation".
My problem is the following: I have an interpreter that seems to work
perfecty (at least on my examples) when I use pypy interpreters, but
whenever I translate it, execution fails. The fact is: I have identified
the source of the problem but I don't understand it.
If you look at the parser file, on line 184, there is a "print"
instruction. With this instruction on comment, the translated version of
RPinterpret executed on test10run10 gets a Stack Overflow. Use a
translated version of the same file (RPinterpret.py) with the print
instruction in parser.py, you get the normal behavior (which you have
anyway using "pypy RPinterpret.py test10runs10").

Looks like a bug, but since I'm a beginer (both on this work and on Python
in general), maybe I have missed something.

I have the same problem with the supposely-JITing interpreter, but it's
worse since the translations takes 20min against 1min, so changing and
experiencing on the JITing file seems much harder.

Anyway, thanks for your help.
I didn't know whether I was supposed to make a bug report or just contact
the mailing list, but since I'll be working on this until september, might
as well get in touch with you right now.

Leonard de HARO

PS: As I said, I just learned Python, so maybe my files aren't verry
pretty, I apologize for that and I'll welcome any comment on my work you
could make. Thanks again.

import treeClass

#######################################
# Reimplementation of some functions from the library #
########################################

def belong(n,s,e):
    """ Raises True if s<=n<e """
    
    return n<e and n>=s


def CutWord(string,i): 
    """Find the first blank character following string[i] in string. If there's none, return the length of the list."""
    
    if string[i] in (' ','\n','\t'): 
        return i 
    else:
        try: 
            return CutWord(string,i+1)
        except IndexError:
            return i+1

# Block of functions to define an identifier or a number

alphaOrUnd = ('a','z','e','r','t','y','u','i','o','p','q','s','d','f','g','h','j','k','l','m','w','x','c','v','b','n','_')
digit = ('0','1','2','3','4','5','6','7','8','9')

def isAlphaOrUndChar(c):
    """ Check if the first character belongs to alphaOrUnd. """
    try:
        return c[0] in alphaOrUnd
    except IndexError:
        return False

def isAlphaNumOrUnd(c):
    """ Check if every character is either in alphaOrUnd or in digit. """

    length =len(c)
    pc = 0
    answer = True
    while answer and pc < length:
        answer = answer and (c[pc] in alphaOrUnd or c[pc] in digit)
        pc +=1
    return answer and length >0
        
def IsIdentifier(word):
    """True if word is a correct identifier."""
    
    return (isAlphaOrUndChar(word) and isAlphaNumOrUnd(word))

def IsNumber(c):
    """ True iff the string is only made of digits and non-empty."""

    length =len(c)
    pc = 0
    answer = True
    while answer and pc < length:
        answer = answer and c[pc] in digit
        pc +=1
    return answer and length>0
#

# Replace str.partition(' ') unavailable with TTC
def partitionSpace(word):
    """ Same as string method partition(' ') """

    length = len(word)
    pc = 0
    head = tail = ''
    while pc < length:
        if word[pc]==' ':
            assert(pc>=0)
            head = word[0:pc]
            tail = word[pc+1:length]
            break
        else:
            pc += 1
            
    return head, tail

# Replace str.split() available, but we need the number of spaces deleted at the beging of the word
def StripSpace(word):
    """ Same as str.strip(' ') but also return the number of spaces deleted at the begining. """

    beg = 0
    end = len(word)
    count = 0

    while beg < end:
        if word[beg] == ' ':
            count += 1
            beg += 1
        else:
            break

    while end > beg:
        end -= 1
        if word[end] != ' ':
            break

    if beg == end == len(word):
        return '', len(word)
    else:
        end += 1
        assert(end>=0)
        return word[beg: end], count
#

#######################################
# Useful functions #
########################################

# Build associativity of braces

# To optmisize (RPython): calculate size of stack and dic beforehand
def BuildAssociativity(fileToUse):
    """ Build a associative table of braces. """

    bracketMap = {}
    leftstack = []

    pc = 0
    for char in fileToUse:
        if char == '(' or char == '{':
            leftstack.append(pc)
        elif char == ')':
            left = leftstack.pop()
            if fileToUse[left] != '(':
                raise ValueError("Should be } at " + str(pc))
            right = pc
            bracketMap[left] = right
            bracketMap[right] = left
        elif char == '}':
            left = leftstack.pop()
            if fileToUse[left] != '{':
                raise ValueError("Should be ) at "+str(pc))
            right = pc
            bracketMap[left] = right
            bracketMap[right] = left
        else:
                pass
        pc += 1
            
    return bracketMap

# To make the dict corresponding to a cut string
def FilterDic(dictry, start, end):
    """Return a new dictionnary containning only pairing of indices between start and (end-1) inclued."""

    newDic ={}
    for i,k in dictry.items():
        if belong(i,start,end) and belong(k,start,end):
            newDic[i-start]=k-start
        elif belong(i,start,end) and not belong(k,start,end):
            raise ValueError("Not a valid bracket matching")
        else:
            pass
    return newDic


# Spliting code into meaningful blocks

def SplittingCode(fileToUse, bracketMap):
    """ Splits the code into meaningful blocks. """

    pc = 0
    length = len(fileToUse)
    blocks = []
    
    while pc<length:
        ch = fileToUse[pc]
        if (ch in (' ', '\t', '\n')):
            pass
        elif ch =='(' or ch=='{':
            matchingBracket = bracketMap[pc] + 1
            assert(matchingBracket >=0 )
            block = fileToUse[pc:matchingBracket]
            newDic = FilterDic(bracketMap, pc, matchingBracket)
            blocks.append((block, newDic))
            pc = matchingBracket
        else:
            end = CutWord(fileToUse,pc)
            # print("This line fixes the stack overflow problem in the translated execution of RPinterpret.py")
            assert(end >= 0)
            blocks.append((fileToUse[pc:end], {}))
            pc = end
        pc += 1
            
    return blocks

#######################################
# Actual parsing functions #
########################################

# Parsing a function declaration

def ParseFunc((block, dic)):
    """ Given a block defining a function, return the correspondant representation. There are only simple spaces. """

    n = dic[0]
    if not (block[0] == '{' and block[n] == '}'):
        raise ValueError("Not a function block :\n"+ block)
    else:
        assert(n>=0)
        workingBlock = block[1:n]
        dic2 = FilterDic(dic,1,n)
        subBlocks = SplittingCode(workingBlock, dic2)
        #
        if len(subBlocks) != 2:
            raise ValueError("Only two sub-blocks expected in block :\n"+block)
        else:
            declaration, dd = subBlocks[0]
            #
            if len(dd.values()) != 2 :
                raise ValueError("No sub-blocks expected inside of :\n" + declaration)
            #
            end = dd[0]
            assert(end>=0)
            declareList = declaration[1:end].split(" ")
            if len(declareList) != 2:
                raise ValueError("Wrong declaration: \n" + declaration + "\nExpected form: <id> <id>")
            name = declareList[0]
            argName = declareList[1]
                
            bodyTree = ParseF1WAE(subBlocks[1])

    return name, treeClass.Func(name,argName,bodyTree)


# Parsing a function declaration

def ParseF1WAE((block, dic)):
    """Parses <F1WAE>. Only simple spaces."""

    if block[0] == '{':
        raise ValueError("Function declaration is not allowed in <F1WAE> :\n" + block)
    #
    elif block[0] == '(':
        lastPos = dic[0]
        assert(lastPos >= 0)
        blockContent, count = StripSpace(block[1:lastPos])
        # First word in blockContent allows to identify the case
        head, tail = partitionSpace(blockContent)
        #
        if head == 'with':
            bodyWith = SplittingCode(tail, FilterDic(dic,len(head+' ')+1+count,dic[0]))
            if len(bodyWith) != 2:
                raise ValueError("Two expressions expected following keyword 'with':\n" + block)
            else:
                falseApp = ParseF1WAE(bodyWith[0]) #Same syntax as an App
                if not(isinstance(falseApp, treeClass.App)):
                    raise ValueError("Wrong assignement in with block:\n" + block)
                else:
                    return treeClass.With(falseApp.funName, falseApp.arg, ParseF1WAE(bodyWith[1]))
            #
        elif head == 'if':
            bodyWith = SplittingCode(tail, FilterDic(dic, len(head+' ')+1+count, dic[0]))
            length = len(bodyWith)
            if length ==  3: # All fields requested. No 'pass' instructions in the language.
                cond = ParseF1WAE(bodyWith[0])
                ctrue = ParseF1WAE(bodyWith[1])
                cfalse = ParseF1WAE(bodyWith[2])
                return treeClass.If(cond, ctrue, cfalse)
            else:
                raise ValueError("Too many (or no) instructions in 'if' block :\n"+block)
            #
        elif head[0] in ('+','-','*','/','%','='): # Treats the case were space is forgotten after operator
            if len(head)==1: # There is a space after the operator
                bodyOp = SplittingCode(tail,FilterDic(dic, len(head+' ')+1+count,dic[0]))
            else: # There's no space after the operator
                newHead = head[1:len(head)] +' '
                bodyOp = SplittingCode((newHead + tail),FilterDic(dic, 1+1+count,dic[0]))
            if len(bodyOp) != 2:
                raise ValueError("Two expressions expected following operator :\n" + block)
            else:
                return treeClass.Op(head[0], ParseF1WAE(bodyOp[0]), ParseF1WAE(bodyOp[1]))
            #
        else: # An App or a parenthesized Num or Id
            bodyApp =  SplittingCode(tail, FilterDic(dic,len(head+' ')+1+count,dic[0]))
            lenght = len(bodyApp)
            if lenght == 0: # Parenthesized Num or Id
                return ParseF1WAE((head, FilterDic(dic,1,dic[0])))
            elif lenght == 1: #An App
                return treeClass.App(head, ParseF1WAE(bodyApp[0]))
        #
    else:
        #
        if IsIdentifier(block):
            return treeClass.Id(block)
        elif IsNumber(block):
            return treeClass.Num(int(block))
        else:
            raise ValueError("Syntax Error in identifier :\n" + block)

# Global parsing method
        
def Parse(myFile):
    """ The global parsing function. """
    
    myFile = (myFile.replace('\n',' ')).replace('\t',' ')
    # There are only simple spaces. Makes it easier to deal with.
    bracketMap = BuildAssociativity(myFile)
    codeInPieces = SplittingCode(myFile, bracketMap)
    #
    funcToDefine = []
    prog = []
    #
    for couple in codeInPieces:
        s,d = couple
        if s[0] == '{':
            funcToDefine.append((s,d))
        else:
            prog.append((s,d))
    #
    try: # Check that BNF is respected
        prog[1]
        raise ValueError("Only one <Prog> is allowed.")
    except IndexError:
        pass
    #
    # Create the function dictionnary
    funcDict = {}
    for funcDef in funcToDefine:
        name, descr = ParseFunc(funcDef)
        try:
            uselessVar = funcDict[name]
            raise ValueError("Function "+name+" already defined.")
        except KeyError:
            funcDict[name] = descr
    #
    # Create AST of main program
    ast = ParseF1WAE(prog[0])

    return ast, funcDict

# <file> ::= <Def>* <Prog> <Def>*

# <Prog> ::= <F1WAE>

# <Def> ::= { ( <id> <id> )
#                ( <F1WAE> ) } 


# <F1WAE> :: = <num>
#          | ( <op> <F1WAE> <F1WAE> )
#          | ( with ( <id> <F1WAE> ) <F1WAE> )
#          | <id>
#          | ( <id> <F1WAE>)
#          | ( if <F1WAE> <F1WAE> <F1WAE> ) # (if <cond> <true> <false>) True <=> !=0

# <op> ::= '+' | '-' | '*' | '/' | '%' | '='
# <num> ::= [ '0' - '9' ]+
# <id> ::= [ '_', 'a' - 'z', 'A' - 'Z'][ '_', 'a' - 'z', 'A' - 'Z', '0' -'9' ]*

class File:
    def __init__(self, prog, funcDic):
        self.prog=prog #Should be a F1WAE
        self.funcDic=funcDic #Should be a dictionnary of keys name_of_function and values of class Func

class Func:
    def __init__(self, name, argName, body):
        self.name=name # Id
        self.argName=argName # Id
        self.body=body # F1WAE

class F1WAE:
    def __init__(self):
        pass
        
class Node(F1WAE):
    def __init__(self):
        pass

class Leaf(F1WAE):
    def __init__(self):
        pass

class Num(Leaf):
    def __init__(self, n):
        self.n=n # Int

class Id(Leaf):
    def __init__(self, name):
        self.name=name # Id

class Op(Node):
    def __init__(self, op, lhs, rhs):
        self.op=op # Op
        self.lhs=lhs # F1WAE
        self.rhs=rhs # F1WAE

class With(Node):
    def __init__(self, name, nameExpr, body):
        self.name=name # Id
        self.nameExpr=nameExpr # F1WAE
        self.body=body # F1WAE

class App(Node):
    def __init__(self, funName, arg):
        self.funName=funName # Id, name of a function
        self.arg=arg # F1WAE

class If(Node):
    def __init__(self, cond, ctrue, cfalse):
        self.cond=cond # Condition
        self.ctrue=ctrue # If condition is true
        self.cfalse=cfalse #If condition is false

import treeClass
import parser
import sys
 
def Interpret(tree, funDict, contVar):
    """ Interpret the F1WAE AST given a set of defined functions. We use deferred substituion and eagerness."""

    if isinstance(tree, treeClass.Num):
        return tree.n
    #
    elif isinstance(tree, treeClass.Op):
        left = Interpret(tree.lhs, funDict, contVar)
        right = Interpret(tree.rhs, funDict, contVar)
        if tree.op == '+':
            return left + right
        elif tree.op == '-':
            return left - right
        elif tree.op == '*':
            return left * right
        elif tree.op == '/':
            return left/right
        elif tree.op == '%':
            return left % right
        elif tree.op == '=':
            if left == right:
                return 1
            else:
                return 0
        else:
            raise ValueError("Parsing Error, symobl "+ tree.op+" shouldn't be here.")
    #
    elif isinstance(tree, treeClass.With):
        val = Interpret(tree.nameExpr, funDict, contVar)
        contVar[tree.name] = val #Eager
        return Interpret(tree.body, funDict, contVar)
    #
    elif isinstance(tree, treeClass.Id):
        try:
            return contVar[tree.name] 
        except KeyError:
            raise ValueError("Interpret Error: free identifier :\n" + tree.name)
    #
    elif isinstance(tree, treeClass.App):
        try:
            #
            funDef = funDict[tree.funName]
            val = Interpret(tree.arg, funDict, contVar)
            if not isinstance(funDef, treeClass.Func):
                raise ValueError("Wrong Dictionnary.")
            newCont = {funDef.argName: val} # Eager
            return Interpret(funDef.body, funDict, newCont)
        #
        except KeyError:
            raise ValueError("Invalid function : " + tree.funName)
    #
    elif isinstance(tree, treeClass.If):
        condition = Interpret(tree.cond, funDict, contVar)
        if condition != 0: #True
            return Interpret(tree.ctrue, funDict, contVar)
        else:
            return Interpret(tree.cfalse, funDict, contVar)
    #
    else: # Not an <F1WAE>
                raise ValueError("Argument of Interpret is not a <F1WAE>:\n")
        
def Main(file):
    t,d = parser.Parse(file)
    j = Interpret(t,d,{})
    print("the answer is :" + str(j))

import os

def run(fp):
    program_contents = ""
    while True:
        read = os.read(fp, 4096)
        if len(read) == 0:
            break
        program_contents += read
    os.close(fp)
    Main(program_contents)

def entry_point(argv):
    try:
        filename = argv[1]
    except IndexError:
        print "You must supply a filename"
        return 1

    run(os.open(filename, os.O_RDONLY, 0777))
    return 0

def target(*args):
    return entry_point, None

if __name__ == "__main__":
    entry_point(sys.argv)

test10runs10
Description: Binary data

_______________________________________________
pypy-dev mailing list
pypy-dev@python.org
http://mail.python.org/mailman/listinfo/pypy-dev

[pypy-dev] Internship and bug report

Reply via email to