Hello, I'm a french undergraduated CS. student currently doing an internship in Santigo's University under the supervision of Eric Tanter. I'm working on the meta JIT compiler provided in the pypy project, trying to document it's performance on different kind of interpreters. I started two weeks ago.
I have a problem with the translation toolchain. I've been writing an interpreter for a small language inspired by Shriram Krishnamurthi's F1WAE from his book "Programming Languages: Application and Interpretation". My problem is the following: I have an interpreter that seems to work perfecty (at least on my examples) when I use pypy interpreters, but whenever I translate it, execution fails. The fact is: I have identified the source of the problem but I don't understand it. If you look at the parser file, on line 184, there is a "print" instruction. With this instruction on comment, the translated version of RPinterpret executed on test10run10 gets a Stack Overflow. Use a translated version of the same file (RPinterpret.py) with the print instruction in parser.py, you get the normal behavior (which you have anyway using "pypy RPinterpret.py test10runs10"). Looks like a bug, but since I'm a beginer (both on this work and on Python in general), maybe I have missed something. I have the same problem with the supposely-JITing interpreter, but it's worse since the translations takes 20min against 1min, so changing and experiencing on the JITing file seems much harder. Anyway, thanks for your help. I didn't know whether I was supposed to make a bug report or just contact the mailing list, but since I'll be working on this until september, might as well get in touch with you right now. Leonard de HARO PS: As I said, I just learned Python, so maybe my files aren't verry pretty, I apologize for that and I'll welcome any comment on my work you could make. Thanks again.
import treeClass
#######################################
# Reimplementation of some functions from the library #
########################################
def belong(n,s,e):
""" Raises True if s<=n<e """
return n<e and n>=s
def CutWord(string,i):
"""Find the first blank character following string[i] in string. If there's none, return the length of the list."""
if string[i] in (' ','\n','\t'):
return i
else:
try:
return CutWord(string,i+1)
except IndexError:
return i+1
# Block of functions to define an identifier or a number
alphaOrUnd = ('a','z','e','r','t','y','u','i','o','p','q','s','d','f','g','h','j','k','l','m','w','x','c','v','b','n','_')
digit = ('0','1','2','3','4','5','6','7','8','9')
def isAlphaOrUndChar(c):
""" Check if the first character belongs to alphaOrUnd. """
try:
return c[0] in alphaOrUnd
except IndexError:
return False
def isAlphaNumOrUnd(c):
""" Check if every character is either in alphaOrUnd or in digit. """
length =len(c)
pc = 0
answer = True
while answer and pc < length:
answer = answer and (c[pc] in alphaOrUnd or c[pc] in digit)
pc +=1
return answer and length >0
def IsIdentifier(word):
"""True if word is a correct identifier."""
return (isAlphaOrUndChar(word) and isAlphaNumOrUnd(word))
def IsNumber(c):
""" True iff the string is only made of digits and non-empty."""
length =len(c)
pc = 0
answer = True
while answer and pc < length:
answer = answer and c[pc] in digit
pc +=1
return answer and length>0
#
# Replace str.partition(' ') unavailable with TTC
def partitionSpace(word):
""" Same as string method partition(' ') """
length = len(word)
pc = 0
head = tail = ''
while pc < length:
if word[pc]==' ':
assert(pc>=0)
head = word[0:pc]
tail = word[pc+1:length]
break
else:
pc += 1
return head, tail
# Replace str.split() available, but we need the number of spaces deleted at the beging of the word
def StripSpace(word):
""" Same as str.strip(' ') but also return the number of spaces deleted at the begining. """
beg = 0
end = len(word)
count = 0
while beg < end:
if word[beg] == ' ':
count += 1
beg += 1
else:
break
while end > beg:
end -= 1
if word[end] != ' ':
break
if beg == end == len(word):
return '', len(word)
else:
end += 1
assert(end>=0)
return word[beg: end], count
#
#######################################
# Useful functions #
########################################
# Build associativity of braces
# To optmisize (RPython): calculate size of stack and dic beforehand
def BuildAssociativity(fileToUse):
""" Build a associative table of braces. """
bracketMap = {}
leftstack = []
pc = 0
for char in fileToUse:
if char == '(' or char == '{':
leftstack.append(pc)
elif char == ')':
left = leftstack.pop()
if fileToUse[left] != '(':
raise ValueError("Should be } at " + str(pc))
right = pc
bracketMap[left] = right
bracketMap[right] = left
elif char == '}':
left = leftstack.pop()
if fileToUse[left] != '{':
raise ValueError("Should be ) at "+str(pc))
right = pc
bracketMap[left] = right
bracketMap[right] = left
else:
pass
pc += 1
return bracketMap
# To make the dict corresponding to a cut string
def FilterDic(dictry, start, end):
"""Return a new dictionnary containning only pairing of indices between start and (end-1) inclued."""
newDic ={}
for i,k in dictry.items():
if belong(i,start,end) and belong(k,start,end):
newDic[i-start]=k-start
elif belong(i,start,end) and not belong(k,start,end):
raise ValueError("Not a valid bracket matching")
else:
pass
return newDic
# Spliting code into meaningful blocks
def SplittingCode(fileToUse, bracketMap):
""" Splits the code into meaningful blocks. """
pc = 0
length = len(fileToUse)
blocks = []
while pc<length:
ch = fileToUse[pc]
if (ch in (' ', '\t', '\n')):
pass
elif ch =='(' or ch=='{':
matchingBracket = bracketMap[pc] + 1
assert(matchingBracket >=0 )
block = fileToUse[pc:matchingBracket]
newDic = FilterDic(bracketMap, pc, matchingBracket)
blocks.append((block, newDic))
pc = matchingBracket
else:
end = CutWord(fileToUse,pc)
# print("This line fixes the stack overflow problem in the translated execution of RPinterpret.py")
assert(end >= 0)
blocks.append((fileToUse[pc:end], {}))
pc = end
pc += 1
return blocks
#######################################
# Actual parsing functions #
########################################
# Parsing a function declaration
def ParseFunc((block, dic)):
""" Given a block defining a function, return the correspondant representation. There are only simple spaces. """
n = dic[0]
if not (block[0] == '{' and block[n] == '}'):
raise ValueError("Not a function block :\n"+ block)
else:
assert(n>=0)
workingBlock = block[1:n]
dic2 = FilterDic(dic,1,n)
subBlocks = SplittingCode(workingBlock, dic2)
#
if len(subBlocks) != 2:
raise ValueError("Only two sub-blocks expected in block :\n"+block)
else:
declaration, dd = subBlocks[0]
#
if len(dd.values()) != 2 :
raise ValueError("No sub-blocks expected inside of :\n" + declaration)
#
end = dd[0]
assert(end>=0)
declareList = declaration[1:end].split(" ")
if len(declareList) != 2:
raise ValueError("Wrong declaration: \n" + declaration + "\nExpected form: <id> <id>")
name = declareList[0]
argName = declareList[1]
bodyTree = ParseF1WAE(subBlocks[1])
return name, treeClass.Func(name,argName,bodyTree)
# Parsing a function declaration
def ParseF1WAE((block, dic)):
"""Parses <F1WAE>. Only simple spaces."""
if block[0] == '{':
raise ValueError("Function declaration is not allowed in <F1WAE> :\n" + block)
#
elif block[0] == '(':
lastPos = dic[0]
assert(lastPos >= 0)
blockContent, count = StripSpace(block[1:lastPos])
# First word in blockContent allows to identify the case
head, tail = partitionSpace(blockContent)
#
if head == 'with':
bodyWith = SplittingCode(tail, FilterDic(dic,len(head+' ')+1+count,dic[0]))
if len(bodyWith) != 2:
raise ValueError("Two expressions expected following keyword 'with':\n" + block)
else:
falseApp = ParseF1WAE(bodyWith[0]) #Same syntax as an App
if not(isinstance(falseApp, treeClass.App)):
raise ValueError("Wrong assignement in with block:\n" + block)
else:
return treeClass.With(falseApp.funName, falseApp.arg, ParseF1WAE(bodyWith[1]))
#
elif head == 'if':
bodyWith = SplittingCode(tail, FilterDic(dic, len(head+' ')+1+count, dic[0]))
length = len(bodyWith)
if length == 3: # All fields requested. No 'pass' instructions in the language.
cond = ParseF1WAE(bodyWith[0])
ctrue = ParseF1WAE(bodyWith[1])
cfalse = ParseF1WAE(bodyWith[2])
return treeClass.If(cond, ctrue, cfalse)
else:
raise ValueError("Too many (or no) instructions in 'if' block :\n"+block)
#
elif head[0] in ('+','-','*','/','%','='): # Treats the case were space is forgotten after operator
if len(head)==1: # There is a space after the operator
bodyOp = SplittingCode(tail,FilterDic(dic, len(head+' ')+1+count,dic[0]))
else: # There's no space after the operator
newHead = head[1:len(head)] +' '
bodyOp = SplittingCode((newHead + tail),FilterDic(dic, 1+1+count,dic[0]))
if len(bodyOp) != 2:
raise ValueError("Two expressions expected following operator :\n" + block)
else:
return treeClass.Op(head[0], ParseF1WAE(bodyOp[0]), ParseF1WAE(bodyOp[1]))
#
else: # An App or a parenthesized Num or Id
bodyApp = SplittingCode(tail, FilterDic(dic,len(head+' ')+1+count,dic[0]))
lenght = len(bodyApp)
if lenght == 0: # Parenthesized Num or Id
return ParseF1WAE((head, FilterDic(dic,1,dic[0])))
elif lenght == 1: #An App
return treeClass.App(head, ParseF1WAE(bodyApp[0]))
#
else:
#
if IsIdentifier(block):
return treeClass.Id(block)
elif IsNumber(block):
return treeClass.Num(int(block))
else:
raise ValueError("Syntax Error in identifier :\n" + block)
# Global parsing method
def Parse(myFile):
""" The global parsing function. """
myFile = (myFile.replace('\n',' ')).replace('\t',' ')
# There are only simple spaces. Makes it easier to deal with.
bracketMap = BuildAssociativity(myFile)
codeInPieces = SplittingCode(myFile, bracketMap)
#
funcToDefine = []
prog = []
#
for couple in codeInPieces:
s,d = couple
if s[0] == '{':
funcToDefine.append((s,d))
else:
prog.append((s,d))
#
try: # Check that BNF is respected
prog[1]
raise ValueError("Only one <Prog> is allowed.")
except IndexError:
pass
#
# Create the function dictionnary
funcDict = {}
for funcDef in funcToDefine:
name, descr = ParseFunc(funcDef)
try:
uselessVar = funcDict[name]
raise ValueError("Function "+name+" already defined.")
except KeyError:
funcDict[name] = descr
#
# Create AST of main program
ast = ParseF1WAE(prog[0])
return ast, funcDict
# <file> ::= <Def>* <Prog> <Def>*
# <Prog> ::= <F1WAE>
# <Def> ::= { ( <id> <id> )
# ( <F1WAE> ) }
# <F1WAE> :: = <num>
# | ( <op> <F1WAE> <F1WAE> )
# | ( with ( <id> <F1WAE> ) <F1WAE> )
# | <id>
# | ( <id> <F1WAE>)
# | ( if <F1WAE> <F1WAE> <F1WAE> ) # (if <cond> <true> <false>) True <=> !=0
# <op> ::= '+' | '-' | '*' | '/' | '%' | '='
# <num> ::= [ '0' - '9' ]+
# <id> ::= [ '_', 'a' - 'z', 'A' - 'Z'][ '_', 'a' - 'z', 'A' - 'Z', '0' -'9' ]*
class File:
def __init__(self, prog, funcDic):
self.prog=prog #Should be a F1WAE
self.funcDic=funcDic #Should be a dictionnary of keys name_of_function and values of class Func
class Func:
def __init__(self, name, argName, body):
self.name=name # Id
self.argName=argName # Id
self.body=body # F1WAE
class F1WAE:
def __init__(self):
pass
class Node(F1WAE):
def __init__(self):
pass
class Leaf(F1WAE):
def __init__(self):
pass
class Num(Leaf):
def __init__(self, n):
self.n=n # Int
class Id(Leaf):
def __init__(self, name):
self.name=name # Id
class Op(Node):
def __init__(self, op, lhs, rhs):
self.op=op # Op
self.lhs=lhs # F1WAE
self.rhs=rhs # F1WAE
class With(Node):
def __init__(self, name, nameExpr, body):
self.name=name # Id
self.nameExpr=nameExpr # F1WAE
self.body=body # F1WAE
class App(Node):
def __init__(self, funName, arg):
self.funName=funName # Id, name of a function
self.arg=arg # F1WAE
class If(Node):
def __init__(self, cond, ctrue, cfalse):
self.cond=cond # Condition
self.ctrue=ctrue # If condition is true
self.cfalse=cfalse #If condition is false
import treeClass
import parser
import sys
def Interpret(tree, funDict, contVar):
""" Interpret the F1WAE AST given a set of defined functions. We use deferred substituion and eagerness."""
if isinstance(tree, treeClass.Num):
return tree.n
#
elif isinstance(tree, treeClass.Op):
left = Interpret(tree.lhs, funDict, contVar)
right = Interpret(tree.rhs, funDict, contVar)
if tree.op == '+':
return left + right
elif tree.op == '-':
return left - right
elif tree.op == '*':
return left * right
elif tree.op == '/':
return left/right
elif tree.op == '%':
return left % right
elif tree.op == '=':
if left == right:
return 1
else:
return 0
else:
raise ValueError("Parsing Error, symobl "+ tree.op+" shouldn't be here.")
#
elif isinstance(tree, treeClass.With):
val = Interpret(tree.nameExpr, funDict, contVar)
contVar[tree.name] = val #Eager
return Interpret(tree.body, funDict, contVar)
#
elif isinstance(tree, treeClass.Id):
try:
return contVar[tree.name]
except KeyError:
raise ValueError("Interpret Error: free identifier :\n" + tree.name)
#
elif isinstance(tree, treeClass.App):
try:
#
funDef = funDict[tree.funName]
val = Interpret(tree.arg, funDict, contVar)
if not isinstance(funDef, treeClass.Func):
raise ValueError("Wrong Dictionnary.")
newCont = {funDef.argName: val} # Eager
return Interpret(funDef.body, funDict, newCont)
#
except KeyError:
raise ValueError("Invalid function : " + tree.funName)
#
elif isinstance(tree, treeClass.If):
condition = Interpret(tree.cond, funDict, contVar)
if condition != 0: #True
return Interpret(tree.ctrue, funDict, contVar)
else:
return Interpret(tree.cfalse, funDict, contVar)
#
else: # Not an <F1WAE>
raise ValueError("Argument of Interpret is not a <F1WAE>:\n")
def Main(file):
t,d = parser.Parse(file)
j = Interpret(t,d,{})
print("the answer is :" + str(j))
import os
def run(fp):
program_contents = ""
while True:
read = os.read(fp, 4096)
if len(read) == 0:
break
program_contents += read
os.close(fp)
Main(program_contents)
def entry_point(argv):
try:
filename = argv[1]
except IndexError:
print "You must supply a filename"
return 1
run(os.open(filename, os.O_RDONLY, 0777))
return 0
def target(*args):
return entry_point, None
if __name__ == "__main__":
entry_point(sys.argv)
test10runs10
Description: Binary data
_______________________________________________ pypy-dev mailing list [email protected] http://mail.python.org/mailman/listinfo/pypy-dev
