# On Mar 24, 6:52 pm, "mkppk" <[EMAIL PROTECTED]> wrote:
# >
# > Its just that I would rather not reinvent the wheel (or read old C
# > code)..
# >
# Wouldn't we all!
# Here is the basic structure of a pyparsing solution.  The parsing part
# isn't so bad - the real problem is the awful ParseONUS routine in C.
# Plus things are awkward since the C program parses right-to-left and
# then reverses all of the found fields, and the parser I wrote works
# left-to-right.  Still, this grammar does most of the job.  I've left
# out my port of ParseONUS since it is *so* ugly, and not really part of
# the pyparsing example.
#
# -- Paul

# Not referenced by the code shown here; presumably needed by the omitted
# part of parseONUS, so it is kept.
import re

from pyparsing import *

# define values for optional fields
NoAmountGiven = ""
NoEPCGiven = ""
NoAuxOnusGiven = ""

# define delimiters
DOLLAR = Suppress("$")
T_ = Suppress("T")
A_ = Suppress("A")

# field definitions
amt = DOLLAR + Word(nums, exact=10) + DOLLAR
onus = Word("0123456789A- ")
transit = T_ + Word("0123456789-") + T_
epc = oneOf(list(nums))
aux_onus = A_ + Word("0123456789- ") + A_


# validation parse action
def validateTransitNumber(t):
    """Validate the transit number token and return it unchanged.

    ``t[0]`` is the text matched between the ``T`` delimiters (digits and
    dashes only, per the ``transit`` expression).  The rules applied are:

    * more than one dash is rejected;
    * with exactly one dash, the part before the dash must be 3 or 4
      characters long, and no checksum is computed;
    * with no dash, the number must pass a weighted (3, 7, 1) checksum
      whose total is a multiple of 10.

    Raises:
        ParseException: if any rule above is violated, or if an undashed
            number has fewer than the nine digits the checksum needs.
    """
    number = t[0]
    parts = number.split("-")

    # ParseException takes (pstr, loc, msg); the text goes in the msg slot
    # so that pe.msg carries the explanation printed by the test loop.
    if len(parts) > 2:
        raise ParseException(number, 0, "too many dashes in transit number")

    if len(parts) == 2:
        if len(parts[0]) not in (3, 4):
            raise ParseException(
                number, 0, "invalid dash position in transit number")
        return number

    # compute checksum
    # original algorithm worked with reversed data
    ti = [int(ch) for ch in reversed(number)]
    if len(ti) < 9:
        # Without this guard the indexing below raises IndexError rather
        # than reporting a parse failure.
        raise ParseException(
            number, 0, "transit number too short for checksum")
    cksum = (3 * (ti[8] + ti[5] + ti[2])
             + 7 * (ti[7] + ti[4] + ti[1])
             + ti[6] + ti[3] + ti[0])
    if cksum % 10 != 0:
        raise ParseException(number, 0, "transit number failed checksum")
    return number


# define overall MICR format, with results names
micrdata = (
    Optional(aux_onus, default=NoAuxOnusGiven).setResultsName("aux_onus")
    + Optional(epc, default=NoEPCGiven).setResultsName("epc")
    + transit.setParseAction(validateTransitNumber).setResultsName("transit")
    + onus.setResultsName("onus")
    + Optional(amt, default=NoAmountGiven).setResultsName("amt")
    + stringEnd
)


def parseONUS(tokens):
    """Parse action for the whole MICR line; adds derived fields to *tokens*.

    As posted, this only initialises ``csn``, ``tpc`` and ``account`` to
    empty strings and replaces ``amt`` with its first element.  The code
    that actually derives the account and check numbers was left out of
    the original post.
    """
    tokens["csn"] = ""
    tokens["tpc"] = ""
    tokens["account"] = ""
    tokens["amt"] = tokens["amt"][0]
    onus = tokens.onus
    # remainder omitted out of respect for newsreaders...
    # suffice to say that unspeakable acts are performed on
    # onus and aux_onus fields to extract account and
    # check numbers


micrdata.setParseAction(parseONUS)

# Read the test cases, skipping the first (header) line.  The context
# manager closes the file, which the bare file(...).readlines() did not.
with open("checks.csv") as checks:
    testdata = checks.readlines()[1:]

# Each test is (MICR string from the second column, all columns of the row).
tests = [(flds[1], flds) for flds in (line.split(",") for line in testdata)]


def verifyResults(res, csv):
    """Print a field-by-field comparison of parse results and a CSV row.

    *res* is the parse result for one MICR line and *csv* is the list of
    20 column values for the same row.  Each compared pair is printed with
    a leading ``_`` when the values are equal and ``X`` when they differ.
    """
    def match(x, y):
        print("%s %s = %s" % ("_" if x == y else "X", x, y))

    # Column names follow the original post; AS and TS occur more than
    # once in the row and none of those repeated columns is compared.
    (Ex, MICR, Bank, Stat, Amt, AS, TPC, TS, CSN, CS,
     ACCT, AS, EPC, ES, ONUS, OS, AUX, AS, Tran, TS) = csv

    match(res.amt, Amt)
    match(res.account, ACCT)
    match(res.csn, CSN)
    match(res.onus, ONUS)
    match(res.tpc, TPC)
    match(res.epc, EPC)
    match(res.transit, Tran)


for t, data in tests:
    print(t)
    try:
        res = micrdata.parseString(t)
        print(res.dump())
        # data[0] is the first CSV column; anything other than "No" on a
        # successful parse is reported as an unexpected pass.
        if not (data[0] == "No"):
            print("Passed expression that should have failed")
        verifyResults(res, data)
    except ParseException as pe:
        print("<parse failed> %s" % pe.msg)
        if not (data[0] == "Yes"):
            print("Failed expression that should have passed")
    print("")

# --
# http://mail.python.org/mailman/listinfo/python-list