Attached is YET another version of formula parser, now with a static method that can convert a RPN order array of Ptgs into human readable infix order formula string.
This change depends on changes to many ptg files, so i will not even attempt to send accross a full set to the list. After I merge my stuff with what andy has done, i will send a proper patch. Till then you will have to live with these bits and pieces :( Cheers - Avik
/* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2002 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation" and * "Apache POI" must not be used to endorse or promote products * derived from this software without prior written permission. For * written permission, please contact [EMAIL PROTECTED] * * 5. Products derived from this software may not be called "Apache", * "Apache POI", nor may "Apache" appear in their name, without * prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */ package org.apache.poi.hssf.record.formula; import java.util.List; import java.util.ArrayList; import java.util.Stack; /** * EXPERIMENTAL * * @author Avik Sengupta <avik AT Avik Sengupta DOT com> * * This class parses a formula string into a List of tokens in RPN order * Inspired by * Lets Build a Compiler, by Jack Crenshaw * BNF for the formula expression is : * <expression> ::= <term> [<addop> <term>]* * <term> ::= <factor> [ <mulop> <factor ]* * <factor> ::= <number> | (<expression>) | <cellRef> */ public class FormulaParser { private String formulaString; private int pointer=0; private List tokens = new java.util.Stack(); //private Stack tokens = new java.util.Stack(); private List result = new ArrayList(); private int numParen; private static char TAB = '\t'; private static char CR = '\n'; private char Look; // Lookahead Character /** create the parser with the string that is to be parsed * later call the parse() method to return ptg list in rpn order * then call the getRPNPtg() to retrive the parse results * This class is recommended only for single threaded use * The parse and getPRNPtg are internally synchronized for safety, thus * while it is safe to use in a multithreaded environment, you will get long lock waits. */ public FormulaParser(String formula){ formulaString = formula; pointer=0; } /** Read New Character From Input Stream */ private void GetChar() { Look=formulaString.charAt(pointer++); //System.out.println("Got char: "+Look); } /** Report an Error */ private void Error(String s) { System.out.println("Error: "+s); } /** Report Error and Halt */ private void Abort(String s) { Error(s); //System.exit(1); //throw exception?? throw new RuntimeException("Cannot Parse, sorry"); } /** Report What Was Expected */ private void Expected(String s) { Abort(s + " Expected"); } /** Recognize an Alpha Character */ private boolean IsAlpha(char c) { return Character.isLetter(c); } /** Recognize a Decimal Digit */ private boolean IsDigit(char c) { //System.out.println("Checking digit for"+c); return Character.isDigit(c); } /** Recognize an Alphanumeric */ private boolean IsAlNum(char c) { return (IsAlpha(c) || IsDigit(c)); } /** Recognize an Addop */ private boolean IsAddop( char c) { return (c =='+' || c =='-'); } /** Recognize White Space */ private boolean IsWhite( char c) { return (c ==' ' || c== TAB); } /** Skip Over Leading White Space */ private void SkipWhite() { while (IsWhite(Look)) { GetChar(); } } /** Match a Specific Input Character */ private void Match(char x) { if (Look != x) { Expected("" + x + ""); }else { GetChar(); SkipWhite(); } } /** Get an Identifier */ private String GetName() { String Token; Token = ""; if (!IsAlpha(Look)) { Expected("Name"); } while (IsAlNum(Look)) { Token = Token + Character.toUpperCase(Look); GetChar(); } SkipWhite(); return Token; } /** Get a Number */ private String GetNum() { String Value =""; if (!IsDigit(Look)) Expected("Integer"); while (IsDigit(Look)){ Value = Value + Look; GetChar(); } SkipWhite(); return Value; } /** Output a String with Tab */ private void Emit(String s){ System.out.print(TAB+s); } /** Output a String with Tab and CRLF */ private void EmitLn(String s) { Emit(s); System.out.println();; } /** Parse and Translate a Identifier */ private void Ident() { String Name; Name = GetName(); if (Look == '('){ //This is a function Match('('); int numArgs = Arguments(); //Expression() -- add this! Match(')'); //this is the end of the function tokens.add(new DummyFunctionPtg(Name,numArgs)); } else { //this can be either a cell ref or a named range !! boolean cellRef = true ; //we should probably do it with reg exp?? if (cellRef) { tokens.add(new ValueReferencePtg()); //TODO we need to pass in Name somewhere?? }else { //handle after named range is integrated!! } } } /** get arguments to a function */ private int Arguments() { int numArgs = 0; if (Look != ')') { numArgs++; Expression(); } while (Look == ',') { Match(','); Expression(); numArgs++; } return numArgs; } /** Parse and Translate a Math Factor */ private void Factor() { if (Look == '(' ) { Match('('); //tokens.add(new ParenthesisPtg()); Expression(); Match(')'); //tokens.add(new ParenthesisPtg()); return; } else if (IsAlpha(Look)){ Ident(); }else{ //EmitLn("MOVE #" + GetNum() + ",D0"); IntPtg p = new IntPtg(); p.setValue(Short.parseShort(GetNum())); tokens.add(p); } } /** Recognize and Translate a Multiply */ private void Multiply(){ Match('*'); Factor(); tokens.add(new MultiplyPtg()); //EmitLn("MULS (SP)+,D0"); } /** Recognize and Translate a Divide */ private void Divide() { Match('/'); Factor(); tokens.add(new DividePtg()); //EmitLn("MOVE (SP)+,D1"); //EmitLn("EXS.L D0"); //EmitLn("DIVS D1,D0"); } /** Parse and Translate a Math Term */ private void Term(){ Factor(); while (Look == '*' || Look == '/' ) { //EmitLn("MOVE D0,-(SP)"); ///TODO do we need to do anything here?? if (Look == '*') Multiply(); if (Look == '/') Divide(); } } /** Recognize and Translate an Add */ private void Add() { Match('+'); Term(); tokens.add(new AddPtg()); } /** Recognize and Translate a Subtract */ private void Subtract() { Match('-'); Term(); tokens.add(new SubtractPtg()); } /** Parse and Translate an Expression */ private void Expression() { if (IsAddop(Look)) { EmitLn("CLR D0"); //unaryAdd ptg??? } else { Term(); } while (IsAddop(Look)) { if ( Look == '+' ) Add(); if (Look == '-') Subtract(); // if (Look == '*') Multiply(); // if (Look == '/') Divide(); } } //{--------------------------------------------------------------} //{ Parse and Translate an Assignment Statement } /** procedure Assignment; var Name: string[8]; begin Name := GetName; Match('='); Expression; end; **/ /** Initialize */ private void Init() { GetChar(); SkipWhite(); } /** API call to execute the parsing of the formula * */ public void parse() { synchronized (tokens) { Init(); Expression(); } } /** API call to retrive the array of Ptgs created as * a result of the parsing */ public Ptg[] getRPNPtg() { synchronized (tokens) { if (tokens == null) throw new IllegalStateException("Please parse a string before trying to access the parse result"); Ptg[] retval = new Ptg[tokens.size()]; return (Ptg[]) tokens.toArray(retval); } } /** Static method to convert an array of Ptgs in RPN order * to a human readable string format in infix mode * TODO - extra brackets might appear, but string will be semantically correct. */ public static String toFormulaString(Ptg[] ptgs) { java.util.Stack stack = new java.util.Stack(); int numPtgs = ptgs.length; OperationPtg o; int numOperands; String[] operands; for (int i=0;i<numPtgs;i++) { if (ptgs[i] instanceof OperationPtg) { o = (OperationPtg) ptgs[i]; numOperands = o.getNumberOfOperands(); operands = new String[numOperands]; for (int j=0;j<numOperands;j++) { operands[numOperands-j-1] = (String) stack.pop(); //TODO: catch stack underflow and throw parse exception. } String result = o.toFormulaString(operands); if (! (o instanceof DummyFunctionPtg) ) result = "("+result+")" ; stack.push(result); } else { stack.push(ptgs[i].toFormulaString()); } } return (String) stack.pop(); //TODO: catch stack underflow and throw parse exception. } public String toString() { StringBuffer buf = new StringBuffer(); for (int i=0;i<tokens.size();i++) { buf.append( ( (Ptg)tokens.get(i)).toFormulaString()); buf.append(' '); } return buf.toString(); } /** Main Program for testing*/ public static void main(String[] argv) { FormulaParser fp = new FormulaParser(argv[0]+";"); System.out.println("\nFormula is: "); fp.parse(); System.out.println("RPN Form is: " +fp.toString()); System.out.println("Converted Text form is : "+fp.toFormulaString(fp.getRPNPtg())); //If Look <> CR then Expected('NewLine'); } }
