The XLS2CSVmra example that uses HSSF was extremely helpful, so I tried to find
something similar for XSSF, but without any luck. So I offer this as a first draft. It's rudimentary. I don't really trust the parsing of dates and times. It should use OPCPackage instead of just Package, but that was not in 3.5-beta5. Further this uses basic java XML calls to get a SAX-based XML reader, not the direct request
to get a Xerces parser shown in the XSSF and SAX (Event API) FromHowTo example.

Anyhow I offer this in the hope that someone will improve it to the point that
it's worth contributing to the POI XSSF examples area.  HTH and thanks for POI.

chris...

---

package something.or.other;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.text.SimpleDateFormat;
import java.util.Date;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.log4j.BasicConfigurator;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.Package;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/**
* A rudimentary XLSX -> CSV processor
* based on XLS2CSVmra by Nick Burch from
* package org.apache.poi.hssf.eventusermodel.examples.
* This is an attempt to demonstrate the same thing using XSSF.
*/
public class XLSX2CSV {

        /**
         * Derived from 
http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
         */
        class MyXSSFSheetHandler extends DefaultHandler {
                private SharedStringsTable sharedStringsTable;

                /** Destination for data */
                private final PrintStream output;

                /** Number of columns to read starting with leftmost */
                private final int minColumnCount;

                // Runtime
                SimpleDateFormat simpleDateFormat = new 
SimpleDateFormat("M/d/yyyy");
                SimpleDateFormat simpleTimeFormat = new SimpleDateFormat("hh:mm:ss 
a");
                private boolean nextIsBool;
                private boolean nextIsDate;
                private boolean nextIsDateTime;
                private boolean nextIsString;
                private boolean nextIsTime;

                private int thisColumn = -1;
                // The last column printed to the output stream
                private int lastColumnNumber = -1;

                private StringBuffer contents;

                /**
* * @param sst
                 * @param cols
                 * @param target
                 */
                public MyXSSFSheetHandler(
                                SharedStringsTable sst,
                                int cols,
                                PrintStream target) {
                        this.sharedStringsTable = sst;
                        this.minColumnCount = cols;
                        this.output = target;
                        this.contents = new StringBuffer();
                }

                /*
                 * (non-Javadoc)
                 * @see 
org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, 
java.lang.String, java.lang.String, org.xml.sax.Attributes)
                 */
                public void startElement(String uri, String localName, String 
name,
                                Attributes attributes) throws SAXException {

                        // c => cell
                        if (name.equals("c")) {
                                // Get the cell reference
                                String r = attributes.getValue("r");
                                int firstDigit = -1;
                                for (int c = 0; c < r.length(); ++c) {
                                        if (Character.isDigit(r.charAt(c))) {
                                                firstDigit = c;
                                                break;
                                        }
                                }
                                thisColumn = nameToColumn(r.substring(0, 
firstDigit));

                                // Figure out if the value is an index in the 
SST
                                // or something else.
                                String cellType = attributes.getValue("t");
                                nextIsBool = ("b".equals(cellType));
                                nextIsString = ("s".equals(cellType));
                                String cellSomething = attributes.getValue("s");
nextIsDate = ("2".equals(cellSomething)); nextIsTime = ("3".equals(cellSomething)); nextIsDateTime = ("4".equals(cellSomething));
                        }

                        // Clear contents cache
                        contents.setLength(0);
                }

                /*
                 * (non-Javadoc)
                 * @see 
org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, 
java.lang.String, java.lang.String)
                 */
                public void endElement(String uri, String localName, String 
name)
                throws SAXException {

                        String thisStr = null;

                        // Process the last contents as required.
                        // Do now, as characters() may be called more than once
                        if(nextIsBool) {
                                char first = contents.charAt(0);
                                thisStr = first == '0' ? "FALSE" : "TRUE";
                                nextIsBool = false;
                        }
                        else if(nextIsDate) {
                                // Actually an integer
                                double daysSince = 
Double.parseDouble(contents.toString());
                                Date d = DateUtil.getJavaDate(daysSince);
                                thisStr = simpleDateFormat.format(d);
                        }
                        else if(nextIsDateTime) {
                                // Days to left of decimal, seconds (?) to 
right of decimal.
                                Date d = 
DateUtil.getJavaDate(Double.parseDouble(contents.toString()));
                                thisStr = d.toString();
                        }
                        else if(nextIsString) {
                                String sstIndex = contents.toString();
                                try {
                                        int idx = Integer.parseInt(sstIndex);
                                        thisStr = new 
XSSFRichTextString(sharedStringsTable.getEntryAt(idx)).toString();
                                        nextIsString = false;
                                }
                                catch (NumberFormatException ex) {
                                        output.println("Pgmr err, lastContents is 
not int: " + sstIndex);
                                }
                        }
                        else if(nextIsTime) {
                                Date d = 
DateUtil.getJavaDate(Double.parseDouble(contents.toString()));
                                thisStr = simpleTimeFormat.format(d);
                        }
                        else {
                                thisStr = contents.toString();
                        }

                        // v => contents of a cell
                        // Output after we've seen the string contents
                        if(name.equals("v")) {
                                // Emit commas for any fields that were missing 
on this row
                                if(lastColumnNumber == -1) { lastColumnNumber = 
0; }
                                for (int i = lastColumnNumber; i < thisColumn; 
++i)
                                        output.print(',');

                                // Might be the empty string.
                                output.print('"' + thisStr + '"');
                        }
                        else if(name.equals("row")) {

                                // Print out any missing commas if needed
                                if(minColumns > 0) {
                                        // Columns are 0 based
                                        if(lastColumnNumber == -1) { 
lastColumnNumber = 0; }
                                        for(int i=lastColumnNumber; 
i<(this.minColumnCount); i++) {
                                                output.print(',');
                                        }
                                }

                                // We're onto a new row
                                output.println();
                                lastColumnNumber = -1;
                        }

// Update column if(thisColumn > -1)
                                lastColumnNumber = thisColumn;

                }

                public void characters(char[] ch, int start, int length)
                throws SAXException {
                        contents.append(ch, start, length);
                }

                /**
                 * Converts an Excel column name like "C" to a zero-based index.
                 * @param name
                 * @return Index corresponding to the specified name
                 */
                private int nameToColumn(String name) {
                        int column = -1;
                        for (int i = 0; i < name.length(); ++i) {
                                int c = name.charAt(i);
                                column = (column + 1) * 26 + c - 'A';
                        }
                        return column;
                }

        }

        ///////////////////////////////////////

        private Package xlsxPackage;
        private int minColumns;
        private PrintStream output;

        /**
         * Creates a new XLSX -> CSV converter
         * Should use OPCPackage instead of Package, but the new one
         * is not available in Poi 3.5-beta5.
* * @param pkg The XLSX package to process
         * @param output The PrintStream to output the CSV to
         * @param minColumns The minimum number of columns to output, or -1 for 
no minimum
         */
        public XLSX2CSV(Package pkg, PrintStream output, int minColumns) {
                this.xlsxPackage = pkg;
                this.output = output;
                this.minColumns = minColumns;
        }

        /**
         * @param sst
         * @param sheetInputStream
         */
public void processSheet(SharedStringsTable sst, InputStream sheetInputStream) throws IOException, ParserConfigurationException, SAXException {

                InputSource sheetSource = new InputSource(sheetInputStream);
                SAXParserFactory saxFactory = SAXParserFactory.newInstance();
                SAXParser saxParser = saxFactory.newSAXParser();
                XMLReader sheetParser = saxParser.getXMLReader();
                ContentHandler handler = new MyXSSFSheetHandler(sst, 
this.minColumns, this.output);
                sheetParser.setContentHandler(handler);
                sheetParser.parse(sheetSource);
        }

        /**
         * Initiates the processing of the XLS file to CSV
* @throws OpenXML4JException */ public void process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {

                XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
                SharedStringsTable sst = xssfReader.getSharedStringsTable();
                XSSFReader.SheetIterator iter = 
(XSSFReader.SheetIterator)xssfReader.getSheetsData();
                int index = 0;
                while (iter.hasNext()) {
                        InputStream stream = iter.next();
                        String sheetName = iter.getSheetName();
this.output.println(sheetName + " [index=" + index + "]:"); processSheet(sst, stream);
                        stream.close();
                        ++index;
                }               
        }

        public static void main(String[] args) throws Exception {
                if(args.length < 1) {
                        System.err.println("Use:");
                        System.err.println("  XLSX2CSV <xlsx file> [min 
columns]");
                        System.exit(1);
                }

                File xlsxFile = new File(args[0]);
                if (! xlsxFile.exists()) {
                        System.err.println("Not found or not a file: " + 
xlsxFile.getPath());
                        System.exit(1);
                }

                int minColumns = -1;
                if(args.length >= 2) {
                        minColumns = Integer.parseInt(args[1]);
                }

                // Provide rudimentary configuration for log4j to avoid these 
messages:
                // log4j:WARN No appenders could be found for logger 
(org.openxml4j.opc).
                // log4j:WARN Please initialize the log4j system properly.
                BasicConfigurator.configure();

                FileInputStream fis = new FileInputStream(xlsxFile);
                XLSX2CSV xlsx2csv = new XLSX2CSV(Package.open(fis), System.out, 
minColumns);
                xlsx2csv.process();
                fis.close();
        }

}

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to