Nick, what Microsoft documents do you mean? I just don't know where
to start.
Please see below for a first implementation of a
ReadonlySharedStringsTable. Initially I wanted to extend
POIXMLDocumentPart, but that supports read/write access. I copied the
relevant method signatures from POIXMLDocumentPart and provided
implementations. The program depends on finding the counts in the
root element to allocate space, which may be just too fragile, but
time will tell. It certainly runs extremely quickly.
HTH
chris...
Nick Burch wrote:
On Thu, 7 May 2009, Chris Lott wrote:
Sounds reasonable. Can you offer any insights into the shared
string table?
Have a look at the microsoft docs, they're not bad.
It looks like the shared string ID in a sheet(n).xml file is simply
the index of the string in the table. That suggests to me a simple
Java vector might be a suitable implementation with not only low
memory use but also constant lookup time; don't even need a map.
I think you ought to be just fine with a vector or array / arraylist,
yes.
Nick
--
package com.telcordia.arroyo.beans.poi;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.poi.openxml4j.opc.Package;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.xssf.usermodel.XSSFRelation;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
/**
* Many methods copied from org.apache.poi.POIXMLDocumentPart because
* I was not sure whether this class should extend that class.
*
*/
public class ReadonlySharedStringsTable extends DefaultHandler {
/**
* An integer representing the total count of strings in the
workbook. This count does not
* include any numbers, it counts only the total of text strings
in the workbook.
*/
private int count;
/**
* An integer representing the total count of unique strings in
the Shared String Table.
* A string is unique even if it is a copy of another string, but
has different formatting applied
* at the character level.
*/
private int uniqueCount;
/**
* The shared strings table.
*/
private String [] strings;
/**
* * @param pkg
* @throws IOException
* @throws SAXException
* @throws ParserConfigurationException
*/
public ReadonlySharedStringsTable(Package pkg)
throws IOException, SAXException, ParserConfigurationException {
ArrayList<PackagePart> parts =
pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());
PackagePart sstPart = parts.get(0);
readFrom(sstPart.getInputStream());
}
/**
* Like POIXMLDocumentPart constructor
* * @param part
* @param rel_ignored
* @throws IOException
*/
public ReadonlySharedStringsTable(PackagePart part,
PackageRelationship rel_ignored) throws IOException, SAXException,
ParserConfigurationException {
readFrom(part.getInputStream());
}
/**
* Read this shared strings table from an XML file.
* * @param is The input stream containing the XML document.
* @throws IOException if an error occurs while reading.
* @throws SAXException * @throws
ParserConfigurationException */
public void readFrom(InputStream is) throws IOException,
SAXException, ParserConfigurationException {
InputSource sheetSource = new InputSource(is);
SAXParserFactory saxFactory = SAXParserFactory.newInstance();
SAXParser saxParser = saxFactory.newSAXParser();
XMLReader sheetParser = saxParser.getXMLReader();
sheetParser.setContentHandler(this);
sheetParser.parse(sheetSource);
}
/**
* Return an integer representing the total count of strings in
the workbook. This count does not
* include any numbers, it counts only the total of text strings
in the workbook.
*
* @return the total count of strings in the workbook
*/
public int getCount(){
return this.count;
}
/**
* Returns an integer representing the total count of unique
strings in the Shared String Table.
* A string is unique even if it is a copy of another string, but
has different formatting applied
* at the character level.
*
* @return the total count of unique strings in the workbook
*/
public int getUniqueCount(){
return this.uniqueCount;
}
/**
* Return a string item by index
*
* @param idx index of item to return.
* @return the item at the specified position in this Shared
String table.
*/
public String getEntryAt(int idx) {
return strings[idx];
}
//// ContentHandler methods ////
private StringBuffer characters;
private boolean tIsOpen;
private int index;
/*
* (non-Javadoc)
* @see
org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String,
java.lang.String, java.lang.String, org.xml.sax.Attributes)
*/
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
if ("sst".equals(name)) {
String count = attributes.getValue("count");
String uniqueCount = attributes.getValue("uniqueCount");
this.count = Integer.parseInt(count);
this.uniqueCount = Integer.parseInt(uniqueCount);
this.strings = new String[this.uniqueCount];
index = 0;
characters = new StringBuffer();
}
else if ("t".equals(name)) {
characters.setLength(0);
tIsOpen = true;
}
}
/*
* (non-Javadoc)
* @see
org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String,
java.lang.String, java.lang.String)
*/
public void endElement(String uri, String localName, String name)
throws SAXException {
if ("t".equals(name)) {
strings[index] = characters.toString();
++index;
}
}
/**
* Captures characters only if a t(ext?) element is open.
*/
public void characters(char[] ch, int start, int length)
throws SAXException {
if (tIsOpen)
characters.append(ch, start, length);
}
////////////// For testing //////////////
/**
* Dumps the shared string table from an XLSX package.
* * @param args
* @throws Exception
*/
public static void main(String [] args) throws Exception {
BasicConfigurator.configure();
Logger.getRootLogger().setLevel(Level.INFO);
if (args.length != 1) {
System.err.println("Usage: ReadonlySharedStringsTable
<file.xlsx>");
return;
}
File inputFile = new File(args[0]);
if (! inputFile.exists()) {
System.err.println("Failed to find file: " +
inputFile.getPath());
return;
}
Package pkg = Package.open(inputFile.getPath(),
PackageAccess.READ);
ReadonlySharedStringsTable sst = new
ReadonlySharedStringsTable(pkg);
int unique = sst.getUniqueCount();
for (int s = 0; s < unique; ++s)
System.out.println("Index " + s + ": " +
sst.getEntryAt(s));
// Close without saving any changes.
pkg.revert();
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
__________ Information from ESET NOD32 Antivirus, version of virus
signature database 4061 (20090507) __________
The message was checked by ESET NOD32 Antivirus.
http://www.eset.com