Author: jukka Date: Wed Mar 26 09:33:33 2008 New Revision: 641384 URL: http://svn.apache.org/viewvc?rev=641384&view=rev Log: TIKA-97: Tika GUI - New tabs for different views of the parser output - Improved drag-and-drop support - Improved error handling
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/gui/ParsingTransferHandler.java incubator/tika/trunk/src/main/java/org/apache/tika/gui/TikaGUI.java Modified: incubator/tika/trunk/src/main/java/org/apache/tika/gui/ParsingTransferHandler.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/gui/ParsingTransferHandler.java?rev=641384&r1=641383&r2=641384&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/gui/ParsingTransferHandler.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/gui/ParsingTransferHandler.java Wed Mar 26 09:33:33 2008 @@ -21,41 +21,24 @@ import java.awt.datatransfer.Transferable; import java.awt.event.InputEvent; import java.io.File; -import java.io.FileInputStream; -import java.io.InputStream; -import java.io.StringWriter; import java.util.List; import javax.swing.Icon; import javax.swing.JComponent; -import javax.swing.JEditorPane; import javax.swing.TransferHandler; -import javax.swing.table.DefaultTableModel; -import javax.xml.transform.OutputKeys; -import javax.xml.transform.sax.SAXTransformerFactory; -import javax.xml.transform.sax.TransformerHandler; -import javax.xml.transform.stream.StreamResult; - -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.AutoDetectParser; -import org.apache.tika.parser.Parser; -public class ParsingTransferHandler extends TransferHandler { - - private final Parser parser = new AutoDetectParser(); +/** + * Utility class that turns drag-and-drop events into Tika parse requests. 
+ */ +class ParsingTransferHandler extends TransferHandler { private final TransferHandler delegate; - private final DefaultTableModel table; - - private final JEditorPane editor; + private final TikaGUI tika; - public ParsingTransferHandler( - TransferHandler delegate, - DefaultTableModel table, JEditorPane editor) { + public ParsingTransferHandler(TransferHandler delegate, TikaGUI tika) { this.delegate = delegate; - this.table = table; - this.editor = editor; + this.tika = tika; } public boolean canImport(JComponent component, DataFlavor[] flavors) { @@ -73,37 +56,11 @@ List<?> files = (List<?>) transferable.getTransferData(DataFlavor.javaFileListFlavor); for (Object file : files) { - importFile((File) file); + tika.importFile((File) file); } return true; } catch (Exception e) { - e.printStackTrace(); return false; - } - } - - private void importFile(File file) throws Exception { - InputStream input = new FileInputStream(file); - try { - StringWriter writer = new StringWriter(); - Metadata metadata = new Metadata(); - metadata.set(Metadata.RESOURCE_NAME_KEY, file.getName()); - - SAXTransformerFactory factory = (SAXTransformerFactory) - SAXTransformerFactory.newInstance(); - TransformerHandler handler = factory.newTransformerHandler(); - handler.getTransformer().setOutputProperty( - OutputKeys.METHOD, "html"); - handler.setResult(new StreamResult(writer)); - parser.parse(input, handler, metadata); - - table.setRowCount(0); - for (String name : metadata.names()) { - table.addRow(new Object[] { name, metadata.get(name) }); - } - editor.setText(writer.toString()); - } finally { - input.close(); } } Modified: incubator/tika/trunk/src/main/java/org/apache/tika/gui/TikaGUI.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/gui/TikaGUI.java?rev=641384&r1=641383&r2=641384&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/gui/TikaGUI.java 
(original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/gui/TikaGUI.java Wed Mar 26 09:33:33 2008 @@ -16,52 +16,212 @@ */ package org.apache.tika.gui; +import java.awt.Dimension; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.io.Writer; +import java.util.Arrays; + import javax.swing.JEditorPane; import javax.swing.JFrame; +import javax.swing.JOptionPane; import javax.swing.JScrollPane; import javax.swing.JTabbedPane; -import javax.swing.JTable; +import javax.swing.ProgressMonitorInputStream; import javax.swing.SwingUtilities; import javax.swing.UIManager; -import javax.swing.table.DefaultTableModel; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.sax.SAXTransformerFactory; +import javax.xml.transform.sax.TransformerHandler; +import javax.xml.transform.stream.StreamResult; + +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.AutoDetectParser; +import org.apache.tika.parser.Parser; +import org.apache.tika.sax.TeeContentHandler; +import org.apache.tika.sax.WriteOutContentHandler; +import org.apache.tika.sax.XHTMLContentHandler; +import org.apache.tika.sax.xpath.MatchingContentHandler; +import org.apache.tika.sax.xpath.XPathParser; +import org.xml.sax.ContentHandler; /** - * Simple Swing GUI for Apache Tika. Opens a window with tabs for - * "Text content" and "Metadata". You can drag and drop files on top + * Simple Swing GUI for Apache Tika. You can drag and drop files on top * of the window to have them parsed. */ -public class TikaGUI implements Runnable { +public class TikaGUI extends JFrame { - public void run() { - JFrame frame = new JFrame("Apache Tika"); - frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); + /** + * Main method. 
Sets the Swing look and feel to the operating system + * settings, and starts the Tika GUI with an {@link AutoDetectParser} + * instance as the default parser. + * + * @param args ignored + * @throws Exception if an error occurs + */ + public static void main(String[] args) throws Exception { + UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName()); + SwingUtilities.invokeLater(new Runnable() { + public void run() { + new TikaGUI(new AutoDetectParser()).setVisible(true); + } + }); + } - JTabbedPane tabs = new JTabbedPane(); - frame.add(tabs); + /** + * Configured parser instance. + */ + private final Parser parser; + + /** + * Tabs in the Tika GUI window. + */ + private final JTabbedPane tabs; + + /** + * Formatted XHTML output. + */ + private final JEditorPane html; + + /** + * Plain text output. + */ + private final JEditorPane text; + + /** + * Raw XHTML source. + */ + private final JEditorPane xml; + + /** + * Document metadata. + */ + private final JEditorPane metadata; + + /** + * Parsing errors. 
+ */ + private final JEditorPane errors; + + public TikaGUI(Parser parser) { + super("Apache Tika"); + setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); + + tabs = new JTabbedPane(); + add(tabs); + + html = createEditor("Formatted text", "text/html"); + text = createEditor("Plain text", "text/plain"); + xml = createEditor("Structured text", "text/plain"); + metadata = createEditor("Metadata", "text/plain"); + errors = createEditor("Errors", "text/plain"); - JEditorPane editor = new JEditorPane(); - editor.setContentType("text/html"); - editor.setText("<center>Drop file here</center>"); - tabs.add("Text content", new JScrollPane(editor)); - - DefaultTableModel model = new DefaultTableModel( - new Object[][] { { "", "" } }, - new Object[] { "Name", "Value" }); - JTable table = new JTable(model); - tabs.addTab("Metadata", new JScrollPane(table)); + setPreferredSize(new Dimension(500, 400)); + pack(); + + this.parser = parser; + } + + public void importFile(File file) throws IOException { + InputStream input = new FileInputStream(file); + try { + StringWriter htmlBuffer = new StringWriter(); + StringWriter textBuffer = new StringWriter(); + StringWriter xmlBuffer = new StringWriter(); + StringBuilder metadataBuffer = new StringBuilder(); + + ContentHandler handler = new TeeContentHandler( + getHtmlHandler(htmlBuffer), + getTextContentHandler(textBuffer), + getXmlContentHandler(xmlBuffer)); + Metadata md = new Metadata(); + md.set(Metadata.RESOURCE_NAME_KEY, file.getName()); + + input = new ProgressMonitorInputStream( + this, "Parsing file " + file.getName(), input); + parser.parse(input, handler, md); + + String[] names = md.names(); + Arrays.sort(names); + for (String name : names) { + metadataBuffer.append(name); + metadataBuffer.append(": "); + metadataBuffer.append(md.get(name)); + metadataBuffer.append("\n"); + } + + setText(errors, ""); + setText(metadata, metadataBuffer.toString()); + setText(xml, xmlBuffer.toString()); + setText(text, textBuffer.toString()); + 
setText(html, htmlBuffer.toString()); + tabs.setSelectedIndex(0); + } catch (Exception e) { + StringWriter writer = new StringWriter(); + e.printStackTrace(new PrintWriter(writer)); + setText(errors, writer.toString()); + setText(metadata, ""); + setText(xml, ""); + setText(text, ""); + setText(html, ""); + tabs.setSelectedIndex(tabs.getTabCount() - 1); + JOptionPane.showMessageDialog( + this, + "Apache Tika was unable to parse the file " + + file.getName() + ".\n See the errors tab for" + + " the detailed stack trace of this error.", + "Parse error", + JOptionPane.ERROR_MESSAGE); + } finally { + input.close(); + } + } - table.setTransferHandler(new ParsingTransferHandler( - table.getTransferHandler(), model, editor)); + private JEditorPane createEditor(String title, String type) { + JEditorPane editor = new JEditorPane(); + editor.setContentType(type); editor.setTransferHandler(new ParsingTransferHandler( - editor.getTransferHandler(), model, editor)); + editor.getTransferHandler(), this)); + tabs.add(title, new JScrollPane(editor)); + return editor; + } - frame.pack(); - frame.setVisible(true); + private void setText(JEditorPane editor, String text) { + editor.setText(text); + editor.setCaretPosition(0); } - public static void main(String[] args) throws Exception { - UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName()); - SwingUtilities.invokeLater(new TikaGUI()); + private ContentHandler getHtmlHandler(Writer writer) + throws TransformerConfigurationException { + SAXTransformerFactory factory = (SAXTransformerFactory) + SAXTransformerFactory.newInstance(); + TransformerHandler handler = factory.newTransformerHandler(); + handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html"); + handler.setResult(new StreamResult(writer)); + return handler; + } + + private ContentHandler getTextContentHandler(Writer writer) { + XPathParser parser = + new XPathParser("xhtml", XHTMLContentHandler.XHTML); + return new MatchingContentHandler( + new 
WriteOutContentHandler(writer), + parser.parse("/xhtml:html/xhtml:body//text()")); + } + + private ContentHandler getXmlContentHandler(Writer writer) + throws TransformerConfigurationException { + SAXTransformerFactory factory = (SAXTransformerFactory) + SAXTransformerFactory.newInstance(); + TransformerHandler handler = factory.newTransformerHandler(); + handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml"); + handler.setResult(new StreamResult(writer)); + return handler; } }