Author: nick
Date: Thu Aug 20 10:07:52 2015
New Revision: 1696751

URL: http://svn.apache.org/r1696751
Log:
TIKA-1710 patch from Yaniv Kunda - Use Commons IO instead of the Tika Core IO
copies, and use java.nio.charset.StandardCharsets for charset constants

Modified:
    tika/trunk/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
    tika/trunk/tika-example/src/test/java/org/apache/tika/example/DumpTikaConfigExampleTest.java
    tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java
    tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTypeDetectorTest.java

Modified: tika/trunk/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java?rev=1696751&r1=1696750&r2=1696751&view=diff
==============================================================================
--- tika/trunk/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java (original)
+++ tika/trunk/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java Thu Aug 20 10:07:52 2015
@@ -16,6 +16,7 @@
  */
 package org.apache.tika.bundle;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertFalse;
@@ -141,7 +142,7 @@ public class BundleIT {
     public void testForkParser() throws Exception {
         ForkParser parser = new ForkParser(Activator.class.getClassLoader(), defaultParser);
         String data = "<!DOCTYPE html>\n<html><body><p>test <span>content</span></p></body></html>";
-        InputStream stream = new ByteArrayInputStream(data.getBytes("UTF-8"));
+        InputStream stream = new ByteArrayInputStream(data.getBytes(UTF_8));
         Writer writer = new StringWriter();
         ContentHandler contentHandler = new BodyContentHandler(writer);
         Metadata metadata = new Metadata();

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java?rev=1696751&r1=1696750&r2=1696751&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java Thu Aug 20 10:07:52 2015
@@ -32,6 +32,8 @@ import org.apache.tika.sax.BodyContentHa
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+
 /**
  * Parses the output of /bin/ls and counts the number of files and the number of
  * executables using Tika.
@@ -75,8 +77,7 @@ public class DirListParser implements Pa
                        Metadata metadata, ParseContext context) throws IOException,
                        SAXException, TikaException {
 
-               List<String> lines = FileUtils.readLines(
-                       TikaInputStream.get(is).getFile(), "utf-8");
+               List<String> lines = FileUtils.readLines(TikaInputStream.get(is).getFile(), UTF_8);
                for (String line : lines) {
                        String[] fileToks = line.split("\\s+");
                        if (fileToks.length < 8)

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java?rev=1696751&r1=1696750&r2=1696751&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java Thu Aug 20 10:07:52 2015
@@ -41,7 +41,6 @@ import org.apache.tika.config.TikaConfig
 import org.apache.tika.detect.DefaultDetector;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
 import org.apache.tika.language.translate.DefaultTranslator;
 import org.apache.tika.language.translate.Translator;
 import org.apache.tika.mime.MediaType;
@@ -52,6 +51,8 @@ import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+
 
 /**
  * This class shows how to dump a TikaConfig object to a configuration file.
@@ -190,7 +191,7 @@ public class DumpTikaConfigExample {
      */
     public static void main(String[] args) throws Exception {
 
-        Charset encoding = IOUtils.UTF_8;
+        Charset encoding = UTF_8;
         Writer writer = null;
         if (args.length > 0) {
             writer = new OutputStreamWriter(new FileOutputStream(new File(args[0])), encoding);

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java?rev=1696751&r1=1696750&r2=1696751&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java Thu Aug 20 10:07:52 2015
@@ -20,12 +20,12 @@ import java.io.InputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
 
+import org.apache.commons.io.FilenameUtils;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
 import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
-import org.apache.tika.io.FilenameUtils;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MimeTypeException;

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java?rev=1696751&r1=1696750&r2=1696751&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java Thu Aug 20 10:07:52 2015
@@ -32,6 +32,8 @@ import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.BodyContentHandler;
 import org.xml.sax.ContentHandler;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+
 /**
  * Demonstrates how to call the different components within Tika: its
  * {@link Detector} framework (aka MIME identification and repository), its
@@ -91,7 +93,7 @@ public class MyFirstTika {
                 + detector.detect(stream, metadata) + "]");
 
         LanguageIdentifier lang = new LanguageIdentifier(new LanguageProfile(
-                FileUtils.readFileToString(new File(filename), "utf-8")));
+                FileUtils.readFileToString(new File(filename), UTF_8)));
 
         System.out.println("The language of this content is: ["
                 + lang.getLanguage() + "]");

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java?rev=1696751&r1=1696750&r2=1696751&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java Thu Aug 20 10:07:52 2015
@@ -37,6 +37,8 @@ import org.apache.tika.sax.XHTMLContentH
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+
 /**
  * Demonstrates Tika and its ability to sense symlinks.
  */
@@ -52,7 +54,7 @@ public class RollbackSoftware {
                LinkContentHandler handler = new LinkContentHandler();
                Metadata met = new Metadata();
                DeploymentAreaParser parser = new DeploymentAreaParser();
-               parser.parse(IOUtils.toInputStream(deployArea.getAbsolutePath(), "utf-8"),
+               parser.parse(IOUtils.toInputStream(deployArea.getAbsolutePath(), UTF_8),
                                handler, met);
                List<Link> links = handler.getLinks();
                if (links.size() < 2)
@@ -110,7 +112,7 @@ public class RollbackSoftware {
                                Metadata metadata, ParseContext context) throws IOException,
                                SAXException, TikaException {
 
-                       File deployArea = new File(IOUtils.toString(is, "utf-8"));
+                       File deployArea = new File(IOUtils.toString(is, UTF_8));
                        File[] versions = deployArea.listFiles(new FileFilter() {
 
                                public boolean accept(File pathname) {

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java?rev=1696751&r1=1696750&r2=1696751&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java Thu Aug 20 10:07:52 2015
@@ -15,6 +15,7 @@
 package org.apache.tika.example;
 
 import java.io.ByteArrayInputStream;
+
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
@@ -22,7 +23,7 @@ import org.apache.tika.sax.WriteOutConte
 import org.springframework.context.ApplicationContext;
 import org.springframework.context.support.ClassPathXmlApplicationContext;
 
-import com.google.common.base.Charsets;
+import static java.nio.charset.StandardCharsets.UTF_8;
 
 public class SpringExample {
 
@@ -30,7 +31,7 @@ public class SpringExample {
                ApplicationContext context = new ClassPathXmlApplicationContext(
                                new String[] { "org/apache/tika/example/spring.xml" });
                Parser parser = context.getBean("tika", Parser.class);
-               parser.parse(new ByteArrayInputStream("Hello, World!".getBytes(Charsets.UTF_8)),
+               parser.parse(new ByteArrayInputStream("Hello, World!".getBytes(UTF_8)),
                                new WriteOutContentHandler(System.out), new Metadata(),
                                new ParseContext());
        }

Modified: tika/trunk/tika-example/src/test/java/org/apache/tika/example/DumpTikaConfigExampleTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/test/java/org/apache/tika/example/DumpTikaConfigExampleTest.java?rev=1696751&r1=1696750&r2=1696751&view=diff
==============================================================================
--- tika/trunk/tika-example/src/test/java/org/apache/tika/example/DumpTikaConfigExampleTest.java (original)
+++ tika/trunk/tika-example/src/test/java/org/apache/tika/example/DumpTikaConfigExampleTest.java Thu Aug 20 10:07:52 2015
@@ -18,6 +18,8 @@ package org.apache.tika.example;
  */
 
 
+import static java.nio.charset.StandardCharsets.UTF_16LE;
+import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
@@ -26,6 +28,7 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
+import java.nio.charset.Charset;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.detect.CompositeDetector;
@@ -61,9 +64,9 @@ public class DumpTikaConfigExampleTest {
     @Test
     public void testDump() throws Exception {
         DumpTikaConfigExample ex = new DumpTikaConfigExample();
-        for (String encoding : new String[]{ "UTF-8", "UTF-16LE"}) {
-            Writer writer = new OutputStreamWriter(new FileOutputStream(configFile), encoding);
-            ex.dump(TikaConfig.getDefaultConfig(), writer, encoding);
+        for (Charset charset : new Charset[]{UTF_8, UTF_16LE}) {
+            Writer writer = new OutputStreamWriter(new FileOutputStream(configFile), charset);
+            ex.dump(TikaConfig.getDefaultConfig(), writer, charset.name());
             writer.flush();
             writer.close();
 

Modified: tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java?rev=1696751&r1=1696750&r2=1696751&view=diff
==============================================================================
--- tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java (original)
+++ tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java Thu Aug 20 10:07:52 2015
@@ -14,6 +14,7 @@
 
 package org.apache.tika.example;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.junit.Assert.assertEquals;
 
 import java.io.ByteArrayOutputStream;
@@ -23,8 +24,6 @@ import java.io.PrintStream;
 import org.apache.commons.io.FileUtils;
 import org.junit.Test;
 
-import com.google.common.base.Charsets;
-
 public class SimpleTextExtractorTest {
     @Test
     public void testSimpleTextExtractor() throws Exception {
@@ -33,18 +32,17 @@ public class SimpleTextExtractorTest {
             + " content written in English to test autodetection of"
             + " the character encoding of the input stream.";
         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-        String UTF8 = Charsets.UTF_8.name();
 
         PrintStream out = System.out;
-        System.setOut(new PrintStream(buffer, true, UTF8));
+        System.setOut(new PrintStream(buffer, true, UTF_8.name()));
 
         File file = new File("target", "test.txt");
-        FileUtils.writeStringToFile(file, message, UTF8);
+        FileUtils.writeStringToFile(file, message, UTF_8);
         SimpleTextExtractor.main(new String[] { file.getPath() });
         file.delete();
 
         System.setOut(out);
 
-        assertEquals(message, buffer.toString(UTF8).trim());
+        assertEquals(message, buffer.toString(UTF_8.name()).trim());
     }
 }

Modified: tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTypeDetectorTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTypeDetectorTest.java?rev=1696751&r1=1696750&r2=1696751&view=diff
==============================================================================
--- tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTypeDetectorTest.java (original)
+++ tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTypeDetectorTest.java Thu Aug 20 10:07:52 2015
@@ -14,12 +14,12 @@
 
 package org.apache.tika.example;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.junit.Assert.assertEquals;
 
 import java.io.ByteArrayOutputStream;
 import java.io.PrintStream;
 
-import com.google.common.base.Charsets;
 import org.junit.Test;
 
 @SuppressWarnings("deprecation")
@@ -30,14 +30,14 @@ public class SimpleTypeDetectorTest {
                ByteArrayOutputStream buffer = new ByteArrayOutputStream();
 
                PrintStream out = System.out;
-               System.setOut(new PrintStream(buffer, true, Charsets.UTF_8.name()));
+               System.setOut(new PrintStream(buffer, true, UTF_8.name()));
 
                SimpleTypeDetector.main(new String[] { "pom.xml" });
 
                System.setOut(out);
 
                assertEquals("pom.xml: application/xml",
-                               buffer.toString(Charsets.UTF_8.name()).trim());
+                               buffer.toString(UTF_8.name()).trim());
        }
 
 }


Reply via email to