Author: tallison
Date: Thu Oct 1 00:24:50 2015
New Revision: 1706178
URL: http://svn.apache.org/viewvc?rev=1706178&view=rev
Log:
TIKA-1757 and TIKA-1758. Mea culpa. Thank you Uwe Schindler and Yaniv Kinda
Modified:
tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java
tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java
tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/CommandLineParserBuilderTest.java
Modified:
tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java?rev=1706178&r1=1706177&r2=1706178&view=diff
==============================================================================
--- tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java (original)
+++ tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java Thu Oct 1 00:24:50 2015
@@ -19,11 +19,13 @@ package org.apache.tika.cli;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
-import java.io.File;
-import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.LinkedHashMap;
import java.util.Map;
@@ -35,45 +37,50 @@ import org.junit.Test;
public class TikaCLIBatchCommandLineTest {
- File testInput = null;
- File testFile = null;
+ Path testInput = null;
+ Path testFile = null;
+
+ String testInputPathForCommandLine;
@Before
public void init() {
- testInput = new File("testInput");
- if (!testInput.mkdirs()) {
+ testInput = Paths.get("testInput");
+ try {
+ Files.createDirectories(testInput);
+ } catch (IOException e) {
throw new RuntimeException("Failed to open test input directory");
}
- testFile = new File("testFile.txt");
- OutputStream os = null;
- try {
- os = new FileOutputStream(testFile);
+ testFile = Paths.get("testFile.txt");
+ try (OutputStream os = Files.newOutputStream(testFile)) {
IOUtils.write("test output", os, UTF_8);
} catch (IOException e) {
throw new RuntimeException("Couldn't open testFile");
- } finally {
- IOUtils.closeQuietly(os);
}
+ testInputPathForCommandLine = testInput.toAbsolutePath().toString();
}
@After
public void tearDown() {
try {
- FileUtils.deleteDirectory(testInput);
- testFile.delete();
+ //TODO: refactor this to use our FileUtils.deleteDirectory(Path)
+ //when that is ready
+ FileUtils.deleteDirectory(testInput.toFile());
} catch (IOException e) {
throw new RuntimeException(e);
+ } finally {
+ try {
+ Files.deleteIfExists(testFile);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+
}
}
@Test
public void testJVMOpts() throws Exception {
- String path = testInput.getAbsolutePath();
- if (path.contains(" ")) {
- path = "\"" + path + "\"";
- }
String[] params = {"-JXmx1g",
"-JDlog4j.configuration=batch_process_log4j.xml", "-inputDir",
- path, "-outputDir", "testout-output"};
+ testInputPathForCommandLine, "-outputDir", "testout-output"};
String[] commandLine = BatchCommandLineBuilder.build(params);
@@ -95,16 +102,12 @@ public class TikaCLIBatchCommandLineTest
assertTrue(log < classInd);
assertTrue(inputDir > classInd);
}
-
+
@Test
public void testBasicMappingOfArgs() throws Exception {
- String path = testInput.getAbsolutePath();
- if (path.contains(" ")) {
- path = "\"" + path + "\"";
- }
String[] params = {"-JXmx1g",
"-JDlog4j.configuration=batch_process_log4j.xml",
"-bc", "batch-config.xml",
- "-J", "-h", "-inputDir", path};
+ "-J", "-h", "-inputDir", testInputPathForCommandLine};
String[] commandLine = BatchCommandLineBuilder.build(params);
Map<String, String> attrs = mapify(commandLine);
@@ -112,50 +115,43 @@ public class TikaCLIBatchCommandLineTest
assertEquals("html", attrs.get("-basicHandlerType"));
assertEquals("json", attrs.get("-outputSuffix"));
assertEquals("batch-config.xml", attrs.get("-bc"));
- assertEquals(path, attrs.get("-inputDir"));
+ assertEquals(testInputPathForCommandLine, attrs.get("-inputDir"));
}
@Test
public void testTwoDirsNoFlags() throws Exception {
String outputRoot = "outputRoot";
- String path = testInput.getAbsolutePath();
- if (path.contains(" ")) {
- path = "\"" + path + "\"";
- }
- String[] params = {path, outputRoot};
+
+ String[] params = {testInputPathForCommandLine, outputRoot};
String[] commandLine = BatchCommandLineBuilder.build(params);
Map<String, String> attrs = mapify(commandLine);
- assertEquals(path, attrs.get("-inputDir"));
+ assertEquals(testInputPathForCommandLine, attrs.get("-inputDir"));
assertEquals(outputRoot, attrs.get("-outputDir"));
}
@Test
public void testTwoDirsVarious() throws Exception {
String outputRoot = "outputRoot";
- String path = testInput.getAbsolutePath();
- if (path.contains(" ")) {
- path = "\"" + path + "\"";
- }
- String[] params = {"-i", path, "-o", outputRoot};
+ String[] params = {"-i", testInputPathForCommandLine, "-o", outputRoot};
String[] commandLine = BatchCommandLineBuilder.build(params);
Map<String, String> attrs = mapify(commandLine);
- assertEquals(path, attrs.get("-inputDir"));
+ assertEquals(testInputPathForCommandLine, attrs.get("-inputDir"));
assertEquals(outputRoot, attrs.get("-outputDir"));
- params = new String[]{"--inputDir", path, "--outputDir", outputRoot};
+ params = new String[]{"--inputDir", testInputPathForCommandLine, "--outputDir", outputRoot};
commandLine = BatchCommandLineBuilder.build(params);
attrs = mapify(commandLine);
- assertEquals(path, attrs.get("-inputDir"));
+ assertEquals(testInputPathForCommandLine, attrs.get("-inputDir"));
assertEquals(outputRoot, attrs.get("-outputDir"));
- params = new String[]{"-inputDir", path, "-outputDir", outputRoot};
+ params = new String[]{"-inputDir", testInputPathForCommandLine, "-outputDir", outputRoot};
commandLine = BatchCommandLineBuilder.build(params);
attrs = mapify(commandLine);
- assertEquals(path, attrs.get("-inputDir"));
+ assertEquals(testInputPathForCommandLine, attrs.get("-inputDir"));
assertEquals(outputRoot, attrs.get("-outputDir"));
}
@@ -163,17 +159,12 @@ public class TikaCLIBatchCommandLineTest
public void testConfig() throws Exception {
String outputRoot = "outputRoot";
String configPath = "c:/somewhere/someConfig.xml";
- String path = testInput.getAbsolutePath();
-
- if (path.contains(" ")) {
- path = "\"" + path + "\"";
- }
- String[] params = {"--inputDir", path, "--outputDir", outputRoot,
+ String[] params = {"--inputDir", testInputPathForCommandLine, "--outputDir", outputRoot,
"--config="+configPath};
String[] commandLine = BatchCommandLineBuilder.build(params);
Map<String, String> attrs = mapify(commandLine);
- assertEquals(path, attrs.get("-inputDir"));
+ assertEquals(testInputPathForCommandLine, attrs.get("-inputDir"));
assertEquals(outputRoot, attrs.get("-outputDir"));
assertEquals(configPath, attrs.get("-c"));
@@ -183,15 +174,14 @@ public class TikaCLIBatchCommandLineTest
public void testOneDirOneFileException() throws Exception {
boolean ex = false;
try {
- String outputRoot = "outputRoot";
- String path = testInput.getAbsolutePath();
+ String path = testFile.toAbsolutePath().toString();
if (path.contains(" ")) {
path = "\"" + path + "\"";
}
- String[] params = {path, testFile.getAbsolutePath()};
+ String[] params = {testInputPathForCommandLine, path};
String[] commandLine = BatchCommandLineBuilder.build(params);
-
+ fail("Not allowed to have one dir and one file");
} catch (IllegalArgumentException e) {
ex = true;
}
@@ -199,7 +189,7 @@ public class TikaCLIBatchCommandLineTest
}
private Map<String, String> mapify(String[] args) {
- Map<String, String> map = new LinkedHashMap<String, String>();
+ Map<String, String> map = new LinkedHashMap<>();
for (int i = 0; i < args.length; i++) {
if (args[i].startsWith("-")) {
String k = args[i];
Modified:
tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java?rev=1706178&r1=1706177&r2=1706178&view=diff
==============================================================================
--- tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java (original)
+++ tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchIntegrationTest.java Thu Oct 1 00:24:50 2015
@@ -23,16 +23,15 @@ import static org.junit.Assert.assertNot
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.Reader;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.List;
import org.apache.commons.io.FileUtils;
-import org.apache.commons.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.serialization.JsonMetadataList;
import org.apache.tika.parser.RecursiveParserWrapper;
@@ -42,18 +41,17 @@ import org.junit.Test;
public class TikaCLIBatchIntegrationTest {
- private File testDataFile = new File("src/test/resources/test-data");
-
- private File tempDir;
+ private Path testInputDir = Paths.get("src/test/resources/test-data");
+ private String testInputDirForCommandLine;
+ private Path tempOutputDir;
+ private String tempOutputDirForCommandLine;
private OutputStream out = null;
private OutputStream err = null;
private ByteArrayOutputStream outBuffer = null;
@Before
public void setup() throws Exception {
- tempDir = File.createTempFile("tika-cli-test-batch-", "");
- tempDir.delete();
- tempDir.mkdir();
+ tempOutputDir = Files.createTempDirectory("tika-cli-test-batch-");
outBuffer = new ByteArrayOutputStream();
PrintStream outWriter = new PrintStream(outBuffer, true, UTF_8.name());
ByteArrayOutputStream errBuffer = new ByteArrayOutputStream();
@@ -62,85 +60,85 @@ public class TikaCLIBatchIntegrationTest
err = System.err;
System.setOut(outWriter);
System.setErr(errWriter);
+ testInputDirForCommandLine = testInputDir.toAbsolutePath().toString();
+ tempOutputDirForCommandLine = tempOutputDir.toAbsolutePath().toString();
}
@After
public void tearDown() throws Exception {
System.setOut(new PrintStream(out, true, UTF_8.name()));
System.setErr(new PrintStream(err, true, UTF_8.name()));
- FileUtils.deleteDirectory(tempDir);
+ //TODO: refactor to use our deleteDirectory with straight path
+ FileUtils.deleteDirectory(tempOutputDir.toFile());
}
@Test
public void testSimplestBatchIntegration() throws Exception {
- String[] params = {escape(testDataFile.getAbsolutePath()),
- escape(tempDir.getAbsolutePath())};
+ String[] params = {testInputDirForCommandLine,
+ tempOutputDirForCommandLine};
TikaCLI.main(params);
- assertTrue("bad_xml.xml.xml", new File(tempDir, "bad_xml.xml.xml").isFile());
- assertTrue("coffee.xls.xml", new File(tempDir, "coffee.xls.xml").exists());
+ assertFileExists(tempOutputDir.resolve("bad_xml.xml.xml"));
+ assertFileExists(tempOutputDir.resolve("coffee.xls.xml"));
}
@Test
public void testBasicBatchIntegration() throws Exception {
- String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
- "-o", escape(tempDir.getAbsolutePath()),
+ String[] params = {"-i", testInputDirForCommandLine,
+ "-o", tempOutputDirForCommandLine,
"-numConsumers", "2"
};
TikaCLI.main(params);
- assertTrue("bad_xml.xml.xml", new File(tempDir, "bad_xml.xml.xml").isFile());
- assertTrue("coffee.xls.xml", new File(tempDir, "coffee.xls.xml").exists());
+ assertFileExists(tempOutputDir.resolve("bad_xml.xml.xml"));
+ assertFileExists(tempOutputDir.resolve("coffee.xls.xml"));
}
@Test
public void testJsonRecursiveBatchIntegration() throws Exception {
- Reader reader = null;
- try {
- String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
- "-o", escape(tempDir.getAbsolutePath()),
- "-numConsumers", "10",
- "-J", //recursive Json
- "-t" //plain text in content
- };
- TikaCLI.main(params);
- reader = new InputStreamReader(
- new FileInputStream(new File(tempDir, "test_recursive_embedded.docx.json")), UTF_8);
+ String[] params = {"-i", testInputDirForCommandLine,
+ "-o", tempOutputDirForCommandLine,
+ "-numConsumers", "10",
+ "-J", //recursive Json
+ "-t" //plain text in content
+ };
+ TikaCLI.main(params);
+
+ Path jsonFile = tempOutputDir.resolve("test_recursive_embedded.docx.json");
+ try (Reader reader = Files.newBufferedReader(jsonFile, UTF_8)) {
List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
assertEquals(12, metadataList.size());
assertTrue(metadataList.get(6).get(RecursiveParserWrapper.TIKA_CONTENT).contains("human events"));
- } finally {
- IOUtils.closeQuietly(reader);
}
}
@Test
public void testProcessLogFileConfig() throws Exception {
- String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
- "-o", escape(tempDir.getAbsolutePath()),
+ String[] params = {"-i", testInputDirForCommandLine,
+ "-o", tempOutputDirForCommandLine,
"-numConsumers", "2",
"-JDlog4j.configuration=log4j_batch_process_test.properties"};
TikaCLI.main(params);
- assertTrue("bad_xml.xml.xml", new File(tempDir, "bad_xml.xml.xml").isFile());
- assertTrue("coffee.xls.xml", new File(tempDir, "coffee.xls.xml").exists());
+ assertFileExists(tempOutputDir.resolve("bad_xml.xml.xml"));
+ assertFileExists(tempOutputDir.resolve("coffee.xls.xml"));
String sysOutString = new String(outBuffer.toByteArray(), UTF_8);
assertTrue(sysOutString.contains("MY_CUSTOM_LOG_CONFIG"));
}
@Test
public void testDigester() throws Exception {
- Reader reader = null;
-/* try {
+/*
+ try {
String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
- "-o", escape(tempDir.getAbsolutePath()),
+ "-o", escape(tempOutputDir.getAbsolutePath()),
"-numConsumers", "10",
"-J", //recursive Json
"-t" //plain text in content
};
TikaCLI.main(params);
reader = new InputStreamReader(
- new FileInputStream(new File(tempDir, "test_recursive_embedded.docx.json")), UTF_8);
+ new FileInputStream(new File(tempOutputDir, "test_recursive_embedded.docx.json")), UTF_8);
List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
assertEquals(12, metadataList.size());
assertEquals("59f626e09a8c16ab6dbc2800c685f772",
metadataList.get(0).get("X-TIKA:digest:MD5"));
@@ -149,33 +147,28 @@ public class TikaCLIBatchIntegrationTest
IOUtils.closeQuietly(reader);
}
*/
- reader = null;
- try {
- String[] params = {"-i", escape(testDataFile.getAbsolutePath()),
- "-o", escape(tempDir.getAbsolutePath()),
- "-numConsumers", "10",
- "-J", //recursive Json
- "-t", //plain text in content
- "-digest", "sha512"
- };
- TikaCLI.main(params);
- reader = new InputStreamReader(
- new FileInputStream(new File(tempDir, "test_recursive_embedded.docx.json")), UTF_8);
+ String[] params = {"-i", testInputDirForCommandLine,
+ "-o", tempOutputDirForCommandLine,
+ "-numConsumers", "10",
+ "-J", //recursive Json
+ "-t", //plain text in content
+ "-digest", "sha512"
+ };
+ TikaCLI.main(params);
+ Path jsonFile = tempOutputDir.resolve("test_recursive_embedded.docx.json");
+ try (Reader reader = Files.newBufferedReader(jsonFile, UTF_8)) {
+
List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
assertEquals(12, metadataList.size());
assertNotNull(metadataList.get(0).get("X-TIKA:digest:SHA512"));
assertTrue(metadataList.get(0).get("X-TIKA:digest:SHA512").startsWith("ee46d973ee1852c01858"));
- } finally {
- IOUtils.closeQuietly(reader);
}
-
}
- public static String escape(String path) {
- if (path.indexOf(' ') > -1) {
- return '"' + path + '"';
- }
- return path;
+ private void assertFileExists(Path path) {
+ assertTrue("File doesn't exist: "+path.toAbsolutePath(),
+ Files.isRegularFile(path));
}
+
}
Modified:
tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/CommandLineParserBuilderTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/CommandLineParserBuilderTest.java?rev=1706178&r1=1706177&r2=1706178&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/CommandLineParserBuilderTest.java (original)
+++ tika/trunk/tika-batch/src/test/java/org/apache/tika/batch/CommandLineParserBuilderTest.java Thu Oct 1 00:24:50 2015
@@ -17,12 +17,9 @@ package org.apache.tika.batch;
* limitations under the License.
*/
-import java.io.File;
-import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.commons.cli.Options;
-import org.apache.commons.io.IOUtils;
import org.apache.tika.batch.builders.CommandLineParserBuilder;
import org.apache.tika.batch.fs.FSBatchTestBase;
import org.junit.Test;
@@ -32,16 +29,10 @@ public class CommandLineParserBuilderTes
@Test
public void testBasic() throws Exception {
- String configFile = this.getClass().getResource(
- "/tika-batch-config-test.xml").getFile();
- InputStream is = null;
- try {
- is = new FileInputStream(new File(configFile));
+ try (InputStream is = this.getClass().getResourceAsStream("/tika-batch-config-test.xml")) {
CommandLineParserBuilder builder = new CommandLineParserBuilder();
Options options = builder.build(is);
//TODO: insert actual tests :)
- } finally {
- IOUtils.closeQuietly(is);
}
}