Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/RarParserTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/RarParserTest.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/RarParserTest.java (original) +++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/RarParserTest.java Wed May 13 13:49:36 2015 @@ -1,132 +1,132 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.parser.pkg; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; - -import java.io.InputStream; - -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.AutoDetectParser; -import org.apache.tika.parser.Parser; -import org.apache.tika.sax.BodyContentHandler; -import org.junit.Test; -import org.xml.sax.ContentHandler; - -/** - * Test case for parsing rar files. - */ -public class RarParserTest extends AbstractPkgTest { - - @Test - public void testRarParsing() throws Exception { - Parser parser = new AutoDetectParser(); // Should auto-detect! - ContentHandler handler = new BodyContentHandler(); - Metadata metadata = new Metadata(); - - InputStream stream = RarParserTest.class.getResourceAsStream( - "/test-documents/test-documents.rar"); - try { - parser.parse(stream, handler, metadata, recursingContext); - } finally { - stream.close(); - } - - assertEquals("application/x-rar-compressed", metadata.get(Metadata.CONTENT_TYPE)); - String content = handler.toString(); - assertContains("test-documents/testEXCEL.xls", content); - assertContains("Sample Excel Worksheet", content); - assertContains("test-documents/testHTML.html", content); - assertContains("Test Indexation Html", content); - assertContains("test-documents/testOpenOffice2.odt", content); - assertContains("This is a sample Open Office document", content); - assertContains("test-documents/testPDF.pdf", content); - assertContains("Apache Tika", content); - assertContains("test-documents/testPPT.ppt", content); - assertContains("Sample Powerpoint Slide", content); - assertContains("test-documents/testRTF.rtf", content); - assertContains("indexation Word", content); - assertContains("test-documents/testTXT.txt", content); - assertContains("Test d'indexation de Txt", content); - assertContains("test-documents/testWORD.doc", content); - assertContains("This is a sample Microsoft Word Document", content); - assertContains("test-documents/testXML.xml", content); - assertContains("Rida Benjelloun", content); - } - - /** - * Tests that the ParseContext parser is correctly - * fired for all the embedded entries. - */ - @Test - public void testEmbedded() throws Exception { - Parser parser = new AutoDetectParser(); // Should auto-detect! - ContentHandler handler = new BodyContentHandler(); - Metadata metadata = new Metadata(); - - InputStream stream = RarParserTest.class.getResourceAsStream( - "/test-documents/test-documents.rar"); - try { - parser.parse(stream, handler, metadata, trackingContext); - } finally { - stream.close(); - } - - // Should have found all 9 documents, but not the directory - assertEquals(9, tracker.filenames.size()); - assertEquals(9, tracker.mediatypes.size()); - assertEquals(9, tracker.modifiedAts.size()); - - // Should have names but not content types, as rar doesn't - // store the content types - assertEquals("test-documents/testEXCEL.xls", tracker.filenames.get(0)); - assertEquals("test-documents/testHTML.html", tracker.filenames.get(1)); - assertEquals("test-documents/testOpenOffice2.odt", tracker.filenames.get(2)); - assertEquals("test-documents/testPDF.pdf", tracker.filenames.get(3)); - assertEquals("test-documents/testPPT.ppt", tracker.filenames.get(4)); - assertEquals("test-documents/testRTF.rtf", tracker.filenames.get(5)); - assertEquals("test-documents/testTXT.txt", tracker.filenames.get(6)); - assertEquals("test-documents/testWORD.doc", tracker.filenames.get(7)); - assertEquals("test-documents/testXML.xml", tracker.filenames.get(8)); - - for(String type : tracker.mediatypes) { - assertNull(type); - } - for(String crt : tracker.createdAts) { - assertNull(crt); - } - for(String mod : tracker.modifiedAts) { - assertNotNull(mod); - assertTrue("Modified at " + mod, mod.startsWith("20")); - } - - // Should have filenames in the content string - String content = handler.toString(); - assertContains("test-documents/testHTML.html", content); - assertContains("test-documents/testEXCEL.xls", content); - assertContains("test-documents/testOpenOffice2.odt", content); - assertContains("test-documents/testPDF.pdf", content); - assertContains("test-documents/testPPT.ppt", content); - assertContains("test-documents/testRTF.rtf", content); - assertContains("test-documents/testTXT.txt", content); - assertContains("test-documents/testWORD.doc", content); - assertContains("test-documents/testXML.xml", content); - } +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.pkg; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.io.InputStream; + +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.AutoDetectParser; +import org.apache.tika.parser.Parser; +import org.apache.tika.sax.BodyContentHandler; +import org.junit.Test; +import org.xml.sax.ContentHandler; + +/** + * Test case for parsing rar files. + */ +public class RarParserTest extends AbstractPkgTest { + + @Test + public void testRarParsing() throws Exception { + Parser parser = new AutoDetectParser(); // Should auto-detect! + ContentHandler handler = new BodyContentHandler(); + Metadata metadata = new Metadata(); + + InputStream stream = RarParserTest.class.getResourceAsStream( + "/test-documents/test-documents.rar"); + try { + parser.parse(stream, handler, metadata, recursingContext); + } finally { + stream.close(); + } + + assertEquals("application/x-rar-compressed", metadata.get(Metadata.CONTENT_TYPE)); + String content = handler.toString(); + assertContains("test-documents/testEXCEL.xls", content); + assertContains("Sample Excel Worksheet", content); + assertContains("test-documents/testHTML.html", content); + assertContains("Test Indexation Html", content); + assertContains("test-documents/testOpenOffice2.odt", content); + assertContains("This is a sample Open Office document", content); + assertContains("test-documents/testPDF.pdf", content); + assertContains("Apache Tika", content); + assertContains("test-documents/testPPT.ppt", content); + assertContains("Sample Powerpoint Slide", content); + assertContains("test-documents/testRTF.rtf", content); + assertContains("indexation Word", content); + assertContains("test-documents/testTXT.txt", content); + assertContains("Test d'indexation de Txt", content); + assertContains("test-documents/testWORD.doc", content); + assertContains("This is a sample Microsoft Word Document", content); + assertContains("test-documents/testXML.xml", content); + assertContains("Rida Benjelloun", content); + } + + /** + * Tests that the ParseContext parser is correctly + * fired for all the embedded entries. + */ + @Test + public void testEmbedded() throws Exception { + Parser parser = new AutoDetectParser(); // Should auto-detect! + ContentHandler handler = new BodyContentHandler(); + Metadata metadata = new Metadata(); + + InputStream stream = RarParserTest.class.getResourceAsStream( + "/test-documents/test-documents.rar"); + try { + parser.parse(stream, handler, metadata, trackingContext); + } finally { + stream.close(); + } + + // Should have found all 9 documents, but not the directory + assertEquals(9, tracker.filenames.size()); + assertEquals(9, tracker.mediatypes.size()); + assertEquals(9, tracker.modifiedAts.size()); + + // Should have names but not content types, as rar doesn't + // store the content types + assertEquals("test-documents/testEXCEL.xls", tracker.filenames.get(0)); + assertEquals("test-documents/testHTML.html", tracker.filenames.get(1)); + assertEquals("test-documents/testOpenOffice2.odt", tracker.filenames.get(2)); + assertEquals("test-documents/testPDF.pdf", tracker.filenames.get(3)); + assertEquals("test-documents/testPPT.ppt", tracker.filenames.get(4)); + assertEquals("test-documents/testRTF.rtf", tracker.filenames.get(5)); + assertEquals("test-documents/testTXT.txt", tracker.filenames.get(6)); + assertEquals("test-documents/testWORD.doc", tracker.filenames.get(7)); + assertEquals("test-documents/testXML.xml", tracker.filenames.get(8)); + + for(String type : tracker.mediatypes) { + assertNull(type); + } + for(String crt : tracker.createdAts) { + assertNull(crt); + } + for(String mod : tracker.modifiedAts) { + assertNotNull(mod); + assertTrue("Modified at " + mod, mod.startsWith("20")); + } + + // Should have filenames in the content string + String content = handler.toString(); + assertContains("test-documents/testHTML.html", content); + assertContains("test-documents/testEXCEL.xls", content); + assertContains("test-documents/testOpenOffice2.odt", content); + assertContains("test-documents/testPDF.pdf", content); + assertContains("test-documents/testPPT.ppt", content); + assertContains("test-documents/testRTF.rtf", content); + assertContains("test-documents/testTXT.txt", content); + assertContains("test-documents/testWORD.doc", content); + assertContains("test-documents/testXML.xml", content); + } } \ No newline at end of file
Modified: tika/trunk/tika-parsers/src/test/resources/test-documents/mock/example.xml URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/mock/example.xml?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/test/resources/test-documents/mock/example.xml (original) +++ tika/trunk/tika-parsers/src/test/resources/test-documents/mock/example.xml Wed May 13 13:49:36 2015 @@ -1,51 +1,51 @@ -<?xml version="1.0" encoding="UTF-8" ?> - -<!-- - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. ---> - -<mock> - <!-- this file offers all of the options as documentation - Parsing should stop at an IOException, of course - --> - - <!-- action can be "add" or "set" --> - <metadata action="add" name="author">Nikolai Lobachevsky</metadata> - <!-- element is the name of the sax event to write, p=paragraph - if the element is not specified, the default is <p> --> - <write element="p">some content</write> - <!-- write something to System.out --> - <print_out>writing to System.out</print_out> - <!-- write something to System.err --> - <print_err>writing to System.err</print_err> - <!-- hang - millis: how many milliseconds to pause. The actual hang time will probably - be a bit longer than the value specified. heavy: whether or not the hang should do something computationally expensive. - If the value is false, this just does a Thread.sleep(millis). - This attribute is optional, with default of heavy=false. - pulse_millis: (required if "heavy" is true), how often to check to see - whether the thread was interrupted or that the total hang time exceeded the millis - interruptible: whether or not the parser will check to see if its thread - has been interrupted; this attribute is optional with default of true - --> - <hang millis="100" heavy="true" pulse_millis="10" interruptible="true" /> - <!-- throw an exception or error; optionally include a message or not --> - <throw class="java.io.IOException">not another IOException</throw> - <!-- perform a genuine OutOfMemoryError --> - <oom/> +<?xml version="1.0" encoding="UTF-8" ?> + +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +<mock> + <!-- this file offers all of the options as documentation + Parsing should stop at an IOException, of course + --> + + <!-- action can be "add" or "set" --> + <metadata action="add" name="author">Nikolai Lobachevsky</metadata> + <!-- element is the name of the sax event to write, p=paragraph + if the element is not specified, the default is <p> --> + <write element="p">some content</write> + <!-- write something to System.out --> + <print_out>writing to System.out</print_out> + <!-- write something to System.err --> + <print_err>writing to System.err</print_err> + <!-- hang + millis: how many milliseconds to pause. The actual hang time will probably + be a bit longer than the value specified. heavy: whether or not the hang should do something computationally expensive. + If the value is false, this just does a Thread.sleep(millis). + This attribute is optional, with default of heavy=false. + pulse_millis: (required if "heavy" is true), how often to check to see + whether the thread was interrupted or that the total hang time exceeded the millis + interruptible: whether or not the parser will check to see if its thread + has been interrupted; this attribute is optional with default of true + --> + <hang millis="100" heavy="true" pulse_millis="10" interruptible="true" /> + <!-- throw an exception or error; optionally include a message or not --> + <throw class="java.io.IOException">not another IOException</throw> + <!-- perform a genuine OutOfMemoryError --> + <oom/> </mock> \ No newline at end of file Modified: tika/trunk/tika-parsers/src/test/resources/test-documents/mock/fake_oom.xml URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/mock/fake_oom.xml?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/test/resources/test-documents/mock/fake_oom.xml (original) +++ tika/trunk/tika-parsers/src/test/resources/test-documents/mock/fake_oom.xml Wed May 13 13:49:36 2015 @@ -1,25 +1,25 @@ -<?xml version="1.0" encoding="UTF-8" ?> - -<!-- - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. ---> - -<mock> - <metadata action="add" name="author">Nikolai Lobachevsky</metadata> - <throw class="java.lang.OutOfMemoryError">not another oom</throw> +<?xml version="1.0" encoding="UTF-8" ?> + +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +<mock> + <metadata action="add" name="author">Nikolai Lobachevsky</metadata> + <throw class="java.lang.OutOfMemoryError">not another oom</throw> </mock> \ No newline at end of file Modified: tika/trunk/tika-parsers/src/test/resources/test-documents/mock/heavy_hang.xml URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/mock/heavy_hang.xml?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/test/resources/test-documents/mock/heavy_hang.xml (original) +++ tika/trunk/tika-parsers/src/test/resources/test-documents/mock/heavy_hang.xml Wed May 13 13:49:36 2015 @@ -1,25 +1,25 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. ---> - -<mock> - <metadata action="add" name="author">Nikolai Lobachevsky</metadata> - <write element="p">some content</write> - <hang millis="3000" heavy="true" pulse_millis="100" /> +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +<mock> + <metadata action="add" name="author">Nikolai Lobachevsky</metadata> + <write element="p">some content</write> + <hang millis="3000" heavy="true" pulse_millis="100" /> </mock> \ No newline at end of file Modified: tika/trunk/tika-parsers/src/test/resources/test-documents/mock/nothing_bad.xml URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/mock/nothing_bad.xml?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/test/resources/test-documents/mock/nothing_bad.xml (original) +++ tika/trunk/tika-parsers/src/test/resources/test-documents/mock/nothing_bad.xml Wed May 13 13:49:36 2015 @@ -1,26 +1,26 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. ---> - -<mock> - <metadata action="add" name="author">Geoffrey Chaucer</metadata> - <write element="p">Whan that Aprille with his shoures soote</write> - <write>The droghte of Marche hath perced to the roote,</write> - <write>And bathed every veyne in swich licour,</write> +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +<mock> + <metadata action="add" name="author">Geoffrey Chaucer</metadata> + <write element="p">Whan that Aprille with his shoures soote</write> + <write>The droghte of Marche hath perced to the roote,</write> + <write>And bathed every veyne in swich licour,</write> </mock> \ No newline at end of file Modified: tika/trunk/tika-parsers/src/test/resources/test-documents/mock/null_pointer.xml URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/mock/null_pointer.xml?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/test/resources/test-documents/mock/null_pointer.xml (original) +++ tika/trunk/tika-parsers/src/test/resources/test-documents/mock/null_pointer.xml Wed May 13 13:49:36 2015 @@ -1,25 +1,25 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. ---> - -<mock> - <metadata action="add" name="author">Nikolai Lobachevsky</metadata> - <write element="p">some content</write> - <throw class="java.lang.NullPointerException">another null pointer exception</throw> +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +<mock> + <metadata action="add" name="author">Nikolai Lobachevsky</metadata> + <write element="p">some content</write> + <throw class="java.lang.NullPointerException">another null pointer exception</throw> </mock> \ No newline at end of file Modified: tika/trunk/tika-parsers/src/test/resources/test-documents/mock/null_pointer_no_msg.xml URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/mock/null_pointer_no_msg.xml?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/test/resources/test-documents/mock/null_pointer_no_msg.xml (original) +++ tika/trunk/tika-parsers/src/test/resources/test-documents/mock/null_pointer_no_msg.xml Wed May 13 13:49:36 2015 @@ -1,25 +1,25 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. ---> - -<mock> - <metadata action="add" name="author">Nikolai Lobachevsky</metadata> - <write element="p">some content</write> - <throw class="java.lang.NullPointerException"/> +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +<mock> + <metadata action="add" name="author">Nikolai Lobachevsky</metadata> + <write element="p">some content</write> + <throw class="java.lang.NullPointerException"/> </mock> \ No newline at end of file Modified: tika/trunk/tika-parsers/src/test/resources/test-documents/mock/real_oom.xml URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/mock/real_oom.xml?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/test/resources/test-documents/mock/real_oom.xml (original) +++ tika/trunk/tika-parsers/src/test/resources/test-documents/mock/real_oom.xml Wed May 13 13:49:36 2015 @@ -1,24 +1,24 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. ---> - -<mock> - <metadata action="add" name="author">Nikolai Lobachevsky</metadata> - <oom/> +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +<mock> + <metadata action="add" name="author">Nikolai Lobachevsky</metadata> + <oom/> </mock> \ No newline at end of file Modified: tika/trunk/tika-parsers/src/test/resources/test-documents/mock/sleep.xml URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/mock/sleep.xml?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/test/resources/test-documents/mock/sleep.xml (original) +++ tika/trunk/tika-parsers/src/test/resources/test-documents/mock/sleep.xml Wed May 13 13:49:36 2015 @@ -1,25 +1,25 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. ---> - -<mock> - <metadata action="add" name="author">Nikolai Lobachevsky</metadata> - <write element="p">some content</write> - <hang millis="3000" heavy="false" /> +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +<mock> + <metadata action="add" name="author">Nikolai Lobachevsky</metadata> + <write element="p">some content</write> + <hang millis="3000" heavy="false" /> </mock> \ No newline at end of file Modified: tika/trunk/tika-parsers/src/test/resources/test-documents/mock/sleep_interruptible.xml URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/mock/sleep_interruptible.xml?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/test/resources/test-documents/mock/sleep_interruptible.xml (original) +++ tika/trunk/tika-parsers/src/test/resources/test-documents/mock/sleep_interruptible.xml Wed May 13 13:49:36 2015 @@ -1,25 +1,25 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. ---> - -<mock> - <metadata action="add" name="author">Nikolai Lobachevsky</metadata> - <write element="p">some content</write> - <hang millis="3000" heavy="false" interruptible="true" /> +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +<mock> + <metadata action="add" name="author">Nikolai Lobachevsky</metadata> + <write element="p">some content</write> + <hang millis="3000" heavy="false" interruptible="true" /> </mock> \ No newline at end of file Modified: tika/trunk/tika-parsers/src/test/resources/test-documents/mock/sleep_not_interruptible.xml URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/mock/sleep_not_interruptible.xml?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/test/resources/test-documents/mock/sleep_not_interruptible.xml (original) +++ tika/trunk/tika-parsers/src/test/resources/test-documents/mock/sleep_not_interruptible.xml Wed May 13 13:49:36 2015 @@ -1,25 +1,25 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. ---> - -<mock> - <metadata action="add" name="author">Nikolai Lobachevsky</metadata> - <write element="p">some content</write> - <hang millis="3000" heavy="false" interruptible="false" /> +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +<mock> + <metadata action="add" name="author">Nikolai Lobachevsky</metadata> + <write element="p">some content</write> + <hang millis="3000" heavy="false" interruptible="false" /> </mock> \ No newline at end of file Modified: tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataBase.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataBase.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataBase.java (original) +++ tika/trunk/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataBase.java Wed May 13 13:49:36 2015 @@ -1,52 +1,52 @@ -package org.apache.tika.metadata.serialization; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.Arrays; - -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import org.apache.tika.metadata.Metadata; - -public class JsonMetadataBase { - - - static Gson defaultInit() { - GsonBuilder builder = new GsonBuilder(); - builder.registerTypeHierarchyAdapter(Metadata.class, new JsonMetadataSerializer()); - builder.registerTypeHierarchyAdapter(Metadata.class, new JsonMetadataDeserializer()); - return builder.create(); - } - - static Gson prettyInit() { - GsonBuilder builder = new GsonBuilder(); - builder.registerTypeHierarchyAdapter(Metadata.class, new SortedJsonMetadataSerializer()); - builder.registerTypeHierarchyAdapter(Metadata.class, new JsonMetadataDeserializer()); - builder.setPrettyPrinting(); - return builder.create(); - } - - private static class SortedJsonMetadataSerializer extends JsonMetadataSerializer { - @Override - public String[] getNames(Metadata m) { - String[] names = m.names(); - Arrays.sort(names, new PrettyMetadataKeyComparator()); - return names; - } - } -} +package org.apache.tika.metadata.serialization; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Arrays; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import org.apache.tika.metadata.Metadata; + +public class JsonMetadataBase { + + + static Gson defaultInit() { + GsonBuilder builder = new GsonBuilder(); + builder.registerTypeHierarchyAdapter(Metadata.class, new JsonMetadataSerializer()); + builder.registerTypeHierarchyAdapter(Metadata.class, new JsonMetadataDeserializer()); + return builder.create(); + } + + static Gson prettyInit() { + GsonBuilder builder = new GsonBuilder(); + builder.registerTypeHierarchyAdapter(Metadata.class, new SortedJsonMetadataSerializer()); + builder.registerTypeHierarchyAdapter(Metadata.class, new JsonMetadataDeserializer()); + builder.setPrettyPrinting(); + return builder.create(); + } + + private static class SortedJsonMetadataSerializer extends JsonMetadataSerializer { + @Override + public String[] getNames(Metadata m) { + String[] names = m.names(); + Arrays.sort(names, new PrettyMetadataKeyComparator()); + return names; + } + } +} Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/HTMLHelper.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/HTMLHelper.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/HTMLHelper.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/HTMLHelper.java Wed May 13 13:49:36 2015 @@ -21,22 +21,22 @@ import java.io.IOException; import java.io.InputStream; import org.apache.tika.io.IOUtils; - -/** - * Helps produce user facing HTML output. - * <p/> - * TODO Decide if this would be better done as a MessageBodyWriter - */ -public class HTMLHelper { + +/** + * Helps produce user facing HTML output. + * <p/> + * TODO Decide if this would be better done as a MessageBodyWriter + */ +public class HTMLHelper { private static final String PATH = "/tikaserver-template.html"; private static final String TITLE_VAR = "[[TITLE]]"; - private static final String BODY_VAR = "[[BODY]]"; - private String PRE_BODY; - private String POST_BODY; - - public HTMLHelper() { - InputStream htmlStr = getClass().getResourceAsStream(PATH); - if (htmlStr == null) { + private static final String BODY_VAR = "[[BODY]]"; + private String PRE_BODY; + private String POST_BODY; + + public HTMLHelper() { + InputStream htmlStr = getClass().getResourceAsStream(PATH); + if (htmlStr == null) { throw new IllegalArgumentException("Template Not Found - " + PATH); } try { @@ -45,18 +45,18 @@ public class HTMLHelper { PRE_BODY = html.substring(0, bodyAt); POST_BODY = html.substring(bodyAt + BODY_VAR.length()); } catch (IOException e) { - throw new IllegalStateException("Unable to read template"); - } - } - - /** - * Generates the HTML Header for the user facing page, adding - * in the given title as required - */ - public void generateHeader(StringBuffer html, String title) { - html.append(PRE_BODY.replace(TITLE_VAR, title)); - } - - public void generateFooter(StringBuffer html) { - html.append(POST_BODY); + throw new IllegalStateException("Unable to read template"); + } + } + + /** + * Generates the HTML Header for the user facing page, adding + * in the given title as required + */ + public void generateHeader(StringBuffer html, String title) { + html.append(PRE_BODY.replace(TITLE_VAR, title)); + } + + public void generateFooter(StringBuffer html) { + html.append(POST_BODY); }} \ No newline at end of file Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/RichTextContentHandler.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/RichTextContentHandler.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/RichTextContentHandler.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/RichTextContentHandler.java Wed May 13 13:49:36 2015 @@ -14,34 +14,34 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -package org.apache.tika.server; - -import java.io.Writer; - -import org.apache.tika.sax.WriteOutContentHandler; -import org.xml.sax.Attributes; -import org.xml.sax.SAXException; - -public class RichTextContentHandler extends WriteOutContentHandler { - public RichTextContentHandler(Writer writer) { - super(writer); - } - - @Override - public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { - super.startElement(uri, localName, qName, attributes); - - if ("img".equals(localName) && attributes.getValue("alt") != null) { - String nfo = "[image: " + attributes.getValue("alt") + ']'; - - characters(nfo.toCharArray(), 0, nfo.length()); - } - - if ("a".equals(localName) && attributes.getValue("name") != null) { - String nfo = "[bookmark: " + attributes.getValue("name") + ']'; - - characters(nfo.toCharArray(), 0, nfo.length()); - } - } -} + +package org.apache.tika.server; + +import java.io.Writer; + +import org.apache.tika.sax.WriteOutContentHandler; +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; + +public class RichTextContentHandler extends WriteOutContentHandler { + public RichTextContentHandler(Writer writer) { + super(writer); + } + + @Override + public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { + super.startElement(uri, localName, qName, attributes); + + if ("img".equals(localName) && attributes.getValue("alt") != null) { + String nfo = "[image: " + attributes.getValue("alt") + ']'; + + characters(nfo.toCharArray(), 0, nfo.length()); + } + + if ("a".equals(localName) && attributes.getValue("name") != null) { + String nfo = "[bookmark: " + attributes.getValue("name") + ']'; + + characters(nfo.toCharArray(), 0, nfo.length()); + } + } +} Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java Wed May 13 13:49:36 2015 @@ -1,90 +1,90 @@ -package org.apache.tika.server; -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import javax.ws.rs.WebApplicationException; -import javax.ws.rs.core.Response; -import javax.ws.rs.ext.ExceptionMapper; -import javax.ws.rs.ext.Provider; - - -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.io.Writer; - -import org.apache.poi.hwpf.OldWordFileFormatException; -import org.apache.tika.exception.EncryptedDocumentException; -import org.apache.tika.exception.TikaException; - -@Provider -public class TikaServerParseExceptionMapper implements ExceptionMapper<TikaServerParseException> { - - private final boolean returnStack; - - public TikaServerParseExceptionMapper(boolean returnStack) { - this.returnStack = returnStack; - } - - public Response toResponse(TikaServerParseException e) { - if (e.getMessage() != null && - e.getMessage().equals(Response.Status.UNSUPPORTED_MEDIA_TYPE.toString())) { - return buildResponse(e, 415); - } - Throwable cause = e.getCause(); - if (cause == null) { - return buildResponse(e, Response.Status.INTERNAL_SERVER_ERROR.getStatusCode()); - } else { - if (cause instanceof EncryptedDocumentException) { - return buildResponse(cause, 422); - } else if (cause instanceof TikaException) { - //unsupported media type - Throwable causeOfCause = cause.getCause(); - if (causeOfCause instanceof WebApplicationException) { - return ((WebApplicationException) causeOfCause).getResponse(); - } - return buildResponse(cause, 422); - } else if (cause instanceof IllegalStateException) { - return buildResponse(cause, 422); - } else if (cause instanceof OldWordFileFormatException) { - return buildResponse(cause, 422); - } else if (cause instanceof WebApplicationException) { - return ((WebApplicationException) e.getCause()).getResponse(); - } else { - return buildResponse(e, 500); - } - } - } - - private Response buildResponse(Throwable cause, int i) { - if (returnStack && cause != null) { - Writer result = new StringWriter(); - PrintWriter writer = new PrintWriter(result); - cause.printStackTrace(writer); - writer.flush(); - try { - result.flush(); - } catch (IOException e) { - //something went seriously wrong - return Response.status(500).build(); - } - return Response.status(i).entity(result.toString()).type("text/plain").build(); - } else { - return Response.status(i).build(); - } - } -} +package org.apache.tika.server; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.Response; +import javax.ws.rs.ext.ExceptionMapper; +import javax.ws.rs.ext.Provider; + + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.io.Writer; + +import org.apache.poi.hwpf.OldWordFileFormatException; +import org.apache.tika.exception.EncryptedDocumentException; +import org.apache.tika.exception.TikaException; + +@Provider +public class TikaServerParseExceptionMapper implements ExceptionMapper<TikaServerParseException> { + + private final boolean returnStack; + + public TikaServerParseExceptionMapper(boolean returnStack) { + this.returnStack = returnStack; + } + + public Response toResponse(TikaServerParseException e) { + if (e.getMessage() != null && + e.getMessage().equals(Response.Status.UNSUPPORTED_MEDIA_TYPE.toString())) { + return buildResponse(e, 415); + } + Throwable cause = e.getCause(); + if (cause == null) { + return buildResponse(e, Response.Status.INTERNAL_SERVER_ERROR.getStatusCode()); + } else { + if (cause instanceof EncryptedDocumentException) { + return buildResponse(cause, 422); + } else if (cause instanceof TikaException) { + //unsupported media type + Throwable causeOfCause = cause.getCause(); + if (causeOfCause instanceof WebApplicationException) { + return ((WebApplicationException) causeOfCause).getResponse(); + } + return buildResponse(cause, 422); + } else if (cause instanceof IllegalStateException) { + return buildResponse(cause, 422); + } else if (cause instanceof OldWordFileFormatException) { + return buildResponse(cause, 422); + } else if (cause instanceof WebApplicationException) { + return ((WebApplicationException) e.getCause()).getResponse(); + } else { + return buildResponse(e, 500); + } + } + } + + private Response buildResponse(Throwable cause, int i) { + if (returnStack && cause != null) { + Writer result = new StringWriter(); + PrintWriter writer = new PrintWriter(result); + cause.printStackTrace(writer); + writer.flush(); + try { + result.flush(); + } catch (IOException e) { + //something went seriously wrong + return Response.status(500).build(); + } + return Response.status(i).entity(result.toString()).type("text/plain").build(); + } else { + return Response.status(i).build(); + } + } +} Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java Wed May 13 13:49:36 2015 @@ -1,72 +1,72 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.server.resource; - -import javax.ws.rs.Consumes; -import javax.ws.rs.PUT; -import javax.ws.rs.Path; -import javax.ws.rs.Produces; -import javax.ws.rs.core.Context; -import javax.ws.rs.core.HttpHeaders; -import javax.ws.rs.core.UriInfo; - -import java.io.IOException; -import java.io.InputStream; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.tika.config.TikaConfig; -import org.apache.tika.io.TikaInputStream; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.mime.MediaType; - -@Path("/detect") -public class DetectorResource { - - private static final Log logger = LogFactory.getLog(DetectorResource.class - .getName()); - - private TikaConfig config = null; - - public DetectorResource(TikaConfig config) { - this.config = config; - } - - @PUT - @Path("stream") - @Consumes("*/*") - @Produces("text/plain") - public String detect(final InputStream is, - @Context HttpHeaders httpHeaders, @Context final UriInfo info) { - Metadata met = new Metadata(); - TikaInputStream tis = TikaInputStream.get(is); - String filename = TikaResource.detectFilename(httpHeaders - .getRequestHeaders()); - logger.info("Detecting media type for Filename: " + filename); - met.add(Metadata.RESOURCE_NAME_KEY, filename); - try { - return this.config.getDetector().detect(tis, met).toString(); - } catch (IOException e) { - logger.warn("Unable to detect MIME type for file. Reason: " - + e.getMessage()); - e.printStackTrace(); - return MediaType.OCTET_STREAM.toString(); - } - } - -} +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tika.server.resource; + +import javax.ws.rs.Consumes; +import javax.ws.rs.PUT; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.HttpHeaders; +import javax.ws.rs.core.UriInfo; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.tika.config.TikaConfig; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; + +@Path("/detect") +public class DetectorResource { + + private static final Log logger = LogFactory.getLog(DetectorResource.class + .getName()); + + private TikaConfig config = null; + + public DetectorResource(TikaConfig config) { + this.config = config; + } + + @PUT + @Path("stream") + @Consumes("*/*") + @Produces("text/plain") + public String detect(final InputStream is, + @Context HttpHeaders httpHeaders, @Context final UriInfo info) { + Metadata met = new Metadata(); + TikaInputStream tis = TikaInputStream.get(is); + String filename = TikaResource.detectFilename(httpHeaders + .getRequestHeaders()); + logger.info("Detecting media type for Filename: " + filename); + met.add(Metadata.RESOURCE_NAME_KEY, filename); + try { + return this.config.getDetector().detect(tis, met).toString(); + } catch (IOException e) { + logger.warn("Unable to detect MIME type for file. Reason: " + + e.getMessage()); + e.printStackTrace(); + return MediaType.OCTET_STREAM.toString(); + } + } + +} Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java Wed May 13 13:49:36 2015 @@ -1,94 +1,94 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.server.resource; - -import javax.ws.rs.Consumes; -import javax.ws.rs.POST; -import javax.ws.rs.PUT; -import javax.ws.rs.Path; -import javax.ws.rs.Produces; -import javax.ws.rs.core.Context; -import javax.ws.rs.core.HttpHeaders; -import javax.ws.rs.core.MultivaluedMap; -import javax.ws.rs.core.Response; -import javax.ws.rs.core.UriInfo; - -import java.io.InputStream; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.cxf.jaxrs.ext.multipart.Attachment; -import org.apache.tika.config.TikaConfig; -import org.apache.tika.language.ProfilingHandler; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.AutoDetectParser; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.RecursiveParserWrapper; -import org.apache.tika.sax.BasicContentHandlerFactory; -import org.apache.tika.server.MetadataList; -import org.xml.sax.helpers.DefaultHandler; - -@Path("/rmeta") -public class RecursiveMetadataResource { - private static final Log logger = LogFactory.getLog(RecursiveMetadataResource.class); - - private TikaConfig tikaConfig; - - public RecursiveMetadataResource(TikaConfig tikaConfig) { - this.tikaConfig = tikaConfig; - } - - @POST - @Consumes("multipart/form-data") - @Produces({"text/csv", "application/json"}) - @Path("form") - public Response getMetadataFromMultipart(Attachment att, @Context UriInfo info) throws Exception { - return Response.ok( - parseMetadata(att.getObject(InputStream.class), att.getHeaders(), info)).build(); - } - - @PUT - @Produces("application/json") - public Response getMetadata(InputStream is, @Context HttpHeaders httpHeaders, @Context UriInfo info) throws Exception { - return Response.ok( - parseMetadata(is, httpHeaders.getRequestHeaders(), info)).build(); - } - - private MetadataList parseMetadata(InputStream is, - MultivaluedMap<String, String> httpHeaders, UriInfo info) - throws Exception { - final Metadata metadata = new Metadata(); - final ParseContext context = new ParseContext(); - AutoDetectParser parser = TikaResource.createParser(tikaConfig); - // TODO: parameterize choice of handler and max chars? - BasicContentHandlerFactory.HANDLER_TYPE type = BasicContentHandlerFactory.HANDLER_TYPE.TEXT; - RecursiveParserWrapper wrapper = new RecursiveParserWrapper(parser, - new BasicContentHandlerFactory(type, -1)); - TikaResource.fillMetadata(parser, metadata, context, httpHeaders); - // no need to add parser to parse recursively - TikaResource.fillParseContext(context, httpHeaders, null); - TikaResource.logRequest(logger, info, metadata); - TikaResource.parse(wrapper, logger, info.getPath(), is, - new ProfilingHandler() { - public void endDocument() { - metadata.set("language", getLanguage().getLanguage()); - } - }, metadata, context); - return new MetadataList(wrapper.getMetadata()); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tika.server.resource; + +import javax.ws.rs.Consumes; +import javax.ws.rs.POST; +import javax.ws.rs.PUT; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.HttpHeaders; +import javax.ws.rs.core.MultivaluedMap; +import javax.ws.rs.core.Response; +import javax.ws.rs.core.UriInfo; + +import java.io.InputStream; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.cxf.jaxrs.ext.multipart.Attachment; +import org.apache.tika.config.TikaConfig; +import org.apache.tika.language.ProfilingHandler; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.AutoDetectParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.RecursiveParserWrapper; +import org.apache.tika.sax.BasicContentHandlerFactory; +import org.apache.tika.server.MetadataList; +import org.xml.sax.helpers.DefaultHandler; + +@Path("/rmeta") +public class RecursiveMetadataResource { + private static final Log logger = LogFactory.getLog(RecursiveMetadataResource.class); + + private TikaConfig tikaConfig; + + public RecursiveMetadataResource(TikaConfig tikaConfig) { + this.tikaConfig = tikaConfig; + } + + @POST + @Consumes("multipart/form-data") + @Produces({"text/csv", "application/json"}) + @Path("form") + public Response getMetadataFromMultipart(Attachment att, @Context UriInfo info) throws Exception { + return Response.ok( + parseMetadata(att.getObject(InputStream.class), att.getHeaders(), info)).build(); + } + + @PUT + @Produces("application/json") + public Response getMetadata(InputStream is, @Context HttpHeaders httpHeaders, @Context UriInfo info) throws Exception { + return Response.ok( + parseMetadata(is, httpHeaders.getRequestHeaders(), info)).build(); + } + + private MetadataList parseMetadata(InputStream is, + MultivaluedMap<String, String> httpHeaders, UriInfo info) + throws Exception { + final Metadata metadata = new Metadata(); + final ParseContext context = new ParseContext(); + AutoDetectParser parser = TikaResource.createParser(tikaConfig); + // TODO: parameterize choice of handler and max chars? + BasicContentHandlerFactory.HANDLER_TYPE type = BasicContentHandlerFactory.HANDLER_TYPE.TEXT; + RecursiveParserWrapper wrapper = new RecursiveParserWrapper(parser, + new BasicContentHandlerFactory(type, -1)); + TikaResource.fillMetadata(parser, metadata, context, httpHeaders); + // no need to add parser to parse recursively + TikaResource.fillParseContext(context, httpHeaders, null); + TikaResource.logRequest(logger, info, metadata); + TikaResource.parse(wrapper, logger, info.getPath(), is, + new ProfilingHandler() { + public void endDocument() { + metadata.set("language", getLanguage().getLanguage()); + } + }, metadata, context); + return new MetadataList(wrapper.getMetadata()); + } +} Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaDetectors.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaDetectors.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaDetectors.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaDetectors.java Wed May 13 13:49:36 2015 @@ -1,128 +1,128 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.server.resource; - -import javax.ws.rs.GET; -import javax.ws.rs.Path; -import javax.ws.rs.Produces; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.tika.config.TikaConfig; -import org.apache.tika.detect.CompositeDetector; -import org.apache.tika.detect.Detector; -import org.apache.tika.server.HTMLHelper; -import org.eclipse.jetty.util.ajax.JSON; - -/** - * <p>Provides details of all the {@link Detector}s registered with - * Apache Tika, similar to <em>--list-detectors</em> with the Tika CLI. - */ -@Path("/detectors") -public class TikaDetectors { - private TikaConfig tika; - private HTMLHelper html; - - public TikaDetectors(TikaConfig tika) { - this.tika = tika; - this.html = new HTMLHelper(); - } - - @GET - @Produces("text/html") - public String getDectorsHTML() { - StringBuffer h = new StringBuffer(); - html.generateHeader(h, "Detectors available to Apache Tika"); - detectorAsHTML(tika.getDetector(), h, 2); - html.generateFooter(h); - return h.toString(); - } - - private void detectorAsHTML(Detector d, StringBuffer html, int level) { - html.append("<h"); - html.append(level); - html.append(">"); - String name = d.getClass().getName(); - html.append(name.substring(name.lastIndexOf('.') + 1)); - html.append("</h"); - html.append(level); - html.append(">"); - html.append("<p>Class: "); - html.append(name); - html.append("</p>"); - if (d instanceof CompositeDetector) { - html.append("<p>Composite Detector</p>"); - for (Detector cd : ((CompositeDetector) d).getDetectors()) { - detectorAsHTML(cd, html, level + 1); - } - } - } - - @GET - @Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON) - public String getDetectorsJSON() { - Map<String, Object> details = new HashMap<String, Object>(); - detectorAsMap(tika.getDetector(), details); - return JSON.toString(details); - } - - private void detectorAsMap(Detector d, Map<String, Object> details) { - details.put("name", d.getClass().getName()); - - boolean isComposite = (d instanceof CompositeDetector); - details.put("composite", isComposite); - if (isComposite) { - List<Map<String, Object>> c = new ArrayList<Map<String, Object>>(); - for (Detector cd : ((CompositeDetector) d).getDetectors()) { - Map<String, Object> cdet = new HashMap<String, Object>(); - detectorAsMap(cd, cdet); - c.add(cdet); - } - details.put("children", c); - } - } - - @GET - @Produces("text/plain") - public String getDetectorsPlain() { - StringBuffer text = new StringBuffer(); - renderDetector(tika.getDetector(), text, 0); - return text.toString(); - } - - private void renderDetector(Detector d, StringBuffer text, int indent) { - boolean isComposite = (d instanceof CompositeDetector); - String name = d.getClass().getName(); - - for (int i = 0; i < indent; i++) { - text.append(" "); - } - text.append(name); - if (isComposite) { - text.append(" (Composite Detector):\n"); - - List<Detector> subDetectors = ((CompositeDetector) d).getDetectors(); - for (Detector sd : subDetectors) { - renderDetector(sd, text, indent + 1); - } - } else { - text.append("\n"); } - } +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.server.resource; + +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.tika.config.TikaConfig; +import org.apache.tika.detect.CompositeDetector; +import org.apache.tika.detect.Detector; +import org.apache.tika.server.HTMLHelper; +import org.eclipse.jetty.util.ajax.JSON; + +/** + * <p>Provides details of all the {@link Detector}s registered with + * Apache Tika, similar to <em>--list-detectors</em> with the Tika CLI. + */ +@Path("/detectors") +public class TikaDetectors { + private TikaConfig tika; + private HTMLHelper html; + + public TikaDetectors(TikaConfig tika) { + this.tika = tika; + this.html = new HTMLHelper(); + } + + @GET + @Produces("text/html") + public String getDectorsHTML() { + StringBuffer h = new StringBuffer(); + html.generateHeader(h, "Detectors available to Apache Tika"); + detectorAsHTML(tika.getDetector(), h, 2); + html.generateFooter(h); + return h.toString(); + } + + private void detectorAsHTML(Detector d, StringBuffer html, int level) { + html.append("<h"); + html.append(level); + html.append(">"); + String name = d.getClass().getName(); + html.append(name.substring(name.lastIndexOf('.') + 1)); + html.append("</h"); + html.append(level); + html.append(">"); + html.append("<p>Class: "); + html.append(name); + html.append("</p>"); + if (d instanceof CompositeDetector) { + html.append("<p>Composite Detector</p>"); + for (Detector cd : ((CompositeDetector) d).getDetectors()) { + detectorAsHTML(cd, html, level + 1); + } + } + } + + @GET + @Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON) + public String getDetectorsJSON() { + Map<String, Object> details = new HashMap<String, Object>(); + detectorAsMap(tika.getDetector(), details); + return JSON.toString(details); + } + + private void detectorAsMap(Detector d, Map<String, Object> details) { + details.put("name", d.getClass().getName()); + + boolean isComposite = (d instanceof CompositeDetector); + details.put("composite", isComposite); + if (isComposite) { + List<Map<String, Object>> c = new ArrayList<Map<String, Object>>(); + for (Detector cd : ((CompositeDetector) d).getDetectors()) { + Map<String, Object> cdet = new HashMap<String, Object>(); + detectorAsMap(cd, cdet); + c.add(cdet); + } + details.put("children", c); + } + } + + @GET + @Produces("text/plain") + public String getDetectorsPlain() { + StringBuffer text = new StringBuffer(); + renderDetector(tika.getDetector(), text, 0); + return text.toString(); + } + + private void renderDetector(Detector d, StringBuffer text, int indent) { + boolean isComposite = (d instanceof CompositeDetector); + String name = d.getClass().getName(); + + for (int i = 0; i < indent; i++) { + text.append(" "); + } + text.append(name); + if (isComposite) { + text.append(" (Composite Detector):\n"); + + List<Detector> subDetectors = ((CompositeDetector) d).getDetectors(); + for (Detector sd : subDetectors) { + renderDetector(sd, text, indent + 1); + } + } else { + text.append("\n"); } + } } \ No newline at end of file