Merge
Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/aa448a3b Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/aa448a3b Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/aa448a3b Branch: refs/heads/master Commit: aa448a3b7e61f9a46efd1bf3f2ac72e6a3852d8f Parents: 9546bd3 d011d70 Author: Nick Burch <[email protected]> Authored: Thu Dec 22 01:31:25 2016 +0000 Committer: Nick Burch <[email protected]> Committed: Thu Dec 22 01:31:25 2016 +0000 ---------------------------------------------------------------------- CHANGES.txt | 3 + .../org/apache/tika/metadata/QuattroPro.java | 48 +++ .../org/apache/tika/metadata/WordPerfect.java | 66 +++ .../org/apache/tika/mime/tika-mimetypes.xml | 32 +- .../tika/parser/epub/EpubContentParser.java | 6 +- .../parser/wordperfect/QPWTextExtractor.java | 226 ++++++++++ .../parser/wordperfect/QuattroProParser.java | 71 +++ .../tika/parser/wordperfect/WP6Constants.java | 432 +++++++++++++++++++ .../tika/parser/wordperfect/WP6FileHeader.java | 191 ++++++++ .../parser/wordperfect/WP6TextExtractor.java | 219 ++++++++++ .../tika/parser/wordperfect/WPInputStream.java | 223 ++++++++++ .../parser/wordperfect/WordPerfectParser.java | 74 ++++ .../services/org.apache.tika.parser.Parser | 2 + .../apache/tika/parser/epub/EpubParserTest.java | 65 +-- .../tika/parser/ibooks/iBooksParserTest.java | 62 +-- .../tika/parser/wordperfect/QuattroProTest.java | 52 +++ .../parser/wordperfect/WPInputStreamTest.java | 134 ++++++ .../parser/wordperfect/WordPerfectTest.java | 39 ++ .../test/resources/test-documents/testEPUB.epub | Bin 29719 -> 30556 bytes .../test-documents/testWordPerfect.wpd | Bin 0 -> 2044 bytes 20 files changed, 1877 insertions(+), 68 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/aa448a3b/CHANGES.txt ---------------------------------------------------------------------- diff --cc CHANGES.txt index 615039d,a9cf6f1..630aa73 --- a/CHANGES.txt +++ b/CHANGES.txt @@@ -1,8 -1,8 +1,11 @@@ Release 1.15 - ?? + * Mime magic for the OneNote family (.one / .onetoc / .onepkg), no parser + (TIKA-2224). + + * Add parsers for WordPerfect and QuattroPro (.qpw) files. + Contributed by Pascal Essiembre (TIKA-1946). + * Add configurability of "preserve-interword-spacing" to TesseractOCRParser (TIKA-2190). http://git-wip-us.apache.org/repos/asf/tika/blob/aa448a3b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml ----------------------------------------------------------------------
