nltk modification
Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/1b14b39d Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/1b14b39d Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/1b14b39d Branch: refs/heads/master Commit: 1b14b39d3e1b453620b2f7b26a933103a78c958a Parents: 14ca320 Author: manali <[email protected]> Authored: Fri Feb 19 17:37:25 2016 -0800 Committer: manali <[email protected]> Committed: Fri Feb 19 17:37:25 2016 -0800 ---------------------------------------------------------------------- .../src/main/java/org/apache/tika/mime/MimeType.java | 1 + .../resources/org/apache/tika/mime/tika-mimetypes.xml | 13 +++++++------ .../tika/parser/ner/nltk/NLTKNERecogniserTest.java | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/1b14b39d/tika-core/src/main/java/org/apache/tika/mime/MimeType.java ---------------------------------------------------------------------- diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeType.java b/tika-core/src/main/java/org/apache/tika/mime/MimeType.java index b4d651e..fc520cf 100644 --- a/tika-core/src/main/java/org/apache/tika/mime/MimeType.java +++ b/tika-core/src/main/java/org/apache/tika/mime/MimeType.java @@ -270,6 +270,7 @@ public final class MimeType implements Comparable<MimeType>, Serializable { } } + void addMagic(Magic magic) { if (magic == null) { return; http://git-wip-us.apache.org/repos/asf/tika/blob/1b14b39d/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml ---------------------------------------------------------------------- diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml index 1d7b42b..52dd67b 100644 --- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml +++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml @@ -38,6 +38,12 @@ --> <mime-info> + <mime-type type="application/dicom"> + <_comment>DICOM medical imaging data</_comment> + <magic priority="50"> + <match value="DICM" type="string" offset="128"/> + </magic> + </mime-type> <mime-type type="application/activemessage"/> <mime-type type="application/andrew-inset"> <glob pattern="*.ez"/> @@ -112,12 +118,7 @@ <mime-type type="application/dec-dx"/> <mime-type type="application/dialog-info+xml"/> - <mime-type type="application/dicom"> - <_comment>DICOM medical imaging data</_comment> - <magic priority="50"> - <match value="DICM" type="string" offset="128"/> - </magic> - </mime-type> + <mime-type type="application/dita+xml"> <sub-class-of type="application/xml"/> http://git-wip-us.apache.org/repos/asf/tika/blob/1b14b39d/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java index 563e836..2861051 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java +++ b/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java @@ -25,6 +25,7 @@ import org.junit.Ignore; import org.junit.Test; import java.io.ByteArrayInputStream; +import java.io.File; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashSet; @@ -37,7 +38,6 @@ public class NLTKNERecogniserTest { public void testGetEntityTypes() throws Exception { String text = "America"; System.setProperty(NamedEntityParser.SYS_PROP_NER_IMPL, NLTKNERecogniser.class.getName()); - Tika tika = new Tika(new TikaConfig(NamedEntityParser.class.getResourceAsStream("tika-config.xml"))); Metadata md = new Metadata(); tika.parse(new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8)), md);
