Author: jnioche
Date: Mon Jan 11 10:13:21 2010
New Revision: 897825

URL: http://svn.apache.org/viewvc?rev=897825&view=rev
Log:
fix for NUTCH-767: reverted to the original expected values for the test + treat text/plain as a default mime-type from Tika

Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java
    lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java?rev=897825&r1=897824&r2=897825&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java Mon Jan 11 10:13:21 2010
@@ -159,6 +159,7 @@
     if (this.mimeMagic) {
       MimeType magicType = this.mimeTypes.getMimeType(data);
       if (magicType != null && !magicType.getName().equals(MimeTypes.OCTET_STREAM)
+          && !magicType.getName().equals(MimeTypes.PLAIN_TEXT)
           && type != null && !type.getName().equals(magicType.getName())) {
         // If magic enabled and the current mime type differs from that of the
         // one returned from the magic, take the magic mimeType

Modified: lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java?rev=897825&r1=897824&r2=897825&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java (original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java Mon Jan 11 10:13:21 2010
@@ -63,28 +63,19 @@
                     "http://www.foo.com/",
                     "".getBytes("UTF8"),
                     "text/html; charset=UTF-8", p, conf);
-    // TODO check potential Tika issue and
-    // revert the expected value to text/html
-    // see https://issues.apache.org/jira/browse/NUTCH-767
-    assertEquals("text/plain", c.getContentType());
+    assertEquals("text/html", c.getContentType());
 
     c = new Content("http://www.foo.com/foo.html",
                     "http://www.foo.com/",
                     "".getBytes("UTF8"),
                     "", p, conf);
-    // TODO check potential Tika issue and
-    // revert the expected value to text/html
-    // see https://issues.apache.org/jira/browse/NUTCH-767
-    assertEquals("text/plain", c.getContentType());
+    assertEquals("text/html", c.getContentType());
 
     c = new Content("http://www.foo.com/foo.html",
                     "http://www.foo.com/",
                     "".getBytes("UTF8"),
                     null, p, conf);
-    // TODO check potential Tika issue and
-    // revert the expected value to text/html
-    // see https://issues.apache.org/jira/browse/NUTCH-767
-    assertEquals("text/plain", c.getContentType());
+    assertEquals("text/html", c.getContentType());
 
     c = new Content("http://www.foo.com/",
                     "http://www.foo.com/",
@@ -108,10 +99,7 @@
                     "http://www.foo.com/",
                     "".getBytes("UTF8"),
                     "", p, conf);
-    // TODO check that Tika returns the right value and
-    // revert to the default type
-    // see https://issues.apache.org/jira/browse/NUTCH-767
-    assertEquals("text/plain", c.getContentType());
+    assertEquals(MimeTypes.OCTET_STREAM, c.getContentType());
 
     c = new Content("http://www.foo.com/",
                     "http://www.foo.com/",