Author: lewismc
Date: Thu Apr 23 21:38:36 2015
New Revision: 1675724

URL: http://svn.apache.org/r1675724
Log:
NUTCH-1994 Upgrade to Apache Tika 1.8

Modified: nutch/branches/2.x/CHANGES.txt nutch/branches/2.x/ivy/ivy.xml nutch/branches/2.x/src/plugin/parse-tika/howto_upgrade_tika.txt nutch/branches/2.x/src/plugin/parse-tika/ivy.xml nutch/branches/2.x/src/plugin/parse-tika/plugin.xml Modified: nutch/branches/2.x/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1675724&r1=1675723&r2=1675724&view=diff ============================================================================== --- nutch/branches/2.x/CHANGES.txt (original) +++ nutch/branches/2.x/CHANGES.txt Thu Apr 23 21:38:36 2015 @@ -2,6 +2,8 @@ Nutch Change Log Current Development 2.4-SNAPSHOT +* NUTCH-1994 Upgrade to Apache Tika 1.8 (lewismc) + * NUTCH-1990 Use URI.normalise() in BasicURLNormalizer (snagel, jnioche) * NUTCH-1981 Upgrade to icu4j 55.1 (Marko Asplund via snagel) Modified: nutch/branches/2.x/ivy/ivy.xml URL: http://svn.apache.org/viewvc/nutch/branches/2.x/ivy/ivy.xml?rev=1675724&r1=1675723&r2=1675724&view=diff ============================================================================== --- nutch/branches/2.x/ivy/ivy.xml (original) +++ nutch/branches/2.x/ivy/ivy.xml Thu Apr 23 21:38:36 2015 @@ -55,7 +55,7 @@ </dependency> <dependency org="com.ibm.icu" name="icu4j" rev="55.1" /> - <dependency org="org.apache.tika" name="tika-core" rev="1.7" /> + <dependency org="org.apache.tika" name="tika-core" rev="1.8" /> <dependency org="com.googlecode.juniversalchardet" name="juniversalchardet" rev="1.0.3"/> <dependency org="log4j" name="log4j" rev="1.2.15" conf="*->master" /> Modified: nutch/branches/2.x/src/plugin/parse-tika/howto_upgrade_tika.txt URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-tika/howto_upgrade_tika.txt?rev=1675724&r1=1675723&r2=1675724&view=diff ============================================================================== --- nutch/branches/2.x/src/plugin/parse-tika/howto_upgrade_tika.txt (original) +++ nutch/branches/2.x/src/plugin/parse-tika/howto_upgrade_tika.txt Thu Apr 23 
21:38:36 2015 @@ -5,4 +5,4 @@ 3. Upgrade Tika's own dependencies in src/plugin/parse-tika/plugin.xml To get the list of dependencies and their versions execute: $ ant -f ./build-ivy.xml - $ ls lib/ + $ ls lib | sed 's/^/ <library name="/g' | sed 's/$/"\/>/g' Modified: nutch/branches/2.x/src/plugin/parse-tika/ivy.xml URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-tika/ivy.xml?rev=1675724&r1=1675723&r2=1675724&view=diff ============================================================================== --- nutch/branches/2.x/src/plugin/parse-tika/ivy.xml (original) +++ nutch/branches/2.x/src/plugin/parse-tika/ivy.xml Thu Apr 23 21:38:36 2015 @@ -36,7 +36,7 @@ </publications> <dependencies> - <dependency org="org.apache.tika" name="tika-parsers" rev="1.7" conf="*->default"> + <dependency org="org.apache.tika" name="tika-parsers" rev="1.8" conf="*->default"> <exclude org="org.apache.tika" name="tika-core" /> </dependency> <override module="rome" rev="0.9"/> Modified: nutch/branches/2.x/src/plugin/parse-tika/plugin.xml URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-tika/plugin.xml?rev=1675724&r1=1675723&r2=1675724&view=diff ============================================================================== --- nutch/branches/2.x/src/plugin/parse-tika/plugin.xml (original) +++ nutch/branches/2.x/src/plugin/parse-tika/plugin.xml Thu Apr 23 21:38:36 2015 @@ -30,40 +30,65 @@ <library name="apache-mime4j-dom-0.7.2.jar"/> <library name="asm-debug-all-4.1.jar"/> <library name="aspectjrt-1.8.0.jar"/> - <library name="bcmail-jdk15-1.45.jar"/> - <library name="bcprov-jdk15-1.45.jar"/> + <library name="bcmail-jdk15on-1.52.jar"/> + <library name="bcpkix-jdk15on-1.52.jar"/> + <library name="bcprov-jdk15on-1.52.jar"/> <library name="boilerpipe-1.1.0.jar"/> - <library name="commons-codec-1.9.jar"/> - <library name="commons-compress-1.8.1.jar"/> - <library name="commons-httpclient-3.1.jar"/> + <library name="bzip2-0.9.1.jar"/> + <library 
name="c3p0-0.9.1.1.jar"/> + <library name="cdm-4.5.5.jar"/> + <library name="commons-codec-1.6.jar"/> + <library name="commons-compress-1.9.jar"/> + <library name="commons-csv-1.0.jar"/> <library name="commons-logging-1.1.1.jar"/> - <library name="dom4j-1.6.1.jar"/> - <library name="fontbox-1.8.8.jar"/> - <library name="geronimo-stax-api_1.0_spec-1.0.1.jar"/> + <library name="commons-logging-api-1.1.jar"/> + <library name="commons-vfs2-2.0.jar"/> + <library name="ehcache-core-2.6.2.jar"/> + <library name="fontbox-1.8.9.jar"/> + <library name="grib-4.5.5.jar"/> + <library name="guava-10.0.1.jar"/> + <library name="httpclient-4.2.6.jar"/> + <library name="httpcore-4.2.5.jar"/> + <library name="httpmime-4.2.6.jar"/> + <library name="httpservices-4.5.5.jar"/> <library name="isoparser-1.0.2.jar"/> <library name="java-libpst-0.8.1.jar"/> <library name="jcip-annotations-1.0.jar"/> + <library name="jcommander-1.35.jar"/> <library name="jdom-1.0.jar"/> - <library name="jempbox-1.8.8.jar"/> - <library name="jhighlight-1.0.jar"/> + <library name="jdom2-2.0.4.jar"/> + <library name="jempbox-1.8.9.jar"/> + <library name="jhighlight-1.0.2.jar"/> + <library name="jj2000-5.2.jar"/> <library name="jmatio-1.0.jar"/> + <library name="jna-4.1.0.jar"/> + <library name="joda-time-2.2.jar"/> + <library name="jsoup-1.7.2.jar"/> + <library name="jsr305-1.3.9.jar"/> <library name="juniversalchardet-1.0.3.jar"/> - <library name="metadata-extractor-2.6.2.jar"/> - <library name="netcdf-4.2.20.jar"/> - <library name="pdfbox-1.8.8.jar"/> - <library name="poi-3.11.jar"/> - <library name="poi-ooxml-3.11.jar"/> - <library name="poi-ooxml-schemas-3.11.jar"/> - <library name="poi-scratchpad-3.11.jar"/> + <library name="junrar-0.7.jar"/> + <library name="maven-scm-api-1.4.jar"/> + <library name="maven-scm-provider-svn-commons-1.4.jar"/> + <library name="maven-scm-provider-svnexe-1.4.jar"/> + <library name="metadata-extractor-2.8.0.jar"/> + <library name="netcdf4-4.5.5.jar"/> + <library 
name="pdfbox-1.8.9.jar"/> + <library name="plexus-utils-1.5.6.jar"/> + <library name="poi-3.12-beta1.jar"/> + <library name="poi-ooxml-3.12-beta1.jar"/> + <library name="poi-ooxml-schemas-3.12-beta1.jar"/> + <library name="poi-scratchpad-3.12-beta1.jar"/> + <library name="protobuf-java-2.5.0.jar"/> + <library name="quartz-2.2.0.jar"/> + <library name="regexp-1.3.jar"/> <library name="rome-0.9.jar"/> - <library name="slf4j-api-1.6.1.jar"/> + <library name="slf4j-api-1.7.12.jar"/> + <library name="sqlite-jdbc-3.8.6.jar"/> <library name="tagsoup-1.2.1.jar"/> - <library name="tika-parsers-1.7.jar"/> - <library name="unidataCommon-4.2.20.jar"/> + <library name="tika-parsers-1.8.jar"/> + <library name="udunits-4.5.5.jar"/> <library name="vorbis-java-core-0.6.jar"/> <library name="vorbis-java-tika-0.6.jar"/> - <library name="xercesImpl-2.8.1.jar"/> - <library name="xml-apis-1.3.03.jar"/> <library name="xmlbeans-2.6.0.jar"/> <library name="xmpcore-5.1.2.jar"/> <library name="xz-1.5.jar"/>