This is an automated email from the git hooks/post-receive script. apo-guest pushed a commit to branch master in repository tika.
commit 8f685429e2f880595c101744256f3a4c51e4e8fa Author: Markus Koschany <[email protected]> Date: Mon Nov 30 16:08:35 2015 +0000 Add refreshed patches again. --- debian/patches/01-jar-packaging.patch | 16 +- debian/patches/MP4Parser.patch | 340 ++++++++++++++++++++++ debian/patches/netcdf.patch | 290 ++++++++++++++++++ debian/patches/optional-parser-dependencies.patch | 303 +++++++++++++++++++ debian/patches/osgi.patch | 31 ++ debian/patches/series | 4 + 6 files changed, 981 insertions(+), 3 deletions(-) diff --git a/debian/patches/01-jar-packaging.patch b/debian/patches/01-jar-packaging.patch index 3186ec0..38ccbaa 100644 --- a/debian/patches/01-jar-packaging.patch +++ b/debian/patches/01-jar-packaging.patch @@ -1,6 +1,14 @@ -Description: Change the Maven packaging from bundle to jar to avoid build issues with maven-debian-helper -Author: Emmanuel Bourg <[email protected]> -Forwarded: not-needed +From: Debian Java Maintainers <[email protected]> +Date: Mon, 30 Nov 2015 15:50:06 +0000 +Subject: jar-packaging + +--- + tika-core/pom.xml | 2 +- + tika-parsers/pom.xml | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tika-core/pom.xml b/tika-core/pom.xml +index 1ed5538..7f80b87 100644 --- a/tika-core/pom.xml +++ b/tika-core/pom.xml @@ -30,7 +30,7 @@ @@ -12,6 +20,8 @@ Forwarded: not-needed <name>Apache Tika core</name> <url>http://tika.apache.org/</url> +diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml +index 9557a3d..c0f673f 100644 --- a/tika-parsers/pom.xml +++ b/tika-parsers/pom.xml @@ -30,7 +30,7 @@ diff --git a/debian/patches/MP4Parser.patch b/debian/patches/MP4Parser.patch new file mode 100644 index 0000000..3998d88 --- /dev/null +++ b/debian/patches/MP4Parser.patch @@ -0,0 +1,340 @@ +From: Markus Koschany <[email protected]> +Date: Mon, 30 Nov 2015 15:50:18 +0000 +Subject: MP4Parser + +--- + .../java/org/apache/tika/parser/mp4/MP4Parser.java | 325 --------------------- + 1 file changed, 325 deletions(-) + delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java + +diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java b/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java +deleted file mode 100644 +index 20c8246..0000000 +--- a/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java ++++ /dev/null +@@ -1,325 +0,0 @@ +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +-package org.apache.tika.parser.mp4; +- +-import java.io.IOException; +-import java.io.InputStream; +-import java.text.DecimalFormat; +-import java.text.NumberFormat; +-import java.util.Arrays; +-import java.util.Collections; +-import java.util.HashMap; +-import java.util.List; +-import java.util.Locale; +-import java.util.Map; +-import java.util.Set; +- +-import org.apache.tika.exception.TikaException; +-import org.apache.tika.io.TemporaryResources; +-import org.apache.tika.io.TikaInputStream; +-import org.apache.tika.metadata.Metadata; +-import org.apache.tika.metadata.Property; +-import org.apache.tika.metadata.TikaCoreProperties; +-import org.apache.tika.metadata.XMP; +-import org.apache.tika.metadata.XMPDM; +-import org.apache.tika.mime.MediaType; +-import org.apache.tika.parser.AbstractParser; +-import org.apache.tika.parser.ParseContext; +-import org.apache.tika.sax.XHTMLContentHandler; +-import org.xml.sax.ContentHandler; +-import org.xml.sax.SAXException; +- +-import com.coremedia.iso.IsoFile; +-import com.coremedia.iso.boxes.Box; +-import com.coremedia.iso.boxes.Container; +-import com.coremedia.iso.boxes.FileTypeBox; +-import com.coremedia.iso.boxes.MetaBox; +-import com.coremedia.iso.boxes.MovieBox; +-import com.coremedia.iso.boxes.MovieHeaderBox; +-import com.coremedia.iso.boxes.SampleDescriptionBox; +-import com.coremedia.iso.boxes.SampleTableBox; +-import com.coremedia.iso.boxes.TrackBox; +-import com.coremedia.iso.boxes.TrackHeaderBox; +-import com.coremedia.iso.boxes.UserDataBox; +-import com.coremedia.iso.boxes.apple.AppleItemListBox; +-import com.coremedia.iso.boxes.sampleentry.AudioSampleEntry; +-import com.googlecode.mp4parser.boxes.apple.AppleAlbumBox; +-import com.googlecode.mp4parser.boxes.apple.AppleArtistBox; +-import com.googlecode.mp4parser.boxes.apple.AppleArtist2Box; +-import com.googlecode.mp4parser.boxes.apple.AppleCommentBox; +-import com.googlecode.mp4parser.boxes.apple.AppleCompilationBox; +-import com.googlecode.mp4parser.boxes.apple.AppleDiskNumberBox; +-import com.googlecode.mp4parser.boxes.apple.AppleEncoderBox; +-import com.googlecode.mp4parser.boxes.apple.AppleGenreBox; +-import com.googlecode.mp4parser.boxes.apple.AppleNameBox; +-import com.googlecode.mp4parser.boxes.apple.AppleRecordingYear2Box; +-import com.googlecode.mp4parser.boxes.apple.AppleTrackAuthorBox; +-import com.googlecode.mp4parser.boxes.apple.AppleTrackNumberBox; +-import com.googlecode.mp4parser.boxes.apple.Utf8AppleDataBox; +- +-/** +- * Parser for the MP4 media container format, as well as the older +- * QuickTime format that MP4 is based on. +- * +- * This uses the MP4Parser project from http://code.google.com/p/mp4parser/ +- * to do the underlying parsing +- */ +-public class MP4Parser extends AbstractParser { +- /** Serial version UID */ +- private static final long serialVersionUID = 84011216792285L; +- /** TODO Replace this with a 2dp Duration Property Converter */ +- private static final DecimalFormat DURATION_FORMAT = +- (DecimalFormat)NumberFormat.getNumberInstance(Locale.ROOT); +- static { +- DURATION_FORMAT.applyPattern("0.0#"); +- } +- +- // Ensure this stays in Sync with the entries in tika-mimetypes.xml +- private static final Map<MediaType,List<String>> typesMap = new HashMap<MediaType, List<String>>(); +- static { +- // All types should be 4 bytes long, space padded as needed +- typesMap.put(MediaType.audio("mp4"), Arrays.asList( +- "M4A ", "M4B ", "F4A ", "F4B ")); +- typesMap.put(MediaType.video("3gpp"), Arrays.asList( +- "3ge6", "3ge7", "3gg6", "3gp1", "3gp2", "3gp3", "3gp4", "3gp5", "3gp6", "3gs7")); +- typesMap.put(MediaType.video("3gpp2"), Arrays.asList( +- "3g2a", "3g2b", "3g2c")); +- typesMap.put(MediaType.video("mp4"), Arrays.asList( +- "mp41", "mp42")); +- typesMap.put(MediaType.video("x-m4v"), Arrays.asList( +- "M4V ", "M4VH", "M4VP")); +- +- typesMap.put(MediaType.video("quicktime"), Collections.<String>emptyList()); +- typesMap.put(MediaType.application("mp4"), Collections.<String>emptyList()); +- } +- +- private static final Set<MediaType> SUPPORTED_TYPES = +- Collections.unmodifiableSet(typesMap.keySet()); +- +- public Set<MediaType> getSupportedTypes(ParseContext context) { +- return SUPPORTED_TYPES; +- } +- +- +- public void parse( +- InputStream stream, ContentHandler handler, +- Metadata metadata, ParseContext context) +- throws IOException, SAXException, TikaException { +- IsoFile isoFile; +- +- // The MP4Parser library accepts either a File, or a byte array +- // As MP4 video files are typically large, always use a file to +- // avoid OOMs that may occur with in-memory buffering +- TemporaryResources tmp = new TemporaryResources(); +- TikaInputStream tstream = TikaInputStream.get(stream, tmp); +- try { +- isoFile = new IsoFile(new DirectFileReadDataSource(tstream.getFile())); +- tmp.addResource(isoFile); +- +- // Grab the file type box +- FileTypeBox fileType = getOrNull(isoFile, FileTypeBox.class); +- if (fileType != null) { +- // Identify the type +- MediaType type = MediaType.application("mp4"); +- for (MediaType t : typesMap.keySet()) { +- if (typesMap.get(t).contains(fileType.getMajorBrand())) { +- type = t; +- break; +- } +- } +- metadata.set(Metadata.CONTENT_TYPE, type.toString()); +- +- if (type.getType().equals("audio")) { +- metadata.set(XMPDM.AUDIO_COMPRESSOR, fileType.getMajorBrand().trim()); +- } +- } else { +- // Some older QuickTime files lack the FileType +- metadata.set(Metadata.CONTENT_TYPE, "video/quicktime"); +- } +- +- +- // Get the main MOOV box +- MovieBox moov = getOrNull(isoFile, MovieBox.class); +- if (moov == null) { +- // Bail out +- return; +- } +- +- +- XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); +- xhtml.startDocument(); +- +- +- // Pull out some information from the header box +- MovieHeaderBox mHeader = getOrNull(moov, MovieHeaderBox.class); +- if (mHeader != null) { +- // Get the creation and modification dates +- metadata.set(Metadata.CREATION_DATE, mHeader.getCreationTime()); +- metadata.set(TikaCoreProperties.MODIFIED, mHeader.getModificationTime()); +- +- // Get the duration +- double durationSeconds = ((double)mHeader.getDuration()) / mHeader.getTimescale(); +- metadata.set(XMPDM.DURATION, DURATION_FORMAT.format(durationSeconds)); +- +- // The timescale is normally the sampling rate +- metadata.set(XMPDM.AUDIO_SAMPLE_RATE, (int)mHeader.getTimescale()); +- } +- +- +- // Get some more information from the track header +- // TODO Decide how to handle multiple tracks +- List<TrackBox> tb = moov.getBoxes(TrackBox.class); +- if (tb.size() > 0) { +- TrackBox track = tb.get(0); +- +- TrackHeaderBox header = track.getTrackHeaderBox(); +- // Get the creation and modification dates +- metadata.set(TikaCoreProperties.CREATED, header.getCreationTime()); +- metadata.set(TikaCoreProperties.MODIFIED, header.getModificationTime()); +- +- // Get the video with and height +- metadata.set(Metadata.IMAGE_WIDTH, (int)header.getWidth()); +- metadata.set(Metadata.IMAGE_LENGTH, (int)header.getHeight()); +- +- // Get the sample information +- SampleTableBox samples = track.getSampleTableBox(); +- SampleDescriptionBox sampleDesc = samples.getSampleDescriptionBox(); +- if (sampleDesc != null) { +- // Look for the first Audio Sample, if present +- AudioSampleEntry sample = getOrNull(sampleDesc, AudioSampleEntry.class); +- if (sample != null) { +- XMPDM.ChannelTypePropertyConverter.convertAndSet(metadata, sample.getChannelCount()); +- //metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, sample.getSampleSize()); // TODO Num -> Type mapping +- metadata.set(XMPDM.AUDIO_SAMPLE_RATE, (int)sample.getSampleRate()); +- //metadata.set(XMPDM.AUDIO_, sample.getSamplesPerPacket()); +- //metadata.set(XMPDM.AUDIO_, sample.getBytesPerSample()); +- } +- } +- } +- +- // Get metadata from the User Data Box +- UserDataBox userData = getOrNull(moov, UserDataBox.class); +- if (userData != null) { +- MetaBox meta = getOrNull(userData, MetaBox.class); +- +- // Check for iTunes Metadata +- // See http://atomicparsley.sourceforge.net/mpeg-4files.html and +- // http://code.google.com/p/mp4v2/wiki/iTunesMetadata for more on these +- AppleItemListBox apple = getOrNull(meta, AppleItemListBox.class); +- if (apple != null) { +- // Title +- AppleNameBox title = getOrNull(apple, AppleNameBox.class); +- addMetadata(TikaCoreProperties.TITLE, metadata, title); +- +- // Artist +- AppleArtistBox artist = getOrNull(apple, AppleArtistBox.class); +- addMetadata(TikaCoreProperties.CREATOR, metadata, artist); +- addMetadata(XMPDM.ARTIST, metadata, artist); +- +- // Album Artist +- AppleArtist2Box artist2 = getOrNull(apple, AppleArtist2Box.class); +- addMetadata(XMPDM.ALBUM_ARTIST, metadata, artist2); +- +- // Album +- AppleAlbumBox album = getOrNull(apple, AppleAlbumBox.class); +- addMetadata(XMPDM.ALBUM, metadata, album); +- +- // Composer +- AppleTrackAuthorBox composer = getOrNull(apple, AppleTrackAuthorBox.class); +- addMetadata(XMPDM.COMPOSER, metadata, composer); +- +- // Genre +- AppleGenreBox genre = getOrNull(apple, AppleGenreBox.class); +- addMetadata(XMPDM.GENRE, metadata, genre); +- +- // Year +- AppleRecordingYear2Box year = getOrNull(apple, AppleRecordingYear2Box.class); +- if (year != null) { +- metadata.set(XMPDM.RELEASE_DATE, year.getValue()); +- } +- +- // Track number +- AppleTrackNumberBox trackNum = getOrNull(apple, AppleTrackNumberBox.class); +- if (trackNum != null) { +- metadata.set(XMPDM.TRACK_NUMBER, trackNum.getA()); +- //metadata.set(XMPDM.NUMBER_OF_TRACKS, trackNum.getB()); // TODO +- } +- +- // Disc number +- AppleDiskNumberBox discNum = getOrNull(apple, AppleDiskNumberBox.class); +- if (discNum != null) { +- metadata.set(XMPDM.DISC_NUMBER, discNum.getA()); +- } +- +- // Compilation +- AppleCompilationBox compilation = getOrNull(apple, AppleCompilationBox.class); +- if (compilation != null) { +- metadata.set(XMPDM.COMPILATION, (int)compilation.getValue()); +- } +- +- // Comment +- AppleCommentBox comment = getOrNull(apple, AppleCommentBox.class); +- addMetadata(XMPDM.LOG_COMMENT, metadata, comment); +- +- // Encoder +- AppleEncoderBox encoder = getOrNull(apple, AppleEncoderBox.class); +- if (encoder != null) { +- metadata.set(XMP.CREATOR_TOOL, encoder.getValue()); +- } +- +- +- // As text +- for (Box box : apple.getBoxes()) { +- if (box instanceof Utf8AppleDataBox) { +- xhtml.element("p", ((Utf8AppleDataBox)box).getValue()); +- } +- } +- } +- +- // TODO Check for other kinds too +- } +- +- // All done +- xhtml.endDocument(); +- +- } finally { +- tmp.dispose(); +- } +- +- } +- +- private static void addMetadata(String key, Metadata m, Utf8AppleDataBox metadata) { +- if (metadata != null) { +- m.add(key, metadata.getValue()); +- } +- } +- private static void addMetadata(Property prop, Metadata m, Utf8AppleDataBox metadata) { +- if (metadata != null) { +- m.set(prop, metadata.getValue()); +- } +- } +- +- private static <T extends Box> T getOrNull(Container box, Class<T> clazz) { +- if (box == null) return null; +- +- List<T> boxes = box.getBoxes(clazz); +- if (boxes.size() == 0) { +- return null; +- } +- return boxes.get(0); +- } +-} diff --git a/debian/patches/netcdf.patch b/debian/patches/netcdf.patch new file mode 100644 index 0000000..2fe84d6 --- /dev/null +++ b/debian/patches/netcdf.patch @@ -0,0 +1,290 @@ +From: Markus Koschany <[email protected]> +Date: Mon, 30 Nov 2015 15:53:57 +0000 +Subject: netcdf + +--- + .../java/org/apache/tika/parser/hdf/HDFParser.java | 122 ----------------- + .../apache/tika/parser/netcdf/NetCDFParser.java | 144 --------------------- + 2 files changed, 266 deletions(-) + delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java + delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java + +diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java +deleted file mode 100644 +index 821493b..0000000 +--- a/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java ++++ /dev/null +@@ -1,122 +0,0 @@ +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +- +-package org.apache.tika.parser.hdf; +- +-//JDK imports +-import java.io.ByteArrayOutputStream; +-import java.io.IOException; +-import java.io.InputStream; +-import java.util.Collections; +-import java.util.Set; +- +-import org.apache.commons.io.IOUtils; +-import org.apache.tika.exception.TikaException; +-import org.apache.tika.metadata.Metadata; +-import org.apache.tika.mime.MediaType; +-import org.apache.tika.parser.AbstractParser; +-import org.apache.tika.parser.ParseContext; +-import org.apache.tika.parser.netcdf.NetCDFParser; +-import org.apache.tika.sax.XHTMLContentHandler; +-import org.xml.sax.ContentHandler; +-import org.xml.sax.SAXException; +- +-import ucar.nc2.Attribute; +-import ucar.nc2.Group; +-import ucar.nc2.NetcdfFile; +- +-/** +- * +- * Since the {@link NetCDFParser} depends on the <a +- * href="http://www.unidata.ucar.edu/software/netcdf-java" >NetCDF-Java</a> API, +- * we are able to use it to parse HDF files as well. See <a href= +- * "http://www.unidata.ucar.edu/software/netcdf-java/formats/FileTypes.html" +- * >this link</a> for more information. +- */ +-public class HDFParser extends AbstractParser { +- +- /** Serial version UID */ +- private static final long serialVersionUID = 1091208208003437549L; +- +- private static final Set<MediaType> SUPPORTED_TYPES = +- Collections.singleton(MediaType.application("x-hdf")); +- +- /* +- * (non-Javadoc) +- * +- * @see +- * org.apache.tika.parser.netcdf.NetCDFParser#getSupportedTypes(org.apache +- * .tika.parser.ParseContext) +- */ +- public Set<MediaType> getSupportedTypes(ParseContext context) { +- return SUPPORTED_TYPES; +- } +- +- /* +- * (non-Javadoc) +- * +- * @see +- * org.apache.tika.parser.netcdf.NetCDFParser#parse(java.io.InputStream, +- * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata, +- * org.apache.tika.parser.ParseContext) +- */ +- public void parse(InputStream stream, ContentHandler handler, +- Metadata metadata, ParseContext context) throws IOException, +- SAXException, TikaException { +- ByteArrayOutputStream os = new ByteArrayOutputStream(); +- IOUtils.copy(stream, os); +- +- String name = metadata.get(Metadata.RESOURCE_NAME_KEY); +- if (name == null) { +- name = ""; +- } +- try { +- NetcdfFile ncFile = NetcdfFile.openInMemory(name, os.toByteArray()); +- unravelStringMet(ncFile, null, metadata); +- } catch (IOException e) { +- throw new TikaException("HDF parse error", e); +- } +- +- XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); +- xhtml.startDocument(); +- xhtml.endDocument(); +- } +- +- protected void unravelStringMet(NetcdfFile ncFile, Group group, Metadata met) { +- if (group == null) { +- group = ncFile.getRootGroup(); +- } +- +- // get file type +- met.set("File-Type-Description", ncFile.getFileTypeDescription()); +- // unravel its string attrs +- for (Attribute attribute : group.getAttributes()) { +- if (attribute.isString()) { +- met.add(attribute.getFullName(), attribute.getStringValue()); +- } else { +- // try and cast its value to a string +- met.add(attribute.getFullName(), String.valueOf(attribute +- .getNumericValue())); +- } +- } +- +- for (Group g : group.getGroups()) { +- unravelStringMet(ncFile, g, met); +- } +- } +- +-} +diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java +deleted file mode 100644 +index 57254f8..0000000 +--- a/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java ++++ /dev/null +@@ -1,144 +0,0 @@ +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +-package org.apache.tika.parser.netcdf; +- +-//JDK imports +- +-import java.io.IOException; +-import java.io.InputStream; +-import java.util.Collections; +-import java.util.Set; +-import java.util.List; +- +-import org.apache.tika.exception.TikaException; +-import org.apache.tika.io.TemporaryResources; +-import org.apache.tika.io.TikaInputStream; +-import org.apache.tika.metadata.Metadata; +-import org.apache.tika.metadata.Property; +-import org.apache.tika.metadata.TikaCoreProperties; +-import org.apache.tika.mime.MediaType; +-import org.apache.tika.parser.AbstractParser; +-import org.apache.tika.parser.ParseContext; +-import org.apache.tika.parser.Parser; +-import org.apache.tika.sax.XHTMLContentHandler; +-import org.xml.sax.ContentHandler; +-import org.xml.sax.SAXException; +- +-import ucar.nc2.Attribute; +-import ucar.nc2.NetcdfFile; +-import ucar.nc2.Variable; +-import ucar.nc2.Dimension; +- +-/** +- * A {@link Parser} for <a +- * href="http://www.unidata.ucar.edu/software/netcdf/index.html">NetCDF</a> +- * files using the UCAR, MIT-licensed <a +- * href="http://www.unidata.ucar.edu/software/netcdf-java/">NetCDF for Java</a> +- * API. +- */ +-public class NetCDFParser extends AbstractParser { +- +- /** +- * Serial version UID +- */ +- private static final long serialVersionUID = -5940938274907708665L; +- +- private final Set<MediaType> SUPPORTED_TYPES = +- Collections.singleton(MediaType.application("x-netcdf")); +- +- /* +- * (non-Javadoc) +- * +- * @see +- * org.apache.tika.parser.Parser#getSupportedTypes(org.apache.tika.parser +- * .ParseContext) +- */ +- public Set<MediaType> getSupportedTypes(ParseContext context) { +- return SUPPORTED_TYPES; +- } +- +- /* +- * (non-Javadoc) +- * +- * @see org.apache.tika.parser.Parser#parse(java.io.InputStream, +- * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata, +- * org.apache.tika.parser.ParseContext) +- */ +- public void parse(InputStream stream, ContentHandler handler, +- Metadata metadata, ParseContext context) throws IOException, +- SAXException, TikaException { +- +- TikaInputStream tis = TikaInputStream.get(stream, new TemporaryResources()); +- try { +- NetcdfFile ncFile = NetcdfFile.open(tis.getFile().getAbsolutePath()); +- metadata.set("File-Type-Description", ncFile.getFileTypeDescription()); +- // first parse out the set of global attributes +- for (Attribute attr : ncFile.getGlobalAttributes()) { +- Property property = resolveMetadataKey(attr.getFullName()); +- if (attr.getDataType().isString()) { +- metadata.add(property, attr.getStringValue()); +- } else if (attr.getDataType().isNumeric()) { +- int value = attr.getNumericValue().intValue(); +- metadata.add(property, String.valueOf(value)); +- } +- } +- +- +- XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); +- xhtml.startDocument(); +- xhtml.newline(); +- xhtml.element("h1", "dimensions"); +- xhtml.startElement("ul"); +- xhtml.newline(); +- for (Dimension dim : ncFile.getDimensions()) { +- xhtml.element("li", dim.getFullName() + " = " + dim.getLength()); +- } +- xhtml.endElement("ul"); +- +- xhtml.element("h1", "variables"); +- xhtml.startElement("ul"); +- xhtml.newline(); +- for (Variable var : ncFile.getVariables()) { +- xhtml.startElement("li"); +- xhtml.characters(var.getDataType() + " " + var.getNameAndDimensions()); +- xhtml.newline(); +- List<Attribute> attributes = var.getAttributes(); +- if (!attributes.isEmpty()) { +- xhtml.startElement("ul"); +- for (Attribute element : attributes) { +- xhtml.element("li", element.toString()); +- } +- xhtml.endElement("ul"); +- } +- xhtml.endElement("li"); +- } +- xhtml.endElement("ul"); +- +- xhtml.endDocument(); +- +- } catch (IOException e) { +- throw new TikaException("NetCDF parse error", e); +- } +- } +- +- private Property resolveMetadataKey(String localName) { +- if ("title".equals(localName)) { +- return TikaCoreProperties.TITLE; +- } +- return Property.internalText(localName); +- } +-} +\ No newline at end of file diff --git a/debian/patches/optional-parser-dependencies.patch b/debian/patches/optional-parser-dependencies.patch new file mode 100644 index 0000000..5d933bc --- /dev/null +++ b/debian/patches/optional-parser-dependencies.patch @@ -0,0 +1,303 @@ +From: Markus Koschany <[email protected]> +Date: Mon, 30 Nov 2015 16:08:14 +0000 +Subject: optional parser dependencies + +--- + tika-parsers/pom.xml | 44 ++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 44 insertions(+) + +diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml +index c0f673f..6872101 100644 +--- a/tika-parsers/pom.xml ++++ b/tika-parsers/pom.xml +@@ -76,16 +76,19 @@ + <groupId>org.gagravarr</groupId> + <artifactId>vorbis-java-tika</artifactId> + <version>${vorbis.version}</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>com.healthmarketscience.jackcess</groupId> + <artifactId>jackcess</artifactId> + <version>2.1.2</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>com.healthmarketscience.jackcess</groupId> + <artifactId>jackcess-encrypt</artifactId> + <version>2.1.1</version> ++ <optional>true</optional> + </dependency> + + <!-- Optional OSGi dependencies, used only when running within OSGi --> +@@ -93,6 +96,7 @@ + <groupId>org.apache.felix</groupId> + <artifactId>org.apache.felix.scr.annotations</artifactId> + <scope>provided</scope> ++ <optional>true</optional> + </dependency> + + <!-- Upstream parser libraries --> +@@ -100,37 +104,44 @@ + <groupId>net.sourceforge.jmatio</groupId> + <artifactId>jmatio</artifactId> + <version>1.0</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>org.apache.james</groupId> + <artifactId>apache-mime4j-core</artifactId> + <version>${mime4j.version}</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>org.apache.james</groupId> + <artifactId>apache-mime4j-dom</artifactId> + <version>${mime4j.version}</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-compress</artifactId> + <version>${commons.compress.version}</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>org.tukaani</groupId> + <artifactId>xz</artifactId> + <version>${tukaani.version}</version> ++ <optional>true</optional> + </dependency> + + <dependency> + <groupId>commons-codec</groupId> + <artifactId>commons-codec</artifactId> + <version>${codec.version}</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>org.apache.pdfbox</groupId> + <artifactId>pdfbox</artifactId> + <version>${pdfbox.version}</version> ++ <optional>true</optional> + </dependency> + <!-- TIKA-370: PDFBox declares the Bouncy Castle dependencies + as optional, but we prefer to have them always to avoid +@@ -139,26 +150,31 @@ + <groupId>org.bouncycastle</groupId> + <artifactId>bcmail-jdk15on</artifactId> + <version>1.52</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>org.bouncycastle</groupId> + <artifactId>bcprov-jdk15on</artifactId> + <version>1.52</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>org.apache.poi</groupId> + <artifactId>poi</artifactId> + <version>${poi.version}</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>org.apache.poi</groupId> + <artifactId>poi-scratchpad</artifactId> + <version>${poi.version}</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>org.apache.poi</groupId> + <artifactId>poi-ooxml</artifactId> + <version>${poi.version}</version> ++ <optional>true</optional> + <exclusions> + <exclusion> + <groupId>stax</groupId> +@@ -174,61 +190,73 @@ + <groupId>org.ccil.cowan.tagsoup</groupId> + <artifactId>tagsoup</artifactId> + <version>1.2.1</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>org.ow2.asm</groupId> + <artifactId>asm</artifactId> + <version>5.0.4</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>com.googlecode.mp4parser</groupId> + <artifactId>isoparser</artifactId> + <version>1.0.2</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>com.drewnoakes</groupId> + <artifactId>metadata-extractor</artifactId> + <version>2.8.0</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>de.l3s.boilerpipe</groupId> + <artifactId>boilerpipe</artifactId> + <version>1.1.0</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>rome</groupId> + <artifactId>rome</artifactId> + <version>1.0</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>org.gagravarr</groupId> + <artifactId>vorbis-java-core</artifactId> + <version>${vorbis.version}</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>com.googlecode.juniversalchardet</groupId> + <artifactId>juniversalchardet</artifactId> + <version>1.0.3</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>org.codelibs</groupId> + <artifactId>jhighlight</artifactId> + <version>1.0.2</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>com.pff</groupId> + <artifactId>java-libpst</artifactId> + <version>0.8.1</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>com.github.junrar</groupId> + <artifactId>junrar</artifactId> + <version>0.7</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>org.apache.cxf</groupId> + <artifactId>cxf-rt-rs-client</artifactId> + <version>${cxf.version}</version> ++ <optional>true</optional> + </dependency> + + +@@ -238,30 +266,35 @@ + <artifactId>sqlite-jdbc</artifactId> + <version>3.8.10.1</version> + <scope>provided</scope> ++ <optional>true</optional> + </dependency> + + <dependency> + <groupId>org.apache.opennlp</groupId> + <artifactId>opennlp-tools</artifactId> + <version>1.5.3</version> ++ <optional>true</optional> + </dependency> + + <dependency> + <groupId>commons-io</groupId> + <artifactId>commons-io</artifactId> + <version>${commons.io.version}</version> ++ <optional>true</optional> + </dependency> + + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-exec</artifactId> + <version>1.3</version> ++ <optional>true</optional> + </dependency> + + <dependency> + <groupId>com.googlecode.json-simple</groupId> + <artifactId>json-simple</artifactId> + <version>1.1.1</version> ++ <optional>true</optional> + <exclusions> + <exclusion> + <groupId>junit</groupId> +@@ -274,6 +307,7 @@ + <groupId>org.json</groupId> + <artifactId>json</artifactId> + <version>20140107</version> ++ <optional>true</optional> + </dependency> + + +@@ -299,16 +333,19 @@ + <groupId>edu.ucar</groupId> + <artifactId>netcdf4</artifactId> + <version>${netcdf-java.version}</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>edu.ucar</groupId> + <artifactId>grib</artifactId> + <version>${netcdf-java.version}</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>edu.ucar</groupId> + <artifactId>cdm</artifactId> + <version>${netcdf-java.version}</version> ++ <optional>true</optional> + <exclusions> + <exclusion> + <groupId>org.slf4j</groupId> +@@ -320,33 +357,39 @@ + <groupId>edu.ucar</groupId> + <artifactId>httpservices</artifactId> + <version>${netcdf-java.version}</version> ++ <optional>true</optional> + </dependency> + <!-- Apache Commons CSV --> + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-csv</artifactId> + <version>1.0</version> ++ <optional>true</optional> + </dependency> + + <dependency> + <groupId>org.apache.sis.core</groupId> + <artifactId>sis-utility</artifactId> + <version>0.5</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>org.apache.sis.storage</groupId> + <artifactId>sis-netcdf</artifactId> + <version>0.5</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>org.apache.sis.core</groupId> + <artifactId>sis-metadata</artifactId> + <version>0.5</version> ++ <optional>true</optional> + </dependency> + <dependency> + <groupId>org.opengis</groupId> + <artifactId>geoapi</artifactId> + <version>3.0.0</version> ++ <optional>true</optional> + </dependency> + <!-- Apache cTAKES --> + <dependency> +@@ -354,6 +397,7 @@ + <artifactId>ctakes-core</artifactId> + <version>3.2.2</version> + <scope>provided</scope> ++ <optional>true</optional> + </dependency> + </dependencies> + diff --git a/debian/patches/osgi.patch b/debian/patches/osgi.patch new file mode 100644 index 0000000..15a86d9 --- /dev/null +++ b/debian/patches/osgi.patch @@ -0,0 +1,31 @@ +From: Markus Koschany <[email protected]> +Date: Mon, 30 Nov 2015 15:55:24 +0000 +Subject: osgi + +--- + .../src/main/java/org/apache/tika/parser/internal/Activator.java | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java b/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java +index a884d3a..f3324b8 100644 +--- a/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java ++++ b/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java +@@ -35,14 +35,14 @@ public class Activator implements BundleActivator { + @Override + public void start(BundleContext context) throws Exception { + detectorService = context.registerService( +- Detector.class.getName(), ++ Detector.class, + new DefaultDetector(Activator.class.getClassLoader()), +- new Properties()); ++ new java.util.Hashtable<String,String>()); + Parser parser = new DefaultParser(Activator.class.getClassLoader()); + parserService = context.registerService( +- Parser.class.getName(), ++ Parser.class, + parser, +- new Properties()); ++ new java.util.Hashtable<String,String>()); + } + + @Override diff --git a/debian/patches/series b/debian/patches/series index da693f0..9c90618 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1 +1,5 @@ 01-jar-packaging.patch +MP4Parser.patch +netcdf.patch +osgi.patch +optional-parser-dependencies.patch -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/tika.git _______________________________________________ pkg-java-commits mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-java-commits

