This is an automated email from the git hooks/post-receive script. ebourg-guest pushed a commit to branch master in repository tika.
commit 175f55179303d954c538188fca703c313738f9f8 Author: Emmanuel Bourg <[email protected]> Date: Wed Jun 10 00:33:23 2015 -1200 Enabled the jhighlight support --- debian/changelog | 6 ++ debian/control | 1 + debian/maven.ignoreRules | 1 - debian/patches/04-ignore-jhighlight.patch | 140 ------------------------------ debian/patches/series | 1 - 5 files changed, 7 insertions(+), 142 deletions(-) diff --git a/debian/changelog b/debian/changelog index ebbcfd3..8421fd5 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +tika (1.5-3) UNRELEASED; urgency=medium + + * Enabled the jhighlight support + + -- Emmanuel Bourg <[email protected]> Wed, 10 Jun 2015 00:32:29 -1200 + tika (1.5-2) unstable; urgency=medium * Depend on libmetadata-extractor-java >= 2.7.2 diff --git a/debian/control b/debian/control index 7677830..99083c7 100644 --- a/debian/control +++ b/debian/control @@ -12,6 +12,7 @@ Build-Depends-Indep: bnd (>= 1.43.0), libboilerpipe-java, libcommons-compress-java, libjempbox-java, + libjhighlight-java, libjuniversalchardet-java, libmaven-bundle-plugin-java, libmetadata-extractor-java (>= 2.7.2-1~), diff --git a/debian/maven.ignoreRules b/debian/maven.ignoreRules index 4eca5ed..82c8f18 100644 --- a/debian/maven.ignoreRules +++ b/debian/maven.ignoreRules @@ -1,6 +1,5 @@ com.googlecode.mp4parser isoparser * * * * -com.uwyn jhighlight * * * * edu.ucar netcdf * * * * junit junit * * * * org.apache.felix maven-scr-plugin * * * * diff --git a/debian/patches/04-ignore-jhighlight.patch b/debian/patches/04-ignore-jhighlight.patch deleted file mode 100644 index cee0b85..0000000 --- a/debian/patches/04-ignore-jhighlight.patch +++ /dev/null @@ -1,140 +0,0 @@ -Description: Remove the classes using the jhighlight library which isn't in Debian yet -Author: Emmanuel Bourg <[email protected]> -Forwarded: not-needed - ---- a/tika-parsers/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java -+++ /dev/null -@@ -1,133 +0,0 @@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one or more -- * contributor license agreements. See the NOTICE file distributed with -- * this work for additional information regarding copyright ownership. -- * The ASF licenses this file to You under the Apache License, Version 2.0 -- * (the "License"); you may not use this file except in compliance with -- * the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ --package org.apache.tika.parser.code; -- --import static com.uwyn.jhighlight.renderer.XhtmlRendererFactory.CPP; --import static com.uwyn.jhighlight.renderer.XhtmlRendererFactory.GROOVY; --import static com.uwyn.jhighlight.renderer.XhtmlRendererFactory.JAVA; -- --import java.io.IOException; --import java.io.InputStream; --import java.nio.charset.Charset; --import java.util.HashMap; --import java.util.Map; --import java.util.Set; --import java.util.regex.Matcher; --import java.util.regex.Pattern; -- --import org.apache.tika.config.ServiceLoader; --import org.apache.tika.detect.AutoDetectReader; --import org.apache.tika.exception.TikaException; --import org.apache.tika.io.CloseShieldInputStream; --import org.apache.tika.metadata.Metadata; --import org.apache.tika.metadata.TikaCoreProperties; --import org.apache.tika.mime.MediaType; --import org.apache.tika.parser.ParseContext; --import org.apache.tika.parser.Parser; --import org.xml.sax.ContentHandler; --import org.xml.sax.SAXException; -- --import com.uwyn.jhighlight.renderer.Renderer; --import com.uwyn.jhighlight.renderer.XhtmlRendererFactory; --/** -- * Generic Source code parser for Java, Groovy, C++ -- * -- * @author Hong-Thai.Nguyen -- * @since 1.6 -- */ --public class SourceCodeParser implements Parser { -- -- private static final long serialVersionUID = -4543476498190054160L; -- -- private static final Pattern authorPattern = Pattern.compile("(?im)@author (.*) *$"); -- -- private static final Map<MediaType, String> TYPES_TO_RENDERER = new HashMap<MediaType, String>() { -- private static final long serialVersionUID = -741976157563751152L; -- { -- put(MediaType.text("x-c++src"), CPP); -- put(MediaType.text("x-java-source"), JAVA); -- put(MediaType.text("x-groovy"), GROOVY); -- } -- }; -- -- private static final ServiceLoader LOADER = new ServiceLoader(SourceCodeParser.class.getClassLoader()); -- -- @Override -- public Set<MediaType> getSupportedTypes(ParseContext context) { -- return TYPES_TO_RENDERER.keySet(); -- } -- -- @Override -- public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) -- throws IOException, SAXException, TikaException { -- -- AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata, context.get(ServiceLoader.class, LOADER)); -- -- try { -- Charset charset = reader.getCharset(); -- String mediaType = metadata.get(Metadata.CONTENT_TYPE); -- String name = metadata.get(Metadata.RESOURCE_NAME_KEY); -- if (mediaType != null && name != null) { -- MediaType type = MediaType.parse(mediaType); -- metadata.set(Metadata.CONTENT_TYPE, type.toString()); -- metadata.set(Metadata.CONTENT_ENCODING, charset.name()); -- -- StringBuilder out = new StringBuilder(); -- String line; -- int nbLines = 0; -- while ((line = reader.readLine()) != null) { -- out.append(line); -- String author = parserAuthor(line); -- if (author != null) { -- metadata.add(TikaCoreProperties.CREATOR, author); -- } -- nbLines ++; -- } -- metadata.set("LoC", String.valueOf(nbLines)); -- -- Renderer renderer = getRenderer(type.toString()); -- String codeAsHtml = renderer.highlight(name, out.toString(), charset.name(), false); -- char[] charArray = codeAsHtml.toCharArray(); -- handler.startDocument(); -- handler.characters(charArray, 0, charArray.length); -- handler.endDocument(); -- } -- } finally { -- reader.close(); -- } -- -- } -- -- private Renderer getRenderer(String mimeType) { -- MediaType mt = MediaType.parse(mimeType); -- String type = TYPES_TO_RENDERER.get(mt); -- if (type == null) { -- throw new RuntimeException("unparseable content type " + mimeType); -- } -- return XhtmlRendererFactory.getRenderer(type); -- } -- -- -- private String parserAuthor(String line) { -- Matcher m = authorPattern.matcher(line); -- if (m.find()) { -- return m.group(1).trim(); -- } -- -- return null; -- } --} diff --git a/debian/patches/series b/debian/patches/series index be6ada8..9c6136e 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,7 +1,6 @@ 01-jar-packaging.patch 02-ignore-mp4parser.patch 03-ignore-netcdf.patch -04-ignore-jhighlight.patch 05-osgi-compatibility.patch 06-optional-parser-dependencies.patch 07-metadata-extractor-2.7-compatibility.patch -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/tika.git _______________________________________________ pkg-java-commits mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-java-commits

