This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch branch_1x in repository https://gitbox.apache.org/repos/asf/tika.git
commit ae21558ad30a64480fdcf935495ae5c9389fc57a Author: tallison <[email protected]> AuthorDate: Fri Jul 17 11:55:32 2020 -0400 fix merge conflicts --- .../main/java/org/apache/tika/config/Param.java | 89 ++++++++++++++++++---- .../java/org/apache/tika/config/TikaConfig.java | 31 +++++--- .../metadata/filter/ClearByMimeMetadataFilter.java | 8 +- .../filter/ExcludeFieldMetadataFilter.java | 15 ++-- .../filter/IncludeFieldMetadataFilter.java | 8 +- .../java/org/apache/tika/config/ParamTest.java | 7 ++ .../tika/parser/ParameterizedParserTest.java | 3 +- .../org/apache/tika/config/TIKA-3137-exclude.xml | 5 +- .../apache/tika/config/TIKA-3137-include-uc.xml | 5 +- .../org/apache/tika/config/TIKA-3137-include.xml | 5 +- .../org/apache/tika/config/TIKA-3137-mimes-uc.xml | 5 +- .../org/apache/tika/parser/TIKA-3137-include.xml | 5 +- 12 files changed, 137 insertions(+), 49 deletions(-) diff --git a/tika-core/src/main/java/org/apache/tika/config/Param.java b/tika-core/src/main/java/org/apache/tika/config/Param.java index 112955b..73e2bd9 100644 --- a/tika-core/src/main/java/org/apache/tika/config/Param.java +++ b/tika-core/src/main/java/org/apache/tika/config/Param.java @@ -21,6 +21,7 @@ import org.apache.tika.utils.XMLReaderUtils; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; +import org.w3c.dom.NodeList; import org.xml.sax.SAXException; import javax.xml.parsers.DocumentBuilder; @@ -39,7 +40,9 @@ import java.lang.reflect.InvocationTargetException; import java.math.BigInteger; import java.net.URI; import java.net.URL; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; @@ -51,8 +54,10 @@ import java.util.Map; */ public class Param<T> implements Serializable { + private static final String LIST = "list"; private static final Map<Class<?>, String> map = new HashMap<>(); private static final Map<String, Class<?>> reverseMap = new HashMap<>(); + private static final Map<String, Class<?>> wellKnownMap = new HashMap<>(); static { map.put(Boolean.class, "bool"); @@ -67,26 +72,36 @@ public class Param<T> implements Serializable { map.put(File.class, "file"); map.put(URI.class, "uri"); map.put(URL.class, "url"); + map.put(ArrayList.class, LIST); for (Map.Entry<Class<?>, String> entry : map.entrySet()) { reverseMap.put(entry.getValue(), entry.getKey()); } + //wellKnownMap.put("metadataPolicy", AbstractMultipleParser.MetadataPolicy.class); } private Class<T> type; private String name; - private String value; + private List<String> valueStrings = new ArrayList<>(); private T actualValue; public Param(){ } - public Param(String name, Class<T> type, T value){ + public Param(String name, Class<T> type, T value) { this.name = name; this.type = type; - this.value = value.toString(); + this.actualValue = value; + if (List.class.isAssignableFrom(value.getClass())) { + this.valueStrings.addAll((List)value); + } else { + this.valueStrings.add(value.toString()); + } + if (this.type == null) { + this.type = (Class<T>)wellKnownMap.get(name); + } } public Param(String name, T value){ @@ -113,6 +128,9 @@ public class Param<T> implements Serializable { if (type == null) { return null; } + if (List.class.isAssignableFrom(type)) { + return LIST; + } if (map.containsKey(type)){ return map.get(type); } @@ -129,9 +147,6 @@ public class Param<T> implements Serializable { } public T getValue(){ - if (actualValue == null) { - actualValue = getTypedValue(type, value); - } return actualValue; } @@ -139,7 +154,7 @@ public class Param<T> implements Serializable { public String toString() { return "Param{" + "name='" + name + '\'' + - ", value='" + value + '\'' + + ", valueStrings='" + valueStrings + '\'' + ", actualValue=" + actualValue + '}'; } @@ -152,13 +167,13 @@ public class Param<T> implements Serializable { Element paramEl = doc.createElement("param"); doc.appendChild(paramEl); - save(paramEl); + save(doc, paramEl); Transformer transformer = XMLReaderUtils.getTransformer(); transformer.transform(new DOMSource(paramEl), new StreamResult(stream)); } - public void save(Node node) { + public void save(Document doc, Node node) { if ( !(node instanceof Element) ) { throw new IllegalArgumentException("Not an Element : " + node); @@ -168,7 +183,17 @@ public class Param<T> implements Serializable { el.setAttribute("name", getName()); el.setAttribute("type", getTypeString()); - el.setTextContent(value); + if (List.class.isAssignableFrom(actualValue.getClass())) { + for (int i = 0; i < valueStrings.size(); i++) { + String val = valueStrings.get(i); + String typeString = map.get(((List)actualValue).get(i).getClass()); + Node item = doc.createElement(typeString); + item.setTextContent(val); + el.appendChild(item); + } + } else { + el.setTextContent(valueStrings.get(0)); + } } public static <T> Param<T> load(InputStream stream) throws SAXException, IOException, TikaException { @@ -179,20 +204,49 @@ public class Param<T> implements Serializable { return load(document.getFirstChild()); } - public static <T> Param<T> load(Node node) { + public static <T> Param<T> load(Node node) { Node nameAttr = node.getAttributes().getNamedItem("name"); Node typeAttr = node.getAttributes().getNamedItem("type"); + Node valueAttr = node.getAttributes().getNamedItem("value"); Node value = node.getFirstChild(); + if (value instanceof NodeList && valueAttr != null) { + throw new IllegalArgumentException("can't specify a value attr _and_ a node list"); + } + if (valueAttr != null && (value == null || value.getTextContent() == null)) { + value = valueAttr; + } Param<T> ret = new Param<T>(); ret.name = nameAttr.getTextContent(); - ret.setTypeString(typeAttr.getTextContent()); - ret.value = value.getTextContent(); - + if (typeAttr != null) { + ret.setTypeString(typeAttr.getTextContent()); + } else { + ret.type = (Class<T>)wellKnownMap.get(ret.name); + } + + if (List.class.isAssignableFrom(ret.type)) { + loadList(ret, node); + } else { + ret.actualValue = getTypedValue(ret.type, value.getTextContent()); + ret.valueStrings.add(value.getTextContent()); + } return ret; } - + + private static <T> void loadList(Param<T> ret, Node root) { + Node child = root.getFirstChild(); + ret.actualValue = (T)new ArrayList<>(); + while (child != null) { + if (child.getNodeType() == Node.ELEMENT_NODE) { + Class type = classFromType(child.getLocalName()); + ((List) ret.actualValue).add(getTypedValue(type, child.getTextContent())); + ret.valueStrings.add(child.getTextContent()); + } + child = child.getNextSibling(); + } + } + private static <T> Class<T> classFromType(String type) { if (reverseMap.containsKey(type)){ return (Class<T>) reverseMap.get(type); @@ -205,6 +259,11 @@ public class Param<T> implements Serializable { private static <T> T getTypedValue(Class<T> type, String value) { try { + if (type.isEnum()) { + Object val = Enum.valueOf((Class)type, value); + return (T)val; + } + Constructor<T> constructor = type.getConstructor(String.class); constructor.setAccessible(true); return constructor.newInstance(value); diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java index a0cc102..18b3add 100644 --- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java +++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java @@ -573,8 +573,8 @@ public class TikaConfig { abstract CT createDefault(MimeTypes mimeTypes, ServiceLoader loader); abstract CT createComposite(List<T> loaded, MimeTypes mimeTypes, ServiceLoader loader); abstract T createComposite(Class<? extends T> compositeClass, - List<T> children, Set<Class<? extends T>> excludeChildren, - MimeTypes mimeTypes, ServiceLoader loader) + List<T> children, Set<Class<? extends T>> excludeChildren, + Map<String, Param> params, MimeTypes mimeTypes, ServiceLoader loader) throws InvocationTargetException, IllegalAccessException, InstantiationException; abstract T decorate(T created, Element element) throws IOException, TikaException; // eg explicit mime types @@ -632,6 +632,14 @@ public class TikaConfig { loaded = preLoadOne(loadedClass, name, mimeTypes); if (loaded != null) return loaded; + // Get any parameters / settings for the parser + Map<String, Param> params = null; + try { + params = getParams(element); + } catch (Exception e) { + throw new TikaConfigException(e.getMessage(), e); + } + // Is this a composite or decorated class? If so, support recursion if (isComposite(loadedClass)) { // Get the child objects for it @@ -657,7 +665,7 @@ public class TikaConfig { } // Create the Composite - loaded = createComposite(loadedClass, children, excludeChildren, mimeTypes, loader); + loaded = createComposite(loadedClass, children, excludeChildren, params, mimeTypes, loader); // Default constructor fallback if (loaded == null) { @@ -670,7 +678,6 @@ public class TikaConfig { // See the thread "Configuring parsers and translators" for details } - Map<String, Param> params = getParams(element); //Assigning the params to bean fields/setters AnnotationUtils.assignFieldParams(loaded, params); if (loaded instanceof Initializable) { @@ -791,7 +798,7 @@ public class TikaConfig { @Override Parser createComposite(Class<? extends Parser> parserClass, List<Parser> childParsers, Set<Class<? extends Parser>> excludeParsers, - MimeTypes mimeTypes, ServiceLoader loader) + Map<String, Param> params, MimeTypes mimeTypes, ServiceLoader loader) throws InvocationTargetException, IllegalAccessException, InstantiationException { Parser parser = null; Constructor<? extends Parser> c = null; @@ -821,6 +828,12 @@ public class TikaConfig { } if (parser == null) { try { + c = parserClass.getConstructor(MediaTypeRegistry.class, Collection.class, Map.class); + parser = c.newInstance(registry, childParsers, params); + } catch (NoSuchMethodException me) {} + } + if (parser == null) { + try { c = parserClass.getConstructor(MediaTypeRegistry.class, List.class); parser = c.newInstance(registry, childParsers); } catch (NoSuchMethodException me) {} @@ -914,7 +927,7 @@ public class TikaConfig { Detector createComposite(Class<? extends Detector> detectorClass, List<Detector> childDetectors, Set<Class<? extends Detector>> excludeDetectors, - MimeTypes mimeTypes, ServiceLoader loader) + Map<String, Param> params, MimeTypes mimeTypes, ServiceLoader loader) throws InvocationTargetException, IllegalAccessException, InstantiationException { Detector detector = null; @@ -987,7 +1000,7 @@ public class TikaConfig { Translator createComposite(Class<? extends Translator> compositeClass, List<Translator> children, Set<Class<? extends Translator>> excludeChildren, - MimeTypes mimeTypes, ServiceLoader loader) + Map<String, Param> params, MimeTypes mimeTypes, ServiceLoader loader) throws InvocationTargetException, IllegalAccessException, InstantiationException { throw new InstantiationException("Only one translator supported"); @@ -1004,7 +1017,7 @@ public class TikaConfig { Class<? extends ConfigurableThreadPoolExecutor> compositeClass, List<ConfigurableThreadPoolExecutor> children, Set<Class<? extends ConfigurableThreadPoolExecutor>> excludeChildren, - MimeTypes mimeTypes, ServiceLoader loader) + Map<String, Param> params, MimeTypes mimeTypes, ServiceLoader loader) throws InvocationTargetException, IllegalAccessException, InstantiationException { throw new InstantiationException("Only one executor service supported"); @@ -1127,7 +1140,7 @@ public class TikaConfig { EncodingDetector createComposite(Class<? extends EncodingDetector> encodingDetectorClass, List<EncodingDetector> childEncodingDetectors, Set<Class<? extends EncodingDetector>> excludeDetectors, - MimeTypes mimeTypes, ServiceLoader loader) + Map<String, Param> params, MimeTypes mimeTypes, ServiceLoader loader) throws InvocationTargetException, IllegalAccessException, InstantiationException { EncodingDetector encodingDetector = null; diff --git a/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java index 05324f2..80c3c86 100644 --- a/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java +++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java @@ -63,12 +63,10 @@ public class ClearByMimeMetadataFilter implements MetadataFilter { /** * - * @param mimesString comma-delimited list of mimes that will trigger complete removal of metadata + * @param mimes list of mimes that will trigger complete removal of metadata */ @Field - public void setMimes(String mimesString) { - for (String include : mimesString.split(",")) { - mimes.add(include); - } + public void setMimes(List<String> mimes) { + this.mimes.addAll(mimes); } } diff --git a/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java index 3b6e2a0..71dc55b 100644 --- a/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java +++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java @@ -21,33 +21,32 @@ import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; import java.util.HashSet; +import java.util.List; import java.util.Set; public class ExcludeFieldMetadataFilter implements MetadataFilter { - private final Set<String> exclude; + private final Set<String> excludeSet; public ExcludeFieldMetadataFilter() { this(new HashSet<>()); } public ExcludeFieldMetadataFilter(Set<String> exclude) { - this.exclude = exclude; + this.excludeSet = exclude; } @Override public void filter(Metadata metadata) throws TikaException { - for (String field : exclude) { + for (String field : excludeSet) { metadata.remove(field); } } /** * - * @param excludeString comma-delimited list of fields to exclude + * @param exclude list of fields to exclude */ @Field - public void setExclude(String excludeString) { - for (String include : excludeString.split(",")) { - exclude.add(include); - } + public void setExclude(List<String> exclude) { + this.excludeSet.addAll(exclude); } } diff --git a/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java index 4bc6c9e..d518ce5 100644 --- a/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java +++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java @@ -37,13 +37,11 @@ public class IncludeFieldMetadataFilter implements MetadataFilter { /** * - * @param includeString comma-delimited list of fields to include + * @param include comma-delimited list of fields to include */ @Field - public void setInclude(String includeString) { - for (String include : includeString.split(",")) { - includeSet.add(include); - } + public void setInclude(List<String> include) { + includeSet.addAll(include); } @Override diff --git a/tika-core/src/test/java/org/apache/tika/config/ParamTest.java b/tika-core/src/test/java/org/apache/tika/config/ParamTest.java index 7c9007e..416cd4a 100644 --- a/tika-core/src/test/java/org/apache/tika/config/ParamTest.java +++ b/tika-core/src/test/java/org/apache/tika/config/ParamTest.java @@ -24,8 +24,10 @@ import java.io.File; import java.math.BigInteger; import java.net.URI; import java.net.URL; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import static org.junit.Assert.*; @@ -34,7 +36,12 @@ public class ParamTest { @Test public void testSaveAndLoad() throws Exception { + List<String> list = new ArrayList<>(); + list.add("quick"); + list.add("brown"); + list.add("fox"); Object objects [] = { + list, Integer.MAX_VALUE, 2.5f, 4000.57576, diff --git a/tika-core/src/test/java/org/apache/tika/parser/ParameterizedParserTest.java b/tika-core/src/test/java/org/apache/tika/parser/ParameterizedParserTest.java index a9c9a41..8b3b599 100644 --- a/tika-core/src/test/java/org/apache/tika/parser/ParameterizedParserTest.java +++ b/tika-core/src/test/java/org/apache/tika/parser/ParameterizedParserTest.java @@ -99,12 +99,11 @@ public class ParameterizedParserTest { @Test public void testBadType() throws Exception { - //TODO: should this be a TikaConfigException instead of Runtime? boolean ex = false; try { Metadata m = getMetadata("TIKA-1986-bad-types.xml"); fail("should have thrown exception"); - } catch (RuntimeException e) { + } catch (TikaConfigException e) { ex = true; } assertTrue("No RuntimeException", ex); diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml index 27517f6..96dac44 100644 --- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml +++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml @@ -19,7 +19,10 @@ <metadataFilters> <metadataFilter class="org.apache.tika.metadata.filter.ExcludeFieldMetadataFilter"> <params> - <param name="exclude" type="string">title,author</param> + <param name="exclude" type="list"> + <string>title</string> + <string>author</string> + </param> </params> </metadataFilter> </metadataFilters> diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml index e0df476..f960e94 100644 --- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml +++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml @@ -19,7 +19,10 @@ <metadataFilters> <metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter"> <params> - <param name="include" type="string">title,author</param> + <param name="include" type="list"> + <string>title</string> + <string>author</string> + </param> </params> </metadataFilter> <metadataFilter class="org.apache.tika.metadata.filter.MockUpperCaseFilter"/> diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml index e92dff8..8832915 100644 --- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml +++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml @@ -19,7 +19,10 @@ <metadataFilters> <metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter"> <params> - <param name="include" type="string">title,author</param> + <param name="include" type="list"> + <string>title</string> + <string>author</string> + </param> </params> </metadataFilter> </metadataFilters> diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml index 486280c..a151665 100644 --- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml +++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml @@ -19,7 +19,10 @@ <metadataFilters> <metadataFilter class="org.apache.tika.metadata.filter.ClearByMimeMetadataFilter"> <params> - <param name="mimes" type="string">image/jpeg,application/pdf</param> + <param name="mimes" type="list"> + <string>image/jpeg</string> + <string>application/pdf</string> + </param> </params> </metadataFilter> <metadataFilter class="org.apache.tika.metadata.filter.MockUpperCaseFilter"/> diff --git a/tika-parsers/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml b/tika-parsers/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml index 765bc11..aae2f43 100644 --- a/tika-parsers/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml +++ b/tika-parsers/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml @@ -24,7 +24,10 @@ </metadataFilter> <metadataFilter class="org.apache.tika.metadata.filter.ClearByMimeMetadataFilter"> <params> - <param name="mimes" type="string">image/emf,text/plain</param> + <param name="mimes" type="list"> + <string>image/emf</string> + <string>text/plain</string> + </param> </params> </metadataFilter> </metadataFilters>
