Claudenw commented on code in PR #233: URL: https://github.com/apache/creadur-rat/pull/233#discussion_r1566294449
########## src/site/apt/matcher_def.apt.vm: ########## @@ -0,0 +1,364 @@ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~ Licensed to the Apache Software Foundation (ASF) under one or more +~~ contributor license agreements. See the NOTICE file distributed with +~~ this work for additional information regarding copyright ownership. +~~ The ASF licenses this file to You under the Apache License, Version 2.0 +~~ (the "License"); you may not use this file except in compliance with +~~ the License. You may obtain a copy of the License at +~~ +~~ http://www.apache.org/licenses/LICENSE-2.0 +~~ +~~ Unless required by applicable law or agreed to in writing, software +~~ distributed under the License is distributed on an "AS IS" BASIS, +~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +~~ See the License for the specific language governing permissions and +~~ limitations under the License. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + -------------------------- + How to define new Matchers + -------------------------- + +How to define matchers in Apache Rat + + Matchers in Apache Rat are paired with Builders. A Matcher must implement the "IHeaderMatcher" interface and its associated Builder must implement the IHeaderMatcher.Builder interface. + +* A simple example + +** The Matcher implementation + + For our example we will implement a Matcher that implements the phrase "Quality, speed and cost, pick any two" by looking for the occurrence of all three words anywhere in the header. + In most cases it is simplest to extend the AbstractHeaderMatcher class as this class will handle setting of the unique ID for instances that do not otherwise have a unique id. + ++------------------------------------------+ +public interface IHeaderMatcher extends Component { + /** + * Get the identifier for this matcher. 
+ * <p> + * All matchers must have unique identifiers + * </p> + * + * @return the Identifier for this matcher. + */ + String getId(); + + /** + * Resets this state of this matcher to its initial state in preparation for + * use with another document scan. In most cases this method does not need to + * do anything. + */ + default void reset() { + // does nothing. + } + + /** + * Attempts to match text in the IHeaders instance. + * + * @param headers the representations of the headers to check + * @return {@code true} if the matcher matches the text, {@code false} otherwise. + */ + boolean matches(IHeaders headers); +} ++------------------------------------------+ + ++------------------------------------------+ +public abstract class AbstractHeaderMatcher implements IHeaderMatcher { + + @ConfigComponent(type = Component.Type.Parameter, desc = "The id of the matcher.") + private final String id; + + /** + * Constructs the IHeaderMatcher with an id value. If {@code id} is null then a + * unique random id is created. + * + * @param id the Id to use. + */ + protected AbstractHeaderMatcher(String id) { + this.id = StringUtils.isBlank(id) ? UUID.randomUUID().toString() : id; + } + + @Override + public String getId() { + return id; + } + + @Override + public String toString() { + return getId(); + } + + @Override + public Description getDescription() { + return DescriptionBuilder.build(this); + } +} ++------------------------------------------+ + + So let's start by creating our matcher class and implementing the matches method. 
+ ++------------------------------------------+ +package com.example.ratMatcher; + +import org.apache.rat.analysis.IHeaders; +import org.apache.rat.analysis.matchers.AbstractHeaderMatcher; +import org.apache.rat.config.parameters.Component; +import org.apache.rat.config.parameters.ConfigComponent; + +@ConfigComponent(type = Component.Type.Matcher, name = "QSC", desc = "Reports if the 'Quality, speed and cost, pick any two' rule is violated") +public class QSCMatcher extends AbstractHeaderMatcher { + + public QSCMatcher(String id) { + super(id); + } + + @Override + public boolean matches(IHeaders headers) { + String text = headers.prune(); + return text.contains("quality") && text.contains("speed") && text.contains("cost"); + } +} ++------------------------------------------+ + + In the above example we use the ConfigComponent annotation to identify that this is a Matcher component, that it has the name 'QSC' and a description of what it does. + If the "name" was not specified it would have been extracted from the class name by removing the "Matcher" from "QSCMatcher". + + The constructor passes the id to the AbstractHeaderMatcher constructor so that the parent class will assign an id if none was provided. + + The matcher uses the pruned text to check for the strings. Pruned text has only the letters and numbers from the text included and all characters are converted to + lower case. There is an issue with this matcher in that it would match the string: "The quality of Dennis Hopper's acting, as Keanu Reeves' co-star in 'Speed', is outstanding." + + The correction of that is left as an exercise for the reader. Hint: matching the pruned text can be a quick gating check for a set of more expensive regular expression checks. + +** The Matcher.Builder implementation + + The builder must implement the IHeaderMatcher.Builder interface. + ++------------------------------------------+ +@FunctionalInterface +interface Builder { + /** + * Build the IHeaderMatcher. 
+ * + * Implementations of this interface should return a specific child class of IHeaderMatcher. + * + * @return a new IHeaderMatcher. + */ + IHeaderMatcher build(); + + /** + * Gets the class that is build by this builder. + * @return The class that is build by this builder. + */ + default Class<?> builtClass() throws SecurityException { + try { + return this.getClass().getMethod("build").getReturnType(); + } catch (NoSuchMethodException | SecurityException e) { + throw new IllegalStateException("the 'build' method of the Builder interface must alwasy be public"); + } + } + + /** + * Gets the Description for this builder. + * @return The description of the builder + */ + default Description getDescription() { + Class<?> clazz = builtClass(); + if (clazz == IHeaderMatcher.class) { + throw new ImplementationException(String.format( + "Class %s must implement built() method to return a child class of IHeaderMatcher", + this.getClass())); + } + return DescriptionBuilder.buildMap(clazz); + } +} ++------------------------------------------+ + + The work of handling the Id and some other tasks is handled by the AbstractBuilder. + ++------------------------------------------+ +public abstract class AbstractBuilder implements IHeaderMatcher.Builder { + + private String id; + + /** + * Protected empty constructor. + */ + protected AbstractBuilder() { + } + + /** + * Set the id for the matcher. + * @param id the id to use. + * @return this builder for chaining. + */ + public final AbstractBuilder setId(String id) { + this.id = id; + return this; + } + + /** + * @return {@code true} if the id is not null and not blank. + */ + public final boolean hasId() { + return !StringUtils.isBlank(id); + } + + /** + * @return the id as specified in the builder. 
+ */ + protected String getId() { + return id; + } + + @Override + public String toString() { + return String.format("%s with id %s", this.getClass(), id); + } +} ++------------------------------------------+ + + So we have: + ++------------------------------------------+ +package com.example.ratMatcher; + +import org.apache.rat.configuration.builders.AbstractBuilder; + +public class QSCBuilder extends AbstractBuilder { + QSCMatcher build() { + return new QSCMatcher(getId()); + } +} ++------------------------------------------+ + +** Registering the builder for use in XML configuration + + In order to use the matcher in a Rat configuration it has to be registered with the system. This can be done by creating a file with the builder specified and passing it Review Comment: Currently we only have an XML configuration parser but the system is open to new implementations. I have updated the documentation to reflect XML configuration. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@creadur.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org