This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch OPENNLP-1669-Improve-JavaDoc-of-QN-related-classes
in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit 100d52f2a7f3488e6d86a487708f99c535a18ede
Author: Martin Wiesner <[email protected]>
AuthorDate: Fri Dec 13 08:29:58 2024 +0100

    OPENNLP-1669 Improve JavaDoc of QN related classes
---
 .../tools/ml/maxent/quasinewton/Function.java      |  21 ++++
 .../tools/ml/maxent/quasinewton/LineSearch.java    | 140 +++++++++++----------
 .../ml/maxent/quasinewton/NegLogLikelihood.java    |   4 +-
 .../quasinewton/ParallelNegLogLikelihood.java      |   9 +-
 .../tools/ml/maxent/quasinewton/QNMinimizer.java   |  72 ++++++-----
 .../tools/ml/maxent/quasinewton/QNModel.java       |  11 +-
 .../tools/ml/maxent/quasinewton/QNTrainer.java     |  38 ++++--
 .../java/opennlp/tools/ml/model/AbstractModel.java |  16 +--
 .../ml/maxent/quasinewton/QNMinimizerTest.java     |   7 +-
 9 files changed, 193 insertions(+), 125 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/Function.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/Function.java
index 0aa596ab..1e3fc795 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/Function.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/Function.java
@@ -22,9 +22,30 @@ package opennlp.tools.ml.maxent.quasinewton;
  */
 public interface Function {
 
+  /**
+   * @return The dimension of the function's input vector.
+   */
   int getDimension();
 
+  /**
+   * Computes the function value for {@code x}.
+   *
+   * @param x The input vector.
+   * @return The computed value for {@code x}.
+   *
+   * @apiNote The parameter {@code x} should be validated and, if inconsistencies are
+   * encountered, an appropriate exception should be raised,
+   * e.g. {@link IllegalArgumentException}.
+   */
   double valueAt(double[] x);
 
+  /**
+   * Computes the gradient for {@code x}.
+   *
+   * @param x The input vector.
+   * @return The computed gradient for {@code x}.
+   *
+   * @apiNote The parameter {@code x} should be validated and, if inconsistencies are
+   * encountered, an appropriate exception should be raised,
+   * e.g. {@link IllegalArgumentException}.
+   */
   double[] gradientAt(double[] x);
 }

diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/LineSearch.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/LineSearch.java
index e66b18d0..2808612e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/LineSearch.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/LineSearch.java
@@ -20,17 +20,25 @@ package opennlp.tools.ml.maxent.quasinewton;
 import opennlp.tools.ml.ArrayMath;
 
 /**
- * Class that performs line search to find minimum.
+ * Performs line search to find a minimum.
+ *
+ * @see <a href="https://link.springer.com/book/10.1007/978-0-387-40065-5">
+ *   Nocedal &amp; Wright 2006, Numerical Optimization</a>, p. 37
 */
 public class LineSearch {
   private static final double C = 0.0001;
   private static final double RHO = 0.5; // decrease of step size (must be from 0 to 1)
 
   /**
-   * Conducts a backtracking line search (see Nocedal & Wright 2006, Numerical Optimization, p. 37).
+   * Conducts a backtracking line search.
+   *
+   * @param function The {@link Function} to apply.
+   * @param direction The {@code double[]} representing the direction along which to search.
+   * @param lsr The {@link LineSearchResult} to transport results in.
+   * @param initialStepSize The initial step size to apply. Must be greater than {@code 0}.
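+   *
+   * @apiNote A minimal, illustrative sketch (a 1-D quadratic; not part of the library).
+   * The backtracking loop shrinks the step by {@code RHO} until the sufficient-decrease
+   * condition {@code f(x + step * d) <= f(x) + C * step * grad(x)' * d} is met:
+   * <blockquote><pre>
+   * Function f = new Function() {
+   *   public int getDimension() { return 1; }
+   *   public double valueAt(double[] x) { return x[0] * x[0]; }
+   *   public double[] gradientAt(double[] x) { return new double[] { 2 * x[0] }; }
+   * };
+   * double[] x0 = { 5.0 };
+   * double[] direction = { -f.gradientAt(x0)[0] }; // steepest descent
+   * LineSearchResult lsr = LineSearchResult.getInitialObject(f.valueAt(x0), f.gradientAt(x0), x0);
+   * LineSearch.doLineSearch(f, direction, lsr, 1.0);
+   * double[] x1 = lsr.getNextPoint(); // the accepted point
+   * </pre></blockquote>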
   */
-  public static void doLineSearch(Function function,
-      double[] direction, LineSearchResult lsr, double initialStepSize) {
+  public static void doLineSearch(Function function, double[] direction,
+                                  LineSearchResult lsr, double initialStepSize) {
     double stepSize = initialStepSize;
     int currFctEvalCount = lsr.getFctEvalCount();
     double[] x = lsr.getNextPoint();
@@ -79,6 +87,12 @@ public class LineSearch {
   /**
    * Conducts a constrained line search (see section 3.2 in the paper "Scalable Training
    * of L1-Regularized Log-Linear Models", Andrew et al. 2007)
+   *
+   * @param function The {@link Function} to apply.
+   * @param direction The {@code double[]} representing the direction along which to search.
+   * @param lsr The {@link LineSearchResult} to transport results in.
+   * @param l1Cost The L1-regularization cost. Must be equal to or greater than {@code 0}.
+   * @param initialStepSize The initial step size to apply. Must be greater than {@code 0}.
    */
   public static void doConstrainedLineSearch(Function function,
       double[] direction, LineSearchResult lsr, double l1Cost, double initialStepSize) {
@@ -146,7 +160,8 @@ public class LineSearch {
   // ------------------------------------------------------------------------------------- //
 
   /**
-   * Represents a LineSearch result.
+   * Represents a {@link LineSearch} result encapsulating the relevant data
+   * at a point in time during computation.
    */
   public static class LineSearchResult {
 
@@ -162,72 +177,46 @@ public class LineSearch {
     private double[] signVector;
 
     /**
-     * Constructor
+     * Initializes a {@link LineSearchResult} object with the specified parameters.
      */
-    public LineSearchResult(
-        double stepSize,
-        double valueAtCurr,
-        double valueAtNext,
-        double[] gradAtCurr,
-        double[] gradAtNext,
-        double[] currPoint,
-        double[] nextPoint,
-        int fctEvalCount)
+    public LineSearchResult(double stepSize, double valueAtCurr, double valueAtNext,
+                            double[] gradAtCurr, double[] gradAtNext, double[] currPoint,
+                            double[] nextPoint, int fctEvalCount)
     {
       setAll(stepSize, valueAtCurr, valueAtNext, gradAtCurr, gradAtNext,
           currPoint, nextPoint, fctEvalCount);
     }
 
     /**
-     * Constructor with sign vector
+     * Initializes a {@link LineSearchResult} object with the specified parameters,
+     * including a sign vector.
      */
-    public LineSearchResult(
-        double stepSize,
-        double valueAtCurr,
-        double valueAtNext,
-        double[] gradAtCurr,
-        double[] gradAtNext,
-        double[] pseudoGradAtNext,
-        double[] currPoint,
-        double[] nextPoint,
-        double[] signVector,
-        int fctEvalCount)
+    public LineSearchResult(double stepSize, double valueAtCurr, double valueAtNext,
+                            double[] gradAtCurr, double[] gradAtNext, double[] pseudoGradAtNext,
+                            double[] currPoint, double[] nextPoint, double[] signVector,
+                            int fctEvalCount)
     {
       setAll(stepSize, valueAtCurr, valueAtNext, gradAtCurr, gradAtNext,
           pseudoGradAtNext, currPoint, nextPoint, signVector, fctEvalCount);
     }
 
     /**
-     * Update line search elements
+     * Updates line search elements.
      */
-    public void setAll(
-        double stepSize,
-        double valueAtCurr,
-        double valueAtNext,
-        double[] gradAtCurr,
-        double[] gradAtNext,
-        double[] currPoint,
-        double[] nextPoint,
-        int fctEvalCount)
+    public void setAll(double stepSize, double valueAtCurr, double valueAtNext,
+                       double[] gradAtCurr, double[] gradAtNext, double[] currPoint,
+                       double[] nextPoint, int fctEvalCount)
     {
       setAll(stepSize, valueAtCurr, valueAtNext, gradAtCurr, gradAtNext,
           null, currPoint, nextPoint, null, fctEvalCount);
     }
 
     /**
-     * Update line search elements
+     * Updates line search elements, including a sign vector.
      */
-    public void setAll(
-        double stepSize,
-        double valueAtCurr,
-        double valueAtNext,
-        double[] gradAtCurr,
-        double[] gradAtNext,
-        double[] pseudoGradAtNext,
-        double[] currPoint,
-        double[] nextPoint,
-        double[] signVector,
-        int fctEvalCount)
+    public void setAll(double stepSize, double valueAtCurr, double valueAtNext,
+                       double[] gradAtCurr, double[] gradAtNext, double[] pseudoGradAtNext,
+                       double[] currPoint, double[] nextPoint, double[] signVector,
+                       int fctEvalCount)
     {
       this.stepSize = stepSize;
       this.valueAtCurr = valueAtCurr;
@@ -326,35 +315,50 @@ public class LineSearch {
     }
 
     /**
-     * Initial linear search object.
+     * Initial line search object.
+     *
+     * @param valueAtX The value at {@code x}.
+     * @param gradAtX The gradient at {@code x}.
+     * @param x The input {@code double[]} vector.
+     *
+     * @return The {@link LineSearchResult} holding the results.
      */
-    public static LineSearchResult getInitialObject(
-        double valueAtX,
-        double[] gradAtX,
-        double[] x)
-    {
+    public static LineSearchResult getInitialObject(double valueAtX, double[] gradAtX,
+                                                    double[] x) {
       return getInitialObject(valueAtX, gradAtX, null, x, null, 0);
     }
 
     /**
     * Initial line search object for L1-regularization.
+     *
+     * @param valueAtX The value at {@code x}.
+     * @param gradAtX The gradient at {@code x}.
+     * @param pseudoGradAtX The pseudo-gradient at {@code x}.
+     * @param x The input {@code double[]} vector.
+     *
+     * @return The {@link LineSearchResult} holding the results.
      */
-    public static LineSearchResult getInitialObjectForL1(
-        double valueAtX,
-        double[] gradAtX,
-        double[] pseudoGradAtX,
-        double[] x)
-    {
+    public static LineSearchResult getInitialObjectForL1(double valueAtX, double[] gradAtX,
+                                                         double[] pseudoGradAtX, double[] x) {
       return getInitialObject(valueAtX, gradAtX, pseudoGradAtX, x, new double[x.length], 0);
     }
 
-    public static LineSearchResult getInitialObject(
-        double valueAtX,
-        double[] gradAtX,
-        double[] pseudoGradAtX,
-        double[] x,
-        double[] signX,
-        int fctEvalCount) {
+    /**
+     * Initial line search object for L1-regularization.
+     *
+     * @param valueAtX The value at {@code x}.
+     * @param gradAtX The gradient at {@code x}.
+     * @param pseudoGradAtX The pseudo-gradient at {@code x}.
+     * @param x The input {@code double[]} vector.
+     * @param signX The sign {@code double[]} vector for {@code x}.
+     * @param fctEvalCount The number of function evaluations.
+     *                     Must be equal to or greater than {@code 0}.
+     *
+     * @return The {@link LineSearchResult} holding the results.
+     */
+    public static LineSearchResult getInitialObject(double valueAtX, double[] gradAtX,
+                                                    double[] pseudoGradAtX, double[] x,
+                                                    double[] signX, int fctEvalCount) {
       return new LineSearchResult(0.0, 0.0, valueAtX, new double[x.length], gradAtX,
           pseudoGradAtX, new double[x.length], x, signX, fctEvalCount);
     }

diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/NegLogLikelihood.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/NegLogLikelihood.java
index 6ef25eb3..ace8cb97 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/NegLogLikelihood.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/NegLogLikelihood.java
@@ -24,7 +24,9 @@ import opennlp.tools.ml.model.DataIndexer;
 import opennlp.tools.ml.model.OnePassRealValueDataIndexer;
 
 /**
- * Evaluate negative log-likelihood and its gradient from {@link DataIndexer}.
+ * Evaluates negative log-likelihood and its gradient from {@link DataIndexer}.
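+ * <p>
+ * In the standard maximum entropy setting this amounts to computing, for model
+ * parameters {@code x} and indexed training events with outcome {@code o_i}
+ * observed in context {@code c_i} (illustrative notation, not API):
+ * {@code L(x) = - SUM_i log p(o_i | c_i; x)}, together with its gradient.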
+ *
+ * @see Function
 */
 public class NegLogLikelihood implements Function {

diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java
index b681e0b3..c95a7634 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java
@@ -33,7 +33,10 @@ import opennlp.tools.ml.ArrayMath;
 import opennlp.tools.ml.model.DataIndexer;
 
 /**
- * Evaluate negative log-likelihood and its gradient in parallel
+ * Evaluates {@link NegLogLikelihood negative log-likelihood} and
+ * its gradient in parallel.
+ *
+ * @see Function
 */
 public class ParallelNegLogLikelihood extends NegLogLikelihood {
 
@@ -67,7 +70,7 @@ public class ParallelNegLogLikelihood extends NegLogLikelihood {
   }
 
   /**
-   * Computes the negative log-likelihood.
+   * Computes the function value for {@code x}.
    *
    * @param x The input.
    * @return Returns the computed negative log-likelihood.
@@ -92,7 +95,7 @@ public class ParallelNegLogLikelihood extends NegLogLikelihood {
   }
 
   /**
-   * Computes the gradient.
+   * Computes the gradient for {@code x}.
    *
    * @param x The input.
    * @return Returns the computed gradient.

diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java
index 194500f7..c5493dde 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java
@@ -24,8 +24,9 @@ import opennlp.tools.ml.ArrayMath;
 import opennlp.tools.ml.maxent.quasinewton.LineSearch.LineSearchResult;
 
 /**
- * Implementation of L-BFGS which supports L1-, L2-regularization
- * and Elastic Net for solving convex optimization problems.
+ * Implementation of the <a href="https://users.umiacs.umd.edu/~hal/docs/daume04cg-bfgs">
+ * Limited-memory Broyden-Fletcher-Goldfarb-Shanno algorithm</a> (L-BFGS) which
+ * supports L1-, L2-regularization and Elastic Net for solving convex optimization problems.
 *
 * <p>
 * Usage example:
@@ -55,6 +56,10 @@ import opennlp.tools.ml.maxent.quasinewton.LineSearch.LineSearchResult;
 * double[] x = minimizer.minimize(f);
 * double min = f.valueAt(x);
 * </pre></blockquote>
+ *
+ * @see <a href="https://link.springer.com/chapter/10.1007/978-0-387-40065-5_6">
+ *   Quasi-Newton Methods</a> in: <a href="https://link.springer.com/book/10.1007/978-0-387-40065-5">
+ *   Nocedal J, Wright SJ. Numerical optimization. 2nd ed. New York: Springer; 2006</a>.
 */
 public class QNMinimizer {
 
@@ -66,25 +71,25 @@ public class QNMinimizer {
   // Relative gradient norm tolerance
   public static final double REL_GRAD_NORM_TOL = 1e-4;
 
-  // Initial step size
+  /** The initial step size: {@code 1.0}. */
   public static final double INITIAL_STEP_SIZE = 1.0;
 
-  // Minimum step size
+  /** The minimum step size: {@code 1e-10}. */
   public static final double MIN_STEP_SIZE = 1e-10;
 
-  // Default L1-cost
+  /** The default L1-cost value is {@code 0.0d}. */
   public static final double L1COST_DEFAULT = 0;
 
-  // Default L2-cost
+  /** The default L2-cost value is {@code 0.0d}. */
   public static final double L2COST_DEFAULT = 0;
 
-  // Default number of iterations
+  /**
+   * The default number of iterations is {@code 100}.
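+   * Used when no explicit iteration count is configured,
+   * e.g. by {@link #QNMinimizer(double, double)}.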
+   */
   public static final int NUM_ITERATIONS_DEFAULT = 100;
 
-  // Default number of Hessian updates to store
+  /** The default number of Hessian updates to store is {@code 15}. */
   public static final int M_DEFAULT = 15;
 
-  // Default maximum number of function evaluations
+  /** The default maximum number of function evaluations is {@code 30000}. */
   public static final int MAX_FCT_EVAL_DEFAULT = 30000;
 
   // L1-regularization cost
@@ -113,7 +118,8 @@ public class QNMinimizer {
   private Evaluator evaluator;
 
   /**
-   * Initializes a {@link QNMinimizer} with default parameters.
+   * Initializes a {@link QNMinimizer} with default parameters
+   * (see: {@link #L1COST_DEFAULT} and {@link #L2COST_DEFAULT}).
    */
   public QNMinimizer() {
     this(L1COST_DEFAULT, L2COST_DEFAULT);
   }
@@ -122,19 +128,23 @@
   /**
    * Initializes a {@link QNMinimizer}.
    *
-   * @param l1Cost The L1-regularization cost.
-   * @param l2Cost The L2-regularization cost.
+   * @param l1Cost The L1-regularization cost. Must be equal to or greater than {@code 0}.
+   * @param l2Cost The L2-regularization cost. Must be equal to or greater than {@code 0}.
+   *
+   * @throws IllegalArgumentException Thrown if one of the parameters is invalid.
    */
   public QNMinimizer(double l1Cost, double l2Cost) {
     this(l1Cost, l2Cost, NUM_ITERATIONS_DEFAULT);
   }
 
   /**
-   * Initializes a {@link QNMinimizer}.
+   * Initializes a {@link QNMinimizer} with L1/L2 costs and an iteration limit.
+   *
+   * @param l1Cost The L1-regularization cost. Must be equal to or greater than {@code 0}.
+   * @param l2Cost The L2-regularization cost. Must be equal to or greater than {@code 0}.
+   * @param iterations The maximum number of iterations. Must be greater than {@code 0}.
    *
-   * @param l1Cost The L1-regularization cost.
-   * @param l2Cost The L2-regularization cost.
-   * @param iterations The maximum number of iterations.
+   * @throws IllegalArgumentException Thrown if one of the parameters is invalid.
    */
   public QNMinimizer(double l1Cost, double l2Cost, int iterations) {
     this(l1Cost, l2Cost, iterations, M_DEFAULT, MAX_FCT_EVAL_DEFAULT);
   }
 
@@ -143,11 +153,13 @@
   /**
    * Initializes a {@link QNMinimizer}.
    *
-   * @param l1Cost The L1-regularization cost.
-   * @param l2Cost The L2-regularization cost.
-   * @param iterations The maximum number of iterations.
-   * @param m The number of Hessian updates to store.
-   * @param maxFctEval The maximum number of function evaluations.
+   * @param l1Cost The L1-regularization cost. Must be equal to or greater than {@code 0}.
+   * @param l2Cost The L2-regularization cost. Must be equal to or greater than {@code 0}.
+   * @param iterations The maximum number of iterations. Must be greater than {@code 0}.
+   * @param m The number of Hessian updates to store. Must be greater than {@code 0}.
+   * @param maxFctEval The maximum number of function evaluations. Must be greater than {@code 0}.
+   *
+   * @throws IllegalArgumentException Thrown if one of the parameters is invalid.
    */
   public QNMinimizer(double l1Cost, double l2Cost, int iterations,
                      int m, int maxFctEval)
@@ -308,12 +320,14 @@
   }
 
   /**
-   * Pseudo-gradient for L1-regularization (see equation 4 in the paper
-   * "Scalable Training of L1-Regularized Log-Linear Models", Andrew et al. 2007)
+   * Computes the pseudo-gradient for L1-regularization.
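+   * <p>
+   * A sketch of the computation (cf. the equation referenced below): for each
+   * component {@code i} with gradient {@code g_i} and L1 cost {@code C},
+   * the pseudo-gradient is
+   * <ul>
+   *   <li>{@code g_i + C * sign(x_i)}, if {@code x_i != 0};</li>
+   *   <li>{@code g_i + C}, if {@code x_i == 0} and {@code g_i + C < 0};</li>
+   *   <li>{@code g_i - C}, if {@code x_i == 0} and {@code g_i - C > 0};</li>
+   *   <li>{@code 0}, otherwise.</li>
+   * </ul>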
+   *
+   * @implNote See equation 4 in <a href="https://doi.org/10.1145/1273496.1273501">
+   * "Scalable Training of L1-Regularized Log-Linear Models"</a>, by Andrew and Gao, 2007.
    *
-   * @param x current point
-   * @param g gradient at x
-   * @param pg pseudo-gradient at x which is to be computed
+   * @param x The current point.
+   * @param g The gradient at {@code x}.
+   * @param pg The pseudo-gradient at {@code x} which is to be computed.
    */
   private void computePseudoGrad(double[] x, double[] g, double[] pg) {
     for (int i = 0; i < dimension; i++) {
@@ -340,7 +354,9 @@
   }
 
   /**
-   * L-BFGS two-loop recursion (see Nocedal & Wright 2006, Numerical Optimization, p. 178)
+   * L-BFGS two-loop recursion (see
+   * <a href="https://link.springer.com/book/10.1007/978-0-387-40065-5">
+   * Nocedal &amp; Wright 2006, Numerical Optimization</a>, p. 178).
    */
   private void computeDirection(double[] direction) {
 
@@ -519,7 +535,7 @@
   }
 
   /**
-   * Evaluate quality of training parameters. For example,
+   * Evaluates the quality of training parameters. For example,
    * it can be used to report model's training accuracy when
    * we train a Maximum Entropy classifier.
    */

diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java
index 358deef5..39c01746 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java
@@ -19,18 +19,23 @@ package opennlp.tools.ml.maxent.quasinewton;
 
 import opennlp.tools.ml.ArrayMath;
 import opennlp.tools.ml.model.AbstractModel;
+import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.ml.model.Context;
 
 /**
- * A maximum entropy model which has been trained using the Quasi Newton (QN) algorithm.
+ * A {@link MaxentModel maximum entropy model} which has been trained via the
+ * <a href="https://users.umiacs.umd.edu/~hal/docs/daume04cg-bfgs">L-BFGS</a> algorithm,
+ * which belongs to the group of Quasi-Newton (QN) algorithms.
 *
 * @see AbstractModel
+ * @see MaxentModel
+ * @see QNTrainer
 */
 public class QNModel extends AbstractModel {
 
   /**
-   * Initializes a {@link QNModel} with the specified parameters, outcome names, and
-   * predicate/feature labels.
+   * Initializes a {@link QNModel} with the specified parameters,
+   * predicate/feature labels, and outcome names.
    *
    * @param params The {@link Context parameters} of the model.
    * @param predLabels The names of the predicates used in this model.

diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java
index ff22651a..5a783558 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java
@@ -25,6 +25,7 @@ import java.util.Map;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import opennlp.tools.commons.Trainer;
 import opennlp.tools.ml.AbstractEventTrainer;
 import opennlp.tools.ml.ArrayMath;
 import opennlp.tools.ml.maxent.quasinewton.QNMinimizer.Evaluator;
@@ -34,10 +35,13 @@ import opennlp.tools.ml.model.DataIndexer;
 import opennlp.tools.util.TrainingParameters;
 
 /**
- * A Maxent model {@link opennlp.tools.commons.Trainer} using L-BFGS algorithm.
+ * A Maxent model {@link Trainer trainer} using the
+ * <a href="https://users.umiacs.umd.edu/~hal/docs/daume04cg-bfgs">L-BFGS</a> algorithm.
 *
- * @see QNModel
 * @see AbstractEventTrainer
+ * @see QNMinimizer
+ * @see QNModel
+ * @see Trainer
 */
 public class QNTrainer extends AbstractEventTrainer {
 
@@ -49,17 +53,25 @@ public class QNTrainer extends AbstractEventTrainer {
   public static final int THREADS_DEFAULT = 1;
 
   public static final String L1COST_PARAM = "L1Cost";
+
+  /** The default L1-cost value is {@code 0.1d}. */
   public static final double L1COST_DEFAULT = 0.1;
 
   public static final String L2COST_PARAM = "L2Cost";
+
+  /** The default L2-cost value is {@code 0.1d}. */
   public static final double L2COST_DEFAULT = 0.1;
 
   // Number of Hessian updates to store
   public static final String M_PARAM = "NumOfUpdates";
+
+  /** The default number of Hessian updates to store is {@code 15}. */
   public static final int M_DEFAULT = 15;
 
   // Maximum number of function evaluations
   public static final String MAX_FCT_EVAL_PARAM = "MaxFctEval";
+
+  /** The default maximum number of function evaluations is {@code 30000}. */
   public static final int MAX_FCT_EVAL_DEFAULT = 30000;
 
   // Number of threads
@@ -77,8 +89,8 @@ public class QNTrainer extends AbstractEventTrainer {
 
   /**
    * Initializes a {@link QNTrainer}.
-   * <p>
-   * <b>Note:</b><br>
+   *
+   * @implNote
    * The resulting instance does not print progress messages about training to STDOUT.
    */
   public QNTrainer() {
@@ -86,7 +98,7 @@
   }
 
   /**
-   * Initializes a {@link QNTrainer}.
+   * Initializes a {@link QNTrainer} with the specified {@code parameters}.
    *
    * @param parameters The {@link TrainingParameters} to use.
    */
@@ -95,16 +107,16 @@
   }
 
   /**
-   * Initializes a {@link QNTrainer}.
+   * Initializes a {@link QNTrainer} with the specified parameter {@code m}.
    *
    * @param m The number of Hessian updates to store.
    */
-  public QNTrainer(int m ) {
+  public QNTrainer(int m) {
     this(m, MAX_FCT_EVAL_DEFAULT);
   }
 
   /**
-   * Initializes a {@link QNTrainer}.
+   * Initializes a {@link QNTrainer} with the specified parameters.
    *
    * @param m The number of Hessian updates to store.
+   * @param maxFctEval The maximum number of function evaluations.
    */
@@ -133,17 +145,17 @@
     String algorithmName = getAlgorithm();
     if (algorithmName != null &&
         !(MAXENT_QN_VALUE.equals(algorithmName))) {
-      throw new IllegalArgumentException("algorithmName must be MAXENT_QN");
+      throw new IllegalArgumentException("algorithmName must be " + MAXENT_QN_VALUE);
     }
 
     // Number of Hessian updates to remember
-    if (m < 0) {
+    if (m <= 0) {
       throw new IllegalArgumentException(
-          "Number of Hessian updates to remember must be >= 0");
+          "Number of Hessian updates to remember must be > 0");
     }
 
     // Maximum number of function evaluations
-    if (maxFctEval < 0) {
+    if (maxFctEval <= 0) {
       throw new IllegalArgumentException(
-          "Maximum number of function evaluations must be >= 0");
+          "Maximum number of function evaluations must be > 0");
     }
@@ -175,7 +187,7 @@
   }
 
   /**
-   * Trains a model using the QN algorithm.
+   * Trains a {@link QNModel model} using the QN algorithm.
    *
    * @param iterations The number of QN iterations to perform.
    * @param indexer The {@link DataIndexer} used to compress events in memory.
@@ -227,6 +239,8 @@
 
   /**
    * For measuring a model's training accuracy.
+   *
+   * @param indexer A valid {@link DataIndexer} instance.
   */
  private record ModelEvaluator(DataIndexer indexer) implements Evaluator {

diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java
index 9d434e51..97823b23 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java
@@ -86,8 +86,7 @@ public abstract class AbstractModel implements MaxentModel {
   * Return the name of the outcome corresponding to the highest likelihood
   * in the parameter ocs.
   *
-   * @param ocs A double[] as returned by the eval(String[] context)
-   *            method.
+   * @param ocs A {@code double[]} as returned by the {@link #eval(String[])} method.
   * @return The name of the most likely outcome.
   */
  @Override
@@ -131,7 +130,7 @@ public abstract class AbstractModel implements MaxentModel {
 
  /**
   * @param i An outcome id.
-   * @return Retrieves the name of the outcome associated with that id.
+   * @return The name of the outcome associated with the id {@code i}.
   */
  @Override
  public final String getOutcome(int i) {
@@ -159,11 +158,11 @@ public abstract class AbstractModel implements MaxentModel {
  }
 
  /**
-   * Provides the fundamental data structures which encode the maxent model
-   * information. Note: This method will usually only be needed by
-   * {@link opennlp.tools.ml.maxent.io.GISModelWriter GIS model writers}.
+   * Provides the fundamental data structures which encode the {@link MaxentModel}
+   * information.
+   *
   * <p>
-   * The following values are held in the Object array which is returned by this method:
+   * The following values are held in the {@code Object[]} which is returned by this method:
   * <ul>
   * <li>index 0: {@link Context} array containing the model parameters.</li>
   * <li>index 1: {@link Map} containing the mapping of model predicates
   *     to unique integers.</li>
   * <li>index 2: {@link String} array containing the names of the model outcomes.</li>
   * </ul>
   *
   * @return An {@link Object} array with the values as described above.
+   *
+   * @implNote This method will usually only be needed by
+   * {@link opennlp.tools.ml.maxent.io.GISModelWriter GIS model writers}.
   */
  public final Object[] getDataStructures() {
    Object[] data = new Object[3];

diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizerTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizerTest.java
index 7411f090..9c2ca50a 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizerTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizerTest.java
@@ -68,9 +68,7 @@ public class QNMinimizerTest {
  }
 
  /**
-   * <a href="https://en.wikipedia.org/wiki/Rosenbrock_function">Rosenbrock function</a>:
-   * <p>
-   * {@code f(x,y) = (1-x)^2 + 100*(y-x^2)^2}
+   * For {@code f(x,y) = (1-x)^2 + 100*(y-x^2)^2}, <br/>
   * {@code f(x,y)} is non-convex and has a global minimum at {@code (x,y) = (1,1)} where {@code f(x,y) = 0},
   * <p>
   * with partial derivatives
   * <ul>
   *  <li>{@code f_x = -2*(1-x) - 400*(y-x^2)*x}</li>
   *  <li>{@code f_y = 200*(y-x^2)}</li>
   * </ul>
+   *
+   * @see <a href="https://doi.org/10.1093/comjnl/3.3.175">An automatic method for finding the greatest
+   * or least value of a function</a>, by H. H. Rosenbrock. In: The Computer Journal, 3(3):175–184, 1960.
   */
  public static class Rosenbrock implements Function {
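
    // The excerpt ends here; as an illustrative sketch (not the committed code),
    // the class body implied by the formulas documented above would look like this:

    @Override
    public int getDimension() {
      return 2;
    }

    @Override
    public double valueAt(double[] x) {
      // f(x,y) = (1-x)^2 + 100*(y-x^2)^2
      double a = 1 - x[0];
      double b = x[1] - x[0] * x[0];
      return a * a + 100 * b * b;
    }

    @Override
    public double[] gradientAt(double[] x) {
      // f_x = -2*(1-x) - 400*(y-x^2)*x and f_y = 200*(y-x^2)
      double b = x[1] - x[0] * x[0];
      return new double[] { -2 * (1 - x[0]) - 400 * b * x[0], 200 * b };
    }
  }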
