This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/main by this push:
new 7d4b450f OPENNLP-1669: Improve JavaDoc of QN related classes (#709)
7d4b450f is described below
commit 7d4b450fc4f0cd76a3de220bc2383b5d98b12498
Author: Martin Wiesner <[email protected]>
AuthorDate: Fri Dec 13 19:40:07 2024 +0100
OPENNLP-1669: Improve JavaDoc of QN related classes (#709)
---
.../tools/ml/maxent/quasinewton/Function.java | 21 ++++
.../tools/ml/maxent/quasinewton/LineSearch.java | 140 +++++++++++----------
.../ml/maxent/quasinewton/NegLogLikelihood.java | 4 +-
.../quasinewton/ParallelNegLogLikelihood.java | 9 +-
.../tools/ml/maxent/quasinewton/QNMinimizer.java | 72 ++++++-----
.../tools/ml/maxent/quasinewton/QNModel.java | 11 +-
.../tools/ml/maxent/quasinewton/QNTrainer.java | 38 ++++--
.../java/opennlp/tools/ml/model/AbstractModel.java | 16 +--
.../ml/maxent/quasinewton/QNMinimizerTest.java | 7 +-
9 files changed, 193 insertions(+), 125 deletions(-)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/Function.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/Function.java
index 0aa596ab..1e3fc795 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/Function.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/Function.java
@@ -22,9 +22,30 @@ package opennlp.tools.ml.maxent.quasinewton;
*/
public interface Function {
+ /**
+ * @return Retrieves the dimension value.
+ */
int getDimension();
+ /**
+ * Computes the function value for {@code x}.
+ *
+ * @param x The input vector.
+ * @return Returns the computed value for {@code x}.
+ *
+ * @apiNote The parameter {@code x} should be validated and if inconsistencies are encountered
+ * an appropriate exception should be raised, e.g. {@link IllegalArgumentException}.
+ */
double valueAt(double[] x);
+ /**
+ * Computes the gradient for {@code x}.
+ *
+ * @param x The input vector.
+ * @return Returns the computed gradient for {@code x}.
+ *
+ * @apiNote The parameter {@code x} should be validated and if inconsistencies are encountered
+ * an appropriate exception should be raised, e.g. {@link IllegalArgumentException}.
+ */
double[] gradientAt(double[] x);
}
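For context, the contract above is small enough to show end-to-end. The following is a minimal, hypothetical implementation (not part of this commit) of the convex function f(x) = x_1^2 + ... + x_n^2 with gradient 2x, validating its input as the new @apiNote suggests:

    import opennlp.tools.ml.maxent.quasinewton.Function;

    // Hypothetical example, not part of this commit.
    public class SumOfSquares implements Function {

      private final int dimension;

      public SumOfSquares(int dimension) {
        this.dimension = dimension;
      }

      @Override
      public int getDimension() {
        return dimension;
      }

      @Override
      public double valueAt(double[] x) {
        checkInput(x);
        double sum = 0.0;
        for (double xi : x) {
          sum += xi * xi;
        }
        return sum;
      }

      @Override
      public double[] gradientAt(double[] x) {
        checkInput(x);
        double[] grad = new double[dimension];
        for (int i = 0; i < dimension; i++) {
          grad[i] = 2.0 * x[i];
        }
        return grad;
      }

      // Raises an IllegalArgumentException on inconsistent input, per the @apiNote.
      private void checkInput(double[] x) {
        if (x == null || x.length != dimension) {
          throw new IllegalArgumentException("x must be a non-null vector of length " + dimension);
        }
      }
    }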
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/LineSearch.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/LineSearch.java
index e66b18d0..2808612e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/LineSearch.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/LineSearch.java
@@ -20,17 +20,25 @@ package opennlp.tools.ml.maxent.quasinewton;
import opennlp.tools.ml.ArrayMath;
/**
- * Class that performs line search to find minimum.
+ * Performs line search to find a minimum.
+ *
+ * @see <a href="https://link.springer.com/book/10.1007/978-0-387-40065-5">
+ * Nocedal & Wright 2006, Numerical Optimization</a>, p. 37
*/
public class LineSearch {
private static final double C = 0.0001;
private static final double RHO = 0.5; // decrease of step size (must be from 0 to 1)
/**
- * Conducts a backtracking line search (see Nocedal & Wright 2006, Numerical Optimization, p. 37).
+ * Conducts a backtracking line search.
+ *
+ * @param function The {@link Function} to apply.
+ * @param direction The {@code double[]} representing the direction to search in.
+ * @param lsr The {@link LineSearchResult} to transport results in.
+ * @param initialStepSize The initial step size to apply. Must be greater than {@code 0}.
+ */
- public static void doLineSearch(Function function,
- double[] direction, LineSearchResult lsr, double initialStepSize) {
+ public static void doLineSearch(Function function, double[] direction,
+                                 LineSearchResult lsr, double initialStepSize) {
double stepSize = initialStepSize;
int currFctEvalCount = lsr.getFctEvalCount();
double[] x = lsr.getNextPoint();
@@ -79,6 +87,12 @@ public class LineSearch {
/**
* Conducts a constrained line search (see section 3.2 in the paper "Scalable Training
* of L1-Regularized Log-Linear Models", Andrew et al. 2007)
+ *
+ * @param function The {@link Function} to apply.
+ * @param direction The {@code double[]} representing the direction to search in.
+ * @param lsr The {@link LineSearchResult} to transport results in.
+ * @param l1Cost The L1-regularization cost. Must be equal to or greater than {@code 0}.
+ * @param initialStepSize The initial step size to apply. Must be greater than {@code 0}.
+ */
public static void doConstrainedLineSearch(Function function,
double[] direction, LineSearchResult lsr, double l1Cost, double initialStepSize) {
@@ -146,7 +160,8 @@ public class LineSearch {
// -------------------------------------------------------------------------------------
//
/**
- * Represents a LineSearch result.
+ * Represents a {@link LineSearch} result encapsulating the relevant data
+ * at a point in time during computation.
*/
public static class LineSearchResult {
@@ -162,72 +177,46 @@ public class LineSearch {
private double[] signVector;
/**
- * Constructor
+ * Initializes a {@link LineSearchResult} object with the specified parameters.
*/
- public LineSearchResult(
- double stepSize,
- double valueAtCurr,
- double valueAtNext,
- double[] gradAtCurr,
- double[] gradAtNext,
- double[] currPoint,
- double[] nextPoint,
- int fctEvalCount)
+ public LineSearchResult(double stepSize, double valueAtCurr, double valueAtNext,
+                         double[] gradAtCurr, double[] gradAtNext, double[] currPoint,
+                         double[] nextPoint, int fctEvalCount)
{
setAll(stepSize, valueAtCurr, valueAtNext, gradAtCurr, gradAtNext,
currPoint, nextPoint, fctEvalCount);
}
/**
- * Constructor with sign vector
+ * Initializes a {@link LineSearchResult} object with the specified parameters, including a sign vector.
*/
- public LineSearchResult(
- double stepSize,
- double valueAtCurr,
- double valueAtNext,
- double[] gradAtCurr,
- double[] gradAtNext,
- double[] pseudoGradAtNext,
- double[] currPoint,
- double[] nextPoint,
- double[] signVector,
- int fctEvalCount)
+ public LineSearchResult(double stepSize, double valueAtCurr, double valueAtNext,
+                         double[] gradAtCurr, double[] gradAtNext, double[] pseudoGradAtNext,
+                         double[] currPoint, double[] nextPoint, double[] signVector,
+                         int fctEvalCount)
{
setAll(stepSize, valueAtCurr, valueAtNext, gradAtCurr, gradAtNext,
pseudoGradAtNext, currPoint, nextPoint, signVector, fctEvalCount);
}
/**
- * Update line search elements
+ * Updates line search elements.
*/
- public void setAll(
- double stepSize,
- double valueAtCurr,
- double valueAtNext,
- double[] gradAtCurr,
- double[] gradAtNext,
- double[] currPoint,
- double[] nextPoint,
- int fctEvalCount)
+ public void setAll(double stepSize, double valueAtCurr, double valueAtNext,
+                    double[] gradAtCurr, double[] gradAtNext, double[] currPoint,
+                    double[] nextPoint, int fctEvalCount)
{
setAll(stepSize, valueAtCurr, valueAtNext, gradAtCurr, gradAtNext,
null, currPoint, nextPoint, null, fctEvalCount);
}
/**
- * Update line search elements
+ * Updates line search elements.
*/
- public void setAll(
- double stepSize,
- double valueAtCurr,
- double valueAtNext,
- double[] gradAtCurr,
- double[] gradAtNext,
- double[] pseudoGradAtNext,
- double[] currPoint,
- double[] nextPoint,
- double[] signVector,
- int fctEvalCount)
+ public void setAll(double stepSize, double valueAtCurr, double valueAtNext,
+                    double[] gradAtCurr, double[] gradAtNext, double[] pseudoGradAtNext,
+                    double[] currPoint, double[] nextPoint, double[] signVector,
+                    int fctEvalCount)
{
this.stepSize = stepSize;
this.valueAtCurr = valueAtCurr;
@@ -326,35 +315,50 @@ public class LineSearch {
}
/**
- * Initial linear search object.
+ * Initial line search object.
+ *
+ * @param valueAtX The value at {@code x}.
+ * @param gradAtX The gradient at {@code x}.
+ * @param x The input {@code double[]} vector.
+ *
+ * @return The {@link LineSearchResult} holding the results.
*/
- public static LineSearchResult getInitialObject(
- double valueAtX,
- double[] gradAtX,
- double[] x)
- {
+ public static LineSearchResult getInitialObject(double valueAtX, double[] gradAtX,
+                                                 double[] x) {
return getInitialObject(valueAtX, gradAtX, null, x, null, 0);
}
/**
* Initial linear search object for L1-regularization.
+ *
+ * @param valueAtX The value at {@code x}.
+ * @param gradAtX The gradient at {@code x}.
+ * @param pseudoGradAtX The pseudo-gradient at {@code x}.
+ * @param x The input {@code double[]} vector.
+ *
+ * @return The {@link LineSearchResult} holding the results.
*/
- public static LineSearchResult getInitialObjectForL1(
- double valueAtX,
- double[] gradAtX,
- double[] pseudoGradAtX,
- double[] x)
- {
+ public static LineSearchResult getInitialObjectForL1(double valueAtX, double[] gradAtX,
+                                                      double[] pseudoGradAtX, double[] x) {
return getInitialObject(valueAtX, gradAtX, pseudoGradAtX, x, new double[x.length], 0);
}
- public static LineSearchResult getInitialObject(
- double valueAtX,
- double[] gradAtX,
- double[] pseudoGradAtX,
- double[] x,
- double[] signX,
- int fctEvalCount) {
+ /**
+ * Initial line search object for L1-regularization.
+ *
+ * @param valueAtX The value at {@code x}.
+ * @param gradAtX The gradient at {@code x}.
+ * @param pseudoGradAtX The pseudo-gradient at {@code x}.
+ * @param x The input {@code double[]} vector.
+ * @param signX The sign {@code double[]} vector for {@code x}.
+ * @param fctEvalCount The number of function evaluations.
+ * Must be equal to or greater than {@code 0}.
+ *
+ * @return The {@link LineSearchResult} holding the results.
+ */
+ public static LineSearchResult getInitialObject(double valueAtX, double[] gradAtX,
+                                                 double[] pseudoGradAtX, double[] x,
+                                                 double[] signX, int fctEvalCount) {
return new LineSearchResult(0.0, 0.0, valueAtX, new double[x.length], gradAtX,
pseudoGradAtX, new double[x.length], x, signX, fctEvalCount);
}
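As a rough usage sketch for the API above (hypothetical; it relies only on the members visible in this diff: getInitialObject, doLineSearch, and getNextPoint, plus the SumOfSquares example from the Function.java section), a single backtracking step along the steepest-descent direction could be driven like this:

    import opennlp.tools.ml.maxent.quasinewton.Function;
    import opennlp.tools.ml.maxent.quasinewton.LineSearch;
    import opennlp.tools.ml.maxent.quasinewton.LineSearch.LineSearchResult;

    public class LineSearchExample {
      public static void main(String[] args) {
        Function f = new SumOfSquares(2); // hypothetical Function from the earlier sketch
        double[] x = {3.0, -4.0};
        double[] grad = f.gradientAt(x);

        // Steepest-descent direction: the negative gradient.
        double[] direction = new double[grad.length];
        for (int i = 0; i < grad.length; i++) {
          direction[i] = -grad[i];
        }

        LineSearchResult lsr = LineSearchResult.getInitialObject(f.valueAt(x), grad, x);
        LineSearch.doLineSearch(f, direction, lsr, 1.0); // initial step size must be > 0
        double[] next = lsr.getNextPoint();              // point after the accepted step
        System.out.println(java.util.Arrays.toString(next));
      }
    }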
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/NegLogLikelihood.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/NegLogLikelihood.java
index 6ef25eb3..ace8cb97 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/NegLogLikelihood.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/NegLogLikelihood.java
@@ -24,7 +24,9 @@ import opennlp.tools.ml.model.DataIndexer;
import opennlp.tools.ml.model.OnePassRealValueDataIndexer;
/**
- * Evaluate negative log-likelihood and its gradient from {@link DataIndexer}.
+ * Evaluates negative log-likelihood and its gradient from {@link DataIndexer}.
+ *
+ * @see Function
*/
public class NegLogLikelihood implements Function {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java
index b681e0b3..c95a7634 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java
@@ -33,7 +33,10 @@ import opennlp.tools.ml.ArrayMath;
import opennlp.tools.ml.model.DataIndexer;
/**
- * Evaluate negative log-likelihood and its gradient in parallel
+ * Evaluates {@link NegLogLikelihood negative log-likelihood} and
+ * its gradient in parallel.
+ *
+ * @see Function
*/
public class ParallelNegLogLikelihood extends NegLogLikelihood {
@@ -67,7 +70,7 @@ public class ParallelNegLogLikelihood extends NegLogLikelihood {
}
/**
- * Computes the negative log-likelihood.
+ * Computes the function value for {@code x}.
*
* @param x The input.
* @return Returns the computed negative log-likelihood.
@@ -92,7 +95,7 @@ public class ParallelNegLogLikelihood extends NegLogLikelihood {
}
/**
- * Computes the gradient.
+ * Computes the gradient for {@code x}.
*
* @param x The input.
* @return Returns the computed gradient.
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java
index 194500f7..c5493dde 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java
@@ -24,8 +24,9 @@ import opennlp.tools.ml.ArrayMath;
import opennlp.tools.ml.maxent.quasinewton.LineSearch.LineSearchResult;
/**
- * Implementation of L-BFGS which supports L1-, L2-regularization
- * and Elastic Net for solving convex optimization problems.
+ * Implementation of the <a href="https://users.umiacs.umd.edu/~hal/docs/daume04cg-bfgs">
+ * Limited-memory Broyden-Fletcher-Goldfarb-Shanno algorithm</a> (L-BFGS) which
+ * supports L1-, L2-regularization and Elastic Net for solving convex optimization problems.
*
* <p>
* Usage example:
@@ -55,6 +56,10 @@ import opennlp.tools.ml.maxent.quasinewton.LineSearch.LineSearchResult;
* double[] x = minimizer.minimize(f);
* double min = f.valueAt(x);
* </pre></blockquote>
+ *
+ * @see <a href="https://link.springer.com/chapter/10.1007/978-0-387-40065-5_6">
+ * Quasi-Newton Methods</a> in: <a href="https://link.springer.com/book/10.1007/978-0-387-40065-5">
+ * Nocedal J, Wright SJ. Numerical optimization. 2nd ed. New York: Springer; 2006</a>.
*/
public class QNMinimizer {
@@ -66,25 +71,25 @@ public class QNMinimizer {
// Relative gradient norm tolerance
public static final double REL_GRAD_NORM_TOL = 1e-4;
- // Initial step size
+ /** The initial step size: {@code 1.0}. */
public static final double INITIAL_STEP_SIZE = 1.0;
- // Minimum step size
+ /** The minimum step size: {@code 1e-10}. */
public static final double MIN_STEP_SIZE = 1e-10;
- // Default L1-cost
+ /** The default L1-cost value is {@code 0.0d}. */
public static final double L1COST_DEFAULT = 0;
- // Default L2-cost
+ /** The default L2-cost value is {@code 0.0d}. */
public static final double L2COST_DEFAULT = 0;
- // Default number of iterations
+ /** The default number of iterations is {@code 100}. */
public static final int NUM_ITERATIONS_DEFAULT = 100;
- // Default number of Hessian updates to store
+ /** The default number of Hessian updates to store is {@code 15}. */
public static final int M_DEFAULT = 15;
- // Default maximum number of function evaluations
+ /** The default maximum number of function evaluations is {@code 30000}. */
public static final int MAX_FCT_EVAL_DEFAULT = 30000;
// L1-regularization cost
@@ -113,7 +118,8 @@ public class QNMinimizer {
private Evaluator evaluator;
/**
- * Initializes a {@link QNMinimizer} with default parameters.
+ * Initializes a {@link QNMinimizer} with default parameters
+ * (see: {@link #L1COST_DEFAULT} and {@link #L2COST_DEFAULT}).
*/
public QNMinimizer() {
this(L1COST_DEFAULT, L2COST_DEFAULT);
@@ -122,19 +128,23 @@ public class QNMinimizer {
/**
* Initializes a {@link QNMinimizer}.
*
- * @param l1Cost The L1-regularization cost.
- * @param l2Cost The L2-regularization cost.
+ * @param l1Cost The L1-regularization cost. Must be equal to or greater than {@code 0}.
+ * @param l2Cost The L2-regularization cost. Must be equal to or greater than {@code 0}.
+ *
+ * @throws IllegalArgumentException Thrown if the parameters are invalid.
*/
public QNMinimizer(double l1Cost, double l2Cost) {
this(l1Cost, l2Cost, NUM_ITERATIONS_DEFAULT);
}
/**
- * Initializes a {@link QNMinimizer}.
+ * Initializes a {@link QNMinimizer} with L1, L2, and iteration parameters.
+ *
+ * @param l1Cost The L1-regularization cost. Must be equal to or greater than {@code 0}.
+ * @param l2Cost The L2-regularization cost. Must be equal to or greater than {@code 0}.
+ * @param iterations The maximum number of iterations. Must be greater than {@code 0}.
*
- * @param l1Cost The L1-regularization cost.
- * @param l2Cost The L2-regularization cost.
- * @param iterations The maximum number of iterations.
+ * @throws IllegalArgumentException Thrown if the parameters are invalid.
*/
public QNMinimizer(double l1Cost, double l2Cost, int iterations) {
this(l1Cost, l2Cost, iterations, M_DEFAULT, MAX_FCT_EVAL_DEFAULT);
@@ -143,11 +153,13 @@ public class QNMinimizer {
/**
* Initializes a {@link QNMinimizer}.
*
- * @param l1Cost The L1-regularization cost.
- * @param l2Cost The L2-regularization cost.
- * @param iterations The maximum number of iterations.
- * @param m The number of Hessian updates to store.
- * @param maxFctEval The maximum number of function evaluations.
+ * @param l1Cost The L1-regularization cost. Must be equal to or greater than {@code 0}.
+ * @param l2Cost The L2-regularization cost. Must be equal to or greater than {@code 0}.
+ * @param iterations The maximum number of iterations. Must be greater than {@code 0}.
+ * @param m The number of Hessian updates to store. Must be greater than {@code 0}.
+ * @param maxFctEval The maximum number of function evaluations. Must be greater than {@code 0}.
+ *
+ * @throws IllegalArgumentException Thrown if the parameters are invalid.
*/
public QNMinimizer(double l1Cost, double l2Cost, int iterations,
int m, int maxFctEval)
@@ -308,12 +320,14 @@ public class QNMinimizer {
}
/**
- * Pseudo-gradient for L1-regularization (see equation 4 in the paper
- * "Scalable Training of L1-Regularized Log-Linear Models", Andrew et al.
2007)
+ * Conducts pseudo-gradient for L1-regularization.
+ *
+ * @implNote See equation 4 in <a
href="https://doi.org/10.1145/1273496.1273501">
+ * "Scalable Training of L1-Regularized Log-Linear Models"</a>, by Andrew
and Gao, 2007)
*
- * @param x current point
- * @param g gradient at x
- * @param pg pseudo-gradient at x which is to be computed
+ * @param x The current point
+ * @param g The gradient at {@code x}.
+ * @param pg The pseudo-gradient at {@code x} which is to be computed.
*/
private void computePseudoGrad(double[] x, double[] g, double[] pg) {
for (int i = 0; i < dimension; i++) {
@@ -340,7 +354,9 @@ public class QNMinimizer {
}
/**
- * L-BFGS two-loop recursion (see Nocedal & Wright 2006, Numerical Optimization, p. 178)
+ * L-BFGS two-loop recursion, see
+ * <a href="https://link.springer.com/book/10.1007/978-0-387-40065-5">
+ * Nocedal & Wright 2006, Numerical Optimization</a>, p. 178.
*/
private void computeDirection(double[] direction) {
@@ -519,7 +535,7 @@ public class QNMinimizer {
}
/**
- * Evaluate quality of training parameters. For example,
+ * Evaluates the quality of training parameters. For example,
* it can be used to report model's training accuracy when
* we train a Maximum Entropy classifier.
*/
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java
index 358deef5..ea0a26d9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java
@@ -20,17 +20,22 @@ package opennlp.tools.ml.maxent.quasinewton;
import opennlp.tools.ml.ArrayMath;
import opennlp.tools.ml.model.AbstractModel;
import opennlp.tools.ml.model.Context;
+import opennlp.tools.ml.model.MaxentModel;
/**
- * A maximum entropy model which has been trained using the Quasi Newton (QN) algorithm.
+ * A {@link MaxentModel maximum entropy model} which has been trained via the
+ * <a href="https://users.umiacs.umd.edu/~hal/docs/daume04cg-bfgs">L-BFGS</a> algorithm,
+ * which belongs to the group of Quasi-Newton (QN) algorithms.
*
* @see AbstractModel
+ * @see MaxentModel
+ * @see QNTrainer
*/
public class QNModel extends AbstractModel {
/**
- * Initializes a {@link QNModel} with the specified parameters, outcome names, and
- * predicate/feature labels.
+ * Initializes a {@link QNModel} with the specified parameters,
+ * predicate/feature labels, and outcome names.
*
* @param params The {@link Context parameters} of the model.
* @param predLabels The names of the predicates used in this model.
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java
index ff22651a..5a783558 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java
@@ -25,6 +25,7 @@ import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import opennlp.tools.commons.Trainer;
import opennlp.tools.ml.AbstractEventTrainer;
import opennlp.tools.ml.ArrayMath;
import opennlp.tools.ml.maxent.quasinewton.QNMinimizer.Evaluator;
@@ -34,10 +35,13 @@ import opennlp.tools.ml.model.DataIndexer;
import opennlp.tools.util.TrainingParameters;
/**
- * A Maxent model {@link opennlp.tools.commons.Trainer} using L-BFGS algorithm.
+ * A Maxent model {@link Trainer trainer} using the
+ * <a href="https://users.umiacs.umd.edu/~hal/docs/daume04cg-bfgs">L-BFGS</a> algorithm.
*
- * @see QNModel
* @see AbstractEventTrainer
+ * @see QNMinimizer
+ * @see QNModel
+ * @see Trainer
*/
public class QNTrainer extends AbstractEventTrainer {
@@ -49,17 +53,25 @@ public class QNTrainer extends AbstractEventTrainer {
public static final int THREADS_DEFAULT = 1;
public static final String L1COST_PARAM = "L1Cost";
+
+ /** The default L1-cost value is {@code 0.1d}. */
public static final double L1COST_DEFAULT = 0.1;
public static final String L2COST_PARAM = "L2Cost";
+
+ /** The default L2-cost value is {@code 0.1d}. */
public static final double L2COST_DEFAULT = 0.1;
// Number of Hessian updates to store
public static final String M_PARAM = "NumOfUpdates";
+
+ /** The default number of Hessian updates to store is {@code 15}. */
public static final int M_DEFAULT = 15;
// Maximum number of function evaluations
public static final String MAX_FCT_EVAL_PARAM = "MaxFctEval";
+
+ /** The default maximum number of function evaluations is {@code 30000}. */
public static final int MAX_FCT_EVAL_DEFAULT = 30000;
// Number of threads
@@ -77,8 +89,8 @@ public class QNTrainer extends AbstractEventTrainer {
/**
* Initializes a {@link QNTrainer}.
- * <p>
- * <b>Note:</b><br>
+ *
+ * @implNote
* The resulting instance does not print progress messages about training to STDOUT.
*/
public QNTrainer() {
@@ -86,7 +98,7 @@ public class QNTrainer extends AbstractEventTrainer {
}
/**
- * Initializes a {@link QNTrainer}.
+ * Initializes a {@link QNTrainer} with the specified {@code parameters}.
*
* @param parameters The {@link TrainingParameters} to use.
*/
@@ -95,16 +107,16 @@ public class QNTrainer extends AbstractEventTrainer {
}
/**
- * Initializes a {@link QNTrainer}.
+ * Initializes a {@link QNTrainer} with the specified parameter {@code m}.
*
* @param m The number of hessian updates to store.
*/
- public QNTrainer(int m ) {
+ public QNTrainer(int m) {
this(m, MAX_FCT_EVAL_DEFAULT);
}
/**
- * Initializes a {@link QNTrainer}.
+ * Initializes a {@link QNTrainer} with the specified parameters.
*
* @param m The number of hessian updates to store.
+ * @param maxFctEval The maximum number of function evaluations. Must be greater than {@code 0}.
*/
@@ -133,17 +145,17 @@ public class QNTrainer extends AbstractEventTrainer {
String algorithmName = getAlgorithm();
if (algorithmName != null && !(MAXENT_QN_VALUE.equals(algorithmName))) {
- throw new IllegalArgumentException("algorithmName must be MAXENT_QN");
+ throw new IllegalArgumentException("algorithmName must be " +
MAXENT_QN_VALUE);
}
// Number of Hessian updates to remember
- if (m < 0) {
+ if (m <= 0) {
throw new IllegalArgumentException(
"Number of Hessian updates to remember must be >= 0");
}
// Maximum number of function evaluations
- if (maxFctEval < 0) {
+ if (maxFctEval <= 0) {
throw new IllegalArgumentException(
"Maximum number of function evaluations must be >= 0");
}
@@ -175,7 +187,7 @@ public class QNTrainer extends AbstractEventTrainer {
}
/**
- * Trains a model using the QN algorithm.
+ * Trains a {@link QNModel model} using the QN algorithm.
*
* @param iterations The number of QN iterations to perform.
* @param indexer The {@link DataIndexer} used to compress events in memory.
@@ -227,6 +239,8 @@ public class QNTrainer extends AbstractEventTrainer {
/**
* For measuring model's training accuracy.
+ *
+ * @param indexer A valid {@link DataIndexer} instance.
*/
private record ModelEvaluator(DataIndexer indexer) implements Evaluator {
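The parameter keys and defaults declared above suggest the following configuration sketch. It is hypothetical: the keys and the QNTrainer(TrainingParameters) constructor appear in this diff, while TrainingParameters.ALGORITHM_PARAM, MAXENT_QN_VALUE as a QNTrainer constant, and the string-valued put(...) calls are assumed from the wider OpenNLP API:

    import opennlp.tools.ml.maxent.quasinewton.QNTrainer;
    import opennlp.tools.util.TrainingParameters;

    public class QNTrainerConfigExample {
      public static void main(String[] args) {
        TrainingParameters params = new TrainingParameters();
        params.put(TrainingParameters.ALGORITHM_PARAM, QNTrainer.MAXENT_QN_VALUE);
        params.put(QNTrainer.L1COST_PARAM, "0.25");        // "L1Cost", default 0.1
        params.put(QNTrainer.L2COST_PARAM, "1.0");         // "L2Cost", default 0.1
        params.put(QNTrainer.M_PARAM, "15");               // "NumOfUpdates", default 15
        params.put(QNTrainer.MAX_FCT_EVAL_PARAM, "30000"); // "MaxFctEval", default 30000

        QNTrainer trainer = new QNTrainer(params); // constructor shown in this diff
      }
    }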
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java
index b3f26c46..fc529a5b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java
@@ -88,8 +88,7 @@ public abstract class AbstractModel implements MaxentModel {
* Return the name of the outcome corresponding to the highest likelihood
* in the parameter ocs.
*
- * @param ocs A double[] as returned by the eval(String[] context)
- * method.
+ * @param ocs A {@code double[]} as returned by the {@link #eval(String[]) method}.
* @return The name of the most likely outcome.
*/
@Override
@@ -132,7 +131,7 @@ public abstract class AbstractModel implements MaxentModel {
/**
* @param i An outcome id.
- * @return Retrieves the name of the outcome associated with that id.
+ * @return Retrieves the name of the outcome associated with the id {@code i}.
*/
@Override
public final String getOutcome(int i) {
@@ -160,11 +159,11 @@ public abstract class AbstractModel implements MaxentModel {
}
/**
- * Provides the fundamental data structures which encode the maxent model
- * information. Note: This method will usually only be needed by
- * {@link opennlp.tools.ml.maxent.io.GISModelWriter GIS model writers}.
+ * Provides the fundamental data structures which encode the {@link MaxentModel}
+ * information.
+ *
* <p>
- * The following values are held in the Object array which is returned by this method:
+ * The following values are held in the {@code Object[]} which is returned by this method:
* <ul>
* <li>index 0: {@link Context} array containing the model parameters.</li>
* <li>index 1: {@link Map} containing the mapping of model predicates
@@ -175,6 +174,9 @@ public abstract class AbstractModel implements MaxentModel {
* </ul>
*
* @return An {@link Object} array with the values as described above.
+ *
+ * @implNote This method will usually only be needed by
+ * {@link opennlp.tools.ml.maxent.io.GISModelWriter GIS model writers}.
*/
public final Object[] getDataStructures() {
Object[] data = new Object[3];
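A hypothetical consumer of the Object[] layout documented above; the casts at index 0 and index 1 follow the documented list, and the map's generic types, not shown in this diff, are left as wildcards:

    import java.util.Map;

    import opennlp.tools.ml.model.AbstractModel;
    import opennlp.tools.ml.model.Context;

    public class DataStructuresExample {
      // Unpacks the documented indices of getDataStructures().
      static void inspect(AbstractModel model) {
        Object[] data = model.getDataStructures();
        Context[] parameters = (Context[]) data[0]; // index 0: the model parameters
        Map<?, ?> predicates = (Map<?, ?>) data[1]; // index 1: the predicate mapping
        System.out.println(parameters.length + " contexts, " + predicates.size() + " predicates");
      }
    }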
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizerTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizerTest.java
index 7411f090..9c2ca50a 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizerTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizerTest.java
@@ -68,9 +68,7 @@ public class QNMinimizerTest {
}
/**
- * <a href="https://en.wikipedia.org/wiki/Rosenbrock_function">Rosenbrock
function</a>:
- * <p>
- * {@code f(x,y) = (1-x)^2 + 100*(y-x^2)^2}
+ * For {@code f(x,y) = (1-x)^2 + 100*(y-x^2)^2}, <br/>
* {@code f(x,y)} is non-convex and has global minimum at {@code (x,y) = (1,1)} where {@code f(x,y) = 0}.
* <p>
* with
@@ -78,6 +76,9 @@ public class QNMinimizerTest {
* <li>{@code f_x = -2*(1-x) - 400*(y-x^2)*x}</li>
* <li>{@code f_y = 200*(y-x^2)}</li>
* </ul>
+ *
+ * @see <a href="https://doi.org/10.1093/comjnl/3.3.175">An automatic method
for finding the greatest
+ * or least value of a function.</a> by H. H. Rosenbrock, in: The Computer
Journal, 3(3):175–184, 1960
*/
public static class Rosenbrock implements Function {
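The hunk ends inside the Rosenbrock test class. For reference, a body consistent with the documented formula and partial derivatives (a sketch, not the committed test code) would be:

    @Override
    public int getDimension() {
      return 2;
    }

    @Override
    public double valueAt(double[] x) {
      double a = 1 - x[0];
      double b = x[1] - x[0] * x[0];
      return a * a + 100 * b * b; // f(x,y) = (1-x)^2 + 100*(y-x^2)^2
    }

    @Override
    public double[] gradientAt(double[] x) {
      double b = x[1] - x[0] * x[0];
      double fx = -2 * (1 - x[0]) - 400 * b * x[0]; // f_x
      double fy = 200 * b;                          // f_y
      return new double[] {fx, fy};
    }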