IGNITE-9713: [ML] Fix JavaDocs in ML Preprocessing package. This closes #4867
Project: http://git-wip-us.apache.org/repos/asf/ignite/repo Commit: http://git-wip-us.apache.org/repos/asf/ignite/commit/a373486f Tree: http://git-wip-us.apache.org/repos/asf/ignite/tree/a373486f Diff: http://git-wip-us.apache.org/repos/asf/ignite/diff/a373486f Branch: refs/heads/ignite-5797 Commit: a373486f817e9c40e2eb0748b28c1d7df14e4203 Parents: f7f834b Author: zaleslaw <[email protected]> Authored: Fri Sep 28 16:01:35 2018 +0300 Committer: Yury Babak <[email protected]> Committed: Fri Sep 28 16:01:35 2018 +0300 ---------------------------------------------------------------------- .../ignite/ml/knn/ann/ANNClassificationModel.java | 2 +- .../org/apache/ignite/ml/knn/ann/ANNModelFormat.java | 2 +- .../preprocessing/encoding/EncoderPreprocessor.java | 2 ++ .../ml/preprocessing/encoding/EncoderTrainer.java | 4 +--- .../stringencoder/StringEncoderPreprocessor.java | 14 ++++++++++++++ 5 files changed, 19 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ignite/blob/a373486f/modules/ml/src/main/java/org/apache/ignite/ml/knn/ann/ANNClassificationModel.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/knn/ann/ANNClassificationModel.java b/modules/ml/src/main/java/org/apache/ignite/ml/knn/ann/ANNClassificationModel.java index bec82a9..6ef3990 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/knn/ann/ANNClassificationModel.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/knn/ann/ANNClassificationModel.java @@ -50,7 +50,7 @@ public class ANNClassificationModel extends NNClassificationModel { /** * Build the model based on a candidates set. * @param centers The candidates set. - * @param centroindsStat + * @param centroindsStat The stat about centroids. 
*/ public ANNClassificationModel(LabeledVectorSet<ProbableLabel, LabeledVector> centers, ANNClassificationTrainer.CentroidStat centroindsStat) { http://git-wip-us.apache.org/repos/asf/ignite/blob/a373486f/modules/ml/src/main/java/org/apache/ignite/ml/knn/ann/ANNModelFormat.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/knn/ann/ANNModelFormat.java b/modules/ml/src/main/java/org/apache/ignite/ml/knn/ann/ANNModelFormat.java index be09828..e0c2c3a 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/knn/ann/ANNModelFormat.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/knn/ann/ANNModelFormat.java @@ -41,7 +41,7 @@ public class ANNModelFormat extends KNNModelFormat implements Serializable { * @param k Amount of nearest neighbors. * @param measure Distance measure. * @param stgy kNN strategy. - * @param candidatesStat + * @param candidatesStat The stat about candidates. */ public ANNModelFormat(int k, DistanceMeasure measure, http://git-wip-us.apache.org/repos/asf/ignite/blob/a373486f/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderPreprocessor.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderPreprocessor.java b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderPreprocessor.java index 7df44f3..5c60da9 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderPreprocessor.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderPreprocessor.java @@ -25,6 +25,8 @@ import org.apache.ignite.ml.math.primitives.vector.Vector; /** * Preprocessing function that makes encoding. * + * This a base abstract class that keeps the common fields for all child encoding preprocessors. + * * @param <K> Type of a key in {@code upstream} data. 
* @param <V> Type of a value in {@code upstream} data. */ http://git-wip-us.apache.org/repos/asf/ignite/blob/a373486f/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderTrainer.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderTrainer.java index 8b2d9b7..9a97a6d 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderTrainer.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderTrainer.java @@ -35,9 +35,7 @@ import org.apache.ignite.ml.preprocessing.encoding.stringencoder.StringEncoderPr import org.jetbrains.annotations.NotNull; /** - * Trainer of the String Encoder preprocessor. - * The String Encoder encodes string values (categories) to double values in range [0.0, amountOfCategories) - * where the most popular value will be presented as 0.0 and the least popular value presented with amountOfCategories-1 value. + * Trainer of the String Encoder and One-Hot Encoder preprocessors. * * @param <K> Type of a key in {@code upstream} data. * @param <V> Type of a value in {@code upstream} data. 
http://git-wip-us.apache.org/repos/asf/ignite/blob/a373486f/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/stringencoder/StringEncoderPreprocessor.java ---------------------------------------------------------------------- diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/stringencoder/StringEncoderPreprocessor.java b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/stringencoder/StringEncoderPreprocessor.java index 12f98f6..c2474ef 100644 --- a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/stringencoder/StringEncoderPreprocessor.java +++ b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/stringencoder/StringEncoderPreprocessor.java @@ -28,6 +28,20 @@ import org.apache.ignite.ml.preprocessing.encoding.EncoderPreprocessor; /** * Preprocessing function that makes String encoding. * + * The String Encoder Preprocessor encodes string values (categories) to double values + * in range [0.0, amountOfCategories), where the most popular value will be presented as 0.0 and + * the least popular value presented with amountOfCategories-1 value. + * <p> + * This preprocessor can transform multiple columns which indices are handled during training process. These indexes could be defined via .withEncodedFeature(featureIndex) call. + * </p> + * <p> + * NOTE: it doesn't add new column but change data in-place. + *</p> + * <p> + * There is only a one strategy regarding how StringEncoder will handle unseen labels + * when you have fit a StringEncoder on one dataset and then use it to transform another: + * put unseen labels in a special additional bucket, at index is equal amountOfCategories. + * </p> * @param <K> Type of a key in {@code upstream} data. * @param <V> Type of a value in {@code upstream} data. */
