lindong28 commented on a change in pull request #24:
URL: https://github.com/apache/flink-ml/pull/24#discussion_r747312484



##########
File path: 
flink-ml-lib/src/main/java/org/apache/flink/ml/algo/batch/knn/KnnTrainBatchOp.java
##########
@@ -0,0 +1,230 @@
+package org.apache.flink.ml.algo.batch.knn;
+
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.common.functions.RichMapPartitionFunction;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.common.typeinfo.Types;
+import org.apache.flink.api.java.typeutils.RowTypeInfo;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.ml.algo.batch.knn.distance.BaseFastDistance;
+import org.apache.flink.ml.algo.batch.knn.distance.BaseFastDistanceData;
+import org.apache.flink.ml.algo.batch.knn.distance.FastDistanceMatrixData;
+import org.apache.flink.ml.algo.batch.knn.distance.FastDistanceSparseData;
+import org.apache.flink.ml.algo.batch.knn.distance.FastDistanceVectorData;
+import org.apache.flink.ml.common.BatchOperator;
+import org.apache.flink.ml.common.MapPartitionFunctionWrapper;
+import org.apache.flink.ml.common.linalg.DenseVector;
+import org.apache.flink.ml.common.linalg.VectorUtil;
+import org.apache.flink.ml.param.Param;
+import org.apache.flink.ml.param.StringParam;
+import org.apache.flink.ml.params.knn.HasKnnDistanceType;
+import org.apache.flink.ml.params.knn.KnnTrainParams;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.table.api.Table;
+import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
+import org.apache.flink.table.api.internal.TableImpl;
+import org.apache.flink.table.catalog.ResolvedSchema;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.types.Row;
+import org.apache.flink.util.Collector;
+import org.apache.flink.util.Preconditions;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static 
org.apache.flink.ml.algo.batch.knn.distance.BaseFastDistanceData.pGson;
+
+/**
+ * KNN is to classify unlabeled observations by assigning them to the class of 
the most similar
+ * labeled examples. Note that though there is no ``training process`` in KNN, 
we create a ``fake
+ * one`` to use in pipeline model. In this operator, we do some preparation to 
speed up the
+ * inference process.
+ */
+public final class KnnTrainBatchOp extends BatchOperator<KnnTrainBatchOp>

Review comment:
       If we just keep `KnnTrainBatchOp` and have it extend the Estimator API, 
we can reduce the total number of classes and still give algorithm developers 
the information we want them to have, right?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to