Github user iyerr3 commented on a diff in the pull request:
https://github.com/apache/incubator-madlib/pull/168#discussion_r133629865
--- Diff: src/ports/postgres/modules/knn/knn.py_in ---
@@ -127,5 +124,102 @@ def knn_validate_src(schema_madlib, point_source,
point_column_name, label_colum
"Data type '{0}' is not a valid type for column '{1}'
in table '{2}'.".format(colType, id_column_name, test_source))
return k
-# ----------------------------------------------------------------------
-m4_changequote(<!`!>, <!'!>)
+
+
+
+
+def knn(schema_madlib, point_source, point_column_name, label_column_name,
+ test_source, test_column_name, id_column_name, output_table,
operation, k):
+
+ """
+ KNN function to find the K Nearest neighbours
+ Args:
+ @param schema_madlib Name of the Madlib Schema
+ @param point_source Training data table
+ @param point_column_name Name of the column with training
data points.
+ @param label_column_name Name of the column with
labels/values of training data points.
+ @param test_source Name of the table containing the
test data points.
+ @param test_column_name Name of the column with testing
data points.
+ @param id_column_name Name of the column having ids of
data points in test data table.
+ @param output_table Name of the table to store final
results.
+ @param k default: 1. Number of nearest
neighbors to consider
+
+
+ Returns:
+ VARCHAR Name of the output table.
+ """
+
+
+ oldClientMinMessages = plpy.execute("SELECT setting FROM pg_settings
WHERE name = 'client_min_messages'")[0]['setting'];
+
+ plpy.execute("SET client_min_messages TO warning");
+
+
+ k_val = knn_validate_src(schema_madlib, point_source,
point_column_name,
+ label_column_name, test_source,
+ test_column_name, id_column_name,
+ output_table, operation, k)
+
+
+ plpy.execute("SELECT
{schema_madlib}.create_schema_pg_temp()".format(schema_madlib = schema_madlib));
+
+ x_temp_table = unique_string(desp='x_temp_table')
+ y_temp_table = unique_string(desp='y_temp_table')
+ label_column_name_unique =
unique_string(desp='label_column_name_unique')
+ test_id = unique_string(desp='test_id')
+
+ convert_boolean_to_int = '';
+ if operation == 'c':
+ convert_boolean_to_int = '::INTEGER';
+
+ madlib_knn_interm = unique_string(desp='madlib_knn_interm')
+
+ plpy.execute("""DROP TABLE IF EXISTS
pg_temp.{madlib_knn_interm}""".format(**locals()));
+ plpy.execute(
+ """
+ CREATE TEMP TABLE pg_temp.{madlib_knn_interm} AS
+ SELECT *
+ FROM
+ (
+ SELECT row_number() over (partition by {test_id} order by dist)
AS r , {x_temp_table}.*
+ FROM
+ (
+ SELECT test.{id_column_name} AS {test_id} ,
{schema_madlib}.squared_dist_norm2(train.{point_column_name}
,test.{test_column_name}) AS dist, train.{label_column_name}
{convert_boolean_to_int} AS {label_column_name_unique}
+ FROM {point_source} AS train, {test_source} AS test
+ ) {x_temp_table}
+ ){y_temp_table}
+ WHERE {y_temp_table}.r <= {k_val}""".format(**locals()));
+
+ if operation == 'c':
+ plpy.execute(
+ """
+ CREATE TABLE {output_table} AS
+ SELECT {test_id} AS id, {test_column_name} ,
{schema_madlib}.mode({label_column_name_unique}) AS prediction
+ FROM pg_temp.{madlib_knn_interm} join {test_source} ON
{test_id} = {id_column_name}
+ GROUP BY {test_id} , {test_column_name}""".format(**locals()))
+
+
+ else:
+ plpy.execute(
+ """
+ CREATE TABLE {output_table} AS
+ SELECT {test_id} AS id, {test_column_name} , avg(
{label_column_name_unique} ) AS prediction
+ FROM
+ pg_temp.{madlib_knn_interm} join {test_source} on {test_id}
={id_column_name}
+ GROUP BY {test_id} , {test_column_name}
+ ORDER BY {test_id}""".format(**locals()))
+
+
+ plpy.execute("SET client_min_messages TO "+ oldClientMinMessages)
+
+ if operation == 'c':
+ returnstring = 'The classification results have been written to
output table '+ output_table;
+ else:
+ returnstring = 'The regression results have been written to output
table '+ output_table;
+
+ plpy.execute("""DROP TABLE
pg_temp.{madlib_knn_interm}""".format(**locals()));
+
+ return returnstring;
+
+
--- End diff --
In general, file contains multiple style-guide and PEP8 issues. Please
refer to [page in wiki for appropriate
style](https://cwiki.apache.org/confluence/display/MADLIB/Python+Style+Guide).
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---