fmcquillan99 edited a comment on issue #427: KNN: Add distances to the output table URL: https://github.com/apache/madlib/pull/427#issuecomment-521035013 ``` DROP TABLE IF EXISTS knn_result_classification_unsup; SELECT * FROM madlib.knn( 'knn_train_data', -- Table of training data 'data', -- Col name of training data 'id', -- Col name of id in train data NULL, -- NULL training labels means just list neighbors 'knn_train_data', -- Table of test data (same as training data) 'data', -- Col name of test data 'id', -- Col name of id in test data 'knn_result_classification_unsup', -- Output table 3, -- Number of nearest neighbors True, -- True to list nearest-neighbors by id 'madlib.squared_dist_norm2' -- Distance function ); SELECT * from knn_result_classification_unsup ORDER BY id; ``` produces ``` id | data | k_nearest_neighbours | distance ----+---------+----------------------+--------------- 1 | {1,1} | {1,2,3} | {0,2,8} 2 | {2,2} | {2,3,1} | {0,2,2} 3 | {3,3} | {3,2,4} | {0,2,2} 4 | {4,4} | {4,5,3} | {0,1,2} 5 | {4,5} | {5,4,3} | {0,1,5} 6 | {20,50} | {6,7,5} | {0,461,2281} 7 | {10,31} | {7,6,5} | {0,461,712} 8 | {81,13} | {8,6,7} | {0,5090,5365} 9 | {1,111} | {9,6,7} | {0,4082,6481} (9 rows) ``` ``` CREATE OR REPLACE FUNCTION chebychev_distance (x double precision[], y double precision[]) RETURNS double precision AS $$ from scipy.spatial import distance return distance.chebyshev(x, y) $$ LANGUAGE plpythonu; ``` ``` DROP TABLE IF EXISTS knn_result_classification_udf; SELECT * FROM madlib.knn( 'knn_train_data', -- Table of training data 'data', -- Col name of training data 'id', -- Col name of id in train data 'label', -- Training labels 'knn_test_data', -- Table of test data 'data', -- Col name of test data 'id', -- Col name of id in test data 'knn_result_classification_udf', -- Output table 3, -- Number of nearest neighbors True, -- True to list nearest-neighbors by id 'chebychev_distance' -- Distance function ); SELECT * from knn_result_classification_udf ORDER 
BY id; ``` produces ``` id | data | prediction | k_nearest_neighbours | distance ----+---------+------------+----------------------+------------ 1 | {2,1} | 1 | {1,2,3} | {1,1,2} 2 | {2,6} | 1 | {5,4,3} | {2,2,3} 3 | {15,40} | 0 | {7,6,5} | {9,10,35} 4 | {12,1} | 1 | {5,4,3} | {8,8,9} 5 | {2,90} | 0 | {9,6,7} | {21,40,59} 6 | {50,45} | 0 | {6,8,7} | {30,32,40} (6 rows) ``` LGTM
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services
