Github user auonhaidar commented on a diff in the pull request:

    https://github.com/apache/incubator-madlib/pull/81#discussion_r93768540
  
    --- Diff: src/ports/postgres/modules/knn/knn.sql_in ---
    @@ -0,0 +1,165 @@
    +/* ----------------------------------------------------------------------- *//**
    + *
    + * @file knn.sql_in
    + *
    + * @brief Set of functions for k-nearest neighbors.
    + *
    + *
    + *//* ----------------------------------------------------------------------- */
    +
    +m4_include(`SQLCommon.m4')
    +
    +DROP TYPE IF EXISTS MADLIB_SCHEMA.knn_result CASCADE;
    +CREATE TYPE MADLIB_SCHEMA.knn_result AS (
    +    prediction float
    +);
    +DROP TYPE IF EXISTS MADLIB_SCHEMA.test_table_spec CASCADE;
    +CREATE TYPE MADLIB_SCHEMA.test_table_spec AS (
    +    id integer,
    +    vector DOUBLE PRECISION[]
    +);
    +
    +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__knn_validate_src(
    +rel_source VARCHAR
    +) RETURNS VOID AS $$
    +    PythonFunction(knn, knn, knn_validate_src)
    +$$ LANGUAGE plpythonu
    +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `READS SQL DATA', `');
    +
    +
    +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.knn(
    +    arg1 VARCHAR
    +) RETURNS VOID AS $$
    +BEGIN
    +    IF arg1 = 'help' THEN
    +   RAISE NOTICE 'You need to enter following arguments in order:
    +   Argument 1: Training data table having training features as vector column and labels
    +   Argument 2: Name of column having feature vectors in training data table
    +   Argument 3: Name of column having actual label/vlaue for corresponding feature vector in training data table
    +   Argument 4: Test data table having features as vector column. Id of features is mandatory
    +   Argument 5: Name of column having feature vectors in test data table
    +   Argument 6: Name of column having feature vector Ids in test data table
    +   Argument 7: Name of output table
    +   Argument 8: c for classification task, r for regression task
    +   Argument 9: value of k. Default will go as 1';
    +    END IF;
    +END;
    +$$ LANGUAGE plpgsql VOLATILE
    +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `READS SQL DATA', `');
    +
    +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.knn(
    +) RETURNS VOID AS $$
    +BEGIN    
    +    EXECUTE $sql$ select * from MADLIB_SCHEMA.knn('help') $sql$;
    +END;
    +$$ LANGUAGE plpgsql VOLATILE
    +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `READS SQL DATA', `');
    +
    +
    +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.knn(
    +    point_source VARCHAR,
    +    point_column_name VARCHAR,
    +    label_column_name VARCHAR,
    +    test_source VARCHAR,
    +    test_column_name VARCHAR,
    +    id_column_name VARCHAR,
    +    output_table VARCHAR,
    +    operation VARCHAR,
    +    k INTEGER
    +) RETURNS VARCHAR AS $$
    +DECLARE
    +    class_test_source REGCLASS;
    +    class_point_source REGCLASS;
    +    l FLOAT;
    +    id INTEGER;
    +    vector DOUBLE PRECISION[];
    +    cur_pid integer;
    +    theResult MADLIB_SCHEMA.knn_result;
    +    r MADLIB_SCHEMA.test_table_spec;
    +    oldClientMinMessages VARCHAR;
    +    returnstring VARCHAR;
    +BEGIN
    +    oldClientMinMessages :=
    +        (SELECT setting FROM pg_settings WHERE name = 'client_min_messages');
    +    EXECUTE 'SET client_min_messages TO warning';
    +    PERFORM MADLIB_SCHEMA.__knn_validate_src(test_source);
    +    PERFORM MADLIB_SCHEMA.__knn_validate_src(point_source);
    +    class_test_source := test_source;
    +    class_point_source := point_source;
    +    --checks
    +    IF (k <= 0) THEN
    +        RAISE EXCEPTION 'KNN error: Number of neighbors k must be a positive integer.';
    +    END IF;
    +    IF (operation != 'c' AND operation != 'r') THEN
    +        RAISE EXCEPTION 'KNN error: put r for regression OR c for classification.';
    +    END IF;
    +    PERFORM MADLIB_SCHEMA.create_schema_pg_temp();
    +    
    +    EXECUTE format('DROP TABLE IF EXISTS %I',output_table);
    +    EXECUTE format('CREATE TABLE %I(%I integer, %I DOUBLE PRECISION[], predlabel float)',output_table,id_column_name,test_column_name);
    +   
    +    
    +    FOR r IN EXECUTE format('SELECT %I,%I FROM %I', id_column_name, test_column_name, test_source)
    +    LOOP
    +   cur_pid := r.id;
    +   vector := r.vector;
    +   EXECUTE
    +        $sql$
    +   DROP TABLE IF EXISTS pg_temp.knn_vector;
    --- End diff --
    
    Please elaborate on this.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

Reply via email to