This is an automated email from the ASF dual-hosted git repository. nkak pushed a commit to branch madlib2-master in repository https://gitbox.apache.org/repos/asf/madlib.git
commit 8a85d8e054da4014c36137097e00315c43f930e3 Author: Nikhil Kak <n...@vmware.com> AuthorDate: Fri Feb 16 13:34:15 2024 -0800 PMML: Add dev-check tests for pmml export JIRA: MADLIB-1517 This commit introduces a new function named `test_pmml_output` which can be used to test the output of the pmml function. It compares the output of madlib's predict function with pypmml's predict function * Added a pmml setup file that contains all the common datasets and functions used by the pmml dev-check tests * Added exhaustive tests to glm, linear and logistic. A future commit will add more tests to other pmml modules --- .../{pmml_glm_gamma.sql_in => pmml.setup.sql_in} | 322 +++++++++++++++++---- .../modules/pmml/test/pmml_check_fields.sql_in | 150 ++++++++++ .../modules/pmml/test/pmml_glm_binomial.sql_in | 178 ++++++------ .../modules/pmml/test/pmml_glm_gamma.sql_in | 145 ++++------ .../postgres/modules/pmml/test/pmml_glm_ig.sql_in | 128 ++++---- .../modules/pmml/test/pmml_glm_normal.sql_in | 171 ++++++----- .../modules/pmml/test/pmml_glm_poisson.sql_in | 118 ++++---- .../pmml/test/pmml_glm_with_grouping.sql_in | 125 ++++++++ .../pmml/test/pmml_glm_with_name_spec.sql_in | 160 ++++++++++ .../postgres/modules/pmml/test/pmml_linear.sql_in | 91 ++++++ .../modules/pmml/test/pmml_logistic.sql_in | 159 ++++++++++ .../modules/pmml/test/pmml_multinom.sql_in | 25 +- .../test/pmml_with_non_array_expression.sql_in | 94 ++++++ .../modules/pmml/test/table_to_pmml.sql_in | 197 ------------- 14 files changed, 1406 insertions(+), 657 deletions(-) diff --git a/src/ports/postgres/modules/pmml/test/pmml_glm_gamma.sql_in b/src/ports/postgres/modules/pmml/test/pmml.setup.sql_in similarity index 50% copy from src/ports/postgres/modules/pmml/test/pmml_glm_gamma.sql_in copy to src/ports/postgres/modules/pmml/test/pmml.setup.sql_in index e8b56066..dad6a03f 100644 --- a/src/ports/postgres/modules/pmml/test/pmml_glm_gamma.sql_in +++ b/src/ports/postgres/modules/pmml/test/pmml.setup.sql_in @@ -1,5 +1,55 @@ -DROP TABLE IF EXISTS abalone CASCADE; +/* ----------------------------------------------------------------------- *//** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + *//* ----------------------------------------------------------------------- */ + + DROP TABLE IF EXISTS houses CASCADE; +CREATE TABLE houses ( + id SERIAL NOT NULL, + tax INTEGER, + bedroom REAL, + bath REAL, + price INTEGER, + size INTEGER, + lot INTEGER, + g1 CHAR(1), + g2 CHAR(1) +); +INSERT INTO houses(tax, bedroom, bath, price, size, lot, g1, g2) VALUES +( 590, 2, 1, 50000, 770, 22100, 'A', 'z'), +(1050, 3, 2, 85000, 1410, 12000, 'A', 'y'), +(20, 3, 1, 22500, 1060, 3500 , 'A', 'y'), +( 870, 2, 2, 90000, 1300, 17500, 'A', 'y'), +(1320, 3, 2, 133000, 1500, 30000, 'A', 'y'), +(1350, 2, 1, 90500, 820, 25700, 'A', 'x'), +(2790, 3, 2.5, 260000, 2130, 25000, 'A', 'x'), +( 680, 2, 1, 142500, 1170, 22000, 'A', 'x'), +(1840, 3, 2, 160000, 1500, 19000, 'B', 'x'), +(3680, 4, 2, 240000, 2790, 20000, 'B', 'y'), +(1660, 3, 1, 87000, 1030, 17500, 'B', 'y'), +(1620, 3, 2, 118600, 1250, 20000, 'A', 'y'), +(3100, 3, 2, 140000, 1760, 38000, 'B', 'y'), +(2070, 2, 3, 148000, 1550, 14000, 'B', 'y'), +( 650, 3, 1.5, 65000, 1450, 12000, 'B', 'y'); + +DROP TABLE IF EXISTS abalone CASCADE; CREATE TABLE abalone ( id integer, sex text, @@ -76,88 +126,238 @@ INSERT INTO abalone VALUES (3759, 'I', 0.525000000000000022, 0.400000000000000022, 0.140000000000000013, 0.605500000000000038, 0.260500000000000009, 0.107999999999999999, 0.209999999999999992, 9); -DROP TABLE IF EXISTS abalone_out_inverse, abalone_out_inverse_summary; -SELECT glm( - 'abalone', - 'abalone_out_inverse', - 'rings', - 'ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]', - 'family=gamma, link=inverse', NULL, 'max_iter=1000, tolerance=1e-16' +DROP TABLE IF EXISTS warpbreaks CASCADE; +CREATE TABLE warpbreaks( + id serial, + breaks integer, + wool char(1), + tension char(1), + g char(1) ); -DROP TABLE IF EXISTS abalone_out_identity, abalone_out_identity_summary; -SELECT glm( - 'abalone', - 'abalone_out_identity', - 'rings', - 'ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]', - 'family=gamma, link=identity', NULL, 'max_iter=1000, tolerance=1e-16' -); +INSERT INTO warpbreaks(breaks, wool, tension, g) VALUES +(26, 'A', 'L', '1'), +(30, 'A', 'L', '1'), +(54, 'A', 'L', '1'), +(25, 'A', 'L', '1'), +(70, 'A', 'L', '1'), +(52, 'A', 'L', '1'), +(51, 'A', 'L', '1'), +(26, 'A', 'L', '1'), +(67, 'A', 'L', '1'), +(18, 'A', 'M', '1'), +(21, 'A', 'M', '1'), +(29, 'A', 'M', '1'), +(17, 'A', 'M', '1'), +(12, 'A', 'M', '1'), +(18, 'A', 'M', '1'), +(35, 'A', 'M', '1'), +(30, 'A', 'M', '1'), +(36, 'A', 'M', '1'), +(36, 'A', 'H', '0'), +(21, 'A', 'H', '0'), +(24, 'A', 'H', '0'), +(18, 'A', 'H', '0'), +(10, 'A', 'H', '0'), +(43, 'A', 'H', '0'), +(28, 'A', 'H', '0'), +(15, 'A', 'H', '0'), +(26, 'A', 'H', '0'), +(27, 'B', 'L', '0'), +(14, 'B', 'L', '0'), +(29, 'B', 'L', '0'), +(19, 'B', 'L', '0'), +(29, 'B', 'L', '0'), +(31, 'B', 'L', '0'), +(41, 'B', 'L', '0'), +(20, 'B', 'L', '1'), +(44, 'B', 'L', '1'), +(42, 'B', 'M', '1'), +(26, 'B', 'M', '1'), +(19, 'B', 'M', '1'), +(16, 'B', 'M', '1'), +(39, 'B', 'M', '1'), +(28, 'B', 'M', '1'), +(21, 'B', 'M', '1'), +(39, 'B', 'M', '1'), +(29, 'B', 'M', '1'), +(20, 'B', 'H', '1'), +(21, 'B', 'H', '1'), +(24, 'B', 'H', '1'), +(17, 'B', 'H', '1'), +(13, 'B', 'H', '1'), +(15, 'B', 'H', '1'), +(15, 'B', 'H', '1'), +(16, 'B', 'H', '1'), +(28, 'B', 'H', '1'); -DROP TABLE IF EXISTS abalone_out_log, abalone_out_log_summary; -SELECT glm( - 'abalone', - 'abalone_out_log', - 'rings', - 'ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]', - 'family=gamma, link=log', NULL, 'max_iter=1000, tolerance=1e-16' -); +DROP TABLE IF EXISTS warpbreaks_dummy; +SELECT create_indicator_variables('warpbreaks', 'warpbreaks_dummy', 'wool,tension'); -DROP TABLE IF EXISTS abalone_out_log_grp, abalone_out_log_grp_summary; -SELECT glm( - 'abalone', - 'abalone_out_log_grp', - 'rings', - 'ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]', - 'family=gamma, link=log', 'sex', 'max_iter=1000, tolerance=1e-16' +DROP TABLE IF EXISTS "Patients"; +CREATE TABLE "Patients" ( + id integer NOT NULL, + "Second_attack" integer, + "Treatment" integer, + trait_anxiety integer, + g1 char(1), + g2 char(1) ); +INSERT INTO "Patients"(ID, "Second_attack", "Treatment", trait_anxiety, g1, g2) VALUES +( 1, 1, 1, 70, 'A', 'x'), +( 2, 1, 1, 80, 'A', 'y'), +( 3, 1, 1, 50, 'A', 'y'), +( 4, 1, 0, 60, 'A', 'y'), +( 5, 1, 0, 40, 'A', 'y'), +( 6, 1, 0, 65, 'A', 'x'), +( 7, 1, 0, 75, 'A', 'x'), +( 8, 1, 0, 80, 'B', 'x'), +( 9, 1, 0, 70, 'A', 'x'), +(10, 1, 0, 60, 'A', 'z'), +(11, 0, 1, 65, 'A', 'z'), +(12, 0, 1, 50, 'B', 'y'), +(13, 0, 1, 45, 'A', 'y'), +(14, 0, 1, 35, 'A', 'z'), +(15, 0, 1, 40, 'A', 'z'), +(16, 0, 1, 50, 'A', 'z'), +(17, 0, 0, 55, 'B', 'z'), +(18, 0, 0, 45, 'B', 'z'), +(19, 0, 0, 50, 'B', 'z'), +(20, 0, 0, 60, 'B', 'z'); +/* ----------------------------------------------------------------------- *//** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + *//* ----------------------------------------------------------------------- */ + +/** + * @brief Compares the madlib prediction output with pypmml prediction output. + * Assumes that the caller has already run madlib's predict function on the test table + * Creates a dictionary of the madlib results by querying the madlib_predict_table + * Calls the pmml function on the trained madlib table + * Loops through all the rows of the test table and FOR EACH ROW: + ** Gets the madlib prediction from the dict created above + ** Runs pypmml predict + ** Compares madlib and pypmml results + ** Throws an error if the values don't match + * @param test_table The test table which was used for madlib prediction. This table will be used for pmml prediction + * @param madlib_train_table The trained model table + * @param madlib_predict_table The madlib prediction table + * @param id_col id column in the test table used for uniquely identifying the rows for comparison + * @param madlib_prediction_metric Name of the column in madlib prediction table that contains the value to compare + * @param pypmml_prediction_metric Name of the key in pmml prediction dict that contains the value to compare + * @param grouping_col grouping col used during training + * @param name_spec name_spec to be passed to the madlib pmml function + * @returns + * + */ +CREATE OR REPLACE FUNCTION test_pmml_output(test_table TEXT, madlib_train_table TEXT, madlib_predict_table TEXT, + id_col TEXT, madlib_prediction_metric TEXT, pypmml_prediction_metric TEXT, grouping_col TEXT, name_spec TEXT) returns VOID as $$ + # This function returns a key so that we can compare the results from the madlib prediction table with the pmml + # result that gets run on the test table + # The key of this dict is either just the 'id' or a combination of 'id' and all the grouping cold + def get_unique_key(input): + unique_key = [str(input[id_col])] + if grouping_col != '': + cols = grouping_col.split(',') + for col in cols: + unique_key.append(str(input[col])) + return ','.join(unique_key) + + def are_results_different(madlib_result, pypmml_result): + from math import isnan, isinf + if isinstance(madlib_result, str): + return str(pypmml_result) != str(madlib_result) + elif isinstance(madlib_result, float) or isinstance(madlib_result, int): + if isinf(float(madlib_result)) or isnan(float(madlib_result)): + return True + if isinf(float(pypmml_result)) or isnan(float(pypmml_result)): + return True + tol = 1e-6 + return abs(pypmml_result - madlib_result) > tol + return True -SELECT pmml('abalone_out_inverse'); -SELECT pmml('abalone_out_log'); -SELECT pmml('abalone_out_identity'); -SELECT pmml('abalone_out_log_grp'); + from pypmml import Model -CREATE OR REPLACE FUNCTION count_expected_names(formula varchar, expected varchar[]) + madlib_predict_output_table = plpy.execute("SELECT * from {}".format(madlib_predict_table)) + madlib_predict_output = {} + for madlib_pred in madlib_predict_output_table: + madlib_predict_output[get_unique_key(madlib_pred)] = madlib_pred[madlib_prediction_metric] + + #get madlib pmml output string + if name_spec == '': + pmml_query = "SELECT pmml('{}')".format(madlib_train_table) + else: + pmml_query = "SELECT pmml('{}','{}')".format(madlib_train_table, name_spec) + madlib_pmml_str = plpy.execute(pmml_query)[0]["pmml"] + + # load pypmml model using madlib pmml string + pypmml_model = Model.fromString(madlib_pmml_str) + + # load data and run pypmml predict + test_data = plpy.execute("SELECT * from {}".format(test_table)) + for data in test_data: + madlib_result = madlib_predict_output[get_unique_key(data)] + pypmml_prediction = pypmml_model.predict(data) + if pypmml_prediction_metric not in pypmml_prediction: + plpy.error("Metric: '{}' does not exist in pypmml output: {}".format(pypmml_prediction_metric, pypmml_prediction)) + pypmml_result = pypmml_prediction[pypmml_prediction_metric] + if are_results_different(madlib_result, pypmml_result): + plpy.info(madlib_pmml_str) + plpy.error("pmml comparison failed. input row: {}, metric to compare: {}, madlib result: {}, pypmml result: {}, detailed madlib output: {}, detailed pypmml output: {}".format(data, pypmml_prediction_metric, madlib_result, pypmml_result, madlib_predict_output, pypmml_prediction)) +$$ language plpython3u; + +CREATE OR REPLACE FUNCTION test_pmml_output(test_table TEXT, madlib_train_table TEXT, madlib_predict_table TEXT, + id_col TEXT, madlib_prediction_metric TEXT, pypmml_prediction_metric TEXT, grouping_col TEXT) returns VOID as $$ + SELECT test_pmml_output(test_table, madlib_train_table, madlib_predict_table, id_col, madlib_prediction_metric, pypmml_prediction_metric, grouping_col, '') +$$ language sql; + +CREATE OR REPLACE FUNCTION test_pmml_output(test_table TEXT, madlib_train_table TEXT, madlib_predict_table TEXT, + id_col TEXT, madlib_prediction_metric TEXT, pypmml_prediction_metric TEXT) returns VOID as $$ + SELECT test_pmml_output(test_table, madlib_train_table, madlib_predict_table, id_col, madlib_prediction_metric, pypmml_prediction_metric, '', '') +$$ language sql; + + +CREATE OR REPLACE FUNCTION count_expected_names(madlib_train_table varchar, formula varchar, expected varchar[]) RETURNS bigint AS $$ SELECT count(distinct result.name) FROM (SELECT text(unnest(xpath('/n:PMML/n:DataDictionary/n:DataField/@name', pmml_col, ARRAY[ARRAY['n', 'http://www.dmg.org/PMML-4_1']]))) AS name - FROM (SELECT CASE WHEN $1 is NULL THEN pmml('abalone_out_inverse') - ELSE pmml('abalone_out_inverse', $1) - END AS pmml_col) abalone_out_inverse_pmml + FROM (SELECT CASE WHEN $2 is NULL THEN pmml(madlib_train_table) + ELSE pmml(madlib_train_table, $2) + END AS pmml_col) pmml_output ) result, - (SELECT unnest($2) AS name) expected + (SELECT unnest($3) AS name) expected WHERE expected.name = result.name; $$ LANGUAGE sql; -CREATE OR REPLACE FUNCTION count_expected_names(name_spec varchar[], expected varchar[]) +CREATE OR REPLACE FUNCTION count_expected_names(madlib_train_table varchar, name_spec varchar[], expected varchar[]) RETURNS bigint AS $$ SELECT count(distinct result.name) FROM (SELECT text(unnest(xpath('/n:PMML/n:DataDictionary/n:DataField/@name', pmml_col, ARRAY[ARRAY['n', 'http://www.dmg.org/PMML-4_1']]))) AS name - FROM (SELECT CASE WHEN $1 is NULL THEN pmml('abalone_out_inverse') - ELSE pmml('abalone_out_inverse', $1) - END AS pmml_col) abalone_out_inverse_pmml + FROM (SELECT CASE WHEN $2 is NULL THEN pmml(madlib_train_table) + ELSE pmml(madlib_train_table, $2) + END AS pmml_col) pmml_output ) result, - (SELECT unnest($2) AS name) expected + (SELECT unnest($3) AS name) expected WHERE expected.name = result.name; $$ LANGUAGE sql; - -SELECT assert( - count_expected_names( - NULL, - ARRAY['rings_pmml_prediction', '1', 'length', 'diameter', 'height', 'whole', 'shucked', 'viscera', 'shell'] - ) = 9, - 'default case'); - -SELECT assert( - count_expected_names( - 'rings ~ 1 + length + diameter + height', - ARRAY['rings', '1', 'length', 'diameter', 'height'] - ) = 5, - text(pmml('abalone_out_inverse', - 'rings ~ 1 + length + diameter + height'))); - diff --git a/src/ports/postgres/modules/pmml/test/pmml_check_fields.sql_in b/src/ports/postgres/modules/pmml/test/pmml_check_fields.sql_in new file mode 100644 index 00000000..8381398a --- /dev/null +++ b/src/ports/postgres/modules/pmml/test/pmml_check_fields.sql_in @@ -0,0 +1,150 @@ +/* ----------------------------------------------------------------------- *//** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + *//* ----------------------------------------------------------------------- */ + + \i m4_regexp(MADLIB_LIBRARY_PATH, `\(.*\)/lib', + `\1/../modules/pmml/test/pmml.setup.sql_in' +) + +m4_changequote(`<!'', `!>'') + +--------------------- linear regression ----------------------- +DROP TABLE IF EXISTS linregr_model, linregr_model_summary; +SELECT linregr_train( + 'houses', + 'linregr_model', + 'price', + 'array[1, bedroom, bath, size]', + 'g1, g2' +); + +SELECT assert( + count_expected_names('linregr_model', + NULL, + ARRAY['price_pmml_prediction', 'bedroom', 'bath', 'size', 'g1', 'g2'] + ) = 6, + text(pmml('linregr_model'))); + +SELECT assert( + count_expected_names('linregr_model', + 'c.price ~ c.bedroom + c.bath + c.size', + ARRAY['c.price', 'c.bedroom', 'c.bath', 'c.size'] + ) = 4, + text(pmml('linregr_model', + 'c.price ~ c.bedroom + c.bath + c.size'))); + +SELECT assert( + count_expected_names('linregr_model', + '{c.price, c.bedroom, c.bath, c.size}'::varchar[], + ARRAY['c.price', 'c.bedroom', 'c.bath', 'c.size'] + ) = 4, + text(pmml('linregr_model', + 'c.price ~ c.bedroom + c.bath + c.size'))); + +SELECT assert( + count_expected_names('linregr_model', + '{c.price, c.bedroom, c.bath, c.size}', + ARRAY['c.price', 'c.bedroom', 'c.bath', 'c.size'] + ) = 4, + text(pmml('linregr_model', + 'c.price ~ c.bedroom + c.bath + c.size'))); + +SELECT assert( + count_expected_names('linregr_model', + 'c.bedroom + c.bath + c.size', + ARRAY['price', 'c.bedroom', 'c.bath', 'c.size'] + ) = 4, + text(pmml('linregr_model', + 'c.bedroom + c.bath + c.size'))); + +SELECT assert( + count_expected_names('linregr_model', + 'c.bedroom, c.bath, c.size', + ARRAY['price', 'c.bedroom', 'c.bath', 'c.size'] + ) = 4, + text(pmml('linregr_model', + 'c.bedroom, c.bath, c.size'))); + +--------------------- logistic regression ----------------------- +DROP TABLE IF EXISTS logregr_model, logregr_model_summary; +SELECT logregr_train( + '"Patients"', + 'logregr_model', + '"Second_attack"', + 'ARRAY[1, "Treatment", trait_anxiety]', + NULL, + 20, + 'irls' +); + +SELECT assert( + count_expected_names('logregr_model', + NULL, + ARRAY['Second_attack_pmml_prediction', 'Treatment', 'trait_anxiety'] + ) = 3, + text(pmml('logregr_model'))); + +SELECT assert( + count_expected_names('logregr_model', + 'c.second_attack ~ c.treatment + c.anxiety', + ARRAY['c.second_attack', 'c.treatment', 'c.anxiety'] + ) = 3, + text(pmml('logregr_model', + 'c.second_attack ~ c.treatment + c.anxiety'))); + +SELECT assert( + count_expected_names('logregr_model', + '{c.second_attack, c.treatment, c.anxiety}'::varchar[], + ARRAY['c.second_attack', 'c.treatment', 'c.anxiety'] + ) = 3, + text(pmml('logregr_model', + '{c.second_attack, c.treatment, c.anxiety}'))); + +SELECT assert( + count_expected_names('logregr_model', + '{c.second_attack, c.treatment, c.anxiety}', + ARRAY['c.second_attack', 'c.treatment', 'c.anxiety'] + ) = 3, + text(pmml('logregr_model', + '{c.second_attack, c.treatment, c.anxiety}'))); + +SELECT assert( + count_expected_names('logregr_model', + 'c.treatment + c.anxiety', + ARRAY['Second_attack', 'c.treatment', 'c.anxiety'] + ) = 3, + text(pmml('logregr_model', + 'c.treatment + c.anxiety'))); + +SELECT assert( + count_expected_names('logregr_model', + 'c.treatment, c.anxiety', + ARRAY['Second_attack', 'c.treatment', 'c.anxiety'] + ) = 3, + text(pmml('logregr_model', + 'c.treatment, c.anxiety'))); + +SELECT assert( + count_expected_names('logregr_model', + 'c.treatment, c.trait_anxiety', + ARRAY['Second_attack', 'c.treatment', 'c.trait_anxiety'] + ) = 3, + text(pmml('logregr_model', + 'c.treatment, c.trait_anxiety'))); diff --git a/src/ports/postgres/modules/pmml/test/pmml_glm_binomial.sql_in b/src/ports/postgres/modules/pmml/test/pmml_glm_binomial.sql_in index b874bd9e..3c17815c 100644 --- a/src/ports/postgres/modules/pmml/test/pmml_glm_binomial.sql_in +++ b/src/ports/postgres/modules/pmml/test/pmml_glm_binomial.sql_in @@ -1,100 +1,110 @@ -DROP TABLE IF EXISTS abalone CASCADE; - -CREATE TABLE abalone ( - id integer, - sex text, - length double precision, - diameter double precision, - height double precision, - whole double precision, - shucked double precision, - viscera double precision, - shell double precision, - rings integer -); +\i m4_regexp(MADLIB_LIBRARY_PATH, + `\(.*\)/lib', + `\1/../modules/pmml/test/pmml.setup.sql_in' +) + +m4_changequote(`<!'', `!>'') -INSERT INTO abalone VALUES -(3151, 'F', 0.655000000000000027, 0.505000000000000004, 0.165000000000000008, 1.36699999999999999, 0.583500000000000019, 0.351499999999999979, 0.396000000000000019, 10), -(2026, 'F', 0.550000000000000044, 0.469999999999999973, 0.149999999999999994, 0.920499999999999985, 0.381000000000000005, 0.243499999999999994, 0.267500000000000016, 10), -(3751, 'I', 0.434999999999999998, 0.375, 0.110000000000000001, 0.41549999999999998, 0.170000000000000012, 0.0759999999999999981, 0.14499999999999999, 8), -(720, 'I', 0.149999999999999994, 0.100000000000000006, 0.0250000000000000014, 0.0149999999999999994, 0.00449999999999999966, 0.00400000000000000008, 0.0050000000000000001, 2), -(1635, 'F', 0.574999999999999956, 0.469999999999999973, 0.154999999999999999, 1.1160000000000001, 0.509000000000000008, 0.237999999999999989, 0.340000000000000024, 10), -(2648, 'I', 0.5, 0.390000000000000013, 0.125, 0.582999999999999963, 0.293999999999999984, 0.132000000000000006, 0.160500000000000004, 8), -(1796, 'F', 0.57999999999999996, 0.429999999999999993, 0.170000000000000012, 1.47999999999999998, 0.65349999999999997, 0.32400000000000001, 0.41549999999999998, 10), -(209, 'F', 0.525000000000000022, 0.41499999999999998, 0.170000000000000012, 0.832500000000000018, 0.275500000000000023, 0.168500000000000011, 0.309999999999999998, 13), -(1451, 'I', 0.455000000000000016, 0.33500000000000002, 0.135000000000000009, 0.501000000000000001, 0.274000000000000021, 0.0995000000000000051, 0.106499999999999997, 7), -(1108, 'I', 0.510000000000000009, 0.380000000000000004, 0.115000000000000005, 0.515499999999999958, 0.214999999999999997, 0.113500000000000004, 0.166000000000000009, 8), -(3675, 'F', 0.594999999999999973, 0.450000000000000011, 0.165000000000000008, 1.08099999999999996, 0.489999999999999991, 0.252500000000000002, 0.279000000000000026, 12), -(2108, 'F', 0.675000000000000044, 0.550000000000000044, 0.179999999999999993, 1.68849999999999989, 0.562000000000000055, 0.370499999999999996, 0.599999999999999978, 15), -(3312, 'F', 0.479999999999999982, 0.380000000000000004, 0.135000000000000009, 0.507000000000000006, 0.191500000000000004, 0.13650000000000001, 0.154999999999999999, 12), -(882, 'M', 0.655000000000000027, 0.520000000000000018, 0.165000000000000008, 1.40949999999999998, 0.585999999999999965, 0.290999999999999981, 0.405000000000000027, 9), -(3402, 'M', 0.479999999999999982, 0.395000000000000018, 0.149999999999999994, 0.681499999999999995, 0.214499999999999996, 0.140500000000000014, 0.2495, 18), -(829, 'I', 0.409999999999999976, 0.325000000000000011, 0.100000000000000006, 0.394000000000000017, 0.20799999999999999, 0.0655000000000000027, 0.105999999999999997, 6), -(1305, 'M', 0.535000000000000031, 0.434999999999999998, 0.149999999999999994, 0.716999999999999971, 0.347499999999999976, 0.14449999999999999, 0.194000000000000006, 9), -(3613, 'M', 0.599999999999999978, 0.46000000000000002, 0.179999999999999993, 1.1399999999999999, 0.422999999999999987, 0.257500000000000007, 0.364999999999999991, 10), -(1068, 'I', 0.340000000000000024, 0.265000000000000013, 0.0800000000000000017, 0.201500000000000012, 0.0899999999999999967, 0.0475000000000000006, 0.0550000000000000003, 5), -(2446, 'M', 0.5, 0.380000000000000004, 0.135000000000000009, 0.583500000000000019, 0.22950000000000001, 0.126500000000000001, 0.179999999999999993, 12), -(1393, 'M', 0.635000000000000009, 0.474999999999999978, 0.170000000000000012, 1.19350000000000001, 0.520499999999999963, 0.269500000000000017, 0.366499999999999992, 10), -(359, 'M', 0.744999999999999996, 0.584999999999999964, 0.214999999999999997, 2.49900000000000011, 0.92649999999999999, 0.471999999999999975, 0.699999999999999956, 17), -(549, 'F', 0.564999999999999947, 0.450000000000000011, 0.160000000000000003, 0.79500000000000004, 0.360499999999999987, 0.155499999999999999, 0.23000000000000001, 12), -(1154, 'F', 0.599999999999999978, 0.474999999999999978, 0.160000000000000003, 1.02649999999999997, 0.484999999999999987, 0.2495, 0.256500000000000006, 9), -(1790, 'F', 0.54500000000000004, 0.385000000000000009, 0.149999999999999994, 1.11850000000000005, 0.542499999999999982, 0.244499999999999995, 0.284499999999999975, 9), -(3703, 'F', 0.665000000000000036, 0.540000000000000036, 0.195000000000000007, 1.76400000000000001, 0.850500000000000034, 0.361499999999999988, 0.469999999999999973, 11), -(1962, 'F', 0.655000000000000027, 0.515000000000000013, 0.179999999999999993, 1.41199999999999992, 0.619500000000000051, 0.248499999999999999, 0.496999999999999997, 11), -(1665, 'I', 0.604999999999999982, 0.469999999999999973, 0.14499999999999999, 0.802499999999999991, 0.379000000000000004, 0.226500000000000007, 0.220000000000000001, 9), -(635, 'M', 0.359999999999999987, 0.294999999999999984, 0.100000000000000006, 0.210499999999999993, 0.0660000000000000031, 0.0524999999999999981, 0.0749999999999999972, 9), -(3901, 'M', 0.445000000000000007, 0.344999999999999973, 0.140000000000000013, 0.475999999999999979, 0.205499999999999988, 0.101500000000000007, 0.108499999999999999, 15), -(2734, 'I', 0.41499999999999998, 0.33500000000000002, 0.100000000000000006, 0.357999999999999985, 0.169000000000000011, 0.067000000000000004, 0.104999999999999996, 7), -(3856, 'M', 0.409999999999999976, 0.33500000000000002, 0.115000000000000005, 0.440500000000000003, 0.190000000000000002, 0.0850000000000000061, 0.135000000000000009, 8), -(827, 'I', 0.395000000000000018, 0.28999999999999998, 0.0950000000000000011, 0.303999999999999992, 0.127000000000000002, 0.0840000000000000052, 0.076999999999999999, 6), -(3381, 'I', 0.190000000000000002, 0.130000000000000004, 0.0449999999999999983, 0.0264999999999999993, 0.00899999999999999932, 0.0050000000000000001, 0.00899999999999999932, 5), -(3972, 'I', 0.400000000000000022, 0.294999999999999984, 0.0950000000000000011, 0.252000000000000002, 0.110500000000000001, 0.0575000000000000025, 0.0660000000000000031, 6), -(1155, 'M', 0.599999999999999978, 0.455000000000000016, 0.170000000000000012, 1.1915, 0.695999999999999952, 0.239499999999999991, 0.239999999999999991, 8), -(3467, 'M', 0.640000000000000013, 0.5, 0.170000000000000012, 1.4544999999999999, 0.642000000000000015, 0.357499999999999984, 0.353999999999999981, 9), -(2433, 'F', 0.609999999999999987, 0.484999999999999987, 0.165000000000000008, 1.08699999999999997, 0.425499999999999989, 0.232000000000000012, 0.380000000000000004, 11), -(552, 'I', 0.614999999999999991, 0.489999999999999991, 0.154999999999999999, 0.988500000000000045, 0.41449999999999998, 0.195000000000000007, 0.344999999999999973, 13), -(1425, 'F', 0.729999999999999982, 0.57999999999999996, 0.190000000000000002, 1.73750000000000004, 0.678499999999999992, 0.434499999999999997, 0.520000000000000018, 11), -(2402, 'F', 0.584999999999999964, 0.41499999999999998, 0.154999999999999999, 0.69850000000000001, 0.299999999999999989, 0.145999999999999991, 0.195000000000000007, 12), -(1748, 'F', 0.699999999999999956, 0.535000000000000031, 0.174999999999999989, 1.77299999999999991, 0.680499999999999994, 0.479999999999999982, 0.512000000000000011, 15), -(3983, 'I', 0.57999999999999996, 0.434999999999999998, 0.149999999999999994, 0.891499999999999959, 0.362999999999999989, 0.192500000000000004, 0.251500000000000001, 6), -(335, 'F', 0.739999999999999991, 0.599999999999999978, 0.195000000000000007, 1.97399999999999998, 0.597999999999999976, 0.408499999999999974, 0.709999999999999964, 16), -(1587, 'I', 0.515000000000000013, 0.349999999999999978, 0.104999999999999996, 0.474499999999999977, 0.212999999999999995, 0.122999999999999998, 0.127500000000000002, 10), -(2448, 'I', 0.275000000000000022, 0.204999999999999988, 0.0800000000000000017, 0.096000000000000002, 0.0359999999999999973, 0.0184999999999999991, 0.0299999999999999989, 6), -(1362, 'F', 0.604999999999999982, 0.474999999999999978, 0.174999999999999989, 1.07600000000000007, 0.463000000000000023, 0.219500000000000001, 0.33500000000000002, 9), -(2799, 'M', 0.640000000000000013, 0.484999999999999987, 0.149999999999999994, 1.09800000000000009, 0.519499999999999962, 0.222000000000000003, 0.317500000000000004, 10), -(1413, 'F', 0.67000000000000004, 0.505000000000000004, 0.174999999999999989, 1.01449999999999996, 0.4375, 0.271000000000000019, 0.3745, 10), -(1739, 'F', 0.67000000000000004, 0.540000000000000036, 0.195000000000000007, 1.61899999999999999, 0.739999999999999991, 0.330500000000000016, 0.465000000000000024, 11), -(1152, 'M', 0.584999999999999964, 0.465000000000000024, 0.160000000000000003, 0.955500000000000016, 0.45950000000000002, 0.235999999999999988, 0.265000000000000013, 7), -(2427, 'I', 0.564999999999999947, 0.434999999999999998, 0.154999999999999999, 0.782000000000000028, 0.271500000000000019, 0.16800000000000001, 0.284999999999999976, 14), -(1777, 'M', 0.484999999999999987, 0.369999999999999996, 0.154999999999999999, 0.967999999999999972, 0.418999999999999984, 0.245499999999999996, 0.236499999999999988, 9), -(3294, 'M', 0.574999999999999956, 0.455000000000000016, 0.184999999999999998, 1.15599999999999992, 0.552499999999999991, 0.242999999999999994, 0.294999999999999984, 13), -(1403, 'M', 0.650000000000000022, 0.510000000000000009, 0.190000000000000002, 1.54200000000000004, 0.715500000000000025, 0.373499999999999999, 0.375, 9), -(2256, 'M', 0.510000000000000009, 0.395000000000000018, 0.14499999999999999, 0.61850000000000005, 0.215999999999999998, 0.138500000000000012, 0.239999999999999991, 12), -(3984, 'F', 0.584999999999999964, 0.450000000000000011, 0.125, 0.873999999999999999, 0.354499999999999982, 0.20749999999999999, 0.225000000000000006, 6), -(1116, 'M', 0.525000000000000022, 0.405000000000000027, 0.119999999999999996, 0.755499999999999949, 0.3755, 0.155499999999999999, 0.201000000000000012, 9), -(1366, 'M', 0.609999999999999987, 0.474999999999999978, 0.170000000000000012, 1.02649999999999997, 0.434999999999999998, 0.233500000000000013, 0.303499999999999992, 10), -(3759, 'I', 0.525000000000000022, 0.400000000000000022, 0.140000000000000013, 0.605500000000000038, 0.260500000000000009, 0.107999999999999999, 0.209999999999999992, 9); - - -DROP TABLE IF EXISTS abalone_probit_out, abalone_probit_out_summary; +----------------------- binomial with probit ------------------------------- +DROP TABLE IF EXISTS abalone_binomial_out, abalone_binomial_out_summary; SELECT glm( 'abalone', - 'abalone_probit_out', + 'abalone_binomial_out', 'rings < 10', 'ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]', 'family=binomial, link=probit', NULL, 'max_iter=1000, tolerance=1e-16' ); -DROP TABLE IF EXISTS abalone_logit_out, abalone_logit_out_summary; +-- Test probabilities +DROP TABLE IF EXISTS glm_predict_binomial_probit_out; CREATE TABLE glm_predict_binomial_probit_out as SELECT id, glm_predict(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'probit') +FROM abalone_binomial_out, abalone; +SELECT test_pmml_output('abalone', 'abalone_binomial_out', 'glm_predict_binomial_probit_out', 'id', 'glm_predict', 'probability_true'); + +-- Test output category +DROP TABLE IF EXISTS glm_predict_binomial_probit_out; CREATE TABLE glm_predict_binomial_probit_out as SELECT id, glm_predict_binomial(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'probit') +FROM abalone_binomial_out, abalone; +SELECT test_pmml_output('abalone', 'abalone_binomial_out', 'glm_predict_binomial_probit_out', 'id', 'glm_predict_binomial', 'predicted_(rings < 10)_pmml_prediction'); + +----------------------- binomial with logit ------------------------------- +DROP TABLE IF EXISTS abalone_binomial_logit_out, abalone_binomial_logit_out_summary; SELECT glm( 'abalone', - 'abalone_logit_out', + 'abalone_binomial_logit_out', 'rings < 10', - 'ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]', + 'ARRAY[1.0, length, diameter, height, whole, shucked, viscera, shell]', 'family=binomial, link=logit', NULL, 'max_iter=1000, tolerance=1e-16' ); +-- Test probabilities +DROP TABLE IF EXISTS glm_predict_binomial_logit_out; CREATE TABLE glm_predict_binomial_logit_out as SELECT id, glm_predict(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'logit') +FROM abalone_binomial_logit_out, abalone; +SELECT test_pmml_output('abalone', 'abalone_binomial_logit_out', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true'); + +-- Test output category +DROP TABLE IF EXISTS glm_predict_binomial_logit_out; CREATE TABLE glm_predict_binomial_logit_out as SELECT id, glm_predict_binomial(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'logit') +FROM abalone_binomial_logit_out, abalone; +SELECT test_pmml_output('abalone', 'abalone_binomial_logit_out', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_(rings < 10)_pmml_prediction'); + + +------------------------------------------------ Without intercept -------------------------------------------------- + +----------------------- binomial with probit and grouping col------------------------------- +DROP TABLE IF EXISTS abalone_binomial_nointercept_out, abalone_binomial_nointercept_out_summary; +SELECT glm( + 'abalone', + 'abalone_binomial_nointercept_out', + 'rings < 10', + 'ARRAY[length, diameter, height, whole, shucked, viscera, shell]', + 'family=binomial, link=probit', NULL, 'max_iter=1000, tolerance=1e-16' +); + +-- Test probabilities +DROP TABLE IF EXISTS glm_predict_binomial_probit_out; CREATE TABLE glm_predict_binomial_probit_out as SELECT id, glm_predict(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'probit') +FROM abalone_binomial_nointercept_out, abalone; +SELECT test_pmml_output('abalone', 'abalone_binomial_nointercept_out', 'glm_predict_binomial_probit_out', 'id', 'glm_predict', 'probability_true'); + +-- Test output category +DROP TABLE IF EXISTS glm_predict_binomial_probit_out; CREATE TABLE glm_predict_binomial_probit_out as SELECT id, glm_predict_binomial(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'probit') +FROM abalone_binomial_nointercept_out, abalone; +SELECT test_pmml_output('abalone', 'abalone_binomial_nointercept_out', 'glm_predict_binomial_probit_out', 'id', 'glm_predict_binomial', 'predicted_(rings < 10)_pmml_prediction'); + +----------------------- binomial with logit ------------------------------- +DROP TABLE IF EXISTS abalone_binomial_nointercept_logit_out, abalone_binomial_nointercept_logit_out_summary; +SELECT glm( + 'abalone', + 'abalone_binomial_nointercept_logit_out', + 'rings < 10', + 'ARRAY[length, diameter, height, whole, shucked, viscera, shell]', + 'family=binomial, link=logit', NULL, 'max_iter=1000, tolerance=1e-16' +); + +-- Test probabilities +DROP TABLE IF EXISTS glm_predict_binomial_logit_out; CREATE TABLE glm_predict_binomial_logit_out as SELECT id, glm_predict(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'logit') +FROM abalone_binomial_nointercept_logit_out, abalone; +SELECT test_pmml_output('abalone', 'abalone_binomial_nointercept_logit_out', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true'); + +-- Test output category +DROP TABLE IF EXISTS glm_predict_binomial_logit_out; CREATE TABLE glm_predict_binomial_logit_out as SELECT id, glm_predict_binomial(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'logit') +FROM abalone_binomial_nointercept_logit_out, abalone; +SELECT test_pmml_output('abalone', 'abalone_binomial_nointercept_logit_out', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_(rings < 10)_pmml_prediction'); + + +------------------------------------------------ with 0 intercept -------------------------------------------------- +DROP TABLE IF EXISTS abalone_binomial_zerointercept_out, abalone_binomial_zerointercept_out_summary; +SELECT glm( + 'abalone', + 'abalone_binomial_zerointercept_out', + 'rings < 10', + 'ARRAY[0, length, diameter, height, whole, shucked, viscera, shell]', + 'family=binomial, link=probit', NULL, 'max_iter=1000, tolerance=1e-16' +); -SELECT pmml('abalone_probit_out'); -SELECT pmml('abalone_logit_out'); +-- Test probabilities +DROP TABLE IF EXISTS glm_predict_binomial_probit_out; CREATE TABLE glm_predict_binomial_probit_out as SELECT id, glm_predict(coef, ARRAY[0, length, diameter, height, whole, shucked, viscera, shell], 'probit') +FROM abalone_binomial_zerointercept_out, abalone; +SELECT test_pmml_output('abalone', 'abalone_binomial_zerointercept_out', 'glm_predict_binomial_probit_out', 'id', 'glm_predict', 'probability_true'); +-- Test output category +DROP TABLE IF EXISTS glm_predict_binomial_probit_out; CREATE TABLE glm_predict_binomial_probit_out as SELECT id, glm_predict_binomial(coef, ARRAY[0, length, diameter, height, whole, shucked, viscera, shell], 'probit') +FROM abalone_binomial_zerointercept_out, abalone; +SELECT test_pmml_output('abalone', 'abalone_binomial_zerointercept_out', 'glm_predict_binomial_probit_out', 'id', 'glm_predict_binomial', 'predicted_(rings < 10)_pmml_prediction'); diff --git a/src/ports/postgres/modules/pmml/test/pmml_glm_gamma.sql_in b/src/ports/postgres/modules/pmml/test/pmml_glm_gamma.sql_in index e8b56066..e10a827d 100644 --- a/src/ports/postgres/modules/pmml/test/pmml_glm_gamma.sql_in +++ b/src/ports/postgres/modules/pmml/test/pmml_glm_gamma.sql_in @@ -1,81 +1,11 @@ -DROP TABLE IF EXISTS abalone CASCADE; - -CREATE TABLE abalone ( - id integer, - sex text, - length double precision, - diameter double precision, - height double precision, - whole double precision, - shucked double precision, - viscera double precision, - shell double precision, - rings integer -); - -INSERT INTO abalone VALUES -(3151, 'F', 0.655000000000000027, 0.505000000000000004, 0.165000000000000008, 1.36699999999999999, 0.583500000000000019, 0.351499999999999979, 0.396000000000000019, 10), -(2026, 'F', 0.550000000000000044, 0.469999999999999973, 0.149999999999999994, 0.920499999999999985, 0.381000000000000005, 0.243499999999999994, 0.267500000000000016, 10), -(3751, 'I', 0.434999999999999998, 0.375, 0.110000000000000001, 0.41549999999999998, 0.170000000000000012, 0.0759999999999999981, 0.14499999999999999, 8), -(720, 'I', 0.149999999999999994, 0.100000000000000006, 0.0250000000000000014, 0.0149999999999999994, 0.00449999999999999966, 0.00400000000000000008, 0.0050000000000000001, 2), -(1635, 'F', 0.574999999999999956, 0.469999999999999973, 0.154999999999999999, 1.1160000000000001, 0.509000000000000008, 0.237999999999999989, 0.340000000000000024, 10), -(2648, 'I', 0.5, 0.390000000000000013, 0.125, 0.582999999999999963, 0.293999999999999984, 0.132000000000000006, 0.160500000000000004, 8), -(1796, 'F', 0.57999999999999996, 0.429999999999999993, 0.170000000000000012, 1.47999999999999998, 0.65349999999999997, 0.32400000000000001, 0.41549999999999998, 10), -(209, 'F', 0.525000000000000022, 0.41499999999999998, 0.170000000000000012, 0.832500000000000018, 0.275500000000000023, 0.168500000000000011, 0.309999999999999998, 13), -(1451, 'I', 0.455000000000000016, 0.33500000000000002, 0.135000000000000009, 0.501000000000000001, 0.274000000000000021, 0.0995000000000000051, 0.106499999999999997, 7), -(1108, 'I', 0.510000000000000009, 0.380000000000000004, 0.115000000000000005, 0.515499999999999958, 0.214999999999999997, 0.113500000000000004, 0.166000000000000009, 8), -(3675, 'F', 0.594999999999999973, 0.450000000000000011, 0.165000000000000008, 1.08099999999999996, 0.489999999999999991, 0.252500000000000002, 0.279000000000000026, 12), -(2108, 'F', 0.675000000000000044, 0.550000000000000044, 0.179999999999999993, 1.68849999999999989, 0.562000000000000055, 0.370499999999999996, 0.599999999999999978, 15), -(3312, 'F', 0.479999999999999982, 0.380000000000000004, 0.135000000000000009, 0.507000000000000006, 0.191500000000000004, 0.13650000000000001, 0.154999999999999999, 12), -(882, 'M', 0.655000000000000027, 0.520000000000000018, 0.165000000000000008, 1.40949999999999998, 0.585999999999999965, 0.290999999999999981, 0.405000000000000027, 9), -(3402, 'M', 0.479999999999999982, 0.395000000000000018, 0.149999999999999994, 0.681499999999999995, 0.214499999999999996, 0.140500000000000014, 0.2495, 18), -(829, 'I', 0.409999999999999976, 0.325000000000000011, 0.100000000000000006, 0.394000000000000017, 0.20799999999999999, 0.0655000000000000027, 0.105999999999999997, 6), -(1305, 'M', 0.535000000000000031, 0.434999999999999998, 0.149999999999999994, 0.716999999999999971, 0.347499999999999976, 0.14449999999999999, 0.194000000000000006, 9), -(3613, 'M', 0.599999999999999978, 0.46000000000000002, 0.179999999999999993, 1.1399999999999999, 0.422999999999999987, 0.257500000000000007, 0.364999999999999991, 10), -(1068, 'I', 0.340000000000000024, 0.265000000000000013, 0.0800000000000000017, 0.201500000000000012, 0.0899999999999999967, 0.0475000000000000006, 0.0550000000000000003, 5), -(2446, 'M', 0.5, 0.380000000000000004, 0.135000000000000009, 0.583500000000000019, 0.22950000000000001, 0.126500000000000001, 0.179999999999999993, 12), -(1393, 'M', 0.635000000000000009, 0.474999999999999978, 0.170000000000000012, 1.19350000000000001, 0.520499999999999963, 0.269500000000000017, 0.366499999999999992, 10), -(359, 'M', 0.744999999999999996, 0.584999999999999964, 0.214999999999999997, 2.49900000000000011, 0.92649999999999999, 0.471999999999999975, 0.699999999999999956, 17), -(549, 'F', 0.564999999999999947, 0.450000000000000011, 0.160000000000000003, 0.79500000000000004, 0.360499999999999987, 0.155499999999999999, 0.23000000000000001, 12), -(1154, 'F', 0.599999999999999978, 0.474999999999999978, 0.160000000000000003, 1.02649999999999997, 0.484999999999999987, 0.2495, 0.256500000000000006, 9), -(1790, 'F', 0.54500000000000004, 0.385000000000000009, 0.149999999999999994, 1.11850000000000005, 0.542499999999999982, 0.244499999999999995, 0.284499999999999975, 9), -(3703, 'F', 0.665000000000000036, 0.540000000000000036, 0.195000000000000007, 1.76400000000000001, 0.850500000000000034, 0.361499999999999988, 0.469999999999999973, 11), -(1962, 'F', 0.655000000000000027, 0.515000000000000013, 0.179999999999999993, 1.41199999999999992, 0.619500000000000051, 0.248499999999999999, 0.496999999999999997, 11), -(1665, 'I', 0.604999999999999982, 0.469999999999999973, 0.14499999999999999, 0.802499999999999991, 0.379000000000000004, 0.226500000000000007, 0.220000000000000001, 9), -(635, 'M', 0.359999999999999987, 0.294999999999999984, 0.100000000000000006, 0.210499999999999993, 0.0660000000000000031, 0.0524999999999999981, 0.0749999999999999972, 9), -(3901, 'M', 0.445000000000000007, 0.344999999999999973, 0.140000000000000013, 0.475999999999999979, 0.205499999999999988, 0.101500000000000007, 0.108499999999999999, 15), -(2734, 'I', 0.41499999999999998, 0.33500000000000002, 0.100000000000000006, 0.357999999999999985, 0.169000000000000011, 0.067000000000000004, 0.104999999999999996, 7), -(3856, 'M', 0.409999999999999976, 0.33500000000000002, 0.115000000000000005, 0.440500000000000003, 0.190000000000000002, 0.0850000000000000061, 0.135000000000000009, 8), -(827, 'I', 0.395000000000000018, 0.28999999999999998, 0.0950000000000000011, 0.303999999999999992, 0.127000000000000002, 0.0840000000000000052, 0.076999999999999999, 6), -(3381, 'I', 0.190000000000000002, 0.130000000000000004, 0.0449999999999999983, 0.0264999999999999993, 0.00899999999999999932, 0.0050000000000000001, 0.00899999999999999932, 5), -(3972, 'I', 0.400000000000000022, 0.294999999999999984, 0.0950000000000000011, 0.252000000000000002, 0.110500000000000001, 0.0575000000000000025, 0.0660000000000000031, 6), -(1155, 'M', 0.599999999999999978, 0.455000000000000016, 0.170000000000000012, 1.1915, 0.695999999999999952, 0.239499999999999991, 0.239999999999999991, 8), -(3467, 'M', 0.640000000000000013, 0.5, 0.170000000000000012, 1.4544999999999999, 0.642000000000000015, 0.357499999999999984, 0.353999999999999981, 9), -(2433, 'F', 0.609999999999999987, 0.484999999999999987, 0.165000000000000008, 1.08699999999999997, 0.425499999999999989, 0.232000000000000012, 0.380000000000000004, 11), -(552, 'I', 0.614999999999999991, 0.489999999999999991, 0.154999999999999999, 0.988500000000000045, 0.41449999999999998, 0.195000000000000007, 0.344999999999999973, 13), -(1425, 'F', 0.729999999999999982, 0.57999999999999996, 0.190000000000000002, 1.73750000000000004, 0.678499999999999992, 0.434499999999999997, 0.520000000000000018, 11), -(2402, 'F', 0.584999999999999964, 0.41499999999999998, 0.154999999999999999, 0.69850000000000001, 0.299999999999999989, 0.145999999999999991, 0.195000000000000007, 12), -(1748, 'F', 0.699999999999999956, 0.535000000000000031, 0.174999999999999989, 1.77299999999999991, 0.680499999999999994, 0.479999999999999982, 0.512000000000000011, 15), -(3983, 'I', 0.57999999999999996, 0.434999999999999998, 0.149999999999999994, 0.891499999999999959, 0.362999999999999989, 0.192500000000000004, 0.251500000000000001, 6), -(335, 'F', 0.739999999999999991, 0.599999999999999978, 0.195000000000000007, 1.97399999999999998, 0.597999999999999976, 0.408499999999999974, 0.709999999999999964, 16), -(1587, 'I', 0.515000000000000013, 0.349999999999999978, 0.104999999999999996, 0.474499999999999977, 0.212999999999999995, 0.122999999999999998, 0.127500000000000002, 10), -(2448, 'I', 0.275000000000000022, 0.204999999999999988, 0.0800000000000000017, 0.096000000000000002, 0.0359999999999999973, 0.0184999999999999991, 0.0299999999999999989, 6), -(1362, 'F', 0.604999999999999982, 0.474999999999999978, 0.174999999999999989, 1.07600000000000007, 0.463000000000000023, 0.219500000000000001, 0.33500000000000002, 9), -(2799, 'M', 0.640000000000000013, 0.484999999999999987, 0.149999999999999994, 1.09800000000000009, 0.519499999999999962, 0.222000000000000003, 0.317500000000000004, 10), -(1413, 'F', 0.67000000000000004, 0.505000000000000004, 0.174999999999999989, 1.01449999999999996, 0.4375, 0.271000000000000019, 0.3745, 10), -(1739, 'F', 0.67000000000000004, 0.540000000000000036, 0.195000000000000007, 1.61899999999999999, 0.739999999999999991, 0.330500000000000016, 0.465000000000000024, 11), -(1152, 'M', 0.584999999999999964, 0.465000000000000024, 0.160000000000000003, 0.955500000000000016, 0.45950000000000002, 0.235999999999999988, 0.265000000000000013, 7), -(2427, 'I', 0.564999999999999947, 0.434999999999999998, 0.154999999999999999, 0.782000000000000028, 0.271500000000000019, 0.16800000000000001, 0.284999999999999976, 14), -(1777, 'M', 0.484999999999999987, 0.369999999999999996, 0.154999999999999999, 0.967999999999999972, 0.418999999999999984, 0.245499999999999996, 0.236499999999999988, 9), -(3294, 'M', 0.574999999999999956, 0.455000000000000016, 0.184999999999999998, 1.15599999999999992, 0.552499999999999991, 0.242999999999999994, 0.294999999999999984, 13), -(1403, 'M', 0.650000000000000022, 0.510000000000000009, 0.190000000000000002, 1.54200000000000004, 0.715500000000000025, 0.373499999999999999, 0.375, 9), -(2256, 'M', 0.510000000000000009, 0.395000000000000018, 0.14499999999999999, 0.61850000000000005, 0.215999999999999998, 0.138500000000000012, 0.239999999999999991, 12), -(3984, 'F', 0.584999999999999964, 0.450000000000000011, 0.125, 0.873999999999999999, 0.354499999999999982, 0.20749999999999999, 0.225000000000000006, 6), -(1116, 'M', 0.525000000000000022, 0.405000000000000027, 0.119999999999999996, 0.755499999999999949, 0.3755, 0.155499999999999999, 0.201000000000000012, 9), -(1366, 'M', 0.609999999999999987, 0.474999999999999978, 0.170000000000000012, 1.02649999999999997, 0.434999999999999998, 0.233500000000000013, 0.303499999999999992, 10), -(3759, 'I', 0.525000000000000022, 0.400000000000000022, 0.140000000000000013, 0.605500000000000038, 0.260500000000000009, 0.107999999999999999, 0.209999999999999992, 9); +\i m4_regexp(MADLIB_LIBRARY_PATH, + `\(.*\)/lib', + `\1/../modules/pmml/test/pmml.setup.sql_in' +) +m4_changequote(`<!', `!>'') +-------------------------------------------------- With intercept ------------------------------------------------------------------------- DROP TABLE IF EXISTS abalone_out_inverse, abalone_out_inverse_summary; SELECT glm( 'abalone', @@ -84,6 +14,10 @@ SELECT glm( 'ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]', 'family=gamma, link=inverse', NULL, 'max_iter=1000, tolerance=1e-16' ); +DROP TABLE IF EXISTS glm_predict_gamma_out; CREATE TABLE glm_predict_gamma_out as SELECT id, glm_predict(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'inverse') +FROM abalone_out_inverse, abalone; +SELECT test_pmml_output('abalone', 'abalone_out_inverse', 'glm_predict_gamma_out', 'id', 'glm_predict', 'predicted_rings_pmml_prediction'); + DROP TABLE IF EXISTS abalone_out_identity, abalone_out_identity_summary; SELECT glm( @@ -93,6 +27,10 @@ SELECT glm( 'ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]', 'family=gamma, link=identity', NULL, 'max_iter=1000, tolerance=1e-16' ); +DROP TABLE IF EXISTS glm_predict_gamma_out; CREATE TABLE glm_predict_gamma_out as SELECT id, glm_predict(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'identity') +FROM abalone_out_identity, abalone; +SELECT test_pmml_output('abalone', 'abalone_out_identity', 'glm_predict_gamma_out', 'id', 'glm_predict', 'predicted_rings_pmml_prediction'); + DROP TABLE IF EXISTS abalone_out_log, abalone_out_log_summary; SELECT glm( @@ -102,21 +40,50 @@ SELECT glm( 'ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]', 'family=gamma, link=log', NULL, 'max_iter=1000, tolerance=1e-16' ); +DROP TABLE IF EXISTS glm_predict_gamma_out; CREATE TABLE glm_predict_gamma_out as SELECT id, glm_predict(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'log') +FROM abalone_out_log, abalone; +SELECT test_pmml_output('abalone', 'abalone_out_log', 'glm_predict_gamma_out', 'id', 'glm_predict', 'predicted_rings_pmml_prediction'); -DROP TABLE IF EXISTS abalone_out_log_grp, abalone_out_log_grp_summary; +-------------------------------------------------- Without intercept ------------------------------------------------------------------------- +DROP TABLE IF EXISTS abalone_out_inverse_nointercept, abalone_out_inverse_nointercept_summary; SELECT glm( 'abalone', - 'abalone_out_log_grp', + 'abalone_out_inverse_nointercept', 'rings', - 'ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]', - 'family=gamma, link=log', 'sex', 'max_iter=1000, tolerance=1e-16' + 'ARRAY[length, diameter, height, whole, shucked, viscera, shell]', + 'family=gamma, link=inverse', NULL, 'max_iter=1000, tolerance=1e-16' ); +DROP TABLE IF EXISTS glm_predict_gamma_out; CREATE TABLE glm_predict_gamma_out as SELECT id, glm_predict(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'inverse') +FROM abalone_out_inverse_nointercept, abalone; +SELECT test_pmml_output('abalone', 'abalone_out_inverse_nointercept', 'glm_predict_gamma_out', 'id', 'glm_predict', 'predicted_rings_pmml_prediction'); + +DROP TABLE IF EXISTS abalone_out_identity_nointercept, abalone_out_identity_nointercept_summary; +SELECT glm( + 'abalone', + 'abalone_out_identity_nointercept', + 'rings', + 'ARRAY[length, diameter, height, whole, shucked, viscera, shell]', + 'family=gamma, link=identity', NULL, 'max_iter=1000, tolerance=1e-16' +); +DROP TABLE IF EXISTS glm_predict_gamma_out; CREATE TABLE glm_predict_gamma_out as SELECT id, glm_predict(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'identity') +FROM abalone_out_identity_nointercept, abalone; +SELECT test_pmml_output('abalone', 'abalone_out_identity_nointercept', 'glm_predict_gamma_out', 'id', 'glm_predict', 'predicted_rings_pmml_prediction'); + + +DROP TABLE IF EXISTS abalone_out_log_nointercept, abalone_out_log_nointercept_summary; +SELECT glm( + 'abalone', + 'abalone_out_log_nointercept', + 'rings', + 'ARRAY[length, diameter, height, whole, shucked, viscera, shell]', + 'family=gamma, link=log', NULL, 'max_iter=1000, tolerance=1e-16' +); +DROP TABLE IF EXISTS glm_predict_gamma_out; CREATE TABLE glm_predict_gamma_out as SELECT id, glm_predict(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'log') +FROM abalone_out_log_nointercept, abalone; +SELECT test_pmml_output('abalone', 'abalone_out_log_nointercept', 'glm_predict_gamma_out', 'id', 'glm_predict', 'predicted_rings_pmml_prediction'); -SELECT pmml('abalone_out_inverse'); -SELECT pmml('abalone_out_log'); -SELECT pmml('abalone_out_identity'); -SELECT pmml('abalone_out_log_grp'); +-------------------------------------- Test pmml output ---------------------------------------------------------------- CREATE OR REPLACE FUNCTION count_expected_names(formula varchar, expected varchar[]) RETURNS bigint AS $$ @@ -149,15 +116,15 @@ $$ LANGUAGE sql; SELECT assert( count_expected_names( NULL, - ARRAY['rings_pmml_prediction', '1', 'length', 'diameter', 'height', 'whole', 'shucked', 'viscera', 'shell'] - ) = 9, + ARRAY['rings_pmml_prediction', 'length', 'diameter', 'height', 'whole', 'shucked', 'viscera', 'shell'] + ) = 8, 'default case'); SELECT assert( count_expected_names( - 'rings ~ 1 + length + diameter + height', - ARRAY['rings', '1', 'length', 'diameter', 'height'] - ) = 5, + 'rings ~ length + diameter + height', + ARRAY['rings', 'length', 'diameter', 'height'] + ) = 4, text(pmml('abalone_out_inverse', 'rings ~ 1 + length + diameter + height'))); diff --git a/src/ports/postgres/modules/pmml/test/pmml_glm_ig.sql_in b/src/ports/postgres/modules/pmml/test/pmml_glm_ig.sql_in index ee07de53..a99b3056 100644 --- a/src/ports/postgres/modules/pmml/test/pmml_glm_ig.sql_in +++ b/src/ports/postgres/modules/pmml/test/pmml_glm_ig.sql_in @@ -1,81 +1,11 @@ -DROP TABLE IF EXISTS abalone; - -CREATE TABLE abalone ( - id integer, - sex text, - length double precision, - diameter double precision, - height double precision, - whole double precision, - shucked double precision, - viscera double precision, - shell double precision, - rings integer -); - -INSERT INTO abalone VALUES -(3151, 'F', 0.655000000000000027, 0.505000000000000004, 0.165000000000000008, 1.36699999999999999, 0.583500000000000019, 0.351499999999999979, 0.396000000000000019, 10), -(2026, 'F', 0.550000000000000044, 0.469999999999999973, 0.149999999999999994, 0.920499999999999985, 0.381000000000000005, 0.243499999999999994, 0.267500000000000016, 10), -(3751, 'I', 0.434999999999999998, 0.375, 0.110000000000000001, 0.41549999999999998, 0.170000000000000012, 0.0759999999999999981, 0.14499999999999999, 8), -(720, 'I', 0.149999999999999994, 0.100000000000000006, 0.0250000000000000014, 0.0149999999999999994, 0.00449999999999999966, 0.00400000000000000008, 0.0050000000000000001, 2), -(1635, 'F', 0.574999999999999956, 0.469999999999999973, 0.154999999999999999, 1.1160000000000001, 0.509000000000000008, 0.237999999999999989, 0.340000000000000024, 10), -(2648, 'I', 0.5, 0.390000000000000013, 0.125, 0.582999999999999963, 0.293999999999999984, 0.132000000000000006, 0.160500000000000004, 8), -(1796, 'F', 0.57999999999999996, 0.429999999999999993, 0.170000000000000012, 1.47999999999999998, 0.65349999999999997, 0.32400000000000001, 0.41549999999999998, 10), -(209, 'F', 0.525000000000000022, 0.41499999999999998, 0.170000000000000012, 0.832500000000000018, 0.275500000000000023, 0.168500000000000011, 0.309999999999999998, 13), -(1451, 'I', 0.455000000000000016, 0.33500000000000002, 0.135000000000000009, 0.501000000000000001, 0.274000000000000021, 0.0995000000000000051, 0.106499999999999997, 7), -(1108, 'I', 0.510000000000000009, 0.380000000000000004, 0.115000000000000005, 0.515499999999999958, 0.214999999999999997, 0.113500000000000004, 0.166000000000000009, 8), -(3675, 'F', 0.594999999999999973, 0.450000000000000011, 0.165000000000000008, 1.08099999999999996, 0.489999999999999991, 0.252500000000000002, 0.279000000000000026, 12), -(2108, 'F', 0.675000000000000044, 0.550000000000000044, 0.179999999999999993, 1.68849999999999989, 0.562000000000000055, 0.370499999999999996, 0.599999999999999978, 15), -(3312, 'F', 0.479999999999999982, 0.380000000000000004, 0.135000000000000009, 0.507000000000000006, 0.191500000000000004, 0.13650000000000001, 0.154999999999999999, 12), -(882, 'M', 0.655000000000000027, 0.520000000000000018, 0.165000000000000008, 1.40949999999999998, 0.585999999999999965, 0.290999999999999981, 0.405000000000000027, 9), -(3402, 'M', 0.479999999999999982, 0.395000000000000018, 0.149999999999999994, 0.681499999999999995, 0.214499999999999996, 0.140500000000000014, 0.2495, 18), -(829, 'I', 0.409999999999999976, 0.325000000000000011, 0.100000000000000006, 0.394000000000000017, 0.20799999999999999, 0.0655000000000000027, 0.105999999999999997, 6), -(1305, 'M', 0.535000000000000031, 0.434999999999999998, 0.149999999999999994, 0.716999999999999971, 0.347499999999999976, 0.14449999999999999, 0.194000000000000006, 9), -(3613, 'M', 0.599999999999999978, 0.46000000000000002, 0.179999999999999993, 1.1399999999999999, 0.422999999999999987, 0.257500000000000007, 0.364999999999999991, 10), -(1068, 'I', 0.340000000000000024, 0.265000000000000013, 0.0800000000000000017, 0.201500000000000012, 0.0899999999999999967, 0.0475000000000000006, 0.0550000000000000003, 5), -(2446, 'M', 0.5, 0.380000000000000004, 0.135000000000000009, 0.583500000000000019, 0.22950000000000001, 0.126500000000000001, 0.179999999999999993, 12), -(1393, 'M', 0.635000000000000009, 0.474999999999999978, 0.170000000000000012, 1.19350000000000001, 0.520499999999999963, 0.269500000000000017, 0.366499999999999992, 10), -(359, 'M', 0.744999999999999996, 0.584999999999999964, 0.214999999999999997, 2.49900000000000011, 0.92649999999999999, 0.471999999999999975, 0.699999999999999956, 17), -(549, 'F', 0.564999999999999947, 0.450000000000000011, 0.160000000000000003, 0.79500000000000004, 0.360499999999999987, 0.155499999999999999, 0.23000000000000001, 12), -(1154, 'F', 0.599999999999999978, 0.474999999999999978, 0.160000000000000003, 1.02649999999999997, 0.484999999999999987, 0.2495, 0.256500000000000006, 9), -(1790, 'F', 0.54500000000000004, 0.385000000000000009, 0.149999999999999994, 1.11850000000000005, 0.542499999999999982, 0.244499999999999995, 0.284499999999999975, 9), -(3703, 'F', 0.665000000000000036, 0.540000000000000036, 0.195000000000000007, 1.76400000000000001, 0.850500000000000034, 0.361499999999999988, 0.469999999999999973, 11), -(1962, 'F', 0.655000000000000027, 0.515000000000000013, 0.179999999999999993, 1.41199999999999992, 0.619500000000000051, 0.248499999999999999, 0.496999999999999997, 11), -(1665, 'I', 0.604999999999999982, 0.469999999999999973, 0.14499999999999999, 0.802499999999999991, 0.379000000000000004, 0.226500000000000007, 0.220000000000000001, 9), -(635, 'M', 0.359999999999999987, 0.294999999999999984, 0.100000000000000006, 0.210499999999999993, 0.0660000000000000031, 0.0524999999999999981, 0.0749999999999999972, 9), -(3901, 'M', 0.445000000000000007, 0.344999999999999973, 0.140000000000000013, 0.475999999999999979, 0.205499999999999988, 0.101500000000000007, 0.108499999999999999, 15), -(2734, 'I', 0.41499999999999998, 0.33500000000000002, 0.100000000000000006, 0.357999999999999985, 0.169000000000000011, 0.067000000000000004, 0.104999999999999996, 7), -(3856, 'M', 0.409999999999999976, 0.33500000000000002, 0.115000000000000005, 0.440500000000000003, 0.190000000000000002, 0.0850000000000000061, 0.135000000000000009, 8), -(827, 'I', 0.395000000000000018, 0.28999999999999998, 0.0950000000000000011, 0.303999999999999992, 0.127000000000000002, 0.0840000000000000052, 0.076999999999999999, 6), -(3381, 'I', 0.190000000000000002, 0.130000000000000004, 0.0449999999999999983, 0.0264999999999999993, 0.00899999999999999932, 0.0050000000000000001, 0.00899999999999999932, 5), -(3972, 'I', 0.400000000000000022, 0.294999999999999984, 0.0950000000000000011, 0.252000000000000002, 0.110500000000000001, 0.0575000000000000025, 0.0660000000000000031, 6), -(1155, 'M', 0.599999999999999978, 0.455000000000000016, 0.170000000000000012, 1.1915, 0.695999999999999952, 0.239499999999999991, 0.239999999999999991, 8), -(3467, 'M', 0.640000000000000013, 0.5, 0.170000000000000012, 1.4544999999999999, 0.642000000000000015, 0.357499999999999984, 0.353999999999999981, 9), -(2433, 'F', 0.609999999999999987, 0.484999999999999987, 0.165000000000000008, 1.08699999999999997, 0.425499999999999989, 0.232000000000000012, 0.380000000000000004, 11), -(552, 'I', 0.614999999999999991, 0.489999999999999991, 0.154999999999999999, 0.988500000000000045, 0.41449999999999998, 0.195000000000000007, 0.344999999999999973, 13), -(1425, 'F', 0.729999999999999982, 0.57999999999999996, 0.190000000000000002, 1.73750000000000004, 0.678499999999999992, 0.434499999999999997, 0.520000000000000018, 11), -(2402, 'F', 0.584999999999999964, 0.41499999999999998, 0.154999999999999999, 0.69850000000000001, 0.299999999999999989, 0.145999999999999991, 0.195000000000000007, 12), -(1748, 'F', 0.699999999999999956, 0.535000000000000031, 0.174999999999999989, 1.77299999999999991, 0.680499999999999994, 0.479999999999999982, 0.512000000000000011, 15), -(3983, 'I', 0.57999999999999996, 0.434999999999999998, 0.149999999999999994, 0.891499999999999959, 0.362999999999999989, 0.192500000000000004, 0.251500000000000001, 6), -(335, 'F', 0.739999999999999991, 0.599999999999999978, 0.195000000000000007, 1.97399999999999998, 0.597999999999999976, 0.408499999999999974, 0.709999999999999964, 16), -(1587, 'I', 0.515000000000000013, 0.349999999999999978, 0.104999999999999996, 0.474499999999999977, 0.212999999999999995, 0.122999999999999998, 0.127500000000000002, 10), -(2448, 'I', 0.275000000000000022, 0.204999999999999988, 0.0800000000000000017, 0.096000000000000002, 0.0359999999999999973, 0.0184999999999999991, 0.0299999999999999989, 6), -(1362, 'F', 0.604999999999999982, 0.474999999999999978, 0.174999999999999989, 1.07600000000000007, 0.463000000000000023, 0.219500000000000001, 0.33500000000000002, 9), -(2799, 'M', 0.640000000000000013, 0.484999999999999987, 0.149999999999999994, 1.09800000000000009, 0.519499999999999962, 0.222000000000000003, 0.317500000000000004, 10), -(1413, 'F', 0.67000000000000004, 0.505000000000000004, 0.174999999999999989, 1.01449999999999996, 0.4375, 0.271000000000000019, 0.3745, 10), -(1739, 'F', 0.67000000000000004, 0.540000000000000036, 0.195000000000000007, 1.61899999999999999, 0.739999999999999991, 0.330500000000000016, 0.465000000000000024, 11), -(1152, 'M', 0.584999999999999964, 0.465000000000000024, 0.160000000000000003, 0.955500000000000016, 0.45950000000000002, 0.235999999999999988, 0.265000000000000013, 7), -(2427, 'I', 0.564999999999999947, 0.434999999999999998, 0.154999999999999999, 0.782000000000000028, 0.271500000000000019, 0.16800000000000001, 0.284999999999999976, 14), -(1777, 'M', 0.484999999999999987, 0.369999999999999996, 0.154999999999999999, 0.967999999999999972, 0.418999999999999984, 0.245499999999999996, 0.236499999999999988, 9), -(3294, 'M', 0.574999999999999956, 0.455000000000000016, 0.184999999999999998, 1.15599999999999992, 0.552499999999999991, 0.242999999999999994, 0.294999999999999984, 13), -(1403, 'M', 0.650000000000000022, 0.510000000000000009, 0.190000000000000002, 1.54200000000000004, 0.715500000000000025, 0.373499999999999999, 0.375, 9), -(2256, 'M', 0.510000000000000009, 0.395000000000000018, 0.14499999999999999, 0.61850000000000005, 0.215999999999999998, 0.138500000000000012, 0.239999999999999991, 12), -(3984, 'F', 0.584999999999999964, 0.450000000000000011, 0.125, 0.873999999999999999, 0.354499999999999982, 0.20749999999999999, 0.225000000000000006, 6), -(1116, 'M', 0.525000000000000022, 0.405000000000000027, 0.119999999999999996, 0.755499999999999949, 0.3755, 0.155499999999999999, 0.201000000000000012, 9), -(1366, 'M', 0.609999999999999987, 0.474999999999999978, 0.170000000000000012, 1.02649999999999997, 0.434999999999999998, 0.233500000000000013, 0.303499999999999992, 10), -(3759, 'I', 0.525000000000000022, 0.400000000000000022, 0.140000000000000013, 0.605500000000000038, 0.260500000000000009, 0.107999999999999999, 0.209999999999999992, 9); +\i m4_regexp(MADLIB_LIBRARY_PATH, + `\(.*\)/lib', + `\1/../modules/pmml/test/pmml.setup.sql_in' +) +m4_changequote(`<!'', `!>'') +------------ with intercept ------------------------------------ DROP TABLE IF EXISTS abalone_out, abalone_out_summary; SELECT glm( 'abalone', @@ -85,4 +15,48 @@ SELECT glm( 'family=inverse_gaussian, link=sqr_inverse', NULL, 'max_iter=1000, tolerance=1e-16' ); -SELECT pmml('abalone_out'); +DROP TABLE IF EXISTS glm_predict_inverse_gaussian_out; CREATE TABLE glm_predict_inverse_gaussian_out as SELECT id, glm_predict(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'sqr_inverse') +FROM abalone_out, abalone; +SELECT test_pmml_output('abalone', 'abalone_out', 'glm_predict_inverse_gaussian_out', 'id', 'glm_predict', 'predicted_rings_pmml_prediction'); + +------------ without intercept ------------------------------------ +DROP TABLE IF EXISTS abalone_out_nointercept, abalone_out_nointercept_summary; +SELECT glm( + 'abalone', + 'abalone_out_nointercept', + 'rings', + 'ARRAY[length, diameter, height, whole, shucked, viscera, shell]', + 'family=inverse_gaussian, link=inverse',NULL, 'max_iter=1000, tolerance=1e-16' +); + +DROP TABLE IF EXISTS glm_predict_inverse_gaussian_out; CREATE TABLE glm_predict_inverse_gaussian_out as SELECT id, glm_predict(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'inverse') +FROM abalone_out_nointercept, abalone; +SELECT test_pmml_output('abalone', 'abalone_out_nointercept', 'glm_predict_inverse_gaussian_out', 'id', 'glm_predict', 'predicted_rings_pmml_prediction'); + + +----------- with 0 intercept --------------------------------------- +DROP TABLE IF EXISTS abalone_out_zerointercept, abalone_out_zerointercept_summary; +SELECT glm( + 'abalone', + 'abalone_out_zerointercept', + 'rings', + 'ARRAY[0, length, diameter, height, whole, shucked, viscera, shell]', + 'family=inverse_gaussian, link=inverse', NULL, 'max_iter=1000, tolerance=1e-16' +); + +DROP TABLE IF EXISTS glm_predict_inverse_gaussian_out; CREATE TABLE glm_predict_inverse_gaussian_out as SELECT id, glm_predict(coef, ARRAY[0, length, diameter, height, whole, shucked, viscera, shell], 'inverse') +FROM abalone_out_zerointercept, abalone; +SELECT test_pmml_output('abalone', 'abalone_out_zerointercept', 'glm_predict_inverse_gaussian_out', 'id', 'glm_predict', 'predicted_rings_pmml_prediction'); + +DROP TABLE IF EXISTS abalone_out_zerointercept, abalone_out_zerointercept_summary; +SELECT glm( + 'abalone', + 'abalone_out_zerointercept', + 'rings', + 'ARRAY[0.0, length, diameter, height, whole, shucked, viscera, shell]', + 'family=inverse_gaussian, link=inverse', NULL, 'max_iter=1000, tolerance=1e-16' +); + +DROP TABLE IF EXISTS glm_predict_inverse_gaussian_out; CREATE TABLE glm_predict_inverse_gaussian_out as SELECT id, glm_predict(coef, ARRAY[0, length, diameter, height, whole, shucked, viscera, shell], 'inverse') +FROM abalone_out_zerointercept, abalone; +SELECT test_pmml_output('abalone', 'abalone_out_zerointercept', 'glm_predict_inverse_gaussian_out', 'id', 'glm_predict', 'predicted_rings_pmml_prediction'); diff --git a/src/ports/postgres/modules/pmml/test/pmml_glm_normal.sql_in b/src/ports/postgres/modules/pmml/test/pmml_glm_normal.sql_in index 6c80595c..7616c644 100644 --- a/src/ports/postgres/modules/pmml/test/pmml_glm_normal.sql_in +++ b/src/ports/postgres/modules/pmml/test/pmml_glm_normal.sql_in @@ -1,88 +1,101 @@ -DROP TABLE IF EXISTS abalone; - -CREATE TABLE abalone ( - id integer, - sex text, - length double precision, - diameter double precision, - height double precision, - whole double precision, - shucked double precision, - viscera double precision, - shell double precision, - rings integer +\i m4_regexp(MADLIB_LIBRARY_PATH, + `\(.*\)/lib', + `\1/../modules/pmml/test/pmml.setup.sql_in' +) + +m4_changequote(`<!'', `!>'') + +DROP TABLE IF EXISTS abalone_identity_out, abalone_identity_out_summary; +SELECT glm( + 'abalone', + 'abalone_identity_out', + 'rings', + 'ARRAY[1.0, length, diameter, height, whole, shucked, viscera, shell]', + 'family=gaussian, link=identity', NULL, 'max_iter=1000, tolerance=1e-16' ); -INSERT INTO abalone VALUES -(3151, 'F', 0.655000000000000027, 0.505000000000000004, 0.165000000000000008, 1.36699999999999999, 0.583500000000000019, 0.351499999999999979, 0.396000000000000019, 10), -(2026, 'F', 0.550000000000000044, 0.469999999999999973, 0.149999999999999994, 0.920499999999999985, 0.381000000000000005, 0.243499999999999994, 0.267500000000000016, 10), -(3751, 'I', 0.434999999999999998, 0.375, 0.110000000000000001, 0.41549999999999998, 0.170000000000000012, 0.0759999999999999981, 0.14499999999999999, 8), -(720, 'I', 0.149999999999999994, 0.100000000000000006, 0.0250000000000000014, 0.0149999999999999994, 0.00449999999999999966, 0.00400000000000000008, 0.0050000000000000001, 2), -(1635, 'F', 0.574999999999999956, 0.469999999999999973, 0.154999999999999999, 1.1160000000000001, 0.509000000000000008, 0.237999999999999989, 0.340000000000000024, 10), -(2648, 'I', 0.5, 0.390000000000000013, 0.125, 0.582999999999999963, 0.293999999999999984, 0.132000000000000006, 0.160500000000000004, 8), -(1796, 'F', 0.57999999999999996, 0.429999999999999993, 0.170000000000000012, 1.47999999999999998, 0.65349999999999997, 0.32400000000000001, 0.41549999999999998, 10), -(209, 'F', 0.525000000000000022, 0.41499999999999998, 0.170000000000000012, 0.832500000000000018, 0.275500000000000023, 0.168500000000000011, 0.309999999999999998, 13), -(1451, 'I', 0.455000000000000016, 0.33500000000000002, 0.135000000000000009, 0.501000000000000001, 0.274000000000000021, 0.0995000000000000051, 0.106499999999999997, 7), -(1108, 'I', 0.510000000000000009, 0.380000000000000004, 0.115000000000000005, 0.515499999999999958, 0.214999999999999997, 0.113500000000000004, 0.166000000000000009, 8), -(3675, 'F', 0.594999999999999973, 0.450000000000000011, 0.165000000000000008, 1.08099999999999996, 0.489999999999999991, 0.252500000000000002, 0.279000000000000026, 12), -(2108, 'F', 0.675000000000000044, 0.550000000000000044, 0.179999999999999993, 1.68849999999999989, 0.562000000000000055, 0.370499999999999996, 0.599999999999999978, 15), -(3312, 'F', 0.479999999999999982, 0.380000000000000004, 0.135000000000000009, 0.507000000000000006, 0.191500000000000004, 0.13650000000000001, 0.154999999999999999, 12), -(882, 'M', 0.655000000000000027, 0.520000000000000018, 0.165000000000000008, 1.40949999999999998, 0.585999999999999965, 0.290999999999999981, 0.405000000000000027, 9), -(3402, 'M', 0.479999999999999982, 0.395000000000000018, 0.149999999999999994, 0.681499999999999995, 0.214499999999999996, 0.140500000000000014, 0.2495, 18), -(829, 'I', 0.409999999999999976, 0.325000000000000011, 0.100000000000000006, 0.394000000000000017, 0.20799999999999999, 0.0655000000000000027, 0.105999999999999997, 6), -(1305, 'M', 0.535000000000000031, 0.434999999999999998, 0.149999999999999994, 0.716999999999999971, 0.347499999999999976, 0.14449999999999999, 0.194000000000000006, 9), -(3613, 'M', 0.599999999999999978, 0.46000000000000002, 0.179999999999999993, 1.1399999999999999, 0.422999999999999987, 0.257500000000000007, 0.364999999999999991, 10), -(1068, 'I', 0.340000000000000024, 0.265000000000000013, 0.0800000000000000017, 0.201500000000000012, 0.0899999999999999967, 0.0475000000000000006, 0.0550000000000000003, 5), -(2446, 'M', 0.5, 0.380000000000000004, 0.135000000000000009, 0.583500000000000019, 0.22950000000000001, 0.126500000000000001, 0.179999999999999993, 12), -(1393, 'M', 0.635000000000000009, 0.474999999999999978, 0.170000000000000012, 1.19350000000000001, 0.520499999999999963, 0.269500000000000017, 0.366499999999999992, 10), -(359, 'M', 0.744999999999999996, 0.584999999999999964, 0.214999999999999997, 2.49900000000000011, 0.92649999999999999, 0.471999999999999975, 0.699999999999999956, 17), -(549, 'F', 0.564999999999999947, 0.450000000000000011, 0.160000000000000003, 0.79500000000000004, 0.360499999999999987, 0.155499999999999999, 0.23000000000000001, 12), -(1154, 'F', 0.599999999999999978, 0.474999999999999978, 0.160000000000000003, 1.02649999999999997, 0.484999999999999987, 0.2495, 0.256500000000000006, 9), -(1790, 'F', 0.54500000000000004, 0.385000000000000009, 0.149999999999999994, 1.11850000000000005, 0.542499999999999982, 0.244499999999999995, 0.284499999999999975, 9), -(3703, 'F', 0.665000000000000036, 0.540000000000000036, 0.195000000000000007, 1.76400000000000001, 0.850500000000000034, 0.361499999999999988, 0.469999999999999973, 11), -(1962, 'F', 0.655000000000000027, 0.515000000000000013, 0.179999999999999993, 1.41199999999999992, 0.619500000000000051, 0.248499999999999999, 0.496999999999999997, 11), -(1665, 'I', 0.604999999999999982, 0.469999999999999973, 0.14499999999999999, 0.802499999999999991, 0.379000000000000004, 0.226500000000000007, 0.220000000000000001, 9), -(635, 'M', 0.359999999999999987, 0.294999999999999984, 0.100000000000000006, 0.210499999999999993, 0.0660000000000000031, 0.0524999999999999981, 0.0749999999999999972, 9), -(3901, 'M', 0.445000000000000007, 0.344999999999999973, 0.140000000000000013, 0.475999999999999979, 0.205499999999999988, 0.101500000000000007, 0.108499999999999999, 15), -(2734, 'I', 0.41499999999999998, 0.33500000000000002, 0.100000000000000006, 0.357999999999999985, 0.169000000000000011, 0.067000000000000004, 0.104999999999999996, 7), -(3856, 'M', 0.409999999999999976, 0.33500000000000002, 0.115000000000000005, 0.440500000000000003, 0.190000000000000002, 0.0850000000000000061, 0.135000000000000009, 8), -(827, 'I', 0.395000000000000018, 0.28999999999999998, 0.0950000000000000011, 0.303999999999999992, 0.127000000000000002, 0.0840000000000000052, 0.076999999999999999, 6), -(3381, 'I', 0.190000000000000002, 0.130000000000000004, 0.0449999999999999983, 0.0264999999999999993, 0.00899999999999999932, 0.0050000000000000001, 0.00899999999999999932, 5), -(3972, 'I', 0.400000000000000022, 0.294999999999999984, 0.0950000000000000011, 0.252000000000000002, 0.110500000000000001, 0.0575000000000000025, 0.0660000000000000031, 6), -(1155, 'M', 0.599999999999999978, 0.455000000000000016, 0.170000000000000012, 1.1915, 0.695999999999999952, 0.239499999999999991, 0.239999999999999991, 8), -(3467, 'M', 0.640000000000000013, 0.5, 0.170000000000000012, 1.4544999999999999, 0.642000000000000015, 0.357499999999999984, 0.353999999999999981, 9), -(2433, 'F', 0.609999999999999987, 0.484999999999999987, 0.165000000000000008, 1.08699999999999997, 0.425499999999999989, 0.232000000000000012, 0.380000000000000004, 11), -(552, 'I', 0.614999999999999991, 0.489999999999999991, 0.154999999999999999, 0.988500000000000045, 0.41449999999999998, 0.195000000000000007, 0.344999999999999973, 13), -(1425, 'F', 0.729999999999999982, 0.57999999999999996, 0.190000000000000002, 1.73750000000000004, 0.678499999999999992, 0.434499999999999997, 0.520000000000000018, 11), -(2402, 'F', 0.584999999999999964, 0.41499999999999998, 0.154999999999999999, 0.69850000000000001, 0.299999999999999989, 0.145999999999999991, 0.195000000000000007, 12), -(1748, 'F', 0.699999999999999956, 0.535000000000000031, 0.174999999999999989, 1.77299999999999991, 0.680499999999999994, 0.479999999999999982, 0.512000000000000011, 15), -(3983, 'I', 0.57999999999999996, 0.434999999999999998, 0.149999999999999994, 0.891499999999999959, 0.362999999999999989, 0.192500000000000004, 0.251500000000000001, 6), -(335, 'F', 0.739999999999999991, 0.599999999999999978, 0.195000000000000007, 1.97399999999999998, 0.597999999999999976, 0.408499999999999974, 0.709999999999999964, 16), -(1587, 'I', 0.515000000000000013, 0.349999999999999978, 0.104999999999999996, 0.474499999999999977, 0.212999999999999995, 0.122999999999999998, 0.127500000000000002, 10), -(2448, 'I', 0.275000000000000022, 0.204999999999999988, 0.0800000000000000017, 0.096000000000000002, 0.0359999999999999973, 0.0184999999999999991, 0.0299999999999999989, 6), -(1362, 'F', 0.604999999999999982, 0.474999999999999978, 0.174999999999999989, 1.07600000000000007, 0.463000000000000023, 0.219500000000000001, 0.33500000000000002, 9), -(2799, 'M', 0.640000000000000013, 0.484999999999999987, 0.149999999999999994, 1.09800000000000009, 0.519499999999999962, 0.222000000000000003, 0.317500000000000004, 10), -(1413, 'F', 0.67000000000000004, 0.505000000000000004, 0.174999999999999989, 1.01449999999999996, 0.4375, 0.271000000000000019, 0.3745, 10), -(1739, 'F', 0.67000000000000004, 0.540000000000000036, 0.195000000000000007, 1.61899999999999999, 0.739999999999999991, 0.330500000000000016, 0.465000000000000024, 11), -(1152, 'M', 0.584999999999999964, 0.465000000000000024, 0.160000000000000003, 0.955500000000000016, 0.45950000000000002, 0.235999999999999988, 0.265000000000000013, 7), -(2427, 'I', 0.564999999999999947, 0.434999999999999998, 0.154999999999999999, 0.782000000000000028, 0.271500000000000019, 0.16800000000000001, 0.284999999999999976, 14), -(1777, 'M', 0.484999999999999987, 0.369999999999999996, 0.154999999999999999, 0.967999999999999972, 0.418999999999999984, 0.245499999999999996, 0.236499999999999988, 9), -(3294, 'M', 0.574999999999999956, 0.455000000000000016, 0.184999999999999998, 1.15599999999999992, 0.552499999999999991, 0.242999999999999994, 0.294999999999999984, 13), -(1403, 'M', 0.650000000000000022, 0.510000000000000009, 0.190000000000000002, 1.54200000000000004, 0.715500000000000025, 0.373499999999999999, 0.375, 9), -(2256, 'M', 0.510000000000000009, 0.395000000000000018, 0.14499999999999999, 0.61850000000000005, 0.215999999999999998, 0.138500000000000012, 0.239999999999999991, 12), -(3984, 'F', 0.584999999999999964, 0.450000000000000011, 0.125, 0.873999999999999999, 0.354499999999999982, 0.20749999999999999, 0.225000000000000006, 6), -(1116, 'M', 0.525000000000000022, 0.405000000000000027, 0.119999999999999996, 0.755499999999999949, 0.3755, 0.155499999999999999, 0.201000000000000012, 9), -(1366, 'M', 0.609999999999999987, 0.474999999999999978, 0.170000000000000012, 1.02649999999999997, 0.434999999999999998, 0.233500000000000013, 0.303499999999999992, 10), -(3759, 'I', 0.525000000000000022, 0.400000000000000022, 0.140000000000000013, 0.605500000000000038, 0.260500000000000009, 0.107999999999999999, 0.209999999999999992, 9); - - -DROP TABLE IF EXISTS abalone_out, abalone_out_summary; +DROP TABLE IF EXISTS glm_predict_normal_out; CREATE TABLE glm_predict_normal_out as SELECT id, glm_predict(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'identity') +FROM abalone_identity_out, abalone; +SELECT test_pmml_output('abalone', 'abalone_identity_out', 'glm_predict_normal_out', 'id', 'glm_predict', 'predicted_rings_pmml_prediction'); + +DROP TABLE IF EXISTS abalone_log_out, abalone_log_out_summary; SELECT glm( 'abalone', - 'abalone_out', + 'abalone_log_out', 'rings', 'ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]', + 'family=gaussian, link=log', NULL, 'max_iter=1000, tolerance=1e-16' +); + +DROP TABLE IF EXISTS glm_predict_normal_out; CREATE TABLE glm_predict_normal_out as SELECT id, glm_predict(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'log') +FROM abalone_log_out, abalone; +SELECT test_pmml_output('abalone', 'abalone_log_out', 'glm_predict_normal_out', 'id', 'glm_predict', 'predicted_rings_pmml_prediction'); + +DROP TABLE IF EXISTS abalone_inverse_out, abalone_inverse_out_summary; +SELECT glm( + 'abalone', + 'abalone_inverse_out', + 'rings', + 'ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]', + 'family=gaussian, link=inverse', NULL, 'max_iter=1000, tolerance=1e-16' +); + +DROP TABLE IF EXISTS glm_predict_normal_out; CREATE TABLE glm_predict_normal_out as SELECT id, glm_predict(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'inverse') +FROM abalone_inverse_out, abalone; +SELECT test_pmml_output('abalone', 'abalone_inverse_out', 'glm_predict_normal_out', 'id', 'glm_predict', 'predicted_rings_pmml_prediction'); + + +------------------------ without intercept ------------------------------------------------------------ +DROP TABLE IF EXISTS abalone_identity_out_nointercept, abalone_identity_out_nointercept_summary; +SELECT glm( + 'abalone', + 'abalone_identity_out_nointercept', + 'rings', + 'ARRAY[length, diameter, height, whole, shucked, viscera, shell]', + 'family=gaussian, link=identity', NULL, 'max_iter=1000, tolerance=1e-16' +); + +DROP TABLE IF EXISTS glm_predict_normal_out; CREATE TABLE glm_predict_normal_out as SELECT id, glm_predict(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'identity') +FROM abalone_identity_out_nointercept, abalone; +SELECT test_pmml_output('abalone', 'abalone_identity_out_nointercept', 'glm_predict_normal_out', 'id', 'glm_predict', 'predicted_rings_pmml_prediction'); + +DROP TABLE IF EXISTS abalone_log_out_nointercept, abalone_log_out_nointercept_summary; +SELECT glm( + 'abalone', + 'abalone_log_out_nointercept', + 'rings', + 'ARRAY[length, diameter, height, whole, shucked, viscera, shell]', + 'family=gaussian, link=log', NULL, 'max_iter=1000, tolerance=1e-16' +); + +DROP TABLE IF EXISTS glm_predict_normal_out; CREATE TABLE glm_predict_normal_out as SELECT id, glm_predict(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'log') +FROM abalone_log_out_nointercept, abalone; +SELECT test_pmml_output('abalone', 'abalone_log_out_nointercept', 'glm_predict_normal_out', 'id', 'glm_predict', 'predicted_rings_pmml_prediction'); + +DROP TABLE IF EXISTS abalone_inverse_out_nointercept, abalone_inverse_out_nointercept_summary; +SELECT glm( + 'abalone', + 'abalone_inverse_out_nointercept', + 'rings', + 'ARRAY[length, diameter, height, whole, shucked, viscera, shell]', + 'family=gaussian, link=inverse', NULL, 'max_iter=1000, tolerance=1e-16' +); + +DROP TABLE IF EXISTS glm_predict_normal_out; CREATE TABLE glm_predict_normal_out as SELECT id, glm_predict(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'inverse') +FROM abalone_inverse_out_nointercept, abalone; +SELECT test_pmml_output('abalone', 'abalone_inverse_out_nointercept', 'glm_predict_normal_out', 'id', 'glm_predict', 'predicted_rings_pmml_prediction'); + + +------------------------ with 0 intercept ------------------------------------------------------------ +DROP TABLE IF EXISTS abalone_identity_out_zerointercept, abalone_identity_out_zerointercept_summary; +SELECT glm( + 'abalone', + 'abalone_identity_out_zerointercept', + 'rings', + 'ARRAY[0, length, diameter, height, whole, shucked, viscera, shell]', 'family=gaussian, link=identity', NULL, 'max_iter=1000, tolerance=1e-16' ); -SELECT pmml('abalone_out'); +DROP TABLE IF EXISTS glm_predict_normal_out; CREATE TABLE glm_predict_normal_out as SELECT id, glm_predict(coef, ARRAY[0, length, diameter, height, whole, shucked, viscera, shell], 'identity') +FROM abalone_identity_out_zerointercept, abalone; +SELECT test_pmml_output('abalone', 'abalone_identity_out_zerointercept', 'glm_predict_normal_out', 'id', 'glm_predict', 'predicted_rings_pmml_prediction'); diff --git a/src/ports/postgres/modules/pmml/test/pmml_glm_poisson.sql_in b/src/ports/postgres/modules/pmml/test/pmml_glm_poisson.sql_in index fe581352..2dcae6d6 100644 --- a/src/ports/postgres/modules/pmml/test/pmml_glm_poisson.sql_in +++ b/src/ports/postgres/modules/pmml/test/pmml_glm_poisson.sql_in @@ -1,77 +1,59 @@ -DROP TABLE IF EXISTS warpbreaks CASCADE; - -CREATE TABLE warpbreaks( - id serial, - breaks integer, - wool char(1), - tension char(1), - g char(1) -); +\i m4_regexp(MADLIB_LIBRARY_PATH, + `\(.*\)/lib', + `\1/../modules/pmml/test/pmml.setup.sql_in' +) -INSERT INTO warpbreaks(breaks, wool, tension, g) VALUES -(26, 'A', 'L', '1'), -(30, 'A', 'L', '1'), -(54, 'A', 'L', '1'), -(25, 'A', 'L', '1'), -(70, 'A', 'L', '1'), -(52, 'A', 'L', '1'), -(51, 'A', 'L', '1'), -(26, 'A', 'L', '1'), -(67, 'A', 'L', '1'), -(18, 'A', 'M', '1'), -(21, 'A', 'M', '1'), -(29, 'A', 'M', '1'), -(17, 'A', 'M', '1'), -(12, 'A', 'M', '1'), -(18, 'A', 'M', '1'), -(35, 'A', 'M', '1'), -(30, 'A', 'M', '1'), -(36, 'A', 'M', '1'), -(36, 'A', 'H', '0'), -(21, 'A', 'H', '0'), -(24, 'A', 'H', '0'), -(18, 'A', 'H', '0'), -(10, 'A', 'H', '0'), -(43, 'A', 'H', '0'), -(28, 'A', 'H', '0'), -(15, 'A', 'H', '0'), -(26, 'A', 'H', '0'), -(27, 'B', 'L', '0'), -(14, 'B', 'L', '0'), -(29, 'B', 'L', '0'), -(19, 'B', 'L', '0'), -(29, 'B', 'L', '0'), -(31, 'B', 'L', '0'), -(41, 'B', 'L', '0'), -(20, 'B', 'L', '1'), -(44, 'B', 'L', '1'), -(42, 'B', 'M', '1'), -(26, 'B', 'M', '1'), -(19, 'B', 'M', '1'), -(16, 'B', 'M', '1'), -(39, 'B', 'M', '1'), -(28, 'B', 'M', '1'), -(21, 'B', 'M', '1'), -(39, 'B', 'M', '1'), -(29, 'B', 'M', '1'), -(20, 'B', 'H', '1'), -(21, 'B', 'H', '1'), -(24, 'B', 'H', '1'), -(17, 'B', 'H', '1'), -(13, 'B', 'H', '1'), -(15, 'B', 'H', '1'), -(15, 'B', 'H', '1'), -(16, 'B', 'H', '1'), -(28, 'B', 'H', '1'); +m4_changequote(`<!'', `!>'') -DROP TABLE IF EXISTS warpbreaks_dummy; -SELECT create_indicator_variables('warpbreaks', 'warpbreaks_dummy', 'wool,tension'); +DROP TABLE IF EXISTS warpbreaks CASCADE; +-------------------- with intercept ------------------------------ DROP TABLE IF EXISTS glm_model_sqrt, glm_model_sqrt_summary; SELECT glm('warpbreaks_dummy', 'glm_model_sqrt', 'breaks', - 'ARRAY[1.0,"wool_B","tension_M", "tension_H"]', + 'ARRAY[1.0, "wool_B","tension_M", "tension_H"]', 'family=poisson, link=sqrt'); -SELECT pmml('glm_model_sqrt'); +-- Test predicted value +DROP TABLE IF EXISTS glm_predict_poisson_sqrt_out; CREATE TABLE glm_predict_poisson_sqrt_out as SELECT id, glm_predict(coef, ARRAY[1, "wool_B","tension_M", "tension_H"], 'sqrt') +FROM glm_model_sqrt, warpbreaks_dummy; +SELECT test_pmml_output('warpbreaks_dummy', 'glm_model_sqrt', 'glm_predict_poisson_sqrt_out', 'id', 'glm_predict', 'predicted_breaks_pmml_prediction'); + +DROP TABLE IF EXISTS glm_model_log, glm_model_log_summary; +SELECT glm('warpbreaks_dummy', + 'glm_model_log', + 'breaks', + 'ARRAY[1, "wool_B","tension_M", "tension_H"]', + 'family=poisson, link=log'); + +-- Test predicted value +DROP TABLE IF EXISTS glm_predict_poisson_log_out; CREATE TABLE glm_predict_poisson_log_out as SELECT id, glm_predict(coef, ARRAY[1, "wool_B","tension_M", "tension_H"], 'log') +FROM glm_model_log, warpbreaks_dummy; +SELECT test_pmml_output('warpbreaks_dummy', 'glm_model_log', 'glm_predict_poisson_log_out', 'id', 'glm_predict', 'predicted_breaks_pmml_prediction'); + +DROP TABLE IF EXISTS glm_model_identity, glm_model_identity_summary; +SELECT glm('warpbreaks_dummy', + 'glm_model_identity', + 'breaks', + 'ARRAY[1.0, "wool_B","tension_M", "tension_H"]', + 'family=poisson, link=identity', 'g'); + +-- Test predicted value +DROP TABLE IF EXISTS glm_predict_poisson_identity_out; CREATE TABLE glm_predict_poisson_identity_out as SELECT id, glm_model_identity.g, glm_predict(coef, ARRAY[1, "wool_B","tension_M", "tension_H"], 'identity') +FROM glm_model_identity, warpbreaks_dummy; +SELECT test_pmml_output('warpbreaks_dummy', 'glm_model_identity', 'glm_predict_poisson_identity_out', 'id', 'glm_predict', 'predicted_breaks_pmml_prediction', 'g'); + +-------------------- without intercept ------------------------------ +-- TODO: create item for fixing the glm module when link='sqrt'/'identity' and no intercept is used +DROP TABLE IF EXISTS glm_model_log_nointercept, glm_model_log_nointercept_summary; +SELECT glm('warpbreaks_dummy', + 'glm_model_log_nointercept', + 'breaks', + 'ARRAY["wool_B","tension_M", "tension_H"]', + 'family=poisson, link=log'); + +-- Test predicted value +DROP TABLE IF EXISTS glm_predict_poisson_log_out; CREATE TABLE glm_predict_poisson_log_out as SELECT id, glm_predict(coef, ARRAY["wool_B","tension_M", "tension_H"], 'log') +FROM glm_model_log_nointercept, warpbreaks_dummy; +SELECT test_pmml_output('warpbreaks_dummy', 'glm_model_log_nointercept', 'glm_predict_poisson_log_out', 'id', 'glm_predict', 'predicted_breaks_pmml_prediction'); diff --git a/src/ports/postgres/modules/pmml/test/pmml_glm_with_grouping.sql_in b/src/ports/postgres/modules/pmml/test/pmml_glm_with_grouping.sql_in new file mode 100644 index 00000000..b3852ecd --- /dev/null +++ b/src/ports/postgres/modules/pmml/test/pmml_glm_with_grouping.sql_in @@ -0,0 +1,125 @@ +/* ----------------------------------------------------------------------- *//** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + *//* ----------------------------------------------------------------------- */ + +\i m4_regexp(MADLIB_LIBRARY_PATH, + `\(.*\)/lib', + `\1/../modules/pmml/test/pmml.setup.sql_in' +) + +m4_changequote(`<!'', `!>'') +--TODO: Should all the predict and model tables have the same name ? + +----------------------- binomial with logit ------------------------------- +DROP TABLE IF EXISTS glm_model, glm_model_summary; +SELECT glm( + 'abalone', + 'glm_model', + 'rings < 10', + 'ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]', + 'family=binomial, link=logit', 'sex,id', 'max_iter=1000, tolerance=1e-16' +); + +-- Test probabilities +DROP TABLE IF EXISTS glm_predict_output; CREATE TABLE glm_predict_output as SELECT glm_model.id, glm_model.sex, glm_predict(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'logit') +FROM glm_model, abalone WHERE abalone.id=glm_model.id and abalone.sex=glm_model.sex; +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_output', 'id', 'glm_predict', 'probability_true', 'sex,id'); + +-- Test output category +DROP TABLE IF EXISTS glm_predict_output; CREATE TABLE glm_predict_output as SELECT glm_model.id, glm_model.sex, glm_predict_binomial(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'logit') +FROM glm_model, abalone WHERE abalone.id=glm_model.id and abalone.sex=glm_model.sex; +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_output', 'id', 'glm_predict_binomial', 'predicted_(rings < 10)_pmml_prediction', 'sex,id'); + +----------------------- binomial with probit without intercept------------------------------- +DROP TABLE IF EXISTS glm_model, glm_model_summary; +SELECT glm( + 'abalone', + 'glm_model', + 'rings < 10', + 'ARRAY[length, diameter, height, whole, shucked, viscera, shell]', + 'family=binomial, link=probit', 'sex', 'max_iter=1000, tolerance=1e-16' +); + +-- Test probabilities +DROP TABLE IF EXISTS glm_predict_output; CREATE TABLE glm_predict_output as SELECT id, glm_model.sex, glm_predict(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'probit') +FROM glm_model, abalone WHERE abalone.sex=glm_model.sex; +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_output', 'id', 'glm_predict', 'probability_true', 'sex'); + +-- Test output category +DROP TABLE IF EXISTS glm_predict_output; CREATE TABLE glm_predict_output as SELECT id, glm_model.sex, glm_predict_binomial(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'probit') +FROM glm_model, abalone WHERE abalone.sex=glm_model.sex; +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_output', 'id', 'glm_predict_binomial', 'predicted_(rings < 10)_pmml_prediction', 'sex'); + +----------------------- gamma with log ------------------------------- +DROP TABLE IF EXISTS glm_model, glm_model_summary; +SELECT glm( + 'abalone', + 'glm_model', + 'rings', + 'ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]', + 'family=gamma, link=log', 'sex', 'max_iter=1000, tolerance=1e-16' +); + +DROP TABLE IF EXISTS glm_predict_output; CREATE TABLE glm_predict_output as SELECT id, glm_model.sex, glm_predict(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'log') +FROM glm_model, abalone WHERE abalone.sex=glm_model.sex; +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_output', 'id', 'glm_predict', 'predicted_rings_pmml_prediction', 'sex'); +-- +-- ----------------------- gamma with log without intercept------------------------------- +-- +DROP TABLE IF EXISTS glm_model, glm_model_summary; +SELECT glm( + 'abalone', + 'glm_model', + 'rings', + 'ARRAY[length, diameter, height, whole, shucked, viscera, shell]', + 'family=gamma, link=log', 'sex', 'max_iter=1000, tolerance=1e-16' +); + +DROP TABLE IF EXISTS glm_predict_output; CREATE TABLE glm_predict_output as SELECT id, glm_model.sex, glm_predict(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'log') +FROM glm_model, abalone WHERE abalone.sex=glm_model.sex; +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_output', 'id', 'glm_predict', 'predicted_rings_pmml_prediction', 'sex'); + + +------------ inverse_gaussian with inverse without intercept ------------------------------------ +DROP TABLE IF EXISTS glm_model, glm_model_summary; +SELECT glm( + 'abalone', + 'glm_model', + 'rings', + 'ARRAY[length, diameter, height, whole, shucked, viscera, shell]', + 'family=inverse_gaussian, link=inverse', 'sex,id', 'max_iter=1000, tolerance=1e-16' +); + +DROP TABLE IF EXISTS glm_predict_output; CREATE TABLE glm_predict_output as SELECT glm_model.id, glm_model.sex, glm_predict(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'inverse') +FROM glm_model, abalone WHERE abalone.sex=glm_model.sex and abalone.id=glm_model.id; +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_output', 'id', 'glm_predict', 'predicted_rings_pmml_prediction', 'sex,id'); + +------------ poisson with identity with intercept ------------------------------------ +DROP TABLE IF EXISTS glm_model, glm_model_summary; +SELECT glm('warpbreaks_dummy', + 'glm_model', + 'breaks', + 'ARRAY[1, "wool_B","tension_M", "tension_H"]', + 'family=poisson, link=identity', 'g'); + +-- Test predicted value +DROP TABLE IF EXISTS glm_predict_output; CREATE TABLE glm_predict_output as SELECT id, glm_model.g, glm_predict(coef, ARRAY[1, "wool_B","tension_M", "tension_H"], 'identity') +FROM glm_model, warpbreaks_dummy WHERE warpbreaks_dummy.g=glm_model.g; +SELECT test_pmml_output('warpbreaks_dummy', 'glm_model', 'glm_predict_output', 'id', 'glm_predict', 'predicted_breaks_pmml_prediction', 'g'); diff --git a/src/ports/postgres/modules/pmml/test/pmml_glm_with_name_spec.sql_in b/src/ports/postgres/modules/pmml/test/pmml_glm_with_name_spec.sql_in new file mode 100644 index 00000000..472bc3a1 --- /dev/null +++ b/src/ports/postgres/modules/pmml/test/pmml_glm_with_name_spec.sql_in @@ -0,0 +1,160 @@ +/* ----------------------------------------------------------------------- *//** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + *//* ----------------------------------------------------------------------- */ + + \i m4_regexp(MADLIB_LIBRARY_PATH, + `\(.*\)/lib', + `\1/../modules/pmml/test/pmml.setup.sql_in' +) + +m4_changequote(`<!'', `!>'') + +CREATE TABLE abalone_test_for_pmml as SELECT *, length as foo1, diameter as foo2, height as foo3, whole as foo4, shucked as foo5, viscera as foo6, shell as foo7 FROM abalone; + +----------------------- with intercept ------------------------------- +DROP TABLE IF EXISTS glm_model, glm_model_summary; +SELECT glm( + 'abalone', + 'glm_model', + 'rings < 10', + 'ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]', + 'family=binomial, link=logit', NULL, 'max_iter=1000, tolerance=1e-16' +); + +-- -- Test probabilities +DROP TABLE IF EXISTS glm_predict_binomial_logit_out; CREATE TABLE glm_predict_binomial_logit_out as SELECT id, glm_predict(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'logit') +FROM glm_model, abalone; +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', '','bar ~ foo1+foo2+foo3+foo4+foo5+foo6+foo7'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', '','foo1+foo2+foo3+foo4+foo5+foo6+foo7'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', '','{bar,foo1,foo2,foo3,foo4,foo5,foo6,foo7}'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', '','{foo1,foo2,foo3,foo4,foo5,foo6,foo7}'); + +-- Test output category +DROP TABLE IF EXISTS glm_predict_binomial_logit_out; CREATE TABLE glm_predict_binomial_logit_out as SELECT id, glm_predict_binomial(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'logit') +FROM glm_model, abalone; +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_bar', '', 'bar ~ foo1+foo2+foo3+foo4+foo5+foo6+foo7'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_rings < 10', '', 'foo1+foo2+foo3+foo4+foo5+foo6+foo7'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_bar', '', '{bar,foo1,foo2,foo3,foo4,foo5,foo6,foo7}'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_rings < 10', '', '{foo1,foo2,foo3,foo4,foo5,foo6,foo7}'); + +----------------------- without intercept ------------------------------- +DROP TABLE IF EXISTS glm_model, glm_model_summary; +SELECT glm( + 'abalone', + 'glm_model', + 'rings < 10', + 'ARRAY[length, diameter, height, whole, shucked, viscera, shell]', + 'family=binomial, link=logit', NULL, 'max_iter=1000, tolerance=1e-16' +); + +-- -- Test probabilities +DROP TABLE IF EXISTS glm_predict_binomial_logit_out; CREATE TABLE glm_predict_binomial_logit_out as SELECT id, glm_predict(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'logit') +FROM glm_model, abalone; +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', '','bar ~ foo1+foo2+foo3+foo4+foo5+foo6+foo7'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', '','foo1+foo2+foo3+foo4+foo5+foo6+foo7'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', '','{bar, foo1,foo2,foo3,foo4,foo5,foo6,foo7}'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', '','{foo1,foo2,foo3,foo4,foo5,foo6,foo7}'); + +-- Test output category +DROP TABLE IF EXISTS glm_predict_binomial_logit_out; CREATE TABLE glm_predict_binomial_logit_out as SELECT id, glm_predict_binomial(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'logit') +FROM glm_model, abalone; +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_bar', '', 'bar ~ foo1+foo2+foo3+foo4+foo5+foo6+foo7'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_rings < 10', '', 'foo1+foo2+foo3+foo4+foo5+foo6+foo7'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_bar', '', '{bar, foo1,foo2,foo3,foo4,foo5,foo6,foo7}'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_rings < 10', '', '{foo1,foo2,foo3,foo4,foo5,foo6,foo7}'); + + +---------------------------- with grouping ----------------------------------------- +DROP TABLE IF EXISTS glm_model, glm_model_summary; +SELECT glm( + 'abalone', + 'glm_model', + 'rings < 10', + 'ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]', + 'family=binomial, link=logit', 'sex', 'max_iter=1000, tolerance=1e-16' +); + +-- -- Test probabilities +DROP TABLE IF EXISTS glm_predict_binomial_logit_out; CREATE TABLE glm_predict_binomial_logit_out as SELECT id, glm_model.sex, glm_predict(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'logit') +FROM glm_model, abalone WHERE abalone.sex=glm_model.sex; +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', 'sex','bar ~ foo1+foo2+foo3+foo4+foo5+foo6+foo7'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', 'sex','foo1+foo2+foo3+foo4+foo5+foo6+foo7'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', 'sex','{bar, foo1,foo2,foo3,foo4,foo5,foo6,foo7}'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', 'sex','{foo1,foo2,foo3,foo4,foo5,foo6,foo7}'); + +-- Test output category +DROP TABLE IF EXISTS glm_predict_binomial_logit_out; CREATE TABLE glm_predict_binomial_logit_out as SELECT id, glm_model.sex, glm_predict_binomial(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'logit') +FROM glm_model, abalone WHERE abalone.sex=glm_model.sex; +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_bar', 'sex', 'bar ~ foo1+foo2+foo3+foo4+foo5+foo6+foo7'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_rings < 10', 'sex', 'foo1+foo2+foo3+foo4+foo5+foo6+foo7'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_bar', 'sex', '{bar, foo1,foo2,foo3,foo4,foo5,foo6,foo7}'); +SELECT test_pmml_output('abalone_test_for_pmml', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_rings < 10', 'sex', '{foo1,foo2,foo3,foo4,foo5,foo6,foo7}'); + + +----------------------- with grouping and incorrect feature length in name spec ------------------------- +-- Incorrect count type1: If the count of features in the name spec is incorrect, madlib pmml will fall back to using the original names +-- So we can use the original table 'abalone' to compare the results + +DROP TABLE IF EXISTS glm_model, glm_model_summary; +SELECT glm( + 'abalone', + 'glm_model', + 'rings < 10', + 'ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]', + 'family=binomial, link=logit', 'sex', 'max_iter=1000, tolerance=1e-16' +); + + +DROP TABLE IF EXISTS glm_predict_binomial_logit_out; CREATE TABLE glm_predict_binomial_logit_out as SELECT id, glm_model.sex, glm_predict(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'logit') +FROM glm_model, abalone WHERE abalone.sex=glm_model.sex; +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', 'sex','bar ~ foo1+foo2+foo3+foo4+foo5+foo6'); +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', 'sex','foo1+foo2+foo3+foo4+foo5+foo6'); +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', 'sex','{foo1,foo2,foo3,foo4,foo5,foo6}'); +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', 'sex','{bar,foo1,foo2,foo3,foo4,foo5,foo6,foo7,foo8}'); + +DROP TABLE IF EXISTS glm_predict_binomial_logit_out; CREATE TABLE glm_predict_binomial_logit_out as SELECT id, glm_model.sex, glm_predict_binomial(coef, ARRAY[1, length, diameter, height, whole, shucked, viscera, shell], 'logit') +FROM glm_model, abalone WHERE abalone.sex=glm_model.sex; +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_rings < 10', 'sex','foo1+foo2+foo3+foo4+foo5+foo6'); +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_rings < 10', 'sex','{foo1,foo2,foo3,foo4,foo5,foo6}'); +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_rings < 10', 'sex','{bar,foo1,foo2,foo3,foo4,foo5,foo6,foo7,foo8}'); + +DROP TABLE IF EXISTS glm_model, glm_model_summary; +SELECT glm( + 'abalone', + 'glm_model', + 'rings < 10', + 'ARRAY[length, diameter, height, whole, shucked, viscera, shell]', + 'family=binomial, link=logit', 'sex', 'max_iter=1000, tolerance=1e-16' +); + + +DROP TABLE IF EXISTS glm_predict_binomial_logit_out; CREATE TABLE glm_predict_binomial_logit_out as SELECT id, glm_model.sex, glm_predict(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'logit') +FROM glm_model, abalone WHERE abalone.sex=glm_model.sex; +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', 'sex','bar ~ foo1+foo2+foo3+foo4+foo5+foo6'); +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', 'sex','foo1+foo2+foo3+foo4+foo5+foo6'); +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', 'sex','{foo1,foo2,foo3,foo4,foo5,foo6}'); +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict', 'probability_true', 'sex','{bar,foo1,foo2,foo3,foo4,foo5,foo6,foo7,foo8}'); + +DROP TABLE IF EXISTS glm_predict_binomial_logit_out; CREATE TABLE glm_predict_binomial_logit_out as SELECT id, glm_model.sex, glm_predict_binomial(coef, ARRAY[length, diameter, height, whole, shucked, viscera, shell], 'logit') +FROM glm_model, abalone WHERE abalone.sex=glm_model.sex; +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_rings < 10', 'sex','foo1+foo2+foo3+foo4+foo5+foo6'); +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_rings < 10', 'sex','{foo1,foo2,foo3,foo4,foo5,foo6}'); +SELECT test_pmml_output('abalone', 'glm_model', 'glm_predict_binomial_logit_out', 'id', 'glm_predict_binomial', 'predicted_rings < 10', 'sex','{bar,foo1,foo2,foo3,foo4,foo5,foo6,foo7,foo8}'); + diff --git a/src/ports/postgres/modules/pmml/test/pmml_linear.sql_in b/src/ports/postgres/modules/pmml/test/pmml_linear.sql_in new file mode 100644 index 00000000..a870e373 --- /dev/null +++ b/src/ports/postgres/modules/pmml/test/pmml_linear.sql_in @@ -0,0 +1,91 @@ +/* ----------------------------------------------------------------------- *//** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + *//* ----------------------------------------------------------------------- */ + + \i m4_regexp(MADLIB_LIBRARY_PATH, `\(.*\)/lib', + `\1/../modules/pmml/test/pmml.setup.sql_in' +) + +m4_changequote(`<!'', `!>'') + +DROP TABLE IF EXISTS linregr_model, linregr_model_summary; +SELECT linregr_train( + 'houses', + 'linregr_model', + 'price', + 'array[1, bedroom, bath, size]' +); + +DROP TABLE IF EXISTS linregr_predict_output; +CREATE TABLE linregr_predict_output AS + SELECT id, linregr_predict(coef, ARRAY[1, bedroom, bath, size]) +FROM linregr_model, houses; +SELECT test_pmml_output('houses', 'linregr_model', 'linregr_predict_output', 'id', 'linregr_predict', 'predicted_price_pmml_prediction'); + + +DROP TABLE IF EXISTS linregr_model, linregr_model_summary; +SELECT linregr_train( + 'houses', + 'linregr_model', + 'price', + 'array[1, bedroom, bath, size]', + 'g1, g2' +); + +DROP TABLE IF EXISTS linregr_predict_output; +CREATE TABLE linregr_predict_output AS + SELECT id, linregr_model.g1, linregr_model.g2, + linregr_predict(coef, ARRAY[1, bedroom, bath, size]) +FROM linregr_model, houses +WHERE linregr_model.g1=houses.g1 AND linregr_model.g2=houses.g2; +SELECT test_pmml_output('houses', 'linregr_model', 'linregr_predict_output', 'id', 'linregr_predict', 'predicted_price_pmml_prediction', 'g1,g2'); + +---------------- without intercept ------------------------------ +DROP TABLE IF EXISTS linregr_model, linregr_model_summary; +SELECT linregr_train( + 'houses', + 'linregr_model', + 'price', + 'array[bedroom, bath, size]' +); + +DROP TABLE IF EXISTS linregr_predict_output; +CREATE TABLE linregr_predict_output AS + SELECT id, linregr_predict(coef, ARRAY[bedroom, bath, size]) +FROM linregr_model, houses; +SELECT test_pmml_output('houses', 'linregr_model', 'linregr_predict_output', 'id', 'linregr_predict', 'predicted_price_pmml_prediction'); + + +DROP TABLE IF EXISTS linregr_model, linregr_model_summary; +SELECT linregr_train( + 'houses', + 'linregr_model', + 'price', + 'array[bedroom, bath, size]', + 'g1, g2' +); + +DROP TABLE IF EXISTS linregr_predict_output; +CREATE TABLE linregr_predict_output AS + SELECT id, linregr_model.g1, linregr_model.g2, + linregr_predict(coef, ARRAY[bedroom, bath, size]) +FROM linregr_model, houses +WHERE linregr_model.g1=houses.g1 AND linregr_model.g2=houses.g2; +SELECT test_pmml_output('houses', 'linregr_model', 'linregr_predict_output', 'id', 'linregr_predict', 'predicted_price_pmml_prediction', 'g1,g2'); diff --git a/src/ports/postgres/modules/pmml/test/pmml_logistic.sql_in b/src/ports/postgres/modules/pmml/test/pmml_logistic.sql_in new file mode 100644 index 00000000..8e9f2cbd --- /dev/null +++ b/src/ports/postgres/modules/pmml/test/pmml_logistic.sql_in @@ -0,0 +1,159 @@ +/* ----------------------------------------------------------------------- *//** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + *//* ----------------------------------------------------------------------- */ + + \i m4_regexp(MADLIB_LIBRARY_PATH, `\(.*\)/lib', + `\1/../modules/pmml/test/pmml.setup.sql_in' +) + +m4_changequote(`<!'', `!>'') + +DROP TABLE IF EXISTS logregr_model, logregr_model_summary; +SELECT logregr_train( + '"Patients"', + 'logregr_model', + '"Second_attack"', + 'ARRAY[1, "Treatment", trait_anxiety]', + NULL, + 20, + 'irls' +); +DROP TABLE IF EXISTS logregr_predict_output; CREATE TABLE logregr_predict_output as SELECT id, logregr_predict(coef, ARRAY[1, "Treatment", trait_anxiety]) +FROM logregr_model, "Patients"; +SELECT test_pmml_output('"Patients"', 'logregr_model', 'logregr_predict_output', 'id', 'logregr_predict', 'predicted_Second_attack_pmml_prediction'); + +DROP TABLE IF EXISTS logregr_predict_output; CREATE TABLE logregr_predict_output as SELECT id, logregr_predict_prob(coef, ARRAY[1, "Treatment", trait_anxiety]) +FROM logregr_model, "Patients"; +SELECT test_pmml_output('"Patients"', 'logregr_model', 'logregr_predict_output', 'id', 'logregr_predict_prob', 'probability_true'); + +--- simple grouping ------- +DROP TABLE IF EXISTS logregr_model_grouping, logregr_model_grouping_summary; +SELECT logregr_train( + '"Patients"', + 'logregr_model_grouping', + '"Second_attack"', + 'ARRAY[1, "Treatment", trait_anxiety]', + 'g1', + 20, + 'irls' +); +DROP TABLE IF EXISTS logregr_predict_output; CREATE TABLE logregr_predict_output as SELECT id, logregr_model_grouping.g1, logregr_predict(coef, ARRAY[1, "Treatment", trait_anxiety]) +FROM logregr_model_grouping, "Patients" WHERE logregr_model_grouping.g1="Patients".g1; +SELECT test_pmml_output('"Patients"', 'logregr_model_grouping', 'logregr_predict_output', 'id', 'logregr_predict', 'predicted_Second_attack_pmml_prediction', 'g1'); + +DROP TABLE IF EXISTS logregr_predict_output; CREATE TABLE logregr_predict_output as SELECT id, logregr_model_grouping.g1, logregr_predict_prob(coef, ARRAY[1, "Treatment", trait_anxiety]) +FROM logregr_model_grouping, "Patients" WHERE logregr_model_grouping.g1="Patients".g1; +SELECT test_pmml_output('"Patients"', 'logregr_model_grouping', 'logregr_predict_output', 'id', 'logregr_predict_prob', 'probability_true', 'g1'); + +--- complex grouping ------- +DROP TABLE IF EXISTS logregr_model_grouping, logregr_model_grouping_summary; +SELECT logregr_train( + '"Patients"', + 'logregr_model_grouping', + '"Second_attack"', + 'ARRAY[1, "Treatment", trait_anxiety]', -- test double quote + 'g1, g2', + 20, + 'irls' +); +DROP TABLE IF EXISTS logregr_predict_output; CREATE TABLE logregr_predict_output as SELECT id, logregr_model_grouping.g1, logregr_model_grouping.g2, logregr_predict(coef, ARRAY[1, "Treatment", trait_anxiety]) FROM logregr_model_grouping, "Patients" +WHERE logregr_model_grouping.g1="Patients".g1 AND logregr_model_grouping.g2="Patients".g2; +SELECT test_pmml_output('"Patients"', 'logregr_model_grouping', 'logregr_predict_output', 'id', 'logregr_predict', 'predicted_Second_attack_pmml_prediction', 'g1,g2'); + +DROP TABLE IF EXISTS logregr_predict_output; CREATE TABLE logregr_predict_output as SELECT id, logregr_model_grouping.g1, logregr_model_grouping.g2, logregr_predict_prob(coef, ARRAY[1, "Treatment", trait_anxiety]) FROM logregr_model_grouping, "Patients" +WHERE logregr_model_grouping.g1="Patients".g1 AND logregr_model_grouping.g2="Patients".g2; +SELECT test_pmml_output('"Patients"', 'logregr_model_grouping', 'logregr_predict_output', 'id', 'logregr_predict_prob', 'probability_true', 'g1,g2'); + + +--------------------------- without intercept ------------------------------ +DROP TABLE IF EXISTS logregr_model, logregr_model_summary; +SELECT logregr_train( + '"Patients"', + 'logregr_model', + '"Second_attack"', + 'ARRAY["Treatment", trait_anxiety]', + NULL, + 20, + 'irls' +); +DROP TABLE IF EXISTS logregr_predict_output; CREATE TABLE logregr_predict_output as SELECT id, logregr_predict(coef, ARRAY["Treatment", trait_anxiety]) +FROM logregr_model, "Patients"; +SELECT test_pmml_output('"Patients"', 'logregr_model', 'logregr_predict_output', 'id', 'logregr_predict', 'predicted_Second_attack_pmml_prediction'); + +DROP TABLE IF EXISTS logregr_predict_output; CREATE TABLE logregr_predict_output as SELECT id, logregr_predict_prob(coef, ARRAY["Treatment", trait_anxiety]) +FROM logregr_model, "Patients"; +SELECT test_pmml_output('"Patients"', 'logregr_model', 'logregr_predict_output', 'id', 'logregr_predict_prob', 'probability_true'); + +--- simple grouping ------- +DROP TABLE IF EXISTS logregr_model_grouping, logregr_model_grouping_summary; +SELECT logregr_train( + '"Patients"', + 'logregr_model_grouping', + '"Second_attack"', + 'ARRAY["Treatment", trait_anxiety]', + 'g1', + 20, + 'irls' +); +DROP TABLE IF EXISTS logregr_predict_output; CREATE TABLE logregr_predict_output as SELECT id, logregr_model_grouping.g1, logregr_predict(coef, ARRAY["Treatment", trait_anxiety]) +FROM logregr_model_grouping, "Patients" WHERE logregr_model_grouping.g1="Patients".g1; +SELECT test_pmml_output('"Patients"', 'logregr_model_grouping', 'logregr_predict_output', 'id', 'logregr_predict', 'predicted_Second_attack_pmml_prediction', 'g1'); + +DROP TABLE IF EXISTS logregr_predict_output; CREATE TABLE logregr_predict_output as SELECT id, logregr_model_grouping.g1, logregr_predict_prob(coef, ARRAY["Treatment", trait_anxiety]) +FROM logregr_model_grouping, "Patients" WHERE logregr_model_grouping.g1="Patients".g1; +SELECT test_pmml_output('"Patients"', 'logregr_model_grouping', 'logregr_predict_output', 'id', 'logregr_predict_prob', 'probability_true', 'g1'); + +--- complex grouping ------- +DROP TABLE IF EXISTS logregr_model_grouping, logregr_model_grouping_summary; +SELECT logregr_train( + '"Patients"', + 'logregr_model_grouping', + '"Second_attack"', + 'ARRAY["Treatment", trait_anxiety]', -- test double quote + 'g1, g2', + 20, + 'irls' +); +DROP TABLE IF EXISTS logregr_predict_output; CREATE TABLE logregr_predict_output as SELECT id, logregr_model_grouping.g1, logregr_model_grouping.g2, logregr_predict(coef, ARRAY["Treatment", trait_anxiety]) FROM logregr_model_grouping, "Patients" +WHERE logregr_model_grouping.g1="Patients".g1 AND logregr_model_grouping.g2="Patients".g2; +SELECT test_pmml_output('"Patients"', 'logregr_model_grouping', 'logregr_predict_output', 'id', 'logregr_predict', 'predicted_Second_attack_pmml_prediction', 'g1,g2'); + +DROP TABLE IF EXISTS logregr_predict_output; CREATE TABLE logregr_predict_output as SELECT id, logregr_model_grouping.g1, logregr_model_grouping.g2, logregr_predict_prob(coef, ARRAY["Treatment", trait_anxiety]) FROM logregr_model_grouping, "Patients" +WHERE logregr_model_grouping.g1="Patients".g1 AND logregr_model_grouping.g2="Patients".g2; +SELECT test_pmml_output('"Patients"', 'logregr_model_grouping', 'logregr_predict_output', 'id', 'logregr_predict_prob', 'probability_true', 'g1,g2'); + +--- with 0 intercept --------- +DROP TABLE IF EXISTS logregr_model, logregr_model_summary; +SELECT logregr_train( + '"Patients"', + 'logregr_model', + '"Second_attack"', + 'ARRAY[0, "Treatment", trait_anxiety]', + NULL, + 20, + 'irls' +); +DROP TABLE IF EXISTS logregr_predict_output; CREATE TABLE logregr_predict_output as SELECT id, logregr_predict(coef, ARRAY[0, "Treatment", trait_anxiety]) +FROM logregr_model, "Patients"; +SELECT test_pmml_output('"Patients"', 'logregr_model', 'logregr_predict_output', 'id', 'logregr_predict', 'predicted_Second_attack_pmml_prediction'); + +DROP TABLE IF EXISTS logregr_predict_output; CREATE TABLE logregr_predict_output as SELECT id, logregr_predict_prob(coef, ARRAY[0, "Treatment", trait_anxiety]) +FROM logregr_model, "Patients"; +SELECT test_pmml_output('"Patients"', 'logregr_model', 'logregr_predict_output', 'id', 'logregr_predict_prob', 'probability_true'); diff --git a/src/ports/postgres/modules/pmml/test/pmml_multinom.sql_in b/src/ports/postgres/modules/pmml/test/pmml_multinom.sql_in index 763ebd62..576d96ac 100644 --- a/src/ports/postgres/modules/pmml/test/pmml_multinom.sql_in +++ b/src/ports/postgres/modules/pmml/test/pmml_multinom.sql_in @@ -1,3 +1,10 @@ +\i m4_regexp(MADLIB_LIBRARY_PATH, + `\(.*\)/lib', + `\1/../modules/pmml/test/pmml.setup.sql_in' +) + +m4_changequote(`<!'', `!>'') + DROP TABLE IF EXISTS multinom_test; CREATE TABLE multinom_test ( feat1 INTEGER, @@ -207,6 +214,7 @@ INSERT INTO multinom_test(feat1, feat2, cat, g) VALUES (2,67,2,'B'), (2,65,2,'B'), (2,62,2,'B'); +ALTER TABLE multinom_test ADD COLUMN id SERIAL; DROP TABLE IF EXISTS mglm_out, mglm_out_summary; SELECT multinom( @@ -216,8 +224,16 @@ SELECT multinom( 'ARRAY[1, feat1, feat2]', '0', 'logit'); +DROP TABLE IF EXISTS multinom_predict_out; SELECT multinom_predict('mglm_out','multinom_test', 'multinom_predict_out', 'probability',FALSE,'id'); +SELECT test_pmml_output('multinom_test', 'mglm_out', 'multinom_predict_out', 'id', '0', 'probability_0'); +SELECT test_pmml_output('multinom_test', 'mglm_out', 'multinom_predict_out', 'id', '1', 'probability_1'); +SELECT test_pmml_output('multinom_test', 'mglm_out', 'multinom_predict_out', 'id', '2', 'probability_2'); -DROP TABLE IF EXISTS mglm_grp_out, mglm_grp_out_summary; +DROP TABLE IF EXISTS multinom_predict_out; SELECT multinom_predict('mglm_out','multinom_test', 'multinom_predict_out', 'response',FALSE,'id'); +SELECT test_pmml_output('multinom_test', 'mglm_out', 'multinom_predict_out', 'id', 'category', 'predicted_cat_pmml_prediction'); + +------------------------------------ with grouping ------------------------------------------------------------------- +-- DROP TABLE IF EXISTS mglm_grp_out, mglm_grp_out_summary; SELECT multinom( 'multinom_test', 'mglm_grp_out', @@ -228,5 +244,10 @@ SELECT multinom( 'g'); -- PMML export -SELECT pmml('mglm_out'); SELECT pmml('mglm_grp_out'); +DROP TABLE IF EXISTS multinom_predict_out; SELECT multinom_predict('mglm_grp_out','multinom_test', 'multinom_predict_out', 'probability',FALSE,'id'); +CREATE TABLE multinom_predict_grp_out as (SELECT a.id, a.g, b."0", b."1", b."2", a.feat1, a.feat2 FROM multinom_test AS a LEFT JOIN multinom_predict_out AS b on a.id =b.id); +SELECT test_pmml_output('multinom_test', 'mglm_grp_out', 'multinom_predict_grp_out', 'id', '0', 'probability_0','g'); + +DROP TABLE IF EXISTS multinom_predict_out; SELECT multinom_predict('mglm_grp_out','multinom_test', 'multinom_predict_out', 'response',FALSE,'id'); +SELECT test_pmml_output('multinom_test', 'mglm_grp_out', 'multinom_predict_out', 'id', 'category', 'predicted_cat_pmml_prediction'); diff --git a/src/ports/postgres/modules/pmml/test/pmml_with_non_array_expression.sql_in b/src/ports/postgres/modules/pmml/test/pmml_with_non_array_expression.sql_in new file mode 100644 index 00000000..1a7769d7 --- /dev/null +++ b/src/ports/postgres/modules/pmml/test/pmml_with_non_array_expression.sql_in @@ -0,0 +1,94 @@ +/* ----------------------------------------------------------------------- *//** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + *//* ----------------------------------------------------------------------- */ + + \i m4_regexp(MADLIB_LIBRARY_PATH, `\(.*\)/lib', + `\1/../modules/pmml/test/pmml.setup.sql_in' +) + +m4_changequote(`<!'', `!>'') + +----------------------- Use non array expression for the features argument ------------------------------- + +--------------- glm -------------- +CREATE TABLE abalone_non_array_expression as SELECT id, sex, rings < 10 as target,ARRAY[1, length, diameter, height, whole, shucked, viscera, shell] as features from abalone; + +DROP TABLE IF EXISTS glm_model, glm_model_summary; +SELECT glm( + 'abalone_non_array_expression', + 'glm_model', + 'target', + 'features', + 'family=binomial, link=logit', '', 'max_iter=1000, tolerance=1e-16' +); + +-- Test probabilities +-- The feature names in output pmml will look like "features[1], features[2]" and so on. So pypmml also needs to predict using the same names, hence the table creation +CREATE TABLE abalone_test_for_pmml1 as SELECT id, sex, length as "features[1]", diameter as "features[2]", height as "features[3]", whole as "features[4]", shucked as "features[5]", viscera as "features[6]",shell as "features[7]" from abalone; + +DROP TABLE IF EXISTS glm_predict_output; CREATE TABLE glm_predict_output as SELECT id, glm_predict(coef, features, 'logit') FROM glm_model, abalone_non_array_expression; +SELECT test_pmml_output('abalone_test_for_pmml1', 'glm_model', 'glm_predict_output', 'id', 'glm_predict', 'probability_true'); + +-- Test output category +DROP TABLE IF EXISTS glm_predict_output; CREATE TABLE glm_predict_output as SELECT id, glm_predict_binomial(coef, features, 'logit') FROM glm_model, abalone_non_array_expression; +SELECT test_pmml_output('abalone_test_for_pmml1', 'glm_model', 'glm_predict_output', 'id', 'glm_predict_binomial', 'predicted_target_pmml_prediction'); + +--------------- logistic regression -------------- +CREATE TABLE patients_non_array_expression AS SELECT id, "Second_attack" AS y, ARRAY[1, "Treatment", trait_anxiety] AS "x_A" FROM "Patients"; + +DROP TABLE IF EXISTS logregr_model, logregr_model_summary; +SELECT logregr_train( + 'patients_non_array_expression', + 'logregr_model', + 'y', + '"x_A"'); + +-- The feature names in output pmml will look like "features[1], features[2]" and so on. So pypmml also needs to predict using the same names, hence the table creation +CREATE TABLE patients_test_for_pmml as SELECT id, "Treatment" as "x_A[1]", trait_anxiety as "x_A[2]" FROM "Patients"; + +DROP TABLE IF EXISTS logregr_predict_output; CREATE TABLE logregr_predict_output as SELECT id, logregr_predict(coef, "x_A") FROM logregr_model, patients_non_array_expression; +SELECT test_pmml_output('patients_test_for_pmml', 'logregr_model', 'logregr_predict_output', 'id', 'logregr_predict', 'predicted_y_pmml_prediction'); + +DROP TABLE IF EXISTS logregr_predict_output; CREATE TABLE logregr_predict_output as SELECT id, logregr_predict_prob(coef, "x_A") FROM logregr_model, patients_non_array_expression; +SELECT test_pmml_output('patients_test_for_pmml', 'logregr_model', 'logregr_predict_output', 'id', 'logregr_predict_prob', 'probability_true'); + + +------------------------------------------ now with grouping and name spec------------------------------------------------ + +DROP TABLE IF EXISTS glm_model, glm_model_summary; +SELECT glm( + 'abalone_non_array_expression', + 'glm_model', + 'target', + 'features', + 'family=binomial, link=probit', 'sex,id', 'max_iter=1000, tolerance=1e-16' +); + +CREATE TABLE abalone_test_for_pmml2 as SELECT id, sex, length as foo1, diameter as foo2, height as foo3, whole as foo4, shucked as foo5, viscera as foo6,shell as foo7 from abalone; + +-- -- Test probabilities +DROP TABLE IF EXISTS glm_predict_output; CREATE TABLE glm_predict_output as SELECT glm_model.id, glm_model.sex, glm_predict(coef, features, 'probit') FROM glm_model, abalone_non_array_expression +WHERE abalone_non_array_expression.id=glm_model.id AND abalone_non_array_expression.sex=glm_model.sex; +SELECT test_pmml_output('abalone_test_for_pmml2', 'glm_model', 'glm_predict_output', 'id', 'glm_predict', 'probability_true', 'sex,id', 'bar ~ foo1+foo2+foo3+foo4+foo5+foo6+foo7'); + +-- Test output category +DROP TABLE IF EXISTS glm_predict_output; CREATE TABLE glm_predict_output as SELECT glm_model.id, glm_model.sex, glm_predict_binomial(coef, features, 'probit') FROM glm_model, abalone_non_array_expression +WHERE abalone_non_array_expression.id=glm_model.id AND abalone_non_array_expression.sex=glm_model.sex; +SELECT test_pmml_output('abalone_test_for_pmml2', 'glm_model', 'glm_predict_output', 'id', 'glm_predict_binomial', 'predicted_bar', 'sex,id', 'bar ~ foo1+foo2+foo3+foo4+foo5+foo6+foo7'); diff --git a/src/ports/postgres/modules/pmml/test/table_to_pmml.sql_in b/src/ports/postgres/modules/pmml/test/table_to_pmml.sql_in deleted file mode 100644 index bcdc3181..00000000 --- a/src/ports/postgres/modules/pmml/test/table_to_pmml.sql_in +++ /dev/null @@ -1,197 +0,0 @@ ---------------------------------------------------------------------------- --- logregr ---------------------------------------------------------------------------- -CREATE TABLE "Patients" ( - id integer NOT NULL, - "Second_attack" integer, - "Treatment" integer, - trait_anxiety integer, - g1 char(1), - g2 char(1) -); - -INSERT INTO "Patients"(ID, "Second_attack", "Treatment", trait_anxiety, g1, g2) VALUES -( 1, 1, 1, 70, 'A', 'x'), -( 2, 1, 1, 80, 'A', 'y'), -( 3, 1, 1, 50, 'A', 'y'), -( 4, 1, 0, 60, 'A', 'y'), -( 5, 1, 0, 40, 'A', 'y'), -( 6, 1, 0, 65, 'A', 'x'), -( 7, 1, 0, 75, 'A', 'x'), -( 8, 1, 0, 80, 'B', 'x'), -( 9, 1, 0, 70, 'A', 'x'), -(10, 1, 0, 60, 'A', 'z'), -(11, 0, 1, 65, 'A', 'z'), -(12, 0, 1, 50, 'B', 'y'), -(13, 0, 1, 45, 'A', 'y'), -(14, 0, 1, 35, 'A', 'z'), -(15, 0, 1, 40, 'A', 'z'), -(16, 0, 1, 50, 'A', 'z'), -(17, 0, 0, 55, 'B', 'z'), -(18, 0, 0, 45, 'B', 'z'), -(19, 0, 0, 50, 'B', 'z'), -(20, 0, 0, 60, 'B', 'z'); - -SELECT logregr_train( - '"Patients"', - 'no_grouping', - '"Second_attack"', - 'ARRAY[1, "Treatment", trait_anxiety]', - NULL, - 20, - 'irls' -); - -SELECT logregr_train( - '"Patients"', - 'simple_grouping', - '"Second_attack"', - 'ARRAY[1, "Treatment", trait_anxiety]', - 'g1', - 20, - 'irls' -); - - -SELECT logregr_train( - '"Patients"', - 'complex_grouping', - '"Second_attack"', - 'ARRAY[1, "Treatment", trait_anxiety]', -- test double quote - 'g1, g2', - 20, - 'irls' -); - - -SELECT pmml('no_grouping'); -SELECT pmml('simple_grouping'); -SELECT pmml('complex_grouping'); - --- test simple column -CREATE TABLE pmml_logregr_example AS -SELECT "Second_attack" AS y, ARRAY[1, "Treatment", trait_anxiety] AS "x_A" -FROM "Patients"; - -SELECT logregr_train('pmml_logregr_example', 'example_pmml', 'y', '"x_A"'); -SELECT pmml('example_pmml'); - ---------------------------------------------------------------------------- --- linregr ---------------------------------------------------------------------------- -CREATE TABLE houses ( - id SERIAL NOT NULL, - tax INTEGER, - bedroom REAL, - bath REAL, - price INTEGER, - size INTEGER, - lot INTEGER, - g1 CHAR(1), - g2 CHAR(1) -); - -INSERT INTO houses(tax, bedroom, bath, price, size, lot, g1, g2) VALUES -( 590, 2, 1, 50000, 770, 22100, 'A', 'z'), -(1050, 3, 2, 85000, 1410, 12000, 'A', 'y'), -( 20, 3, 1, 22500, 1060, 3500 , 'A', 'y'), -( 870, 2, 2, 90000, 1300, 17500, 'A', 'y'), -(1320, 3, 2, 133000, 1500, 30000, 'A', 'y'), -(1350, 2, 1, 90500, 820, 25700, 'A', 'x'), -(2790, 3, 2.5, 260000, 2130, 25000, 'A', 'x'), -( 680, 2, 1, 142500, 1170, 22000, 'A', 'x'), -(1840, 3, 2, 160000, 1500, 19000, 'B', 'x'), -(3680, 4, 2, 240000, 2790, 20000, 'B', 'y'), -(1660, 3, 1, 87000, 1030, 17500, 'B', 'y'), -(1620, 3, 2, 118600, 1250, 20000, 'A', 'y'), -(3100, 3, 2, 140000, 1760, 38000, 'B', 'y'), -(2070, 2, 3, 148000, 1550, 14000, 'B', 'y'), -( 650, 3, 1.5, 65000, 1450, 12000, 'B', 'y'); - -SELECT linregr_train( - 'houses', - 'linregr_grouping', - 'price', - 'array[1, bedroom, bath, size]', - 'g1, g2' -); - -SELECT pmml('linregr_grouping'); - - -CREATE OR REPLACE FUNCTION count_expected_names(formula varchar, expected varchar[]) -RETURNS bigint AS $$ - SELECT count(distinct result.name) - FROM - (SELECT text(unnest(xpath('/n:PMML/n:DataDictionary/n:DataField/@name', - pmml_col, ARRAY[ARRAY['n', 'http://www.dmg.org/PMML-4_1']]))) AS name - FROM (SELECT CASE WHEN $1 is NULL THEN pmml('linregr_grouping') - ELSE pmml('linregr_grouping', $1) - END AS pmml_col) linregr_grouping_pmml - ) result, - (SELECT unnest($2) AS name) expected - WHERE expected.name = result.name; -$$ LANGUAGE sql; - -CREATE OR REPLACE FUNCTION count_expected_names(name_spec varchar[], expected varchar[]) -RETURNS bigint AS $$ - SELECT count(distinct result.name) - FROM - (SELECT text(unnest(xpath('/n:PMML/n:DataDictionary/n:DataField/@name', - pmml_col, ARRAY[ARRAY['n', 'http://www.dmg.org/PMML-4_1']]))) AS name - FROM (SELECT CASE WHEN $1 is NULL THEN pmml('linregr_grouping') - ELSE pmml('linregr_grouping', $1) - END AS pmml_col) linregr_grouping_pmml - ) result, - (SELECT unnest($2) AS name) expected - WHERE expected.name = result.name; -$$ LANGUAGE sql; - - - -SELECT assert( - count_expected_names( - NULL, - ARRAY['price_pmml_prediction', '1', 'bedroom', 'bath', 'size', 'g1', 'g2'] - ) = 7, - 'default case'); - -SELECT assert( - count_expected_names( - 'c.price ~ c.1 + c.bedroom + c.bath + c.size', - ARRAY['c.price', 'c.1', 'c.bedroom', 'c.bath', 'c.size'] - ) = 5, - text(pmml('linregr_grouping', - 'c.price ~ c.1 + c.bedroom + c.bath + c.size'))); - -SELECT assert( - count_expected_names( - '{c.price, c.1, c.bedroom, c.bath, c.size}'::varchar[], - ARRAY['c.price', 'c.1', 'c.bedroom', 'c.bath', 'c.size'] - ) = 5, - text(pmml('linregr_grouping', - 'c.price ~ c.1 + c.bedroom + c.bath + c.size'))); - -SELECT assert( - count_expected_names( - '{c.price, c.1, c.bedroom, c.bath, c.size}', - ARRAY['c.price', 'c.1', 'c.bedroom', 'c.bath', 'c.size'] - ) = 5, - text(pmml('linregr_grouping', - 'c.price ~ c.1 + c.bedroom + c.bath + c.size'))); - -SELECT assert( - count_expected_names( - 'c.1 + c.bedroom + c.bath + c.size', - ARRAY['price', 'c.1', 'c.bedroom', 'c.bath', 'c.size'] - ) = 5, - text(pmml('linregr_grouping', - 'c.1 + c.bedroom + c.bath + c.size'))); - -SELECT assert( - count_expected_names( - 'c.1, c.bedroom, c.bath, c.size', - ARRAY['price', 'c.1', 'c.bedroom', 'c.bath', 'c.size'] - ) = 5, - text(pmml('linregr_grouping', - 'c.1, c.bedroom, c.bath, c.size')));