[
https://issues.apache.org/jira/browse/MADLIB-1238?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16499775#comment-16499775
]
Frank McQuillan commented on MADLIB-1238:
-----------------------------------------
MLP TEST
create golf data set:
{code}
-- Golf sample data (the dt_golf set from the MADlib decision-tree examples).
-- The table is dropped and rebuilt so this script can be re-run as-is.
DROP TABLE IF EXISTS dt_golf CASCADE;

CREATE TABLE dt_golf (
    id                 integer NOT NULL,
    "OUTLOOK"          text,
    temperature        double precision,
    humidity           double precision,
    "Temp_Humidity"    double precision[],  -- {temperature, humidity} of the same row
    clouds_airquality  text[],
    windy              boolean,
    class              text,                -- 'Play' or 'Don''t Play'
    observation_weight double precision
);

-- Every string literal is kept on a single line: a line break inside a quoted
-- literal becomes part of the stored value, which would turn 'Don''t Play'
-- into a different class label for the affected rows.
INSERT INTO dt_golf (
    id, "OUTLOOK", temperature, humidity, "Temp_Humidity",
    clouds_airquality, windy, class, observation_weight
) VALUES
    (1,  'sunny',    85, 85, ARRAY[85, 85], ARRAY['none', 'unhealthy'],   'false', 'Don''t Play', 5.0),
    (2,  'sunny',    80, 90, ARRAY[80, 90], ARRAY['none', 'moderate'],    'true',  'Don''t Play', 5.0),
    (3,  'overcast', 83, 78, ARRAY[83, 78], ARRAY['low', 'moderate'],     'false', 'Play',        1.5),
    (4,  'rain',     70, 96, ARRAY[70, 96], ARRAY['low', 'moderate'],     'false', 'Play',        1.0),
    (5,  'rain',     68, 80, ARRAY[68, 80], ARRAY['medium', 'good'],      'false', 'Play',        1.0),
    (6,  'rain',     65, 70, ARRAY[65, 70], ARRAY['low', 'unhealthy'],    'true',  'Don''t Play', 1.0),
    (7,  'overcast', 64, 65, ARRAY[64, 65], ARRAY['medium', 'moderate'],  'true',  'Play',        1.5),
    (8,  'sunny',    72, 95, ARRAY[72, 95], ARRAY['high', 'unhealthy'],   'false', 'Don''t Play', 5.0),
    (9,  'sunny',    69, 70, ARRAY[69, 70], ARRAY['high', 'good'],        'false', 'Play',        5.0),
    (10, 'rain',     75, 80, ARRAY[75, 80], ARRAY['medium', 'good'],      'false', 'Play',        1.0),
    (11, 'sunny',    75, 70, ARRAY[75, 70], ARRAY['none', 'good'],        'true',  'Play',        5.0),
    (12, 'overcast', 72, 90, ARRAY[72, 90], ARRAY['medium', 'moderate'],  'true',  'Play',        1.5),
    (13, 'overcast', 81, 75, ARRAY[81, 75], ARRAY['medium', 'moderate'],  'false', 'Play',        1.5),
    (14, 'rain',     71, 80, ARRAY[71, 80], ARRAY['low', 'unhealthy'],    'true',  'Don''t Play', 1.0);
{code}
run MLP:
{code}
-- Remove the outputs of any earlier run so the training call can be repeated.
DROP TABLE IF EXISTS mlp_model_2, mlp_model_2_summary, mlp_model_2_standardization;

-- Train the classifier on dt_golf; the dependent column holds a label with an
-- apostrophe ('Don''t Play'), the case reported as failing in MADLIB-1238.
SELECT madlib.mlp_classification(
    'dt_golf',            -- source table
    'mlp_model_2',        -- output (model) table
    '"Temp_Humidity"',    -- independent variable
    'class',              -- dependent variable
    ARRAY[5],             -- hidden layer sizes
    'learning_rate_init=0.1, n_iterations=10, tolerance=0',  -- optimizer parameters
    'relu',               -- activation function
    NULL,                 -- weights
    FALSE,                -- warm start
    TRUE                  -- verbose
);

-- MADlib names the summary table <output_table>_summary, so the summary for
-- this model is mlp_model_2_summary; mlp_model_summary belongs to a model
-- whose output table is mlp_model.
SELECT * FROM mlp_model_2_summary;
{code}
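produces (note: the summary shown below reports source_table iris_data, activation tanh and n_iterations 500, so it appears to come from a different model than the dt_golf run above):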
{code}
-[ RECORD 1 ]--------+------------------------------
source_table | iris_data
independent_varname | attributes
dependent_varname | class_text
dependent_vartype | character varying
tolerance | 0
learning_rate_init | 0.003
learning_rate_policy | constant
n_iterations | 500
n_tries | 1
layer_sizes | {4,5,2}
activation | tanh
is_classification | t
classes | {Iris_setosa,Iris_versicolor}
weights | 1
grouping_col | NULL
{code}
LGTM for MLP
> MLP fails for dt_golf dataset
> -----------------------------
>
> Key: MADLIB-1238
> URL: https://issues.apache.org/jira/browse/MADLIB-1238
> Project: Apache MADlib
> Issue Type: Bug
> Components: Module: Neural Networks
> Reporter: Frank McQuillan
> Assignee: Jingyi Mei
> Priority: Major
> Fix For: v1.15
>
>
> For the dt_golf data set from
> http://madlib.apache.org/docs/latest/group__grp__decision__tree.html#examples
> MLP fails
> {code}
> SELECT madlib.mlp_classification( 'dt_golf', 'mlp_model_2',
> '"Temp_Humidity"', 'class', ARRAY[5], 'learning_rate_init=0.1,
> n_iterations=10, tolerance=0', 'relu', NULL, FALSE, TRUE );
> ERROR: spiexceptions.SyntaxError: syntax error at or near "t"
> LINE 9: ...p_dep_var_norm72383676_1526602175_13763735__='Don't Play',__...
> ^
> QUERY:
> SELECT
> array_to_string(ARRAY[Null], ',') AS
> __madlib_temp_col_grp_key66624757_1526602175_949550__,
> NULL,
> 1 AS __madlib_temp_col_grp_iteration58741248_1526602175_20584655__,
> (
> madlib.mlp_igd_step(
> (__madlib_temp_ind_var_norm46776457_1526602175_44392250__)::DOUBLE
> PRECISION[],
> (ARRAY[__madlib_temp_dep_var_norm72383676_1526602175_13763735__='Don't
> Play',__madlib_temp_dep_var_norm72383676_1526602175_13763735__='Play']::integer[])::DOUBLE
> PRECISION[],
>
> __madlib_temp_rel_state87474628_1526602175_41807780__.__madlib_temp_col_grp_state62184949_1526602175_41368190__,
> ARRAY[ 2,5,2 ]::DOUBLE PRECISION[],
> (0.1)::FLOAT8,
> 0,
> 1,
> (1)::DOUBLE PRECISION,
> (NULL::DOUBLE PRECISION[])::DOUBLE PRECISION[],
> 0,
> 0.9::FLOAT8,
> True::boolean
> )
> ) AS __madlib_temp_col_grp_state62184949_1526602175_41368190__
> FROM (
> SELECT *,
> array_to_string(ARRAY[Null], ',') AS
> __madlib_temp_col_grp_key66624757_1526602175_949550__
> FROM __madlib_temp_tbl_data_scaled90737274_1526602175_8324315__
> ) AS _src
> JOIN ( SELECT grp_key AS
> __madlib_temp_col_grp_key66624757_1526602175_949550__,state AS
> __madlib_temp_col_grp_state62184949_1526602175_41368190__ FROM
> madlib._gen_state($1, NULL, $2) ) AS
> __madlib_temp_rel_state87474628_1526602175_41807780__
> ON TRUE
> JOIN ( SELECT unnest($3) AS
> __madlib_temp_col_grp_key66624757_1526602175_949550__, unnest($4) AS
> __madlib_temp_col_n_tuples86886826_1526602175_68539375__ ) AS _rel_n_tuples
> ON TRUE
> CONTEXT: Traceback (most recent call last):
> PL/Python function "mlp_classification", line 36, in <module>
> grouping_col
> PL/Python function "mlp_classification", line 45, in wrapper
> PL/Python function "mlp_classification", line 334, in mlp
> PL/Python function "mlp_classification", line 576, in update
> PL/Python function "mlp_classification"
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)