[
https://issues.apache.org/jira/browse/MADLIB-1234?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16481089#comment-16481089
]
Frank McQuillan commented on MADLIB-1234:
-----------------------------------------
fixed with commit
https://github.com/apache/madlib/commit/ccc3a1832be1d0dc3cbf8a43c66b019a583c3b8b
{code}
-- Recreate the mt_cars fixture from scratch so the script can be re-run.
DROP TABLE IF EXISTS mt_cars;

-- One row per car; id is the only column declared NOT NULL.
CREATE TABLE mt_cars (
    id   INTEGER NOT NULL,
    mpg  DOUBLE PRECISION,
    cyl  INTEGER,
    disp DOUBLE PRECISION,
    hp   INTEGER,
    drat DOUBLE PRECISION,
    wt   DOUBLE PRECISION,
    qsec DOUBLE PRECISION,
    vs   INTEGER,
    am   INTEGER,
    gear INTEGER,
    carb INTEGER
)
DISTRIBUTED RANDOMLY;  -- Greenplum distribution clause
-- Populate mt_cars with 32 sample rows (ids 1 through 32). The value order in
-- each tuple follows the explicit column list on the INSERT line.
INSERT INTO mt_cars (id,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb) VALUES
(1,18.7,8,360,175,3.15,3.44,17.02,0,0,3,2),
(2,21,6,160,110,3.9,2.62,16.46,0,1,4,4),
(3,24.4,4,146.7,62,3.69,3.19,20,1,0,4,2),
(4,21,6,160,110,3.9,2.875,17.02,0,1,4,4),
(5,17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4),
(6,16.4,8,275.8,180,3.078,4.07,17.4,0,0,3,3),
(7,22.8,4,108,93,3.85,2.32,18.61,1,1,4,1),
(8,17.3,8,275.8,180,3.078,3.73,17.6,0,0,3,3),
(9,21.4,6,258,110,3.08,3.215,19.44,1,0,3,1),
(10,15.2,8,275.8,180,3.078,3.78,18,0,0,3,3),
(11,18.1,6,225,105,2.768,3.46,20.22,1,0,3,1),
(12,32.4,4,78.7,66,4.08,2.20,19.47,1,1,4,1),
(13,14.3,8,360,245,3.21,3.578,15.84,0,0,3,4),
(14,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2),
(15,30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2),
(16,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4),
(17,33.9,4,71.14,65,4.22,1.835,19.9,1,1,4,1),
(18,15.2,8,304,150,3.15,3.435,17.3,0,0,3,2),
(19,10.4,8,472,205,2.93,5.25,17.98,0,0,3,4),
(20,27.3,4,79,66,4.08,1.935,18.9,1,1,4,1),
(21,10.4,8,460,215,3,5.424,17.82,0,0,3,4),
(22,26,4,120.3,91,4.43,2.14,16.7,0,1,5,2),
(23,14.7,8,440,230,3.23,5.345,17.42,0,0,3,4),
(24,30.4,4,95.14,113,3.77,1.513,16.9,1,1,5,2),
(25,21.5,4,120.1,97,3.70,2.465,20.01,1,0,3,1),
(26,15.8,8,351,264,4.22,3.17,14.5,0,1,5,4),
(27,15.5,8,318,150,2.768,3.52,16.87,0,0,3,2),
(28,15,8,301,335,3.54,3.578,14.6,0,1,5,8),
(29,13.3,8,350,245,3.73,3.84,15.41,0,0,3,4),
(30,19.2,8,400,175,3.08,3.845,17.05,0,0,3,2),
(31,19.7,6,145,175,3.62,2.77,15.5,0,1,5,6),
(32,21.4,4,121,109,4.11,2.78,18.6,1,1,4,2);
-- Start clean so the CREATE TABLE AS below cannot collide with an earlier run.
DROP TABLE IF EXISTS mt_cars_svec;

-- Pack the ten predictor columns into a single madlib.svec column named
-- "features"; id and mpg are carried over unchanged.
CREATE TABLE mt_cars_svec AS
SELECT
    id,
    mpg,
    ARRAY[cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb]::float8[]::madlib.svec AS features
FROM mt_cars
DISTRIBUTED RANDOMLY;

-- Display the converted rows in a stable order.
SELECT
    id,
    mpg,
    features
FROM mt_cars_svec
ORDER BY id;
-- Drop the three output tables named below so forest_train can be re-run.
DROP TABLE IF EXISTS
    mt_cars_svec_output,
    mt_cars_svec_output_group,
    mt_cars_svec_output_summary;

-- Reproduction for MADLIB-1234: the feature list casts the sparse vector to a
-- float array inside the argument string, which used to crash the database.
-- Each trailing comment is kept on a single line: a "--" comment ends at the
-- newline, so wrapped comment text would be parsed as SQL and break the call.
-- The numeric arguments are positional and left unlabelled here; see the
-- MADlib forest_train documentation for their order and meaning.
SELECT madlib.forest_train(
    'mt_cars_svec',
    'mt_cars_svec_output',
    'id',
    'mpg',
    'features::madlib.svec::float8[]',
    '',  -- exclude columns, e.g. 'features[3], features[4], features[8], features[9], features[10]'
    '',  -- grouping columns, e.g. 'features[8]'
    10::integer,
    2::integer,
    TRUE::boolean,
    1,
    10,
    8,
    3,
    10
);
{code}
works OK:
{code}
madlib=# SELECT madlib.forest_train('mt_cars_svec',
madlib(# 'mt_cars_svec_output',
madlib(# 'id',
madlib(# 'mpg',
madlib(# 'features::madlib.svec::float8[]',
madlib(# '', -- 'features[3], features[4], features[8], features[9], features[10]', -- exclude columns
madlib(# '', -- 'features[8]', -- grouping columns
madlib(# 10::integer,
madlib(# 2::integer,
madlib(# TRUE::boolean,
madlib(# 1,
madlib(# 10,
madlib(# 8,
madlib(# 3,
madlib(# 10
madlib(# );
forest_train
--------------
(1 row)
{code}
whereas it used to crash:
{code}
(psycopg2.OperationalError) server closed the connection unexpectedly
This probably means the server terminated abnormally
before or while processing the request.
[SQL: "SELECT madlib.forest_train('mt_cars_svec',\n 'mt_cars_svec_output',\n
'id',\n 'mpg',\n 'features::madlib.svec::float8[]',\n '', -- 'features[3],
features[4], features[8], features[9], features[10]', -- exclude columns\n '',
-- 'features[8]', -- grouping columns\n 10::integer,\n 2::integer,\n
TRUE::boolean,\n 1,\n 10,\n 8,\n 3,\n 10\n );"]
{code}
so looks good
> RF, DT - casting sparse vector to float array in arg list crashes DB
> ---------------------------------------------------------------------
>
> Key: MADLIB-1234
> URL: https://issues.apache.org/jira/browse/MADLIB-1234
> Project: Apache MADlib
> Issue Type: Bug
> Components: Module: Decision Tree
> Reporter: Frank McQuillan
> Assignee: Nandish Jayaram
> Priority: Major
> Fix For: v1.15
>
>
> Workaround is to cast to float[] before call to RF/DT function
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)