[ 
https://issues.apache.org/jira/browse/MADLIB-1234?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16481089#comment-16481089
 ] 

Frank McQuillan commented on MADLIB-1234:
-----------------------------------------

fixed with commit 

https://github.com/apache/madlib/commit/ccc3a1832be1d0dc3cbf8a43c66b019a583c3b8b

 

{code}
-- Start from a clean slate so the script can be re-run.
DROP TABLE IF EXISTS mt_cars;

-- Base table for the repro: one row per car, keyed by id, with mpg plus
-- ten numeric attribute columns.
CREATE TABLE mt_cars (
    id    INTEGER NOT NULL,
    mpg   DOUBLE PRECISION,
    cyl   INTEGER,
    disp  DOUBLE PRECISION,
    hp    INTEGER,
    drat  DOUBLE PRECISION,
    wt    DOUBLE PRECISION,
    qsec  DOUBLE PRECISION,
    vs    INTEGER,
    am    INTEGER,
    gear  INTEGER,
    carb  INTEGER
)
-- Greenplum distribution clause: no distribution key, rows spread randomly.
DISTRIBUTED RANDOMLY;

-- Populate mt_cars with 32 sample rows (ids 1 through 32). The values in each
-- tuple follow the explicit column list on the INSERT line.
INSERT INTO mt_cars (id,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb) VALUES
(1,18.7,8,360,175,3.15,3.44,17.02,0,0,3,2),
(2,21,6,160,110,3.9,2.62,16.46,0,1,4,4),
(3,24.4,4,146.7,62,3.69,3.19,20,1,0,4,2),
(4,21,6,160,110,3.9,2.875,17.02,0,1,4,4),
(5,17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4),
(6,16.4,8,275.8,180,3.078,4.07,17.4,0,0,3,3),
(7,22.8,4,108,93,3.85,2.32,18.61,1,1,4,1),
(8,17.3,8,275.8,180,3.078,3.73,17.6,0,0,3,3),
(9,21.4,6,258,110,3.08,3.215,19.44,1,0,3,1),
(10,15.2,8,275.8,180,3.078,3.78,18,0,0,3,3),
(11,18.1,6,225,105,2.768,3.46,20.22,1,0,3,1),
(12,32.4,4,78.7,66,4.08,2.20,19.47,1,1,4,1),
(13,14.3,8,360,245,3.21,3.578,15.84,0,0,3,4),
(14,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2),
(15,30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2),
(16,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4),
(17,33.9,4,71.14,65,4.22,1.835,19.9,1,1,4,1),
(18,15.2,8,304,150,3.15,3.435,17.3,0,0,3,2),
(19,10.4,8,472,205,2.93,5.25,17.98,0,0,3,4),
(20,27.3,4,79,66,4.08,1.935,18.9,1,1,4,1),
(21,10.4,8,460,215,3,5.424,17.82,0,0,3,4),
(22,26,4,120.3,91,4.43,2.14,16.7,0,1,5,2),
(23,14.7,8,440,230,3.23,5.345,17.42,0,0,3,4),
(24,30.4,4,95.14,113,3.77,1.513,16.9,1,1,5,2),
(25,21.5,4,120.1,97,3.70,2.465,20.01,1,0,3,1),
(26,15.8,8,351,264,4.22,3.17,14.5,0,1,5,4),
(27,15.5,8,318,150,2.768,3.52,16.87,0,0,3,2),
(28,15,8,301,335,3.54,3.578,14.6,0,1,5,8),
(29,13.3,8,350,245,3.73,3.84,15.41,0,0,3,4),
(30,19.2,8,400,175,3.08,3.845,17.05,0,0,3,2),
(31,19.7,6,145,175,3.62,2.77,15.5,0,1,5,6),
(32,21.4,4,121,109,4.11,2.78,18.6,1,1,4,2);

-- Rebuild the sparse-vector variant of the data set from scratch.
DROP TABLE IF EXISTS mt_cars_svec;

-- Carry id and mpg over unchanged and pack the ten remaining columns into a
-- single madlib.svec column named "features" (array -> float8[] -> svec).
CREATE TABLE mt_cars_svec AS
SELECT
    id,
    mpg,
    ARRAY[cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb]::float8[]::madlib.svec
        AS features
FROM mt_cars
DISTRIBUTED RANDOMLY;

-- Show the converted rows in a stable order.
SELECT
    id,
    mpg,
    features
FROM mt_cars_svec
ORDER BY id;

-- Remove output tables left over from a previous run so the training call
-- below can create them again.
DROP TABLE IF EXISTS
    mt_cars_svec_output,
    mt_cars_svec_output_group,
    mt_cars_svec_output_summary;

-- Repro for MADLIB-1234: the feature list casts the svec column back to
-- float8[] inside the argument string.
-- Every annotation is a complete "--" comment on its own line, so mail or
-- editor re-wrapping cannot push comment text out into the statement.
-- Argument labels follow the positional order of madlib.forest_train.
SELECT madlib.forest_train(
 'mt_cars_svec',                     -- training table
 'mt_cars_svec_output',              -- output (model) table
 'id',                               -- id column
 'mpg',                              -- dependent variable
 'features::madlib.svec::float8[]',  -- list of features
 -- exclude columns; alternative value left commented out:
 --   'features[3], features[4], features[8], features[9], features[10]'
 '',
 -- grouping columns; alternative value left commented out:
 --   'features[8]'
 '',
 10::integer,                        -- num_trees
 2::integer,                         -- num_random_features
 TRUE::boolean,                      -- importance
 1,                                  -- num_permutations
 10,                                 -- max_tree_depth
 8,                                  -- min_split
 3,                                  -- min_bucket
 10                                  -- num_splits
 );
{code}

works OK:

{code}
madlib=# SELECT madlib.forest_train('mt_cars_svec',
madlib(# 'mt_cars_svec_output',
madlib(# 'id',
madlib(# 'mpg',
madlib(# 'features::madlib.svec::float8[]',
madlib(# '', -- 'features[3], features[4], features[8], features[9], features[10]', -- exclude columns
madlib(# '', -- 'features[8]', -- grouping columns
madlib(# 10::integer,
madlib(# 2::integer,
madlib(# TRUE::boolean,
madlib(# 1,
madlib(# 10,
madlib(# 8,
madlib(# 3,
madlib(# 10
madlib(# );
 forest_train 
--------------
 
(1 row)
{code}

whereas it used to crash:

{code}
(psycopg2.OperationalError) server closed the connection unexpectedly
 This probably means the server terminated abnormally
 before or while processing the request.
 [SQL: "SELECT madlib.forest_train('mt_cars_svec',\n 'mt_cars_svec_output',\n 
'id',\n 'mpg',\n 'features::madlib.svec::float8[]',\n '', -- 'features[3], 
features[4], features[8], features[9], features[10]', -- exclude columns\n '', 
-- 'features[8]', -- grouping columns\n 10::integer,\n 2::integer,\n 
TRUE::boolean,\n 1,\n 10,\n 8,\n 3,\n 10\n );"]
{code}
so looks good

> RF, DT  - casting sparse vector to float array in arg list crashes DB
> ---------------------------------------------------------------------
>
>                 Key: MADLIB-1234
>                 URL: https://issues.apache.org/jira/browse/MADLIB-1234
>             Project: Apache MADlib
>          Issue Type: Bug
>          Components: Module: Decision Tree
>            Reporter: Frank McQuillan
>            Assignee: Nandish Jayaram
>            Priority: Major
>             Fix For: v1.15
>
>
> Workaround is to cast to float[] before call to RF/DT function



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to