fmcquillan99 edited a comment on issue #403: SVM: Fix class weights when specified as a mapping URL: https://github.com/apache/madlib/pull/403#issuecomment-499288758 ``` DROP TABLE IF EXISTS houses; CREATE TABLE houses (id INT, tax INT, bedroom INT, bath FLOAT, price INT, size INT, lot INT, price2 TEXT); INSERT INTO houses VALUES (1 , 590 , 2 , 1 , 50000 , 770 , 22100, 'lt100k'), (2 , 1050 , 3 , 2 , 85000 , 1410 , 12000, 'lt100k'), (3 , 20 , 3 , 1 , 22500 , 1060 , 3500, 'lt100k'), (4 , 870 , 2 , 2 , 90000 , 1300 , 17500, 'lt100k'), (5 , 1320 , 3 , 2 , 133000 , 1500 , 30000, 'gt100k'), (6 , 1350 , 2 , 1 , 90500 , 820 , 25700, 'lt100k'), (7 , 2790 , 3 , 2.5 , 260000 , 2130 , 25000, 'gt100k'), (8 , 680 , 2 , 1 , 142500 , 1170 , 22000, 'gt100k'), (9 , 1840 , 3 , 2 , 160000 , 1500 , 19000, 'gt100k'), (10 , 3680 , 4 , 2 , 240000 , 2790 , 20000, 'gt100k'), (11 , 1660 , 3 , 1 , 87000 , 1030 , 17500, 'lt100k'), (12 , 1620 , 3 , 2 , 118600 , 1250 , 20000, 'gt100k'), (13 , 3100 , 3 , 2 , 140000 , 1760 , 38000, 'gt100k'), (14 , 2070 , 2 , 3 , 148000 , 1550 , 14000, 'gt100k'), (15 , 650 , 3 , 1.5 , 65000 , 1450 , 12000, 'lt100k'); ``` ``` DROP TABLE IF EXISTS houses_svm_gaussian, houses_svm_gaussian_summary, houses_svm_gaussian_random; SELECT madlib.svm_classification( 'houses', 'houses_svm_gaussian', 'price < 150000', 'ARRAY[1, tax, bath, size]', 'gaussian', 'n_components=10', '', 'init_stepsize=1, max_iter=200, class_weight=balanced' ); \x on SELECT * FROM houses_svm_gaussian; -[ RECORD 1 ]------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- coef | {0.714844965488907,0.250318127518095,-2.38928383420753,0.496511319241991,-0.732968736678418,0.405804203439028,-0.382933445051466,1.03493605457998,1.36269794687058,1.34976308449158} loss | 0.573568156888657 norm_of_gradient | 1.31262747172053 num_iterations | 176 num_rows_processed | 15 
num_rows_skipped | 0 dep_var_mapping | {f,t} ``` ``` DROP TABLE IF EXISTS houses_svm_gaussian, houses_svm_gaussian_summary, houses_svm_gaussian_random; SELECT madlib.svm_classification( 'houses', 'houses_svm_gaussian', 'price < 150000', 'ARRAY[1, tax, bath, size]', 'gaussian', 'n_components=10', '', 'init_stepsize=1, max_iter=200, class_weight={true:1, false:3}' ); \x on SELECT * FROM houses_svm_gaussian; -[ RECORD 1 ]------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ coef | {0.930683028668259,0.505925477275442,-2.56846778339476,0.556405030919901,-1.07556735802439,0.325697706517631,-0.872494815785118,1.39148131884382,1.41452211430188,2.02648079470983} loss | 0.526907702801827 norm_of_gradient | 1.73625539167368 num_iterations | 148 num_rows_processed | 15 num_rows_skipped | 0 dep_var_mapping | {f,t} ``` ``` DROP TABLE IF EXISTS houses_svm_gaussian, houses_svm_gaussian_summary, houses_svm_gaussian_random; SELECT madlib.svm_classification( 'houses', 'houses_svm_gaussian', 'price < 150000', 'ARRAY[1, tax, bath, size]', 'gaussian', 'n_components=10', '', 'init_stepsize=1, max_iter=200, class_weight={false:3}' ); \x on SELECT * FROM houses_svm_gaussian; -[ RECORD 1 ]------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- coef | {0.812714358084175,0.516611089846274,-2.65310766450326,0.67665375700828,-1.1073276109641,0.303844438423391,-0.958238313229976,1.30096201645626,1.44218799133577,2.20663265611754} loss | 0.518638591961475 norm_of_gradient | 1.45988195776794 num_iterations | 183 num_rows_processed | 15 num_rows_skipped | 0 dep_var_mapping | {f,t} ``` ``` DROP TABLE IF EXISTS houses_svm_gaussian, houses_svm_gaussian_summary, houses_svm_gaussian_random; SELECT madlib.svm_classification( 'houses', 
'houses_svm_gaussian', 'price2', 'ARRAY[1, tax, bath, size]', 'gaussian', 'n_components=10', '', 'init_stepsize=1, max_iter=200, class_weight={gt100k:1, lt100k:3}' ); \x on SELECT * FROM houses_svm_gaussian; -[ RECORD 1 ]------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- coef | {-1.88839942488571,1.99106255876569,-0.795671096255553,2.1633019248309,2.92527682698349,-0.168696166478298,2.5589349412574,3.40128219835538,0.860916227855357,3.07703136724834} loss | 0 norm_of_gradient | 0 num_iterations | 200 num_rows_processed | 15 num_rows_skipped | 0 dep_var_mapping | {gt100k,lt100k} ``` ``` DROP TABLE IF EXISTS houses_svm_gaussian, houses_svm_gaussian_summary, houses_svm_gaussian_random; SELECT madlib.svm_classification( 'houses', 'houses_svm_gaussian', 'price2', 'ARRAY[1, tax, bath, size]', 'gaussian', 'n_components=10', '', 'init_stepsize=1, max_iter=200, class_weight={qqq:1, lt100k:3}' ); \x on SELECT * FROM houses_svm_gaussian; ERROR: plpy.Error: SVM: Key 'qqq' in '{qqq:1, lt100k:3}' is not a valid class label. (plpython.c:5038) CONTEXT: Traceback (most recent call last): PL/Python function "svm_classification", line 24, in <module> return svm.svm(**globals()) PL/Python function "svm_classification", line 816, in svm PL/Python function "svm_classification", line 1068, in _svm_parsed_params PL/Python function "svm_classification", line 982, in _compute_class_weight_sql PL/Python function "svm_classification", line 96, in _assert PL/Python function "svm_classification" madlib=# \x on Expanded display is on. 
madlib=# SELECT * FROM houses_svm_gaussian; ERROR: relation "houses_svm_gaussian" does not exist LINE 1: SELECT * FROM houses_svm_gaussian; ``` ``` DROP TABLE IF EXISTS houses_svm_gaussian, houses_svm_gaussian_summary, houses_svm_gaussian_random; SELECT madlib.svm_classification( 'houses', 'houses_svm_gaussian', 'price2', 'ARRAY[1, tax, bath, size]', 'gaussian', 'n_components=10', '', 'init_stepsize=1, max_iter=200, class_weight={gt100k:1, lt100k:3, qqq:2}' ); \x on SELECT * FROM houses_svm_gaussian; ERROR: plpy.Error: SVM: Only binary classification is supported. The class_weight param should have at least one and at most two labels in it. (plpython.c:5038) CONTEXT: Traceback (most recent call last): PL/Python function "svm_classification", line 24, in <module> return svm.svm(**globals()) PL/Python function "svm_classification", line 816, in svm PL/Python function "svm_classification", line 1068, in _svm_parsed_params PL/Python function "svm_classification", line 966, in _compute_class_weight_sql PL/Python function "svm_classification", line 96, in _assert PL/Python function "svm_classification" madlib=# \x on Expanded display is on. madlib=# SELECT * FROM houses_svm_gaussian; ERROR: relation "houses_svm_gaussian" does not exist LINE 1: SELECT * FROM houses_svm_gaussian; ``` LGTM
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services
