This is an automated email from the ASF dual-hosted git repository.

nkak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
commit b9f2d2e9ec43e2b214639b03a29a1f53df98441c Author: Nikhil Kak <n...@pivotal.io> AuthorDate: Tue May 14 11:25:20 2019 -0700 DL: Reorder arguments for fit and evaluate UDA JIRA: MADLIB-1343 Reordered arguments to be consistent across fit and evaluate UDAs Closes #392 Co-authored-by: Ekta Khanna <ekha...@pivotal.io> --- .../modules/deep_learning/madlib_keras.py_in | 58 ++++++---------- .../modules/deep_learning/madlib_keras.sql_in | 63 ++++++++--------- .../test/unit_tests/test_madlib_keras.py_in | 80 ++++++++++++---------- 3 files changed, 92 insertions(+), 109 deletions(-) diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in index 9abbf00..40409f8 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in @@ -168,15 +168,14 @@ def fit(schema_madlib, source_table, model,model_arch_table, fit_params_to_pass = "$madlib$" + fit_params + "$madlib$" run_training_iteration = plpy.prepare(""" SELECT {schema_madlib}.fit_step( - {independent_varname}::REAL[], {dependent_varname}::SMALLINT[], - {gp_segment_id_col}, - {num_classes}::INTEGER, - ARRAY{seg_ids_train}, - ARRAY{images_per_seg_train}, + {independent_varname}::REAL[], $MAD${model_arch}$MAD$::TEXT, {compile_params_to_pass}::TEXT, {fit_params_to_pass}::TEXT, + {gp_segment_id_col}, + ARRAY{seg_ids_train}, + ARRAY{images_per_seg_train}, {gpus_per_host}, {segments_per_host}, $1 @@ -420,28 +419,11 @@ def get_images_per_seg(source_table, dependent_varname): gp_segment_id_col = 'gp_segment_id' return gp_segment_id_col, seg_ids, images_per_seg -def fit_transition(state, ind_var, dep_var, current_seg_id, num_classes, - seg_ids, images_per_seg, architecture, - compile_params, fit_params, gpus_per_host, segments_per_host, +def fit_transition(state, dependent_var, independent_var, model_architecture, + compile_params, fit_params, current_seg_id, seg_ids, + 
images_per_seg, gpus_per_host, segments_per_host, previous_state, **kwargs): - """ - - :param state: - :param ind_var: - :param dep_var: - :param current_seg_id: - :param num_classes: - :param seg_ids: - :param images_per_seg: - :param architecture: - :param compile_params: - :param fit_params: - :param gpus_per_host: - :param previous_state: - :param kwargs: - :return: - """ - if not ind_var or not dep_var: + if not independent_var or not dependent_var: return state start_transition = time.time() @@ -452,7 +434,7 @@ def fit_transition(state, ind_var, dep_var, current_seg_id, num_classes, if not state: if not is_platform_pg(): set_keras_session(gpus_per_host, segments_per_host) - segment_model = model_from_json(architecture) + segment_model = model_from_json(model_architecture) SD['model_shapes'] = madlib_keras_serializer.get_model_shapes(segment_model) # Configure GPUs/CPUs compile_and_set_weights(segment_model, compile_params, device_name, @@ -469,8 +451,8 @@ def fit_transition(state, ind_var, dep_var, current_seg_id, num_classes, state, SD['model_shapes']) # Prepare the data - x_train = np.array(ind_var, dtype='float64') - y_train = np.array(dep_var) + x_train = np.array(independent_var, dtype='float64') + y_train = np.array(dependent_var) # Fit segment model on data start_fit = time.time() @@ -604,7 +586,7 @@ def evaluate1(schema_madlib, model_table, test_table, id_col, model_arch_table, def get_loss_acc_from_keras_eval(schema_madlib, table, dependent_varname, independent_varname, compile_params, model_arch, model_data, gpus_per_host, - segments_per_host, seg_ids, images_per_seg, + segments_per_host, seg_ids_val, images_per_seg_val, gp_segment_id_col): """ This function will call the internal keras evaluate function to get the loss @@ -615,16 +597,18 @@ def get_loss_acc_from_keras_eval(schema_madlib, table, dependent_varname, -- The right solution is either to change the datatype of the agg function from -- SMALLINT to INTEGER, or change the output of minibatch 
util to produce SMALLINT -- For the first, we should change fit_step also - select ({schema_madlib}.internal_keras_evaluate({dependent_varname}::SMALLINT[], + select ({schema_madlib}.internal_keras_evaluate( + {dependent_varname}::SMALLINT[], {independent_varname}::REAL[], $MAD${model_arch}$MAD$, $1, {compile_params}, + {gp_segment_id_col}, + ARRAY{seg_ids_val}, + ARRAY{images_per_seg_val}, {gpus_per_host}, - {segments_per_host}, - ARRAY{seg_ids}, - ARRAY{images_per_seg}, - {gp_segment_id_col})) as loss_acc + {segments_per_host} + )) as loss_acc from {table} """.format(**locals()), ["bytea"]) res = plpy.execute(evaluate_query, [model_data]) @@ -633,8 +617,8 @@ def get_loss_acc_from_keras_eval(schema_madlib, table, dependent_varname, def internal_keras_eval_transition(state, dependent_var, independent_var, model_architecture, model_data, compile_params, - gpus_per_host, segments_per_host, seg_ids, - images_per_seg, current_seg_id, **kwargs): + current_seg_id, seg_ids, images_per_seg, + gpus_per_host, segments_per_host, **kwargs): SD = kwargs['SD'] device_name = get_device_name_and_set_cuda_env(gpus_per_host, current_seg_id) diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in index 77222a9..5d21ef8 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in @@ -126,15 +126,14 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.fit_transition( state BYTEA, - ind_var REAL[], - dep_var SMALLINT[], + dependent_var SMALLINT[], + independent_var REAL[], + model_architecture TEXT, + compile_params TEXT, + fit_params TEXT, current_seg_id INTEGER, - num_classes INTEGER, seg_ids INTEGER[], images_per_seg INTEGER[], - architecture TEXT, - compile_params TEXT, - fit_params TEXT, gpus_per_host INTEGER, segments_per_host INTEGER, previous_state BYTEA @@ -162,30 
+161,26 @@ $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.fit_step( - REAL[], - SMALLINT[], - INTEGER, - INTEGER, - INTEGER[], - INTEGER[], - TEXT, - INTEGER[], - TEXT, - TEXT, - TEXT, - INTEGER, - INTEGER, - BYTEA); + SMALLINT[], + REAL[], + TEXT, + TEXT, + TEXT, + INTEGER, + INTEGER[], + INTEGER[], + INTEGER, + INTEGER, + BYTEA); CREATE AGGREGATE MADLIB_SCHEMA.fit_step( - /* ind_var */ REAL[], /* dep_var */ SMALLINT[], + /* ind_var */ REAL[], + /* model_architecture */ TEXT, + /* compile_params */ TEXT, + /* fit_params */ TEXT, /* current_seg_id */ INTEGER, - /* num_classes */ INTEGER, /* seg_ids*/ INTEGER[], /* images_per_seg*/ INTEGER[], - /* architecture */ TEXT, - /* compile_params */ TEXT, - /* fit_params */ TEXT, /* gpus_per_host */ INTEGER, /* segments_per_host */ INTEGER, /* previous_state */ BYTEA @@ -298,11 +293,11 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_keras_eval_transition( model_architecture TEXT, model_data BYTEA, compile_params TEXT, - gpus_per_host INTEGER, - segments_per_host INTEGER, + current_seg_id INTEGER, seg_ids INTEGER[], images_per_seg INTEGER[], - current_seg_id INTEGER + gpus_per_host INTEGER, + segments_per_host INTEGER ) RETURNS REAL[3] AS $$ PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras') return madlib_keras.internal_keras_eval_transition(**globals()) @@ -332,11 +327,11 @@ DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.internal_keras_evaluate( TEXT, BYTEA, TEXT, - BOOLEAN, + INTEGER, INTEGER[], INTEGER[], - INTEGER -); + INTEGER, + INTEGER); CREATE AGGREGATE MADLIB_SCHEMA.internal_keras_evaluate( /* dependent_var */ SMALLINT[], @@ -344,11 +339,11 @@ CREATE AGGREGATE MADLIB_SCHEMA.internal_keras_evaluate( /* model_architecture */ TEXT, /* model_data */ BYTEA, /* compile_params */ TEXT, - /* gpus_per_host */ INTEGER, - /* segments_per_host */ INTEGER, + /* current_seg_id */ INTEGER, /* seg_ids */ INTEGER[], /* images_per_seg*/ INTEGER[], - /* 
current_seg_id */ INTEGER + /* gpus_per_host */ INTEGER, + /* segments_per_host */ INTEGER )( STYPE=REAL[3], INITCOND='{0,0,0}', diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in index 26fde8c..51c3afa 100644 --- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in +++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in @@ -96,9 +96,9 @@ class MadlibKerasFitTestCase(unittest.TestCase): k = {'SD' : {}} new_model_state = self.subject.fit_transition( - None, self.independent_var , self.dependent_var, 0, 2, self.all_seg_ids, self.total_images_per_seg, - self.model.to_json(), self.compile_params, self.fit_params, 0, 4, - previous_state.tostring(), **k) + None, self.dependent_var, self.independent_var , self.model.to_json(), + self.compile_params, self.fit_params, 0, self.all_seg_ids, + self.total_images_per_seg, 0, 4, previous_state.tostring(), **k) state = np.fromstring(new_model_state, dtype=np.float32) image_count = state[2] weights = np.rint(state[3:]).astype(np.int) @@ -129,9 +129,9 @@ class MadlibKerasFitTestCase(unittest.TestCase): k = {'SD' : {}} new_model_state = self.subject.fit_transition( - None, self.independent_var , self.dependent_var, 0, 2, self.all_seg_ids, self.total_images_per_seg, - self.model.to_json(), self.compile_params, self.fit_params, 0, 4, - previous_state.tostring(), **k) + None, self.dependent_var, self.independent_var , self.model.to_json(), + self.compile_params, self.fit_params, 0, self.all_seg_ids, + self.total_images_per_seg, 0, 4, previous_state.tostring(), **k) state = np.fromstring(new_model_state, dtype=np.float32) image_count = state[2] weights = np.rint(state[3:]).astype(np.int) @@ -165,8 +165,9 @@ class MadlibKerasFitTestCase(unittest.TestCase): k['SD']['segment_model'] = self.model new_model_state = self.subject.fit_transition( - 
state.tostring(), self.independent_var, self.dependent_var, 0, 2, self.all_seg_ids, self.total_images_per_seg, - self.model.to_json(), None, self.fit_params, 0, 4, 'dummy_previous_state', **k) + state.tostring(), self.dependent_var, self.independent_var, + self.model.to_json(), None, self.fit_params, 0, self.all_seg_ids, + self.total_images_per_seg, 0, 4, 'dummy_previous_state', **k) state = np.fromstring(new_model_state, dtype=np.float32) image_count = state[2] @@ -200,8 +201,9 @@ class MadlibKerasFitTestCase(unittest.TestCase): k = {'SD': { 'model_shapes': self.model_shapes}} k['SD']['segment_model'] = self.model new_model_state = self.subject.fit_transition( - state.tostring(), self.independent_var , self.dependent_var, 0, 2, self.all_seg_ids, self.total_images_per_seg, - self.model.to_json(), None, self.fit_params, 0, 4, 'dummy_previous_state', **k) + state.tostring(), self.dependent_var, self.independent_var , self.model.to_json(), + None, self.fit_params, 0, self.all_seg_ids, self.total_images_per_seg, + 0, 4, 'dummy_previous_state', **k) state = np.fromstring(new_model_state, dtype=np.float32) image_count = state[2] @@ -236,8 +238,9 @@ class MadlibKerasFitTestCase(unittest.TestCase): k = {'SD': { 'model_shapes': self.model_shapes}} k['SD']['segment_model'] = self.model new_model_state = self.subject.fit_transition( - state.tostring(), self.independent_var , self.dependent_var, 0, 2, self.all_seg_ids, self.total_images_per_seg, - self.model.to_json(), None, self.fit_params, 0, 4, 'dummy_previous_state', **k) + state.tostring(), self.dependent_var, self.independent_var, + self.model.to_json(), None, self.fit_params, 0, self.all_seg_ids, + self.total_images_per_seg, 0, 4, 'dummy_previous_state', **k) state = np.fromstring(new_model_state, dtype=np.float32) image_count = state[2] @@ -265,10 +268,9 @@ class MadlibKerasFitTestCase(unittest.TestCase): with self.assertRaises(plpy.PLPYException) as error: new_model_state = self.subject.fit_transition( - None, 
self.independent_var , self.dependent_var, 0, 2, - self.all_seg_ids, total_images_per_seg, - self.model.to_json(), self.compile_params, self.fit_params, - 0, 4, previous_state.tostring(), **k) + None, self.dependent_var, self.independent_var , self.model.to_json(), + self.compile_params, self.fit_params, 0, self.all_seg_ids, + total_images_per_seg, 0, 4, previous_state.tostring(), **k) self.assertIn('0 rows', str(error.exception)) def test_fit_transition_too_many_images(self): @@ -285,26 +287,28 @@ class MadlibKerasFitTestCase(unittest.TestCase): with self.assertRaises(plpy.PLPYException) as error: new_model_state = self.subject.fit_transition( - None, self.independent_var , self.dependent_var, 0, 2, self.all_seg_ids, total_images_per_seg, - self.model.to_json(), self.compile_params, self.fit_params, 0, 4, - previous_state.tostring(), **k) + None, self.dependent_var, self.independent_var , self.model.to_json(), + self.compile_params, self.fit_params, 0, self.all_seg_ids, + total_images_per_seg, 0, 4, previous_state.tostring(), **k) self.assertIn('only 1', str(error.exception)) def test_fit_transition_first_tuple_none_ind_var_dep_var(self): k = {} self.assertEqual('dummy_state', - self.subject.fit_transition('dummy_state', None , [0], 1, 2, - - [0,1,2], [3,3,3], 'dummy_model_json', "foo", "bar", 0, 4, + self.subject.fit_transition('dummy_state', [0], None, + 'dummy_model_json', "foo", "bar", + 1, [0,1,2], [3,3,3], 0, 4, 'dummy_prev_state', **k)) self.assertEqual('dummy_state', - self.subject.fit_transition('dummy_state', [[0.5]], None, 1, 2, - [0,1,2], [3,3,3], 'dummy_model_json', "foo", "bar", 0, 4, + self.subject.fit_transition('dummy_state', None, [[0.5]], + 'dummy_model_json', "foo", "bar", + 1, [0,1,2], [3,3,3], 0, 4, 'dummy_prev_state', **k)) self.assertEqual('dummy_state', - self.subject.fit_transition('dummy_state', None, None, 1, 2, - [0,1,2], [3,3,3], 'dummy_model_json', "foo", "bar", 0, 4, + self.subject.fit_transition('dummy_state', None, None, + 
'dummy_model_json', "foo", "bar", + 1, [0,1,2], [3,3,3], 0, 4, 'dummy_prev_state', **k)) def test_fit_merge(self): @@ -1014,9 +1018,9 @@ class MadlibKerasEvaluationTestCase(unittest.TestCase): serialized_weights = np.array(serialized_weights, dtype=np.float32).tostring() new_state = self.subject.internal_keras_eval_transition( - state, self.dependent_var , self.independent_var, self.model.to_json(), serialized_weights, - self.compile_params, 0, 3, self.all_seg_ids, self.total_images_per_seg, - 0, **k) + state, self.dependent_var , self.independent_var, self.model.to_json(), + serialized_weights, self.compile_params, 0, self.all_seg_ids, + self.total_images_per_seg, 0, 3, **k) agg_loss, agg_accuracy, image_count = new_state @@ -1053,9 +1057,9 @@ class MadlibKerasEvaluationTestCase(unittest.TestCase): k['SD']['segment_model'] = self.model new_state = self.subject.internal_keras_eval_transition( - state, self.dependent_var , self.independent_var, self.model.to_json(), 'dummy_model_data', - None, 0, 3, self.all_seg_ids, self.total_images_per_seg, - 0, **k) + state, self.dependent_var , self.independent_var, self.model.to_json(), + 'dummy_model_data', None, 0,self.all_seg_ids, + self.total_images_per_seg, 0, 3, **k) agg_loss, agg_accuracy, image_count = new_state @@ -1090,9 +1094,9 @@ class MadlibKerasEvaluationTestCase(unittest.TestCase): k['SD']['segment_model'] = self.model new_state = self.subject.internal_keras_eval_transition( - state, self.dependent_var , self.independent_var, self.model.to_json(), 'dummy_model_data', - None, 0, 3, self.all_seg_ids, self.total_images_per_seg, - 0, **k) + state, self.dependent_var , self.independent_var, self.model.to_json(), + 'dummy_model_data', None, 0, self.all_seg_ids, + self.total_images_per_seg, 0, 3, **k) agg_loss, agg_accuracy, image_count = new_state @@ -1206,9 +1210,9 @@ class MadlibKerasEvaluationTestCase(unittest.TestCase): with self.assertRaises(plpy.PLPYException): self.subject.internal_keras_eval_transition( - 
state, self.dependent_var , self.independent_var, self.model.to_json(), 'dummy_model_data', - None, 0, 3, self.all_seg_ids, total_images_per_seg, - 0, **k) + state, self.dependent_var , self.independent_var, self.model.to_json(), + 'dummy_model_data', None, 0, self.all_seg_ids, + total_images_per_seg, 0, 3, **k) def test_internal_keras_eval_final_image_count_zero(self): input_state = [0, 0, 0]