njayaram2 commented on a change in pull request #389: DL: Convert the keras_eval function from UDF to UDA
URL: https://github.com/apache/madlib/pull/389#discussion_r283047905
 
 

 ##########
 File path: src/ports/postgres/modules/deep_learning/madlib_keras.py_in
 ##########
 @@ -558,78 +529,118 @@ def evaluate1(schema_madlib, model_table, test_table, id_col, model_arch_table,
     plpy.info('evaluate result acc is {}'.format(loss_acc[1]))
 
 def get_loss_acc_from_keras_eval(schema_madlib, table, dependent_varname,
-                                 independent_varname, compile_params, model_arch,
-                                 model_data, gpus_per_host, segments_per_host,
-                                 seg_ids_val,
-                                 rows_per_seg_val, gp_segment_id_col):
+                                 independent_varname, compile_params,
+                                 model_arch, model_data, gpus_per_host,
+                                 segments_per_host, seg_ids, images_per_seg,
+                                 gp_segment_id_col):
     """
     This function will call the internal keras evaluate function to get the loss
     and accuracy of each tuple, which are then averaged to produce the final result.
     """
     evaluate_query = plpy.prepare("""
-    select {schema_madlib}.array_avg(loss_acc, True) as final_loss_acc from
-    (
-        select ({schema_madlib}.internal_keras_evaluate({dependent_varname},
-                                            {independent_varname},
+    -- TODO: really, we should not be casting INTEGERs and BIGINTs to SMALLINTs.
+    --  The right solution is either to change the datatype of the agg function from
+    --  SMALLINT to INTEGER, or to change the output of the minibatch util to produce SMALLINT.
+    --  For the first option, we would also need to change fit_step.
+    select ({schema_madlib}.internal_keras_evaluate({dependent_varname}::SMALLINT[],
+                                            {independent_varname}::REAL[],
                                             $MAD${model_arch}$MAD$,
-                                            $1, {compile_params},
+                                            $1,
+                                            {compile_params},
                                             {gpus_per_host},
                                             {segments_per_host},
-                                            ARRAY{seg_ids_val},
-                                            ARRAY{rows_per_seg_val},
+                                            ARRAY{seg_ids},
+                                            ARRAY{images_per_seg},
                                             {gp_segment_id_col})) as loss_acc
         from {table}
-    ) q""".format(**locals()), ["bytea"])
+    """.format(**locals()), ["bytea"])
     res = plpy.execute(evaluate_query, [model_data])
-    loss_acc = res[0]['final_loss_acc']
+    loss_acc = res[0]['loss_acc']
     return loss_acc
 
-
-def internal_keras_evaluate(dependent_var, independent_var, model_architecture,
-                            model_data, compile_params, gpus_per_host,
-                            segments_per_host, seg_ids_val,
-                            rows_per_seg_val, current_seg, **kwargs):
+def internal_keras_eval_transition(state, dependent_var, independent_var, model_architecture,
+                            model_data, compile_params, gpus_per_host, segments_per_host,
+                            seg_ids, images_per_seg, current_seg_id, **kwargs):
     SD = kwargs['SD']
-    device_name = get_device_name_and_set_cuda_env(gpus_per_host,
-                                                   current_seg)
+    device_name = get_device_name_and_set_cuda_env(gpus_per_host, current_seg_id)
 
-    if 'segment_model' not in SD:
+    agg_loss, agg_accuracy, agg_image_count = state
+
+    if not agg_image_count:
         if not is_platform_pg():
             set_keras_session(gpus_per_host, segments_per_host)
         model = model_from_json(model_architecture)
         model_shapes = madlib_keras_serializer.get_model_shapes(model)
         _, _, _, model_weights = madlib_keras_serializer.deserialize_weights(
-            model_data, model_shapes)
+                model_data, model_shapes)
         model.set_weights(model_weights)
         with K.tf.device(device_name):
             compile_model(model, compile_params)
         SD['segment_model'] = model
-        SD['row_count'] = 0
+        # These should already be 0, but set them just in case
+        agg_accuracy = 0
+        agg_loss = 0
     else:
+        # Same model every time, no need to re-compile or update weights
         model = SD['segment_model']
-    SD['row_count'] += 1
 
-    # Since the training data is batched but the validation data isn't,
-    # we have to make sure that the validation data np array has the same
-    # number of dimensions as training data. So we prepend a dimension to
-    # both x and y np arrays using expand_dims.
-    independent_var = expand_input_dims(independent_var, target_type='float32')
-    dependent_var = expand_input_dims(dependent_var)
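
Side note on the UDF-to-UDA conversion shown above: a Postgres/Greenplum
aggregate pairs the transition function in this diff with, typically, a merge
function (to combine per-segment partial states) and a final function (to turn
the state into the result). The snippet below is a minimal, self-contained
Python sketch of that pattern, assuming a state layout of
[loss_sum, accuracy_sum, image_count] with metrics weighted by image count;
the eval_merge/eval_final names here are illustrative and are not the
functions added in this PR.

# Minimal sketch (not MADlib code) of the aggregate pattern implied by the
# transition function above. Assumed state: [loss_sum, accuracy_sum, image_count].

def eval_transition(state, loss, accuracy, num_images):
    # Fold one tuple's metrics into the running state, weighted by the
    # number of images that tuple contributed.
    agg_loss, agg_accuracy, agg_image_count = state
    return [agg_loss + loss * num_images,
            agg_accuracy + accuracy * num_images,
            agg_image_count + num_images]

def eval_merge(state1, state2):
    # Combine partial states from two segments; this is what makes the
    # aggregate safe to compute in parallel.
    return [a + b for a, b in zip(state1, state2)]

def eval_final(state):
    # Divide the weighted sums by the total image count.
    agg_loss, agg_accuracy, agg_image_count = state
    if agg_image_count == 0:
        return None
    return [agg_loss / agg_image_count, agg_accuracy / agg_image_count]

# Two segments, each folding in one or two batches:
seg1 = eval_transition(eval_transition([0.0, 0.0, 0], 0.50, 0.80, 100),
                       0.40, 0.90, 50)
seg2 = eval_transition([0.0, 0.0, 0], 0.60, 0.70, 150)
print(eval_final(eval_merge(seg1, seg2)))  # [0.5333..., 0.7666...]

The merge step is the piece a plain UDF cannot express; it is what lets each
segment evaluate its own rows and still yield a single, correctly weighted
average.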
 
 Review comment:
   Can you please remove the import for `expand_input_dims`?
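
On the expand_input_dims import: this change deletes the only calls to the
helper in this function, presumably leaving the import unused. For readers
following along, the deleted comment suggests the helper prepends a batch
dimension (optionally casting the dtype) so that a single validation row
matches the batched training shape. The snippet below is a hypothetical
reconstruction for illustration only, not the actual MADlib helper:

import numpy as np

# Hypothetical reconstruction, inferred from the deleted comment above;
# the real implementation lives elsewhere in the module.
def expand_input_dims(input_data, target_type=None):
    arr = np.array(input_data, dtype=target_type)
    # Prepend a dimension so a single row looks like a batch of size 1.
    return np.expand_dims(arr, axis=0)

x = expand_input_dims([0.1, 0.2, 0.3], target_type='float32')
print(x.shape)  # prints (1, 3)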

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services
