kaknikhil commented on a change in pull request #524:
URL: https://github.com/apache/madlib/pull/524#discussion_r578764993



##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -287,27 +298,50 @@ def parse_optimizer(compile_dict):
 
 
 # Parse the fit parameters into a dictionary.
-def parse_and_validate_fit_params(fit_param_str):
+def parse_and_validate_fit_params(fit_param_str, current_seg_id=0):
 
     if fit_param_str:
-        fit_params_dict = convert_string_of_args_to_dict(fit_param_str)
-
-        literal_eval_fit_params = ['batch_size','epochs','verbose',
+        fit_params_dict = convert_string_of_args_to_dict(fit_param_str, 
strip_quotes=False)
+        literal_eval_fit_params = ['batch_size','epochs','verbose', 'shuffle',
                                    
'class_weight','initial_epoch','steps_per_epoch']
-        accepted_fit_params = literal_eval_fit_params + ['shuffle']
+        accepted_fit_params = literal_eval_fit_params + ['callbacks']
 
         fit_params_dict = validate_and_literal_eval_keys(fit_params_dict,
                                                          
literal_eval_fit_params,
                                                          accepted_fit_params)
-        if 'shuffle' in fit_params_dict:
-            shuffle_value = fit_params_dict['shuffle']
-            if shuffle_value == 'True' or shuffle_value == 'False':
-                fit_params_dict['shuffle'] = bool(shuffle_value)
+
+        if 'callbacks' in fit_params_dict:
+            fit_params_dict['callbacks'] = 
parse_callbacks(fit_params_dict['callbacks'], current_seg_id)
 
         return fit_params_dict
     else:
         return {}
 
+# Parse the callback fit params and create the TensorBoard object in the 
dictionary
+def parse_callbacks(callbacks, current_seg_id=0):

Review comment:
       Does it make sense to add unit tests for this function ?

##########
File path: 
src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in
##########
@@ -361,7 +361,9 @@ class MstSearch():
             _assert(self.num_configs is None and self.random_state is None,
                     "DL: 'num_configs' and 'random_state' must be NULL for 
grid search")
             for distribution_type in self.accepted_distributions:
-                _assert(distribution_type not in compile_params_grid and 
distribution_type not in fit_params_grid,
+                tmp_dist = "'{0}']".format(distribution_type)
+                _assert(tmp_dist not in compile_params_grid and

Review comment:
       It might also make sense to add/update a test for this change. We can 
add both a positive and a negative test case to make sure that it passes and 
fails as expected

##########
File path: 
src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in
##########
@@ -361,7 +361,9 @@ class MstSearch():
             _assert(self.num_configs is None and self.random_state is None,
                     "DL: 'num_configs' and 'random_state' must be NULL for 
grid search")
             for distribution_type in self.accepted_distributions:
-                _assert(distribution_type not in compile_params_grid and 
distribution_type not in fit_params_grid,
+                tmp_dist = "'{0}']".format(distribution_type)

Review comment:
       I think adding a small explanation of why we need to format the type 
this way will help. Also renaming the variable to be something that will 
reflect it's purpose might make it easier to read.

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -309,6 +311,8 @@ def parse_and_validate_fit_params(fit_param_str):
                                                          
literal_eval_fit_params,
                                                          accepted_fit_params)
         if 'callbacks' in fit_params_dict:
+            if not is_superuser(current_user()):

Review comment:
       Can we add a test for this ?

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -144,7 +147,7 @@ def set_model_weights(segment_model, serialized_weights):
 Used to convert compile_params and fit_params to actual argument dictionaries
 """
 
-def convert_string_of_args_to_dict(str_of_args):
+def convert_string_of_args_to_dict(str_of_args, strip_quotes=True):

Review comment:
       consider adding a docstring for the strip_quotes argument

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -287,27 +298,50 @@ def parse_optimizer(compile_dict):
 
 
 # Parse the fit parameters into a dictionary.
-def parse_and_validate_fit_params(fit_param_str):
+def parse_and_validate_fit_params(fit_param_str, current_seg_id=0):
 
     if fit_param_str:
-        fit_params_dict = convert_string_of_args_to_dict(fit_param_str)
-
-        literal_eval_fit_params = ['batch_size','epochs','verbose',
+        fit_params_dict = convert_string_of_args_to_dict(fit_param_str, 
strip_quotes=False)
+        literal_eval_fit_params = ['batch_size','epochs','verbose', 'shuffle',
                                    
'class_weight','initial_epoch','steps_per_epoch']
-        accepted_fit_params = literal_eval_fit_params + ['shuffle']
+        accepted_fit_params = literal_eval_fit_params + ['callbacks']
 
         fit_params_dict = validate_and_literal_eval_keys(fit_params_dict,
                                                          
literal_eval_fit_params,
                                                          accepted_fit_params)
-        if 'shuffle' in fit_params_dict:
-            shuffle_value = fit_params_dict['shuffle']
-            if shuffle_value == 'True' or shuffle_value == 'False':
-                fit_params_dict['shuffle'] = bool(shuffle_value)
+
+        if 'callbacks' in fit_params_dict:
+            fit_params_dict['callbacks'] = 
parse_callbacks(fit_params_dict['callbacks'], current_seg_id)
 
         return fit_params_dict
     else:
         return {}
 
+# Parse the callback fit params and create the TensorBoard object in the 
dictionary
+def parse_callbacks(callbacks, current_seg_id=0):
+    callbacks = callbacks.strip("'")
+    if not is_superuser(current_user()):
+        plpy.error("Only a superuser may use callbacks.")
+    try:
+        tree = ast.parse(callbacks, mode='eval')
+        assert(type(tree.body) == ast.List)

Review comment:
       All these assertions will raise a generic python assertion error without 
any message. Is that intentional ?

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -170,11 +173,19 @@ def convert_string_of_args_to_dict(str_of_args):
         elif not stack and char == ",":
             value_str = result_str
             result_str = ""
-            compile_dict[key_str.strip()]=value_str.strip().strip('\'')
+            key_str = key_str.strip()
+            value_str = value_str.strip()
+            if strip_quotes:
+                value_str = value_str.strip('"\'')
+            compile_dict[key_str.strip()]=value_str

Review comment:
       do we need to call `.strip` on `key_str` again ?

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -326,7 +330,8 @@ def parse_and_validate_fit_params(fit_param_str):
             accepted_tb_params = [ 'log_dir', 'histogram_freq', 'batch_size', 
'update_freq',
                                    'write_graph', 'write_grad', 'write_images' 
]
             tb_params_dict = validate_and_literal_eval_keys(tb_params_dict, 
accepted_tb_params, accepted_tb_params)
-            fit_params_dict['callbacks'] = [TensorBoard(tb_params_dict)]
+            tb_params_dict['log_dir'] = 
tb_params_dict['log_dir']+"{0}".format(current_seg_id)

Review comment:
       ```suggestion
     tb_params_dict['log_dir'] = 
"{0}{1}".format(tb_params_dict['log_dir'],(current_seg_id))
   ```

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -287,27 +298,50 @@ def parse_optimizer(compile_dict):
 
 
 # Parse the fit parameters into a dictionary.
-def parse_and_validate_fit_params(fit_param_str):
+def parse_and_validate_fit_params(fit_param_str, current_seg_id=0):
 
     if fit_param_str:
-        fit_params_dict = convert_string_of_args_to_dict(fit_param_str)
-
-        literal_eval_fit_params = ['batch_size','epochs','verbose',
+        fit_params_dict = convert_string_of_args_to_dict(fit_param_str, 
strip_quotes=False)
+        literal_eval_fit_params = ['batch_size','epochs','verbose', 'shuffle',
                                    
'class_weight','initial_epoch','steps_per_epoch']
-        accepted_fit_params = literal_eval_fit_params + ['shuffle']
+        accepted_fit_params = literal_eval_fit_params + ['callbacks']
 
         fit_params_dict = validate_and_literal_eval_keys(fit_params_dict,
                                                          
literal_eval_fit_params,
                                                          accepted_fit_params)
-        if 'shuffle' in fit_params_dict:
-            shuffle_value = fit_params_dict['shuffle']
-            if shuffle_value == 'True' or shuffle_value == 'False':
-                fit_params_dict['shuffle'] = bool(shuffle_value)
+

Review comment:
       do we have a dev check test for `shuffle` ? If not should we add it ?

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -287,27 +298,50 @@ def parse_optimizer(compile_dict):
 
 
 # Parse the fit parameters into a dictionary.
-def parse_and_validate_fit_params(fit_param_str):
+def parse_and_validate_fit_params(fit_param_str, current_seg_id=0):
 
     if fit_param_str:
-        fit_params_dict = convert_string_of_args_to_dict(fit_param_str)
-
-        literal_eval_fit_params = ['batch_size','epochs','verbose',
+        fit_params_dict = convert_string_of_args_to_dict(fit_param_str, 
strip_quotes=False)
+        literal_eval_fit_params = ['batch_size','epochs','verbose', 'shuffle',

Review comment:
       just curious, why did we have to move 'shuffle' from accepted_fit_params 
to literal_eval_fit_params ?

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -287,27 +298,50 @@ def parse_optimizer(compile_dict):
 
 
 # Parse the fit parameters into a dictionary.
-def parse_and_validate_fit_params(fit_param_str):
+def parse_and_validate_fit_params(fit_param_str, current_seg_id=0):
 
     if fit_param_str:
-        fit_params_dict = convert_string_of_args_to_dict(fit_param_str)
-
-        literal_eval_fit_params = ['batch_size','epochs','verbose',
+        fit_params_dict = convert_string_of_args_to_dict(fit_param_str, 
strip_quotes=False)
+        literal_eval_fit_params = ['batch_size','epochs','verbose', 'shuffle',
                                    
'class_weight','initial_epoch','steps_per_epoch']
-        accepted_fit_params = literal_eval_fit_params + ['shuffle']
+        accepted_fit_params = literal_eval_fit_params + ['callbacks']
 
         fit_params_dict = validate_and_literal_eval_keys(fit_params_dict,
                                                          
literal_eval_fit_params,
                                                          accepted_fit_params)
-        if 'shuffle' in fit_params_dict:
-            shuffle_value = fit_params_dict['shuffle']
-            if shuffle_value == 'True' or shuffle_value == 'False':
-                fit_params_dict['shuffle'] = bool(shuffle_value)
+
+        if 'callbacks' in fit_params_dict:
+            fit_params_dict['callbacks'] = 
parse_callbacks(fit_params_dict['callbacks'], current_seg_id)
 
         return fit_params_dict
     else:
         return {}
 
+# Parse the callback fit params and create the TensorBoard object in the 
dictionary
+def parse_callbacks(callbacks, current_seg_id=0):
+    callbacks = callbacks.strip("'")
+    if not is_superuser(current_user()):
+        plpy.error("Only a superuser may use callbacks.")
+    try:
+        tree = ast.parse(callbacks, mode='eval')
+        assert(type(tree.body) == ast.List)
+        assert(len(tree.body.elts) == 1)
+        assert(type(tree.body.elts[0]) == ast.Call)
+        assert(tree.body.elts[0].func.id == 'TensorBoard')
+        tb_params = tree.body.elts[0].keywords
+        tb_params_dict = { tb_params[i].arg : tb_params[i].value \
+                        for i in range(len(tb_params)) }
+    except:
+        plpy.error("Invalid callbacks fit param.  Currently, "
+                    "only TensorBoard callbacks are accepted.")
+
+    accepted_tb_params = [ 'log_dir', 'histogram_freq', 'batch_size', 
'update_freq',

Review comment:
       In our dev check tests, we only test for the log_dir param. Should we 
add tests for some of the other params as well ? We can probably update the 
same test to accept these params

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -287,27 +298,50 @@ def parse_optimizer(compile_dict):
 
 
 # Parse the fit parameters into a dictionary.
-def parse_and_validate_fit_params(fit_param_str):
+def parse_and_validate_fit_params(fit_param_str, current_seg_id=0):
 
     if fit_param_str:
-        fit_params_dict = convert_string_of_args_to_dict(fit_param_str)
-
-        literal_eval_fit_params = ['batch_size','epochs','verbose',
+        fit_params_dict = convert_string_of_args_to_dict(fit_param_str, 
strip_quotes=False)
+        literal_eval_fit_params = ['batch_size','epochs','verbose', 'shuffle',
                                    
'class_weight','initial_epoch','steps_per_epoch']
-        accepted_fit_params = literal_eval_fit_params + ['shuffle']
+        accepted_fit_params = literal_eval_fit_params + ['callbacks']
 
         fit_params_dict = validate_and_literal_eval_keys(fit_params_dict,
                                                          
literal_eval_fit_params,
                                                          accepted_fit_params)
-        if 'shuffle' in fit_params_dict:
-            shuffle_value = fit_params_dict['shuffle']
-            if shuffle_value == 'True' or shuffle_value == 'False':
-                fit_params_dict['shuffle'] = bool(shuffle_value)
+
+        if 'callbacks' in fit_params_dict:
+            fit_params_dict['callbacks'] = 
parse_callbacks(fit_params_dict['callbacks'], current_seg_id)
 
         return fit_params_dict
     else:
         return {}
 
+# Parse the callback fit params and create the TensorBoard object in the 
dictionary
+def parse_callbacks(callbacks, current_seg_id=0):
+    callbacks = callbacks.strip("'")
+    if not is_superuser(current_user()):
+        plpy.error("Only a superuser may use callbacks.")
+    try:
+        tree = ast.parse(callbacks, mode='eval')
+        assert(type(tree.body) == ast.List)
+        assert(len(tree.body.elts) == 1)
+        assert(type(tree.body.elts[0]) == ast.Call)
+        assert(tree.body.elts[0].func.id == 'TensorBoard')
+        tb_params = tree.body.elts[0].keywords
+        tb_params_dict = { tb_params[i].arg : tb_params[i].value \
+                        for i in range(len(tb_params)) }
+    except:
+        plpy.error("Invalid callbacks fit param.  Currently, "
+                    "only TensorBoard callbacks are accepted.")
+
+    accepted_tb_params = [ 'log_dir', 'histogram_freq', 'batch_size', 
'update_freq',
+                           'write_graph', 'write_grad', 'write_images' ]
+    tb_params_dict = validate_and_literal_eval_keys(tb_params_dict, 
accepted_tb_params, accepted_tb_params)
+    tb_params_dict['log_dir'] = 
tb_params_dict['log_dir']+"{0}".format(current_seg_id)

Review comment:
       ```suggestion
       tb_params_dict['log_dir'] = "{0}{1}".format(tb_params_dict['log_dir'], 
current_seg_id)
   ```

##########
File path: src/ports/postgres/modules/deep_learning/test/madlib_keras_fit.sql_in
##########
@@ -40,7 +40,7 @@ SELECT madlib_keras_fit(
     'model_arch',
     1,
     $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), 
loss='categorical_crossentropy', metrics=['mae']$$::text,
-    $$ batch_size=2, epochs=1, verbose=0 $$::text,
+    $$ batch_size=2, epochs=1, verbose=0, 
callbacks=[TensorBoard(log_dir='/tmp/tensorflow/single/')] $$::text,

Review comment:
       can we assert that the directory `/tmp/tensorflow/single/` is not empty 
or contains expected files ?

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -287,27 +298,50 @@ def parse_optimizer(compile_dict):
 
 
 # Parse the fit parameters into a dictionary.
-def parse_and_validate_fit_params(fit_param_str):
+def parse_and_validate_fit_params(fit_param_str, current_seg_id=0):

Review comment:
       Do we need to have a default value for `current_seg_id` for this 
function and `parse_callbacks` ? Won't it be better if the calling function 
passes the segment id every time ? 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to