This is an automated email from the ASF dual-hosted git repository. domino pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
The following commit(s) were added to refs/heads/master by this push: new 08acbeb Disallow 'deserialization', 'serialization', & 'get' loss and metrics 08acbeb is described below commit 08acbebff8483821f8377719d0a83760f7a966e3 Author: Domino Valdano <dvald...@pivotal.io> AuthorDate: Tue Mar 9 11:56:36 2021 -0500 Disallow 'deserialization', 'serialization', & 'get' loss and metrics Also: - Remove whitelisting of any metrics containing the string "top_k_categorical_accuracy". This is already a builtin metric, and would compromise security if we allowed arbitrary python code containing this string to be passed along to keras. - Remove elements which start with an underscore from list of builtins. - Avoid using metrics[2:-2] which assumes first 2 characters are [' or [" and '] or "]. This prevents sneaky inputs like metrics=[*__builtins__ ] --- .../madlib_keras_custom_function.py_in | 13 +++++++++++ .../madlib_keras_fit_multiple_model.py_in | 2 +- .../deep_learning/madlib_keras_validator.py_in | 20 +++++++++-------- .../deep_learning/madlib_keras_wrapper.py_in | 25 ++++++++++++++++------ 4 files changed, 43 insertions(+), 17 deletions(-) diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in index 32a5757..f2f06d6 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in @@ -168,13 +168,26 @@ def delete_custom_function(schema_madlib, object_table, id=None, name=None, **kw sql = "DROP TABLE {0}".format(object_table) plpy.execute(sql, 0) +dangerous_builtins = set(('serialize', 'deserialize', 'get')) + def update_builtin_metrics(builtin_metrics): builtin_metrics.append('accuracy') builtin_metrics.append('acc') builtin_metrics.append('crossentropy') builtin_metrics.append('ce') + + builtin_metrics = [ b for b in builtin_metrics \ + if not 
b.startswith('_') and \ + b not in dangerous_builtins ] + return builtin_metrics +def update_builtin_losses(builtin_losses): + builtin_losses = [ b for b in builtin_losses \ + if not b.startswith('_') and \ + b not in dangerous_builtins ] + return builtin_losses + @MinWarning("error") def load_top_k_accuracy_function(schema_madlib, object_table, k, **kwargs): diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in index 2db346e..aa88fbe 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in @@ -353,7 +353,7 @@ class FitMultipleModel(object): DEBUG.print_timing('eval_model_total') def populate_object_map(self): - builtin_losses = dir(losses) + builtin_losses = update_builtin_losses(dir(losses)) builtin_metrics = update_builtin_metrics(dir(metrics)) # Track distinct custom functions in compile_params diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in index ab8d336..de5c63d 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in @@ -49,6 +49,8 @@ from utilities.validate_args import input_tbl_valid from utilities.validate_args import output_tbl_valid from madlib_keras_wrapper import parse_and_validate_fit_params from madlib_keras_wrapper import parse_and_validate_compile_params +from madlib_keras_custom_function import update_builtin_metrics +from madlib_keras_custom_function import update_builtin_losses import tensorflow.keras.losses as losses import tensorflow.keras.metrics as metrics @@ -541,18 +543,18 @@ class MstLoaderInputValidator(): """.format(fit_params, str(e))) if not self.compile_params_list: plpy.error( 
"compile_params_list cannot be NULL") - custom_fn_name = [] - ## Initialize builtin loss/metrics functions - builtin_losses = dir(losses) - builtin_metrics = dir(metrics) - # Default metrics, since it is not part of the builtin metrics list - builtin_metrics.append('accuracy') + custom_fn_names = [] + + # Initialize builtin loss/metrics functions + builtin_losses = update_builtin_losses(dir(losses)) + builtin_metrics = update_builtin_metrics(dir(metrics)) + if self.object_table is not None: res = plpy.execute("SELECT {0} from {1}".format(CustomFunctionSchema.FN_NAME, self.object_table)) for r in res: - custom_fn_name.append(r[CustomFunctionSchema.FN_NAME]) + custom_fn_names.append(r[CustomFunctionSchema.FN_NAME]) for compile_params in self.compile_params_list: try: _, _, res = parse_and_validate_compile_params(compile_params) @@ -563,11 +565,11 @@ class MstLoaderInputValidator(): if self.object_table is not None: error_suffix = "is not defined in object table '{0}'!".format(self.object_table) - _assert(res['loss'] in custom_fn_name or res['loss'] in builtin_losses, + _assert(res['loss'] in custom_fn_names or res['loss'] in builtin_losses, "custom function '{0}' used in compile params "\ "{1}".format(res['loss'], error_suffix)) if 'metrics' in res: - _assert((len(set(res['metrics']).intersection(custom_fn_name)) > 0 + _assert((len(set(res['metrics']).intersection(custom_fn_names)) > 0 or len(set(res['metrics']).intersection(builtin_metrics)) > 0), "custom function '{0}' used in compile params " \ "{1}".format(res['metrics'], error_suffix)) diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in index c23f8d3..e3f9f01 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in @@ -28,6 +28,7 @@ import madlib_keras_serializer import madlib_keras_gpu_info from 
madlib_keras_custom_function import CustomFunctionSchema from madlib_keras_custom_function import update_builtin_metrics +from madlib_keras_custom_function import update_builtin_losses from utilities.utilities import _assert from utilities.utilities import is_platform_pg @@ -228,14 +229,14 @@ def parse_and_validate_compile_params(str_of_args, additional_params=[]): literal_eval_compile_params, accepted_compile_params) if len(additional_params) == 0: - # optimizer is not a required parameter for keras compile + # optimizer is a required parameter for keras compile _assert('optimizer' in compile_dict, "optimizer is a required parameter for compile") opt_name, opt_args = parse_optimizer(compile_dict) else: opt_name, opt_args = None, None _assert('loss' in compile_dict, "loss is a required parameter for compile") - unsupported_loss_list = ['sparse_categorical_crossentropy'] + unsupported_loss_list = ['sparse_categorical_crossentropy', 'serialize', 'deserialize', 'get' ] _assert(compile_dict['loss'] not in unsupported_loss_list, "Loss function {0} is not supported.".format(compile_dict['loss'])) validate_compile_param_types(compile_dict) @@ -251,7 +252,10 @@ def _validate_metrics(compile_dict): if 'metrics' in compile_dict and compile_dict['metrics']: unsupported_metrics_list = ['sparse_categorical_accuracy', 'sparse_categorical_crossentropy', - 'sparse_top_k_categorical_accuracy'] + 'sparse_top_k_categorical_accuracy', + 'serialize', + 'deserialize', + 'get'] _assert(len(compile_dict['metrics']) == 1, "Only one metric at a time is supported.") _assert(compile_dict['metrics'][0] not in unsupported_metrics_list, @@ -474,16 +478,23 @@ def get_custom_functions_list(compile_params): """ compile_dict = convert_string_of_args_to_dict(compile_params) - builtin_losses = dir(losses) + builtin_losses = update_builtin_losses(dir(losses)) builtin_metrics = update_builtin_metrics(dir(metrics)) custom_fn_list = [] + local_loss = compile_dict['loss'].lower() if 'loss' in compile_dict 
else None - local_metric = compile_dict['metrics'].lower()[2:-2] if 'metrics' in compile_dict else None + try: + metrics_list = ast.literal_eval(compile_dict['metrics']) \ + if 'metrics' in compile_dict else [] + except ValueError: + plpy.error(("Invalid input value for parameter {0}, " + "please refer to the documentation").format(compile_dict['metrics'])) + local_metric = metrics_list[0].lower() if (len(metrics_list) > 0) else None + if local_loss and (local_loss not in [a.lower() for a in builtin_losses]): custom_fn_list.append(local_loss) if local_metric and (local_metric not in [a.lower() for a in builtin_metrics]): - if 'top_k_categorical_accuracy' not in local_metric: - custom_fn_list.append(local_metric) + custom_fn_list.append(local_metric) return custom_fn_list