[madlib] branch master updated: update example in multi-fit to use new model config generator
This is an automated email from the ASF dual-hosted git repository. fmcquillan pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git The following commit(s) were added to refs/heads/master by this push: new 33ad16c update example in multi-fit to use new model config generator 33ad16c is described below commit 33ad16c29af1e99a02a8a153671a9a16608e74c6 Author: Frank McQuillan AuthorDate: Fri Mar 5 16:54:34 2021 -0800 update example in multi-fit to use new model config generator --- .../madlib_keras_fit_multiple_model.sql_in | 69 -- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.sql_in index 67ee2c7..e8c4d51 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.sql_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.sql_in @@ -999,41 +999,44 @@ $$ 'MLP with 2 hidden layers' -- Descr ); --# Define model selection tuples and load. Select the model(s) from the model architecture -table that you want to run, along with the compile and fit parameters. Combinations will be -created for the set of model selection parameters will be loaded: +-# Generate model configurations using grid search. The output table for grid +search contains the unique combinations of model architectures, compile and +fit parameters. 
DROP TABLE IF EXISTS mst_table, mst_table_summary; -SELECT madlib.load_model_selection_table('model_arch_library', -- model architecture table - 'mst_table', -- model selection table output - ARRAY[1,2], -- model ids from model architecture table - ARRAY[ -- compile params - $$loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']$$, - $$loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy']$$, - $$loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']$$ - ], - ARRAY[-- fit params - $$batch_size=4,epochs=1$$, - $$batch_size=8,epochs=1$$ - ] +SELECT madlib.generate_model_configs( +'model_arch_library', -- model architecture table +'mst_table', -- model selection table output + ARRAY[1,2], -- model ids from model architecture table + $$ +{'loss': ['categorical_crossentropy'], + 'optimizer_params_list': [ {'optimizer': ['Adam'], 'lr': [0.001, 0.01, 0.1]} ], + 'metrics': ['accuracy']} + $$, -- compile_param_grid + $$ + { 'batch_size': [4, 8], + 'epochs': [1] + } + $$, -- fit_param_grid + 'grid' -- search_type ); SELECT * FROM mst_table ORDER BY mst_key; - mst_key | model_id | compile_params | fit_params + mst_key | model_id | compile_params | fit_params -+--+-+--- - 1 |1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1 - 2 |1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1 - 3 |1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1 - 4 |1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1 - 5 |1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1 - 6 |
[madlib] 01/02: DL: Check if the owner of the object table is a superuser
This is an automated email from the ASF dual-hosted git repository. okislal pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git commit 4b87a71ba1f8b6036f172fbda573a5626f1c8482 Author: Orhan Kislal AuthorDate: Thu Feb 25 20:30:43 2021 +0300 DL: Check if the owner of the object table is a superuser --- .../modules/deep_learning/madlib_keras_custom_function.py_in | 8 +++- .../modules/deep_learning/madlib_keras_validator.py_in| 8 src/ports/postgres/modules/utilities/utilities.py_in | 11 +++ 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in index 1ebf9f6..32a5757 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in @@ -128,13 +128,11 @@ def load_custom_function(schema_madlib, object_table, object, name, description= def delete_custom_function(schema_madlib, object_table, id=None, name=None, **kwargs): if object_table is not None: -schema_name = get_schema(object_table) -if schema_name is None: -object_table = "{0}.{1}".format(schema_madlib, quote_ident(object_table)) -elif schema_name != schema_madlib: -plpy.error("DL: Custom function table has to be in the {0} schema".format(schema_madlib)) +object_table = "{0}.{1}".format(schema_madlib, quote_ident(object_table)) input_tbl_valid(object_table, "Keras Custom Funtion") +_assert(is_superuser(current_user()), "DL: The user has to have admin "\ +"privilages to delete a custom function") _assert(id is not None or name is not None, "{0}: function id/name cannot be NULL! 
" \ "Use \"SELECT delete_custom_function('usage')\" for help.".format(module_name)) diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in index 535d70d..ab8d336 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in @@ -40,6 +40,8 @@ from utilities.utilities import _assert from utilities.utilities import add_postfix from utilities.utilities import is_platform_pg from utilities.utilities import is_var_valid +from utilities.utilities import is_superuser +from utilities.utilities import get_table_owner from utilities.validate_args import cols_in_tbl_valid from utilities.validate_args import columns_exist_in_table from utilities.validate_args import get_expr_type @@ -324,6 +326,9 @@ class FitCommonValidator(object): if self.object_table is not None: input_tbl_valid(self.object_table, self.module_name) + +_assert(is_superuser(get_table_owner(self.object_table)), +"DL: Cannot use a table of a non-superuser as object table.") cols_in_tbl_valid(self.object_table, CustomFunctionSchema.col_names, self.module_name) if self.warm_start: @@ -543,6 +548,7 @@ class MstLoaderInputValidator(): # Default metrics, since it is not part of the builtin metrics list builtin_metrics.append('accuracy') if self.object_table is not None: + res = plpy.execute("SELECT {0} from {1}".format(CustomFunctionSchema.FN_NAME, self.object_table)) for r in res: @@ -576,6 +582,8 @@ class MstLoaderInputValidator(): input_tbl_valid(self.model_arch_table, self.module_name) if self.object_table is not None: input_tbl_valid(self.object_table, self.module_name) +_assert(is_superuser(get_table_owner(self.object_table)), +"DL: Cannot use a table of a non-superuser as object table.") if self.module_name == 'load_model_selection_table' or self.module_name == 'madlib_keras_automl': 
output_tbl_valid(self.model_selection_table, self.module_name) output_tbl_valid(self.model_selection_summary_table, self.module_name) diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in index e5a4c3d..8fc4a28 100644 --- a/src/ports/postgres/modules/utilities/utilities.py_in +++ b/src/ports/postgres/modules/utilities/utilities.py_in @@ -775,6 +775,17 @@ def is_superuser(user): return plpy.execute("SELECT rolsuper FROM pg_catalog.pg_roles "\ "WHERE rolname = '{0}'".format(user))[0]['rolsuper'] +def get_table_owner(schema_table): + +split_table = schema_table.split(".",1) +schema = split_table[0] +non_schema_table = split_table[1] + +q = """SELECT tableowner FROM pg_catalog.p
[madlib] branch master updated (7eeb29c -> 14a91ce)
This is an automated email from the ASF dual-hosted git repository. okislal pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git. from 7eeb29c clarify example in user docs for loading model arch new 4b87a71 DL: Check if the owner of the object table is a superuser new 14a91ce update user docs with security warnings The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../madlib_keras_custom_function.py_in | 8 ++-- .../madlib_keras_custom_function.sql_in| 54 -- .../deep_learning/madlib_keras_validator.py_in | 8 .../postgres/modules/utilities/utilities.py_in | 11 + 4 files changed, 61 insertions(+), 20 deletions(-)
[madlib] 02/02: update user docs with security warnings
This is an automated email from the ASF dual-hosted git repository. okislal pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git commit 14a91cef3b89489be6d8110c8364a6c0662516c4 Author: Frank McQuillan AuthorDate: Thu Mar 4 15:01:56 2021 -0800 update user docs with security warnings --- .../madlib_keras_custom_function.sql_in| 54 -- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in index 3046891..2bf3c56 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in @@ -41,6 +41,15 @@ m4_include(`SQLCommon.m4') Related Topics +\warning +For security reasons there are controls on custom functions in MADlib. +You must be a superuser to create custom functions because they +could theoretically allow execution of any untrusted Python code. +Regular users with MADlib USAGE permission can use existing custom +functions but cannot create new ones or update existing ones. +See references [1] and [2] for information +on privileges in Greenplum and PostgreSQL. + This function loads custom Python functions into a table for use by deep learning algorithms. @@ -48,9 +57,9 @@ Custom functions can be useful if, for example, you need loss functions or metrics that are not built into the standard libraries. The functions to be loaded must be in the form of serialized Python objects created using Dill, which extends Python's pickle module to the majority -of the built-in Python types [1]. +of the built-in Python types [3]. -Custom functions are also used to return top k categorical accuracy rate +Custom functions can also be used to return top k categorical accuracy in the case that you want a different k value than the default from Keras. 
This module includes a helper function to create the custom function automatically for a specified k. @@ -58,12 +67,18 @@ automatically for a specified k. There is also a utility function to delete a function from the table. +@note +Do not specify a schema for the argument 'object_table' containing the Python objects, +because the 'object_table' is automatically put in the MADlib schema. +Also, any subsequent SQL queries on this table by regular users must +specify '<madlib_schema>.object_table' in the usual way. + @anchor load_function @par Load Function load_custom_function( -object table, +object_table, object, name, description @@ -71,10 +86,12 @@ load_custom_function( \b Arguments - object table + object_table VARCHAR. Table to load serialized Python objects. If this table does not exist, it will be created. If this table already exists, a new row is inserted into the existing table. + Do not specify schema as part of the object table name, since + it will be put in the MADlib schema automatically. object @@ -84,7 +101,7 @@ load_custom_function( @note The Dill package must be installed on all segments of the - database cluster [1]. + database cluster [3]. name @@ -148,6 +165,7 @@ delete_custom_function( object_table VARCHAR. Table containing Python object to be deleted. +Do not specify schema as part of the object table name. id INTEGER. The id of the object to be deleted. @@ -161,22 +179,24 @@ delete_custom_function( @par Top k Accuracy Function Create and load a custom function for a specific k into the custom functions table. -The Keras accuracy parameter 'top_k_categorical_accuracy' returns top 5 accuracy by default [2]. +The Keras accuracy parameter 'top_k_categorical_accuracy' returns top 5 accuracy by default [4]. If you want a different top k value, use this helper function to create a custom Python function to compute the top k accuracy that you specify. 
load_top_k_accuracy_function( -object table, +object_table, k ) \b Arguments - object table + object_table VARCHAR. Table to load serialized Python objects. If this table does not exist, it will be created. If this table already exists, a new row is inserted into the existing table. + Do not specify schema as part of the object table name, since + it will be put in the MADlib schema automatically. k @@ -236,14 +256,14 @@ def rmse(y_true, y_pred): return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1)) pb_rmse=dill.dumps(rmse) \# call load function -cur.execute("DROP TABLE IF EXISTS custom_function_table") +cur.execute("DROP TABLE IF EXISTS madlib.custom_function_table") cur.execute("SELECT madlib.load_custom_function('custom_function_table', %s,'squared_error', 'squared error')", [p2.Binary(pb_squared_error)]) cur.execute("SELECT madlib.load_custom_function('custom_function_t