This is an automated email from the ASF dual-hosted git repository. okislal pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
The following commit(s) were added to refs/heads/master by this push: new 6263347 DL: Fix num_class parsing from model architecture 6263347 is described below commit 626334769fffd3f6ce069f4e646509b12341d698 Author: Orhan Kislal <okis...@apache.org> AuthorDate: Mon Mar 8 16:06:22 2021 +0300 DL: Fix num_class parsing from model architecture JIRA: MADLIB-1472 get_num_classes function did not work in certain models that end with activation layers. The regression was caused by the changes from the multi-io commit. --- .../modules/deep_learning/madlib_keras.sql_in | 22 +++++--- .../modules/deep_learning/model_arch_info.py_in | 13 +++-- .../test/unit_tests/test_madlib_keras.py_in | 66 +++++++++++++++++++++- 3 files changed, 87 insertions(+), 14 deletions(-) diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in index 429c0f0..05edc0e 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in @@ -84,7 +84,7 @@ Note that the following MADlib functions are targeting a specific TensorFlow kernel version (1.14). Using a newer or older version may or may not work as intended. MADlib's deep learning methods are designed to use the TensorFlow package and its built in Keras -functions. To ensure consistency, please use tensorflow.keras objects (models, layers, etc.) +functions. To ensure consistency, please use tensorflow.keras objects (models, layers, etc.) instead of importing Keras and using its objects. @note CUDA GPU memory cannot be released until the process holding it is terminated. 
@@ -165,15 +165,15 @@ madlib_keras_fit( @note - Custom loss functions and custom metrics can be used as defined in <a href="group__grp__custom__function.html">Define Custom Functions.</a> - List the custom function name and provide the name of the table where the + List the custom function name and provide the name of the table where the serialized Python objects reside using the parameter 'object_table' below. - The following loss function is not supported: <em>sparse_categorical_crossentropy</em>. The following metrics are not supported: <em>sparse_categorical_accuracy, sparse_top_k_categorical_accuracy</em>. - - The Keras accuracy parameter <em>top_k_categorical_accuracy</em> returns top 5 accuracy by + - The Keras accuracy parameter <em>top_k_categorical_accuracy</em> returns top 5 accuracy by default. If you want a different top k value, use the helper function - <a href="group__grp__custom__function.html#top_k_function">Top k Accuracy Function</a> + <a href="group__grp__custom__function.html#top_k_function">Top k Accuracy Function</a> to create a custom Python function to compute the top k accuracy that you want. @@ -609,10 +609,10 @@ madlib_keras_predict( <DD>TEXT. Column with independent variables in the test table. If a 'normalizing_const' is specified when preprocessing the training dataset, this same normalization will be applied to - the independent variables used in predict. In the case that there - are multiple independent variables, + the independent variables used in predict. In the case that there + are multiple independent variables, representing a multi-input neural network, - put the columns as a comma + put the columns as a comma separated list, e.g., 'indep_var1, indep_var2, indep_var3' in the same way as was done in the preprocessor step for the training data. 
</DD> @@ -695,7 +695,8 @@ madlib_keras_predict_byom( pred_type, use_gpus, class_values, - normalizing_const + normalizing_const, + dependent_count ) </pre> @@ -805,6 +806,11 @@ madlib_keras_predict_byom( array by. For example, you would use 255 for this value if the image data is in the form 0-255. </DD> + + <DT>dependent_count (optional)</DT> + <DD>INTEGER, default: 1. + The number of dependent variables in the model. + </DD> </DL> diff --git a/src/ports/postgres/modules/deep_learning/model_arch_info.py_in b/src/ports/postgres/modules/deep_learning/model_arch_info.py_in index 9c28c43..0081e58 100644 --- a/src/ports/postgres/modules/deep_learning/model_arch_info.py_in +++ b/src/ports/postgres/modules/deep_learning/model_arch_info.py_in @@ -66,12 +66,15 @@ def get_num_classes(model_arch, multi_dep_count): arch_layers = _get_layers(model_arch) num_classes = [] - layer_count = len(arch_layers) - 1 - for i in range(multi_dep_count): - if 'units' in arch_layers[layer_count-i]['config']: - num_classes.append(arch_layers[layer_count-i]['config']['units']) - + i = len(arch_layers) - 1 + dep_counter = 0 + while i >= 0 and dep_counter < multi_dep_count: + if 'units' in arch_layers[i]['config']: + num_classes.append(arch_layers[i]['config']['units']) + dep_counter +=1 + i -= 1 if num_classes: + num_classes.reverse() return num_classes plpy.error('Unable to get number of classes from model architecture.') diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in index 0c91eb4..e46efd7 100644 --- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in +++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in @@ -1410,7 +1410,7 @@ class InputValidatorTestCase(unittest.TestCase): self.model.add(Dense(1599)) with self.assertRaises(plpy.PLPYException) as error: self.subject.validate_class_values( - 
self.module_name, [range(1599), range(1598)], 'prob', self.model.to_json()) + self.module_name, [range(1599)], 'prob', self.model.to_json()) self.assertIn('1600', str(error.exception)) def test_validate_class_values_valid_class_values_prob(self): @@ -1487,6 +1487,70 @@ class InputValidatorTestCase(unittest.TestCase): obj = self.subject._validate_gpu_config(self.module_name, 'foo', [1,0,0,1]) self.assertIn('does not have gpu', str(error.exception).lower()) + def test_validate_class_values_last_layer_not_dense(self): + num_classes = 3 + model = Sequential() + model.add(Conv2D(2, kernel_size=(1, 1), activation='relu', + input_shape=(1,1,1,), padding='same')) + model.add(Dense(num_classes)) + model.add(Activation('relu')) + model.add(Activation('softmax')) + + self.subject.validate_class_values( + self.module_name, [range(num_classes)], 'prob', model.to_json()) + + def test_validate_class_values_last_layer_not_dense_multiio(self): + num_classes = 3 + model = Sequential() + model.add(Conv2D(2, kernel_size=(1, 1), activation='relu', + input_shape=(1,1,1,), padding='same')) + model.add(Dense(num_classes)) + model.add(Dense(num_classes)) + model.add(Activation('relu')) + model.add(Activation('softmax')) + + self.subject.validate_class_values( + self.module_name, [range(num_classes), range(num_classes)], 'prob', model.to_json()) + + def test_validate_class_values_mismatch(self): + expected_error_regex = ".*do not match.*architecture" + num_classes = 3 + + # only one dense layer but len(dep_var) = 2 + model = Sequential() + model.add(Conv2D(2, kernel_size=(1, 1), activation='relu', + input_shape=(1,1,1,), padding='same')) + model.add(Dense(num_classes)) + model.add(Activation('relu')) + with self.assertRaisesRegexp(plpy.PLPYException, expected_error_regex): + self.subject.validate_class_values( + self.module_name, [range(num_classes), range(num_classes)], 'prob', model.to_json()) + + # two dense layers + model = Sequential() + model.add(Conv2D(2, kernel_size=(1, 1), 
activation='relu', + input_shape=(1,1,1,), padding='same')) + model.add(Dense(2)) + model.add(Dense(num_classes)) + with self.assertRaisesRegexp(plpy.PLPYException, expected_error_regex): + self.subject.validate_class_values( + self.module_name, [range(num_classes), range(num_classes)], 'prob', model.to_json()) + + def test_validate_class_values_no_units(self): + expected_error_regex = ".*Unable.*classes.*architecture" + num_classes = 3 + #model arch is missing a dense layer + model = Sequential() + model.add(Activation('relu')) + model.add(Conv2D(2, kernel_size=(1, 1), activation='relu', + input_shape=(1,1,1,), padding='same')) + with self.assertRaisesRegexp(plpy.PLPYException, expected_error_regex): + self.subject.validate_class_values( + self.module_name, [range(num_classes)], 'prob', model.to_json()) + with self.assertRaisesRegexp(plpy.PLPYException, expected_error_regex): + self.subject.validate_class_values( + self.module_name, [range(num_classes), range(num_classes)], 'prob', model.to_json()) + class MadlibSerializerTestCase(unittest.TestCase): def setUp(self):