This is an automated email from the ASF dual-hosted git repository. okislal pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
The following commit(s) were added to refs/heads/master by this push: new 6263347 DL: Fix num_class parsing from model architecture 6263347 is described below commit 626334769fffd3f6ce069f4e646509b12341d698 Author: Orhan Kislal <okis...@apache.org> AuthorDate: Mon Mar 8 16:06:22 2021 +0300 DL: Fix num_class parsing from model architecture JIRA: MADLIB-1472 get_num_classes function did not work in certain models that end with activation layers. The regression was caused by the changes from the multi-io commit. --- .../modules/deep_learning/madlib_keras.sql_in | 22 +++++--- .../modules/deep_learning/model_arch_info.py_in | 13 +++-- .../test/unit_tests/test_madlib_keras.py_in | 66 +++++++++++++++++++++- 3 files changed, 87 insertions(+), 14 deletions(-) diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in index 429c0f0..05edc0e 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in @@ -84,7 +84,7 @@ Note that the following MADlib functions are targeting a specific TensorFlow kernel version (1.14). Using a newer or older version may or may not work as intended. MADlib's deep learning methods are designed to use the TensorFlow package and its built in Keras -functions. To ensure consistency, please use tensorflow.keras objects (models, layers, etc.) +functions. To ensure consistency, please use tensorflow.keras objects (models, layers, etc.) instead of importing Keras and using its objects. @note CUDA GPU memory cannot be released until the process holding it is terminated. 
@@ -165,15 +165,15 @@ madlib_keras_fit( @note - Custom loss functions and custom metrics can be used as defined in <a href="group__grp__custom__function.html">Define Custom Functions.</a> - List the custom function name and provide the name of the table where the + List the custom function name and provide the name of the table where the serialized Python objects reside using the parameter 'object_table' below. - The following loss function is not supported: <em>sparse_categorical_crossentropy</em>. The following metrics are not supported: <em>sparse_categorical_accuracy, sparse_top_k_categorical_accuracy</em>. - - The Keras accuracy parameter <em>top_k_categorical_accuracy</em> returns top 5 accuracy by + - The Keras accuracy parameter <em>top_k_categorical_accuracy</em> returns top 5 accuracy by default. If you want a different top k value, use the helper function - <a href="group__grp__custom__function.html#top_k_function">Top k Accuracy Function</a> + <a href="group__grp__custom__function.html#top_k_function">Top k Accuracy Function</a> to create a custom Python function to compute the top k accuracy that you want. @@ -609,10 +609,10 @@ madlib_keras_predict( <DD>TEXT. Column with independent variables in the test table. If a 'normalizing_const' is specified when preprocessing the training dataset, this same normalization will be applied to - the independent variables used in predict. In the case that there - are multiple independent variables, + the independent variables used in predict. In the case that there + are multiple independent variables, representing a multi-input neural network, - put the columns as a comma + put the columns as a comma separated list, e.g., 'indep_var1, indep_var2, indep_var3' in the same way as was done in the preprocessor step for the training data. 
</DD> @@ -695,7 +695,8 @@ madlib_keras_predict_byom( pred_type, use_gpus, class_values, - normalizing_const + normalizing_const, + dependent_count ) </pre> @@ -805,6 +806,11 @@ madlib_keras_predict_byom( array by. For example, you would use 255 for this value if the image data is in the form 0-255. </DD> + + <DT>dependent_count (optional)</DT> + <DD>INTEGER, default: 1. + The number of dependent variables in the model. + </DD> </DL> diff --git a/src/ports/postgres/modules/deep_learning/model_arch_info.py_in b/src/ports/postgres/modules/deep_learning/model_arch_info.py_in index 9c28c43..0081e58 100644 --- a/src/ports/postgres/modules/deep_learning/model_arch_info.py_in +++ b/src/ports/postgres/modules/deep_learning/model_arch_info.py_in @@ -66,12 +66,15 @@ def get_num_classes(model_arch, multi_dep_count): arch_layers = _get_layers(model_arch) num_classes = [] - layer_count = len(arch_layers) - 1 - for i in range(multi_dep_count): - if 'units' in arch_layers[layer_count-i]['config']: - num_classes.append(arch_layers[layer_count-i]['config']['units']) - + i = len(arch_layers) - 1 + dep_counter = 0 + while i >= 0 and dep_counter < multi_dep_count: + if 'units' in arch_layers[i]['config']: + num_classes.append(arch_layers[i]['config']['units']) + dep_counter +=1 + i -= 1 if num_classes: + num_classes.reverse() return num_classes plpy.error('Unable to get number of classes from model architecture.') diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in index 0c91eb4..e46efd7 100644 --- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in +++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in @@ -1410,7 +1410,7 @@ class InputValidatorTestCase(unittest.TestCase): self.model.add(Dense(1599)) with self.assertRaises(plpy.PLPYException) as error: self.subject.validate_class_values( - 
self.module_name, [range(1599), range(1598)], 'prob', self.model.to_json()) + self.module_name, [range(1599)], 'prob', self.model.to_json()) self.assertIn('1600', str(error.exception)) def test_validate_class_values_valid_class_values_prob(self): @@ -1487,6 +1487,70 @@ class InputValidatorTestCase(unittest.TestCase): obj = self.subject._validate_gpu_config(self.module_name, 'foo', [1,0,0,1]) self.assertIn('does not have gpu', str(error.exception).lower()) + def test_validate_class_values_last_layer_not_dense(self): + num_classes = 3 + model = Sequential() + model.add(Conv2D(2, kernel_size=(1, 1), activation='relu', + input_shape=(1,1,1,), padding='same')) + model.add(Dense(num_classes)) + model.add(Activation('relu')) + model.add(Activation('softmax')) + + self.subject.validate_class_values( + self.module_name, [range(num_classes)], 'prob', model.to_json()) + + def test_validate_class_values_last_layer_not_dense_multiio(self): + num_classes = 3 + model = Sequential() + model.add(Conv2D(2, kernel_size=(1, 1), activation='relu', + input_shape=(1,1,1,), padding='same')) + model.add(Dense(num_classes)) + model.add(Dense(num_classes)) + model.add(Activation('relu')) + model.add(Activation('softmax')) + + self.subject.validate_class_values( + self.module_name, [range(num_classes), range(num_classes)], 'prob', model.to_json()) + + def test_validate_class_values_mismatch(self): + expected_error_regex = ".*do not match.*architecture" + num_classes = 3 + + # only one dense layer but len(dep_var) = 2 + model = Sequential() + model.add(Conv2D(2, kernel_size=(1, 1), activation='relu', + input_shape=(1,1,1,), padding='same')) + model.add(Dense(num_classes)) + model.add(Activation('relu')) + with self.assertRaisesRegexp(plpy.PLPYException, expected_error_regex): + self.subject.validate_class_values( + self.module_name, [range(num_classes), range(num_classes)], 'prob', model.to_json()) + + # two dense layers + model = Sequential() + model.add(Conv2D(2, kernel_size=(1, 1), 
activation='relu', + input_shape=(1,1,1,), padding='same')) + model.add(Dense(2)) + model.add(Dense(num_classes)) + with self.assertRaisesRegexp(plpy.PLPYException, expected_error_regex): + self.subject.validate_class_values( + self.module_name, [range(num_classes), range(num_classes)], 'prob', model.to_json()) + + def test_validate_class_values_no_units(self): + expected_error_regex = ".*Unable.*classes.*architecture" + num_classes = 3 + #model arch is missing a dense layer + model = Sequential() + model.add(Activation('relu')) + model.add(Conv2D(2, kernel_size=(1, 1), activation='relu', + input_shape=(1,1,1,), padding='same')) + with self.assertRaisesRegexp(plpy.PLPYException, expected_error_regex): + self.subject.validate_class_values( + self.module_name, [range(num_classes)], 'prob', model.to_json()) + with self.assertRaisesRegexp(plpy.PLPYException, expected_error_regex): + self.subject.validate_class_values( + self.module_name, [range(num_classes), range(num_classes)], 'prob', model.to_json()) + class MadlibSerializerTestCase(unittest.TestCase): def setUp(self):