This is an automated email from the ASF dual-hosted git repository. nkak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
The following commit(s) were added to refs/heads/master by this push: new df03bc2 DL: Add function for predict byom df03bc2 is described below commit df03bc2029fffa081685c9ffd9471386666bc9a6 Author: Nikhil Kak <n...@pivotal.io> AuthorDate: Fri Jul 19 17:39:27 2019 -0700 DL: Add function for predict byom JIRA: MADLIB-1371 , MADLIB-1359 Previously a user would have to train a deep learning model in madlib and only then they could use that model to predict. This commit adds a new function called `madlib_keras_predict_byom` which allows the user to run prediction on their own model which doesn't have to be trained on madlib. * Refactored the code to reuse the logic between predict and predict_byom * user doc changes Co-authored-by: Nandish Jayaram <njaya...@apache.org> Co-authored-by: Orhan Kislal <okis...@apache.org> --- .../deep_learning/input_data_preprocessor.py_in | 2 +- .../modules/deep_learning/madlib_keras.py_in | 71 +++-- .../modules/deep_learning/madlib_keras.sql_in | 337 +++++++++++++++++++- .../deep_learning/madlib_keras_helper.py_in | 2 + .../deep_learning/madlib_keras_predict.py_in | 312 +++++++++++++++---- .../deep_learning/madlib_keras_validator.py_in | 346 ++++++++++----------- .../model_arch_info.py_in | 36 +++ .../modules/deep_learning/test/madlib_keras.sql_in | 196 +++++++++--- .../test/unit_tests/test_madlib_keras.py_in | 203 +++++++++--- 9 files changed, 1140 insertions(+), 365 deletions(-) diff --git a/src/ports/postgres/modules/deep_learning/input_data_preprocessor.py_in b/src/ports/postgres/modules/deep_learning/input_data_preprocessor.py_in index 82bec97..6a03eca 100644 --- a/src/ports/postgres/modules/deep_learning/input_data_preprocessor.py_in +++ b/src/ports/postgres/modules/deep_learning/input_data_preprocessor.py_in @@ -58,7 +58,7 @@ class InputDataPreprocessorDL(object): self.dependent_varname = dependent_varname self.independent_varname = independent_varname self.buffer_size = buffer_size - self.normalizing_const = normalizing_const if 
normalizing_const is not None else 1.0 + self.normalizing_const = normalizing_const if normalizing_const is not None else DEFAULT_NORMALIZING_CONST self.num_classes = num_classes self.module_name = module_name self.output_summary_table = None diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in index 47ce306..fa55093 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in @@ -29,25 +29,19 @@ import time import keras from keras import backend as K -from keras import utils as keras_utils from keras.layers import * from keras.models import * from keras.optimizers import * from keras.regularizers import * -import madlib_keras_serializer from madlib_keras_helper import * from madlib_keras_validator import * from madlib_keras_wrapper import * -from keras_model_arch_table import ModelArchSchema +from model_arch_info import * -from utilities.control import MinWarning -from utilities.model_arch_info import get_input_shape -from utilities.model_arch_info import get_num_classes from utilities.utilities import _assert from utilities.utilities import is_platform_pg from utilities.utilities import get_segments_per_host from utilities.utilities import madlib_version -from utilities.validate_args import get_col_value_and_type from utilities.validate_args import get_expr_type from utilities.validate_args import quote_ident from utilities.control import MinWarning @@ -68,7 +62,7 @@ def fit(schema_madlib, source_table, model, model_arch_table, fit_validator = FitInputValidator( source_table, validation_table, model, model_arch_table, - mb_dep_var_col, mb_indep_var_col, + model_arch_id, mb_dep_var_col, mb_indep_var_col, num_iterations, metrics_compute_frequency, warm_start) if metrics_compute_frequency is None: metrics_compute_frequency = num_iterations @@ -88,23 +82,13 @@ def fit(schema_madlib, source_table, model, 
model_arch_table, # Get the serialized master model start_deserialization = time.time() - model_arch_query = "SELECT {0}, {1} FROM {2} WHERE {3} = {4}".format( - ModelArchSchema.MODEL_ARCH, ModelArchSchema.MODEL_WEIGHTS, - model_arch_table, ModelArchSchema.MODEL_ID, - model_arch_id) - model_arch_result = plpy.execute(model_arch_query) - if not model_arch_result: - plpy.error("no model arch found in table {0} with id {1}".format( - model_arch_table, model_arch_id)) - model_arch_result = model_arch_result[0] - model_arch = model_arch_result[ModelArchSchema.MODEL_ARCH] - input_shape = get_input_shape(model_arch) + model_arch, model_weights = get_model_arch_weights(model_arch_table, model_arch_id) num_classes = get_num_classes(model_arch) + input_shape = get_input_shape(model_arch) fit_validator.validate_input_shapes(input_shape) - gp_segment_id_col = '0' if is_platform_pg() else 'gp_segment_id' - serialized_weights = get_initial_weights(model, model_arch_result, + serialized_weights = get_initial_weights(model, model_arch, model_weights, warm_start, gpus_per_host) # Compute total images on each segment seg_ids_train, images_per_seg_train = get_image_count_per_seg_for_minibatched_data_from_db(source_table) @@ -289,7 +273,7 @@ def fit(schema_madlib, source_table, model, model_arch_table, #TODO add a unit test for this in a future PR reset_cuda_env(original_cuda_env) -def get_initial_weights(model_table, model_arch_result, warm_start, gpus_per_host): +def get_initial_weights(model_table, model_arch, serialized_weights, warm_start, gpus_per_host): """ If warm_start is True, return back initial weights from model table. 
If warm_start is False, first try to get the weights from model_arch @@ -317,10 +301,8 @@ def get_initial_weights(model_table, model_arch_result, warm_start, gpus_per_hos SELECT model_data FROM {0} """.format(model_table))[0]['model_data'] else: - serialized_weights = model_arch_result[ModelArchSchema.MODEL_WEIGHTS] if not serialized_weights: - model = model_from_json( - model_arch_result[ModelArchSchema.MODEL_ARCH]) + model = model_from_json(model_arch) serialized_weights = madlib_keras_serializer.serialize_nd_weights( model.get_weights()) return serialized_weights @@ -518,10 +500,12 @@ def get_segments_and_gpus(gpus_per_host): def evaluate(schema_madlib, model_table, test_table, output_table, gpus_per_host, **kwargs): module_name = 'madlib_keras_evaluate' - input_validator = EvaluateInputValidator(test_table, model_table, output_table, module_name) - - model_summary_table = input_validator.model_summary_table - test_summary_table = input_validator.test_summary_table + if test_table: + test_summary_table = add_postfix(test_table, "_summary") + model_summary_table = None + if model_table: + model_summary_table = add_postfix(model_table, "_summary") + validate_evaluate(module_name, model_table, model_summary_table, test_table, test_summary_table, output_table) segments_per_host, gpus_per_host = get_segments_and_gpus(gpus_per_host) @@ -531,7 +515,8 @@ def evaluate(schema_madlib, model_table, test_table, output_table, gpus_per_host model_arch = res['model_arch'] input_shape = get_input_shape(model_arch) - input_validator.validate_input_shape(input_shape) + InputValidator.validate_input_shape( + test_table, MINIBATCH_OUTPUT_INDEPENDENT_COLNAME_DL, input_shape, 2) compile_params_query = "SELECT compile_params, metrics_type FROM {0}".format(model_summary_table) res = plpy.execute(compile_params_query)[0] @@ -540,10 +525,11 @@ def evaluate(schema_madlib, model_table, test_table, output_table, gpus_per_host seg_ids, images_per_seg = 
get_image_count_per_seg_for_minibatched_data_from_db(test_table) - loss, metric =\ - get_loss_metric_from_keras_eval(schema_madlib, test_table, compile_params, model_arch, - model_data, gpus_per_host, segments_per_host, - seg_ids, images_per_seg) + loss, metric = \ + get_loss_metric_from_keras_eval( + schema_madlib, test_table, compile_params, model_arch, + model_data, gpus_per_host, segments_per_host, + seg_ids, images_per_seg) if not metrics_type: metrics_type = None @@ -555,6 +541,23 @@ def evaluate(schema_madlib, model_table, test_table, output_table, gpus_per_host SELECT $1 as loss, $2 as metric, $3 as metrics_type""".format(output_table), ["FLOAT", "FLOAT", "TEXT[]"]) plpy.execute(create_output_table, [loss, metric, metrics_type]) +def validate_evaluate(module_name, model_table, model_summary_table, test_table, test_summary_table, output_table): + def _validate_test_summary_tbl(): + input_tbl_valid(test_summary_table, module_name, + error_suffix_str="Please ensure that the test table ({0}) " + "has been preprocessed by " + "the image preprocessor.".format(test_table)) + cols_in_tbl_valid(test_summary_table, [CLASS_VALUES_COLNAME, + NORMALIZING_CONST_COLNAME, DEPENDENT_VARTYPE_COLNAME, + DEPENDENT_VARNAME_COLNAME, INDEPENDENT_VARNAME_COLNAME], module_name) + + InputValidator.validate_predict_evaluate_tables( + module_name, model_table, model_summary_table, + test_table, output_table, MINIBATCH_OUTPUT_INDEPENDENT_COLNAME_DL) + _validate_test_summary_tbl() + validate_dependent_var_for_minibatch(test_table, + MINIBATCH_OUTPUT_DEPENDENT_COLNAME_DL) + def get_loss_metric_from_keras_eval(schema_madlib, table, compile_params, model_arch, serialized_weights, gpus_per_host, segments_per_host, seg_ids, images_per_seg): diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in index c5d8d35..c69f158 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in +++ 
b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in @@ -35,6 +35,7 @@ m4_include(`SQLCommon.m4') <li class="level1"><a href="#keras_fit">Fit</a></li> <li class="level1"><a href="#keras_evaluate">Evaluate</a></li> <li class="level1"><a href="#keras_predict">Predict</a></li> +<li class="level1"><a href="#keras_predict_byom">Predict BYOM</a></li> <li class="level1"><a href="#example">Examples</a></li> <li class="level1"><a href="#notes">Notes</a></li> <li class="level1"><a href="#background">Technical Background</a></li> @@ -616,6 +617,127 @@ madlib_keras_predict( </DD> </DL> + + +@anchor keras_predict_byom +@par Predict BYOM (Bring your own model) +The predict byom function has the following format: +<pre class="syntax"> +madlib_keras_predict_byom( + model_arch_table, + model_arch_id, + test_table, + id_col, + independent_varname, + output_table, + pred_type, + gpus_per_host, + class_values, + normalizing_const + ) +</pre> + + +\b Arguments +<dl class="arglist"> + +<DT>model_arch_table</DT> + <DD>TEXT. Name of the architecture table containing the model + to use for prediction. The model weights and architecture can be loaded to + this table by using the + <a href="group__grp__keras__model__arch.html">load_keras_model</a> function + </DD> + + <DT>model_arch_id</DT> + <DD>INTEGER. This is the id in 'model_arch_table'containing the model + architecture and model weights to use for prediction. + </DD> + + <DT>test_table</DT> + <DD>TEXT. Name of the table containing the dataset to + predict on. Note that test data is not preprocessed (unlike + fit and evaluate) so put one test image per row for prediction. + Also see the comment below for the 'independent_varname' parameter + regarding normalization. + + </DD> + + <DT>id_col</DT> + <DD>TEXT. Name of the id column in the test data table. + </DD> + + <DT>independent_varname</DT> + <DD>TEXT. Column with independent variables in the test table. 
+ If a 'normalizing_const' is specified when preprocessing the + training dataset, this same normalization will be applied to + the independent variables used in predict. + </DD> + + <DT>output_table</DT> + <DD>TEXT. Name of the table that prediction output will be + written to. Table contains:</DD> + <table class="output"> + <tr> + <th>id</th> + <td>Gives the 'id' for each prediction, corresponding to each row from the test_table.</td> + </tr> + <tr> + <th>estimated_dependent_var</th> + <td> + (For pred_type='response') The estimated class for classification. If + class_values is passed in as NULL, then we assume that the class + labels are [0,1,2...,n] where n in the num of classes in the model + architecture. + </td> + </tr> + <tr> + <th>prob_CLASS</th> + <td> + (For pred_type='prob' for classification) + The probability of a given class. + If class_values is passed in as NULL, we create just one column called + 'prob' which is an array of probabilities of all the classes. + Otherwise if class_values is not NULL, then there will be one + column for each class in the training data. + </td> + </tr> + + <DT>pred_type (optional)</DT> + <DD>TEXT, default: 'response'. The type of output desired, where 'response' + gives the actual prediction and 'prob' gives the probability value for each class. + </DD> + + <DT>gpus_per_host (optional)</DT> + <DD>INTEGER, default: 0 (i.e., CPU). + Number of GPUs per segment host to be used for training the neural network. + For example, if you specify 4 for this parameter and your database cluster + is set up to have 4 segments per segment host, it means that each segment + will have a dedicated GPU. A value of 0 means that CPUs, not GPUs, will + be used for training. + + @note + We have seen some memory related issues when segments share GPU resources. 
+ For example, if you specify 1 for this parameter and your database cluster + is set up to have 4 segments per segment host, it means that all 4 segments + on a segment host will share the same GPU. The current recommended + configuration is 1 GPU per segment. + </DD> + + <DT>class_values (optional)</DT> + <DD>TEXT[], default: NULL. + List of class labels that were used while training the model. See the + output_table column for more details. + </DD> + + <DT>normalizing_const (optional)</DT> + <DD>DOUBLE PRECISION, default: 1.0. + The normalizing constant to divide each value in the 'independent_varname' + array by. For example, you would use 255 for this value if the image data is + in the form 0-255. + </DD> +</DL> + + @anchor example @par Examples @@ -814,7 +936,6 @@ SELECT COUNT(*) FROM iris_train; -# Call the preprocessor for deep learning. For the training dataset: <pre class="example"> -DROP TABLE IF EXISTS mlp_prediction; \\x off DROP TABLE IF EXISTS iris_train_packed, iris_train_packed_summary; SELECT madlib.training_preprocessor_dl('iris_train', -- Source table @@ -1049,6 +1170,142 @@ WHERE q.actual=q.estimated; (1 row) </pre> + + + + + + + + + + + + + + + + + + +-# Predict BYOM. +We will use the validation dataset for prediction +as well, which is not usual but serves to show the +syntax. See <a href="group__grp__keras__model__arch.html">load_keras_model</a> +for details on how to load the model architecture and weights. 
+ + +The prediction is in the 'estimated_dependent_var' +column: +<pre class="example"> +UPDATE model_arch_library set model_weights = (select model_data from iris_model) WHERE model_id = 1; + +DROP TABLE IF EXISTS iris_predict_byom; +SELECT madlib.madlib_keras_predict_byom('model_arch_library', -- model arch table + 1, -- model arch id + 'iris_test', -- test_table + 'id', -- id column + 'attributes', -- independent var + 'iris_predict_byom', -- output table + 'response', -- pred_type + 0, -- gpus_per_host + ARRAY['Iris-setosa', 'Iris-versicolor', + 'Iris-virginica'], -- class_values + 1.0 -- normalizing_const + ); +SELECT * FROM iris_predict_byom ORDER BY id; +</pre> +<pre class="result"> + id | estimated_dependent_var +-----+------------------------- + 1 | Iris-setosa + 4 | Iris-setosa + 9 | Iris-setosa + 27 | Iris-setosa + 32 | Iris-setosa + 35 | Iris-setosa + 40 | Iris-setosa + 41 | Iris-setosa + 44 | Iris-setosa + 46 | Iris-setosa + 55 | Iris-versicolor + 56 | Iris-versicolor + 66 | Iris-versicolor + 69 | Iris-versicolor + 75 | Iris-versicolor + 76 | Iris-versicolor + 102 | Iris-virginica + 105 | Iris-virginica + 108 | Iris-virginica + 113 | Iris-virginica + 115 | Iris-virginica + 116 | Iris-virginica + 118 | Iris-virginica + 119 | Iris-virginica + 122 | Iris-virginica + 125 | Iris-virginica + 133 | Iris-virginica + 134 | Iris-virginica + 135 | Iris-virginica + 138 | Iris-virginica + </pre> +Count missclassifications: +<pre class="example"> +SELECT COUNT(*) FROM iris_predict_byom JOIN iris_test USING (id) +WHERE iris_predict_byom.estimated_dependent_var != iris_test.class_text; +</pre> +<pre class="result"> + count +-------+ + 6 +(1 row) +</pre> +Percent missclassifications: +<pre class="example"> +SELECT round(count(*)*100/(150*0.2),2) as test_accuracy_percent from + (select iris_test.class_text as actual, iris_predict_byom.estimated_dependent_var as estimated + from iris_predict_byom inner join iris_test + on iris_test.id=iris_predict_byom.id) q +WHERE 
q.actual=q.estimated; +</pre> +<pre class="result"> + test_accuracy_percent +-----------------------+ + 80.00 +(1 row) +</pre> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + <h4>Classification with Other Parameters</h4> -# Validation dataset. Now use a validation dataset @@ -1571,7 +1828,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict( ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`deep_learning', `madlib_keras_predict') with AOControl(False): - madlib_keras_predict.predict(schema_madlib, + madlib_keras_predict.Predict(schema_madlib, model_table, test_table, id_col, @@ -1622,6 +1879,82 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_keras_predict( $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); +------------------------------------------------------------------------------- +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict_byom( + model_arch_table VARCHAR, + model_arch_id INTEGER, + test_table VARCHAR, + id_col VARCHAR, + independent_varname VARCHAR, + output_table VARCHAR, + pred_type VARCHAR, + gpus_per_host INTEGER, + class_values TEXT[], + normalizing_const DOUBLE PRECISION +) RETURNS VOID AS $$ + PythonFunctionBodyOnly(`deep_learning', `madlib_keras_predict') + with AOControl(False): + madlib_keras_predict.PredictBYOM(**globals()) +$$ LANGUAGE plpythonu VOLATILE +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); + +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict_byom( + model_arch_table VARCHAR, + model_arch_id INTEGER, + test_table VARCHAR, + id_col VARCHAR, + independent_varname VARCHAR, + output_table VARCHAR, + pred_type VARCHAR, + gpus_per_host INTEGER, + class_values TEXT[] +) RETURNS VOID AS $$ + SELECT MADLIB_SCHEMA.madlib_keras_predict_byom($1, $2, $3, $4, $5, $6, $7, $8, $9, NULL); +$$ LANGUAGE sql VOLATILE +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); + + +CREATE OR REPLACE FUNCTION 
MADLIB_SCHEMA.madlib_keras_predict_byom( + model_arch_table VARCHAR, + model_arch_id INTEGER, + test_table VARCHAR, + id_col VARCHAR, + independent_varname VARCHAR, + output_table VARCHAR, + pred_type VARCHAR, + gpus_per_host INTEGER +) RETURNS VOID AS $$ + SELECT MADLIB_SCHEMA.madlib_keras_predict_byom($1, $2, $3, $4, $5, $6, $7, $8, NULL, NULL); +$$ LANGUAGE sql VOLATILE +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); + + +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict_byom( + model_arch_table VARCHAR, + model_arch_id INTEGER, + test_table VARCHAR, + id_col VARCHAR, + independent_varname VARCHAR, + output_table VARCHAR, + pred_type VARCHAR +) RETURNS VOID AS $$ + SELECT MADLIB_SCHEMA.madlib_keras_predict_byom($1, $2, $3, $4, $5, $6, $7, NULL, NULL, NULL); +$$ LANGUAGE sql VOLATILE +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); + +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict_byom( + model_arch_table VARCHAR, + model_arch_id INTEGER, + test_table VARCHAR, + id_col VARCHAR, + independent_varname VARCHAR, + output_table VARCHAR +) RETURNS VOID AS $$ + SELECT MADLIB_SCHEMA.madlib_keras_predict_byom($1, $2, $3, $4, $5, $6, NULL, NULL, NULL, NULL); +$$ LANGUAGE sql VOLATILE +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); + +------------------------------------------------------------------------------- CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_evaluate( model_table VARCHAR, test_table VARCHAR, diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in index 17bdda4..e8218a6 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in @@ -45,6 +45,8 @@ MINIBATCH_OUTPUT_INDEPENDENT_COLNAME_DL = "independent_var" FLOAT32_SQL_TYPE = 'REAL' SMALLINT_SQL_TYPE = 'SMALLINT' 
+DEFAULT_NORMALIZING_CONST = 1.0 + ##################################################################### # Prepend a dimension to np arrays using expand_dims. diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in index ca7a9ad..819ff98 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in @@ -18,7 +18,6 @@ # under the License. import plpy -import os import keras from keras import backend as K @@ -26,82 +25,190 @@ from keras.layers import * from keras.models import * from keras.optimizers import * +from model_arch_info import * from madlib_keras_helper import * -from madlib_keras_validator import PredictInputValidator +from madlib_keras_validator import * from predict_input_params import PredictParamsProcessor from utilities.control import MinWarning -from utilities.model_arch_info import get_input_shape +from utilities.utilities import _assert from utilities.utilities import add_postfix from utilities.utilities import create_cols_from_array_sql_string from utilities.utilities import get_segments_per_host -from utilities.utilities import is_platform_pg from utilities.utilities import unique_string +from utilities.validate_args import input_tbl_valid +from utilities.validate_args import quote_ident from madlib_keras_wrapper import * -MODULE_NAME = 'madlib_keras_predict' +class BasePredict(): + def __init__(self, schema_madlib, table_to_validate, test_table, id_col, + independent_varname, output_table, pred_type, gpus_per_host): + self.schema_madlib = schema_madlib + self.table_to_validate = table_to_validate + self.test_table = test_table + self.id_col = id_col + self.independent_varname = independent_varname + self.output_table = output_table + self.pred_type = pred_type + self.gpus_per_host = gpus_per_host + self._set_default_gpus_pred_type() + + def 
_set_default_gpus_pred_type(self): + self.pred_type = 'response' if not self.pred_type else self.pred_type + self.is_response = True if self.pred_type == 'response' else False + self.gpus_per_host = 0 if self.gpus_per_host is None else self.gpus_per_host + + + def call_internal_keras(self): + if self.is_response: + pred_col_name = add_postfix("estimated_", self.dependent_varname) + pred_col_type = self.dependent_vartype + else: + pred_col_name = "prob" + pred_col_type = 'double precision' + + intermediate_col = unique_string() + class_values = strip_trailing_nulls_from_class_values(self.class_values) + + prediction_select_clause = create_cols_from_array_sql_string( + class_values, intermediate_col, pred_col_name, + pred_col_type, self.is_response, self.module_name) + gp_segment_id_col, seg_ids_test, \ + images_per_seg_test = get_image_count_per_seg_for_non_minibatched_data_from_db( + self.test_table) + segments_per_host = get_segments_per_host() + + predict_query = plpy.prepare(""" + CREATE TABLE {self.output_table} AS + SELECT {self.id_col}, {prediction_select_clause} + FROM ( + SELECT {self.test_table}.{self.id_col}, + ({self.schema_madlib}.internal_keras_predict + ({self.independent_varname}, + $1, + $2, + {self.is_response}, + {self.normalizing_const}, + {gp_segment_id_col}, + ARRAY{seg_ids_test}, + ARRAY{images_per_seg_test}, + {self.gpus_per_host}, + {segments_per_host}) + ) AS {intermediate_col} + FROM {self.test_table} + ) q + """.format(self=self, prediction_select_clause=prediction_select_clause, + seg_ids_test=seg_ids_test, + images_per_seg_test=images_per_seg_test, + gp_segment_id_col=gp_segment_id_col, + segments_per_host=segments_per_host, + intermediate_col=intermediate_col), + ["text", "bytea"]) + plpy.execute(predict_query, [self.model_arch, self.model_weights]) + + def set_default_class_values(self, class_values): + self.class_values = class_values + if self.pred_type == 'prob': + return + if self.class_values is None: + num_classes = 
get_num_classes(self.model_arch) + self.class_values = range(0, num_classes) @MinWarning("warning") -def predict(schema_madlib, model_table, test_table, id_col, - independent_varname, output_table, pred_type, gpus_per_host, **kwargs): - if not pred_type: - pred_type = 'response' - input_validator = PredictInputValidator( - test_table, model_table, id_col, independent_varname, - output_table, pred_type, MODULE_NAME) - - param_proc = PredictParamsProcessor(model_table, MODULE_NAME) - class_values = param_proc.get_class_values() - input_validator.validate_pred_type(class_values) - dependent_varname = param_proc.get_dependent_varname() - dependent_vartype = param_proc.get_dependent_vartype() - model_data = param_proc.get_model_data() - model_arch = param_proc.get_model_arch() - normalizing_const = param_proc.get_normalizing_const() - input_shape = get_input_shape(model_arch) - input_validator.validate_input_shape(input_shape) - - is_response = True if pred_type == 'response' else False - intermediate_col = unique_string() - if is_response: - pred_col_name = add_postfix("estimated_", dependent_varname) - pred_col_type = dependent_vartype - else: - pred_col_name = "prob" - pred_col_type = 'double precision' - - class_values = strip_trailing_nulls_from_class_values(class_values) - - prediction_select_clause = create_cols_from_array_sql_string( - class_values, intermediate_col, pred_col_name, - pred_col_type, is_response, MODULE_NAME) - - gp_segment_id_col, seg_ids_test, \ - images_per_seg_test = get_image_count_per_seg_for_non_minibatched_data_from_db(test_table) - segments_per_host = get_segments_per_host() - - predict_query = plpy.prepare(""" - CREATE TABLE {output_table} AS - SELECT {id_col}, {prediction_select_clause} - FROM ( - SELECT {test_table}.{id_col}, - ({schema_madlib}.internal_keras_predict - ({independent_varname}, - $1, - $2, - {is_response}, - {normalizing_const}, - {gp_segment_id_col}, - ARRAY{seg_ids_test}, - ARRAY{images_per_seg_test}, - 
{gpus_per_host}, - {segments_per_host}) - ) AS {intermediate_col} - FROM {test_table} - ) q - """.format(**locals()), ["text", "bytea"]) - plpy.execute(predict_query, [model_arch, model_data]) +class Predict(BasePredict): + def __init__(self, schema_madlib, model_table, + test_table, id_col, independent_varname, + output_table, pred_type, gpus_per_host, + **kwargs): + + self.module_name = 'madlib_keras_predict' + self.model_table = model_table + if self.model_table: + self.model_summary_table = add_postfix(self.model_table, "_summary") + + BasePredict.__init__(self, schema_madlib, model_table, test_table, + id_col, independent_varname, + output_table, pred_type, + gpus_per_host) + param_proc = PredictParamsProcessor(model_table, self.module_name) + self.dependent_vartype = param_proc.get_dependent_vartype() + self.model_weights = param_proc.get_model_data() + self.model_arch = param_proc.get_model_arch() + class_values = param_proc.get_class_values() + self.set_default_class_values(class_values) + self.normalizing_const = param_proc.get_normalizing_const() + self.dependent_varname = param_proc.get_dependent_varname() + + self.validate() + BasePredict.call_internal_keras(self) + + def validate(self): + InputValidator.validate_predict_evaluate_tables( + self.module_name, self.model_table, self.model_summary_table, + self.test_table, self.output_table, self.independent_varname) + + InputValidator.validate_id_in_test_tbl( + self.module_name, self.test_table, self.id_col) + + InputValidator.validate_class_values( + self.module_name, self.class_values, self.pred_type, self.model_arch) + input_shape = get_input_shape(self.model_arch) + InputValidator.validate_pred_type( + self.module_name, self.pred_type, self.class_values) + InputValidator.validate_input_shape( + self.test_table, self.independent_varname, input_shape, 1) + +@MinWarning("warning") +class PredictBYOM(BasePredict): + def __init__(self, schema_madlib, model_arch_table, model_arch_id, + test_table, id_col, 
independent_varname, output_table, + pred_type, gpus_per_host, class_values, normalizing_const, + **kwargs): + self.module_name='madlib_keras_predict_byom' + self.model_arch_table = model_arch_table + self.model_arch_id = model_arch_id + self.class_values = class_values + self.normalizing_const = normalizing_const + self.dependent_varname = 'dependent_var' + BasePredict.__init__(self, schema_madlib, model_arch_table, + test_table, id_col, independent_varname, + output_table, pred_type, gpus_per_host) + if self.is_response: + self.dependent_vartype = 'text' + else: + self.dependent_vartype = 'double precision' + ## Set default values for norm const and class_values + # gpus_per_host and pred_type are defaulted in base_predict's init + self.normalizing_const = normalizing_const + if self.normalizing_const is None: + self.normalizing_const = DEFAULT_NORMALIZING_CONST + InputValidator.validate_predict_byom_tables( + self.module_name, self.model_arch_table, self.model_arch_id, + self.test_table, self.id_col, self.output_table, + self.independent_varname) + self.validate_and_set_defaults() + BasePredict.call_internal_keras(self) + + def validate_and_set_defaults(self): + # Set some defaults first and then validate and then set some more defaults + self.model_arch, self.model_weights = get_model_arch_weights( + quote_ident(self.model_arch_table), self.model_arch_id) + # Assert model_weights and model_arch are not empty. 
def predict_byom_help(schema_madlib, message, **kwargs):
    """Return help/usage text for madlib_keras_predict_byom.

    Args:
        @param schema_madlib: str, Schema in which MADlib is installed
        @param message: string, Help message string
        @param kwargs

    Returns:
        String. Help/usage information
    """
    if not message:
        help_string = """
-----------------------------------------------------------------------
                            SUMMARY
-----------------------------------------------------------------------
This function allows the user to predict with their own pre-trained model
(note that this model doesn't have to be trained using MADlib).

For more details on function usage:
    SELECT {schema_madlib}.madlib_keras_predict_byom('usage')
    """
    elif message in ['usage', 'help', '?']:
        help_string = """
-----------------------------------------------------------------------
                            USAGE
-----------------------------------------------------------------------
 SELECT {schema_madlib}.madlib_keras_predict_byom(
    model_arch_table,       -- Name of the table containing the model
                               architecture and the pre-trained model weights
    model_arch_id,          -- This is the id in 'model_arch_table' containing
                               the model architecture
    test_table,             -- Name of the table containing the evaluation
                               dataset
    id_col,                 -- Name of the id column in the test data table
    independent_varname,    -- Name of the column with independent
                               variables in the test table
    output_table,           -- Name of the output table
    pred_type,              -- The type of the desired output
    gpus_per_host,          -- Number of GPUs per segment host to
                               be used for training
    class_values,           -- List of class labels that were used while
                               training the model. If class_values is passed in
                               as NULL, the output table will have a column
                               named 'prob' which is an array of probabilities
                               of all the classes.
                               Otherwise if class_values is not NULL, then the
                               output table will contain a column for each
                               class/label from the training data
    normalizing_const       -- Normalizing constant used for standardizing
                               arrays in independent_varname
    );

-----------------------------------------------------------------------
                            OUTPUT
-----------------------------------------------------------------------
The output table ('output_table' above) contains the following columns:

id:                       Gives the 'id' for each prediction, corresponding
                          to each row from the test_table.
estimated_dependent_var:  (For pred_type='response') The estimated class for
                          classification. If class_values is passed in as
                          NULL, then we assume that the class labels are
                          [0,1,2...,n] where n is the number of classes in the
                          model architecture.
prob_CLASS:               (For pred_type='prob' for classification) The
                          probability of a given class.
                          If class_values is passed in as NULL, we create just
                          one column called 'prob' which is an array of
                          probabilities of all the classes.
                          Otherwise if class_values is not NULL, then there
                          will be one column for each class in the training
                          data.
"""
    else:
        help_string = "No such option. Use {schema_madlib}.madlib_keras_predict_byom()"

    return help_string.format(schema_madlib=schema_madlib)
We also - need to consider that sql array indexes start from 1 - For ex if the image is of shape [32,32,3] and is minibatched, the image will - look like [10, 32, 32, 3]. The offset in this case is 1 (start the index at 1) + - 1 (ignore the buffer size 10) = 2. - If the image is not batched then it will look like [32, 32 ,3] and the offset in - this case is 1 (start the index at 1). - """ - array_upper_query = ", ".join("array_upper({0}, {1}) AS n_{2}".format( - independent_varname, i+offset, i) for i in range(len(input_shape))) - query = """ - SELECT {0} - FROM {1} - LIMIT 1 - """.format(array_upper_query, table) - # This query will fail if an image in independent var does not have the - # same number of dimensions as the input_shape. - result = plpy.execute(query)[0] - _assert(len(result) == len(input_shape), - "model_keras error: The number of dimensions ({0}) of each image" - " in model architecture and {1} in {2} ({3}) do not match.".format( - len(input_shape), independent_varname, table, len(result))) - for i in range(len(input_shape)): - key_name = "n_{0}".format(i) - if result[key_name] != input_shape[i]: - # Construct the shape in independent varname to display - # meaningful error msg. 
class InputValidator:
    """Collection of static validation helpers shared by the deep-learning
    predict/evaluate/fit entry points.

    All methods raise (via _assert / plpy.error) on invalid input and return
    None on success.
    """

    @staticmethod
    def validate_predict_evaluate_tables(
            module_name, model_table, model_summary_table, test_table,
            output_table, independent_varname):
        """Validate all tables involved in predict/evaluate on a MADlib model."""
        InputValidator._validate_model_data_tbl(module_name, model_table)
        InputValidator._validate_model_summary_tbl(
            module_name, model_summary_table)
        InputValidator._validate_test_tbl(
            module_name, test_table, independent_varname)
        output_tbl_valid(output_table, module_name)

    @staticmethod
    def validate_id_in_test_tbl(module_name, test_table, id_col):
        """Assert that id_col exists in test_table."""
        _assert(is_var_valid(test_table, id_col),
                "{module_name} error: invalid id column "
                "('{id_col}') for test table ({table}).".format(
                    module_name=module_name,
                    id_col=id_col,
                    table=test_table))

    @staticmethod
    def validate_predict_byom_tables(module_name, model_arch_table,
                                     model_arch_id, test_table, id_col,
                                     output_table, independent_varname):
        """Validate all tables involved in predict with a user-supplied model."""
        InputValidator.validate_model_arch_table(
            module_name, model_arch_table, model_arch_id)
        InputValidator._validate_test_tbl(
            module_name, test_table, independent_varname)
        InputValidator.validate_id_in_test_tbl(module_name, test_table, id_col)
        output_tbl_valid(output_table, module_name)

    @staticmethod
    def validate_pred_type(module_name, pred_type, class_values):
        """Assert pred_type is one of the two supported values."""
        if not pred_type in ['prob', 'response']:
            plpy.error("{0}: Invalid value for pred_type param ({1}). Must be "
                       "either response or prob.".format(module_name, pred_type))

    @staticmethod
    def validate_input_shape(table, independent_varname, input_shape, offset):
        """
        Validate if the input shape specified in model architecture is the same
        as the shape of the image specified in the indepedent var of the input
        table.
        offset: This offset is the index of the start of the image array. We also
        need to consider that sql array indexes start from 1
        For ex if the image is of shape [32,32,3] and is minibatched, the image will
        look like [10, 32, 32, 3]. The offset in this case is 1 (start the index at 1) +
        1 (ignore the buffer size 10) = 2.
        If the image is not batched then it will look like [32, 32 ,3] and the offset in
        this case is 1 (start the index at 1).
        """
        array_upper_query = ", ".join("array_upper({0}, {1}) AS n_{2}".format(
            independent_varname, i+offset, i) for i in range(len(input_shape)))
        query = """
            SELECT {0}
            FROM {1}
            LIMIT 1
        """.format(array_upper_query, table)
        # This query will fail if an image in independent var does not have the
        # same number of dimensions as the input_shape.
        result = plpy.execute(query)[0]
        _assert(len(result) == len(input_shape),
                "model_keras error: The number of dimensions ({0}) of each image"
                " in model architecture and {1} in {2} ({3}) do not match.".format(
                    len(input_shape), independent_varname, table, len(result)))
        for i in range(len(input_shape)):
            key_name = "n_{0}".format(i)
            if result[key_name] != input_shape[i]:
                # Construct the shape in independent varname to display
                # meaningful error msg.
                input_shape_from_table = [result["n_{0}".format(i)]
                                          for i in range(len(input_shape))]
                plpy.error("model_keras error: Input shape {0} in the model"
                           " architecture does not match the input shape {1} of column"
                           " {2} in table {3}.".format(
                               input_shape, input_shape_from_table,
                               independent_varname, table))

    @staticmethod
    def validate_model_arch_table(module_name, model_arch_table, model_arch_id):
        """Assert the architecture table exists and an id was supplied."""
        input_tbl_valid(model_arch_table, module_name)
        _assert(model_arch_id is not None,
                "{0}: Invalid model architecture ID.".format(module_name))

    @staticmethod
    def validate_normalizing_const(module_name, normalizing_const):
        """Assert the normalizing constant is strictly positive."""
        _assert(normalizing_const > 0,
                "{0} error: Normalizing constant has to be greater than 0.".
                format(module_name))

    @staticmethod
    def validate_class_values(module_name, class_values, pred_type, model_arch):
        """Assert class_values (if given) matches the architecture's output
        size and does not exceed the PostgreSQL column limit."""
        if not class_values:
            return
        num_classes = len(class_values)
        _assert(num_classes == get_num_classes(model_arch),
                "{0}: The number of class values do not match the "
                "provided architecture.".format(module_name))
        if pred_type == 'prob' and num_classes+1 >= 1600:
            # Fix: pass the message string directly; wrapping it in {...}
            # created a set literal and garbled the reported error.
            plpy.error("{0}: The output will have {1} columns, exceeding the "
                       " max number of columns that can be created (1600)".format(
                           module_name, num_classes+1))

    @staticmethod
    def validate_model_weights(module_name, model_arch, model_weights):
        """Assert both the architecture and its weights are non-empty."""
        _assert(model_weights and model_arch,
                "{0}: Model weights and architecture must be valid.".format(
                    module_name))

    @staticmethod
    def _validate_model_data_tbl(module_name, model_table):
        # The trained-model table must carry both the serialized weights and
        # the architecture column.
        input_tbl_valid(model_table, module_name)
        _assert(is_var_valid(model_table, MODEL_DATA_COLNAME),
                "{module_name} error: column '{model_data}' "
                "does not exist in model table '{table}'.".format(
                    module_name=module_name,
                    model_data=MODEL_DATA_COLNAME,
                    table=model_table))
        _assert(is_var_valid(model_table, ModelArchSchema.MODEL_ARCH),
                "{module_name} error: column '{model_arch}' "
                "does not exist in model table '{table}'.".format(
                    module_name=module_name,
                    model_arch=ModelArchSchema.MODEL_ARCH,
                    table=model_table))

    @staticmethod
    def _validate_test_tbl(module_name, test_table, independent_varname):
        # Test table must exist and contain the independent variable column.
        input_tbl_valid(test_table, module_name)
        _assert(is_var_valid(test_table, independent_varname),
                "{module_name} error: invalid independent_varname "
                "('{independent_varname}') for test table "
                "({table}).".format(
                    module_name=module_name,
                    independent_varname=independent_varname,
                    table=test_table))

    @staticmethod
    def _validate_model_summary_tbl(module_name, model_summary_table):
        # Summary table must expose every column predict/evaluate reads back.
        input_tbl_valid(model_summary_table, module_name)
        cols_to_check_for = [CLASS_VALUES_COLNAME,
                             DEPENDENT_VARNAME_COLNAME,
                             DEPENDENT_VARTYPE_COLNAME,
                             MODEL_ARCH_ID_COLNAME,
                             MODEL_ARCH_TABLE_COLNAME,
                             NORMALIZING_CONST_COLNAME,
                             COMPILE_PARAMS_COLNAME,
                             METRIC_TYPE_COLNAME]
        _assert(columns_exist_in_table(
            model_summary_table, cols_to_check_for),
            "{0} error: One or more expected columns missing in model "
            "summary table ('{1}'). The expected columns are {2}.".format(
                module_name, model_summary_table, cols_to_check_for))
Must be "\ - "either response or prob.".format(self.module_name, self.pred_type)) - if self.pred_type == 'prob' and class_values and len(class_values)+1 >= 1600: - plpy.error({"{0}: The output will have {1} columns, exceeding the "\ - " max number of columns that can be created (1600)".format( - self.module_name, len(class_values)+1)}) - def validate_input_shape(self, input_shape_from_arch): - _validate_input_shapes(self.test_table, self.independent_varname, - input_shape_from_arch, 1) - def _validate_test_tbl_cols(self): - InputValidator._validate_test_tbl_cols(self) - _assert(is_var_valid(self.test_table, self.id_col), - "{module_name} error: invalid id column " - "('{id_col}') for test table ({table}).".format( - module_name=self.module_name, - id_col=self.id_col, - table=self.test_table)) class FitInputValidator: def __init__(self, source_table, validation_table, output_model_table, - model_arch_table, dependent_varname, independent_varname, - num_iterations, metrics_compute_frequency, warm_start): + model_arch_table, model_arch_id, dependent_varname, + independent_varname, num_iterations, + metrics_compute_frequency, warm_start): self.source_table = source_table self.validation_table = validation_table self.output_model_table = output_model_table self.model_arch_table = model_arch_table + self.model_arch_id = model_arch_id self.dependent_varname = dependent_varname self.independent_varname = independent_varname self.metrics_compute_frequency = metrics_compute_frequency @@ -236,30 +232,6 @@ class FitInputValidator: self.module_name = 'madlib_keras_fit' self._validate_input_args() - def _validate_input_table(self, table): - _assert(is_var_valid(table, self.independent_varname), - "{module_name}: invalid independent_varname " - "('{independent_varname}') for table ({table}). 
" - "Please ensure that the input table ({table}) " - "has been preprocessed by the image preprocessor.".format( - module_name=self.module_name, - independent_varname=self.independent_varname, - table=table)) - - _assert(is_var_valid(table, self.dependent_varname), - "{module_name}: invalid dependent_varname " - "('{dependent_varname}') for table ({table}). " - "Please ensure that the input table ({table}) " - "has been preprocessed by the image preprocessor.".format( - module_name=self.module_name, - dependent_varname=self.dependent_varname, - table=table)) - - def _is_valid_metrics_compute_frequency(self): - return self.metrics_compute_frequency is None or \ - (self.metrics_compute_frequency >= 1 and \ - self.metrics_compute_frequency <= self.num_iterations) - def _validate_input_args(self): _assert(self.num_iterations > 0, "{0}: Number of iterations cannot be < 1.".format(self.module_name)) @@ -281,8 +253,8 @@ class FitInputValidator: self.dependent_varname) self._validate_validation_table() - - input_tbl_valid(self.model_arch_table, self.module_name) + InputValidator.validate_model_arch_table(self.module_name, self.model_arch_table, + self.model_arch_id) if self.warm_start: input_tbl_valid(self.output_model_table, self.module_name) input_tbl_valid(self.output_summary_model_table, self.module_name) @@ -290,6 +262,31 @@ class FitInputValidator: output_tbl_valid(self.output_model_table, self.module_name) output_tbl_valid(self.output_summary_model_table, self.module_name) + def _validate_input_table(self, table): + _assert(is_var_valid(table, self.independent_varname), + "{module_name}: invalid independent_varname " + "('{independent_varname}') for table ({table}). 
" + "Please ensure that the input table ({table}) " + "has been preprocessed by the image preprocessor.".format( + module_name=self.module_name, + independent_varname=self.independent_varname, + table=table)) + + _assert(is_var_valid(table, self.dependent_varname), + "{module_name}: invalid dependent_varname " + "('{dependent_varname}') for table ({table}). " + "Please ensure that the input table ({table}) " + "has been preprocessed by the image preprocessor.".format( + module_name=self.module_name, + dependent_varname=self.dependent_varname, + table=table)) + + def _is_valid_metrics_compute_frequency(self): + return self.metrics_compute_frequency is None or \ + (self.metrics_compute_frequency >= 1 and \ + self.metrics_compute_frequency <= self.num_iterations) + + def _validate_validation_table(self): if self.validation_table and self.validation_table.strip() != '': @@ -305,9 +302,10 @@ class FitInputValidator: def validate_input_shapes(self, input_shape): - _validate_input_shapes(self.source_table, self.independent_varname, + InputValidator.validate_input_shape(self.source_table, self.independent_varname, input_shape, 2) if self.validation_table: - _validate_input_shapes( + InputValidator.validate_input_shape( self.validation_table, self.independent_varname, input_shape, 2) + diff --git a/src/ports/postgres/modules/utilities/model_arch_info.py_in b/src/ports/postgres/modules/deep_learning/model_arch_info.py_in similarity index 66% rename from src/ports/postgres/modules/utilities/model_arch_info.py_in rename to src/ports/postgres/modules/deep_learning/model_arch_info.py_in index a03594a..c749144 100644 --- a/src/ports/postgres/modules/utilities/model_arch_info.py_in +++ b/src/ports/postgres/modules/deep_learning/model_arch_info.py_in @@ -22,6 +22,7 @@ m4_changequote(`<!', `!>') import sys import json import plpy +from keras_model_arch_table import ModelArchSchema def _get_layers(model_arch): d = json.loads(model_arch) @@ -41,6 +42,22 @@ def 
def get_model_arch_weights(model_arch_table, model_arch_id):
    """Fetch the architecture JSON and serialized weights for one model id.

    Assumes the table name and id have already been validated by the caller.
    Raises (via plpy.error) if no row matches the id.
    """
    query = "SELECT {0}, {1} FROM {2} WHERE {3} = {4}".format(
        ModelArchSchema.MODEL_ARCH, ModelArchSchema.MODEL_WEIGHTS,
        model_arch_table, ModelArchSchema.MODEL_ID,
        model_arch_id)
    rows = plpy.execute(query)
    if not rows:
        plpy.error("no model arch found in table {0} with id {1}".format(
            model_arch_table, model_arch_id))
    row = rows[0]
    return row[ModelArchSchema.MODEL_ARCH], row[ModelArchSchema.MODEL_WEIGHTS]
= 'cifar10_predict'::regclass - AND attname = 'id'; +SELECT assert(UPPER(pg_typeof(id)::TEXT )= 'INTEGER', + 'id column should be INTEGER type') FROM cifar10_predict; -SELECT assert(UPPER(atttypid::regtype::TEXT) = +SELECT assert(UPPER(pg_typeof(estimated_y)::TEXT) = 'SMALLINT', 'prediction column should be SMALLINT type') - FROM pg_attribute WHERE attrelid = 'cifar10_predict'::regclass - AND attname = 'estimated_y'; +FROM cifar10_predict; -- Validate correct number of rows returned. -SELECT assert(COUNT(*)=2, 'Output table of madlib_keras_predict should have two rows') FROM cifar10_predict; +SELECT assert(COUNT(*)=2, 'Output table of madlib_keras_predict should have two rows') +FROM cifar10_predict; -- First test that all values are in set of class values; if this breaks, it's definitely a problem. SELECT assert(estimated_y IN (0,1), - 'Predicted value not in set of defined class values for model') + 'Predicted value not in set of defined class values for model') FROM cifar10_predict; DROP TABLE IF EXISTS cifar10_predict; @@ -512,15 +511,13 @@ SELECT madlib_keras_predict( 'prob', 0); -SELECT assert(UPPER(atttypid::regtype::TEXT) = +SELECT assert(UPPER(pg_typeof(prob_0)::TEXT) = 'DOUBLE PRECISION', 'column prob_0 should be double precision type') - FROM pg_attribute WHERE attrelid = 'cifar10_predict'::regclass - AND attname = 'prob_0'; +FROM cifar10_predict; -SELECT assert(UPPER(atttypid::regtype::TEXT) = +SELECT assert(UPPER(pg_typeof(prob_1)::TEXT) = 'DOUBLE PRECISION', 'column prob_1 should be double precision type') - FROM pg_attribute WHERE attrelid = 'cifar10_predict'::regclass - AND attname = 'prob_1'; +FROM cifar10_predict; SELECT assert(COUNT(*)=3, 'Predict out table must have exactly three cols.') FROM pg_attribute @@ -616,20 +613,17 @@ SELECT madlib_keras_predict( -- Validate the output datatype of newly created prediction columns -- for prediction type = 'prob' and class_values 'TEXT' with NULL as a valid -- class_values -SELECT 
assert(UPPER(atttypid::regtype::TEXT) = +SELECT assert(UPPER(pg_typeof(prob_cat)::TEXT) = 'DOUBLE PRECISION', 'column prob_cat should be double precision type') -FROM pg_attribute -WHERE attrelid = 'cifar10_predict'::regclass AND attname = 'prob_cat'; +FROM cifar10_predict; -SELECT assert(UPPER(atttypid::regtype::TEXT) = +SELECT assert(UPPER(pg_typeof(prob_dog)::TEXT) = 'DOUBLE PRECISION', 'column prob_dog should be double precision type') -FROM pg_attribute -WHERE attrelid = 'cifar10_predict'::regclass AND attname = 'prob_dog'; +FROM cifar10_predict; -SELECT assert(UPPER(atttypid::regtype::TEXT) = +SELECT assert(UPPER(pg_typeof("prob_NULL")::TEXT) = 'DOUBLE PRECISION', 'column prob_NULL should be double precision type') -FROM pg_attribute -WHERE attrelid = 'cifar10_predict'::regclass AND attname = 'prob_NULL'; +FROM cifar10_predict; -- Must have exactly 4 cols (3 for class_values and 1 for id) SELECT assert(COUNT(*)=4, 'Predict out table must have exactly four cols.') @@ -650,11 +644,10 @@ SELECT madlib_keras_predict( -- Validate the output datatype of newly created prediction columns -- for prediction type = 'response' and class_values 'TEXT' with NULL -- as a valid class_values -SELECT assert(UPPER(atttypid::regtype::TEXT) = - 'TEXT', 'prediction column should be TEXT type') -FROM pg_attribute -WHERE attrelid = 'cifar10_predict'::regclass - AND attname = 'estimated_y'; +SELECT assert(UPPER(pg_typeof(estimated_y_text)::TEXT) = 'TEXT', + 'prediction column should be TEXT type') +FROM cifar10_predict LIMIT 1; + -- Tests where the assumption is user has one-hot encoded, so class_values -- in input summary table will be NULL. 
@@ -674,10 +667,9 @@ SELECT madlib_keras_predict( -- Validate the output datatype of newly created prediction column -- for prediction type = 'response' and class_value = NULL -- Returns: Array of probabilities for user's one-hot encoded data -SELECT assert(UPPER(atttypid::regtype::TEXT) = - 'DOUBLE PRECISION[]', 'column prob should be double precision[] type') -FROM pg_attribute -WHERE attrelid = 'cifar10_predict'::regclass AND attname = 'prob'; +SELECT assert(UPPER(pg_typeof(prob)::TEXT) = 'DOUBLE PRECISION[]', + 'column prob should be double precision[] type') +FROM cifar10_predict LIMIT 1; -- Predict with pred_type=response DROP TABLE IF EXISTS cifar10_predict; @@ -694,11 +686,14 @@ SELECT madlib_keras_predict( -- for prediction type = 'response' and class_value = NULL -- Returns: Index of class value in user's one-hot encoded data with -- highest probability -SELECT assert(UPPER(atttypid::regtype::TEXT) = - 'DOUBLE PRECISION', 'prediction column should be double precision type') -FROM pg_attribute -WHERE attrelid = 'cifar10_predict'::regclass - AND attname = 'estimated_y'; +SELECT assert(UPPER(pg_typeof(estimated_y_text)::TEXT) = 'TEXT', + 'column estimated_y_text should be text type') +FROM cifar10_predict LIMIT 1; + +SELECT assert( + estimated_y_text IN ('0', '1'), + 'Predict failure for null class value and response pred_type.') +FROM cifar10_predict; -- Test predict with INTEGER class_values -- with NULL as a valid class value @@ -747,13 +742,11 @@ SELECT madlib_keras_predict( -- Validate the output datatype of newly created prediction column -- for prediction type = 'prob' and class_values 'INT' with NULL -- as a valid class_values -SELECT assert(UPPER(atttypid::regtype::TEXT) = +SELECT assert(UPPER(pg_typeof("prob_NULL")::TEXT) = 'DOUBLE PRECISION', 'column prob_NULL should be double precision type') -FROM pg_attribute -WHERE attrelid = 'cifar10_predict'::regclass AND attname = 'prob_NULL'; - +FROM cifar10_predict; -- Must have exactly 6 cols (5 for 
class_values and 1 for id) -SELECT assert(COUNT(*)=6, 'Predict out table must have exactly four cols.') +SELECT assert(COUNT(*)=6, 'Predict out table must have exactly six cols.') FROM pg_attribute WHERE attrelid='cifar10_predict'::regclass AND attnum>0; @@ -772,10 +765,9 @@ SELECT madlib_keras_predict( -- for prediction type = 'response' and class_values 'TEXT' with NULL -- as a valid class_values -- Returns: class_value with highest probability -SELECT assert(UPPER(atttypid::regtype::TEXT) = +SELECT assert(UPPER(pg_typeof(estimated_y)::TEXT) = 'SMALLINT', 'prediction column should be smallint type') -FROM pg_attribute -WHERE attrelid = 'cifar10_predict'::regclass AND attname = 'estimated_y'; +FROM cifar10_predict; -- Test case with a different input shape (3, 32, 32) instead of (32, 32, 3). -- Create a new table with image shape 3, 32, 32 @@ -1066,6 +1058,27 @@ SELECT madlib_keras_fit('iris_data_packed', -- source table 1 -- metrics_compute_frequency ); +DROP TABLE IF EXISTS iris_train, iris_test; +-- Set seed so results are reproducible +SELECT setseed(0); +SELECT train_test_split('iris_data', -- Source table + 'iris', -- Output table root name + 0.8, -- Train proportion + NULL, -- Test proportion (0.2) + NULL, -- Strata definition + NULL, -- Output all columns + NULL, -- Sample without replacement + TRUE -- Separate output tables + ); + +DROP TABLE IF EXISTS iris_predict; +SELECT madlib_keras_predict('iris_model', -- model + 'iris_test', -- test_table + 'id', -- id column + 'attributes', -- independent var + 'iris_predict' -- output table + ); + -- Test that our code is indeed learning something and not broken. The loss -- from the first iteration should be less than the 5th, while the accuracy -- must be greater. 
@@ -1179,3 +1192,96 @@ SELECT assert( abs(first.training_metrics_final-second.training_metrics[2]) < 1e-10, 'Transfer learning test failed because training loss and metrics don''t match the expected value.') FROM iris_model_first_run AS first, iris_model_transfer_summary AS second; + +---------------------- Predict BYOM test -------------------------------- + +-- class_values not NULL, pred_type is response +DROP TABLE IF EXISTS iris_predict_byom; +SELECT madlib_keras_predict_byom( + 'iris_model_arch', + 2, + 'iris_test', + 'id', + 'attributes', + 'iris_predict_byom', + 'response', + -1, + ARRAY['Iris-setosa', 'Iris-versicolor', + 'Iris-virginica'] + ); + +SELECT assert( + p0.estimated_class_text = p1.estimated_dependent_var, + 'Predict byom failure for non null class value and response pred_type.') +FROM iris_predict AS p0, iris_predict_byom AS p1 +WHERE p0.id=p1.id; +SELECT assert(UPPER(pg_typeof(estimated_dependent_var)::TEXT) = 'TEXT', + 'Predict byom failure for non null class value and response pred_type. + Expeceted estimated_dependent_var to be of type TEXT') +FROM iris_predict_byom LIMIT 1; + +-- class_values NULL, pred_type is NULL (response) +DROP TABLE IF EXISTS iris_predict_byom; +SELECT madlib_keras_predict_byom( + 'iris_model_arch', + 2, + 'iris_test', + 'id', + 'attributes', + 'iris_predict_byom' + ); +SELECT assert( + p1.estimated_dependent_var IN ('0', '1', '2'), + 'Predict byom failure for null class value and null pred_type.') +FROM iris_predict_byom AS p1; +SELECT assert(UPPER(pg_typeof(estimated_dependent_var)::TEXT) = 'TEXT', + 'Predict byom failure for non null class value and response pred_type. 
+ Expeceted estimated_dependent_var to be of type TEXT') +FROM iris_predict_byom LIMIT 1; + +-- class_values not NULL, pred_type is prob +DROP TABLE IF EXISTS iris_predict_byom; +SELECT madlib_keras_predict_byom( + 'iris_model_arch', + 2, + 'iris_test', + 'id', + 'attributes', + 'iris_predict_byom', + 'prob', + -1, + ARRAY['Iris-setosa', 'Iris-versicolor', + 'Iris-virginica'], + 1.0 + ); + +SELECT assert( + (p1."prob_Iris-setosa" + p1."prob_Iris-virginica" + p1."prob_Iris-versicolor") - 1 < 1e-6, + 'Predict byom failure for non null class value and prob pred_type.') +FROM iris_predict_byom AS p1; +SELECT assert(UPPER(pg_typeof("prob_Iris-setosa")::TEXT) = 'DOUBLE PRECISION', + 'Predict byom failure for non null class value and prob pred_type. + Expeceted "prob_Iris-setosa" to be of type DOUBLE PRECISION') +FROM iris_predict_byom LIMIT 1; + +-- class_values NULL, pred_type is prob +DROP TABLE IF EXISTS iris_predict_byom; +SELECT madlib_keras_predict_byom( + 'iris_model_arch', + 2, + 'iris_test', + 'id', + 'attributes', + 'iris_predict_byom', + 'prob', + 0, + NULL + ); +SELECT assert( + (prob[1] + prob[2] + prob[3]) - 1 < 1e-6, + 'Predict byom failure for null class value and prob pred_type.') +FROM iris_predict_byom; +SELECT assert(UPPER(pg_typeof(prob)::TEXT) = 'DOUBLE PRECISION[]', + 'Predict byom failure for null class value and prob pred_type. 
Expected prob to
test_predictbyom_defaults_1(self): + res = self.module.PredictBYOM('schema_madlib', 'model_arch_table', + 'model_arch_id', 'test_table', 'id_col', + 'independent_varname', 'output_table', None, + None, None, None) + self.assertEqual('response', res.pred_type) + self.assertEqual(0, res.gpus_per_host) + self.assertEqual([0,1,2,3,4], res.class_values) + self.assertEqual(1.0, res.normalizing_const) + self.assertEqual('text', res.dependent_vartype) + + def test_predictbyom_defaults_2(self): + res = self.module.PredictBYOM('schema_madlib', 'model_arch_table', + 'model_arch_id', 'test_table', 'id_col', + 'independent_varname', 'output_table', + self.pred_type, self.gpus_per_host, + self.class_values, self.normalizing_const) + self.assertEqual('prob', res.pred_type) + self.assertEqual(2, res.gpus_per_host) + self.assertEqual(['foo', 'bar', 'baaz', 'foo2', 'bar2'], res.class_values) + self.assertEqual(255.0, res.normalizing_const) + self.assertEqual('double precision', res.dependent_vartype) + + def test_predictbyom_exception_invalid_params(self): + with self.assertRaises(plpy.PLPYException) as error: + self.module.PredictBYOM('schema_madlib', 'model_arch_table', + 'model_arch_id', 'test_table', 'id_col', + 'independent_varname', 'output_table', + 'invalid_pred_type', self.gpus_per_host, + self.class_values, self.normalizing_const) + self.assertIn('invalid_pred_type', str(error.exception)) + + with self.assertRaises(plpy.PLPYException) as error: + self.module.PredictBYOM('schema_madlib', 'model_arch_table', + 'model_arch_id', 'test_table', 'id_col', + 'independent_varname', 'output_table', + self.pred_type, self.gpus_per_host, + ["foo", "bar", "baaz"], self.normalizing_const) + self.assertIn('class values', str(error.exception).lower()) + + with self.assertRaises(plpy.PLPYException) as error: + self.module.PredictBYOM('schema_madlib', 'model_arch_table', + 'model_arch_id', 'test_table', 'id_col', + 'independent_varname', 'output_table', + self.pred_type, self.gpus_per_host, 
+ self.class_values, 0) + self.assertIn('normalizing const', str(error.exception).lower()) + + class MadlibKerasWrapperTestCase(unittest.TestCase): def setUp(self): self.plpy_mock = Mock(spec='error') @@ -748,58 +832,37 @@ class MadlibKerasFitInputValidatorTestCase(unittest.TestCase): def tearDown(self): self.module_patcher.stop() - def test_validate_input_shapes_shapes_do_not_match(self): - self.plpy_mock_execute.return_value = [{'n_0': 32, 'n_1': 32}] - self.subject._validate_input_args = Mock() - with self.assertRaises(plpy.PLPYException): - self.subject._validate_input_shapes( - 'dummy_tbl', 'dummy_col', [32,32,3], 2) - - self.plpy_mock_execute.return_value = [{'n_0': 3, 'n_1': 32, 'n_2': 32}] - with self.assertRaises(plpy.PLPYException): - self.subject._validate_input_shapes( - 'dummy_tbl', 'dummy_col', [32,32,3], 2) - - self.plpy_mock_execute.return_value = [{'n_0': 3, 'n_1': None, 'n_2': None}] - with self.assertRaises(plpy.PLPYException): - self.subject._validate_input_shapes( - 'dummy_tbl', 'dummy_col', [3,32], 2) - - def test_validate_input_shapes_shapes_match(self): - self.plpy_mock_execute.return_value = [{'n_0': 32, 'n_1': 32, 'n_2': 3}] - self.subject._validate_input_args = Mock() - self.subject._validate_input_shapes( - 'dummy_tbl', 'dummy_col', [32,32,3], 1) def test_is_valid_metrics_compute_frequency_True_None(self): self.subject.FitInputValidator._validate_input_args = Mock() obj = self.subject.FitInputValidator( - 'test_table', 'val_table', 'model_table', 'model_arch_table', + 'test_table', 'val_table', 'model_table', 'model_arch_table', 2, 'dep_varname', 'independent_varname', 5, None, False) self.assertEqual(True, obj._is_valid_metrics_compute_frequency()) def test_is_valid_metrics_compute_frequency_True_num(self): self.subject.FitInputValidator._validate_input_args = Mock() obj = self.subject.FitInputValidator( - 'test_table', 'val_table', 'model_table', 'model_arch_table', + 'test_table', 'val_table', 'model_table', 'model_arch_table', 2, 
'dep_varname', 'independent_varname', 5, 3, False) self.assertEqual(True, obj._is_valid_metrics_compute_frequency()) def test_is_valid_metrics_compute_frequency_False_zero(self): self.subject.FitInputValidator._validate_input_args = Mock() obj = self.subject.FitInputValidator( - 'test_table', 'val_table', 'model_table', 'model_arch_table', + 'test_table', 'val_table', 'model_table', 'model_arch_table', 2, 'dep_varname', 'independent_varname', 5, 0, False) self.assertEqual(False, obj._is_valid_metrics_compute_frequency()) def test_is_valid_metrics_compute_frequency_False_greater(self): self.subject.FitInputValidator._validate_input_args = Mock() obj = self.subject.FitInputValidator( - 'test_table', 'val_table', 'model_table', 'model_arch_table', + 'test_table', 'val_table', 'model_table', 'model_arch_table', 2, 'dep_varname', 'independent_varname', 5, 6, False) self.assertEqual(False, obj._is_valid_metrics_compute_frequency()) -class PredictInputValidatorTestCases(unittest.TestCase): + +class InputValidatorTestCase(unittest.TestCase): def setUp(self): self.plpy_mock = Mock(spec='error') patches = { @@ -813,34 +876,83 @@ class PredictInputValidatorTestCases(unittest.TestCase): self.module_patcher.start() import madlib_keras_validator self.module = madlib_keras_validator - self.module.PredictInputValidator._validate_input_args = Mock() - self.subject = self.module.PredictInputValidator( - 'test_table', 'model_table', 'id_col', 'independent_varname', - 'output_table', 'pred_type', 'module_name') + self.subject = self.module.InputValidator + + self.module_name = 'module' + self.test_table = 'test_table' + self.model_table = 'model_table' + self.id_col = 'id_col' + self.ind_var = 'ind_var' + self.model_arch_table = 'model_arch_table' + self.model_arch_id = 2 + self.num_classes = 1598 + self.model = Sequential() + self.model.add(Conv2D(2, kernel_size=(1, 1), activation='relu', + input_shape=(1,1,1,), padding='same')) + self.model.add(Dense(self.num_classes)) self.classes 
= ['train', 'boat', 'car', 'airplane'] def tearDown(self): self.module_patcher.stop() def test_validate_pred_type_invalid_pred_type(self): - self.subject.pred_type = 'invalid' + with self.assertRaises(plpy.PLPYException) as error: + self.subject.validate_pred_type( + self.module_name, 'invalid_pred_type', ['cat', 'dog']) + self.assertIn('type', str(error.exception).lower()) + + def test_validate_class_values_greater_than_1600_class_values(self): + self.model.add(Dense(1599)) + with self.assertRaises(plpy.PLPYException) as error: + self.subject.validate_class_values( + self.module_name, range(1599), 'prob', self.model.to_json()) + self.assertIn('1600', str(error.exception)) + + def test_validate_class_values_valid_class_values_prob(self): + self.subject.validate_class_values( + self.module_name, range(self.num_classes), 'prob', self.model.to_json()) + self.subject.validate_class_values( + self.module_name, None, 'prob', self.model.to_json()) + + def test_validate_class_values_valid_pred_type_valid_class_values_response(self): + self.subject.validate_class_values( + self.module_name, range(self.num_classes), 'response', self.model.to_json()) + self.subject.validate_class_values( + self.module_name, None, 'response', self.model.to_json()) + + def test_validate_input_shape_shapes_do_not_match(self): + self.plpy_mock_execute.return_value = [{'n_0': 32, 'n_1': 32}] + with self.assertRaises(plpy.PLPYException): + self.subject.validate_input_shape( + self.test_table, self.ind_var, [32,32,3], 2) + + self.plpy_mock_execute.return_value = [{'n_0': 3, 'n_1': 32, 'n_2': 32}] with self.assertRaises(plpy.PLPYException): - self.subject.validate_pred_type(['cat', 'dog']) + self.subject.validate_input_shape( + self.test_table, self.ind_var, [32,32,3], 2) - def test_validate_pred_type_valid_pred_type_invalid_num_class_values(self): - self.subject.pred_type = 'prob' + self.plpy_mock_execute.return_value = [{'n_0': 3, 'n_1': None, 'n_2': None}] with 
self.assertRaises(plpy.PLPYException): - self.subject.validate_pred_type(range(1599)) + self.subject.validate_input_shape( + self.test_table, self.ind_var, [3,32], 2) - def test_validate_pred_type_valid_pred_type_valid_class_values_prob(self): - self.subject.pred_type = 'prob' - self.subject.validate_pred_type(range(1598)) - self.subject.validate_pred_type(None) + def test_validate_input_shape_shapes_match(self): + self.plpy_mock_execute.return_value = [{'n_0': 32, 'n_1': 32, 'n_2': 3}] + self.subject.validate_input_shape( + self.test_table, self.ind_var, [32,32,3], 1) + + def test_validate_model_arch_table_none_values(self): + with self.assertRaises(plpy.PLPYException) as error: + obj = self.subject.validate_model_arch_table( + self.module_name, None, self.model_arch_id) + self.assertIn('null', str(error.exception).lower()) + + self.module.input_tbl_valid = Mock() + with self.assertRaises(plpy.PLPYException) as error: + obj = self.subject.validate_model_arch_table( + self.module_name, self.model_arch_table, None) + self.assertIn('id', str(error.exception).lower()) - def test_validate_pred_type_valid_pred_type_valid_class_values_response(self): - self.subject.pred_type = 'response' - self.subject.validate_pred_type(range(1598)) - self.subject.validate_pred_type(None) class MadlibSerializerTestCase(unittest.TestCase): def setUp(self): @@ -921,6 +1033,7 @@ class MadlibSerializerTestCase(unittest.TestCase): self.assertEqual(np.array([0,1,3,4,5], dtype=np.float32).tostring(), res) + class MadlibKerasHelperTestCase(unittest.TestCase): def setUp(self): self.plpy_mock = Mock(spec='error')