This is an automated email from the ASF dual-hosted git repository. nkak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
commit bf39c4c13c680a285ba6f61e5b6acb296eaf1fe8 Author: Orhan Kislal <okis...@apache.org> AuthorDate: Fri Oct 2 17:06:06 2020 +0300 DL: Remove keras dependency JIRA: MADLIB-1438 Remove keras dependency to use the keras from the tensorflow package. Also, removed the channels first test since it only works on GPUs. It fails on the CPU with the following error: ``` ERROR: spiexceptions.ExternalRoutineException: tensorflow.python.framework.errors_impl.InvalidArgumentError: Default MaxPoolingOp only supports NHWC on device type CPU ``` In order for MaxPooling to work on a channel-first dataset with CPUs, we need a version of tensorflow that supports MKL (Intel Math Kernel Library). Since this is an edge case, we are removing the channels first tests. --- .../modules/deep_learning/gpu_info_from_tf.py_in | 7 +++--- .../modules/deep_learning/madlib_keras.py_in | 16 ++++++------ .../madlib_keras_custom_function.sql_in | 2 +- .../madlib_keras_fit_multiple_model.py_in | 3 ++- .../madlib_keras_model_selection.py_in | 5 ++-- .../deep_learning/madlib_keras_predict.py_in | 15 +++++------ .../deep_learning/madlib_keras_validator.py_in | 4 +-- .../deep_learning/madlib_keras_wrapper.py_in | 27 +++++++++++--------- .../test/keras_model_arch_table.sql_in | 4 +-- .../test/madlib_keras_cifar.setup.sql_in | 21 ---------------- .../deep_learning/test/madlib_keras_fit.sql_in | 21 +++++----------- .../deep_learning/test/madlib_keras_predict.sql_in | 29 +++------------------- .../test/unit_tests/test_madlib_keras.py_in | 13 +++------- .../test/unit_tests/test_madlib_keras_automl.py_in | 2 +- .../test_madlib_keras_model_selection_table.py_in | 2 +- 15 files changed, 61 insertions(+), 110 deletions(-) diff --git a/src/ports/postgres/modules/deep_learning/gpu_info_from_tf.py_in b/src/ports/postgres/modules/deep_learning/gpu_info_from_tf.py_in index b1e59b1..6456128 100644 --- a/src/ports/postgres/modules/deep_learning/gpu_info_from_tf.py_in +++ 
b/src/ports/postgres/modules/deep_learning/gpu_info_from_tf.py_in @@ -24,12 +24,13 @@ is intended to be called using subprocess. See madlib_keras_gpu_info.py_in for more details. """ +import tensorflow as tf from tensorflow.python.client import device_lib -from keras import backend as K +from tensorflow.keras import backend as K -config = K.tf.ConfigProto() +config = tf.ConfigProto() config.gpu_options.allow_growth = True -sess = K.tf.Session(config=config) +sess = tf.Session(config=config) local_device_protos = device_lib.list_local_devices() K.clear_session() sess.close() diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in index d54deeb..ad64442 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in @@ -23,11 +23,6 @@ import plpy import sys import time -from keras import backend as K -from keras.layers import * -from keras.models import * -from keras.optimizers import * -from keras.regularizers import * from madlib_keras_helper import * from madlib_keras_validator import * from madlib_keras_wrapper import * @@ -45,8 +40,15 @@ from utilities.utilities import unique_string from utilities.validate_args import get_expr_type from utilities.validate_args import quote_ident from utilities.control import MinWarning + import tensorflow as tf +from tensorflow.keras import backend as K +from tensorflow.keras.layers import * +from tensorflow.keras.models import * +from tensorflow.keras.optimizers import * +from tensorflow.keras.regularizers import * + class SD_STORE: SESS = 'sess' SEGMENT_MODEL = 'segment_model' @@ -530,7 +532,6 @@ def fit_transition(state, dependent_var, independent_var, dependent_var_shape, if is_last_row: if is_final_iteration or is_multiple_model: SD_STORE.clear_SD(SD) - clear_keras_session(sess) return return_state @@ -873,10 +874,11 @@ def internal_keras_eval_transition(state, 
dependent_var, independent_var, agg_loss = 0 set_model_weights(segment_model, serialized_weights) + x_val = np_array_float32(independent_var, independent_var_shape) y_val = np_array_int16(dependent_var, dependent_var_shape) - with K.tf.device(device_name): + with tf.device(device_name): res = segment_model.evaluate(x_val, y_val) # if metric is None, model.evaluate will only return loss as a scalar diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in index 440d814..979332b 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in @@ -438,7 +438,7 @@ n INTEGER, fn_name VARCHAR ) RETURNS BYTEA AS $$ import dill - from keras.metrics import top_k_categorical_accuracy + from tensorflow.keras.metrics import top_k_categorical_accuracy def fn(Y_true, Y_pred): return top_k_categorical_accuracy(Y_true, diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in index 1e49261..9287524 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in @@ -21,7 +21,6 @@ import plpy import time import sys -from keras.models import * from madlib_keras import compute_loss_and_metrics from madlib_keras import get_initial_weights from madlib_keras import get_model_arch_weights @@ -49,6 +48,8 @@ import json from collections import defaultdict import random import datetime + +from tensorflow.keras.models import * mb_dep_var_col = MINIBATCH_OUTPUT_DEPENDENT_COLNAME_DL mb_indep_var_col = MINIBATCH_OUTPUT_INDEPENDENT_COLNAME_DL dist_key_col = DISTRIBUTION_KEY_COLNAME diff --git 
a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in index 3ea37dc..f29d399 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in @@ -19,8 +19,6 @@ from ast import literal_eval from collections import OrderedDict from itertools import product as itertools_product -import keras.losses as losses -import keras.metrics as metrics from keras_model_arch_table import ModelArchSchema import numpy as np import plpy @@ -37,6 +35,9 @@ from utilities.utilities import add_postfix, _assert, _assert_equal, extract_key from utilities.utilities import quote_ident, get_schema from utilities.validate_args import table_exists, drop_tables +from tensorflow.keras import losses as losses +from tensorflow.keras import metrics as metrics + class ModelSelectionSchema: MST_KEY = 'mst_key' MODEL_ID = ModelArchSchema.MODEL_ID diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in index 0d542b2..9c87395 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in @@ -19,12 +19,6 @@ import plpy -import keras -from keras import backend as K -from keras.layers import * -from keras.models import * -from keras.optimizers import * - from model_arch_info import * from madlib_keras_helper import * from madlib_keras_validator import * @@ -40,6 +34,13 @@ from utilities.validate_args import input_tbl_valid from madlib_keras_wrapper import * +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras import backend as K +from tensorflow.keras.layers import * +from tensorflow.keras.models import * +from tensorflow.keras.optimizers import * + class BasePredict(): def __init__(self, 
schema_madlib, table_to_validate, test_table, id_col, independent_varname, output_table, pred_type, use_gpus, module_name): @@ -314,7 +315,7 @@ def internal_keras_predict(independent_var, model_architecture, model_weights, independent_var = expand_input_dims(independent_var) independent_var /= normalizing_const - with K.tf.device(device_name): + with tf.device(device_name): probs = model.predict(independent_var) # probs is a list containing a list of probability values, of all # class levels. Since we are assuming each input is a single image, diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in index 41e4c72..f7d2076 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in @@ -47,8 +47,8 @@ from utilities.validate_args import input_tbl_valid from utilities.validate_args import output_tbl_valid from madlib_keras_wrapper import parse_and_validate_fit_params from madlib_keras_wrapper import parse_and_validate_compile_params -import keras.losses as losses -import keras.metrics as metrics +import tensorflow.keras.losses as losses +import tensorflow.keras.metrics as metrics class InputValidator: @staticmethod diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in index 57827c5..c60a19b 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in @@ -24,14 +24,6 @@ import plpy from collections import defaultdict from math import ceil -from keras import backend as K -from keras import utils as keras_utils -from keras.optimizers import * - -import keras.optimizers as opt -import keras.losses as losses -import keras.metrics as metrics - import madlib_keras_serializer import 
madlib_keras_gpu_info from madlib_keras_custom_function import CustomFunctionSchema @@ -40,6 +32,15 @@ from madlib_keras_custom_function import update_builtin_metrics from utilities.utilities import _assert from utilities.utilities import is_platform_pg +import tensorflow as tf +from tensorflow.keras import backend as K +from tensorflow.keras import utils as keras_utils +from tensorflow.keras.optimizers import * + +import tensorflow.keras.optimizers as opt +import tensorflow.keras.losses as losses +import tensorflow.keras.metrics as metrics + CUDA_VISIBLE_DEVICES_KEY = 'CUDA_VISIBLE_DEVICES' ####################################################################### ########### Keras specific functions ##### @@ -79,12 +80,12 @@ def get_device_name_and_set_cuda_env(gpu_count, seg): return device_name def set_keras_session(device_name, gpu_count, segments_per_host): - with K.tf.device(device_name): + with tf.device(device_name): session = get_keras_session(device_name, gpu_count, segments_per_host) K.set_session(session) def get_keras_session(device_name, gpu_count, segments_per_host): - config = K.tf.ConfigProto() + config = tf.ConfigProto() if gpu_count > 0: memory_fraction = get_gpu_memory_fraction(gpu_count, segments_per_host) config.gpu_options.allow_growth = False @@ -251,7 +252,7 @@ def parse_optimizer(compile_dict): opt_split = compile_dict['optimizer'].split('(') opt_name = opt_split[0] optimizers = get_optimizers() - _assert(opt_name in optimizers, + _assert(opt_name.lower() in [o.lower() for o in optimizers.keys()], "model_keras error: invalid optimizer name: {0}".format(opt_name)) # If we use only the optimizer name @@ -325,7 +326,9 @@ def get_optimizers(): names = dir(opt) for n in names: optimizer = eval('opt.' 
+ n) - if optimizer.__class__ == type and optimizer.__base__ == opt.Optimizer: + if isinstance(optimizer.__class__,type) and \ + '__module__' in dir(optimizer) and \ + 'tensorflow.python.keras.optimizer' in optimizer.__module__: optimizers[n] = optimizer return optimizers diff --git a/src/ports/postgres/modules/deep_learning/test/keras_model_arch_table.sql_in b/src/ports/postgres/modules/deep_learning/test/keras_model_arch_table.sql_in index 1f2009b..90eb335 100644 --- a/src/ports/postgres/modules/deep_learning/test/keras_model_arch_table.sql_in +++ b/src/ports/postgres/modules/deep_learning/test/keras_model_arch_table.sql_in @@ -127,8 +127,8 @@ FROM test_keras_model_arch_table WHERE model_id = 2; --------------------------- Test calling the UDF from python --------------------------------- CREATE OR REPLACE FUNCTION create_model_arch_transfer_learning() RETURNS VOID AS $$ -from keras.layers import * -from keras import Sequential +from tensorflow.keras.layers import * +from tensorflow.keras import Sequential import numpy as np import plpy diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_cifar.setup.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_cifar.setup.sql_in index 7c9ad5e..41dbf84 100644 --- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_cifar.setup.sql_in +++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_cifar.setup.sql_in @@ -95,24 +95,3 @@ SELECT load_keras_model('model_arch', {"class_name": "Zeros", "config": {}}, "units": 5, "use_bias": true, "activity_regularizer": null} }], "backend": "tensorflow"}$$); -SELECT load_keras_model('model_arch', - $${ - "class_name": "Sequential", - "keras_version": "2.1.6", - "config": [{ - "class_name": "Conv2D", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, - "name": "conv2d_1", - "kernel_constraint": null, "bias_regularizer": null, 
"bias_constraint": null, - "dtype": "float32", "activation": "relu", "trainable": true, - "data_format": "channels_first", "filters": 32, "padding": "valid", - "strides": [1, 1], "dilation_rate": [1, 1], "kernel_regularizer": null, - "bias_initializer": {"class_name": "Zeros", "config": {}}, - "batch_input_shape": [null, 3, 32, 32], "use_bias": true, - "activity_regularizer": null, "kernel_size": [3, 3]}}, - {"class_name": "MaxPooling2D", "config": {"name": "max_pooling2d_1", "trainable": true, "data_format": "channels_first", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2]}}, - {"class_name": "Dropout", "config": {"rate": 0.25, "noise_shape": null, "trainable": true, "seed": null, "name": "dropout_1"}}, - {"class_name": "Flatten", "config": {"trainable": true, "name": "flatten_1", "data_format": "channels_first"}}, - {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "softmax", "trainable": true, "kernel_regularizer": null, "bias_initializer": - {"class_name": "Zeros", "config": {}}, "units": 3, "use_bias": true, "activity_regularizer": null} - }], "backend": "tensorflow"}$$); - diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_fit.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_fit.sql_in index bd77532..3d7d49a 100644 --- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_fit.sql_in +++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_fit.sql_in @@ -398,23 +398,14 @@ SELECT assert( 'Keras model output Summary Validation failed. 
Actual:' || __to_char(summary)) FROM (SELECT * FROM keras_saved_out_summary) summary; -DROP TABLE IF EXISTS keras_saved_out, keras_saved_out_summary; -SELECT madlib_keras_fit( - 'cifar_10_sample_test_shape_batched', - 'keras_saved_out', - 'model_arch', - 3, - $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text, - $$ batch_size=2, epochs=1, verbose=0 $$::text, - 3); - +-- Test tables with special chars DROP TABLE IF EXISTS keras_saved_out, keras_saved_out_summary; CREATE TABLE "special-char?" AS SELECT * FROM model_arch; SELECT madlib_keras_fit( - 'cifar_10_sample_test_shape_batched', + 'cifar_10_sample_int_batched', 'keras_saved_out', '"special-char?"', - 3, + 2, $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text, $$ batch_size=2, epochs=1, verbose=0 $$::text, 3); @@ -422,11 +413,11 @@ SELECT madlib_keras_fit( -- Test invalid loss function in compile_param DROP TABLE IF EXISTS keras_saved_out, keras_saved_out_summary; SELECT assert(trap_error($TRAP$SELECT madlib_keras_fit( - 'cifar_10_sample_test_shape_batched', + 'cifar_10_sample_int_batched', 'keras_saved_out', 'model_arch', - 3, + 2, $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='custom_fn', metrics=['accuracy']$$::text, $$ batch_size=2, epochs=1, verbose=0 $$::text, 3);$TRAP$) = 1, - 'Object table not specified for custom function in compile_params.'); + 'Object table not specified for custom function in compile_params.'); \ No newline at end of file diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_predict.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_predict.sql_in index 3d9d0d9..3aa024f 100644 --- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_predict.sql_in +++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_predict.sql_in @@ -290,47 +290,24 @@ SELECT madlib_keras_predict( SELECT assert(count(*)=1, 
'Predict out table must have a single response') FROM cifar10_predict WHERE id = 0; --- Predict with correctly shaped data, must go thru. --- Update output_summary table to reflect --- class_values, num_classes and model_id for shaped data -DROP TABLE IF EXISTS keras_saved_out, keras_saved_out_summary; -SELECT madlib_keras_fit( - 'cifar_10_sample_test_shape_batched', - 'keras_saved_out', - 'model_arch', - 3, - $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text, - $$ batch_size=2, epochs=1, verbose=0 $$::text, - 3); - -DROP TABLE IF EXISTS cifar10_predict; -SELECT madlib_keras_predict( - 'keras_saved_out', - 'cifar_10_sample_test_shape', - 'id', - 'x', - 'cifar10_predict', - 'prob', - FALSE); - -- Prediction with incorrectly shaped data must error out. DROP TABLE IF EXISTS cifar10_predict; SELECT assert(trap_error($TRAP$SELECT madlib_keras_predict( 'keras_saved_out', - 'cifar_10_sample', + 'cifar_10_sample_test_shape', 'id', 'x', 'cifar10_predict', 'prob', FALSE);$TRAP$) = 1, - 'Input shape is (32, 32, 3) but model was trained with (3, 32, 32). Should have failed.'); + 'Input shape is (3, 32, 32) but model was trained with (32, 32, 3). 
Should have failed.'); -- Test model_arch is retrieved from model data table and not model architecture DROP TABLE IF EXISTS model_arch; DROP TABLE IF EXISTS cifar10_predict; SELECT madlib_keras_predict( 'keras_saved_out', - 'cifar_10_sample_test_shape', + 'cifar_10_sample', 'id', 'x', 'cifar10_predict', diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in index 13bbfd1..a97e07e 100644 --- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in +++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in @@ -28,12 +28,10 @@ from os import path sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))) sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) -import keras -from keras.models import * -from keras.layers import * import unittest from mock import * import plpy_mock as plpy + try: from pgsanity.pgsanity import check_string as pglint pglint("SELECT 1;") @@ -359,9 +357,6 @@ class MadlibKerasFitTestCase(unittest.TestCase): self.assertTrue((multiplied_weights == weights).all()) # set_session is always called self.assertEqual(1, self.subject.K.set_session.call_count) - # Clear session and sess.close must get called for the last buffer in gpdb, - # but not in postgres - self.assertEqual(1, self.subject.K.clear_session.call_count) # Non-last iteration Call self.subject.K.set_session.reset_mock() @@ -424,9 +419,6 @@ class MadlibKerasFitTestCase(unittest.TestCase): # set_session is always called self.assertEqual(1, self.subject.K.set_session.call_count) - # Clear session and sess.close must get called for the last buffer in gpdb, - # but not in postgres - self.assertEqual(1, self.subject.K.clear_session.call_count) def test_fit_transition_multiple_model_cache_last_buffer_pass(self): #TODO should we mock tensorflow's 
close_session and keras' @@ -1927,5 +1919,8 @@ class MadlibKerasEvaluationTestCase(unittest.TestCase): self.module_patcher.stop() if __name__ == '__main__': + from tensorflow import keras + from tensorflow.keras.models import * + from tensorflow.keras.layers import * unittest.main() # --------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_automl.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_automl.py_in index 9db4ea1..a4dfff7 100644 --- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_automl.py_in +++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_automl.py_in @@ -26,7 +26,7 @@ import math sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))) sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) -import keras # still needed here even though not explicitly used. DO NOT REMOVE. +from tensorflow import keras # still needed here even though not explicitly used. DO NOT REMOVE. 
import unittest from mock import * import plpy_mock as plpy diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_model_selection_table.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_model_selection_table.py_in index 7de9868..0a1d58f 100644 --- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_model_selection_table.py_in +++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_model_selection_table.py_in @@ -26,7 +26,7 @@ from os import path sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))) sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) -import keras +from tensorflow import keras import unittest from mock import * import plpy_mock as plpy