This is an automated email from the ASF dual-hosted git repository. domino pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
The following commit(s) were added to refs/heads/master by this push: new b4ef574 Fix fit_multiple() warm start b4ef574 is described below commit b4ef5748753977798ef39a38db51516d8c67d965 Author: Domino Valdano <dvald...@pivotal.io> AuthorDate: Fri Feb 21 00:31:22 2020 +0000 Fix fit_multiple() warm start The get_initial_weights() function is shared by both fit() and fit_multiple(). But until now the code was not updated to support fit_multiple(). When called with warm-start=TRUE, this was causing improper initialization of weights for small datasets, and out-of-memory errors for larger datasets. --- src/ports/postgres/modules/deep_learning/madlib_keras.py_in | 6 +++--- .../deep_learning/madlib_keras_fit_multiple_model.py_in | 12 +++++++++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in index 01f9152..04458c4 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in @@ -334,7 +334,7 @@ def fit(schema_madlib, source_table, model, model_arch_table, reset_cuda_env(original_cuda_env) def get_initial_weights(model_table, model_arch, serialized_weights, warm_start, - use_gpus, accessible_gpus_for_seg): + use_gpus, accessible_gpus_for_seg, mst_filter=''): """ If warm_start is True, return back initial weights from model table. If warm_start is False, first try to get the weights from model_arch @@ -359,8 +359,8 @@ def get_initial_weights(model_table, model_arch, serialized_weights, warm_start, if warm_start: serialized_weights = plpy.execute(""" - SELECT model_weights FROM {0} - """.format(model_table))[0]['model_weights'] + SELECT model_weights FROM {model_table} {mst_filter} LIMIT 1 + """.format(**locals()))[0]['model_weights'] else: if not serialized_weights: model = model_from_json(model_arch) diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in index 97c3c60..1bb77f0 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in @@ -341,13 +341,23 @@ class FitMultipleModel(): # used, even if a particular model doesn't have warm start weigths. if self.warm_start: model_weights = None + mst_filter = """ + WHERE {mst_col}={mst_key} + """.format( + mst_col=self.mst_key_col, + mst_key=mst['mst_key'] + ) + + else: + mst_filter = '' serialized_weights = get_initial_weights(self.model_output_table, model_arch, model_weights, mst['mst_key'] in warm_start_msts, self.use_gpus, - self.accessible_gpus_for_seg + self.accessible_gpus_for_seg, + mst_filter ) model_size = sys.getsizeof(serialized_weights) / 1024.0