This is an automated email from the ASF dual-hosted git repository.
okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
The following commit(s) were added to refs/heads/master by this push:
new 5f10bc8 DL: Modify multi-fit warm start to accept non-matching
mst&model tables
5f10bc8 is described below
commit 5f10bc8e72e88986cd109745dddec672fdaa1d84
Author: Orhan Kislal
AuthorDate: Tue Jan 7 19:36:34 2020 -0500
DL: Modify multi-fit warm start to accept non-matching mst&model tables
JIRA: MADLIB-1400 #resolve
The warm start enforced that the model table had to have a tuple for each
mst_key in the mst table for warm start. This commit relaxes this
requirement
so that users can add as well as subtract mst keys throughout their
AutoML progress.
Closes #466
---
.../madlib_keras_fit_multiple_model.py_in | 70 ++
.../deep_learning/madlib_keras_validator.py_in | 7 ---
.../test/madlib_keras_transfer_learning.sql_in | 24 +---
3 files changed, 60 insertions(+), 41 deletions(-)
diff --git
a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
index 5ce555a..273321e 100644
---
a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
+++
b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
@@ -162,8 +162,8 @@ class FitMultipleModel():
random.shuffle(self.msts_for_schedule)
self.grand_schedule = self.generate_schedule(self.msts_for_schedule)
self.gp_segment_id_col = '0' if is_platform_pg() else
GP_SEGMENT_ID_COLNAME
-if not self.warm_start:
-self.create_model_output_table()
+
+self.create_model_output_table()
self.weights_to_update_tbl = unique_string(desp='weights_to_update')
self.fit_multiple_model()
reset_cuda_env(original_cuda_env)
@@ -274,12 +274,26 @@ class FitMultipleModel():
plpy.execute(mst_insert_query)
def create_model_output_table(self):
-output_table_create_query = """
-CREATE TABLE {self.model_output_table}
-({self.mst_key_col} INTEGER PRIMARY KEY,
- {self.model_weights_col} BYTEA,
- {self.model_arch_col} JSON)
-""".format(self=self)
+warm_start_msts = []
+if self.warm_start:
+plpy.execute(""" DELETE FROM {self.model_output_table}
+WHERE {self.mst_key_col} NOT IN (
+SELECT {self.mst_key_col} FROM
{self.model_selection_table})
+""".format(self=self))
+warm_start_msts = plpy.execute(
+""" SELECT array_agg({0}) AS a FROM {1}
+""".format(self.mst_key_col, self.model_output_table))[0]['a']
+plpy.execute("DROP TABLE {0}".format(self.model_info_table))
+
+else:
+output_table_create_query = """
+CREATE TABLE {self.model_output_table}
+({self.mst_key_col} INTEGER PRIMARY
KEY,
+ {self.model_weights_col} BYTEA,
+ {self.model_arch_col} JSON)
+""".format(self=self)
+plpy.execute(output_table_create_query)
+
info_table_create_query = """
CREATE TABLE {self.model_info_table}
({self.mst_key_col} INTEGER PRIMARY KEY,
@@ -300,39 +314,32 @@ class FitMultipleModel():
validation_loss DOUBLE PRECISION[])
""".format(self=self)
-plpy.execute(output_table_create_query)
plpy.execute(info_table_create_query)
for mst in self.msts:
model_arch, model_weights =
get_model_arch_weights(self.model_arch_table,
mst[self.model_id_col])
+
+
+# If warm start is enabled, weights from transfer learning cannot
be
+# used, even if a particular model doesn't have warm start weigths.
+if self.warm_start:
+model_weights = None
+
serialized_weights = get_initial_weights(self.model_output_table,
model_arch,
model_weights,
- False,
+ mst['mst_key'] in
warm_start_msts,
self.use_gpus,