orhankislal commented on a change in pull request #518:
URL: https://github.com/apache/madlib/pull/518#discussion_r493744195
##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_automl.py_in
##########
@@ -500,63 +648,432 @@ class KerasAutoML():
                     "b (key integer, s_val integer, i_val integer) WHERE t.mst_key=b.key".format(self=self, l=l)
plpy.execute(query)
- def update_model_selection_table(self):
+# @MinWarning("warning")
+class AutoMLHyperopt(KerasAutoML):
+ """
+    This class implements Hyperopt, another automl method that explores awkward search spaces using
+    Random Search, Tree-structured Parzen Estimator (TPE), or Adaptive TPE.
+
+    This function executes hyperopt on top of our multiple model training infrastructure powered with
+    Model hOpper Parallelism (MOP), a hybrid of data and task parallelism.
+
+ This automl method inherits qualities from the automl class.
+ """
+    def __init__(self, schema_madlib, source_table, model_output_table, model_arch_table, model_selection_table,
+                 model_id_list, compile_params_grid, fit_params_grid, automl_method='hyperopt',
+                 automl_params='num_models=20, num_iters=5, algorithm=tpe', random_state=None, object_table=None,
+                 use_gpus=False, validation_table=None, metrics_compute_frequency=None,
+                 name=None, description=None, **kwargs):
+        KerasAutoML.__init__(self, schema_madlib, source_table, model_output_table, model_arch_table,
+                             model_selection_table, model_id_list, compile_params_grid, fit_params_grid,
+                             automl_method, automl_params, random_state, object_table, use_gpus,
+                             validation_table, metrics_compute_frequency, name, description, **kwargs)
+        self.compile_params_grid = self.compile_params_grid.replace('\n', '').replace(' ', '')
+        self.fit_params_grid = self.fit_params_grid.replace('\n', '').replace(' ', '')
+ try:
+ self.compile_params_grid = literal_eval(self.compile_params_grid)
+
+ except:
+ plpy.error("Invalid syntax in 'compile_params_dict'")
+ try:
+ self.fit_params_grid = literal_eval(self.fit_params_grid)
+ except:
+ plpy.error("Invalid syntax in 'fit_params_dict'")
+ self.validate_and_define_inputs()
+
+ self.num_workers = get_seg_number() * get_segments_per_host()
+
+ self.create_model_output_table()
+ self.create_model_output_info_table()
+ self.find_hyperopt_config()
+
+ def validate_and_define_inputs(self):
+        automl_params_dict = extract_keyvalue_params(self.automl_params,
+                                                     default_values={'num_models': 20,
+                                                                     'num_iters': 5,
+                                                                     'algorithm': 'tpe'},
+                                                     lower_case_names=True)
+ # casting relevant values to int
+ for i in automl_params_dict:
+ try:
+ automl_params_dict[i] = int(automl_params_dict[i])
+ except ValueError:
+ pass
+        _assert(len(automl_params_dict) >= 1 and len(automl_params_dict) <= 3,
+                "DL: Only num_models, num_iters, and algorithm may be specified")
+ for i in automl_params_dict:
+ if i == AutoMLSchema.NUM_MODELS:
+ self.num_models = automl_params_dict[AutoMLSchema.NUM_MODELS]
+ elif i == AutoMLSchema.NUM_ITERS:
+ self.num_iters = automl_params_dict[AutoMLSchema.NUM_ITERS]
+ elif i == AutoMLSchema.ALGORITHM:
+                if automl_params_dict[AutoMLSchema.ALGORITHM].lower() == 'rand':
+                    self.algorithm = rand
+                elif automl_params_dict[AutoMLSchema.ALGORITHM].lower() == 'tpe':
+                    self.algorithm = tpe
+                # elif automl_params_dict[AutoMLSchema.ALGORITHM].lower() == 'atpe':
+                #     self.algorithm = atpe
+                # uncomment the above lines after atpe works
+                else:
+                    plpy.error("DL: algorithm in 'automl_params' must be 'rand', 'tpe'")  # , or 'atpe'
+ else:
+ plpy.error("DL: {0} is an invalid automl param".format(i))
+        _assert(self.num_models > 0 and self.num_iters > 0, "DL: num_models and num_iters in 'automl_params' "
+                                                            "must be positive")
+        _assert(self._is_valid_metrics_compute_frequency(self.num_iters), "DL: 'metrics_compute_frequency' "
+                                                                          "out of iteration range")
+
+ def find_hyperopt_config(self):
"""
-        Drops and re-create the mst table to only include the best performing model configuration.
+        Executes hyperopt on top of MOP.
"""
-        drop_tables([self.model_selection_table])
-        # only retaining best performing config
-        plpy.execute("CREATE TABLE {self.model_selection_table} AS SELECT mst_key, model_id, compile_params, " \
-                     "fit_params FROM {self.model_info_table} " \
-                     "ORDER BY {AutoMLSchema.LOSS_METRIC} LIMIT 1".format(self=self, AutoMLSchema=AutoMLSchema))
+ make_mst_summary = True
+ trials = Trials()
+ domain = Domain(None, self.get_search_space())
+ rand_state = np.random.RandomState(self.random_state)
+ configs_lst = self.get_configs_list()
- def generate_model_output_summary_table(self, model_training):
+ self.start_training_time = self.get_current_timestamp()
+ for low, high in configs_lst:
+ i, n = low, high - low + 1
+
+ # Using HyperOpt TPE/ATPE to generate parameters
+ hyperopt_params = []
+ sampled_params = []
+ for j in range(i, i + n):
+                new_param = self.algorithm.suggest([j], domain, trials, rand_state.randint(0, 2 ** 31 - 1))
Review comment:
I don't think we should calculate 2**31 every time we run this. Setting
an INT_MAX constant would be better.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]