GitHub user njayaram2 commented on a diff in the pull request:
https://github.com/apache/madlib/pull/243#discussion_r175950079
--- Diff: src/ports/postgres/modules/convex/mlp_igd.py_in ---
@@ -292,26 +329,33 @@ def mlp(schema_madlib, source_table, output_table,
                                  independent_varname,
             # used, it will be an empty list if there was no grouping.
             groups = [t[col_grp_key] for t in res if t[col_grp_key]]
             losses = [t['loss'] for t in res]
-            loss = zip(groups, losses) if len(groups)==len(losses) \
-                else losses
-            plpy.info("Iteration: " + str(it.iteration) + ", Loss: <" + \
-                      ', '.join([str(l) for l in loss]) + ">")
+            loss = zip(groups, losses) if groups else losses
+            plpy.info("Iteration: {0}, Loss: <{1}>".
+                      format(it.iteration, ', '.join(map(str, loss))))
     it.final()
     _update_temp_model_table(it_args, it.iteration, temp_output_table,
-                             first_try)
+                             is_minibatch_enabled, first_try)
     first_try = False
-    layer_sizes_str = py_list_to_sql_string(
-        layer_sizes, array_type="integer")
-    classes_str = py_list_to_sql_string(
-        [strip_end_quotes(cl, "'") for cl in classes],
-        array_type=dependent_type)
+    layer_sizes_str = py_list_to_sql_string(layer_sizes,
+                                            array_type="integer")
+
     _create_summary_table(locals())
-    _create_standardization_table(standardization_table, x_mean_table,
-                                  warm_start)
+    if is_minibatch_enabled:
+        # We already have the mean and std in the input standardization table.
+        input_std_table = add_postfix(source_table, '_standardization')
+        _create_standardization_table(standardization_table, input_std_table,
+                                      warm_start)
+    else:
+        _create_standardization_table(standardization_table, x_mean_table,
+                                      warm_start)
+        # For mini-batch, tbl_data_scaled is the original input table.
+        # Do NOT drop it in that case; that would drop the original data table.
+        plpy.execute("DROP TABLE IF EXISTS {0}".format(tbl_data_scaled))
+        plpy.execute("DROP TABLE IF EXISTS {0}".format(x_mean_table))
--- End diff ---
Yes.
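
For readers outside the PL/Python context, here is a minimal standalone sketch of the two patterns in this diff. Neither block is MADlib's actual code path: `plpy` only exists inside PL/Python, so `print` stands in for `plpy.info`/`plpy.execute`, and the sample rows, the `col_grp_key` value, and the helper stubs below are hypothetical.

```python
# Grouped-loss logging: pair each group key with its loss when grouping
# is in effect, otherwise report the plain loss list. `if groups`
# replaces the old `len(groups) == len(losses)` check, since `groups`
# is empty exactly when there was no grouping.
res = [{'grp_key': 'g1', 'loss': 0.42},   # hypothetical result rows
       {'grp_key': 'g2', 'loss': 0.37}]
col_grp_key = 'grp_key'                   # hypothetical column name
iteration = 3

groups = [t[col_grp_key] for t in res if t[col_grp_key]]
losses = [t['loss'] for t in res]
loss = zip(groups, losses) if groups else losses
print("Iteration: {0}, Loss: <{1}>".format(iteration,
                                           ', '.join(map(str, loss))))
# -> Iteration: 3, Loss: <('g1', 0.42), ('g2', 0.37)>
```

And a sketch of the standardization/cleanup branch, with the MADlib helpers (`add_postfix`, `_create_standardization_table`) and `plpy.execute` replaced by hypothetical stubs:

```python
def add_postfix(table, postfix):          # stub for the MADlib helper
    return table + postfix

def create_standardization_table(dest, src, warm_start):  # stub
    print("persist std stats: {0} <- {1}".format(dest, src))

def execute(sql):                         # stands in for plpy.execute
    print(sql)

def finalize(source_table, standardization_table, x_mean_table,
             tbl_data_scaled, is_minibatch_enabled, warm_start=False):
    if is_minibatch_enabled:
        # The mini-batch preprocessor already wrote the mean and std
        # alongside the input, so reuse that table. In this path
        # tbl_data_scaled IS the original input table and must not be
        # dropped, or the user's data would be destroyed.
        input_std_table = add_postfix(source_table, '_standardization')
        create_standardization_table(standardization_table,
                                     input_std_table, warm_start)
    else:
        # Non-mini-batch path: the scaled copy and the mean table are
        # temporaries owned by this run and can be dropped safely.
        create_standardization_table(standardization_table,
                                     x_mean_table, warm_start)
        execute("DROP TABLE IF EXISTS {0}".format(tbl_data_scaled))
        execute("DROP TABLE IF EXISTS {0}".format(x_mean_table))

# Demo: the mini-batch path reuses the preprocessor's table and drops nothing.
finalize('mlp_in', 'mlp_out_std', 'tmp_x_mean', 'mlp_in',
         is_minibatch_enabled=True)
```

The branch is really about table ownership: in the mini-batch path the preprocessor owns the scaled table, so the trainer must not drop it; in the non-mini-batch path the scaled copy and the mean table are run-local temporaries.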
---