Github user kaknikhil commented on a diff in the pull request:

    https://github.com/apache/madlib/pull/243#discussion_r175895832

    --- Diff: src/ports/postgres/modules/convex/mlp_igd.py_in ---
    @@ -72,107 +73,127 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
         """
         warm_start = bool(warm_start)
         optimizer_params = _get_optimizer_params(optimizer_param_str or "")
    +
    +    tolerance = optimizer_params["tolerance"]
    +    n_iterations = optimizer_params["n_iterations"]
    +    step_size_init = optimizer_params["learning_rate_init"]
    +    iterations_per_step = optimizer_params["iterations_per_step"]
    +    power = optimizer_params["power"]
    +    gamma = optimizer_params["gamma"]
    +    step_size = step_size_init
    +    n_tries = optimizer_params["n_tries"]
    +    # lambda is a reserved word in python
    +    lmbda = optimizer_params["lambda"]
    +    batch_size = optimizer_params['batch_size']
    +    n_epochs = optimizer_params['n_epochs']
    +
         summary_table = add_postfix(output_table, "_summary")
         standardization_table = add_postfix(output_table, "_standardization")
    -    weights = '1' if not weights or not weights.strip() else weights.strip()
         hidden_layer_sizes = hidden_layer_sizes or []
    -    grouping_col = grouping_col or ""
    -    activation = _get_activation_function_name(activation)
    -    learning_rate_policy = _get_learning_rate_policy_name(
    -        optimizer_params["learning_rate_policy"])
    -    activation_index = _get_activation_index(activation)
    -
    +    # Note that we don't support weights with mini batching yet, so validate
    +    # this based on is_minibatch_enabled.
    +    weights = '1' if not weights or not weights.strip() else weights.strip()
         _validate_args(source_table, output_table, summary_table,
                        standardization_table, independent_varname,
                        dependent_varname, hidden_layer_sizes,
                        optimizer_params,
    -                   is_classification, weights, warm_start, activation,
    -                   grouping_col)
    +                   warm_start, activation, grouping_col)
    +    is_minibatch_enabled = check_if_minibatch_enabled(source_table, independent_varname)
    +    _validate_params_based_on_minibatch(source_table, independent_varname,
    +                                        dependent_varname, weights,
    +                                        is_classification,
    +                                        is_minibatch_enabled)
    +    activation = _get_activation_function_name(activation)
    +    learning_rate_policy = _get_learning_rate_policy_name(
    +        optimizer_params["learning_rate_policy"])
    +    activation_index = _get_activation_index(activation)
         reserved_cols = ['coeff', 'loss', 'n_iterations']
    +    grouping_col = grouping_col or ""
         grouping_str, grouping_col = get_grouping_col_str(schema_madlib, 'MLP',
                                                           reserved_cols,
                                                           source_table,
                                                           grouping_col)
    -    current_iteration = 1
    -    prev_state = None
    -    tolerance = optimizer_params["tolerance"]
    -    n_iterations = optimizer_params["n_iterations"]
    -    step_size_init = optimizer_params["learning_rate_init"]
    -    iterations_per_step = optimizer_params["iterations_per_step"]
    -    power = optimizer_params["power"]
    -    gamma = optimizer_params["gamma"]
    -    step_size = step_size_init
    -    n_tries = optimizer_params["n_tries"]
    -    # lambda is a reserved word in python
    -    lmbda = optimizer_params["lambda"]
    -    iterations_per_step = optimizer_params["iterations_per_step"]
    -    num_input_nodes = array_col_dimension(source_table,
    -                                          independent_varname)
    -    num_output_nodes = 0
    +    dependent_varname_backup = dependent_varname
    --- End diff --

Can we add a comment explaining why we need this backup variable?
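For illustration, the requested comment might read like the sketch below. The rationale it states is only an assumption inferred from this diff (the minibatch path appears to reassign dependent_varname further down in the function); the diff itself does not confirm it:

    # NOTE: the rationale below is an assumption, not confirmed by this diff.
    # Keep the user-supplied dependent variable name around: when minibatch is
    # enabled, dependent_varname is reassigned later to the column name produced
    # by the minibatch preprocessor, while the original name is still needed
    # afterwards (e.g. for metadata such as the summary table).
    dependent_varname_backup = dependent_varname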
---