Github user kaknikhil commented on a diff in the pull request: https://github.com/apache/madlib/pull/243#discussion_r175888098 --- Diff: src/ports/postgres/modules/convex/mlp_igd.py_in --- @@ -590,51 +664,103 @@ def _validate_warm_start(output_table, summary_table, standardization_table, output_table + ". Invalid number of coefficients in model.") return coeff +def _validate_dependent_var(source_table, dependent_varname, + is_classification, is_minibatch_enabled): + expr_type = get_expr_type(dependent_varname, source_table) + int_types = ['integer', 'smallint', 'bigint'] + text_types = ['text', 'varchar', 'character varying', 'char', 'character'] + boolean_types = ['boolean'] + float_types = ['double precision', 'real'] + classification_types = int_types + boolean_types + text_types + regression_types = int_types + float_types + validate_type = classification_types if is_classification else regression_types + + if is_minibatch_enabled: + # With pre-processed data, dep type is always an array + _assert("[]" in expr_type, + "Dependent variable column should refer to an array.") + # The dependent variable is always a double precision array in + # preprocessed data (so use regression_types) + # strip out '[]' from expr_type + _assert(expr_type[:-2] in regression_types, --- End diff -- There are other numeric types like `decimal`, `numeric` etc. That makes me think if we really need this assert ? Same for the regression case for igd at line 696 And if we really want to assert this, consider using the function `is_psql_numeric_type` in `utilities_py.in`
---