khannaekta commented on code in PR #613: URL: https://github.com/apache/madlib/pull/613#discussion_r1496427774
########## src/ports/postgres/modules/pmml/formula.py_in: ########## @@ -2,25 +2,50 @@ import plpy import re class Formula(object): + def __init__(self, y_str, x_str, coef_len): - self.n_coef = coef_len + """ + :param y_str: Dependent variable used during training + :param x_str: Independent variable used during training. Can take + multiple formats like + 'ARRAY[1,x1,x2]', 'ARRAY[x1,x2]' or just 'x' + :param coef_len: Length of all the coefficients including the + intercept's coefficient(if any) + """ + # TODO: Fix the nested warning Review Comment: I didn't see any nested warning; do we still need this TODO? ########## src/ports/postgres/modules/pmml/formula.py_in: ########## @@ -2,25 +2,50 @@ import plpy import re class Formula(object): + def __init__(self, y_str, x_str, coef_len): - self.n_coef = coef_len + """ + :param y_str: Dependent variable used during training + :param x_str: Independent variable used during training. Can take + multiple formats like + 'ARRAY[1,x1,x2]', 'ARRAY[x1,x2]' or just 'x' + :param coef_len: Length of all the coefficients including the + intercept's coefficient(if any) + """ + # TODO: Fix the nested warning + self.array_expr = re.compile(r'array[[]([0-1],)?(["a-z0-9_, .]+)[]]', flags=re.I) + self.non_array_expr = re.compile(r'["a-z0-9_]+', flags=re.I) + + self.intercept = self.has_intercept(x_str) + self.all_coef_len = coef_len + if self.intercept: + self.feature_coef_len = coef_len - 1 + else: + self.feature_coef_len = coef_len self.y = y_str.replace('"','') self.x = self.parse(x_str) + # TODO: add comments Review Comment: Did you mean to add these comments as part of this PR?
########## src/ports/postgres/modules/pmml/test/pmml_glm_poisson.sql_in: ########## @@ -1,77 +1,59 @@ -DROP TABLE IF EXISTS warpbreaks CASCADE; - -CREATE TABLE warpbreaks( - id serial, - breaks integer, - wool char(1), - tension char(1), - g char(1) -); +\i m4_regexp(MADLIB_LIBRARY_PATH, + `\(.*\)/lib', + `\1/../modules/pmml/test/pmml.setup.sql_in' +) -INSERT INTO warpbreaks(breaks, wool, tension, g) VALUES -(26, 'A', 'L', '1'), -(30, 'A', 'L', '1'), -(54, 'A', 'L', '1'), -(25, 'A', 'L', '1'), -(70, 'A', 'L', '1'), -(52, 'A', 'L', '1'), -(51, 'A', 'L', '1'), -(26, 'A', 'L', '1'), -(67, 'A', 'L', '1'), -(18, 'A', 'M', '1'), -(21, 'A', 'M', '1'), -(29, 'A', 'M', '1'), -(17, 'A', 'M', '1'), -(12, 'A', 'M', '1'), -(18, 'A', 'M', '1'), -(35, 'A', 'M', '1'), -(30, 'A', 'M', '1'), -(36, 'A', 'M', '1'), -(36, 'A', 'H', '0'), -(21, 'A', 'H', '0'), -(24, 'A', 'H', '0'), -(18, 'A', 'H', '0'), -(10, 'A', 'H', '0'), -(43, 'A', 'H', '0'), -(28, 'A', 'H', '0'), -(15, 'A', 'H', '0'), -(26, 'A', 'H', '0'), -(27, 'B', 'L', '0'), -(14, 'B', 'L', '0'), -(29, 'B', 'L', '0'), -(19, 'B', 'L', '0'), -(29, 'B', 'L', '0'), -(31, 'B', 'L', '0'), -(41, 'B', 'L', '0'), -(20, 'B', 'L', '1'), -(44, 'B', 'L', '1'), -(42, 'B', 'M', '1'), -(26, 'B', 'M', '1'), -(19, 'B', 'M', '1'), -(16, 'B', 'M', '1'), -(39, 'B', 'M', '1'), -(28, 'B', 'M', '1'), -(21, 'B', 'M', '1'), -(39, 'B', 'M', '1'), -(29, 'B', 'M', '1'), -(20, 'B', 'H', '1'), -(21, 'B', 'H', '1'), -(24, 'B', 'H', '1'), -(17, 'B', 'H', '1'), -(13, 'B', 'H', '1'), -(15, 'B', 'H', '1'), -(15, 'B', 'H', '1'), -(16, 'B', 'H', '1'), -(28, 'B', 'H', '1'); +m4_changequote(`<!'', `!>'') -DROP TABLE IF EXISTS warpbreaks_dummy; -SELECT create_indicator_variables('warpbreaks', 'warpbreaks_dummy', 'wool,tension'); +DROP TABLE IF EXISTS warpbreaks CASCADE; +-------------------- with intercept ------------------------------ DROP TABLE IF EXISTS glm_model_sqrt, glm_model_sqrt_summary; SELECT glm('warpbreaks_dummy', 'glm_model_sqrt', 'breaks', - 
'ARRAY[1.0,"wool_B","tension_M", "tension_H"]', + 'ARRAY[1, "wool_B","tension_M", "tension_H"]', Review Comment: We probably need to update the regex so that an intercept written as 1.0 (and not only 1) also works. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@madlib.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org