khannaekta commented on code in PR #613:
URL: https://github.com/apache/madlib/pull/613#discussion_r1496427774


##########
src/ports/postgres/modules/pmml/formula.py_in:
##########
@@ -2,25 +2,50 @@ import plpy
 import re
 
 class Formula(object):
+
     def __init__(self, y_str, x_str, coef_len):
-        self.n_coef = coef_len
+        """
+        :param y_str:    Dependent variable used during training
+        :param x_str:    Independent variable used during training. Can take
+                         multiple formats like
+                         'ARRAY[1,x1,x2]', 'ARRAY[x1,x2]' or just 'x'
+        :param coef_len: Length of all the coefficients including the
+                         intercept's coefficient(if any)
+        """
+        # TODO: Fix the nested warning

Review Comment:
   I didn't see any nested warning; do we still need this TODO?



##########
src/ports/postgres/modules/pmml/formula.py_in:
##########
@@ -2,25 +2,50 @@ import plpy
 import re
 
 class Formula(object):
+
     def __init__(self, y_str, x_str, coef_len):
-        self.n_coef = coef_len
+        """
+        :param y_str:    Dependent variable used during training
+        :param x_str:    Independent variable used during training. Can take
+                         multiple formats like
+                         'ARRAY[1,x1,x2]', 'ARRAY[x1,x2]' or just 'x'
+        :param coef_len: Length of all the coefficients including the
+                         intercept's coefficient(if any)
+        """
+        # TODO: Fix the nested warning
+        self.array_expr = re.compile(r'array[[]([0-1],)?(["a-z0-9_, .]+)[]]', 
flags=re.I)
+        self.non_array_expr = re.compile(r'["a-z0-9_]+', flags=re.I)
+
+        self.intercept = self.has_intercept(x_str)
+        self.all_coef_len = coef_len
+        if self.intercept:
+            self.feature_coef_len = coef_len - 1
+        else:
+            self.feature_coef_len = coef_len
         self.y = y_str.replace('"','')
         self.x = self.parse(x_str)
 
+    # TODO: add comments

Review Comment:
   Did you mean to address this TODO as part of this PR?



##########
src/ports/postgres/modules/pmml/test/pmml_glm_poisson.sql_in:
##########
@@ -1,77 +1,59 @@
-DROP TABLE IF EXISTS warpbreaks CASCADE;
-
-CREATE TABLE warpbreaks(
-    id      serial,
-    breaks  integer,
-    wool    char(1),
-    tension char(1),
-    g       char(1)
-);
+\i m4_regexp(MADLIB_LIBRARY_PATH,
+             `\(.*\)/lib',
+              `\1/../modules/pmml/test/pmml.setup.sql_in'
+)
 
-INSERT INTO warpbreaks(breaks, wool, tension, g) VALUES
-(26, 'A', 'L', '1'),
-(30, 'A', 'L', '1'),
-(54, 'A', 'L', '1'),
-(25, 'A', 'L', '1'),
-(70, 'A', 'L', '1'),
-(52, 'A', 'L', '1'),
-(51, 'A', 'L', '1'),
-(26, 'A', 'L', '1'),
-(67, 'A', 'L', '1'),
-(18, 'A', 'M', '1'),
-(21, 'A', 'M', '1'),
-(29, 'A', 'M', '1'),
-(17, 'A', 'M', '1'),
-(12, 'A', 'M', '1'),
-(18, 'A', 'M', '1'),
-(35, 'A', 'M', '1'),
-(30, 'A', 'M', '1'),
-(36, 'A', 'M', '1'),
-(36, 'A', 'H', '0'),
-(21, 'A', 'H', '0'),
-(24, 'A', 'H', '0'),
-(18, 'A', 'H', '0'),
-(10, 'A', 'H', '0'),
-(43, 'A', 'H', '0'),
-(28, 'A', 'H', '0'),
-(15, 'A', 'H', '0'),
-(26, 'A', 'H', '0'),
-(27, 'B', 'L', '0'),
-(14, 'B', 'L', '0'),
-(29, 'B', 'L', '0'),
-(19, 'B', 'L', '0'),
-(29, 'B', 'L', '0'),
-(31, 'B', 'L', '0'),
-(41, 'B', 'L', '0'),
-(20, 'B', 'L', '1'),
-(44, 'B', 'L', '1'),
-(42, 'B', 'M', '1'),
-(26, 'B', 'M', '1'),
-(19, 'B', 'M', '1'),
-(16, 'B', 'M', '1'),
-(39, 'B', 'M', '1'),
-(28, 'B', 'M', '1'),
-(21, 'B', 'M', '1'),
-(39, 'B', 'M', '1'),
-(29, 'B', 'M', '1'),
-(20, 'B', 'H', '1'),
-(21, 'B', 'H', '1'),
-(24, 'B', 'H', '1'),
-(17, 'B', 'H', '1'),
-(13, 'B', 'H', '1'),
-(15, 'B', 'H', '1'),
-(15, 'B', 'H', '1'),
-(16, 'B', 'H', '1'),
-(28, 'B', 'H', '1');
+m4_changequote(`<!'', `!>'')
 
-DROP TABLE IF EXISTS warpbreaks_dummy;
-SELECT create_indicator_variables('warpbreaks', 'warpbreaks_dummy', 
'wool,tension');
+DROP TABLE IF EXISTS warpbreaks CASCADE;
 
+-------------------- with intercept ------------------------------
 DROP TABLE IF EXISTS glm_model_sqrt, glm_model_sqrt_summary;
 SELECT glm('warpbreaks_dummy',
            'glm_model_sqrt',
            'breaks',
-           'ARRAY[1.0,"wool_B","tension_M", "tension_H"]',
+           'ARRAY[1, "wool_B","tension_M", "tension_H"]',

Review Comment:
   We probably need to update the regex so that an intercept written as 1.0 (not just 1) also works.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@madlib.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to