Github user kaknikhil commented on a diff in the pull request:
https://github.com/apache/madlib/pull/283#discussion_r198641216
--- Diff: src/ports/postgres/modules/crf/test/crf_train_large.sql_in ---
@@ -234,26 +234,40 @@ INSERT INTO train_new_segmenttbl VALUES
(30, 7, 'years', 13, 31),
(31, 7, '.', 44, 31);
-CREATE TABLE train_new_regex(pattern text,name text);
+CREATE TABLE train_new_regex(pattern text,name text);
INSERT INTO train_new_regex VALUES
-('^[A-Z][a-z]+$','InitCapital'), ('^[A-Z]+$','isAllCapital'),
+ ('^[A-Z][a-z]+$','InitCapital'), ('^[A-Z]+$','isAllCapital'),
('^.*[0-9]+.*$','containsDigit'),('^.+[.]$','endsWithDot'),
('^.+[,]$','endsWithComma'), ('^.+er$','endsWithER'),
('^.+est$','endsWithEst'), ('^.+ed$','endsWithED'),
('^.+s$','endsWithS'), ('^.+ing$','endsWithIng'),
('^.+ly$','endsWithly'),
('^.+-.+$','isDashSeparatedWords'),
('^.*@.*$','isEmailId');
- analyze train_new_regex;
+analyze train_new_regex;
- SELECT crf_train_fgen('train_new_segmenttbl', 'train_new_regex',
'crf_label', 'train_new_dictionary',
'train_new_featuretbl','train_new_featureset');
+CREATE TABLE crf_label_new (id integer,label character varying);
--- End diff --
The two files `crf_test_small.sql_in` and `crf_train_large.sql_in` have
different indentation. Can we make them consistent?
---