[
https://issues.apache.org/jira/browse/MADLIB-1327?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16891487#comment-16891487
]
Frank McQuillan commented on MADLIB-1327:
-----------------------------------------
code for testing
{code}
-- Recreate the fixture table from scratch so the script can be re-run.
DROP TABLE IF EXISTS test_data;

-- One row per (transaction, product) pair.
CREATE TABLE test_data (
    trans_id INTEGER,
    product  TEXT
);
-- Fixture rows for test_data: 60 (trans_id, product) pairs across transactions 1-7.
-- The column list is explicit so the load does not depend on column order.
-- The same 15-row basket pattern is loaded four times, each time with a
-- different spelling of the three product names, so every transaction ends up
-- holding four times as many distinct items.

-- Variant 1: original product names.
INSERT INTO test_data (trans_id, product) VALUES
    (1, 'beer'),
    (1, 'diapers'),
    (1, 'chips'),
    (2, 'beer'),
    (2, 'diapers'),
    (3, 'beer'),
    (3, 'diapers'),
    (4, 'beer'),
    (4, 'chips'),
    (5, 'beer'),
    (6, 'beer'),
    (6, 'diapers'),
    (6, 'chips'),
    (7, 'beer'),
    (7, 'diapers');

-- Variant 2: product names with the last character removed.
INSERT INTO test_data (trans_id, product) VALUES
    (1, 'bee'),
    (1, 'diaper'),
    (1, 'chip'),
    (2, 'bee'),
    (2, 'diaper'),
    (3, 'bee'),
    (3, 'diaper'),
    (4, 'bee'),
    (4, 'chip'),
    (5, 'bee'),
    (6, 'bee'),
    (6, 'diaper'),
    (6, 'chip'),
    (7, 'bee'),
    (7, 'diaper');

-- Variant 3: product names with the last two characters removed.
INSERT INTO test_data (trans_id, product) VALUES
    (1, 'be'),
    (1, 'diape'),
    (1, 'chi'),
    (2, 'be'),
    (2, 'diape'),
    (3, 'be'),
    (3, 'diape'),
    (4, 'be'),
    (4, 'chi'),
    (5, 'be'),
    (6, 'be'),
    (6, 'diape'),
    (6, 'chi'),
    (7, 'be'),
    (7, 'diape');

-- Variant 4: the variant-3 names with a trailing 'z'.
INSERT INTO test_data (trans_id, product) VALUES
    (1, 'bez'),
    (1, 'diapez'),
    (1, 'chiz'),
    (2, 'bez'),
    (2, 'diapez'),
    (3, 'bez'),
    (3, 'diapez'),
    (4, 'bez'),
    (4, 'chiz'),
    (5, 'bez'),
    (6, 'bez'),
    (6, 'diapez'),
    (6, 'chiz'),
    (7, 'bez'),
    (7, 'diapez');
-- Run MADlib association-rule mining over test_data, with max RHS set to 1.
SELECT *
FROM madlib.assoc_rules(
    0.25,         -- support
    0.5,          -- confidence
    'trans_id',   -- transaction id column
    'product',    -- product column
    'test_data',  -- input data
    NULL,         -- output schema
    FALSE,        -- verbose output
    NULL,         -- max itemset size (default 10)
    NULL,         -- max LHS
    1             -- max RHS
);
-- Sanity checks on the generated assoc_rules table.
-- NOTE(review): pre/post presumably hold each rule's LHS/RHS item arrays; confirm against the MADlib docs.

-- Largest combined item count (pre plus post) of any rule.
SELECT max(array_length(array_cat(pre, post), 1))
FROM assoc_rules;

-- Largest number of items in pre.
SELECT max(array_length(pre, 1))
FROM assoc_rules;

-- Average number of items in pre.
SELECT avg(array_length(pre, 1))
FROM assoc_rules;

-- Number of rules whose pre holds exactly one item.
SELECT count(*)
FROM assoc_rules
WHERE array_length(pre, 1) = 1;

-- Average combined item count (pre plus post) per rule.
SELECT avg(array_length(array_cat(pre, post), 1))
FROM assoc_rules;

-- Largest number of items in post.
SELECT max(array_length(post, 1))
FROM assoc_rules;

-- Average number of items in post.
SELECT avg(array_length(post, 1))
FROM assoc_rules;

-- Number of rules whose post holds exactly one item.
SELECT count(*)
FROM assoc_rules
WHERE array_length(post, 1) = 1;
{code}
> Add option to set number of posterior in association rules
> ----------------------------------------------------------
>
> Key: MADLIB-1327
> URL: https://issues.apache.org/jira/browse/MADLIB-1327
> Project: Apache MADlib
> Issue Type: Improvement
> Components: Module: Association Rules
> Reporter: Frank McQuillan
> Assignee: Orhan Kislal
> Priority: Major
> Fix For: v1.17
>
> Attachments: Association_rules_informal_scale_test_-_Google_Sheets.pdf
>
>
> Goal is to speed up rule generation. The arules package in R limits the
> posterior to 1 item.
> In MADlib, this feature should let users cap the number of posterior items
> (at 1, 2, ...) in order to reduce run-time.
> Does it make sense to specify number of anterior items too?
--
This message was sent by Atlassian JIRA
(v7.6.14#76016)