[
https://issues.apache.org/jira/browse/DATAFU-39?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13982417#comment-13982417
]
Matthew Hayes commented on DATAFU-39:
-------------------------------------
Now that you've fixed BagGroup, you can implement this without the need for a
BagSum UDF. Pig allows you to use SUM within a nested FOREACH.
This test passes:
{code}
/**
define BagSum datafu.pig.bags.BagSum();
define BagGroup datafu.pig.bags.BagGroup();
data = LOAD 'input' USING PigStorage(',') AS (id:int, key:chararray, val:int);
describe data;
data2 = GROUP data BY id;
data3 = FOREACH data2 GENERATE group as id, BagGroup(data,data.key) as
grouped;
describe data3;
data4 = FOREACH data3 {
summed = FOREACH grouped GENERATE group as key, SUM($1.val) as total;
ordered = ORDER summed BY key;
GENERATE id, ordered;
}
describe data4;
STORE data4 INTO 'output';
*/
@Multiline
private String bagSumTest;
@Test
public void bagSumTest() throws Exception
{
PigTest test = createPigTestFromString(bagSumTest);
writeLinesToFile("input", "1,A,1","1,B,2","2,A,3","3,A,4","1,C,5","1,C,6",
"3,A,7","2,B,8","1,A,9","2,A,10");
test.runScript();
assertOutput(test, "data4",
"(1,{(A,10),(B,2),(C,11)})",
"(2,{(A,13),(B,8)})",
"(3,{(A,11)})");
}
{code}
> RFE: BagSum
> -----------
>
> Key: DATAFU-39
> URL: https://issues.apache.org/jira/browse/DATAFU-39
> Project: DataFu
> Issue Type: New Feature
> Reporter: Sam Steingold
>
> I need a new function {{BagSum}} which would help me solve the problem
> described in
> [http://stackoverflow.com/questions/22945236/how-do-i-accumulate-vectors-into-a-map].
> Test case:
> {code}
> /**
>
> define BagSum datafu.pig.bags.BagSum();
>
> data = LOAD 'input' AS (id:int, key:chararray, val:int);
> describe data;
>
> data2 = FOREACH (GROUP data BY id) GENERATE group as id,
> BagSum(data.(key,val),data.key) as keys;
> describe data2;
>
> STORE data2 INTO 'output';
> */
> @Multiline
> private String bagSumTest;
>
> @Test
> public void bagSumTest() throws Exception
> {
> PigTest test = createPigTestFromString(bagSumTest);
> writeLinesToFile("input",
> "(1,A,1)","(1,B,2)","(2,A,3)","(3,A,4)","(1,C,5)","(1,C,6)",
> "(3,A,7)","(2,B,8)","(1,A,9)","(2,A,10)");
> test.runScript();
> assertOutput(test, "data2", "(1,{(A,10),(B,2),(C,11)})",
> "(2,{(A,13),(B,8)})","(3,{(A,11)})");
> }
> {code}
> Thanks.
> (Alternatively, please tell me how to implement this using existing features.)
--
This message was sent by Atlassian JIRA
(v6.2#6252)