Sam created DATAFU-38:
-------------------------

             Summary: BagGroup merges rows
                 Key: DATAFU-38
                 URL: https://issues.apache.org/jira/browse/DATAFU-38
             Project: DataFu
          Issue Type: Bug
            Reporter: Sam


Load the following data:
{code}
1,a,A,1
1,b,A,2
1,a,B,3
2,c,C,4
2,b,B,5
2,b,C,6
{code}
using {{tmp_datafu = load 'test' using PigStorage(',') as (id:chararray, 
domain:chararray, keyword:chararray, weight:int);}}
and then run:
{code}
tmp_roll = foreach (group tmp_datafu by id) generate
  group as id,
  CountEach(tmp_datafu.domain) as domains,
  BagGroup(tmp_datafu.(keyword,weight),tmp_datafu.keyword) as keywords;
{code}
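The actual result is shown below. Note that the keywords bag for id 2 still
contains the tuples that were grouped for id 1, i.e. (B,3), (A,1) and (A,2):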
{code}
(1,{(b,1),(a,2)},{(B,{(B,3)}),(A,{(A,1),(A,2)})})
(2,{(c,1),(b,2)},{(B,{(B,3),(B,5)}),(A,{(A,1),(A,2)}),(C,{(C,4),(C,6)})})
{code}
instead of the expected result:
{code}
(1,{(b,1),(a,2)},{(B,{(B,3)}),(A,{(A,1),(A,2)})})
(2,{(c,1),(b,2)},{(B,{(B,5)}),(C,{(C,4),(C,6)})})
{code}
See also:
http://stackoverflow.com/questions/22945236/how-do-i-accumulate-vectors-into-a-map



--
This message was sent by Atlassian JIRA
(v6.2#6252)

Reply via email to