Hi Patcharee, I wasn't able to reproduce either issue on Pig 0.14.0.
1:
--
grunt> dump join_height;
(1,1,2009,0,559,447,1,-4.964739,1,1,2009,0,559,447,1,109.71929)
grunt> describe join_height;
join_height: {r_four_dim1::date: int,r_four_dim1::month:
int,r_four_dim1::year: int,r_four_dim1::hh: int,r_four_dim1::x:
int,r_four_dim1::y: int,r_four_dim1::z: int,r_four_dim1::u:
float,result_height::date: int,result_height::month:
int,result_height::year: int,result_height::hh: int,result_height::x:
int,result_height::y: int,result_height::z: int,result_height::height:
float}
2:
--
grunt> dump join_height;
(1,1,2009,0,559,447,1,-4.964739,1,1,2009,0,559,447,1,109.71929)
grunt> describe join_height;
join_height: {r_four_dim1::date: int,r_four_dim1::month:
int,r_four_dim1::year: int,r_four_dim1::hh: int,r_four_dim1::x:
int,r_four_dim1::y: int,r_four_dim1::z: int,r_four_dim1::u:
float,result_height::date: int,result_height::month:
int,result_height::year: int,result_height::hh: int,result_height::x:
int,result_height::y: int,result_height::z: int,result_height::height:
float}
On Thu, Jan 8, 2015 at 4:19 AM, Patcharee Thongtra patch...@hotmail.com
wrote:
Hi,
I am new to Pig. I am using Pig version 0.12. I found unexpected
behaviour from a left join on multiple columns, as listed below:
--
...
...
dump r_four_dim1;
describe r_four_dim1;
dump result_height;
describe result_height;
join_height = join r_four_dim1 by (date, month, year, hh, x, y, z) LEFT
OUTER, result_height by (date, month, year, hh, x, y, z);
dump join_height;
describe join_height;
--
Result
--
(1,1,2009,0,559,447,1,-4.964739)
r_four_dim1: {date: int,month: int,year: int,hh: int,x: int,y: int,z:
int,u: float}
(1,1,2009,0,559,447,1,109.71929)
result_height: {date: int,month: int,year: int,hh: int,x: int,y: int,z:
int,height: float}
(1,1,2009,0,559,447,1,-4.964739)
join_height: {r_four_dim1::date: int,r_four_dim1::month:
int,r_four_dim1::year: int,r_four_dim1::hh: int,r_four_dim1::x:
int,r_four_dim1::y: int,r_four_dim1::z: int,r_four_dim1::u:
float,result_height::date: int,result_height::month:
int,result_height::year: int,result_height::hh: int,result_height::x:
int,result_height::y: int,result_height::z: int,result_height::height:
float}
--
The left join did not work as expected: the dumped row contains only the
r_four_dim1 fields. In addition, when I tried to join on year only
(year: int), as below:
join_height = join r_four_dim1 by year LEFT OUTER, result_height by year;
dump join_height;
describe join_height;
I got a ClassCastException:
ERROR 2999: Unexpected internal error. java.lang.String cannot be cast
to java.lang.Integer
java.lang.ClassCastException: java.lang.String cannot be cast to
java.lang.Integer
at
org.apache.pig.backend.hadoop.HDataType.getWritableComparableTypes(HDataType.java:115)
at
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapReduce$Map.collect(PigGenericMapReduce.java:111)
at
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.runPipeline(PigGenericMapBase.java:284)
at
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:277)
at
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:64)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1594)
Any suggestions?
BR,
Patcharee