[ 
https://issues.apache.org/jira/browse/PIG-5165?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15965398#comment-15965398
 ] 

liyunzhang_intel commented on PIG-5165:
---------------------------------------

The predecessors of POSkewedJoin are in the correct order after the combiner
optimization, but before the multiquery optimization the predecessors of
POSkewedJoin are inverted. See the following plans (compare the inputs of
SkewedJoin under "Spark node scope-264" in each):
{code}
after combiner optimization:
scope-228->scope-230 scope-237 
scope-230->scope-243 
scope-237->scope-243 
scope-243->scope-245 scope-264 
scope-245
scope-249->scope-264 
scope-264
#--------------------------------------------------
# Spark Plan                                  
#--------------------------------------------------

Spark node scope-228
Store(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp615375446:org.apache.pig.impl.io.InterStorage)
 - scope-229
|
|---a: New For Each(false,false,false)[bag] - scope-152
    |   |
    |   Project[bytearray][0] - scope-145
    |   |
    |   Project[bytearray][1] - scope-147
    |   |
    |   Cast[float] - scope-150
    |   |
    |   |---Project[bytearray][2] - scope-149
    |
    |---a: 
Load(hdfs://zly1.sh.intel.com:8020/user/root/studentnulltab10k:org.apache.pig.builtin.PigStorage)
 - scope-144--------

Spark node scope-230
Store(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp-2031963854:org.apache.pig.impl.io.InterStorage)
 - scope-231
|
|---b: Filter[bag] - scope-156
    |   |
    |   Greater Than or Equal[boolean] - scope-160
    |   |
    |   |---Cast[double] - scope-158
    |   |   |
    |   |   |---Project[float][2] - scope-157
    |   |
    |   |---Constant(3.9) - scope-159
    |
    |---a: Filter[bag] - scope-154
        |   |
        |   Constant(true) - scope-155
        |
        
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp615375446:org.apache.pig.impl.io.InterStorage)
 - scope-152--------

Spark node scope-243
Store(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp287701784:org.apache.pig.impl.io.InterStorage)
 - scope-244
|
|---a1: Union[bag] - scope-207
    |
    |---b3: Union[bag] - scope-180
    |   |
    |   |---b1: New For Each(false,false,false)[bag] - scope-170
    |   |   |   |
    |   |   |   Project[bytearray][0] - scope-164
    |   |   |   |
    |   |   |   Project[bytearray][1] - scope-166
    |   |   |   |
    |   |   |   Project[float][2] - scope-168
    |   |   |
    |   |   |---b: Filter[bag] - scope-162
    |   |       |   |
    |   |       |   Constant(true) - scope-163
    |   |       |
    |   |       
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp-2031963854:org.apache.pig.impl.io.InterStorage)
 - scope-156
    |   |
    |   |---b2: New For Each(false,false,false)[bag] - scope-179
    |       |   |
    |       |   Project[bytearray][0] - scope-173
    |       |   |
    |       |   Project[bytearray][1] - scope-175
    |       |   |
    |       |   Project[float][2] - scope-177
    |       |
    |       |---b: Filter[bag] - scope-171
    |           |   |
    |           |   Constant(true) - scope-172
    |           |
    |           
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp-2031963854:org.apache.pig.impl.io.InterStorage)
 - scope-233
    |
    |---c3: Union[bag] - scope-206
        |
        |---c1: New For Each(false,false,false)[bag] - scope-196
        |   |   |
        |   |   Project[bytearray][0] - scope-190
        |   |   |
        |   |   Project[bytearray][1] - scope-192
        |   |   |
        |   |   Project[float][2] - scope-194
        |   |
        |   |---c: Filter[bag] - scope-188
        |       |   |
        |       |   Constant(true) - scope-189
        |       |
        |       
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp-1315218881:org.apache.pig.impl.io.InterStorage)
 - scope-183
        |
        |---c2: New For Each(false,false,false)[bag] - scope-205
            |   |
            |   Project[bytearray][0] - scope-199
            |   |
            |   Project[bytearray][1] - scope-201
            |   |
            |   Project[float][2] - scope-203
            |
            |---c: Filter[bag] - scope-197
                |   |
                |   Constant(true) - scope-198
                |
                
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp-1315218881:org.apache.pig.impl.io.InterStorage)
 - scope-240--------

Spark node scope-245
a1: 
Store(hdfs://zly1.sh.intel.com:8020/user/root/MultiQuery_Union_7.out.1:org.apache.pig.builtin.PigStorage)
 - scope-211
|
|---a1: Filter[bag] - scope-209
    |   |
    |   Constant(true) - scope-210
    |
    
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp287701784:org.apache.pig.impl.io.InterStorage)
 - scope-207--------

Spark node scope-264
e: 
Store(hdfs://zly1.sh.intel.com:8020/user/root/MultiQuery_Union_7.out.2:org.apache.pig.builtin.PigStorage)
 - scope-227
|
|---e: SkewedJoin[tuple] - scope-226
    |   |
    |   Project[bytearray][0] - scope-224
    |   |
    |   Project[bytearray][0] - scope-225
    |
    |---a1: Filter[bag] - scope-212
    |   |   |
    |   |   Constant(true) - scope-213
    |   |
    |   
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp287701784:org.apache.pig.impl.io.InterStorage)
 - scope-246
    |
    |---d: New For Each(false,false,false,false)[bag] - scope-223
        |   |
        |   Project[bytearray][0] - scope-215
        |   |
        |   Project[bytearray][1] - scope-217
        |   |
        |   Project[bytearray][2] - scope-219
        |   |
        |   Project[bytearray][3] - scope-221
        |
        |---d: 
Load(hdfs://zly1.sh.intel.com:8020/user/root/voternulltab10k:org.apache.pig.builtin.PigStorage)
 - scope-214--------

Spark node scope-237
Store(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp-1315218881:org.apache.pig.impl.io.InterStorage)
 - scope-238
|
|---c: Filter[bag] - scope-183
    |   |
    |   Less Than[boolean] - scope-186
    |   |
    |   |---Project[float][2] - scope-184
    |   |
    |   |---Constant(2.0) - scope-185
    |
    |---a: Filter[bag] - scope-181
        |   |
        |   Constant(true) - scope-182
        |
        
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp615375446:org.apache.pig.impl.io.InterStorage)
 - scope-236--------

Spark node scope-249
BroadcastSpark - scope-263
|
|---New For Each(false)[tuple] - scope-262
    |   |
    |   POUserFunc(org.apache.pig.impl.builtin.PartitionSkewedKeys)[tuple] - 
scope-261
    |   |
    |   |---Project[tuple][*] - scope-260
    |
    |---New For Each(false,false)[tuple] - scope-259
        |   |
        |   Constant(3) - scope-258
        |   |
        |   Project[bag][1] - scope-257
        |
        |---POSparkSort[tuple]() - scope-226
            |   |
            |   Project[bytearray][0] - scope-224
            |
            |---New For Each(false,true)[tuple] - scope-256
                |   |
                |   Project[bytearray][0] - scope-224
                |   |
                |   
POUserFunc(org.apache.pig.impl.builtin.GetMemNumRows)[tuple] - scope-254
                |   |
                |   |---Project[tuple][*] - scope-253
                |
                |---PoissonSampleSpark - scope-255
                    |
                    |---a1: Filter[bag] - scope-250
                        |   |
                        |   Constant(true) - scope-251
                        |
                        
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp287701784:org.apache.pig.impl.io.InterStorage)
 - scope-252--------
before multiquery optimization:
scope-228->scope-230 scope-237 
scope-230->scope-243 
scope-237->scope-243 
scope-243->scope-245 scope-264 
scope-245
scope-249->scope-264 
scope-264
#--------------------------------------------------
# Spark Plan                                  
#--------------------------------------------------

Spark node scope-228
Store(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp615375446:org.apache.pig.impl.io.InterStorage)
 - scope-229
|
|---a: New For Each(false,false,false)[bag] - scope-152
    |   |
    |   Project[bytearray][0] - scope-145
    |   |
    |   Project[bytearray][1] - scope-147
    |   |
    |   Cast[float] - scope-150
    |   |
    |   |---Project[bytearray][2] - scope-149
    |
    |---a: 
Load(hdfs://zly1.sh.intel.com:8020/user/root/studentnulltab10k:org.apache.pig.builtin.PigStorage)
 - scope-144--------

Spark node scope-230
Store(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp-2031963854:org.apache.pig.impl.io.InterStorage)
 - scope-231
|
|---b: Filter[bag] - scope-156
    |   |
    |   Greater Than or Equal[boolean] - scope-160
    |   |
    |   |---Cast[double] - scope-158
    |   |   |
    |   |   |---Project[float][2] - scope-157
    |   |
    |   |---Constant(3.9) - scope-159
    |
    
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp615375446:org.apache.pig.impl.io.InterStorage)
 - scope-152--------

Spark node scope-243
Store(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp287701784:org.apache.pig.impl.io.InterStorage)
 - scope-244
|
|---a1: Union[bag] - scope-207
    |
    |---b3: Union[bag] - scope-180
    |   |
    |   |---b1: New For Each(false,false,false)[bag] - scope-170
    |   |   |   |
    |   |   |   Project[bytearray][0] - scope-164
    |   |   |   |
    |   |   |   Project[bytearray][1] - scope-166
    |   |   |   |
    |   |   |   Project[float][2] - scope-168
    |   |   |
    |   |   
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp-2031963854:org.apache.pig.impl.io.InterStorage)
 - scope-156
    |   |
    |   |---b2: New For Each(false,false,false)[bag] - scope-179
    |       |   |
    |       |   Project[bytearray][0] - scope-173
    |       |   |
    |       |   Project[bytearray][1] - scope-175
    |       |   |
    |       |   Project[float][2] - scope-177
    |       |
    |       
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp-2031963854:org.apache.pig.impl.io.InterStorage)
 - scope-233
    |
    |---c3: Union[bag] - scope-206
        |
        |---c1: New For Each(false,false,false)[bag] - scope-196
        |   |   |
        |   |   Project[bytearray][0] - scope-190
        |   |   |
        |   |   Project[bytearray][1] - scope-192
        |   |   |
        |   |   Project[float][2] - scope-194
        |   |
        |   
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp-1315218881:org.apache.pig.impl.io.InterStorage)
 - scope-183
        |
        |---c2: New For Each(false,false,false)[bag] - scope-205
            |   |
            |   Project[bytearray][0] - scope-199
            |   |
            |   Project[bytearray][1] - scope-201
            |   |
            |   Project[float][2] - scope-203
            |
            
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp-1315218881:org.apache.pig.impl.io.InterStorage)
 - scope-240--------

Spark node scope-245
a1: 
Store(hdfs://zly1.sh.intel.com:8020/user/root/MultiQuery_Union_7.out.1:org.apache.pig.builtin.PigStorage)
 - scope-211
|
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp287701784:org.apache.pig.impl.io.InterStorage)
 - scope-207--------

Spark node scope-264
e: 
Store(hdfs://zly1.sh.intel.com:8020/user/root/MultiQuery_Union_7.out.2:org.apache.pig.builtin.PigStorage)
 - scope-227
|
|---e: SkewedJoin[tuple] - scope-226
    |   |
    |   Project[bytearray][0] - scope-224
    |   |
    |   Project[bytearray][0] - scope-225
    |
    |---d: New For Each(false,false,false,false)[bag] - scope-223
    |   |   |
    |   |   Project[bytearray][0] - scope-215
    |   |   |
    |   |   Project[bytearray][1] - scope-217
    |   |   |
    |   |   Project[bytearray][2] - scope-219
    |   |   |
    |   |   Project[bytearray][3] - scope-221
    |   |
    |   |---d: 
Load(hdfs://zly1.sh.intel.com:8020/user/root/voternulltab10k:org.apache.pig.builtin.PigStorage)
 - scope-214
    |
    
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp287701784:org.apache.pig.impl.io.InterStorage)
 - scope-246--------

Spark node scope-237
Store(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp-1315218881:org.apache.pig.impl.io.InterStorage)
 - scope-238
|
|---c: Filter[bag] - scope-183
    |   |
    |   Less Than[boolean] - scope-186
    |   |
    |   |---Project[float][2] - scope-184
    |   |
    |   |---Constant(2.0) - scope-185
    |
    
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp615375446:org.apache.pig.impl.io.InterStorage)
 - scope-236--------

Spark node scope-249
BroadcastSpark - scope-263
|
|---New For Each(false)[tuple] - scope-262
    |   |
    |   POUserFunc(org.apache.pig.impl.builtin.PartitionSkewedKeys)[tuple] - 
scope-261
    |   |
    |   |---Project[tuple][*] - scope-260
    |
    |---New For Each(false,false)[tuple] - scope-259
        |   |
        |   Constant(3) - scope-258
        |   |
        |   Project[bag][1] - scope-257
        |
        |---POSparkSort[tuple]() - scope-226
            |   |
            |   Project[bytearray][0] - scope-224
            |
            |---New For Each(false,true)[tuple] - scope-256
                |   |
                |   Project[bytearray][0] - scope-224
                |   |
                |   
POUserFunc(org.apache.pig.impl.builtin.GetMemNumRows)[tuple] - scope-254
                |   |
                |   |---Project[tuple][*] - scope-253
                |
                |---PoissonSampleSpark - scope-255
                    |
                    
|---Load(hdfs://zly1.sh.intel.com:8020/tmp/temp691912264/tmp287701784:org.apache.pig.impl.io.InterStorage)
 - scope-252--------

{code}

> MultiQuery_Union_7 is failing with spark exec type
> --------------------------------------------------
>
>                 Key: PIG-5165
>                 URL: https://issues.apache.org/jira/browse/PIG-5165
>             Project: Pig
>          Issue Type: Sub-task
>          Components: spark
>            Reporter: Nandor Kollar
>            Assignee: liyunzhang_intel
>             Fix For: spark-branch
>
>
> 1st output is fine, 2nd is different



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Reply via email to