[ https://issues.apache.org/jira/browse/PIG-5444?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17785673#comment-17785673 ]
Koji Noguchi edited comment on PIG-5444 at 11/13/23 9:44 PM: ------------------------------------------------------------- Issue seems to be happening inside MultiQueryOptimizerTez.java. Before the change {noformat} Tez vertex scope-132 # Plan on vertex POValueOutputTez - scope-133 -> [scope-134, scope-140] | |---A: New For Each(false,false)[bag] - scope-95 | | | Cast[int] - scope-90 | | | |---Project[bytearray][0] - scope-89 | | | Cast[int] - scope-93 | | | |---Project[bytearray][1] - scope-92 | |---A: Load(hdfs://localhost:39746/user/gtrain/testFrJoinInput.txt:org.apache.pig.builtin.PigStorage) - scope-88 Tez vertex scope-136 # Plan on vertex B: Split - scope-148 | | | Local Rearrange[tuple]{int}(false) - scope-115 -> scope-134 | | | | | Project[int][0] - scope-111 | | | Local Rearrange[tuple]{int}(false) - scope-127 -> scope-140 | | | | | Project[int][1] - scope-123 | |---B: New For Each(false,false)[bag] - scope-106 | | | Cast[int] - scope-101 | | | |---Project[bytearray][0] - scope-100 | | | Cast[int] - scope-104 | | | |---Project[bytearray][1] - scope-103 | |---B: Load(hdfs://localhost:39746/user/gtrain/testFrJoinInput2.txt:org.apache.pig.builtin.PigStorage) - scope-99 Tez vertex scope-134 # Plan on vertex POValueOutputTez - scope-146 -> [scope-144] | |---C: FRJoin[tuple] - scope-112 <- scope-136 | | | Project[int][0] - scope-110 | | | Project[int][0] - scope-111 | |---POValueInputTez - scope-135 <- scope-132 Tez vertex scope-140 # Plan on vertex POValueOutputTez - scope-147 -> [scope-144] | |---D: FRJoin[tuple] - scope-124 <- scope-136 | | | Project[int][1] - scope-122 | | | Project[int][1] - scope-123 | |---POValueInputTez - scope-141 <- scope-132 Tez vertex scope-144 # Plan on vertex E: Store(hdfs://localhost:39746/tmp/temp906575730/tmp1776475591:org.apache.pig.impl.io.InterStorage) - scope-131 | |---POShuffledValueInputTez - scope-145 <- [scope-134, scope-140] {noformat} After MultiQueryOptimizerTez::visitTezOp {code} // If all other conditions were 
satisfied, but it had a successor union // with unsupported storefunc keep it in the tentative list. {code} and decides to merge scope-134 and scope-140, the plan becomes: {noformat} Tez vertex scope-136 # Plan on vertex B: Split - scope-148 | | | Local Rearrange[tuple]{int}(false) - scope-115 -> scope-132 | | | | | Project[int][0] - scope-111 | | | Local Rearrange[tuple]{int}(false) - scope-127 -> scope-132 | | | | | Project[int][1] - scope-123 | |---B: New For Each(false,false)[bag] - scope-106 | | | Cast[int] - scope-101 | | | |---Project[bytearray][0] - scope-100 | | | Cast[int] - scope-104 | | | |---Project[bytearray][1] - scope-103 | |---B: Load(hdfs://localhost:39746/user/gtrain/testFrJoinInput2.txt:org.apache.pig.builtin.PigStorage) - scope-99 Tez vertex scope-132 # Plan on vertex POValueOutputTez - scope-133 -> [] | |---A: New For Each(false,false)[bag] - scope-95 | | | Cast[int] - scope-90 | | | |---Project[bytearray][0] - scope-89 | | | Cast[int] - scope-93 | | | |---Project[bytearray][1] - scope-92 | |---A: Load(hdfs://localhost:39746/user/gtrain/testFrJoinInput.txt:org.apache.pig.builtin.PigStorage) - scope-88 Tez vertex scope-144 # Plan on vertex E: Store(hdfs://localhost:39746/tmp/temp906575730/tmp1776475591:org.apache.pig.impl.io.InterStorage) - scope-131 | |---POShuffledValueInputTez - scope-145 <- [scope-132] {noformat} This later fails with {panel} Caused by: java.lang.IllegalArgumentException: Edge [scope-136 : org.apache.pig.backend.hadoop.executionengine.tez.runtime.PigProcessor] -> [scope-132 : org.apache.pig.backend.hadoop.executionengine.tez.runtime.PigProcessor] (\{ BROADCAST : org.apache.tez.runtime.library.input.UnorderedKVInput >> PERSISTED >> org.apache.tez.runtime.library.output.UnorderedKVOutput >> NullEdgeManager }) already defined! {panel} was (Author: knoguchi): Issue seems to be happening inside MultiQueryOptimizerTez.java. 
Before the change {noformat} Tez vertex scope-132 # Plan on vertex POValueOutputTez - scope-133 -> [scope-134, scope-140] | |---A: New For Each(false,false)[bag] - scope-95 | | | Cast[int] - scope-90 | | | |---Project[bytearray][0] - scope-89 | | | Cast[int] - scope-93 | | | |---Project[bytearray][1] - scope-92 | |---A: Load(hdfs://localhost:39746/user/gtrain/testFrJoinInput.txt:org.apache.pig.builtin.PigStorage) - scope-88 Tez vertex scope-136 # Plan on vertex B: Split - scope-148 | | | Local Rearrange[tuple]{int}(false) - scope-115 -> scope-134 | | | | | Project[int][0] - scope-111 | | | Local Rearrange[tuple]{int}(false) - scope-127 -> scope-140 | | | | | Project[int][1] - scope-123 | |---B: New For Each(false,false)[bag] - scope-106 | | | Cast[int] - scope-101 | | | |---Project[bytearray][0] - scope-100 | | | Cast[int] - scope-104 | | | |---Project[bytearray][1] - scope-103 | |---B: Load(hdfs://localhost:39746/user/gtrain/testFrJoinInput2.txt:org.apache.pig.builtin.PigStorage) - scope-99 Tez vertex scope-134 # Plan on vertex POValueOutputTez - scope-146 -> [scope-144] | |---C: FRJoin[tuple] - scope-112 <- scope-136 | | | Project[int][0] - scope-110 | | | Project[int][0] - scope-111 | |---POValueInputTez - scope-135 <- scope-132 Tez vertex scope-140 # Plan on vertex POValueOutputTez - scope-147 -> [scope-144] | |---D: FRJoin[tuple] - scope-124 <- scope-136 | | | Project[int][1] - scope-122 | | | Project[int][1] - scope-123 | |---POValueInputTez - scope-141 <- scope-132 Tez vertex scope-144 # Plan on vertex E: Store(hdfs://localhost:39746/tmp/temp906575730/tmp1776475591:org.apache.pig.impl.io.InterStorage) - scope-131 | |---POShuffledValueInputTez - scope-145 <- [scope-134, scope-140] {noformat} After MultiQueryOptimizerTez::visitTezOp {code} // If all other conditions were satisfied, but it had a successor union // with unsupported storefunc keep it in the tentative list. 
{code} and decides to merge scope-134 and scope-140, {noformat} Tez vertex scope-136 # Plan on vertex B: Split - scope-148 | | | Local Rearrange[tuple]{int}(false) - scope-115 -> scope-132 | | | | | Project[int][0] - scope-111 | | | Local Rearrange[tuple]{int}(false) - scope-127 -> scope-132 | | | | | Project[int][1] - scope-123 | |---B: New For Each(false,false)[bag] - scope-106 | | | Cast[int] - scope-101 | | | |---Project[bytearray][0] - scope-100 | | | Cast[int] - scope-104 | | | |---Project[bytearray][1] - scope-103 | |---B: Load(hdfs://localhost:39746/user/gtrain/testFrJoinInput2.txt:org.apache.pig.builtin.PigStorage) - scope-99 Tez vertex scope-132 # Plan on vertex POValueOutputTez - scope-133 -> [] | |---A: New For Each(false,false)[bag] - scope-95 | | | Cast[int] - scope-90 | | | |---Project[bytearray][0] - scope-89 | | | Cast[int] - scope-93 | | | |---Project[bytearray][1] - scope-92 | |---A: Load(hdfs://localhost:39746/user/gtrain/testFrJoinInput.txt:org.apache.pig.builtin.PigStorage) - scope-88 Tez vertex scope-144 # Plan on vertex E: Store(hdfs://localhost:39746/tmp/temp906575730/tmp1776475591:org.apache.pig.impl.io.InterStorage) - scope-131 | |---POShuffledValueInputTez - scope-145 <- [scope-132] {noformat} This later fails with {noformat} Caused by: java.lang.IllegalArgumentException: Edge [scope-136 : org.apache.pig.backend.hadoop.executionengine.tez.runtime.PigProcessor] -> [scope-132 : org.apache.pig.backend.hadoop.executionengine.tez.runtime.PigProcessor] ({ BROADCAST : org.apache.tez.runtime.library.input.UnorderedKVInput >> PERSISTED >> org.apache.tez.runtime.library.output.UnorderedKVOutput >> NullEdgeManager }) already defined! 
{noformat} > TestFRJoin.testFRJoinOut7 and testFRJoinOut8 failing with Edge already > defined error on Tez > ------------------------------------------------------------------------------------------- > > Key: PIG-5444 > URL: https://issues.apache.org/jira/browse/PIG-5444 > Project: Pig > Issue Type: Bug > Components: tez > Reporter: Koji Noguchi > Assignee: Koji Noguchi > Priority: Major > > With Tez, when the individual tests (TestFRJoin.testFRJoinOut7 and > testFRJoinOut8) are run separately, they pass. But when the entire TestFRJoin > is run, these two tests fail on Tez with > {noformat} > Unable to open iterator for alias E > org.apache.pig.impl.logicalLayer.FrontendException: ERROR 1066: Unable to > open iterator for alias E > at org.apache.pig.PigServer.openIterator(PigServer.java:1024) > at org.apache.pig.test.TestFRJoin.testFRJoinOut7(TestFRJoin.java:409) > Caused by: org.apache.pig.PigException: ERROR 1002: Unable to store alias E > at org.apache.pig.PigServer.storeEx(PigServer.java:1127) > at org.apache.pig.PigServer.store(PigServer.java:1086) > at org.apache.pig.PigServer.openIterator(PigServer.java:999) > Caused by: > org.apache.pig.backend.hadoop.executionengine.JobCreationException: ERROR > 2017: Internal error creating job configuration. 
> at > org.apache.pig.backend.hadoop.executionengine.tez.TezJobCompiler.getJob(TezJobCompiler.java:153) > at > org.apache.pig.backend.hadoop.executionengine.tez.TezJobCompiler.compile(TezJobCompiler.java:81) > at > org.apache.pig.backend.hadoop.executionengine.tez.TezLauncher.launchPig(TezLauncher.java:200) > at > org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.launchPig(HExecutionEngine.java:290) > at org.apache.pig.PigServer.launchPlan(PigServer.java:1479) > at org.apache.pig.PigServer.executeCompiledLogicalPlan(PigServer.java:1464) > at org.apache.pig.PigServer.storeEx(PigServer.java:1123) > Caused by: java.lang.IllegalArgumentException: Edge [scope-632 : > org.apache.pig.backend.hadoop.executionengine.tez.runtime.PigProcessor] -> > [scope-628 : > org.apache.pig.backend.hadoop.executionengine.tez.runtime.PigProcessor] ({ > BROADCAST : org.apache.tez.runtime.library.input.UnorderedKVInput >> > PERSISTED >> org.apache.tez.runtime.library.output.UnorderedKVOutput >> > NullEdgeManager }) already defined! > at org.apache.tez.dag.api.DAG.addEdge(DAG.java:296) > at > org.apache.pig.backend.hadoop.executionengine.tez.TezDagBuilder.visitTezOp(TezDagBuilder.java:410) > at > org.apache.pig.backend.hadoop.executionengine.tez.plan.TezOperator.visit(TezOperator.java:265) > at > org.apache.pig.backend.hadoop.executionengine.tez.plan.TezOperator.visit(TezOperator.java:56) > at > org.apache.pig.impl.plan.DependencyOrderWalker.walk(DependencyOrderWalker.java:87) > at org.apache.pig.impl.plan.PlanVisitor.visit(PlanVisitor.java:46) > at > org.apache.pig.backend.hadoop.executionengine.tez.TezJobCompiler.buildDAG(TezJobCompiler.java:69) > at > org.apache.pig.backend.hadoop.executionengine.tez.TezJobCompiler.getJob(TezJobCompiler.java:120) > {noformat} -- This message was sent by Atlassian Jira (v8.20.10#820010)