[ https://issues.apache.org/jira/browse/SPARK-50591?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Chen Xia updated SPARK-50591: ----------------------------- Description: The task was run successfully using spark-sql -f sqlfile, but the spark-web ui showed some jobs failed with "Stage cancelled because SparkContext was shut down". Is this expected? Shouldn't it exit only after all stages have completed? {code:java} spark-sql --master local[2] --conf "spark.driver.extraJavaOptions=-Dlog4j.configuration=debug.properties" -f /home/hadoop/spark/saprk3/query95.sql {code} query95.sql {code:java} use autotest_hive; SELECT count(distinct ws1.ws_order_number) as order_count, sum(ws1.ws_ext_ship_cost) as total_shipping_cost, sum(ws1.ws_net_profit) as total_net_profit FROM web_sales ws1 JOIN customer_address ca ON (ws1.ws_ship_addr_sk = ca.ca_address_sk) JOIN web_site s ON (ws1.ws_web_site_sk = s.web_site_sk) JOIN date_dim d ON (ws1.ws_ship_date_sk = d.d_date_sk) LEFT SEMI JOIN (SELECT ws2.ws_order_number as ws_order_number FROM web_sales ws2 JOIN web_sales ws3 ON (ws2.ws_order_number = ws3.ws_order_number) WHERE ws2.ws_warehouse_sk <> ws3.ws_warehouse_sk) ws_wh1 ON (ws1.ws_order_number = ws_wh1.ws_order_number) LEFT SEMI JOIN (SELECT wr_order_number FROM web_returns wr JOIN (SELECT ws4.ws_order_number as ws_order_number FROM web_sales ws4 JOIN web_sales ws5 ON (ws4.ws_order_number = ws5.ws_order_number) WHERE ws4.ws_warehouse_sk <> ws5.ws_warehouse_sk) ws_wh2 ON (wr.wr_order_number = ws_wh2.ws_order_number)) tmp1 ON (ws1.ws_order_number = tmp1.wr_order_number) WHERE d.d_date between '2002-05-01' and '2002-06-30' and ca.ca_state = 'GA' and s.web_company_name = 'pri'; {code} some log info {code:java} 24/12/16 20:14:52 DEBUG SparkSQLCLIDriver: processCmd: SELECT count(distinct ws1.ws_order_number) as order_count, sum(ws1.ws_ext_ship_cost) as total_shipping_cost, sum(ws1.ws_net_profit) as total_net_profit FROM web_sales ws1 JOIN customer_address ca ON (ws1.ws_ship_addr_sk = ca.ca_address_sk) JOIN web_site s ON (ws1.ws_web_site_sk = s.web_site_sk) JOIN date_dim d ON 
(ws1.ws_ship_date_sk = d.d_date_sk) LEFT SEMI JOIN (SELECT ws2.ws_order_number as ws_order_number FROM web_sales ws2 JOIN web_sales ws3 ON (ws2.ws_order_number = ws3.ws_order_number) WHERE ws2.ws_warehouse_sk <> ws3.ws_warehouse_sk) ws_wh1 ON (ws1.ws_order_number = ws_wh1.ws_order_number) LEFT SEMI JOIN (SELECT wr_order_number FROM web_returns wr JOIN (SELECT ws4.ws_order_number as ws_order_number FROM web_sales ws4 JOIN web_sales ws5 ON (ws4.ws_order_number = ws5.ws_order_number) WHERE ws4.ws_warehouse_sk <> ws5.ws_warehouse_sk) ws_wh2 ON (wr.wr_order_number = ws_wh2.ws_order_number)) tmp1 ON (ws1.ws_order_number = tmp1.wr_order_number) WHERE d.d_date between '2002-05-01' and '2002-06-30' and ca.ca_state = 'GA' and s.web_company_name = 'pri' with ret:0 24/12/16 20:14:52 DEBUG SparkSQLCLIDriver: ignoreErrors:false 24/12/16 20:14:52 DEBUG SparkSQLCLIDriver: cli.processFile res:0 24/12/16 20:14:52 DEBUG SparkSQLEnv: Shutting down Spark SQL Environment 24/12/16 20:14:52 INFO SparkContext: SparkContext is stopping with exitCode 0. ........ 24/12/16 20:14:52 INFO DAGScheduler: ShuffleMapStage 4 (processLine at CliDriver.java:336) failed in 80.209 s due to Stage cancelled because SparkContext was shut down 24/12/16 20:14:52 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped! {code} {code:java} SparkSQLCLIDriver if (sessionState.fileName != null) { exit(cli.processFile(sessionState.fileName)) } cli.processFile execution returns, but some stages are not fully completed, and the exit action is executed, causing some stages to be canceled {code} was: The task was run successfully using spark-sql -f sqlfile, but the spark-web ui showed some jobs failed with "Stage cancelled because SparkContext was shut down" Why don't we need to wait for all jobs to be completed? 
{code:java} spark-sql --master local[2] --conf "spark.driver.extraJavaOptions=-Dlog4j.configuration=debug.properties" -f /home/hadoop/spark/saprk3/query95.sql {code} query95.sql {code:java} use autotest_hive; SELECT count(distinct ws1.ws_order_number) as order_count, sum(ws1.ws_ext_ship_cost) as total_shipping_cost, sum(ws1.ws_net_profit) as total_net_profit FROM web_sales ws1 JOIN customer_address ca ON (ws1.ws_ship_addr_sk = ca.ca_address_sk) JOIN web_site s ON (ws1.ws_web_site_sk = s.web_site_sk) JOIN date_dim d ON (ws1.ws_ship_date_sk = d.d_date_sk) LEFT SEMI JOIN (SELECT ws2.ws_order_number as ws_order_number FROM web_sales ws2 JOIN web_sales ws3 ON (ws2.ws_order_number = ws3.ws_order_number) WHERE ws2.ws_warehouse_sk <> ws3.ws_warehouse_sk) ws_wh1 ON (ws1.ws_order_number = ws_wh1.ws_order_number) LEFT SEMI JOIN (SELECT wr_order_number FROM web_returns wr JOIN (SELECT ws4.ws_order_number as ws_order_number FROM web_sales ws4 JOIN web_sales ws5 ON (ws4.ws_order_number = ws5.ws_order_number) WHERE ws4.ws_warehouse_sk <> ws5.ws_warehouse_sk) ws_wh2 ON (wr.wr_order_number = ws_wh2.ws_order_number)) tmp1 ON (ws1.ws_order_number = tmp1.wr_order_number) WHERE d.d_date between '2002-05-01' and '2002-06-30' and ca.ca_state = 'GA' and s.web_company_name = 'pri'; {code} some log info {code:java} 24/12/16 20:14:52 DEBUG SparkSQLCLIDriver: processCmd: SELECT count(distinct ws1.ws_order_number) as order_count, sum(ws1.ws_ext_ship_cost) as total_shipping_cost, sum(ws1.ws_net_profit) as total_net_profit FROM web_sales ws1 JOIN customer_address ca ON (ws1.ws_ship_addr_sk = ca.ca_address_sk) JOIN web_site s ON (ws1.ws_web_site_sk = s.web_site_sk) JOIN date_dim d ON (ws1.ws_ship_date_sk = d.d_date_sk) LEFT SEMI JOIN (SELECT ws2.ws_order_number as ws_order_number FROM web_sales ws2 JOIN web_sales ws3 ON (ws2.ws_order_number = ws3.ws_order_number) WHERE ws2.ws_warehouse_sk <> ws3.ws_warehouse_sk) ws_wh1 ON (ws1.ws_order_number = ws_wh1.ws_order_number) LEFT SEMI JOIN (SELECT 
wr_order_number FROM web_returns wr JOIN (SELECT ws4.ws_order_number as ws_order_number FROM web_sales ws4 JOIN web_sales ws5 ON (ws4.ws_order_number = ws5.ws_order_number) WHERE ws4.ws_warehouse_sk <> ws5.ws_warehouse_sk) ws_wh2 ON (wr.wr_order_number = ws_wh2.ws_order_number)) tmp1 ON (ws1.ws_order_number = tmp1.wr_order_number) WHERE d.d_date between '2002-05-01' and '2002-06-30' and ca.ca_state = 'GA' and s.web_company_name = 'pri' with ret:0 24/12/16 20:14:52 DEBUG SparkSQLCLIDriver: ignoreErrors:false 24/12/16 20:14:52 DEBUG SparkSQLCLIDriver: cli.processFile res:0 24/12/16 20:14:52 DEBUG SparkSQLEnv: Shutting down Spark SQL Environment 24/12/16 20:14:52 INFO SparkContext: SparkContext is stopping with exitCode 0. ........ 24/12/16 20:14:52 INFO DAGScheduler: ShuffleMapStage 4 (processLine at CliDriver.java:336) failed in 80.209 s due to Stage cancelled because SparkContext was shut down 24/12/16 20:14:52 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped! {code} {code:java} SparkSQLCLIDriver if (sessionState.fileName != null) { exit(cli.processFile(sessionState.fileName)) } cli.processFile execution returns, but some stages are not fully completed, and the exit action is executed, causing some stages to be canceled {code} > Using spark-sql -f sqlfile, some jobs failed with "Stage cancelled because > SparkContext was shut down" > ------------------------------------------------------------------------------------------------------- > > Key: SPARK-50591 > URL: https://issues.apache.org/jira/browse/SPARK-50591 > Project: Spark > Issue Type: Question > Components: SQL > Affects Versions: 3.4.4 > Reporter: Chen Xia > Priority: Major > Attachments: spark web ui 02_17343514806851.png, spark web ui01.png > > > The task was run successfully using spark-sql -f sqlfile, but the spark-web > ui showed some jobs failed with "Stage cancelled because SparkContext was > shut down" > Is this as expected? 
it should exit only after all stages are completed? > {code:java} > spark-sql --master local[2] --conf > "spark.driver.extraJavaOptions=-Dlog4j.configuration=debug.properties" -f > /home/hadoop/spark/saprk3/query95.sql > {code} > query95.sql > {code:java} > use autotest_hive; > SELECT count(distinct ws1.ws_order_number) as order_count, > sum(ws1.ws_ext_ship_cost) as total_shipping_cost, > sum(ws1.ws_net_profit) as total_net_profit > FROM web_sales ws1 > JOIN customer_address ca ON (ws1.ws_ship_addr_sk = ca.ca_address_sk) > JOIN web_site s ON (ws1.ws_web_site_sk = s.web_site_sk) > JOIN date_dim d ON (ws1.ws_ship_date_sk = d.d_date_sk) > LEFT SEMI JOIN (SELECT ws2.ws_order_number as ws_order_number > FROM web_sales ws2 JOIN web_sales ws3 > ON (ws2.ws_order_number = ws3.ws_order_number) > WHERE ws2.ws_warehouse_sk <> ws3.ws_warehouse_sk) ws_wh1 > ON (ws1.ws_order_number = ws_wh1.ws_order_number) > LEFT SEMI JOIN (SELECT wr_order_number > FROM web_returns wr > JOIN (SELECT ws4.ws_order_number as ws_order_number > FROM web_sales ws4 JOIN web_sales ws5 > ON (ws4.ws_order_number = ws5.ws_order_number) > WHERE ws4.ws_warehouse_sk <> ws5.ws_warehouse_sk) ws_wh2 > ON (wr.wr_order_number = ws_wh2.ws_order_number)) tmp1 > ON (ws1.ws_order_number = tmp1.wr_order_number) > WHERE d.d_date between '2002-05-01' and '2002-06-30' and ca.ca_state = 'GA' > and s.web_company_name = 'pri'; {code} > some log info > {code:java} > 24/12/16 20:14:52 DEBUG SparkSQLCLIDriver: processCmd: > SELECT count(distinct ws1.ws_order_number) as order_count, > sum(ws1.ws_ext_ship_cost) as total_shipping_cost, > sum(ws1.ws_net_profit) as total_net_profit > FROM web_sales ws1 > JOIN customer_address ca ON (ws1.ws_ship_addr_sk = ca.ca_address_sk) > JOIN web_site s ON (ws1.ws_web_site_sk = s.web_site_sk) > JOIN date_dim d ON (ws1.ws_ship_date_sk = d.d_date_sk) > LEFT SEMI JOIN (SELECT ws2.ws_order_number as ws_order_number > FROM web_sales ws2 JOIN web_sales ws3 > ON (ws2.ws_order_number = 
ws3.ws_order_number) > WHERE ws2.ws_warehouse_sk <> ws3.ws_warehouse_sk) ws_wh1 > ON (ws1.ws_order_number = ws_wh1.ws_order_number) > LEFT SEMI JOIN (SELECT wr_order_number > FROM web_returns wr > JOIN (SELECT ws4.ws_order_number as ws_order_number > FROM web_sales ws4 JOIN web_sales ws5 > ON (ws4.ws_order_number = ws5.ws_order_number) > WHERE ws4.ws_warehouse_sk <> ws5.ws_warehouse_sk) ws_wh2 > ON (wr.wr_order_number = ws_wh2.ws_order_number)) tmp1 > ON (ws1.ws_order_number = tmp1.wr_order_number) > WHERE d.d_date between '2002-05-01' and '2002-06-30' and ca.ca_state = 'GA' > and s.web_company_name = 'pri' with ret:0 > 24/12/16 20:14:52 DEBUG SparkSQLCLIDriver: ignoreErrors:false > 24/12/16 20:14:52 DEBUG SparkSQLCLIDriver: cli.processFile res:0 > 24/12/16 20:14:52 DEBUG SparkSQLEnv: Shutting down Spark SQL Environment > 24/12/16 20:14:52 INFO SparkContext: SparkContext is stopping with exitCode 0. > ........ > 24/12/16 20:14:52 INFO DAGScheduler: ShuffleMapStage 4 (processLine at > CliDriver.java:336) failed in 80.209 s due to Stage cancelled because > SparkContext was shut down > 24/12/16 20:14:52 INFO MapOutputTrackerMasterEndpoint: > MapOutputTrackerMasterEndpoint stopped! > {code} > {code:java} > SparkSQLCLIDriver > if (sessionState.fileName != null) { > exit(cli.processFile(sessionState.fileName)) > } > cli.processFile execution returns, but some stages are not fully completed, > and the exit action is executed, causing some stages to be canceled > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org