[ https://issues.apache.org/jira/browse/SPARK-37581?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17684361#comment-17684361 ]
Ritika Maheshwari commented on SPARK-37581:
-------------------------------------------
Just executed this on 3.3.0 and it takes 4.14 seconds. I am also including the plan, so it does not seem like an issue in 3.3.0.
> left join
> (select dt,count(distinct kb_code) as h_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t8
> on calendar.dt = t8.dt
>
> left join
> (select dt,count(distinct kb_code) as i_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t9
> on calendar.dt = t9.dt
>
> left join
> (select dt,count(distinct kb_code) as j_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t10
> on calendar.dt = t10.dt
>
> left join
> (select dt,count(distinct kb_code) as k_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t11
> on calendar.dt = t11.dt;
Time taken: 0.609 seconds
23/02/05 16:10:47 WARN HiveMetaStore: Location: file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_c specified for non-external table:test_c
{color:#FF0000}*Time taken: 4.14 seconds*{color}
spark-sql> select * from test.test_c
> ;
1 1 2 1 1 1 1 1 1 1 1 1 1 1
Time taken: 0.296 seconds, Fetched 1 row(s)
spark-sql>
{color:#FF0000}*The plan for the query is* {color}
CommandResult Execute OptimizedCreateHiveTableAsSelectCommand [Database: test, TableName: test_c, InsertIntoHadoopFsRelationCommand] +- OptimizedCreateHiveTableAsSelectCommand [Database: test, TableName: test_c, InsertIntoHadoopFsRelationCommand] +- Project [day#11, week#12, weekday#13, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L, j_kbs#9L, k_kbs#10L] +- Join LeftOuter, (dt#14 = dt#366) :- Join LeftOuter, (dt#14 = dt#334) : :- Join LeftOuter, (dt#14 = dt#302) : : :- Join LeftOuter, (dt#14 = dt#270) : : : :- Join LeftOuter, (dt#14 = dt#238) : : : : :- Join LeftOuter, (dt#14 = dt#206) : : : : : :- Join LeftOuter, (dt#14 = dt#174) : : : : : : :- Join LeftOuter, (dt#14 = dt#142) : : : : : : : :- Join LeftOuter, (dt#14 = dt#110) : : : : : : : : :- Join LeftOuter, (dt#14 = dt#78) : : : : : : : : : :- Join LeftOuter, (dt#14 = dt#46) : : : : : : : : : : :- SubqueryAlias calendar : : : : : : : : : : : +- Project [day#11, week#12, weekday#13, dt#14] : : : : : : : : : : : +- Filter (dt#14 = 20211126) : : : : : : : : : : : +- SubqueryAlias spark_catalog.test.test_a : : : : : : : : : : : +- Relation test.test_a[day#11,week#12,weekday#13,dt#14] orc : : : : : : : : : : +- SubqueryAlias t1 : : : : : : : : : : +- Aggregate [dt#46], [dt#46, count(distinct kb_code#40) AS a_kbs#0L] : : : : : : : : : : +- Filter (dt#46 = 20211126) : : : : : : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : : : : : : +- Relation test.test_b[session_id#15,device_id#16,brand#17,model#18,wx_version#19,os#20,net_work_type#21,app_id#22,app_name#23,col_z#24,page_url#25,page_title#26,olabel#27,otitle#28,source#29,send_dt#30,recv_dt#31,request_time#32,write_time#33,client_ip#34,col_a#35,dt_hour#36,product#37,channelfrom#38,... 8 more fields] orc : : : : : : : : : +- SubqueryAlias t2 : : : : : : : : : +- Aggregate [dt#78], [dt#78, count(distinct kb_code#72) AS b_kbs#1L] : : : : : : : : : +- Filter (dt#78 = 20211126) : : : : : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : : : : : +- Relation test.test_b[session_id#47,device_id#48,brand#49,model#50,wx_version#51,os#52,net_work_type#53,app_id#54,app_name#55,col_z#56,page_url#57,page_title#58,olabel#59,otitle#60,source#61,send_dt#62,recv_dt#63,request_time#64,write_time#65,client_ip#66,col_a#67,dt_hour#68,product#69,channelfrom#70,... 
8 more fields] orc : : : : : : : : +- SubqueryAlias t3 : : : : : : : : +- Aggregate [dt#110], [dt#110, count(distinct kb_code#104) AS c_kbs#2L] : : : : : : : : +- Filter (dt#110 = 20211126) : : : : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : : : : +- Relation test.test_b[session_id#79,device_id#80,brand#81,model#82,wx_version#83,os#84,net_work_type#85,app_id#86,app_name#87,col_z#88,page_url#89,page_title#90,olabel#91,otitle#92,source#93,send_dt#94,recv_dt#95,request_time#96,write_time#97,client_ip#98,col_a#99,dt_hour#100,product#101,channelfrom#102,... 8 more fields] orc : : : : : : : +- SubqueryAlias t4 : : : : : : : +- Aggregate [dt#142], [dt#142, count(distinct kb_code#136) AS d_kbs#3L] : : : : : : : +- Filter (dt#142 = 20211126) : : : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : : : +- Relation test.test_b[session_id#111,device_id#112,brand#113,model#114,wx_version#115,os#116,net_work_type#117,app_id#118,app_name#119,col_z#120,page_url#121,page_title#122,olabel#123,otitle#124,source#125,send_dt#126,recv_dt#127,request_time#128,write_time#129,client_ip#130,col_a#131,dt_hour#132,product#133,channelfrom#134,... 8 more fields] orc : : : : : : +- SubqueryAlias t5 : : : : : : +- Aggregate [dt#174], [dt#174, count(distinct kb_code#168) AS e_kbs#4L] : : : : : : +- Filter (dt#174 = 20211126) : : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : : +- Relation test.test_b[session_id#143,device_id#144,brand#145,model#146,wx_version#147,os#148,net_work_type#149,app_id#150,app_name#151,col_z#152,page_url#153,page_title#154,olabel#155,otitle#156,source#157,send_dt#158,recv_dt#159,request_time#160,write_time#161,client_ip#162,col_a#163,dt_hour#164,product#165,channelfrom#166,... 8 more fields] orc : : : : : +- SubqueryAlias t6 : : : : : +- Aggregate [dt#206], [dt#206, count(distinct kb_code#200) AS f_kbs#5L] : : : : : +- Filter (dt#206 = 20211126) : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : +- Relation test.test_b[session_id#175,device_id#176,brand#177,model#178,wx_version#179,os#180,net_work_type#181,app_id#182,app_name#183,col_z#184,page_url#185,page_title#186,olabel#187,otitle#188,source#189,send_dt#190,recv_dt#191,request_time#192,write_time#193,client_ip#194,col_a#195,dt_hour#196,product#197,channelfrom#198,... 8 more fields] orc : : : : +- SubqueryAlias t7 : : : : +- Aggregate [dt#238], [dt#238, count(distinct kb_code#232) AS g_kbs#6L] : : : : +- Filter (dt#238 = 20211126) : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : +- Relation test.test_b[session_id#207,device_id#208,brand#209,model#210,wx_version#211,os#212,net_work_type#213,app_id#214,app_name#215,col_z#216,page_url#217,page_title#218,olabel#219,otitle#220,source#221,send_dt#222,recv_dt#223,request_time#224,write_time#225,client_ip#226,col_a#227,dt_hour#228,product#229,channelfrom#230,... 8 more fields] orc : : : +- SubqueryAlias t8 : : : +- Aggregate [dt#270], [dt#270, count(distinct kb_code#264) AS h_kbs#7L] : : : +- Filter (dt#270 = 20211126) : : : +- SubqueryAlias spark_catalog.test.test_b : : : +- Relation test.test_b[session_id#239,device_id#240,brand#241,model#242,wx_version#243,os#244,net_work_type#245,app_id#246,app_name#247,col_z#248,page_url#249,page_title#250,olabel#251,otitle#252,source#253,send_dt#254,recv_dt#255,request_time#256,write_time#257,client_ip#258,col_a#259,dt_hour#260,product#261,channelfrom#262,... 
8 more fields] orc : : +- SubqueryAlias t9 : : +- Aggregate [dt#302], [dt#302, count(distinct kb_code#296) AS i_kbs#8L] : : +- Filter (dt#302 = 20211126) : : +- SubqueryAlias spark_catalog.test.test_b : : +- Relation test.test_b[session_id#271,device_id#272,brand#273,model#274,wx_version#275,os#276,net_work_type#277,app_id#278,app_name#279,col_z#280,page_url#281,page_title#282,olabel#283,otitle#284,source#285,send_dt#286,recv_dt#287,request_time#288,write_time#289,client_ip#290,col_a#291,dt_hour#292,product#293,channelfrom#294,... 8 more fields] orc : +- SubqueryAlias t10 : +- Aggregate [dt#334], [dt#334, count(distinct kb_code#328) AS j_kbs#9L] : +- Filter (dt#334 = 20211126) : +- SubqueryAlias spark_catalog.test.test_b : +- Relation test.test_b[session_id#303,device_id#304,brand#305,model#306,wx_version#307,os#308,net_work_type#309,app_id#310,app_name#311,col_z#312,page_url#313,page_title#314,olabel#315,otitle#316,source#317,send_dt#318,recv_dt#319,request_time#320,write_time#321,client_ip#322,col_a#323,dt_hour#324,product#325,channelfrom#326,... 8 more fields] orc +- SubqueryAlias t11 +- Aggregate [dt#366], [dt#366, count(distinct kb_code#360) AS k_kbs#10L] +- Filter (dt#366 = 20211126) +- SubqueryAlias spark_catalog.test.test_b +- Relation test.test_b[session_id#335,device_id#336,brand#337,model#338,wx_version#339,os#340,net_work_type#341,app_id#342,app_name#343,col_z#344,page_url#345,page_title#346,olabel#347,otitle#348,source#349,send_dt#350,recv_dt#351,request_time#352,write_time#353,client_ip#354,col_a#355,dt_hour#356,product#357,channelfrom#358,... 8 more fields] orc == Analyzed Logical Plan == CommandResult Execute OptimizedCreateHiveTableAsSelectCommand [Database: test, TableName: test_c, InsertIntoHadoopFsRelationCommand] +- OptimizedCreateHiveTableAsSelectCommand [Database: test, TableName: test_c, InsertIntoHadoopFsRelationCommand] +- Project [day#11, week#12, weekday#13, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L, j_kbs#9L, k_kbs#10L] +- Join LeftOuter, (dt#14 = dt#366) :- Join LeftOuter, (dt#14 = dt#334) : :- Join LeftOuter, (dt#14 = dt#302) : : :- Join LeftOuter, (dt#14 = dt#270) : : : :- Join LeftOuter, (dt#14 = dt#238) : : : : :- Join LeftOuter, (dt#14 = dt#206) : : : : : :- Join LeftOuter, (dt#14 = dt#174) : : : : : : :- Join LeftOuter, (dt#14 = dt#142) : : : : : : : :- Join LeftOuter, (dt#14 = dt#110) : : : : : : : : :- Join LeftOuter, (dt#14 = dt#78) : : : : : : : : : :- Join LeftOuter, (dt#14 = dt#46) : : : : : : : : : : :- SubqueryAlias calendar : : : : : : : : : : : +- Project [day#11, week#12, weekday#13, dt#14] : : : : : : : : : : : +- Filter (dt#14 = 20211126) : : : : : : : : : : : +- SubqueryAlias spark_catalog.test.test_a : : : : : : : : : : : +- Relation test.test_a[day#11,week#12,weekday#13,dt#14] orc : : : : : : : : : : +- SubqueryAlias t1 : : : : : : : : : : +- Aggregate [dt#46], [dt#46, count(distinct kb_code#40) AS a_kbs#0L] : : : : : : : : : : +- Filter (dt#46 = 20211126) : : : : : : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : : : : : : +- Relation test.test_b[session_id#15,device_id#16,brand#17,model#18,wx_version#19,os#20,net_work_type#21,app_id#22,app_name#23,col_z#24,page_url#25,page_title#26,olabel#27,otitle#28,source#29,send_dt#30,recv_dt#31,request_time#32,write_time#33,client_ip#34,col_a#35,dt_hour#36,product#37,channelfrom#38,... 
8 more fields] orc : : : : : : : : : +- SubqueryAlias t2 : : : : : : : : : +- Aggregate [dt#78], [dt#78, count(distinct kb_code#72) AS b_kbs#1L] : : : : : : : : : +- Filter (dt#78 = 20211126) : : : : : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : : : : : +- Relation test.test_b[session_id#47,device_id#48,brand#49,model#50,wx_version#51,os#52,net_work_type#53,app_id#54,app_name#55,col_z#56,page_url#57,page_title#58,olabel#59,otitle#60,source#61,send_dt#62,recv_dt#63,request_time#64,write_time#65,client_ip#66,col_a#67,dt_hour#68,product#69,channelfrom#70,... 8 more fields] orc : : : : : : : : +- SubqueryAlias t3 : : : : : : : : +- Aggregate [dt#110], [dt#110, count(distinct kb_code#104) AS c_kbs#2L] : : : : : : : : +- Filter (dt#110 = 20211126) : : : : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : : : : +- Relation test.test_b[session_id#79,device_id#80,brand#81,model#82,wx_version#83,os#84,net_work_type#85,app_id#86,app_name#87,col_z#88,page_url#89,page_title#90,olabel#91,otitle#92,source#93,send_dt#94,recv_dt#95,request_time#96,write_time#97,client_ip#98,col_a#99,dt_hour#100,product#101,channelfrom#102,... 8 more fields] orc : : : : : : : +- SubqueryAlias t4 : : : : : : : +- Aggregate [dt#142], [dt#142, count(distinct kb_code#136) AS d_kbs#3L] : : : : : : : +- Filter (dt#142 = 20211126) : : : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : : : +- Relation test.test_b[session_id#111,device_id#112,brand#113,model#114,wx_version#115,os#116,net_work_type#117,app_id#118,app_name#119,col_z#120,page_url#121,page_title#122,olabel#123,otitle#124,source#125,send_dt#126,recv_dt#127,request_time#128,write_time#129,client_ip#130,col_a#131,dt_hour#132,product#133,channelfrom#134,... 8 more fields] orc : : : : : : +- SubqueryAlias t5 : : : : : : +- Aggregate [dt#174], [dt#174, count(distinct kb_code#168) AS e_kbs#4L] : : : : : : +- Filter (dt#174 = 20211126) : : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : : +- Relation test.test_b[session_id#143,device_id#144,brand#145,model#146,wx_version#147,os#148,net_work_type#149,app_id#150,app_name#151,col_z#152,page_url#153,page_title#154,olabel#155,otitle#156,source#157,send_dt#158,recv_dt#159,request_time#160,write_time#161,client_ip#162,col_a#163,dt_hour#164,product#165,channelfrom#166,... 8 more fields] orc : : : : : +- SubqueryAlias t6 : : : : : +- Aggregate [dt#206], [dt#206, count(distinct kb_code#200) AS f_kbs#5L] : : : : : +- Filter (dt#206 = 20211126) : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : +- Relation test.test_b[session_id#175,device_id#176,brand#177,model#178,wx_version#179,os#180,net_work_type#181,app_id#182,app_name#183,col_z#184,page_url#185,page_title#186,olabel#187,otitle#188,source#189,send_dt#190,recv_dt#191,request_time#192,write_time#193,client_ip#194,col_a#195,dt_hour#196,product#197,channelfrom#198,... 8 more fields] orc : : : : +- SubqueryAlias t7 : : : : +- Aggregate [dt#238], [dt#238, count(distinct kb_code#232) AS g_kbs#6L] : : : : +- Filter (dt#238 = 20211126) : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : +- Relation test.test_b[session_id#207,device_id#208,brand#209,model#210,wx_version#211,os#212,net_work_type#213,app_id#214,app_name#215,col_z#216,page_url#217,page_title#218,olabel#219,otitle#220,source#221,send_dt#222,recv_dt#223,request_time#224,write_time#225,client_ip#226,col_a#227,dt_hour#228,product#229,channelfrom#230,... 
8 more fields] orc : : : +- SubqueryAlias t8 : : : +- Aggregate [dt#270], [dt#270, count(distinct kb_code#264) AS h_kbs#7L] : : : +- Filter (dt#270 = 20211126) : : : +- SubqueryAlias spark_catalog.test.test_b : : : +- Relation test.test_b[session_id#239,device_id#240,brand#241,model#242,wx_version#243,os#244,net_work_type#245,app_id#246,app_name#247,col_z#248,page_url#249,page_title#250,olabel#251,otitle#252,source#253,send_dt#254,recv_dt#255,request_time#256,write_time#257,client_ip#258,col_a#259,dt_hour#260,product#261,channelfrom#262,... 8 more fields] orc : : +- SubqueryAlias t9 : : +- Aggregate [dt#302], [dt#302, count(distinct kb_code#296) AS i_kbs#8L] : : +- Filter (dt#302 = 20211126) : : +- SubqueryAlias spark_catalog.test.test_b : : +- Relation test.test_b[session_id#271,device_id#272,brand#273,model#274,wx_version#275,os#276,net_work_type#277,app_id#278,app_name#279,col_z#280,page_url#281,page_title#282,olabel#283,otitle#284,source#285,send_dt#286,recv_dt#287,request_time#288,write_time#289,client_ip#290,col_a#291,dt_hour#292,product#293,channelfrom#294,... 8 more fields] orc : +- SubqueryAlias t10 : +- Aggregate [dt#334], [dt#334, count(distinct kb_code#328) AS j_kbs#9L] : +- Filter (dt#334 = 20211126) : +- SubqueryAlias spark_catalog.test.test_b : +- Relation test.test_b[session_id#303,device_id#304,brand#305,model#306,wx_version#307,os#308,net_work_type#309,app_id#310,app_name#311,col_z#312,page_url#313,page_title#314,olabel#315,otitle#316,source#317,send_dt#318,recv_dt#319,request_time#320,write_time#321,client_ip#322,col_a#323,dt_hour#324,product#325,channelfrom#326,... 8 more fields] orc +- SubqueryAlias t11 +- Aggregate [dt#366], [dt#366, count(distinct kb_code#360) AS k_kbs#10L] +- Filter (dt#366 = 20211126) +- SubqueryAlias spark_catalog.test.test_b +- Relation test.test_b[session_id#335,device_id#336,brand#337,model#338,wx_version#339,os#340,net_work_type#341,app_id#342,app_name#343,col_z#344,page_url#345,page_title#346,olabel#347,otitle#348,source#349,send_dt#350,recv_dt#351,request_time#352,write_time#353,client_ip#354,col_a#355,dt_hour#356,product#357,channelfrom#358,... 
8 more fields] orc == Optimized Logical Plan == CommandResult Execute OptimizedCreateHiveTableAsSelectCommand [Database: test, TableName: test_c, InsertIntoHadoopFsRelationCommand] +- OptimizedCreateHiveTableAsSelectCommand [Database: test, TableName: test_c, InsertIntoHadoopFsRelationCommand] +- Project [day#11, week#12, weekday#13, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L, j_kbs#9L, k_kbs#10L] +- Join LeftOuter, (dt#14 = dt#366) :- Join LeftOuter, (dt#14 = dt#334) : :- Join LeftOuter, (dt#14 = dt#302) : : :- Join LeftOuter, (dt#14 = dt#270) : : : :- Join LeftOuter, (dt#14 = dt#238) : : : : :- Join LeftOuter, (dt#14 = dt#206) : : : : : :- Join LeftOuter, (dt#14 = dt#174) : : : : : : :- Join LeftOuter, (dt#14 = dt#142) : : : : : : : :- Join LeftOuter, (dt#14 = dt#110) : : : : : : : : :- Join LeftOuter, (dt#14 = dt#78) : : : : : : : : : :- Join LeftOuter, (dt#14 = dt#46) : : : : : : : : : : :- SubqueryAlias calendar : : : : : : : : : : : +- Project [day#11, week#12, weekday#13, dt#14] : : : : : : : : : : : +- Filter (dt#14 = 20211126) : : : : : : : : : : : +- SubqueryAlias spark_catalog.test.test_a : : : : : : : : : : : +- Relation test.test_a[day#11,week#12,weekday#13,dt#14] orc : : : : : : : : : : +- SubqueryAlias t1 : : : : : : : : : : +- Aggregate [dt#46], [dt#46, count(distinct kb_code#40) AS a_kbs#0L] : : : : : : : : : : +- Filter (dt#46 = 20211126) : : : : : : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : : : : : : +- Relation test.test_b[session_id#15,device_id#16,brand#17,model#18,wx_version#19,os#20,net_work_type#21,app_id#22,app_name#23,col_z#24,page_url#25,page_title#26,olabel#27,otitle#28,source#29,send_dt#30,recv_dt#31,request_time#32,write_time#33,client_ip#34,col_a#35,dt_hour#36,product#37,channelfrom#38,... 8 more fields] orc : : : : : : : : : +- SubqueryAlias t2 : : : : : : : : : +- Aggregate [dt#78], [dt#78, count(distinct kb_code#72) AS b_kbs#1L] : : : : : : : : : +- Filter (dt#78 = 20211126) : : : : : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : : : : : +- Relation test.test_b[session_id#47,device_id#48,brand#49,model#50,wx_version#51,os#52,net_work_type#53,app_id#54,app_name#55,col_z#56,page_url#57,page_title#58,olabel#59,otitle#60,source#61,send_dt#62,recv_dt#63,request_time#64,write_time#65,client_ip#66,col_a#67,dt_hour#68,product#69,channelfrom#70,... 8 more fields] orc : : : : : : : : +- SubqueryAlias t3 : : : : : : : : +- Aggregate [dt#110], [dt#110, count(distinct kb_code#104) AS c_kbs#2L] : : : : : : : : +- Filter (dt#110 = 20211126) : : : : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : : : : +- Relation test.test_b[session_id#79,device_id#80,brand#81,model#82,wx_version#83,os#84,net_work_type#85,app_id#86,app_name#87,col_z#88,page_url#89,page_title#90,olabel#91,otitle#92,source#93,send_dt#94,recv_dt#95,request_time#96,write_time#97,client_ip#98,col_a#99,dt_hour#100,product#101,channelfrom#102,... 
8 more fields] orc : : : : : : : +- SubqueryAlias t4 : : : : : : : +- Aggregate [dt#142], [dt#142, count(distinct kb_code#136) AS d_kbs#3L] : : : : : : : +- Filter (dt#142 = 20211126) : : : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : : : +- Relation test.test_b[session_id#111,device_id#112,brand#113,model#114,wx_version#115,os#116,net_work_type#117,app_id#118,app_name#119,col_z#120,page_url#121,page_title#122,olabel#123,otitle#124,source#125,send_dt#126,recv_dt#127,request_time#128,write_time#129,client_ip#130,col_a#131,dt_hour#132,product#133,channelfrom#134,... 8 more fields] orc : : : : : : +- SubqueryAlias t5 : : : : : : +- Aggregate [dt#174], [dt#174, count(distinct kb_code#168) AS e_kbs#4L] : : : : : : +- Filter (dt#174 = 20211126) : : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : : +- Relation test.test_b[session_id#143,device_id#144,brand#145,model#146,wx_version#147,os#148,net_work_type#149,app_id#150,app_name#151,col_z#152,page_url#153,page_title#154,olabel#155,otitle#156,source#157,send_dt#158,recv_dt#159,request_time#160,write_time#161,client_ip#162,col_a#163,dt_hour#164,product#165,channelfrom#166,... 8 more fields] orc : : : : : +- SubqueryAlias t6 : : : : : +- Aggregate [dt#206], [dt#206, count(distinct kb_code#200) AS f_kbs#5L] : : : : : +- Filter (dt#206 = 20211126) : : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : : +- Relation test.test_b[session_id#175,device_id#176,brand#177,model#178,wx_version#179,os#180,net_work_type#181,app_id#182,app_name#183,col_z#184,page_url#185,page_title#186,olabel#187,otitle#188,source#189,send_dt#190,recv_dt#191,request_time#192,write_time#193,client_ip#194,col_a#195,dt_hour#196,product#197,channelfrom#198,... 8 more fields] orc : : : : +- SubqueryAlias t7 : : : : +- Aggregate [dt#238], [dt#238, count(distinct kb_code#232) AS g_kbs#6L] : : : : +- Filter (dt#238 = 20211126) : : : : +- SubqueryAlias spark_catalog.test.test_b : : : : +- Relation test.test_b[session_id#207,device_id#208,brand#209,model#210,wx_version#211,os#212,net_work_type#213,app_id#214,app_name#215,col_z#216,page_url#217,page_title#218,olabel#219,otitle#220,source#221,send_dt#222,recv_dt#223,request_time#224,write_time#225,client_ip#226,col_a#227,dt_hour#228,product#229,channelfrom#230,... 8 more fields] orc : : : +- SubqueryAlias t8 : : : +- Aggregate [dt#270], [dt#270, count(distinct kb_code#264) AS h_kbs#7L] : : : +- Filter (dt#270 = 20211126) : : : +- SubqueryAlias spark_catalog.test.test_b : : : +- Relation test.test_b[session_id#239,device_id#240,brand#241,model#242,wx_version#243,os#244,net_work_type#245,app_id#246,app_name#247,col_z#248,page_url#249,page_title#250,olabel#251,otitle#252,source#253,send_dt#254,recv_dt#255,request_time#256,write_time#257,client_ip#258,col_a#259,dt_hour#260,product#261,channelfrom#262,... 8 more fields] orc : : +- SubqueryAlias t9 : : +- Aggregate [dt#302], [dt#302, count(distinct kb_code#296) AS i_kbs#8L] : : +- Filter (dt#302 = 20211126) : : +- SubqueryAlias spark_catalog.test.test_b : : +- Relation test.test_b[session_id#271,device_id#272,brand#273,model#274,wx_version#275,os#276,net_work_type#277,app_id#278,app_name#279,col_z#280,page_url#281,page_title#282,olabel#283,otitle#284,source#285,send_dt#286,recv_dt#287,request_time#288,write_time#289,client_ip#290,col_a#291,dt_hour#292,product#293,channelfrom#294,... 
8 more fields] orc : +- SubqueryAlias t10 : +- Aggregate [dt#334], [dt#334, count(distinct kb_code#328) AS j_kbs#9L] : +- Filter (dt#334 = 20211126) : +- SubqueryAlias spark_catalog.test.test_b : +- Relation test.test_b[session_id#303,device_id#304,brand#305,model#306,wx_version#307,os#308,net_work_type#309,app_id#310,app_name#311,col_z#312,page_url#313,page_title#314,olabel#315,otitle#316,source#317,send_dt#318,recv_dt#319,request_time#320,write_time#321,client_ip#322,col_a#323,dt_hour#324,product#325,channelfrom#326,... 8 more fields] orc +- SubqueryAlias t11 +- Aggregate [dt#366], [dt#366, count(distinct kb_code#360) AS k_kbs#10L] +- Filter (dt#366 = 20211126) +- SubqueryAlias spark_catalog.test.test_b +- Relation test.test_b[session_id#335,device_id#336,brand#337,model#338,wx_version#339,os#340,net_work_type#341,app_id#342,app_name#343,col_z#344,page_url#345,page_title#346,olabel#347,otitle#348,source#349,send_dt#350,recv_dt#351,request_time#352,write_time#353,client_ip#354,col_a#355,dt_hour#356,product#357,channelfrom#358,... 8 more fields] orc == Physical Plan == CommandResult <empty> +- Execute OptimizedCreateHiveTableAsSelectCommand [Database: test, TableName: test_c, InsertIntoHadoopFsRelationCommand] +- AdaptiveSparkPlan isFinalPlan=true +- == Final Plan == *(34) Project [day#11, week#12, weekday#13, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L, j_kbs#9L, k_kbs#10L] +- *(34) BroadcastHashJoin [dt#14], [dt#366], LeftOuter, BuildRight, false :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L, j_kbs#9L] : +- *(34) BroadcastHashJoin [dt#14], [dt#334], LeftOuter, BuildRight, false : :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L] : : +- *(34) BroadcastHashJoin [dt#14], [dt#302], LeftOuter, BuildRight, false : : :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L] : : : +- *(34) BroadcastHashJoin [dt#14], [dt#270], LeftOuter, BuildRight, false : : : :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L] : : : : +- *(34) BroadcastHashJoin [dt#14], [dt#238], LeftOuter, BuildRight, false : : : : :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L] : : : : : +- *(34) BroadcastHashJoin [dt#14], [dt#206], LeftOuter, BuildRight, false : : : : : :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L] : : : : : : +- *(34) BroadcastHashJoin [dt#14], [dt#174], LeftOuter, BuildRight, false : : : : : : :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L] : : : : : : : +- *(34) BroadcastHashJoin [dt#14], [dt#142], LeftOuter, BuildRight, false : : : : : : : :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L] : : : : : : : : +- *(34) BroadcastHashJoin [dt#14], [dt#110], LeftOuter, BuildRight, false : : : : : : : : :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L] : : : : : : : : : +- *(34) BroadcastHashJoin [dt#14], [dt#78], LeftOuter, BuildRight, false : : : : : : : : : :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L] : : : : : : : : : : +- *(34) BroadcastHashJoin [dt#14], [dt#46], LeftOuter, BuildRight, 
false : : : : : : : : : : :- *(34) ColumnarToRow : : : : : : : : : : : +- FileScan orc test.test_a[day#11,week#12,weekday#13,dt#14] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_a..., PartitionFilters: [isnotnull(dt#14), (dt#14 = 20211126)], PushedFilters: [], ReadSchema: struct<day:string,week:int,weekday:int> : : : : : : : : : : +- BroadcastQueryStage 42 : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643] : : : : : : : : : : +- *(23) HashAggregate(keys=[dt#46], functions=[count(distinct kb_code#40)], output=[dt#46, a_kbs#0L]) : : : : : : : : : : +- AQEShuffleRead coalesced : : : : : : : : : : +- ShuffleQueryStage 21 : : : : : : : : : : +- Exchange hashpartitioning(dt#46, 200), ENSURE_REQUIREMENTS, [id=#983] : : : : : : : : : : +- *(12) HashAggregate(keys=[dt#46], functions=[partial_count(distinct kb_code#40)], output=[dt#46, count#427L]) : : : : : : : : : : +- *(12) HashAggregate(keys=[dt#46, kb_code#40], functions=[], output=[dt#46, kb_code#40]) : : : : : : : : : : +- AQEShuffleRead coalesced : : : : : : : : : : +- ShuffleQueryStage 0 : : : : : : : : : : +- Exchange hashpartitioning(dt#46, kb_code#40, 200), ENSURE_REQUIREMENTS, [id=#427] : : : : : : : : : : +- *(1) HashAggregate(keys=[dt#46, kb_code#40], functions=[], output=[dt#46, kb_code#40]) : : : : : : : : : : +- *(1) ColumnarToRow : : : : : : : : : : +- FileScan orc test.test_b[kb_code#40,dt#46] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#46), (dt#46 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string> : : : : : : : : : +- BroadcastQueryStage 44 : : : : : : : : : +- ReusedExchange [dt#78, b_kbs#1L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643] : : : : : : : : +- BroadcastQueryStage 46 : : : : : : : : +- ReusedExchange [dt#110, c_kbs#2L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643] : : : : : : : +- BroadcastQueryStage 48 : : : : : : : +- ReusedExchange [dt#142, d_kbs#3L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643] : : : : : : +- BroadcastQueryStage 50 : : : : : : +- ReusedExchange [dt#174, e_kbs#4L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643] : : : : : +- BroadcastQueryStage 52 : : : : : +- ReusedExchange [dt#206, f_kbs#5L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643] : : : : +- BroadcastQueryStage 54 : : : : +- ReusedExchange [dt#238, g_kbs#6L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643] : : : +- BroadcastQueryStage 56 : : : +- ReusedExchange [dt#270, h_kbs#7L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643] : : +- BroadcastQueryStage 58 : : +- ReusedExchange [dt#302, i_kbs#8L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643] : +- BroadcastQueryStage 60 : +- ReusedExchange [dt#334, j_kbs#9L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643] +- BroadcastQueryStage 62 +- ReusedExchange [dt#366, k_kbs#10L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, 
true]),false), [id=#1643] +- == Initial Plan == Project [day#11, week#12, weekday#13, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L, j_kbs#9L, k_kbs#10L] +- BroadcastHashJoin [dt#14], [dt#366], LeftOuter, BuildRight, false :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L, j_kbs#9L] : +- BroadcastHashJoin [dt#14], [dt#334], LeftOuter, BuildRight, false : :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L] : : +- BroadcastHashJoin [dt#14], [dt#302], LeftOuter, BuildRight, false : : :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L] : : : +- BroadcastHashJoin [dt#14], [dt#270], LeftOuter, BuildRight, false : : : :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L] : : : : +- BroadcastHashJoin [dt#14], [dt#238], LeftOuter, BuildRight, false : : : : :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L] : : : : : +- BroadcastHashJoin [dt#14], [dt#206], LeftOuter, BuildRight, false : : : : : :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L] : : : : : : +- BroadcastHashJoin [dt#14], [dt#174], LeftOuter, BuildRight, false : : : : : : :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L] : : : : : : : +- BroadcastHashJoin [dt#14], [dt#142], LeftOuter, BuildRight, false : : : : : : : :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L] : : : : : : : : +- BroadcastHashJoin [dt#14], [dt#110], LeftOuter, BuildRight, false : : : : : : : : :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L] : : : : : : : : : +- BroadcastHashJoin [dt#14], [dt#78], LeftOuter, BuildRight, false : : : : : : : : : :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L] : : : : : : : : : : +- BroadcastHashJoin [dt#14], [dt#46], LeftOuter, BuildRight, false : : : : : : : : : : :- FileScan orc test.test_a[day#11,week#12,weekday#13,dt#14] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_a..., PartitionFilters: [isnotnull(dt#14), (dt#14 = 20211126)], PushedFilters: [], ReadSchema: struct<day:string,week:int,weekday:int> : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#302] : : : : : : : : : : +- HashAggregate(keys=[dt#46], functions=[count(distinct kb_code#40)], output=[dt#46, a_kbs#0L]) : : : : : : : : : : +- Exchange hashpartitioning(dt#46, 200), ENSURE_REQUIREMENTS, [id=#299] : : : : : : : : : : +- HashAggregate(keys=[dt#46], functions=[partial_count(distinct kb_code#40)], output=[dt#46, count#427L]) : : : : : : : : : : +- HashAggregate(keys=[dt#46, kb_code#40], functions=[], output=[dt#46, kb_code#40]) : : : : : : : : : : +- Exchange hashpartitioning(dt#46, kb_code#40, 200), ENSURE_REQUIREMENTS, [id=#295] : : : : : : : : : : +- HashAggregate(keys=[dt#46, kb_code#40], functions=[], output=[dt#46, kb_code#40]) : : : : : : : : : : +- FileScan orc test.test_b[kb_code#40,dt#46] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., 
PartitionFilters: [isnotnull(dt#46), (dt#46 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string> : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#312] : : : : : : : : : +- HashAggregate(keys=[dt#78], functions=[count(distinct kb_code#72)], output=[dt#78, b_kbs#1L]) : : : : : : : : : +- Exchange hashpartitioning(dt#78, 200), ENSURE_REQUIREMENTS, [id=#309] : : : : : : : : : +- HashAggregate(keys=[dt#78], functions=[partial_count(distinct kb_code#72)], output=[dt#78, count#431L]) : : : : : : : : : +- HashAggregate(keys=[dt#78, kb_code#72], functions=[], output=[dt#78, kb_code#72]) : : : : : : : : : +- Exchange hashpartitioning(dt#78, kb_code#72, 200), ENSURE_REQUIREMENTS, [id=#305] : : : : : : : : : +- HashAggregate(keys=[dt#78, kb_code#72], functions=[], output=[dt#78, kb_code#72]) : : : : : : : : : +- FileScan orc test.test_b[kb_code#72,dt#78] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#78), (dt#78 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string> : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#322] : : : : : : : : +- HashAggregate(keys=[dt#110], functions=[count(distinct kb_code#104)], output=[dt#110, c_kbs#2L]) : : : : : : : : +- Exchange hashpartitioning(dt#110, 200), ENSURE_REQUIREMENTS, [id=#319] : : : : : : : : +- HashAggregate(keys=[dt#110], functions=[partial_count(distinct kb_code#104)], output=[dt#110, count#435L]) : : : : : : : : +- HashAggregate(keys=[dt#110, kb_code#104], functions=[], output=[dt#110, kb_code#104]) : : : : : : : : +- Exchange hashpartitioning(dt#110, kb_code#104, 200), ENSURE_REQUIREMENTS, [id=#315] : : : : : : : : +- HashAggregate(keys=[dt#110, kb_code#104], functions=[], output=[dt#110, kb_code#104]) : : : : : : : : +- FileScan orc test.test_b[kb_code#104,dt#110] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#110), (dt#110 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string> : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#332] : : : : : : : +- HashAggregate(keys=[dt#142], functions=[count(distinct kb_code#136)], output=[dt#142, d_kbs#3L]) : : : : : : : +- Exchange hashpartitioning(dt#142, 200), ENSURE_REQUIREMENTS, [id=#329] : : : : : : : +- HashAggregate(keys=[dt#142], functions=[partial_count(distinct kb_code#136)], output=[dt#142, count#439L]) : : : : : : : +- HashAggregate(keys=[dt#142, kb_code#136], functions=[], output=[dt#142, kb_code#136]) : : : : : : : +- Exchange hashpartitioning(dt#142, kb_code#136, 200), ENSURE_REQUIREMENTS, [id=#325] : : : : : : : +- HashAggregate(keys=[dt#142, kb_code#136], functions=[], output=[dt#142, kb_code#136]) : : : : : : : +- FileScan orc test.test_b[kb_code#136,dt#142] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#142), (dt#142 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string> : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#342] : : : : : : +- HashAggregate(keys=[dt#174], functions=[count(distinct 
kb_code#168)], output=[dt#174, e_kbs#4L]) : : : : : : +- Exchange hashpartitioning(dt#174, 200), ENSURE_REQUIREMENTS, [id=#339] : : : : : : +- HashAggregate(keys=[dt#174], functions=[partial_count(distinct kb_code#168)], output=[dt#174, count#443L]) : : : : : : +- HashAggregate(keys=[dt#174, kb_code#168], functions=[], output=[dt#174, kb_code#168]) : : : : : : +- Exchange hashpartitioning(dt#174, kb_code#168, 200), ENSURE_REQUIREMENTS, [id=#335] : : : : : : +- HashAggregate(keys=[dt#174, kb_code#168], functions=[], output=[dt#174, kb_code#168]) : : : : : : +- FileScan orc test.test_b[kb_code#168,dt#174] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#174), (dt#174 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string> : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#352] : : : : : +- HashAggregate(keys=[dt#206], functions=[count(distinct kb_code#200)], output=[dt#206, f_kbs#5L]) : : : : : +- Exchange hashpartitioning(dt#206, 200), ENSURE_REQUIREMENTS, [id=#349] : : : : : +- HashAggregate(keys=[dt#206], functions=[partial_count(distinct kb_code#200)], output=[dt#206, count#447L]) : : : : : +- HashAggregate(keys=[dt#206, kb_code#200], functions=[], output=[dt#206, kb_code#200]) : : : : : +- Exchange hashpartitioning(dt#206, kb_code#200, 200), ENSURE_REQUIREMENTS, [id=#345] : : : : : +- HashAggregate(keys=[dt#206, kb_code#200], functions=[], output=[dt#206, kb_code#200]) : : : : : +- FileScan orc test.test_b[kb_code#200,dt#206] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#206), (dt#206 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string> : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#362] : : : : +- HashAggregate(keys=[dt#238], functions=[count(distinct kb_code#232)], output=[dt#238, g_kbs#6L]) : : : : +- Exchange hashpartitioning(dt#238, 200), ENSURE_REQUIREMENTS, [id=#359] : : : : +- HashAggregate(keys=[dt#238], functions=[partial_count(distinct kb_code#232)], output=[dt#238, count#451L]) : : : : +- HashAggregate(keys=[dt#238, kb_code#232], functions=[], output=[dt#238, kb_code#232]) : : : : +- Exchange hashpartitioning(dt#238, kb_code#232, 200), ENSURE_REQUIREMENTS, [id=#355] : : : : +- HashAggregate(keys=[dt#238, kb_code#232], functions=[], output=[dt#238, kb_code#232]) : : : : +- FileScan orc test.test_b[kb_code#232,dt#238] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#238), (dt#238 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string> : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#372] : : : +- HashAggregate(keys=[dt#270], functions=[count(distinct kb_code#264)], output=[dt#270, h_kbs#7L]) : : : +- Exchange hashpartitioning(dt#270, 200), ENSURE_REQUIREMENTS, [id=#369] : : : +- HashAggregate(keys=[dt#270], functions=[partial_count(distinct kb_code#264)], output=[dt#270, count#455L]) : : : +- HashAggregate(keys=[dt#270, kb_code#264], functions=[], output=[dt#270, kb_code#264]) : : : +- Exchange hashpartitioning(dt#270, kb_code#264, 200), ENSURE_REQUIREMENTS, [id=#365] : : : +- 
HashAggregate(keys=[dt#270, kb_code#264], functions=[], output=[dt#270, kb_code#264]) : : : +- FileScan orc test.test_b[kb_code#264,dt#270] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#270), (dt#270 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string> : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#382] : : +- HashAggregate(keys=[dt#302], functions=[count(distinct kb_code#296)], output=[dt#302, i_kbs#8L]) : : +- Exchange hashpartitioning(dt#302, 200), ENSURE_REQUIREMENTS, [id=#379] : : +- HashAggregate(keys=[dt#302], functions=[partial_count(distinct kb_code#296)], output=[dt#302, count#459L]) : : +- HashAggregate(keys=[dt#302, kb_code#296], functions=[], output=[dt#302, kb_code#296]) : : +- Exchange hashpartitioning(dt#302, kb_code#296, 200), ENSURE_REQUIREMENTS, [id=#375] : : +- HashAggregate(keys=[dt#302, kb_code#296], functions=[], output=[dt#302, kb_code#296]) : : +- FileScan orc test.test_b[kb_code#296,dt#302] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#302), (dt#302 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string> : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#392] : +- HashAggregate(keys=[dt#334], functions=[count(distinct kb_code#328)], output=[dt#334, j_kbs#9L]) : +- Exchange hashpartitioning(dt#334, 200), ENSURE_REQUIREMENTS, [id=#389] : +- HashAggregate(keys=[dt#334], functions=[partial_count(distinct kb_code#328)], output=[dt#334, count#463L]) : +- HashAggregate(keys=[dt#334, kb_code#328], functions=[], output=[dt#334, kb_code#328]) : +- Exchange hashpartitioning(dt#334, kb_code#328, 200), ENSURE_REQUIREMENTS, [id=#385] : +- HashAggregate(keys=[dt#334, kb_code#328], functions=[], output=[dt#334, kb_code#328]) : +- FileScan orc test.test_b[kb_code#328,dt#334] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#334), (dt#334 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string> +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#402] +- HashAggregate(keys=[dt#366], functions=[count(distinct kb_code#360)], output=[dt#366, k_kbs#10L]) +- Exchange hashpartitioning(dt#366, 200), ENSURE_REQUIREMENTS, [id=#399] +- HashAggregate(keys=[dt#366], functions=[partial_count(distinct kb_code#360)], output=[dt#366, count#467L]) +- HashAggregate(keys=[dt#366, kb_code#360], functions=[], output=[dt#366, kb_code#360]) +- Exchange hashpartitioning(dt#366, kb_code#360, 200), ENSURE_REQUIREMENTS, [id=#395] +- HashAggregate(keys=[dt#366, kb_code#360], functions=[], output=[dt#366, kb_code#360]) +- FileScan orc test.test_b[kb_code#360,dt#366] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#366), (dt#366 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string> > sql hang at planning stage > -------------------------- > > Key: SPARK-37581 > URL: https://issues.apache.org/jira/browse/SPARK-37581 > Project: Spark > Issue Type: Bug > Components: SQL > Affects 
Versions: 3.1.1, 3.2.0
> Reporter: ocean
> Priority: Major
>
> When executing this SQL, it hangs at the planning stage.
> When DPP is disabled, the SQL can finish very quickly.
> We can reproduce this problem through the example below:
> create table test.test_a (
> day string,
> week int,
> weekday int)
> partitioned by (
> dt varchar(8))
> stored as orc;
> insert into test.test_a partition (dt=20211126) values('1',1,2);
> create table test.test_b (
> session_id string,
> device_id string,
> brand string,
> model string,
> wx_version string,
> os string,
> net_work_type string,
> app_id string,
> app_name string,
> col_z string,
> page_url string,
> page_title string,
> olabel string,
> otitle string,
> source string,
> send_dt string,
> recv_dt string,
> request_time string,
> write_time string,
> client_ip string,
> col_a string,
> dt_hour varchar(12),
> product string,
> channelfrom string,
> customer_um string,
> kb_code string,
> col_b string,
> rectype string,
> errcode string,
> col_c string,
> pageid_merge string)
> partitioned by (
> dt varchar(8))
> stored as orc;
> insert into test.test_b partition(dt=20211126)
> values('2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2');
>
>
> drop table if exists test.test_c;create table if not exists test.test_c
> stored as ORCFILE as
> select calendar.day,calendar.week,calendar.weekday, a_kbs,
> b_kbs, c_kbs,d_kbs,e_kbs,f_kbs,g_kbs,h_kbs,i_kbs,j_kbs,k_kbs
> from (select * from test.test_a where dt = '20211126') calendar
> left join
> (select dt,count(distinct kb_code) as a_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t1
> on calendar.dt = t1.dt
> left join
> (select dt,count(distinct kb_code) as b_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t2
> on calendar.dt = t2.dt
> left join
> (select dt,count(distinct kb_code) as c_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t3
> on calendar.dt = t3.dt
> left join
> (select dt,count(distinct kb_code) as d_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t4
> on calendar.dt = t4.dt
> left join
> (select dt,count(distinct kb_code) as e_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t5
> on calendar.dt = t5.dt
> left join
> (select dt,count(distinct kb_code) as f_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t6
> on calendar.dt = t6.dt
> left join
> (select dt,count(distinct kb_code) as g_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t7
> on calendar.dt = t7.dt
> left join
> (select dt,count(distinct kb_code) as h_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t8
> on calendar.dt = t8.dt
> left join
> (select dt,count(distinct kb_code) as i_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t9
> on calendar.dt = t9.dt
> left join
> (select dt,count(distinct kb_code) as j_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t10
> on calendar.dt = t10.dt
> left join
> (select dt,count(distinct kb_code) as k_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t11
> on calendar.dt = t11.dt
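For anyone re-running the repro above: the workaround mentioned in the description (disabling DPP) can be toggled directly from the spark-sql prompt before the CTAS statement. The sketch below is only illustrative and is not part of the original report; it assumes Spark's standard dynamic partition pruning config key, and the EXPLAIN uses a trimmed one-join version of the query to keep the output short.

{code:sql}
-- Compare planning time with dynamic partition pruning off (the default is true).
set spark.sql.optimizer.dynamicPartitionPruning.enabled=false;

-- Inspect the analyzed/optimized/physical plans without materializing test_c.
explain extended
select calendar.day, calendar.week, calendar.weekday, a_kbs
from (select * from test.test_a where dt = '20211126') calendar
left join
  (select dt, count(distinct kb_code) as a_kbs
   from test.test_b
   where dt = '20211126'
   group by dt) t1
  on calendar.dt = t1.dt;
{code}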