[ https://issues.apache.org/jira/browse/SPARK-17396?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15464602#comment-15464602 ]
pin_zhang commented on SPARK-17396: ----------------------------------- 1. Thousands of threads are created; they look like "ForkJoinPool-20-worker-9" #329 daemon prio=5 os_prio=0 tid=0x000000000ac87000 nid=0x3d43 waiting on condition [0x000000005069f000] "ForkJoinPool-19-worker-3" #324 daemon prio=5 os_prio=0 tid=0x000000000ae60000 nid=0x3c2a waiting on condition [0x000000005039c000] 2. The threads appear to be created by UnionRDD > Threads number keep increasing when query on external CSV partitioned table > --------------------------------------------------------------------------- > > Key: SPARK-17396 > URL: https://issues.apache.org/jira/browse/SPARK-17396 > Project: Spark > Issue Type: Bug > Components: Spark Core > Affects Versions: 2.0.0 > Reporter: pin_zhang > > 1. Create an external partitioned table row format CSV > 2. Add 16 partitions to the table > 3. Run SQL "select count(*) from test_csv" > 4. The ForkJoinThread count keeps increasing > This happened when the table partition count was greater than 10. > 5. 
Test Code > import org.apache.spark.SparkConf > import org.apache.spark.SparkContext > import org.apache.spark.sql.hive.HiveContext > object Bugs { > def main(args: Array[String]): Unit = { > val location = "file:///g:/home/test/csv" > val create = s"""CREATE EXTERNAL TABLE test_csv > (ID string, SEQ string ) > PARTITIONED BY(index int) > ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' > LOCATION "${location}" > """ > val add_part = s""" > ALTER TABLE test_csv ADD > PARTITION (index=1)LOCATION '${location}/index=1' > PARTITION (index=2)LOCATION '${location}/index=2' > PARTITION (index=3)LOCATION '${location}/index=3' > PARTITION (index=4)LOCATION '${location}/index=4' > PARTITION (index=5)LOCATION '${location}/index=5' > PARTITION (index=6)LOCATION '${location}/index=6' > PARTITION (index=7)LOCATION '${location}/index=7' > PARTITION (index=8)LOCATION '${location}/index=8' > PARTITION (index=9)LOCATION '${location}/index=9' > PARTITION (index=10)LOCATION '${location}/index=10' > PARTITION (index=11)LOCATION '${location}/index=11' > PARTITION (index=12)LOCATION '${location}/index=12' > PARTITION (index=13)LOCATION '${location}/index=13' > PARTITION (index=14)LOCATION '${location}/index=14' > PARTITION (index=15)LOCATION '${location}/index=15' > PARTITION (index=16)LOCATION '${location}/index=16' > """ > val conf = new SparkConf().setAppName("scala").setMaster("local[2]") > conf.set("spark.sql.warehouse.dir", "file:///g:/home/warehouse") > val ctx = new SparkContext(conf) > val hctx = new HiveContext(ctx) > hctx.sql(create) > hctx.sql(add_part) > for (i <- 1 to 6) { > new Query(hctx).start() > } > } > class Query(htcx: HiveContext) extends Thread { > setName("Query-Thread") > override def run = { > while (true) { > htcx.sql("select count(*) from test_csv").show() > Thread.sleep(100) > } > } > } > } -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: 
issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org