[jira] [Updated] (SPARK-17396) Threads number keep increasing when query on external CSV partitioned table

2016-09-10 Thread Sean Owen (JIRA)

 [ 
https://issues.apache.org/jira/browse/SPARK-17396?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Sean Owen updated SPARK-17396:
--
Assignee: Ryan Blue

> Threads number keep increasing when query on external CSV partitioned table
> ---
>
> Key: SPARK-17396
> URL: https://issues.apache.org/jira/browse/SPARK-17396
> Project: Spark
>  Issue Type: Bug
>  Components: Spark Core
>Affects Versions: 2.0.0
>Reporter: pin_zhang
>Assignee: Ryan Blue
> Fix For: 2.0.1, 2.1.0
>
>
> 1. Create an external partitioned table with CSV row format
> 2. Add 16 partitions to the table
> 3. Run SQL "select count(*) from test_csv"
> 4. The ForkJoinThread count keeps increasing
> This happens when the table has more than 10 partitions.
> 5. Test Code
> {code:lang=java}
> import org.apache.spark.SparkConf
> import org.apache.spark.SparkContext
> import org.apache.spark.sql.hive.HiveContext
> object Bugs {
>   def main(args: Array[String]): Unit = {
> val location = "file:///g:/home/test/csv"
> val create = s"""CREATE   EXTERNAL  TABLE  test_csv
>  (ID string,  SEQ string )
>   PARTITIONED BY(index int)
>   ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
>   LOCATION "${location}" 
>   """
> val add_part = s"""
>   ALTER TABLE test_csv ADD 
>   PARTITION (index=1)LOCATION '${location}/index=1'
>   PARTITION (index=2)LOCATION '${location}/index=2'
>   PARTITION (index=3)LOCATION '${location}/index=3'
>   PARTITION (index=4)LOCATION '${location}/index=4'
>   PARTITION (index=5)LOCATION '${location}/index=5'
>   PARTITION (index=6)LOCATION '${location}/index=6'
>   PARTITION (index=7)LOCATION '${location}/index=7'
>   PARTITION (index=8)LOCATION '${location}/index=8'
>   PARTITION (index=9)LOCATION '${location}/index=9'
>   PARTITION (index=10)LOCATION '${location}/index=10'
>   PARTITION (index=11)LOCATION '${location}/index=11'
>   PARTITION (index=12)LOCATION '${location}/index=12'
>   PARTITION (index=13)LOCATION '${location}/index=13'
>   PARTITION (index=14)LOCATION '${location}/index=14'
>   PARTITION (index=15)LOCATION '${location}/index=15'
>   PARTITION (index=16)LOCATION '${location}/index=16'
> """
> val conf = new SparkConf().setAppName("scala").setMaster("local[2]")
> conf.set("spark.sql.warehouse.dir", "file:///g:/home/warehouse")
> val ctx = new SparkContext(conf)
> val hctx = new HiveContext(ctx)
> hctx.sql(create)
> hctx.sql(add_part)
>  for (i <- 1 to 6) {
>   new Query(hctx).start()
> }
>   }
>   class Query(htcx: HiveContext) extends Thread {
> setName("Query-Thread")
> override def run = {
>   while (true) {
> htcx.sql("select count(*) from test_csv").show()
> Thread.sleep(100)
>   }
> }
>   }
> }
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

-
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org



[jira] [Updated] (SPARK-17396) Threads number keep increasing when query on external CSV partitioned table

2016-09-05 Thread Ryan Blue (JIRA)

 [ 
https://issues.apache.org/jira/browse/SPARK-17396?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ryan Blue updated SPARK-17396:
--
Description: 
1. Create an external partitioned table with CSV row format
2. Add 16 partitions to the table
3. Run SQL "select count(*) from test_csv"
4. The ForkJoinThread count keeps increasing
This happens when the table has more than 10 partitions.
5. Test Code

{code:lang=java}
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.hive.HiveContext

/**
 * Reproducer for SPARK-17396.
 *
 * Creates an external Hive table `test_csv` (OpenCSVSerde row format,
 * partitioned by `index`), registers 16 partitions for it, and then starts
 * six threads that each run `select count(*) from test_csv` in an endless
 * loop against one shared HiveContext. According to the issue report, the
 * number of ForkJoin threads keeps growing while this runs.
 */
object Bugs {

  /**
   * Sets up the table and its partitions, then starts the query threads.
   *
   * @param args command-line arguments; not read by this program
   */
  def main(args: Array[String]): Unit = {

// Root directory of the external table; each partition lives in a
// sub-directory named index=N beneath it (see add_part below).
val location = "file:///g:/home/test/csv"
// DDL for the external, partitioned table backed by OpenCSVSerde.
val create = s"""CREATE   EXTERNAL  TABLE  test_csv
 (ID string,  SEQ string )
  PARTITIONED BY(index int)
  ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
  LOCATION "${location}" 
  """
// One ALTER TABLE statement that registers partitions index=1 .. index=16.
val add_part = s"""
  ALTER TABLE test_csv ADD 
  PARTITION (index=1)LOCATION '${location}/index=1'
  PARTITION (index=2)LOCATION '${location}/index=2'
  PARTITION (index=3)LOCATION '${location}/index=3'
  PARTITION (index=4)LOCATION '${location}/index=4'
  PARTITION (index=5)LOCATION '${location}/index=5'
  PARTITION (index=6)LOCATION '${location}/index=6'
  PARTITION (index=7)LOCATION '${location}/index=7'
  PARTITION (index=8)LOCATION '${location}/index=8'
  PARTITION (index=9)LOCATION '${location}/index=9'
  PARTITION (index=10)LOCATION '${location}/index=10'
  PARTITION (index=11)LOCATION '${location}/index=11'
  PARTITION (index=12)LOCATION '${location}/index=12'
  PARTITION (index=13)LOCATION '${location}/index=13'
  PARTITION (index=14)LOCATION '${location}/index=14'
  PARTITION (index=15)LOCATION '${location}/index=15'
  PARTITION (index=16)LOCATION '${location}/index=16'
"""

// Application named "scala", master "local[2]", explicit warehouse directory.
val conf = new SparkConf().setAppName("scala").setMaster("local[2]")
conf.set("spark.sql.warehouse.dir", "file:///g:/home/warehouse")
val ctx = new SparkContext(conf)
val hctx = new HiveContext(ctx)
// Create the table first, then attach its 16 partitions.
hctx.sql(create)
hctx.sql(add_part)
 // Start six query threads that all share the same HiveContext. The loop
 // variable is unused, and main returns without joining the threads.
 for (i <- 1 to 6) {
  new Query(hctx).start()
}
  }

  /**
   * Thread that queries `test_csv` in an endless loop.
   *
   * @param htcx the HiveContext used to run the query (main passes the same
   *             instance to every Query)
   */
  class Query(htcx: HiveContext) extends Thread {

// Every instance gets the same thread name.
setName("Query-Thread")

// Never returns: run the count query, display it with show(), pause
// 100 ms, repeat. There is no exit condition.
override def run = {
  while (true) {
htcx.sql("select count(*) from test_csv").show()
Thread.sleep(100)
  }

}
  }
}
{code}

  was:
1. Create an external partitioned table with CSV row format
2. Add 16 partitions to the table
3. Run SQL "select count(*) from test_csv"
4. The ForkJoinThread count keeps increasing
This happens when the table has more than 10 partitions.
5. Test Code
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.hive.HiveContext

/**
 * Reproducer for SPARK-17396.
 *
 * Creates an external Hive table `test_csv` (OpenCSVSerde row format,
 * partitioned by `index`), registers 16 partitions for it, and then starts
 * six threads that each run `select count(*) from test_csv` in an endless
 * loop against one shared HiveContext. According to the issue report, the
 * number of ForkJoin threads keeps growing while this runs.
 */
object Bugs {

  /**
   * Sets up the table and its partitions, then starts the query threads.
   *
   * @param args command-line arguments; not read by this program
   */
  def main(args: Array[String]): Unit = {

// Root directory of the external table; each partition lives in a
// sub-directory named index=N beneath it (see add_part below).
val location = "file:///g:/home/test/csv"
// DDL for the external, partitioned table backed by OpenCSVSerde.
val create = s"""CREATE   EXTERNAL  TABLE  test_csv
 (ID string,  SEQ string )
  PARTITIONED BY(index int)
  ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
  LOCATION "${location}" 
  """
// One ALTER TABLE statement that registers partitions index=1 .. index=16.
val add_part = s"""
  ALTER TABLE test_csv ADD 
  PARTITION (index=1)LOCATION '${location}/index=1'
  PARTITION (index=2)LOCATION '${location}/index=2'
  PARTITION (index=3)LOCATION '${location}/index=3'
  PARTITION (index=4)LOCATION '${location}/index=4'
  PARTITION (index=5)LOCATION '${location}/index=5'
  PARTITION (index=6)LOCATION '${location}/index=6'
  PARTITION (index=7)LOCATION '${location}/index=7'
  PARTITION (index=8)LOCATION '${location}/index=8'
  PARTITION (index=9)LOCATION '${location}/index=9'
  PARTITION (index=10)LOCATION '${location}/index=10'
  PARTITION (index=11)LOCATION '${location}/index=11'
  PARTITION (index=12)LOCATION '${location}/index=12'
  PARTITION (index=13)LOCATION '${location}/index=13'
  PARTITION (index=14)LOCATION '${location}/index=14'
  PARTITION (index=15)LOCATION '${location}/index=15'
  PARTITION (index=16)LOCATION '${location}/index=16'
"""

// Application named "scala", master "local[2]", explicit warehouse directory.
val conf = new SparkConf().setAppName("scala").setMaster("local[2]")
conf.set("spark.sql.warehouse.dir", "file:///g:/home/warehouse")
val ctx = new SparkContext(conf)
val hctx = new HiveContext(ctx)
// Create the table first, then attach its 16 partitions.
hctx.sql(create)
hctx.sql(add_part)
 // Start six query threads that all share the same HiveContext. The loop
 // variable is unused, and main returns without joining the threads.
 for (i <- 1 to 6) {
  new Query(hctx).start()
}
  }

  /**
   * Thread that queries `test_csv` in an endless loop.
   *
   * @param htcx the HiveContext used to run the query (main passes the same
   *             instance to every Query)
   */
  class Query(htcx: HiveContext) extends Thread {

// Every instance gets the same thread name.
setName("Query-Thread")

// Never returns: run the count query, display it with show(), pause
// 100 ms, repeat. There is no exit condition.
override def run = {
  while (true) {
htcx.sql("select count(*) from test_csv").show()
Thread.sleep(100)
  }

}
  }
}


> Threads number keep increasing when query on external CSV partitioned table
> ---
>
> Key: SPARK-17396
> URL: https://issues.apache.org/jira/browse/SPARK-17396
> Project: Spark
>  Issue Type: Bug
>  Components: Spark Core
>Affects