[ 
https://issues.apache.org/jira/browse/SPARK-32809?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Hyukjin Kwon updated SPARK-32809:
---------------------------------
    Description: 
{code}
class Exec3 {
  private val exec: SparkConf = new 
SparkConf().setMaster("local[1]").setAppName("exec3")
  private val context = new SparkContext(exec)
  context.setCheckpointDir("checkPoint")
 
  /**
   * get total number by key
   * in this project desired results are ("apple",25) ("huawei",20)
   * but in fact i get ("apple",150) ("huawei",20)
   *   when i change it to local[3] the result is correct
   *  i want to know what causes it and how to solve it
   */
  @Test
  def testError(): Unit ={
    val rdd = context.parallelize(Seq(("apple", 10), ("apple", 15), ("huawei", 
20)))
    rdd.aggregateByKey(1.0)(
      seqOp = (zero, price) => price * zero,
      combOp = (curr, agg) => curr + agg).collect().foreach(println(_))
    context.stop()
  }
}
{code}


  was:
class Exec3 {

private val exec: SparkConf = new 
SparkConf().setMaster("local[1]").setAppName("exec3")
 private val context = new SparkContext(exec)
 context.setCheckpointDir("checkPoint")
 
 /**
 * get total number by key 
 * in this project desired results are ("apple",25) ("huwei",20)
 * but in fact i get ("apple",150) ("huawei",20)
 *   when i change it to local[3] the result is correct
*  i want to know   which cause it and how to slove it 
 */
 @Test
 def testError(): Unit ={
 val rdd = context.parallelize(Seq(("apple", 10), ("apple", 15), ("huawei", 
20)))
 rdd.aggregateByKey(1.0)(
 seqOp = (zero, price) => price * zero,
 combOp = (curr, agg) => curr + agg
 ).collect().foreach(println(_))
 context.stop()
 }
 }


> RDD different partitions cause different results
> --------------------------------------------------
>
>                 Key: SPARK-32809
>                 URL: https://issues.apache.org/jira/browse/SPARK-32809
>             Project: Spark
>          Issue Type: Bug
>          Components: Spark Core
>    Affects Versions: 2.2.0
>         Environment: spark2.2.0 ,scala 2.11.8 , hadoop-client2.6.0
>            Reporter: zhangchenglong
>            Priority: Major
>   Original Estimate: 12h
>  Remaining Estimate: 12h
>
> {code}
> class Exec3 {
>   private val exec: SparkConf = new 
> SparkConf().setMaster("local[1]").setAppName("exec3")
>   private val context = new SparkContext(exec)
>   context.setCheckpointDir("checkPoint")
>  
>   /**
>    * get total number by key
>    * in this project desired results are ("apple",25) ("huawei",20)
>    * but in fact i get ("apple",150) ("huawei",20)
>    *   when i change it to local[3] the result is correct
>    *  i want to know what causes it and how to solve it
>    */
>   @Test
>   def testError(): Unit ={
>     val rdd = context.parallelize(Seq(("apple", 10), ("apple", 15), 
> ("huawei", 20)))
>     rdd.aggregateByKey(1.0)(
>       seqOp = (zero, price) => price * zero,
>       combOp = (curr, agg) => curr + agg).collect().foreach(println(_))
>     context.stop()
>   }
> }
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to