org.apache.spark.sql.catalyst.errors.package$TreeNodeException: sort, tree:
Sort [net_site#50 ASC,device#6 ASC], true
 Exchange (RangePartitioning 200)
  Project 
[net_site#50,device#6,total_count#105L,adblock_count#106L,noanalytics_count#107L,unique_nk_count#109L]
   HashOuterJoin [net_site#50,device#6], [net_site#530,device#449],
LeftOuter, None
    Project 
[adblock_count#106L,total_count#105L,net_site#50,noanalytics_count#107L,device#6]
     HashOuterJoin [net_site#50,device#6], [net_site#419,device#338],
LeftOuter, None
      Project [total_count#105L,device#6,adblock_count#106L,net_site#50]
       HashOuterJoin [net_site#50,device#6],
[net_site#308,device#227], LeftOuter, None
        Project [total_count#105L,device#6,net_site#50]
         HashOuterJoin [net_site#50,device#6],
[net_site#197,device#116], LeftOuter, None
          Project [device#6,net_site#50]
           Aggregate false, [net_site#50,device#6], [net_site#50,device#6]
            Exchange (HashPartitioning 200)
             Aggregate true, [net_site#50,device#6], [net_site#50,device#6]
              InMemoryColumnarTableScan [net_site#50,device#6],
(InMemoryRelation
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35], true,
10000, StorageLevel(true, true, false, true, 1), (Project
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35]), None)
          Aggregate false, [net_site#197,device#116],
[net_site#197,device#116,Coalesce(SUM(PartialCount#705L),0) AS
total_count#105L]
           Exchange (HashPartitioning 200)
            Aggregate true, [net_site#197,device#116],
[net_site#197,device#116,COUNT(device#116) AS PartialCount#705L]
             Project [net_site#197,device#116]
              Filter (IS NULL et#118 && (underscore_et#145 = view))
               InMemoryColumnarTableScan
[net_site#197,device#116,et#118,underscore_et#145], [IS NULL
et#118,(underscore_et#145 = view)], (InMemoryRelation
[net_site#197,device#116,cbd#115,et#118,news_key#126,underscore_et#145],
true, 10000, StorageLevel(true, true, false, true, 1), (Project
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35]), None)
        Aggregate false, [net_site#308,device#227],
[net_site#308,device#227,Coalesce(SUM(PartialCount#709L),0) AS
adblock_count#106L]
         Exchange (HashPartitioning 200)
          Aggregate true, [net_site#308,device#227],
[net_site#308,device#227,COUNT(device#227) AS PartialCount#709L]
           Project [net_site#308,device#227]
            Filter (cbd#226 LIKE _1___)
             InMemoryColumnarTableScan
[net_site#308,device#227,cbd#226], [(cbd#226 LIKE _1___)],
(InMemoryRelation
[net_site#308,device#227,cbd#226,et#229,news_key#237,underscore_et#256],
true, 10000, StorageLevel(true, true, false, true, 1), (Project
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35]), None)
      Aggregate false, [net_site#419,device#338],
[net_site#419,device#338,Coalesce(SUM(PartialCount#713L),0) AS
noanalytics_count#107L]
       Exchange (HashPartitioning 200)
        Aggregate true, [net_site#419,device#338],
[net_site#419,device#338,COUNT(device#338) AS PartialCount#713L]
         Project [net_site#419,device#338]
          Filter ((CAST(et#340, DoubleType) = 3.0) && IS NOT NULL net_site#419)
           InMemoryColumnarTableScan [net_site#419,device#338,et#340],
[(CAST(et#340, DoubleType) = 3.0),IS NOT NULL net_site#419],
(InMemoryRelation
[net_site#419,device#338,cbd#337,et#340,news_key#348,underscore_et#367],
true, 10000, StorageLevel(true, true, false, true, 1), (Project
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35]), None)
    Aggregate false, [net_site#530,device#449],
[net_site#530,device#449,Coalesce(SUM(PartialCount#717L),0) AS
unique_nk_count#109L]
     Exchange (HashPartitioning 200)
      Aggregate true, [net_site#530,device#449],
[net_site#530,device#449,COUNT(device#449) AS PartialCount#717L]
       Project [net_site#530,device#449]
        Filter (cnt#108L = 1)
         Aggregate false, [net_site#530,device#449,news_key#459],
[net_site#530,device#449,news_key#459,Coalesce(SUM(PartialCount#719L),0)
AS cnt#108L]
          Exchange (HashPartitioning 200)
           Aggregate true, [net_site#530,device#449,news_key#459],
[net_site#530,device#449,news_key#459,COUNT(news_key#459) AS
PartialCount#719L]
            Project [net_site#530,device#449,news_key#459]
             Filter (CAST(et#451, DoubleType) = 3.0)
              InMemoryColumnarTableScan
[net_site#530,device#449,news_key#459,et#451], [(CAST(et#451,
DoubleType) = 3.0)], (InMemoryRelation
[net_site#530,device#449,cbd#448,et#451,news_key#459,underscore_et#478],
true, 10000, StorageLevel(true, true, false, true, 1), (Project
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35]), None)

org.apache.spark.sql.catalyst.errors.package$TreeNodeException: sort, tree:
Sort [net_site#50 ASC,device#6 ASC], true
 Exchange (RangePartitioning 200)
  Project 
[net_site#50,device#6,total_count#105L,adblock_count#106L,noanalytics_count#107L,unique_nk_count#109L]
   HashOuterJoin [net_site#50,device#6], [net_site#530,device#449],
LeftOuter, None
    Project 
[adblock_count#106L,total_count#105L,net_site#50,noanalytics_count#107L,device#6]
     HashOuterJoin [net_site#50,device#6], [net_site#419,device#338],
LeftOuter, None
      Project [total_count#105L,device#6,adblock_count#106L,net_site#50]
       HashOuterJoin [net_site#50,device#6],
[net_site#308,device#227], LeftOuter, None
        Project [total_count#105L,device#6,net_site#50]
         HashOuterJoin [net_site#50,device#6],
[net_site#197,device#116], LeftOuter, None
          Project [device#6,net_site#50]
           Aggregate false, [net_site#50,device#6], [net_site#50,device#6]
            Exchange (HashPartitioning 200)
             Aggregate true, [net_site#50,device#6], [net_site#50,device#6]
              InMemoryColumnarTableScan [net_site#50,device#6],
(InMemoryRelation
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35], true,
10000, StorageLevel(true, true, false, true, 1), (Project
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35]), None)
          Aggregate false, [net_site#197,device#116],
[net_site#197,device#116,Coalesce(SUM(PartialCount#705L),0) AS
total_count#105L]
           Exchange (HashPartitioning 200)
            Aggregate true, [net_site#197,device#116],
[net_site#197,device#116,COUNT(device#116) AS PartialCount#705L]
             Project [net_site#197,device#116]
              Filter (IS NULL et#118 && (underscore_et#145 = view))
               InMemoryColumnarTableScan
[net_site#197,device#116,et#118,underscore_et#145], [IS NULL
et#118,(underscore_et#145 = view)], (InMemoryRelation
[net_site#197,device#116,cbd#115,et#118,news_key#126,underscore_et#145],
true, 10000, StorageLevel(true, true, false, true, 1), (Project
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35]), None)
        Aggregate false, [net_site#308,device#227],
[net_site#308,device#227,Coalesce(SUM(PartialCount#709L),0) AS
adblock_count#106L]
         Exchange (HashPartitioning 200)
          Aggregate true, [net_site#308,device#227],
[net_site#308,device#227,COUNT(device#227) AS PartialCount#709L]
           Project [net_site#308,device#227]
            Filter (cbd#226 LIKE _1___)
             InMemoryColumnarTableScan
[net_site#308,device#227,cbd#226], [(cbd#226 LIKE _1___)],
(InMemoryRelation
[net_site#308,device#227,cbd#226,et#229,news_key#237,underscore_et#256],
true, 10000, StorageLevel(true, true, false, true, 1), (Project
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35]), None)
      Aggregate false, [net_site#419,device#338],
[net_site#419,device#338,Coalesce(SUM(PartialCount#713L),0) AS
noanalytics_count#107L]
       Exchange (HashPartitioning 200)
        Aggregate true, [net_site#419,device#338],
[net_site#419,device#338,COUNT(device#338) AS PartialCount#713L]
         Project [net_site#419,device#338]
          Filter ((CAST(et#340, DoubleType) = 3.0) && IS NOT NULL net_site#419)
           InMemoryColumnarTableScan [net_site#419,device#338,et#340],
[(CAST(et#340, DoubleType) = 3.0),IS NOT NULL net_site#419],
(InMemoryRelation
[net_site#419,device#338,cbd#337,et#340,news_key#348,underscore_et#367],
true, 10000, StorageLevel(true, true, false, true, 1), (Project
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35]), None)
    Aggregate false, [net_site#530,device#449],
[net_site#530,device#449,Coalesce(SUM(PartialCount#717L),0) AS
unique_nk_count#109L]
     Exchange (HashPartitioning 200)
      Aggregate true, [net_site#530,device#449],
[net_site#530,device#449,COUNT(device#449) AS PartialCount#717L]
       Project [net_site#530,device#449]
        Filter (cnt#108L = 1)
         Aggregate false, [net_site#530,device#449,news_key#459],
[net_site#530,device#449,news_key#459,Coalesce(SUM(PartialCount#719L),0)
AS cnt#108L]
          Exchange (HashPartitioning 200)
           Aggregate true, [net_site#530,device#449,news_key#459],
[net_site#530,device#449,news_key#459,COUNT(news_key#459) AS
PartialCount#719L]
            Project [net_site#530,device#449,news_key#459]
             Filter (CAST(et#451, DoubleType) = 3.0)
              InMemoryColumnarTableScan
[net_site#530,device#449,news_key#459,et#451], [(CAST(et#451,
DoubleType) = 3.0)], (InMemoryRelation
[net_site#530,device#449,cbd#448,et#451,news_key#459,underscore_et#478],
true, 10000, StorageLevel(true, true, false, true, 1), (Project
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35]), None)

        at 
org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:49)
        at 
org.apache.spark.sql.execution.Sort.doExecute(basicOperators.scala:189)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88)
        at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:87)
        at org.apache.spark.sql.DataFrame.rdd$lzycompute(DataFrame.scala:1367)
        at org.apache.spark.sql.DataFrame.rdd(DataFrame.scala:1364)
        at 
com.databricks.spark.csv.package$CsvSchemaRDD.saveAsCsvFile(package.scala:135)
        at 
com.databricks.spark.csv.DefaultSource.createRelation(DefaultSource.scala:165)
        at org.apache.spark.sql.sources.ResolvedDataSource$.apply(ddl.scala:309)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:144)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:135)
        at 
com.news.report.adblock.AdblockReport.getdAdBlockOverview(AdblockReport.java:155)
        at 
com.news.report.adblock.AdblockReport.genReport(AdblockReport.java:104)
        at com.news.report.adblock.AdblockReport.run(AdblockReport.java:82)
        at com.news.report.adblock.AdblockReport.main(AdblockReport.java:54)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at 
org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:483)
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException:
execute, tree:
Exchange (RangePartitioning 200)
 Project 
[net_site#50,device#6,total_count#105L,adblock_count#106L,noanalytics_count#107L,unique_nk_count#109L]
  HashOuterJoin [net_site#50,device#6], [net_site#530,device#449],
LeftOuter, None
   Project 
[adblock_count#106L,total_count#105L,net_site#50,noanalytics_count#107L,device#6]
    HashOuterJoin [net_site#50,device#6], [net_site#419,device#338],
LeftOuter, None
     Project [total_count#105L,device#6,adblock_count#106L,net_site#50]
      HashOuterJoin [net_site#50,device#6], [net_site#308,device#227],
LeftOuter, None
       Project [total_count#105L,device#6,net_site#50]
        HashOuterJoin [net_site#50,device#6],
[net_site#197,device#116], LeftOuter, None
         Project [device#6,net_site#50]
          Aggregate false, [net_site#50,device#6], [net_site#50,device#6]
           Exchange (HashPartitioning 200)
            Aggregate true, [net_site#50,device#6], [net_site#50,device#6]
             InMemoryColumnarTableScan [net_site#50,device#6],
(InMemoryRelation
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35], true,
10000, StorageLevel(true, true, false, true, 1), (Project
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35]), None)
         Aggregate false, [net_site#197,device#116],
[net_site#197,device#116,Coalesce(SUM(PartialCount#705L),0) AS
total_count#105L]
          Exchange (HashPartitioning 200)
           Aggregate true, [net_site#197,device#116],
[net_site#197,device#116,COUNT(device#116) AS PartialCount#705L]
            Project [net_site#197,device#116]
             Filter (IS NULL et#118 && (underscore_et#145 = view))
              InMemoryColumnarTableScan
[net_site#197,device#116,et#118,underscore_et#145], [IS NULL
et#118,(underscore_et#145 = view)], (InMemoryRelation
[net_site#197,device#116,cbd#115,et#118,news_key#126,underscore_et#145],
true, 10000, StorageLevel(true, true, false, true, 1), (Project
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35]), None)
       Aggregate false, [net_site#308,device#227],
[net_site#308,device#227,Coalesce(SUM(PartialCount#709L),0) AS
adblock_count#106L]
        Exchange (HashPartitioning 200)
         Aggregate true, [net_site#308,device#227],
[net_site#308,device#227,COUNT(device#227) AS PartialCount#709L]
          Project [net_site#308,device#227]
           Filter (cbd#226 LIKE _1___)
            InMemoryColumnarTableScan
[net_site#308,device#227,cbd#226], [(cbd#226 LIKE _1___)],
(InMemoryRelation
[net_site#308,device#227,cbd#226,et#229,news_key#237,underscore_et#256],
true, 10000, StorageLevel(true, true, false, true, 1), (Project
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35]), None)
     Aggregate false, [net_site#419,device#338],
[net_site#419,device#338,Coalesce(SUM(PartialCount#713L),0) AS
noanalytics_count#107L]
      Exchange (HashPartitioning 200)
       Aggregate true, [net_site#419,device#338],
[net_site#419,device#338,COUNT(device#338) AS PartialCount#713L]
        Project [net_site#419,device#338]
         Filter ((CAST(et#340, DoubleType) = 3.0) && IS NOT NULL net_site#419)
          InMemoryColumnarTableScan [net_site#419,device#338,et#340],
[(CAST(et#340, DoubleType) = 3.0),IS NOT NULL net_site#419],
(InMemoryRelation
[net_site#419,device#338,cbd#337,et#340,news_key#348,underscore_et#367],
true, 10000, StorageLevel(true, true, false, true, 1), (Project
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35]), None)
   Aggregate false, [net_site#530,device#449],
[net_site#530,device#449,Coalesce(SUM(PartialCount#717L),0) AS
unique_nk_count#109L]
    Exchange (HashPartitioning 200)
     Aggregate true, [net_site#530,device#449],
[net_site#530,device#449,COUNT(device#449) AS PartialCount#717L]
      Project [net_site#530,device#449]
       Filter (cnt#108L = 1)
        Aggregate false, [net_site#530,device#449,news_key#459],
[net_site#530,device#449,news_key#459,Coalesce(SUM(PartialCount#719L),0)
AS cnt#108L]
         Exchange (HashPartitioning 200)
          Aggregate true, [net_site#530,device#449,news_key#459],
[net_site#530,device#449,news_key#459,COUNT(news_key#459) AS
PartialCount#719L]
           Project [net_site#530,device#449,news_key#459]
            Filter (CAST(et#451, DoubleType) = 3.0)
             InMemoryColumnarTableScan
[net_site#530,device#449,news_key#459,et#451], [(CAST(et#451,
DoubleType) = 3.0)], (InMemoryRelation
[net_site#530,device#449,cbd#448,et#451,news_key#459,underscore_et#478],
true, 10000, StorageLevel(true, true, false, true, 1), (Project
[net_site#50,device#6,cbd#5,et#8,news_key#16,underscore_et#35]), None)

        at 
org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:49)
        at org.apache.spark.sql.execution.Exchange.doExecute(Exchange.scala:171)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88)
        at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:87)
        at 
org.apache.spark.sql.execution.Sort$$anonfun$doExecute$5.apply(basicOperators.scala:190)
        at 
org.apache.spark.sql.execution.Sort$$anonfun$doExecute$5.apply(basicOperators.scala:190)
        at 
org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:48)
        ... 21 more


On 16 November 2015 at 21:16, Zhang, Jingyu <jingyu.zh...@news.com.au>
wrote:

> I am using spark-csv to save files to S3, and it fails with a "Size exceeds
> Integer.MAX_VALUE" error. Please let me know how to fix it. Thanks.
>
> df.write()
>     .format("com.databricks.spark.csv")
>     .option("header", "true")
>     .save("s3://newcars.csv");
>
> java.lang.IllegalArgumentException: Size exceeds Integer.MAX_VALUE
>       at sun.nio.ch.FileChannelImpl.map(FileChannelImpl.java:860)
>       at 
> org.apache.spark.storage.DiskStore$$anonfun$getBytes$2.apply(DiskStore.scala:125)
>       at 
> org.apache.spark.storage.DiskStore$$anonfun$getBytes$2.apply(DiskStore.scala:113)
>       at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1285)
>       at org.apache.spark.storage.DiskStore.getBytes(DiskStore.scala:127)
>       at org.apache.spark.storage.DiskStore.getBytes(DiskStore.scala:134)
>       at 
> org.apache.spark.storage.BlockManager.doGetLocal(BlockManager.scala:511)
>       at 
> org.apache.spark.storage.BlockManager.getLocal(BlockManager.scala:429)
>       at org.apache.spark.storage.BlockManager.get(BlockManager.scala:617)
>       at 
> org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:154)
>       at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:242)
>       at 
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
>       at 
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
>       at 
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
>       at 
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
>       at 
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
>       at 
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:70)
>       at 
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
>       at org.apache.spark.scheduler.Task.run(Task.scala:70)
>       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>       at java.lang.Thread.run(Thread.java:745)
>
>
>

-- 
This message and its attachments may contain legally privileged or 
confidential information. It is intended solely for the named addressee. If 
you are not the addressee indicated in this message or responsible for 
delivery of the message to the addressee, you may not copy or deliver this 
message or its attachments to anyone. Rather, you should permanently delete 
this message and its attachments and kindly notify the sender by reply 
e-mail. Any content of this message and its attachments which does not 
relate to the official business of the sending company must be taken not to 
have been sent or endorsed by that company or any of its related entities. 
No warranty is made that the e-mail or attachments are free from computer 
virus or other defect.

Reply via email to