[CARBONDATA-368] Improve performance of dataframe loading. This closes #278
Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/567fa513 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/567fa513 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/567fa513 Branch: refs/heads/master Commit: 567fa5131628b70c8c4829368fda6d48cb013af3 Parents: 879bfe7 f8a0c87 Author: jackylk <jacky.li...@huawei.com> Authored: Tue Nov 29 17:15:20 2016 +0800 Committer: jackylk <jacky.li...@huawei.com> Committed: Tue Nov 29 17:15:20 2016 +0800 ---------------------------------------------------------------------- .../spark/rdd/CarbonDataLoadRDD.scala | 96 ++--- .../spark/rdd/CarbonDataRDDFactory.scala | 88 +++-- .../spark/rdd/CarbonGlobalDictionaryRDD.scala | 11 +- .../carbondata/spark/util/CarbonScalaUtil.scala | 51 +++ .../spark/util/GlobalDictionaryUtil.scala | 11 +- .../apache/spark/rdd/DataLoadCoalescedRDD.scala | 68 ++++ .../spark/rdd/DataLoadPartitionCoalescer.scala | 363 +++++++++++++++++++ .../spark/sql/hive/DistributionUtil.scala | 19 +- .../org/apache/spark/util/TaskContextUtil.scala | 29 ++ .../TestDataLoadPartitionCoalescer.scala | 170 +++++++++ .../spark/util/AllDictionaryTestCase.scala | 9 +- .../util/ExternalColumnDictionaryTestCase.scala | 14 +- ...GlobalDictionaryUtilConcurrentTestCase.scala | 23 +- .../util/GlobalDictionaryUtilTestCase.scala | 10 +- .../processing/csvreaderstep/CsvInput.java | 73 +++- .../csvreaderstep/JavaRddIterator.java | 32 ++ .../processing/csvreaderstep/RddInputUtils.java | 11 +- 17 files changed, 921 insertions(+), 157 deletions(-) ----------------------------------------------------------------------