[ https://issues.apache.org/jira/browse/CARBONDATA-49?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
ChenLiang updated CARBONDATA-49: -------------------------------- Description: CSV data is stored on the local machine (not HDFS); test results are below. 1. If the csv data is 1 million rows, all queries are OK. 2. If the csv data is 3 million rows, the query cc.sql("select * from tablename") fails with the errors below: ERROR 11-07 20:56:54,131 - [Executor task launch worker-12][partitionID:connectdemo;queryID:33111337863067_0] org.carbondata.scan.executor.exception.QueryExecutionException: at org.carbondata.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:99) at org.carbondata.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:178) at org.carbondata.scan.executor.impl.DetailRawRecordQueryExecutor.execute(DetailRawRecordQueryExecutor.java:20) at org.carbondata.spark.rdd.CarbonScanRDD$$anon$1.<init>(CarbonScanRDD.scala:174) at org.carbondata.spark.rdd.CarbonScanRDD.compute(CarbonScanRDD.scala:155) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66) at org.apache.spark.scheduler.Task.run(Task.scala:89) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Caused by: org.carbondata.core.carbon.datastore.exception.IndexBuilderException: at org.carbondata.core.carbon.datastore.BlockIndexStore.fillLoadedBlocks(BlockIndexStore.java:211) at org.carbondata.core.carbon.datastore.BlockIndexStore.loadAndGetBlocks(BlockIndexStore.java:191) at org.carbondata.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:96) was: CSV data is stored on the local machine (not HDFS); test results are below. 1. If the data is 1 million rows, all queries are OK. 
2.If the data is 3 million rows, query of cc.sql("select * from tablename") having the below errors: ERROR 11-07 20:56:54,131 - [Executor task launch worker-12][partitionID:connectdemo;queryID:33111337863067_0] org.carbondata.scan.executor.exception.QueryExecutionException: at org.carbondata.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:99) at org.carbondata.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:178) at org.carbondata.scan.executor.impl.DetailRawRecordQueryExecutor.execute(DetailRawRecordQueryExecutor.java:20) at org.carbondata.spark.rdd.CarbonScanRDD$$anon$1.<init>(CarbonScanRDD.scala:174) at org.carbondata.spark.rdd.CarbonScanRDD.compute(CarbonScanRDD.scala:155) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) at 
org.apache.spark.rdd.RDD.iterator(RDD.scala:270) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66) at org.apache.spark.scheduler.Task.run(Task.scala:89) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Caused by: org.carbondata.core.carbon.datastore.exception.IndexBuilderException: at org.carbondata.core.carbon.datastore.BlockIndexStore.fillLoadedBlocks(BlockIndexStore.java:211) at org.carbondata.core.carbon.datastore.BlockIndexStore.loadAndGetBlocks(BlockIndexStore.java:191) at org.carbondata.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:96) > Can not query 3 million rows data which be loaded through local store > system(not HDFS) > -------------------------------------------------------------------------------------- > > Key: CARBONDATA-49 > URL: https://issues.apache.org/jira/browse/CARBONDATA-49 > Project: CarbonData > Issue Type: Bug > Environment: spark 1.6.1 > Reporter: ChenLiang > Priority: Minor > > CSV data is stored on the local machine (not HDFS); test results are below. > 1. If the csv data is 1 million rows, all queries are OK. 
> 2.If the csv data is 3 million rows, query of cc.sql("select * from > tablename") having the below errors: > ERROR 11-07 20:56:54,131 - [Executor task launch > worker-12][partitionID:connectdemo;queryID:33111337863067_0] > org.carbondata.scan.executor.exception.QueryExecutionException: > at > org.carbondata.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:99) > at > org.carbondata.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:178) > at > org.carbondata.scan.executor.impl.DetailRawRecordQueryExecutor.execute(DetailRawRecordQueryExecutor.java:20) > at > org.carbondata.spark.rdd.CarbonScanRDD$$anon$1.<init>(CarbonScanRDD.scala:174) > at > org.carbondata.spark.rdd.CarbonScanRDD.compute(CarbonScanRDD.scala:155) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) > at > 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66) > at org.apache.spark.scheduler.Task.run(Task.scala:89) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: > org.carbondata.core.carbon.datastore.exception.IndexBuilderException: > at > org.carbondata.core.carbon.datastore.BlockIndexStore.fillLoadedBlocks(BlockIndexStore.java:211) > at > org.carbondata.core.carbon.datastore.BlockIndexStore.loadAndGetBlocks(BlockIndexStore.java:191) > at > org.carbondata.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:96) -- This message was sent by Atlassian JIRA (v6.3.4#6332)