[CARBONDATA-2604] Fix ArrayIndexOutOfBoundsException during compaction after IUD in cluster
Issue: When some records have been deleted, the number of valid rows can
differ from the number of actually scanned rows while the dimension and
measure data are being filled. Filling the measure data iterates over the
scanned result again, so the mismatched counters cause an
ArrayIndexOutOfBoundsException.

Solution: In RawBasedResultCollector, collect the dimension and measure data
of each scan batch into a newly created temporary list, then add that list to
the final result list.

This closes #2369

Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/efad40d5
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/efad40d5
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/efad40d5

Branch: refs/heads/carbonstore
Commit: efad40d5723849a351ec700e8e4e346cac8c3454
Parents: ff03645
Author: rahul <rahul.ku...@knoldus.in>
Authored: Tue Jun 12 19:26:40 2018 +0530
Committer: manishgupta88 <tomanishgupt...@gmail.com>
Committed: Wed Jun 13 20:38:24 2018 +0530

----------------------------------------------------------------------
 .../collector/impl/RawBasedResultCollector.java | 12 +++++++++---
 .../sdv/generated/DataLoadingIUDTestCase.scala  | 19 +++++++++++++++++++
 2 files changed, 28 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/carbondata/blob/efad40d5/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java
index d28df0a..7302b2c 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java
@@ -92,16 +92,22 @@ public class RawBasedResultCollector extends AbstractScannedResultCollector {
         // re initialized with left over value
         batchSize = 0;
       }
+      // for every iteration of available rows, fill a newly created list of Object[] and add it
+      // to the final list so that there is no mismatch in the counter while filling dimension
+      // and measure data
+      List<Object[]> collectedData = new ArrayList<>(availableBatchRowCount);
       // fill dimension data
-      fillDimensionData(scannedResult, listBasedResult, queryMeasures, availableBatchRowCount);
-      fillMeasureData(scannedResult, listBasedResult);
+      fillDimensionData(scannedResult, collectedData, queryMeasures, availableBatchRowCount);
+      fillMeasureData(scannedResult, collectedData);
       // increment the number of rows scanned in scanned result statistics
       incrementScannedResultRowCounter(scannedResult, availableBatchRowCount);
       // assign the left over rows to batch size if the number of rows fetched are lesser
       // than batchSize
-      if (listBasedResult.size() < availableBatchRowCount) {
+      if (collectedData.size() < availableBatchRowCount) {
         batchSize += availableBatchRowCount - listBasedResult.size();
       }
+      // add the collected data to the final list
+      listBasedResult.addAll(collectedData);
     }
   }
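For readers skimming the diff, below is a minimal, self-contained sketch of
the failure mode and of the per-batch buffering pattern the fix applies. All
names here (TempListSketch, fillMeasures, validRowsPerBatch) are hypothetical
stand-ins, not CarbonData code: the buggy variant fills measures into the
accumulated result list, whose size can exceed the current batch's measure
buffer once deletes shrink a batch; the fixed variant fills a fresh per-batch
list, so both fill steps always agree on the row count.

import java.util.ArrayList;
import java.util.List;

public class TempListSketch {

  // Stand-in for the measure-fill step: writes one measure value per row of
  // 'rows', reading from a buffer sized for the *current batch only*.
  static void fillMeasures(List<Object[]> rows, double[] batchMeasures) {
    for (int i = 0; i < rows.size(); i++) {
      rows.get(i)[1] = batchMeasures[i];
    }
  }

  public static void main(String[] args) {
    List<Object[]> finalResult = new ArrayList<>();
    int[] validRowsPerBatch = {3, 2}; // second batch lost a row to deletes

    for (int valid : validRowsPerBatch) {
      double[] batchMeasures = new double[valid];

      // Fixed pattern: fill a fresh per-batch list ...
      List<Object[]> collected = new ArrayList<>(valid);
      for (int i = 0; i < valid; i++) {
        collected.add(new Object[2]); // stand-in for the dimension-fill step
      }
      fillMeasures(collected, batchMeasures); // sizes always match
      // ... then append it to the final result list.
      finalResult.addAll(collected);

      // Buggy variant: fillMeasures(finalResult, batchMeasures);
      // On the second batch finalResult already holds 3 rows while
      // batchMeasures.length == 2, so reading batchMeasures[2] would throw
      // ArrayIndexOutOfBoundsException.
    }
    System.out.println("rows collected: " + finalResult.size()); // prints 5
  }
}

Appending the temporary list via addAll leaves the final contents of
listBasedResult unchanged, which is why only the collector needed to change.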
http://git-wip-us.apache.org/repos/asf/carbondata/blob/efad40d5/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala
index 4c232be..79458f5 100644
--- a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala
+++ b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala
@@ -3671,6 +3671,23 @@ test("HQ_Defect_TC_2016110901163", Include) {
   sql(s"""drop table default.t_carbn01  """).collect
 }
 
+test("[CARBONDATA-2604] ", Include){
+  sql("drop table if exists brinjal").collect
+  sql("create table brinjal (imei string,AMSize string,channelsId string,ActiveCountry string, Activecity string,gamePointId double,deviceInformationId double,productionDate Timestamp,deliveryDate timestamp,deliverycharge double) STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('table_blocksize'='2000','sort_columns'='imei')").collect
+  sql(s"""LOAD DATA INPATH '$resourcesPath/Data/InsertData/vardhandaterestruct.csv' INTO TABLE brinjal OPTIONS('DELIMITER'=',', 'QUOTECHAR'= '','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'= 'imei,deviceInformationId,AMSize,channelsId,ActiveCountry,Activecity,gamePointId,productionDate,deliveryDate,deliverycharge')""").collect
+  sql(s"""LOAD DATA INPATH '$resourcesPath/Data/InsertData/vardhandaterestruct.csv' INTO TABLE brinjal OPTIONS('DELIMITER'=',', 'QUOTECHAR'= '','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'= 'imei,deviceInformationId,AMSize,channelsId,ActiveCountry,Activecity,gamePointId,productionDate,deliveryDate,deliverycharge')""").collect
+  sql(s"""LOAD DATA INPATH '$resourcesPath/Data/InsertData/vardhandaterestruct.csv' INTO TABLE brinjal OPTIONS('DELIMITER'=',', 'QUOTECHAR'= '','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'= 'imei,deviceInformationId,AMSize,channelsId,ActiveCountry,Activecity,gamePointId,productionDate,deliveryDate,deliverycharge')""").collect
+  sql("insert into brinjal select * from brinjal").collect
+  sql("update brinjal set (AMSize)= ('8RAM size') where AMSize='4RAM size'").collect
+  sql("delete from brinjal where AMSize='8RAM size'").collect
+  sql("delete from table brinjal where segment.id IN(0)").collect
+  sql("clean files for table brinjal").collect
+  sql("alter table brinjal compact 'minor'").collect
+  sql("alter table brinjal compact 'major'").collect
+  checkAnswer(s"""select count(*) from brinjal""",
+    Seq(Row(335)), "CARBONDATA-2604")
+  sql("drop table if exists brinjal")
+}
+
 override def afterAll {
   sql("use default").collect
   sql("drop table if exists t_carbn02").collect
@@ -3701,5 +3718,7 @@ override def afterAll {
   sql("drop table if exists t_carbn01b").collect
   sql("drop table if exists T_Hive1").collect
   sql("drop table if exists T_Hive6").collect
+  sql("drop table if exists brinjal")
+
 }
}
\ No newline at end of file