[CARBONDATA-2615][32K] Support page size less than 32000 in CarbondataV3

Since we support very long strings, a column page with 32000 rows can
exceed 2GB if the strings are long enough, so we now support pages with
fewer than 32000 rows.

This closes #2383


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/091a28bf
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/091a28bf
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/091a28bf

Branch: refs/heads/carbonstore
Commit: 091a28bf833a5296dd3878ddb11b243f7f37a8fc
Parents: 2ea3b2d
Author: xuchuanyin <xuchuan...@hust.edu.cn>
Authored: Wed Jun 20 19:07:03 2018 +0800
Committer: kumarvishal09 <kumarvishal1...@gmail.com>
Committed: Thu Jun 21 11:00:02 2018 +0530

----------------------------------------------------------------------
 .../testsuite/dataload/TestLoadDataGeneral.scala    | 16 ++++++++++++++++
 .../store/CarbonFactDataHandlerColumnar.java        |  7 ++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/091a28bf/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
index 688928f..8b51090 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
@@ -259,6 +259,22 @@ class TestLoadDataGeneral extends QueryTest with BeforeAndAfterEach {
       CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_HDFS,
       originStatus)
   }
+
+  test("test data loading with page size less than 32000") {
+    CarbonProperties.getInstance().addProperty(
+      CarbonCommonConstants.BLOCKLET_SIZE, "16000")
+
+    val testData = s"$resourcesPath/sample.csv"
+    sql(s"LOAD DATA LOCAL INPATH '$testData' into table loadtest")
+    checkAnswer(
+      sql("SELECT COUNT(*) FROM loadtest"),
+      Seq(Row(6))
+    )
+
+    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.BLOCKLET_SIZE,
+      CarbonCommonConstants.BLOCKLET_SIZE_DEFAULT_VAL)
+  }
+
   override def afterEach {
     sql("DROP TABLE if exists loadtest")
     sql("drop table if exists invalidMeasures")

http://git-wip-us.apache.org/repos/asf/carbondata/blob/091a28bf/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
index c0acadd..5fe3261 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
@@ -371,8 +371,13 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler {
     this.pageSize = Integer.parseInt(CarbonProperties.getInstance()
         .getProperty(CarbonCommonConstants.BLOCKLET_SIZE,
             CarbonCommonConstants.BLOCKLET_SIZE_DEFAULT_VAL));
+    // support less than 32000 rows in one page, because we support super long string,
+    // if it is long enough, a column page with 32000 rows will exceed 2GB
     if (version == ColumnarFormatVersion.V3) {
-      this.pageSize = CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT;
+      this.pageSize =
+          pageSize < CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT ?
+              pageSize :
+              CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT;
     }
     LOGGER.info("Number of rows per column blocklet " + pageSize);
     dataRows = new ArrayList<>(this.pageSize);

Reply via email to