[49/50] [abbrv] carbondata git commit: [CARBONDATA-2513][32K] Support write long string from dataframe

2018-06-21 Thread jackylk
[CARBONDATA-2513][32K] Support write long string from dataframe

support write long string from dataframe

This closes #2382


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/55f4bc6c
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/55f4bc6c
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/55f4bc6c

Branch: refs/heads/carbonstore
Commit: 55f4bc6c89f637b162b414033512901e9bd8a745
Parents: 218a8de
Author: xuchuanyin 
Authored: Wed Jun 20 19:01:24 2018 +0800
Committer: kumarvishal09 
Committed: Thu Jun 21 12:31:21 2018 +0530

--
 .../VarcharDataTypesBasicTestCase.scala | 32 +++-
 .../apache/carbondata/spark/CarbonOption.scala  |  2 ++
 .../spark/rdd/NewCarbonDataLoadRDD.scala| 15 +++--
 .../carbondata/spark/util/CarbonScalaUtil.scala |  3 +-
 .../spark/sql/CarbonDataFrameWriter.scala   |  1 +
 .../streaming/parser/FieldConverter.scala   | 11 +--
 6 files changed, 57 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/55f4bc6c/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
--
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
index 9ea3f1f..9798178 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
@@ -20,8 +20,9 @@ package org.apache.carbondata.spark.testsuite.longstring
 import java.io.{File, PrintWriter}
 
 import org.apache.commons.lang3.RandomStringUtils
-import org.apache.spark.sql.Row
+import org.apache.spark.sql.{DataFrame, Row, SaveMode}
 import org.apache.spark.sql.test.util.QueryTest
+import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
 import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants
@@ -36,6 +37,7 @@ class VarcharDataTypesBasicTestCase extends QueryTest with 
BeforeAndAfterEach wi
   private val inputFile_2g_column_page = s"$inputDir$fileName_2g_column_page"
   private val lineNum = 1000
   private var content: Content = _
+  private var longStringDF: DataFrame = _
   private var originMemorySize = CarbonProperties.getInstance().getProperty(
 CarbonCommonConstants.UNSAFE_WORKING_MEMORY_IN_MB,
 CarbonCommonConstants.UNSAFE_WORKING_MEMORY_IN_MB_DEFAULT)
@@ -257,6 +259,34 @@ class VarcharDataTypesBasicTestCase extends QueryTest with 
BeforeAndAfterEach wi
 // since after exception wrapper, we cannot get the root cause directly
   }
 
+  private def prepareDF(): Unit = {
+val schema = StructType(
+  StructField("id", IntegerType, nullable = true) ::
+  StructField("name", StringType, nullable = true) ::
+  StructField("description", StringType, nullable = true) ::
+  StructField("address", StringType, nullable = true) ::
+  StructField("note", StringType, nullable = true) :: Nil
+)
+longStringDF = sqlContext.sparkSession.read
+  .schema(schema)
+  .csv(inputFile)
+  }
+
+  test("write from dataframe with long string datatype") {
+prepareDF()
+// write spark dataframe to carbondata with `long_string_columns` property
+longStringDF.write
+  .format("carbondata")
+  .option("tableName", longStringTable)
+  .option("single_pass", "false")
+  .option("sort_columns", "name")
+  .option("long_string_columns", "description, note")
+  .mode(SaveMode.Overwrite)
+  .save()
+
+checkQuery()
+  }
+
   // will create 2 long string columns
   private def createFile(filePath: String, line: Int = 1, start: Int = 0,
   varcharLen: Int = Short.MaxValue + 1000): Content = {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/55f4bc6c/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala
--
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala
index a48e63d..5f23f77 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala

carbondata git commit: [CARBONDATA-2513][32K] Support write long string from dataframe

2018-06-21 Thread kumarvishal09
Repository: carbondata
Updated Branches:
  refs/heads/master 218a8deb6 -> 55f4bc6c8


[CARBONDATA-2513][32K] Support write long string from dataframe

support write long string from dataframe

This closes #2382


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/55f4bc6c
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/55f4bc6c
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/55f4bc6c

Branch: refs/heads/master
Commit: 55f4bc6c89f637b162b414033512901e9bd8a745
Parents: 218a8de
Author: xuchuanyin 
Authored: Wed Jun 20 19:01:24 2018 +0800
Committer: kumarvishal09 
Committed: Thu Jun 21 12:31:21 2018 +0530

--
 .../VarcharDataTypesBasicTestCase.scala | 32 +++-
 .../apache/carbondata/spark/CarbonOption.scala  |  2 ++
 .../spark/rdd/NewCarbonDataLoadRDD.scala| 15 +++--
 .../carbondata/spark/util/CarbonScalaUtil.scala |  3 +-
 .../spark/sql/CarbonDataFrameWriter.scala   |  1 +
 .../streaming/parser/FieldConverter.scala   | 11 +--
 6 files changed, 57 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/55f4bc6c/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
--
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
index 9ea3f1f..9798178 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
@@ -20,8 +20,9 @@ package org.apache.carbondata.spark.testsuite.longstring
 import java.io.{File, PrintWriter}
 
 import org.apache.commons.lang3.RandomStringUtils
-import org.apache.spark.sql.Row
+import org.apache.spark.sql.{DataFrame, Row, SaveMode}
 import org.apache.spark.sql.test.util.QueryTest
+import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
 import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants
@@ -36,6 +37,7 @@ class VarcharDataTypesBasicTestCase extends QueryTest with 
BeforeAndAfterEach wi
   private val inputFile_2g_column_page = s"$inputDir$fileName_2g_column_page"
   private val lineNum = 1000
   private var content: Content = _
+  private var longStringDF: DataFrame = _
   private var originMemorySize = CarbonProperties.getInstance().getProperty(
 CarbonCommonConstants.UNSAFE_WORKING_MEMORY_IN_MB,
 CarbonCommonConstants.UNSAFE_WORKING_MEMORY_IN_MB_DEFAULT)
@@ -257,6 +259,34 @@ class VarcharDataTypesBasicTestCase extends QueryTest with 
BeforeAndAfterEach wi
 // since after exception wrapper, we cannot get the root cause directly
   }
 
+  private def prepareDF(): Unit = {
+val schema = StructType(
+  StructField("id", IntegerType, nullable = true) ::
+  StructField("name", StringType, nullable = true) ::
+  StructField("description", StringType, nullable = true) ::
+  StructField("address", StringType, nullable = true) ::
+  StructField("note", StringType, nullable = true) :: Nil
+)
+longStringDF = sqlContext.sparkSession.read
+  .schema(schema)
+  .csv(inputFile)
+  }
+
+  test("write from dataframe with long string datatype") {
+prepareDF()
+// write spark dataframe to carbondata with `long_string_columns` property
+longStringDF.write
+  .format("carbondata")
+  .option("tableName", longStringTable)
+  .option("single_pass", "false")
+  .option("sort_columns", "name")
+  .option("long_string_columns", "description, note")
+  .mode(SaveMode.Overwrite)
+  .save()
+
+checkQuery()
+  }
+
   // will create 2 long string columns
   private def createFile(filePath: String, line: Int = 1, start: Int = 0,
   varcharLen: Int = Short.MaxValue + 1000): Content = {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/55f4bc6c/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala
--
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala
index a48e63d..5f23f77 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala