[49/50] [abbrv] carbondata git commit: [CARBONDATA-2513][32K] Support write long string from dataframe
[CARBONDATA-2513][32K] Support write long string from dataframe support write long string from dataframe This closes #2382 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/55f4bc6c Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/55f4bc6c Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/55f4bc6c Branch: refs/heads/carbonstore Commit: 55f4bc6c89f637b162b414033512901e9bd8a745 Parents: 218a8de Author: xuchuanyin Authored: Wed Jun 20 19:01:24 2018 +0800 Committer: kumarvishal09 Committed: Thu Jun 21 12:31:21 2018 +0530 -- .../VarcharDataTypesBasicTestCase.scala | 32 +++- .../apache/carbondata/spark/CarbonOption.scala | 2 ++ .../spark/rdd/NewCarbonDataLoadRDD.scala| 15 +++-- .../carbondata/spark/util/CarbonScalaUtil.scala | 3 +- .../spark/sql/CarbonDataFrameWriter.scala | 1 + .../streaming/parser/FieldConverter.scala | 11 +-- 6 files changed, 57 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/55f4bc6c/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala index 9ea3f1f..9798178 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala @@ -20,8 +20,9 @@ package org.apache.carbondata.spark.testsuite.longstring import java.io.{File, PrintWriter} import org.apache.commons.lang3.RandomStringUtils -import org.apache.spark.sql.Row +import org.apache.spark.sql.{DataFrame, Row, SaveMode} import org.apache.spark.sql.test.util.QueryTest +import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} import org.apache.carbondata.core.constants.CarbonCommonConstants @@ -36,6 +37,7 @@ class VarcharDataTypesBasicTestCase extends QueryTest with BeforeAndAfterEach wi private val inputFile_2g_column_page = s"$inputDir$fileName_2g_column_page" private val lineNum = 1000 private var content: Content = _ + private var longStringDF: DataFrame = _ private var originMemorySize = CarbonProperties.getInstance().getProperty( CarbonCommonConstants.UNSAFE_WORKING_MEMORY_IN_MB, CarbonCommonConstants.UNSAFE_WORKING_MEMORY_IN_MB_DEFAULT) @@ -257,6 +259,34 @@ class VarcharDataTypesBasicTestCase extends QueryTest with BeforeAndAfterEach wi // since after exception wrapper, we cannot get the root cause directly } + private def prepareDF(): Unit = { +val schema = StructType( + StructField("id", IntegerType, nullable = true) :: + StructField("name", StringType, nullable = true) :: + StructField("description", StringType, nullable = true) :: + StructField("address", StringType, nullable = true) :: + StructField("note", StringType, nullable = true) :: Nil +) +longStringDF = sqlContext.sparkSession.read + .schema(schema) + .csv(inputFile) + } + + test("write from dataframe with long string datatype") { +prepareDF() +// write spark dataframe to carbondata with `long_string_columns` property +longStringDF.write + .format("carbondata") + .option("tableName", longStringTable) + .option("single_pass", "false") + .option("sort_columns", "name") + .option("long_string_columns", "description, note") + .mode(SaveMode.Overwrite) + .save() + +checkQuery() + } + // will create 2 long string columns private def createFile(filePath: String, line: Int = 1, start: Int = 0, varcharLen: Int = Short.MaxValue + 1000): Content = { http://git-wip-us.apache.org/repos/asf/carbondata/blob/55f4bc6c/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala index a48e63d..5f23f77 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.sc
carbondata git commit: [CARBONDATA-2513][32K] Support write long string from dataframe
Repository: carbondata Updated Branches: refs/heads/master 218a8deb6 -> 55f4bc6c8 [CARBONDATA-2513][32K] Support write long string from dataframe support write long string from dataframe This closes #2382 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/55f4bc6c Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/55f4bc6c Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/55f4bc6c Branch: refs/heads/master Commit: 55f4bc6c89f637b162b414033512901e9bd8a745 Parents: 218a8de Author: xuchuanyin Authored: Wed Jun 20 19:01:24 2018 +0800 Committer: kumarvishal09 Committed: Thu Jun 21 12:31:21 2018 +0530 -- .../VarcharDataTypesBasicTestCase.scala | 32 +++- .../apache/carbondata/spark/CarbonOption.scala | 2 ++ .../spark/rdd/NewCarbonDataLoadRDD.scala| 15 +++-- .../carbondata/spark/util/CarbonScalaUtil.scala | 3 +- .../spark/sql/CarbonDataFrameWriter.scala | 1 + .../streaming/parser/FieldConverter.scala | 11 +-- 6 files changed, 57 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/55f4bc6c/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala index 9ea3f1f..9798178 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala @@ -20,8 +20,9 @@ package org.apache.carbondata.spark.testsuite.longstring import java.io.{File, PrintWriter} import org.apache.commons.lang3.RandomStringUtils -import org.apache.spark.sql.Row +import org.apache.spark.sql.{DataFrame, Row, SaveMode} import org.apache.spark.sql.test.util.QueryTest +import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} import org.apache.carbondata.core.constants.CarbonCommonConstants @@ -36,6 +37,7 @@ class VarcharDataTypesBasicTestCase extends QueryTest with BeforeAndAfterEach wi private val inputFile_2g_column_page = s"$inputDir$fileName_2g_column_page" private val lineNum = 1000 private var content: Content = _ + private var longStringDF: DataFrame = _ private var originMemorySize = CarbonProperties.getInstance().getProperty( CarbonCommonConstants.UNSAFE_WORKING_MEMORY_IN_MB, CarbonCommonConstants.UNSAFE_WORKING_MEMORY_IN_MB_DEFAULT) @@ -257,6 +259,34 @@ class VarcharDataTypesBasicTestCase extends QueryTest with BeforeAndAfterEach wi // since after exception wrapper, we cannot get the root cause directly } + private def prepareDF(): Unit = { +val schema = StructType( + StructField("id", IntegerType, nullable = true) :: + StructField("name", StringType, nullable = true) :: + StructField("description", StringType, nullable = true) :: + StructField("address", StringType, nullable = true) :: + StructField("note", StringType, nullable = true) :: Nil +) +longStringDF = sqlContext.sparkSession.read + .schema(schema) + .csv(inputFile) + } + + test("write from dataframe with long string datatype") { +prepareDF() +// write spark dataframe to carbondata with `long_string_columns` property +longStringDF.write + .format("carbondata") + .option("tableName", longStringTable) + .option("single_pass", "false") + .option("sort_columns", "name") + .option("long_string_columns", "description, note") + .mode(SaveMode.Overwrite) + .save() + +checkQuery() + } + // will create 2 long string columns private def createFile(filePath: String, line: Int = 1, start: Int = 0, varcharLen: Int = Short.MaxValue + 1000): Content = { http://git-wip-us.apache.org/repos/asf/carbondata/blob/55f4bc6c/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala index a48e63d..5f23f77 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala +++ b/in