[CARBONDATA-2479] Multiple issue fixes in SDK writer and external table flow
[CARBONDATA-2479] Multiple issues fixed: (1) external table path display; (2) default value for array in AVRO; (3) NPE when the folder is deleted before the second select query; (4) avro float value precision change issue. This closes #2306. Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/cf1b50bc Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/cf1b50bc Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/cf1b50bc Branch: refs/heads/spark-2.3 Commit: cf1b50bcc697be5353be469737a7dacdc57b1d7e Parents: 1d302a8 Author: ajantha-bhat <ajanthab...@gmail.com> Authored: Mon May 14 15:28:23 2018 +0530 Committer: ravipesala <ravi.pes...@gmail.com> Committed: Thu May 17 18:47:04 2018 +0530 ---------------------------------------------------------------------- .../LatestFilesReadCommittedScope.java | 2 +- .../TestNonTransactionalCarbonTable.scala | 82 ++++++++++++++++++-- .../table/CarbonDescribeFormattedCommand.scala | 7 +- .../carbondata/sdk/file/AvroCarbonWriter.java | 12 +-- .../carbondata/sdk/file/CarbonReaderTest.java | 2 +- 5 files changed, 85 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/cf1b50bc/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java b/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java index 2306330..6106174 100644 --- a/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java +++ b/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java @@ -138,7 +138,7 @@ public class LatestFilesReadCommittedScope implements 
ReadCommittedScope { @Override public void takeCarbonIndexFileSnapShot() throws IOException { // Read the current file Path get the list of indexes from the path. CarbonFile file = FileFactory.getCarbonFile(carbonFilePath); - if (file == null) { + if (file.listFiles().length == 0) { // For nonTransactional table, files can be removed at any point of time. // So cannot assume files will be present throw new IOException("No files are present in the table location :" + carbonFilePath); http://git-wip-us.apache.org/repos/asf/carbondata/blob/cf1b50bc/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala index 5ab1c60..cc3cbb5 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala @@ -1050,7 +1050,7 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { | "type": "record", | "fields": [ | { "name": "name", "type": "string"}, - | { "name": "age", "type": "int"}, + | { "name": "age", "type": "float"}, | { "name": "address", "type": { | "type" : "record", "name" : "my_address", | "fields" : [ @@ -1059,11 +1059,11 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { |]} """.stripMargin - val json = """ {"name":"bob", "age":10, "address" : {"street":"abc", "city":"bang"}} """ + val json = """ {"name":"bob", "age":10.24, "address" : {"street":"abc", "city":"bang"}} """ 
val fields = new Array[Field](3) fields(0) = new Field("name", DataTypes.STRING) - fields(1) = new Field("age", DataTypes.INT) + fields(1) = new Field("age", DataTypes.DOUBLE) val fld = new util.ArrayList[StructField] fld.add(new StructField("street", DataTypes.STRING)) fld.add(new StructField("city", DataTypes.STRING)) @@ -1340,11 +1340,10 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION |'$writerPath' """.stripMargin) - checkAnswer(sql("select * from sdkOutputTable"), Seq( - Row("bob", 10, Row("abc","bang")), - Row("bob", 10, Row("abc","bang")), - Row("bob", 10, Row("abc","bang")))) + Row("bob", 10.24, Row("abc","bang")), + Row("bob", 10.24, Row("abc","bang")), + Row("bob", 10.24, Row("abc","bang")))) sql("DROP TABLE sdkOutputTable") // drop table should not delete the files @@ -1372,6 +1371,75 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { cleanTestData() } + // array type Default value test + def buildAvroTestDataArrayDefaultType(rows: Int, options: util.Map[String, String]): Any = { + FileUtils.deleteDirectory(new File(writerPath)) + + val mySchema = """ { + | "name": "address", + | "type": "record", + | "fields": [ + | { + | "name": "name", + | "type": "string" + | }, + | { + | "name": "age", + | "type": "int" + | }, + | { + | "name": "address", + | "type": { + | "type": "array", + | "items": "string" + | }, + | "default": ["sc","ab"] + | } + | ] + | } + """.stripMargin + + // skip giving array value to take default values + val json: String = "{\"name\": \"bob\",\"age\": 10}" + + val fields = new Array[Field](3) + fields(0) = new Field("name", DataTypes.STRING) + fields(1) = new Field("age", DataTypes.INT) + // fields[1] = new Field("age", DataTypes.INT); + val fld = new util.ArrayList[StructField] + fld.add(new StructField("street", DataTypes.STRING)) + fields(2) = new Field("address", "array", fld) + + 
WriteFilesWithAvroWriter(rows, mySchema, json, fields) + } + + def buildAvroTestDataSingleFileArrayDefaultType(): Any = { + FileUtils.deleteDirectory(new File(writerPath)) + buildAvroTestDataArrayDefaultType(3, null) + } + + test("Read sdk writer Avro output Array Type with Default value") { + buildAvroTestDataSingleFileArrayDefaultType() + assert(new File(writerPath).exists()) + sql("DROP TABLE IF EXISTS sdkOutputTable") + sql( + s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION + |'$writerPath' """.stripMargin) + + sql("select * from sdkOutputTable").show(200,false) + + checkAnswer(sql("select * from sdkOutputTable"), Seq( + Row("bob", 10, new mutable.WrappedArray.ofRef[String](Array("sc", "ab"))), + Row("bob", 10, new mutable.WrappedArray.ofRef[String](Array("sc", "ab"))), + Row("bob", 10, new mutable.WrappedArray.ofRef[String](Array("sc", "ab"))))) + + sql("DROP TABLE sdkOutputTable") + // drop table should not delete the files + assert(new File(writerPath).listFiles().length > 0) + cleanTestData() + } + + test("Read sdk writer Avro output with both Array and Struct Type") { buildAvroTestDataBothStructArrayType() assert(new File(writerPath).exists()) http://git-wip-us.apache.org/repos/asf/carbondata/blob/cf1b50bc/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala index 375c413..ce03959 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala @@ -86,12 +86,7 @@ private[sql] 
case class CarbonDescribeFormattedCommand( results ++= Seq(("Database Name", relation.carbonTable.getDatabaseName, "") ) results ++= Seq(("Table Name", relation.carbonTable.getTableName, "")) - if (!carbonTable.isExternalTable) { - results ++= Seq(("CARBON Store Path ", CarbonProperties.getStorePath, "")) - } else { - // for external table should show files path. - results ++= Seq(("CARBON Store Path ", carbonTable.getTablePath, "")) - } + results ++= Seq(("CARBON Store Path ", carbonTable.getTablePath, "")) val tblProps = carbonTable.getTableInfo.getFactTable.getTableProperties http://git-wip-us.apache.org/repos/asf/carbondata/blob/cf1b50bc/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java ---------------------------------------------------------------------- diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java index 137e3f4..9f2f295 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java @@ -97,8 +97,9 @@ public class AvroCarbonWriter extends CarbonWriter { out = fieldValue; break; case FLOAT: - Float f = (Float) fieldValue; - out = f.doubleValue(); + // direct conversion will change precision. So parse from string. 
+ // also carbon internally needs float as double + out = Double.parseDouble(fieldValue.toString()); break; case RECORD: List<Schema.Field> fields = avroField.schema().getFields(); @@ -121,7 +122,8 @@ public class AvroCarbonWriter extends CarbonWriter { arrayChildObjects = new Object[size]; for (int i = 0; i < size; i++) { Object childObject = avroFieldToObject( - new Schema.Field(avroField.name(), avroField.schema().getElementType(), null, true), + new Schema.Field(avroField.name(), avroField.schema().getElementType(), + avroField.doc(), avroField.defaultVal()), ((GenericData.Array) fieldValue).get(i)); if (childObject != null) { arrayChildObjects[i] = childObject; @@ -132,8 +134,8 @@ public class AvroCarbonWriter extends CarbonWriter { arrayChildObjects = new Object[size]; for (int i = 0; i < size; i++) { Object childObject = avroFieldToObject( - new Schema.Field(avroField.name(), avroField.schema().getElementType(), null, true), - ((ArrayList) fieldValue).get(i)); + new Schema.Field(avroField.name(), avroField.schema().getElementType(), + avroField.doc(), avroField.defaultVal()), ((ArrayList) fieldValue).get(i)); if (childObject != null) { arrayChildObjects[i] = childObject; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/cf1b50bc/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java ---------------------------------------------------------------------- diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java index 08d6e52..f2c6d45 100644 --- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java +++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java @@ -162,7 +162,7 @@ public class CarbonReaderTest { i++; } Assert.assertEquals(i, 100); - + reader.close(); FileUtils.deleteDirectory(new File(path)); } }