Ravindra Pesala created CARBONDATA-786: ------------------------------------------
Summary: Data mismatch if the data data is loaded across blocklet groups Key: CARBONDATA-786 URL: https://issues.apache.org/jira/browse/CARBONDATA-786 Project: CarbonData Issue Type: Bug Reporter: Ravindra Pesala A data mismatch occurs if the data is loaded across blocklet groups and a filter is applied on the second column onwards. Run the following test case: {code} CarbonProperties.getInstance() .addProperty("carbon.blockletgroup.size.in.mb", "16") .addProperty("carbon.enable.vector.reader", "true") .addProperty("enable.unsafe.sort", "true") val rdd = sqlContext.sparkContext .parallelize(1 to 1200000, 4) .map { x => ("city" + x % 8, "country" + x % 1103, "planet" + x % 10007, x.toString, (x % 16).toShort, x / 2, (x << 1).toLong, x.toDouble / 13, x.toDouble / 11) }.map { x => Row(x._1, x._2, x._3, x._4, x._5, x._6, x._7, x._8, x._9) } val schema = StructType( Seq( StructField("city", StringType, nullable = false), StructField("country", StringType, nullable = false), StructField("planet", StringType, nullable = false), StructField("id", StringType, nullable = false), StructField("m1", ShortType, nullable = false), StructField("m2", IntegerType, nullable = false), StructField("m3", LongType, nullable = false), StructField("m4", DoubleType, nullable = false), StructField("m5", DoubleType, nullable = false) ) ) val input = sqlContext.createDataFrame(rdd, schema) sql(s"drop table if exists testBigData") input.write .format("carbondata") .option("tableName", "testBigData") .option("tempCSV", "false") .option("single_pass", "true") .option("dictionary_exclude", "id") // id is high cardinality column .mode(SaveMode.Overwrite) .save() sql(s"select city, sum(m1) from testBigData " + s"where country='country12' group by city order by city").show() {code} The above code is supposed to return the following data, but it does not:
{code} +-----+-------+ | city|sum(m1)| +-----+-------+ |city0| 544| |city1| 680| |city2| 816| |city3| 952| |city4| 1088| |city5| 1224| |city6| 1360| |city7| 1496| +-----+-------+ {code} -- This message was sent by Atlassian JIRA (v6.3.15#6346)