[ 
https://issues.apache.org/jira/browse/CARBONDATA-3902?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Indhumathi Muthumurugesh updated CARBONDATA-3902:
-------------------------------------------------
    Description: 
Steps to Reproduce Issue :
import scala.collection.JavaConverters._
import java.sql.Date
import org.apache.spark.sql._
import org.apache.spark.sql.CarbonSession._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.command.mutation.merge.{CarbonMergeDataSetCommand,
  DeleteAction, InsertAction, InsertInHistoryTableAction, MergeDataSetMatches,
  MergeMatch, UpdateAction, WhenMatched, WhenNotMatched,
  WhenNotMatchedAndExistsOnlyOnTarget}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.test.util.QueryTest
import org.apache.spark.sql.types.{BooleanType, DateType, IntegerType, StringType,
  StructField, StructType}
import spark.implicits._
        
sql("drop table if exists target").show()

val initframe = spark.createDataFrame(Seq(
  Row("a", "0"),
  Row("b", "1"),
  Row("c", "2"),
  Row("d", "3")
).asJava, StructType(Seq(StructField("key", StringType), StructField("value", 
StringType))))

initframe.write
  .format("carbondata")
  .option("tableName", "target")
  .option("partitionColumns", "value")
  .mode(SaveMode.Overwrite)
  .save()
  
val target = spark.read.format("carbondata").option("tableName", "target").load()

var ccd =
  spark.createDataFrame(Seq(
    Row("a", "10", false,  0),
    Row("a", null, true, 1),   
    Row("b", null, true, 2),   
    Row("c", null, true, 3),   
    Row("c", "20", false, 4),
    Row("c", "200", false, 5),
    Row("e", "100", false, 6) 
  ).asJava,
    StructType(Seq(StructField("key", StringType),
      StructField("newValue", StringType),
      StructField("deleted", BooleanType), StructField("time", IntegerType))))
          
ccd.createOrReplaceTempView("changes")

ccd = sql("SELECT key, latest.newValue as newValue, latest.deleted as deleted FROM ( SELECT key, max(struct(time, newValue, deleted)) as latest FROM changes GROUP BY key)")

val updateMap = Map("key" -> "B.key", "value" -> 
"B.newValue").asInstanceOf[Map[Any, Any]]

val insertMap = Map("key" -> "B.key", "value" -> 
"B.newValue").asInstanceOf[Map[Any, Any]]

target.as("A").merge(ccd.as("B"), "A.key=B.key").
  whenMatched("B.deleted=true").
  delete().execute()

> Query on partition table gives incorrect results after Delete records using 
> CDC
> -------------------------------------------------------------------------------
>
>                 Key: CARBONDATA-3902
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-3902
>             Project: CarbonData
>          Issue Type: Bug
>            Reporter: Indhumathi Muthumurugesh
>            Priority: Major
>
> Steps to Reproduce Issue :
> import scala.collection.JavaConverters._
> import java.sql.Date
> import org.apache.spark.sql._
> import org.apache.spark.sql.CarbonSession._
> import org.apache.spark.sql.catalyst.TableIdentifier
> import org.apache.spark.sql.execution.command.mutation.merge.{CarbonMergeDataSetCommand,
>   DeleteAction, InsertAction, InsertInHistoryTableAction, MergeDataSetMatches,
>   MergeMatch, UpdateAction, WhenMatched, WhenNotMatched,
>   WhenNotMatchedAndExistsOnlyOnTarget}
> import org.apache.spark.sql.functions._
> import org.apache.spark.sql.test.util.QueryTest
> import org.apache.spark.sql.types.{BooleanType, DateType, IntegerType, StringType,
>   StructField, StructType}
> import spark.implicits._
>       
> sql("drop table if exists target").show()
> val initframe = spark.createDataFrame(Seq(
>   Row("a", "0"),
>   Row("b", "1"),
>   Row("c", "2"),
>   Row("d", "3")
> ).asJava, StructType(Seq(StructField("key", StringType), StructField("value", 
> StringType))))
> initframe.write
>   .format("carbondata")
>   .option("tableName", "target")
>   .option("partitionColumns", "value")
>   .mode(SaveMode.Overwrite)
>   .save()
>   
> val target = spark.read.format("carbondata").option("tableName", "target").load()
> var ccd =
>   spark.createDataFrame(Seq(
>     Row("a", "10", false,  0),
>     Row("a", null, true, 1),   
>     Row("b", null, true, 2),   
>     Row("c", null, true, 3),   
>     Row("c", "20", false, 4),
>     Row("c", "200", false, 5),
>     Row("e", "100", false, 6) 
>   ).asJava,
>     StructType(Seq(StructField("key", StringType),
>       StructField("newValue", StringType),
>       StructField("deleted", BooleanType), StructField("time", IntegerType))))
>         
> ccd.createOrReplaceTempView("changes")
> ccd = sql("SELECT key, latest.newValue as newValue, latest.deleted as deleted FROM ( SELECT key, max(struct(time, newValue, deleted)) as latest FROM changes GROUP BY key)")
> val updateMap = Map("key" -> "B.key", "value" -> 
> "B.newValue").asInstanceOf[Map[Any, Any]]
> val insertMap = Map("key" -> "B.key", "value" -> 
> "B.newValue").asInstanceOf[Map[Any, Any]]
> target.as("A").merge(ccd.as("B"), "A.key=B.key").
>   whenMatched("B.deleted=true").
>   delete().execute()



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to