hudi-bot opened a new issue, #17311:
URL: https://github.com/apache/hudi/issues/17311
Start with record (1, a), where the first column is the primary key and the second is the partition key. After `update ... set partitionKey = b where primaryKey = 1` we expect (1, b), but we actually get (1, a).
{{mergeForPartitionUpdatesIfNeeded}}, which is responsible for handling partition path updates, only covers INSERT INTO and MERGE INTO; UPDATE goes through a different code path, so the issue surfaces there.
{code:java}
// Reproduces HUDI-8568: with a global index and
// hoodie.simple.index.update.partition.path = true, an UPDATE that changes the
// partition key column does not move the record to the new partition, because
// mergeForPartitionUpdatesIfNeeded only covers INSERT INTO / MERGE INTO.
test("Test Type Casting with Global Index for Primary Key and Partition Key Updates") {
  Seq("cow", "mor").foreach { tableType =>
    withRecordType()(withTempDir { tmp =>
      withSQLConf(
        "hoodie.index.type" -> "GLOBAL_SIMPLE",
        "hoodie.simple.index.update.partition.path" -> "true") {
        val tableName = generateTableName
        // Create table with both a primary key (c1) and a partition key (c2).
        spark.sql(
          s"""
             |create table $tableName (
             |  c1 int,
             |  c2 int,
             |  c3 string,
             |  ts long
             |) using hudi
             |partitioned by (c2)
             |location '${tmp.getCanonicalPath}/$tableName'
             |tblproperties (
             |  type = '$tableType',
             |  primaryKey = 'c1',
             |  preCombineField = 'ts'
             |)
           """.stripMargin)

        // Test Case 1: initial insert with double values (exercises casting
        // of the double literals to the int column types).
        spark.sql(
          s"""
             |insert into $tableName
             |select
             |  cast(1.0 as double) as c1,
             |  cast(1.0 as double) as c2,
             |  'a' as c3,
             |  1000 as ts
           """.stripMargin)
        // Verify the initial insert.
        checkAnswer(s"select c1, c2, c3 from $tableName")(
          Seq(1, 1, "a")
        )

        // Test Case 2: update the partition key column (c2).
        spark.sql(
          s"""
             |update $tableName
             |set c2 = cast(2.0 as double)
             |where c3 = 'a'
           """.stripMargin)
        // Verify the partition key update.
        checkAnswer(s"select c1, c2, c3 from $tableName")(
          Seq(1, 2, "a") // <---- failed as I got (1, 1, "a")
        )

        // Test Case 3: insert overwrite with double values.
        spark.sql(
          s"""
             |insert overwrite table $tableName
             |select
             |  cast(3.0 as double) as c1,
             |  cast(3.0 as double) as c2,
             |  'a' as c3,
             |  1003 as ts
           """.stripMargin)
        // Verify the final state after insert overwrite.
        checkAnswer(s"select c1, c2, c3 from $tableName")(
          Seq(3, 3, "a")
        )

        // Additional verification: check the complete table state with sorting.
        checkAnswer(
          s"""
             |select c1, c2, c3 from $tableName
             |order by c1, c2
           """.stripMargin)(
          Seq(3, 3, "a")
        )

        // Verify the record count (the update must not leave a duplicate row
        // behind in the old partition).
        val count =
          spark.sql(s"select count(*) from $tableName").collect()(0).getLong(0)
        assert(count == 1L,
          s"$tableType table: Expected 1 record but found $count records")
      }
    })
  }
}
{code}
## JIRA info
- Link: https://issues.apache.org/jira/browse/HUDI-8568
- Type: Sub-task
- Parent: https://issues.apache.org/jira/browse/HUDI-9109
- Fix version(s):
- 1.1.0
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]