ad1happy2go commented on issue #6869:
URL: https://github.com/apache/hudi/issues/6869#issuecomment-1524975366
I was not able to reproduce the issue; the following code works as expected.
```scala
import org.apache.hudi.QuickstartUtils._
import scala.collection.JavaConversions._
import org.apache.spark.sql.SaveMode._
import org.apache.hudi.DataSourceReadOptions._
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.config.HoodieWriteConfig._
import org.apache.hudi.common.model.HoodieRecord
// First version of the test record: one JSON document with a nested "abcd"
// struct (used for the record key) and a nested "xyz" struct carrying the
// "recordedAt" value and a manifest whose "version" is "1.0.0".
// `.stripMargin` removes the leading `|` from every line; without it the pipe
// characters would be part of the string and the text would not be valid JSON.
val jsonStr =
  """{
    |  "abcd": {
    |    "payment": "upi",
    |    "delivery": "2"
    |  },
    |  "xyz": {
    |    "vouchers": {
    |      "items": [
    |        {
    |          "manifests": {
    |            "items": [
    |              {
    |                "type": "online",
    |                "version": "1.0.0"
    |              }
    |            ]
    |          }
    |        }
    |      ]
    |    },
    |    "recordedAt": 1661730366620
    |  }
    |}""".stripMargin
// Load the JSON document held in `jsonStr` as a DataFrame
// (`toDS` relies on the implicits that spark-shell brings into scope).
val df = spark.read.json(Seq(jsonStr).toDS())
// Name of the Hudi table and the local-filesystem path it is written to.
val tableName: String = "try_6869_1"
val basePath: String = "file:///tmp/try_6869_1"
// Hudi writer options shared by both writes below.
val configs = Map(
  // Write operation, table type and table name.
  "hoodie.datasource.write.operation" -> "upsert",
  "hoodie.datasource.write.table.type" -> "COPY_ON_WRITE",
  "hoodie.table.name" -> tableName,
  // Record key is composed of two fields nested under "abcd"; the precombine
  // field is nested under "xyz".
  "hoodie.datasource.write.recordkey.field" -> "abcd.delivery,abcd.payment",
  "hoodie.datasource.write.precombine.field" -> "xyz.recordedAt",
  "hoodie.datasource.write.keygenerator.class" ->
    "org.apache.hudi.keygen.NonpartitionedKeyGenerator",
  "hoodie.datasource.write.hive_style_partitioning" -> "true",
  // Parallelism. The original literal listed "hoodie.insert.shuffle.parallelism"
  // twice ("68", then "54"); a Map keeps only the last value for a duplicated
  // key, so the dead "68" entry is dropped and the effective "54" is kept.
  // NOTE(review): since the operation is "upsert", one of the two entries may
  // have been meant to be "hoodie.upsert.shuffle.parallelism" - confirm.
  "hoodie.insert.shuffle.parallelism" -> "54",
  "hoodie.finalize.write.parallelism" -> "54",
  // Cleaning / archival retention.
  "hoodie.cleaner.commits.retained" -> "10",
  "hoodie.keep.min.commits" -> "325",
  "hoodie.keep.max.commits" -> "350",
  // Parquet file settings.
  // NOTE(review): max.file.size (12800) is smaller than small.file.limit
  // (307200); presumably copied from the reported setup - confirm.
  "hoodie.parquet.compression.codec" -> "snappy",
  "hoodie.parquet.small.file.limit" -> "307200",
  "hoodie.parquet.max.file.size" -> "12800",
  // Metadata table and column-stats index.
  "hoodie.metadata.enable" -> "true",
  "hoodie.metadata.index.column.stats.enable" -> "true"
)
// First write: send `df` to the Hudi table at `basePath` using `configs`,
// in Append save mode.
val firstWriter = df.write.format("hudi").options(configs).mode(Append)
firstWriter.save(basePath)
// Read the table back and print its rows without truncating column values.
val afterFirstWrite = spark.read.format("hudi").load(basePath)
afterFirstWrite.show(false)
// Second version of the test record: same "abcd" key fields and the same
// "recordedAt" value as the first payload, with the manifest "version"
// changed to "2.0.0".
// Re-declaring `jsonStr` relies on spark-shell (REPL) shadowing; in a compiled
// source file the second definition would need a different name.
// `.stripMargin` removes the leading `|` from every line; without it the pipe
// characters would be part of the string and the text would not be valid JSON.
val jsonStr =
  """{
    |  "abcd": {
    |    "payment": "upi",
    |    "delivery": "2"
    |  },
    |  "xyz": {
    |    "vouchers": {
    |      "items": [
    |        {
    |          "manifests": {
    |            "items": [
    |              {
    |                "type": "online",
    |                "version": "2.0.0"
    |              }
    |            ]
    |          }
    |        }
    |      ]
    |    },
    |    "recordedAt": 1661730366620
    |  }
    |}""".stripMargin
// Load the current `jsonStr` document as a DataFrame
// (re-declaring `df` relies on spark-shell shadowing).
val df = spark.read.json(Seq(jsonStr).toDS())
// Second write to the same table path with the same options and save mode.
val secondWriter = df.write.format("hudi").options(configs).mode(Append)
secondWriter.save(basePath)
// Read the table back and print its rows without truncating column values.
val afterSecondWrite = spark.read.format("hudi").load(basePath)
afterSecondWrite.show(false)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org