MrAladdin opened a new issue, #11007:
URL: https://github.com/apache/hudi/issues/11007
**Describe the problem you faced**
1. Spark Structured Streaming: upsert into a MOR table (with record index enabled).
2. After compaction, a large number of log files with size 0 remain, and they are never cleaned up.
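To make the symptom concrete, here is a minimal sketch (not part of the original report) that counts zero-size log files under the table path; the base path below is a placeholder for the actual table location, and it assumes Hudi MOR log files contain ".log." in their names:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

// Hypothetical base path; substitute the real table location.
val basePath = new Path("hdfs:///tmp/hudi/example_table")
val fs = FileSystem.get(basePath.toUri, new Configuration())

// Recursively walk the table and count MOR log files with zero length.
val files = fs.listFiles(basePath, true)
var zeroSizeLogs = 0L
while (files.hasNext) {
  val status = files.next()
  if (status.getPath.getName.contains(".log.") && status.getLen == 0) {
    zeroSizeLogs += 1
  }
}
println(s"Zero-size log files under $basePath: $zeroSizeLogs")
```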
**Environment Description**
* Hudi version: 0.14.1
* Spark version: 3.4.1
* Hive version: 3.1.2
* Hadoop version: 3.1.3
* Storage (HDFS/S3/GCS..): HDFS
* Running on Docker? (yes/no): no
**Additional context**
```scala
.writeStream
.format("hudi")
.option("hoodie.table.base.file.format", "PARQUET")
.option("hoodie.allow.empty.commit", "true")
.option("hoodie.datasource.write.drop.partition.columns", "false")
.option("hoodie.table.services.enabled", "true")
.option("hoodie.datasource.write.streaming.checkpoint.identifier", "lakehouse-dwd-social-kbi-beauty-v1-writer-1")
.option(PRECOMBINE_FIELD.key(), "date_kbiUdate")
.option(RECORDKEY_FIELD.key(), "records_key")
.option(PARTITIONPATH_FIELD.key(), "partition_index_date")
.option(DataSourceWriteOptions.OPERATION.key(), DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL)
.option(DataSourceWriteOptions.TABLE_TYPE.key(), DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
.option("hoodie.combine.before.upsert", "true")
.option("hoodie.datasource.write.payload.class", "org.apache.hudi.common.model.OverwriteWithLatestAvroPayload")
// markers
.option("hoodie.write.markers.type", "DIRECT")
// timeline server
.option("hoodie.embed.timeline.server", "true")
// file system view storage configurations
.option("hoodie.filesystem.view.remote.timeout.secs", "1200")
.option("hoodie.filesystem.view.remote.retry.enable", "true")
.option("hoodie.filesystem.view.remote.retry.initial_interval_ms", "500")
.option("hoodie.filesystem.view.remote.retry.max_numbers", "15")
.option("hoodie.filesystem.view.remote.retry.max_interval_ms", "8000")
// schema cache
.option("hoodie.schema.cache.enable", "true")
// spark streaming write behavior
.option("hoodie.datasource.write.streaming.ignore.failed.batch", "false")
.option("hoodie.datasource.write.streaming.retry.count", "6")
.option("hoodie.datasource.write.streaming.retry.interval.ms", "3000")
// metadata table
.option("hoodie.metadata.enable", "true")
.option("hoodie.metadata.index.async", "false")
.option("hoodie.metadata.index.check.timeout.seconds", "900")
.option("hoodie.auto.adjust.lock.configs", "true")
.option("hoodie.metadata.optimized.log.blocks.scan.enable", "true")
.option("hoodie.metadata.index.column.stats.enable", "false")
.option("hoodie.metadata.index.column.stats.parallelism", "100")
.option("hoodie.metadata.index.column.stats.file.group.count", "4")
.option("hoodie.metadata.index.column.stats.column.list", "date_udate,date_publishedAt")
.option("hoodie.metadata.compact.max.delta.commits", "10")
// record index
.option("hoodie.metadata.record.index.enable", "true")
.option("hoodie.index.type", "RECORD_INDEX")
.option("hoodie.metadata.max.init.parallelism", "10")
.option("hoodie.metadata.record.index.min.filegroup.count", "10")
.option("hoodie.metadata.record.index.max.filegroup.count", "1")
.option("hoodie.metadata.record.index.max.filegroup.size", "1073741824")
.option("hoodie.metadata.auto.initialize", "true")
.option("hoodie.metadata.record.index.growth.factor", "2.0")
.option("hoodie.metadata.max.logfile.size", "2147483648")
.option("hoodie.metadata.log.compaction.enable", "false")
.option("hoodie.metadata.log.compaction.blocks.threshold", "5")
.option("hoodie.metadata.max.deltacommits.when_pending", "1000")
// file sizing
.option("hoodie.parquet.field_id.write.enabled", "true")
.option("hoodie.copyonwrite.insert.auto.split", "true")
.option("hoodie.record.size.estimation.threshold", "1.0")
.option("hoodie.parquet.block.size", "536870912")
.option("hoodie.parquet.max.file.size", "536870912")
.option("hoodie.parquet.small.file.limit", "314572800")
.option("hoodie.logfile.max.size", "536870912")
.option("hoodie.logfile.data.block.max.size", "536870912")
.option("hoodie.logfile.to.parquet.compression.ratio", "0.35")
// archive
.option("hoodie.
```