Y Ethan Guo created HUDI-8744:
---------------------------------
Summary: Improve expression index config experience
Key: HUDI-8744
URL: https://issues.apache.org/jira/browse/HUDI-8744
Project: Apache Hudi
Issue Type: Sub-task
Reporter: Y Ethan Guo
When running the Hudi streamer with the following config and no expression index
created from SQL, the streamer failed after some time with the error shown after the config.
{code:java}
hoodie.write.record.merge.mode=COMMIT_TIME_ORDERING
# Key fields, for kafka example
hoodie.datasource.write.recordkey.field=key
hoodie.datasource.write.partitionpath.field=partition
# Schema provider props (change to absolute path based on your installation)
hoodie.deltastreamer.schemaprovider.source.schema.file=s3a://hudi-benchmark-source/benchmark_schema_city.avsc
hoodie.deltastreamer.schemaprovider.target.schema.file=s3a://hudi-benchmark-source/benchmark_schema_city.avsc
# DFS Source
hoodie.deltastreamer.source.dfs.root=s3a://hudi-benchmark-source/upserts-20p-0.2update-last10-100r
benchmark.input.source.path=s3a://hudi-benchmark-source/upserts-20p-0.2update-last10-100r
# Compaction
hoodie.compact.inline.max.delta.commits=3
# Clean and archive
hoodie.clean.async=true
hoodie.keep.max.commits=7
hoodie.keep.min.commits=5
hoodie.cleaner.commits.retained=4
# Concurrency control
hoodie.write.concurrency.mode=optimistic_concurrency_control
hoodie.cleaner.policy.failed.writes=LAZY
hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.InProcessLockProvider
# Metadata table
hoodie.metadata.enable=true
hoodie.metadata.index.bloom.filter.enable=true
hoodie.metadata.index.bloom.filter.file.group.count=10
hoodie.metadata.index.column.stats.enable=true
hoodie.metadata.index.column.stats.file.group.count=10
hoodie.metadata.record.index.enable=true
hoodie.metadata.index.functional.enable=true
hoodie.metadata.index.partition.stats.enable=true
hoodie.metadata.index.secondary.enable=true
hoodie.metadata.index.secondary.column=city
10:09:28.881 [pool-30-thread-1] ERROR
org.apache.hudi.utilities.streamer.HoodieStreamer - Shutting down delta-sync
due to exception
org.apache.hudi.exception.HoodieException: Error waiting for async clean
service to finish
at
org.apache.hudi.async.AsyncCleanerService.waitForCompletion(AsyncCleanerService.java:76)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieTableServiceClient.asyncClean(BaseHoodieTableServiceClient.java:144)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieWriteClient.autoCleanOnCommit(BaseHoodieWriteClient.java:596)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieWriteClient.mayBeCleanAndArchive(BaseHoodieWriteClient.java:581)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:258)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:93)
~[hudi-spark3.5-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.utilities.streamer.StreamSync.writeToSinkAndDoMetaSync(StreamSync.java:948)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.utilities.streamer.StreamSync.syncOnce(StreamSync.java:520)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.utilities.streamer.HoodieStreamer$StreamSyncService.lambda$startService$1(HoodieStreamer.java:820)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
[?:1.8.0_432]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
[?:1.8.0_432]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
[?:1.8.0_432]
at java.lang.Thread.run(Thread.java:750) [?:1.8.0_432]
Caused by: java.util.concurrent.ExecutionException:
org.apache.hudi.exception.HoodieMetadataException: Functional index metadata
not found
at
java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
~[?:1.8.0_432]
at
java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908)
~[?:1.8.0_432]
at
org.apache.hudi.async.HoodieAsyncService.waitForShutdown(HoodieAsyncService.java:102)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.async.AsyncCleanerService.waitForCompletion(AsyncCleanerService.java:74)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
... 12 more
Caused by: org.apache.hudi.exception.HoodieMetadataException: Functional index
metadata not found
at
org.apache.hudi.metadata.HoodieTableMetadataUtil.convertMetadataToFunctionalIndexRecords(HoodieTableMetadataUtil.java:613)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.metadata.HoodieTableMetadataUtil.convertMetadataToRecords(HoodieTableMetadataUtil.java:577)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.lambda$update$39(HoodieBackedTableMetadataWriter.java:1223)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.processAndCommit(HoodieBackedTableMetadataWriter.java:993)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.update(HoodieBackedTableMetadataWriter.java:1223)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.table.action.BaseActionExecutor.writeTableMetadata(BaseActionExecutor.java:105)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.table.action.clean.CleanActionExecutor.runClean(CleanActionExecutor.java:234)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.table.action.clean.CleanActionExecutor.runPendingClean(CleanActionExecutor.java:199)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.table.action.clean.CleanActionExecutor.execute(CleanActionExecutor.java:270)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.table.HoodieSparkCopyOnWriteTable.clean(HoodieSparkCopyOnWriteTable.java:269)
~[hudi-spark3.5-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieTableServiceClient.clean(BaseHoodieTableServiceClient.java:808)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:863)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:836)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.async.AsyncCleanerService.lambda$startService$0(AsyncCleanerService.java:54)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
... 4 more
10:09:38.620 [main] ERROR org.apache.hudi.async.HoodieAsyncService - Service
shutdown with error
java.util.concurrent.ExecutionException:
org.apache.hudi.exception.HoodieException: Error waiting for async clean
service to finish
at
java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
~[?:1.8.0_432]
at
java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908)
~[?:1.8.0_432]
at
org.apache.hudi.async.HoodieAsyncService.waitForShutdown(HoodieAsyncService.java:102)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.utilities.ingestion.HoodieIngestionService.startIngestion(HoodieIngestionService.java:65)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at org.apache.hudi.common.util.Option.ifPresent(Option.java:101)
[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.utilities.streamer.HoodieStreamer.sync(HoodieStreamer.java:222)
[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.utilities.streamer.HoodieStreamer.main(HoodieStreamer.java:637)
[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
~[?:1.8.0_432]
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
~[?:1.8.0_432]
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
~[?:1.8.0_432]
at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_432]
at
org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
[spark-core_2.12-3.5.3.jar:3.5.3]
at
org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:1029)
[spark-core_2.12-3.5.3.jar:3.5.3]
at
org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:194)
[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:217)
[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:91)
[spark-core_2.12-3.5.3.jar:3.5.3]
at
org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1120)
[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1129)
[spark-core_2.12-3.5.3.jar:3.5.3]
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
[spark-core_2.12-3.5.3.jar:3.5.3]
Caused by: org.apache.hudi.exception.HoodieException: Error waiting for async
clean service to finish
at
org.apache.hudi.utilities.streamer.HoodieStreamer$StreamSyncService.lambda$startService$1(HoodieStreamer.java:858)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
~[?:1.8.0_432]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
~[?:1.8.0_432]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
~[?:1.8.0_432]
at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_432]
Caused by: org.apache.hudi.exception.HoodieException: Error waiting for async
clean service to finish
at
org.apache.hudi.async.AsyncCleanerService.waitForCompletion(AsyncCleanerService.java:76)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieTableServiceClient.asyncClean(BaseHoodieTableServiceClient.java:144)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieWriteClient.autoCleanOnCommit(BaseHoodieWriteClient.java:596)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieWriteClient.mayBeCleanAndArchive(BaseHoodieWriteClient.java:581)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:258)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:93)
~[hudi-spark3.5-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.utilities.streamer.StreamSync.writeToSinkAndDoMetaSync(StreamSync.java:948)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.utilities.streamer.StreamSync.syncOnce(StreamSync.java:520)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.utilities.streamer.HoodieStreamer$StreamSyncService.lambda$startService$1(HoodieStreamer.java:820)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
~[?:1.8.0_432]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
~[?:1.8.0_432]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
~[?:1.8.0_432]
at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_432]
Caused by: java.util.concurrent.ExecutionException:
org.apache.hudi.exception.HoodieMetadataException: Functional index metadata
not found
at
java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
~[?:1.8.0_432]
at
java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908)
~[?:1.8.0_432]
at
org.apache.hudi.async.HoodieAsyncService.waitForShutdown(HoodieAsyncService.java:102)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.async.AsyncCleanerService.waitForCompletion(AsyncCleanerService.java:74)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieTableServiceClient.asyncClean(BaseHoodieTableServiceClient.java:144)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieWriteClient.autoCleanOnCommit(BaseHoodieWriteClient.java:596)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieWriteClient.mayBeCleanAndArchive(BaseHoodieWriteClient.java:581)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:258)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:93)
~[hudi-spark3.5-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.utilities.streamer.StreamSync.writeToSinkAndDoMetaSync(StreamSync.java:948)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.utilities.streamer.StreamSync.syncOnce(StreamSync.java:520)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.utilities.streamer.HoodieStreamer$StreamSyncService.lambda$startService$1(HoodieStreamer.java:820)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
~[?:1.8.0_432]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
~[?:1.8.0_432]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
~[?:1.8.0_432]
at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_432]
Caused by: org.apache.hudi.exception.HoodieMetadataException: Functional index
metadata not found
at
org.apache.hudi.metadata.HoodieTableMetadataUtil.convertMetadataToFunctionalIndexRecords(HoodieTableMetadataUtil.java:613)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.metadata.HoodieTableMetadataUtil.convertMetadataToRecords(HoodieTableMetadataUtil.java:577)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.lambda$update$39(HoodieBackedTableMetadataWriter.java:1223)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.processAndCommit(HoodieBackedTableMetadataWriter.java:993)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.update(HoodieBackedTableMetadataWriter.java:1223)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.table.action.BaseActionExecutor.writeTableMetadata(BaseActionExecutor.java:105)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.table.action.clean.CleanActionExecutor.runClean(CleanActionExecutor.java:234)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.table.action.clean.CleanActionExecutor.runPendingClean(CleanActionExecutor.java:199)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.table.action.clean.CleanActionExecutor.execute(CleanActionExecutor.java:270)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.table.HoodieSparkCopyOnWriteTable.clean(HoodieSparkCopyOnWriteTable.java:269)
~[hudi-spark3.5-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieTableServiceClient.clean(BaseHoodieTableServiceClient.java:808)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:863)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:836)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
org.apache.hudi.async.AsyncCleanerService.lambda$startService$0(AsyncCleanerService.java:54)
~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
at
java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
~[?:1.8.0_432]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
~[?:1.8.0_432]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
~[?:1.8.0_432]
at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_432]
{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)