[ https://issues.apache.org/jira/browse/HUDI-3708?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Ethan Guo closed HUDI-3708. --------------------------- Resolution: Fixed > Upsert to metadata table fails due to schema change > --------------------------------------------------- > > Key: HUDI-3708 > URL: https://issues.apache.org/jira/browse/HUDI-3708 > Project: Apache Hudi > Issue Type: Bug > Reporter: Ethan Guo > Assignee: Ethan Guo > Priority: Blocker > Labels: pull-request-available > Fix For: 0.11.0 > > > Scenario: Deltastreamer continuous mode, COW table, single writer with async > clustering and cleaning. Only the files partition is enabled in the metadata table. > The table was written before the metadata schema change (adding "columnName"). > When using the new writer with the new schema, the upsert to the metadata table > fails the schema compatibility check. > {code:java} > 22/03/23 23:11:38 WARN CleanActionExecutor: Failed to perform previous clean > operation, instant: [==>20220314172020474__clean__INFLIGHT] > org.apache.hudi.exception.HoodieUpsertException: Failed upsert schema > compatibility check. 
> at > org.apache.hudi.table.HoodieTable.validateUpsertSchema(HoodieTable.java:729) > at > org.apache.hudi.client.SparkRDDWriteClient.upsertPreppedRecords(SparkRDDWriteClient.java:169) > at > org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.commit(SparkHoodieBackedTableMetadataWriter.java:154) > at > org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.processAndCommit(HoodieBackedTableMetadataWriter.java:670) > at > org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.update(HoodieBackedTableMetadataWriter.java:694) > at > org.apache.hudi.table.action.BaseActionExecutor.lambda$writeTableMetadata$1(BaseActionExecutor.java:69) > at org.apache.hudi.common.util.Option.ifPresent(Option.java:97) > at > org.apache.hudi.table.action.BaseActionExecutor.writeTableMetadata(BaseActionExecutor.java:69) > at > org.apache.hudi.table.action.clean.CleanActionExecutor.runClean(CleanActionExecutor.java:211) > at > org.apache.hudi.table.action.clean.CleanActionExecutor.runPendingClean(CleanActionExecutor.java:176) > at > org.apache.hudi.table.action.clean.CleanActionExecutor.lambda$execute$6(CleanActionExecutor.java:238) > at java.util.ArrayList.forEach(ArrayList.java:1259) > at > org.apache.hudi.table.action.clean.CleanActionExecutor.execute(CleanActionExecutor.java:232) > at > org.apache.hudi.table.HoodieSparkCopyOnWriteTable.clean(HoodieSparkCopyOnWriteTable.java:339) > at > org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:781) > at > org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:738) > at > org.apache.hudi.async.AsyncCleanerService.lambda$startService$0(AsyncCleanerService.java:55) > at > java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: 
org.apache.hudi.exception.HoodieException: Failed schema > compatibility check for writerSchema > :{"type":"record","name":"HoodieMetadataRecord","namespace":"org.apache.hudi.avro.model","doc":"A > record saved within the Metadata > Table","fields":[{"name":"_hoodie_commit_time","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_commit_seqno","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_record_key","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_partition_path","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_file_name","type":["null","string"],"doc":"","default":null},{"name":"key","type":{"type":"string","avro.java.string":"String"}},{"name":"type","type":"int","doc":"Type > of the metadata > record"},{"name":"filesystemMetadata","type":["null",{"type":"map","values":{"type":"record","name":"HoodieMetadataFileInfo","fields":[{"name":"size","type":"long","doc":"Size > of the file"},{"name":"isDeleted","type":"boolean","doc":"True if this file > has been deleted"}]},"avro.java.string":"String"}],"doc":"Contains > information about partitions and files within the > dataset"},{"name":"BloomFilterMetadata","type":["null",{"type":"record","name":"HoodieMetadataBloomFilter","doc":"Data > file bloom filter > details","fields":[{"name":"type","type":{"type":"string","avro.java.string":"String"},"doc":"Bloom > filter type > code"},{"name":"timestamp","type":{"type":"string","avro.java.string":"String"},"doc":"Instant > timestamp when this metadata was > created/updated"},{"name":"bloomFilter","type":"bytes","doc":"Bloom filter > binary byte array"},{"name":"isDeleted","type":"boolean","doc":"Bloom filter > entry valid/deleted flag"}]}],"doc":"Metadata Index of bloom filters for all > data files in the user > table","default":null},{"name":"ColumnStatsMetadata","type":["null",{"type":"record","name":"HoodieMetadataColumnStats","doc":"Data > file column > 
statistics","fields":[{"name":"fileName","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"File > name for which this column statistics > applies"},{"name":"columnName","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Column > name for which this column statistics > applies"},{"name":"minValue","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Minimum > value in the range. Based on user data table schema, we can convert this to > appropriate > type"},{"name":"maxValue","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Maximum > value in the range. Based on user data table schema, we can convert it to > appropriate type"},{"name":"valueCount","type":["null","long"],"doc":"Total > count of values"},{"name":"nullCount","type":["null","long"],"doc":"Total > count of null > values"},{"name":"totalSize","type":["null","long"],"doc":"Total storage size > on disk"},{"name":"totalUncompressedSize","type":["null","long"],"doc":"Total > uncompressed storage size on > disk"},{"name":"isDeleted","type":"boolean","doc":"Column range entry > valid/deleted flag"}]}],"doc":"Metadata Index of column statistics for all > data files in the user table","default":null}]}, table schema > :{"type":"record","name":"HoodieMetadataRecord","namespace":"org.apache.hudi.avro.model","doc":"A > record saved within the Metadata > Table","fields":[{"name":"_hoodie_commit_time","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_commit_seqno","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_record_key","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_partition_path","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_file_name","type":["null","string"],"doc":"","default":null},{"name":"key","type":{"type":"string","avro.java.string":"String"}},{"name":"type","type":"int","doc":"Type > of the metadata > 
record"},{"name":"filesystemMetadata","type":["null",{"type":"map","values":{"type":"record","name":"HoodieMetadataFileInfo","fields":[{"name":"size","type":"long","doc":"Size > of the file"},{"name":"isDeleted","type":"boolean","doc":"True if this file > has been deleted"}]},"avro.java.string":"String"}],"doc":"Contains > information about partitions and files within the > dataset"},{"name":"BloomFilterMetadata","type":["null",{"type":"record","name":"HoodieMetadataBloomFilter","doc":"Data > file bloom filter > details","fields":[{"name":"type","type":{"type":"string","avro.java.string":"String"},"doc":"Bloom > filter type > code"},{"name":"timestamp","type":{"type":"string","avro.java.string":"String"},"doc":"Instant > timestamp when this metadata was > created/updated"},{"name":"bloomFilter","type":"bytes","doc":"Bloom filter > binary byte array"},{"name":"isDeleted","type":"boolean","doc":"Bloom filter > entry valid/deleted flag"}]}],"doc":"Metadata Index of bloom filters for all > data files in the user > table","default":null},{"name":"ColumnStatsMetadata","type":["null",{"type":"record","name":"HoodieMetadataColumnStats","doc":"Data > file column > statistics","fields":[{"name":"fileName","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"File > name for which this column statistics > applies"},{"name":"minValue","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Minimum > value in the range. Based on user data table schema, we can convert this to > appropriate > type"},{"name":"maxValue","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Maximum > value in the range. 
Based on user data table schema, we can convert it to > appropriate type"},{"name":"valueCount","type":["null","long"],"doc":"Total > count of values"},{"name":"nullCount","type":["null","long"],"doc":"Total > count of null > values"},{"name":"totalSize","type":["null","long"],"doc":"Total storage size > on disk"},{"name":"totalUncompressedSize","type":["null","long"],"doc":"Total > uncompressed storage size on > disk"},{"name":"isDeleted","type":"boolean","doc":"Column range entry > valid/deleted flag"}]}],"doc":"Metadata Index of column statistics for all > data files in the user table","default":null}]}, base path > :file:/Users/ethan/Work/scripts/mt_rollout_testing/deploy_b_single_writer_async_services/b3_ds_cow_010mt_011mt_conf_fix2/test_table/.hoodie/metadata > at org.apache.hudi.table.HoodieTable.validateSchema(HoodieTable.java:721) > at > org.apache.hudi.table.HoodieTable.validateUpsertSchema(HoodieTable.java:727) > ... 20 more {code} > > -- This message was sent by Atlassian Jira (v8.20.1#820001)