This is an automated email from the ASF dual-hosted git repository.

timbrown pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 387752218cf3 feat(schema): Phase 24 - Restore O(1) reference equality 
comparison in HoodieMetadataPayload (#17672)
387752218cf3 is described below

commit 387752218cf3cd53c2d7f8fafcb89eca07dd155c
Author: voonhous <[email protected]>
AuthorDate: Wed Dec 24 02:29:40 2025 +0800

    feat(schema): Phase 24 - Restore O(1) reference equality comparison in 
HoodieMetadataPayload (#17672)
---
 .../org/apache/hudi/metadata/HoodieMetadataPayload.java  | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git 
a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java 
b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
index 8346889b6a73..87a787c9461c 100644
--- 
a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
+++ 
b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
@@ -32,6 +32,8 @@ import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecordGlobalLocation;
 import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.schema.HoodieSchema;
+import org.apache.hudi.common.schema.HoodieSchemaCache;
 import org.apache.hudi.common.table.timeline.TimelineUtils;
 import org.apache.hudi.common.util.CollectionUtils;
 import org.apache.hudi.common.util.Option;
@@ -102,7 +104,12 @@ import static 
org.apache.hudi.metadata.HoodieTableMetadataUtil.getPartitionStats
  * During compaction on the table, the deletions are merged with additions and 
hence records are pruned.
  */
 public class HoodieMetadataPayload implements 
HoodieRecordPayload<HoodieMetadataPayload> {
-  private static final Schema HOODIE_METADATA_SCHEMA = 
AvroSchemaCache.intern(HoodieMetadataRecord.getClassSchema());
+
+  // Note: Variable is unused, but caching is required.
+  private static final HoodieSchema HOODIE_METADATA_SCHEMA = 
HoodieSchemaCache.intern(
+      HoodieSchema.fromAvroSchema(HoodieMetadataRecord.getClassSchema()));
+  // Cache the Avro schema reference for O(1) equality checks during 
Avro.Schema -> HoodieSchema migration
+  private static final Schema HOODIE_METADATA_AVRO_SCHEMA = 
AvroSchemaCache.intern(HoodieMetadataRecord.getClassSchema());
   /**
    * Field offsets when metadata fields are present
    */
@@ -410,10 +417,9 @@ public class HoodieMetadataPayload implements 
HoodieRecordPayload<HoodieMetadata
       return Option.empty();
     }
 
-    // TODO: feature(schema): HoodieSchema change, we removed caching in a few 
areas, during the migration of Avro.Schema -> HoodieSchema.
-    // The schema objects might have been the same reference (due to caching), 
but now after converting from HoodieSchema to Avro Schema using .toAvroSchema(),
-    // it creates a new Schema object that's not the same reference as 
HOODIE_METADATA_SCHEMA
-    if (schema == null || HOODIE_METADATA_SCHEMA.equals(schema)) {
+    // TODO: feature(schema): Swap this over to HOODIE_METADATA_SCHEMA after 
HoodieRecordPayload implementations are using HoodieSchema
+    // Uses cached Avro schema reference for O(1) equality check.
+    if (schema == null || schema == HOODIE_METADATA_AVRO_SCHEMA) {
       // If the schema is same or none is provided, we can return the record 
directly
       HoodieMetadataRecord record = new HoodieMetadataRecord(key, type, 
filesystemMetadata, bloomFilterMetadata,
           columnStatMetadata, recordIndexMetadata, secondaryIndexMetadata);

Reply via email to