fengjian428 commented on code in PR #4676: URL: https://github.com/apache/hudi/pull/4676#discussion_r973537469
########## hudi-common/src/main/java/org/apache/hudi/common/model/PartialUpdateAvroPayload.java: ########## @@ -0,0 +1,248 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.model; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.avro.generic.IndexedRecord; + +import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ReflectionUtils; +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.keygen.constant.KeyGeneratorOptions; + +import java.io.IOException; +import java.util.List; +import java.util.Properties; + +/** + * Payload class that is used for partial updates of a Hudi table. + * + * <p>Simplified partial update logic: + * <pre> + * 1. #preCombine + * For records with the same record key in one batch + * or in the delta logs that belong to the same File Group, + * Checks whether one record's ordering value is larger than the other record. + * If yes, overwrites the existing one for specified fields that are not null. + * + * 2. 
#combineAndGetUpdateValue + * For every incoming record with existing record in storage (same record key) + * Checks whether incoming record's ordering value is larger than the existing record. + * If yes, overwrites the existing one for specified fields that are not null. + * else overwrites the incoming one with the existing record for specified fields that are not null + * and returns a merged record. + * + * Illustration with simple data. + * let's say the order field is 'ts' and schema is : + * { + * [ + * {"name":"id","type":"string"}, + * {"name":"ts","type":"long"}, + * {"name":"name","type":"string"}, + * {"name":"price","type":"string"} + * ] + * } + * + * case 1 + * Current data: + * id ts name price + * 1 1 name_1 price_1 + * Insert data: + * id ts name price + * 1 2 null price_2 + * + * Result data after #preCombine or #combineAndGetUpdateValue: + * id ts name price + * 1 2 name_1 price_2 + * + * case 2 + * Current data: + * id ts name price + * 1 2 name_1 null + * Insert data: + * id ts name price + * 1 1 null price_1 + * + * Result data after #preCombine or #combineAndGetUpdateValue: + * id ts name price + * 1 2 name_1 price_1 + *</pre> + */ +public class PartialUpdateAvroPayload extends OverwriteNonDefaultsWithLatestAvroPayload { + + /* + flag for deleted record combine logic + 1 preCombine: if delete record is newer, return merged record with _hoodie_is_deleted=true + 2 combineAndGetUpdateValue: return empty since we don't need to store deleted data to storage + */ + private boolean isPrecombining = false; Review Comment: ok, will do it in another JIRA/PR -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org