jerqi commented on code in PR #7850: URL: https://github.com/apache/gravitino/pull/7850#discussion_r2269256125
########## core/src/main/java/org/apache/gravitino/stats/storage/PartitionStatisticStorage.java: ########## @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.stats.storage; + +import com.google.common.collect.Lists; +import java.io.Closeable; +import java.util.List; +import java.util.stream.Collectors; +import org.apache.gravitino.MetadataObject; +import org.apache.gravitino.stats.PartitionRange; +import org.apache.gravitino.stats.PartitionStatisticsDrop; + +/** Interface for managing partition statistics in a storage system. */ +public interface PartitionStatisticStorage extends Closeable { + + /** + * Lists statistics for a given metadata object within a specified range of partition names. The + * implementation should guarantee the thread safe. The upper layer will add the parent object + * level write lock. For example, if the metadata object is a table, the read lock of the table + * level will be held. 
+ * + * @param metalake the name of the metalake + * @param metadataObject the metadata object for which statistics are being listed + * @param partitionRange the range of partition names for which statistics are being listed + * @return a list of {@link PersistedPartitionStatistics} objects, each containing the partition + * name + */ + List<PersistedPartitionStatistics> listStatistics( + String metalake, MetadataObject metadataObject, PartitionRange partitionRange); + + /** + * Lists statistics for a given metadata object and specific partition names. This interface may + * be used in the future. The upper logic layer won't call this method now. The implementation + * should guarantee the thread safe. The upper layer will add the parent object level write lock. + * For example, if the metadata object is a table, the read lock of the table level will be held. + * + * @param metalake the name of the metalake + * @param metadataObject the metadata object for which statistics are being listed + * @param partitionNames a list of partition names for which statistics are being listed + * @return a list of {@link PersistedPartitionStatistics} objects, each containing the partition + * name + */ + default List<PersistedPartitionStatistics> listStatistics( + String metalake, MetadataObject metadataObject, List<String> partitionNames) { + throw new UnsupportedOperationException( + "Don't support listStatistics with partition names yet."); + } + + /** + * Appends statistics to the storage for a given metadata object. The implementation should + * guarantee the thread safe. The upper layer will add the parent object level write lock. For + * example, if the metadata object is a table, the write lock of the schema level will be held. + * + * @param metalake the name of the metalake + * @param statisticsToAppend a list of {@link MetadataObjectStatisticsUpdate} objects, each + * containing the metadata object and its associated statistics updates. 
+ */ + void appendStatistics(String metalake, List<MetadataObjectStatisticsUpdate> statisticsToAppend); + + /** + * Drops statistics for specified partitions of a metadata object. The implementation should + * guarantee the thread safe. The upper layer will add the parent object level write lock. For + * example, if the metadata object is a table, the write lock of the schema level will be held. + * + * @param metalake the name of the metalake + * @param partitionStatisticsToDrop a map where the key is a {@link MetadataObject} and the value + * is a list of {@link PartitionStatisticsDrop} + */ + void dropStatistics( + String metalake, List<MetadataObjectStatisticsDrop> partitionStatisticsToDrop); + + /** + * Updates statistics for a given metadata object. The default implementation is to first drop and + * then append the statistics. Developer can override this logic if needed. The implementation + * should guarantee the thread safe. The upper layer will add the parent object level write lock. + * For example, if the metadata object is a table, the write lock of the schema level will be + * held. + * + * @param metalake the name of the metalake + * @param statisticsToUpdate a list of {@link MetadataObjectStatisticsUpdate} objects, each + * containing the metadata object and its associated statistics updates. 
+ */ + default void updateStatistics( + String metalake, List<MetadataObjectStatisticsUpdate> statisticsToUpdate) { + List<MetadataObjectStatisticsDrop> statisticsToDrop = + statisticsToUpdate.stream() + .map( + update -> + MetadataObjectStatisticsDrop.of( + update.metadataObject(), + update.partitionUpdates().stream() + .map( + partitionUpdate -> + PartitionStatisticsDrop.of( + partitionUpdate.partitionName(), + Lists.newArrayList(partitionUpdate.statistics().keySet()))) + .collect(Collectors.toList()))) + .collect(Collectors.toList()); + + dropStatistics(metalake, statisticsToDrop); + appendStatistics(metalake, statisticsToUpdate); + } +} Review Comment: Some storage systems don't support upsert semantics. For example, the Lance format doesn't support upsert in its Java API, so we need to delete first and then append. The design mainly targets storage that doesn't support upsert but does support delete and append. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gravitino.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org