ivandika3 commented on code in PR #8681: URL: https://github.com/apache/ozone/pull/8681#discussion_r2205080842
########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyLifecycleService.java: ########## @@ -0,0 +1,668 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.om.service; + +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_KEY_LIFECYCLE_SERVICE_DELETE_BATCH_SIZE; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_KEY_LIFECYCLE_SERVICE_DELETE_BATCH_SIZE_DEFAULT; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_KEY_LIFECYCLE_SERVICE_ENABLED; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_KEY_LIFECYCLE_SERVICE_ENABLED_DEFAULT; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.protobuf.ServiceException; +import java.io.IOException; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; +import org.apache.hadoop.fs.Path; +import 
org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.conf.StorageUnit; +import org.apache.hadoop.hdds.utils.BackgroundService; +import org.apache.hadoop.hdds.utils.BackgroundTask; +import org.apache.hadoop.hdds.utils.BackgroundTaskQueue; +import org.apache.hadoop.hdds.utils.BackgroundTaskResult; +import org.apache.hadoop.hdds.utils.BackgroundTaskResult.EmptyTaskResult; +import org.apache.hadoop.hdds.utils.FaultInjector; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.ozone.ClientVersion; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.om.KeyManager; +import org.apache.hadoop.ozone.om.OMConfigKeys; +import org.apache.hadoop.ozone.om.OMMetadataManager; +import org.apache.hadoop.ozone.om.OzoneManager; +import org.apache.hadoop.ozone.om.OzoneTrash; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; +import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.OmLCRule; +import org.apache.hadoop.ozone.om.helpers.OmLifecycleConfiguration; +import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; +import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.DeleteKeyArgs; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.DeleteKeysRequest; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; +import org.apache.ratis.protocol.ClientId; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This is the background service to manage object lifecycle based on bucket lifecycle configuration. 
+ */ +public class KeyLifecycleService extends BackgroundService { + public static final Logger LOG = + LoggerFactory.getLogger(KeyLifecycleService.class); + + private final OzoneManager ozoneManager; + //TODO: honor this parameter in next patch + private int keyLimitPerIterator; + private int keyLimitPerRequest; + private final AtomicBoolean suspended; + private KeyLifecycleServiceMetrics metrics; + private boolean isServiceEnabled; + // A set of bucket name that have LifecycleActionTask scheduled + private final ConcurrentHashMap<String, LifecycleActionTask> inFlight; + private OMMetadataManager omMetadataManager; + private int ratisByteLimit; + private ClientId clientId = ClientId.randomId(); + private AtomicLong callId = new AtomicLong(0); + private OzoneTrash ozoneTrash; + private static List<FaultInjector> injectors; + + public KeyLifecycleService(OzoneManager ozoneManager, + KeyManager manager, long serviceInterval, + long serviceTimeout, int poolSize, + ConfigurationSource conf) { + super(KeyLifecycleService.class.getSimpleName(), serviceInterval, TimeUnit.MILLISECONDS, + poolSize, serviceTimeout, ozoneManager.getThreadNamePrefix()); + this.ozoneManager = ozoneManager; + this.keyLimitPerIterator = conf.getInt(OZONE_KEY_LIFECYCLE_SERVICE_DELETE_BATCH_SIZE, + OZONE_KEY_LIFECYCLE_SERVICE_DELETE_BATCH_SIZE_DEFAULT); + Preconditions.checkArgument(keyLimitPerIterator >= 0, + OZONE_KEY_LIFECYCLE_SERVICE_DELETE_BATCH_SIZE + " cannot be negative."); + this.keyLimitPerRequest = 100000; Review Comment: I think there are some tradeoffs here. Batching the keys should reduce the request count and the Ratis logs, but deleting a lot of keys together generates a lot of tombstones in keyTable / fileTable, which might cause subsequent list slowness and L0 compaction that might have some overhead. But since there is no evidence of the tombstone issue, we can follow the result based on `testPerformanceWithNestedDir`. 
In the future, we might want to trigger compaction on `keyTable` after every key lifecycle service run. ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyLifecycleService.java: ########## @@ -0,0 +1,668 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.om.service; + +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_KEY_LIFECYCLE_SERVICE_DELETE_BATCH_SIZE; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_KEY_LIFECYCLE_SERVICE_DELETE_BATCH_SIZE_DEFAULT; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_KEY_LIFECYCLE_SERVICE_ENABLED; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_KEY_LIFECYCLE_SERVICE_ENABLED_DEFAULT; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.protobuf.ServiceException; +import java.io.IOException; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.conf.StorageUnit; +import org.apache.hadoop.hdds.utils.BackgroundService; +import org.apache.hadoop.hdds.utils.BackgroundTask; +import org.apache.hadoop.hdds.utils.BackgroundTaskQueue; +import org.apache.hadoop.hdds.utils.BackgroundTaskResult; +import org.apache.hadoop.hdds.utils.BackgroundTaskResult.EmptyTaskResult; +import org.apache.hadoop.hdds.utils.FaultInjector; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.ozone.ClientVersion; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.om.KeyManager; +import org.apache.hadoop.ozone.om.OMConfigKeys; +import org.apache.hadoop.ozone.om.OMMetadataManager; +import org.apache.hadoop.ozone.om.OzoneManager; +import org.apache.hadoop.ozone.om.OzoneTrash; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import 
org.apache.hadoop.ozone.om.helpers.OmBucketInfo; +import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.OmLCRule; +import org.apache.hadoop.ozone.om.helpers.OmLifecycleConfiguration; +import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; +import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.DeleteKeyArgs; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.DeleteKeysRequest; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; +import org.apache.ratis.protocol.ClientId; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This is the background service to manage object lifecycle based on bucket lifecycle configuration. + */ +public class KeyLifecycleService extends BackgroundService { + public static final Logger LOG = + LoggerFactory.getLogger(KeyLifecycleService.class); + + private final OzoneManager ozoneManager; + //TODO: honor this parameter in next patch + private int keyLimitPerIterator; + private int keyLimitPerRequest; + private final AtomicBoolean suspended; + private KeyLifecycleServiceMetrics metrics; + private boolean isServiceEnabled; + // A set of bucket name that have LifecycleActionTask scheduled + private final ConcurrentHashMap<String, LifecycleActionTask> inFlight; + private OMMetadataManager omMetadataManager; + private int ratisByteLimit; + private ClientId clientId = ClientId.randomId(); + private AtomicLong callId = new AtomicLong(0); + private OzoneTrash ozoneTrash; + private static List<FaultInjector> injectors; + + public KeyLifecycleService(OzoneManager ozoneManager, + KeyManager manager, long serviceInterval, + long serviceTimeout, int poolSize, + ConfigurationSource conf) { + 
super(KeyLifecycleService.class.getSimpleName(), serviceInterval, TimeUnit.MILLISECONDS, + poolSize, serviceTimeout, ozoneManager.getThreadNamePrefix()); + this.ozoneManager = ozoneManager; + this.keyLimitPerIterator = conf.getInt(OZONE_KEY_LIFECYCLE_SERVICE_DELETE_BATCH_SIZE, + OZONE_KEY_LIFECYCLE_SERVICE_DELETE_BATCH_SIZE_DEFAULT); + Preconditions.checkArgument(keyLimitPerIterator >= 0, + OZONE_KEY_LIFECYCLE_SERVICE_DELETE_BATCH_SIZE + " cannot be negative."); + this.keyLimitPerRequest = 100000; Review Comment: I think there are some tradeoffs here. Batching the keys should reduce the request count and the Ratis logs, but deleting a lot of keys together generates a lot of tombstones in keyTable / fileTable, which might cause subsequent list slowness and L0 compaction that might have some overhead. But since there is no evidence of the tombstone issue, we can follow the result based on `testPerformanceWithNestedDir`. In the future, we might want to trigger RocksDB compaction on `keyTable` after every key lifecycle service run. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@ozone.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@ozone.apache.org For additional commands, e-mail: issues-h...@ozone.apache.org