achandel-01 commented on code in PR #406: URL: https://github.com/apache/atlas/pull/406#discussion_r2319529172
########## repository/src/main/java/org/apache/atlas/services/PurgeService.java: ########## @@ -0,0 +1,532 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.services; + +import org.apache.atlas.ApplicationProperties; +import org.apache.atlas.AtlasException; +import org.apache.atlas.DeleteType; +import org.apache.atlas.RequestContext; +import org.apache.atlas.annotation.AtlasService; +import org.apache.atlas.annotation.Timed; +import org.apache.atlas.model.instance.AtlasEntityHeader; +import org.apache.atlas.model.instance.EntityMutationResponse; +import org.apache.atlas.model.typedef.AtlasEntityDef; +import org.apache.atlas.pc.WorkItemBuilder; +import org.apache.atlas.pc.WorkItemConsumer; +import org.apache.atlas.pc.WorkItemManager; +import org.apache.atlas.repository.graphdb.AtlasGraph; +import org.apache.atlas.repository.graphdb.AtlasIndexQuery.Result; +import org.apache.atlas.repository.graphdb.AtlasVertex; +import org.apache.atlas.repository.store.graph.AtlasEntityStore; +import org.apache.atlas.repository.store.graph.v1.DeleteHandlerV1; +import org.apache.atlas.repository.store.graph.v2.AtlasGraphUtilsV2; +import org.apache.atlas.service.Service; +import 
org.apache.atlas.type.AtlasTypeRegistry; +import org.apache.atlas.utils.AtlasPerfTracer; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.configuration.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; + +import javax.inject.Inject; +import java.util.*; +import java.util.concurrent.BlockingQueue; +import java.util.stream.Collectors; + +import static org.apache.atlas.discovery.SearchProcessor.AND_STR; +import static org.apache.atlas.model.instance.EntityMutations.EntityOperation.PURGE; +import static org.apache.atlas.repository.Constants.*; + +@AtlasService +@Order(9) +@Component +public class PurgeService implements Service { + private static final Logger LOG = LoggerFactory.getLogger(PurgeService.class); + private static final Logger PERF_LOG = AtlasPerfTracer.getPerfLogger("service.Purge"); + private final AtlasGraph atlasGraph; + private static Configuration atlasProperties; + private final AtlasEntityStore entityStore; + private final AtlasTypeRegistry typeRegistry; + + private static final String ENABLE_PROCESS_SOFT_DELETION = "atlas.enable.process.soft.delete"; + private static final boolean ENABLE_PROCESS_SOFT_DELETION_DEFAULT = false; + private static final String PURGE_ENABLED_SERVICE_TYPES = "atlas.purge.enabled.services"; + private static final String SOFT_DELETE_ENABLED_PROCESS_TYPES = "atlas.soft.delete.enabled.process.types"; + private static final String PURGE_BATCH_SIZE = "atlas.purge.batch.size"; + private static final int DEFAULT_PURGE_BATCH_SIZE = 1000; // fetching limit at a time + private static final String PURGE_WORKER_BATCH_SIZE = "atlas.purge.worker.batch.size"; + private static final int DEFAULT_PURGE_WORKER_BATCH_SIZE = 100; + private static final String CLEANUP_WORKER_BATCH_SIZE = "atlas.cleanup.worker.batch.size"; + private static final int DEFAULT_CLEANUP_WORKER_BATCH_SIZE = 100; 
+ private static final String PURGE_RETENTION_PERIOD = "atlas.purge.deleted.entity.retention.days"; + private static final int PURGE_RETENTION_PERIOD_DEFAULT = 30;// days + private static final String PURGE_WORKERS_COUNT = "atlas.purge.workers.count"; + private static final int DEFAULT_PURGE_WORKERS_COUNT = 2; + private static final String CLEANUP_WORKERS_COUNT = "atlas.cleanup.workers.count"; + private static final int DEFAULT_CLEANUP_WORKERS_COUNT = 2; + private static final String PROCESS_ENTITY_CLEANER_THREAD_NAME = "Process-Entity-Cleaner"; + private final String indexSearchPrefix = AtlasGraphUtilsV2.getIndexSearchPrefix(); + private static final int DEFAULT_CLEANUP_BATCH_SIZE = 1000; + private static final String CLEANUP_WORKERS_NAME = "Process-Cleanup-Worker"; + private static final String PURGE_WORKERS_NAME = "Entity-Purge-Worker"; + private final static String DELETED = "DELETED"; + private final static String ACTIVE = "ACTIVE"; + + + static { + try { + atlasProperties = ApplicationProperties.get(); + } catch (Exception e) { + LOG.info("Failed to load application properties", e); + } + } + + @Inject + public PurgeService(AtlasGraph atlasgraph, AtlasEntityStore entityStore, AtlasTypeRegistry typeRegistry){ + this.atlasGraph = atlasgraph; + this.entityStore = entityStore; + this.typeRegistry = typeRegistry; + } + + @Override + public void start() throws AtlasException { + if(!getSoftDeletionFlag()) { + LOG.info("==> cleanup not enabled"); + return ; + } + + LOG.info("==> PurgeService.start()"); + + launchCleanUp(); + + LOG.info("<== Launched the clean up thread"); + } + + @Override + public void stop() throws AtlasException { + LOG.info("==> stopping the purge service"); + } + + public void launchCleanUp() { + LOG.info("==> launching the new thread"); + + Thread thread = new Thread( + () -> { + long startTime = System.currentTimeMillis(); + LOG.info("==> {} started", PROCESS_ENTITY_CLEANER_THREAD_NAME); + softDeleteProcessEntities(); + LOG.info("==> exiting 
thread {}", PROCESS_ENTITY_CLEANER_THREAD_NAME); + long endTime = System.currentTimeMillis(); + LOG.info("==> completed cleanup {} seconds !", (endTime-startTime)/1000); + } + ); + + thread.setName(PROCESS_ENTITY_CLEANER_THREAD_NAME); + thread.start(); + LOG.info("==> launched the thread for the clean up"); + } + + @SuppressWarnings("unchecked") + @Timed + public EntityMutationResponse purgeEntities() { + LOG.info("==> PurgeService.purgeEntities()"); + // index query of specific batch size + AtlasPerfTracer perf = null; + EntityMutationResponse entityMutationResponse = new EntityMutationResponse(); + RequestContext requestContext = RequestContext.get(); + requestContext.setDeleteType(DeleteType.HARD);// hard delete + requestContext.setPurgeRequested(true); + + try { + + if (AtlasPerfTracer.isPerfTraceEnabled(PERF_LOG)) { + perf = AtlasPerfTracer.getPerfTracer(PERF_LOG, "PurgeService.purgeEntities"); + } + + Set<String> allEligibleTypes = getEntityTypes(); + + try { + //bring n number of entities like 1000 at point of type Processes + WorkItemsQualifier wiq = createQualifier(typeRegistry, entityStore, atlasGraph, getPurgeWorkerBatchSize(), getPurgeWorkersCount(), true); + + String indexQuery = getBulkQueryString(allEligibleTypes, getPurgeRetentionPeriod()); + Iterator<Result> itr = atlasGraph.indexQuery(VERTEX_INDEX, indexQuery).vertices(0, getPurgeBatchSize()); + LOG.info("==> fetched Deleted entities"); + + if (!itr.hasNext()) { + LOG.info("==> no Purge Entities found"); + return entityMutationResponse; + } + + Set<String> producedDeletionCandidates = new HashSet<>(); // look up + + while (itr.hasNext()) { + AtlasVertex vertex = itr.next().getVertex(); + + if (vertex == null) { + continue; + } + + String guid = vertex.getProperty(GUID_PROPERTY_KEY, String.class); + + if (!producedDeletionCandidates.contains(guid)) { + Set<String> instanceVertex = new HashSet<>(); + instanceVertex.add(guid); + + Set<AtlasVertex> deletionCandidates = 
entityStore.accumulateDeletionCandidates(instanceVertex); + Review Comment: It's risky because we don't know how many entities in total there will be to purge after accumulation. Suppose the fetched entities are tables: we would then accumulate their composite entities as well as their upstream entities, which can lead to a significant rise in the number of entities. If something goes wrong, the whole job will fail without producing any results. The chances of that are much lower when purging the entities individually, as the entities accumulated at any one time will not be that large. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
