rkundam commented on code in PR #406:
URL: https://github.com/apache/atlas/pull/406#discussion_r2317155085
##########
repository/src/main/java/org/apache/atlas/repository/store/graph/v2/AtlasEntityStoreV2.java:
##########
@@ -577,6 +577,53 @@ public EntityMutationResponse purgeByIds(Set<String>
guids) throws AtlasBaseExce
return ret;
}
+ @Override
+ @GraphTransaction
+ public EntityMutationResponse
schedulePurgeEntities(Set<String>purgeCandidates) throws AtlasBaseException {
+ LOG.info("==> schedulePurgeEntities()");
+
+ Collection<AtlasVertex>purgeVertices = new ArrayList<>();
+ EntityMutationResponse response = new EntityMutationResponse();
+
+ RequestContext requestContext = RequestContext.get();
+ requestContext.setDeleteType(DeleteType.HARD);// hard deleter
+ requestContext.setPurgeRequested(true);
+
+ for (String guid : purgeCandidates) {
+ AtlasVertex vertex = AtlasGraphUtilsV2.findByGuid(graph, guid);
+ if( vertex != null ) {
+ AtlasEntityHeader entityHeader =
entityRetriever.toAtlasEntityHeader(vertex);
Review Comment:
Fetching EntityHeader info for all input GUIDs might be expensive. Did you
evaluate whether this is necessary?
##########
repository/src/main/java/org/apache/atlas/repository/store/graph/v1/DeleteHandlerV1.java:
##########
@@ -183,15 +179,147 @@ public void deleteEntities(Collection<AtlasVertex>
instanceVertices) throws Atla
requestContext.recordEntityDelete(entityHeader);
deletionCandidateVertices.add(vertexInfo.getVertex());
}
+
+ AtlasEntityHeader entity =
entityRetriever.toAtlasEntityHeader(instanceVertex);
+ AtlasEntityType entityType =
typeRegistry.getEntityTypeByName(entity.getTypeName());
+
+ if (entityType.getEntityDef().hasSuperType(ATLAS_TYPE_DATASET)) {
+ addUpstreamProcessEntities(instanceVertex,
deletionCandidateVertices, instanceVertexGuids);
+ }
+
+ if (entityType.getEntityDef().hasSuperType(ATLAS_TYPE_PROCESS)) {
+ getColumnLineageEntities(instanceVertex,
deletionCandidateVertices);
+ }
}
+ return deletionCandidateVertices;
+ }
- // Delete traits and vertices.
+ /*
+ actually delete traits and then the vertex along its references
+ */
+ public void deleteTraitsAndVertices(Collection<AtlasVertex>
deletionCandidateVertices) throws AtlasBaseException {
for (AtlasVertex deletionCandidateVertex : deletionCandidateVertices) {
deleteAllClassifications(deletionCandidateVertex);
deleteTypeVertex(deletionCandidateVertex,
isInternalType(deletionCandidateVertex));
}
}
+ public void addUpstreamProcessEntities(AtlasVertex entityVertex,
Set<AtlasVertex> deletionCandidateVertices, Set<String>instanceVertexGuids)
throws AtlasBaseException {
+ RequestContext requestContext = RequestContext.get();
+
+ Iterator<AtlasEdge> edgeIterator =
GraphHelper.getIncomingEdgesByLabel(entityVertex, PROCESS_OUTPUTS_EDGE);
+
+ String entityVertexGuid = entityVertex.getProperty(GUID_PROPERTY_KEY,
String.class);
+
+ while (edgeIterator.hasNext()) {
+ AtlasEdge edge = edgeIterator.next();
+ AtlasVertex processVertex = edge.getOutVertex();
+
+ String guid = processVertex.getProperty(GUID_PROPERTY_KEY,
String.class);
+ if (instanceVertexGuids.contains(guid)) {
+ return ; // already added
+ }
+
+ boolean isEligible = isEligible(processVertex, entityVertexGuid,
instanceVertexGuids);
+
+ if (isEligible) {
+ instanceVertexGuids.add(guid);
+
+ getColumnLineageEntities(processVertex,
deletionCandidateVertices);
+
+ for (GraphHelper.VertexInfo vertexInfo :
getOwnedVertices(processVertex)) {
+ AtlasEntityHeader entityHeader = vertexInfo.getEntity();
+
+ if (requestContext.isPurgeRequested()) {
+
entityHeader.setClassifications(entityRetriever.getAllClassifications(vertexInfo.getVertex()));
+ }
+
+ requestContext.recordEntityDelete(entityHeader);
+ deletionCandidateVertices.add(vertexInfo.getVertex());
+ }
+ }
+ }
+ }
+
+ public void getColumnLineageEntities(AtlasVertex process, Set<AtlasVertex>
deletionCandidateVertices) throws AtlasBaseException {
+ LOG.info("==> getColumnLineageEntities");
+ RequestContext requestContext = RequestContext.get();
+
+ AtlasEntityHeader entity =
entityRetriever.toAtlasEntityHeader(process);
+ AtlasEntityType entityType =
typeRegistry.getEntityTypeByName(entity.getTypeName());
+ Map<String, Map<String, AtlasAttribute>> relationshipAttributes =
entityType.getRelationshipAttributes();
+ Map<String,AtlasAttribute> columnLineages =
relationshipAttributes.get(RELATIONSHIP_ATTRIBUTE_KEY_STRING);
+
+ if (columnLineages != null && !columnLineages.isEmpty()) {
+ AtlasAttribute atlasAttribute=
columnLineages.values().iterator().next();
+ String relationshipEdgeLabel =
atlasAttribute.getRelationshipEdgeLabel();
+
+ Iterator<AtlasEdge>edgeIterator =
GraphHelper.getIncomingEdgesByLabel(process, relationshipEdgeLabel);
+
+ while (edgeIterator.hasNext()) {
+ AtlasVertex columnLineageVertex =
edgeIterator.next().getOutVertex();
+ String typeName =
columnLineageVertex.getProperty(TYPE_NAME_PROPERTY_KEY, String.class);
+ LOG.info("typeName {} ", typeName);
Review Comment:
Update this log message with more meaningful and descriptive information.
##########
repository/src/main/java/org/apache/atlas/services/PurgeService.java:
##########
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.services;
+
+import org.apache.atlas.ApplicationProperties;
+import org.apache.atlas.AtlasException;
+import org.apache.atlas.DeleteType;
+import org.apache.atlas.RequestContext;
+import org.apache.atlas.annotation.AtlasService;
+import org.apache.atlas.annotation.Timed;
+import org.apache.atlas.model.instance.AtlasEntityHeader;
+import org.apache.atlas.model.instance.EntityMutationResponse;
+import org.apache.atlas.model.typedef.AtlasEntityDef;
+import org.apache.atlas.pc.WorkItemBuilder;
+import org.apache.atlas.pc.WorkItemConsumer;
+import org.apache.atlas.pc.WorkItemManager;
+import org.apache.atlas.repository.graphdb.AtlasGraph;
+import org.apache.atlas.repository.graphdb.AtlasIndexQuery.Result;
+import org.apache.atlas.repository.graphdb.AtlasVertex;
+import org.apache.atlas.repository.store.graph.AtlasEntityStore;
+import org.apache.atlas.repository.store.graph.v1.DeleteHandlerV1;
+import org.apache.atlas.repository.store.graph.v2.AtlasGraphUtilsV2;
+import org.apache.atlas.service.Service;
+import org.apache.atlas.type.AtlasTypeRegistry;
+import org.apache.atlas.utils.AtlasPerfTracer;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.configuration.Configuration;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.core.annotation.Order;
+import org.springframework.stereotype.Component;
+
+import javax.inject.Inject;
+import java.util.*;
+import java.util.concurrent.BlockingQueue;
+import java.util.stream.Collectors;
+
+import static org.apache.atlas.discovery.SearchProcessor.AND_STR;
+import static
org.apache.atlas.model.instance.EntityMutations.EntityOperation.PURGE;
+import static org.apache.atlas.repository.Constants.*;
+
+@AtlasService
+@Order(9)
+@Component
+public class PurgeService implements Service {
+ private static final Logger LOG =
LoggerFactory.getLogger(PurgeService.class);
+ private static final Logger PERF_LOG =
AtlasPerfTracer.getPerfLogger("service.Purge");
+ private final AtlasGraph atlasGraph;
+ private static Configuration atlasProperties;
+ private final AtlasEntityStore entityStore;
+ private final AtlasTypeRegistry typeRegistry;
+
+ private static final String ENABLE_PROCESS_SOFT_DELETION =
"atlas.enable.process.soft.delete";
+ private static final boolean ENABLE_PROCESS_SOFT_DELETION_DEFAULT = false;
+ private static final String PURGE_ENABLED_SERVICE_TYPES =
"atlas.purge.enabled.services";
+ private static final String SOFT_DELETE_ENABLED_PROCESS_TYPES =
"atlas.soft.delete.enabled.process.types";
+ private static final String PURGE_BATCH_SIZE =
"atlas.purge.batch.size";
+ private static final int DEFAULT_PURGE_BATCH_SIZE = 1000;
// fetching limit at a time
+ private static final String PURGE_WORKER_BATCH_SIZE =
"atlas.purge.worker.batch.size";
+ private static final int DEFAULT_PURGE_WORKER_BATCH_SIZE = 100;
+ private static final String CLEANUP_WORKER_BATCH_SIZE =
"atlas.cleanup.worker.batch.size";
+ private static final int DEFAULT_CLEANUP_WORKER_BATCH_SIZE = 100;
+ private static final String PURGE_RETENTION_PERIOD =
"atlas.purge.deleted.entity.retention.days";
+ private static final int PURGE_RETENTION_PERIOD_DEFAULT = 30;//
days
+ private static final String PURGE_WORKERS_COUNT =
"atlas.purge.workers.count";
+ private static final int DEFAULT_PURGE_WORKERS_COUNT = 2;
+ private static final String CLEANUP_WORKERS_COUNT =
"atlas.cleanup.workers.count";
+ private static final int DEFAULT_CLEANUP_WORKERS_COUNT = 2;
+ private static final String PROCESS_ENTITY_CLEANER_THREAD_NAME =
"Process-Entity-Cleaner";
+ private final String indexSearchPrefix =
AtlasGraphUtilsV2.getIndexSearchPrefix();
+ private static final int DEFAULT_CLEANUP_BATCH_SIZE = 1000;
+ private static final String CLEANUP_WORKERS_NAME =
"Process-Cleanup-Worker";
+ private static final String PURGE_WORKERS_NAME =
"Entity-Purge-Worker";
+ private final static String DELETED =
"DELETED";
+ private final static String ACTIVE =
"ACTIVE";
+
+
+ static {
+ try {
+ atlasProperties = ApplicationProperties.get();
+ } catch (Exception e) {
+ LOG.info("Failed to load application properties", e);
+ }
+ }
+
+ @Inject
+ public PurgeService(AtlasGraph atlasgraph, AtlasEntityStore entityStore,
AtlasTypeRegistry typeRegistry){
+ this.atlasGraph = atlasgraph;
+ this.entityStore = entityStore;
+ this.typeRegistry = typeRegistry;
+ }
+
+ @Override
+ public void start() throws AtlasException {
+ if(!getSoftDeletionFlag()) {
+ LOG.info("==> cleanup not enabled");
+ return ;
+ }
+
+ LOG.info("==> PurgeService.start()");
+
+ launchCleanUp();
+
+ LOG.info("<== Launched the clean up thread");
+ }
+
+ @Override
+ public void stop() throws AtlasException {
+ LOG.info("==> stopping the purge service");
+ }
+
+ public void launchCleanUp() {
+ LOG.info("==> launching the new thread");
+
+ Thread thread = new Thread(
+ () -> {
+ long startTime = System.currentTimeMillis();
+ LOG.info("==> {} started",
PROCESS_ENTITY_CLEANER_THREAD_NAME);
+ softDeleteProcessEntities();
+ LOG.info("==> exiting thread {}",
PROCESS_ENTITY_CLEANER_THREAD_NAME);
+ long endTime = System.currentTimeMillis();
+ LOG.info("==> completed cleanup {} seconds !",
(endTime-startTime)/1000);
+ }
+ );
+
+ thread.setName(PROCESS_ENTITY_CLEANER_THREAD_NAME);
+ thread.start();
+ LOG.info("==> launched the thread for the clean up");
+ }
+
+ @SuppressWarnings("unchecked")
+ @Timed
+ public EntityMutationResponse purgeEntities() {
+ LOG.info("==> PurgeService.purgeEntities()");
+ // index query of specific batch size
+ AtlasPerfTracer perf = null;
+ EntityMutationResponse entityMutationResponse = new
EntityMutationResponse();
+ RequestContext requestContext = RequestContext.get();
+ requestContext.setDeleteType(DeleteType.HARD);// hard delete
+ requestContext.setPurgeRequested(true);
+
+ try {
+
+ if (AtlasPerfTracer.isPerfTraceEnabled(PERF_LOG)) {
+ perf = AtlasPerfTracer.getPerfTracer(PERF_LOG,
"PurgeService.purgeEntities");
+ }
+
+ Set<String> allEligibleTypes = getEntityTypes();
+
+ try {
+ //bring n number of entities like 1000 at point of type
Processes
+ WorkItemsQualifier wiq = createQualifier(typeRegistry,
entityStore, atlasGraph, getPurgeWorkerBatchSize(), getPurgeWorkersCount(),
true);
+
+ String indexQuery = getBulkQueryString(allEligibleTypes,
getPurgeRetentionPeriod());
+ Iterator<Result> itr = atlasGraph.indexQuery(VERTEX_INDEX,
indexQuery).vertices(0, getPurgeBatchSize());
+ LOG.info("==> fetched Deleted entities");
+
+ if (!itr.hasNext()) {
+ LOG.info("==> no Purge Entities found");
+ return entityMutationResponse;
+ }
+
+ Set<String> producedDeletionCandidates = new HashSet<>(); //
look up
+
+ while (itr.hasNext()) {
+ AtlasVertex vertex = itr.next().getVertex();
+
+ if (vertex == null) {
+ continue;
+ }
+
+ String guid = vertex.getProperty(GUID_PROPERTY_KEY,
String.class);
+
+ if (!producedDeletionCandidates.contains(guid)) {
+ Set<String> instanceVertex = new HashSet<>();
+ instanceVertex.add(guid);
+
+ Set<AtlasVertex> deletionCandidates =
entityStore.accumulateDeletionCandidates(instanceVertex);
+
Review Comment:
Right now, accumulateDeletionCandidates is being called once per GUID inside
the loop, which may be inefficient. Since accumulateDeletionCandidates supports
batch input, we could collect all eligible GUIDs into a set and make a single
call outside the loop. This would reduce repeated calls and improve
performance. If there’s a specific reason it’s implemented this way, please
clarify.
##########
repository/src/main/java/org/apache/atlas/repository/store/graph/v1/DeleteHandlerV1.java:
##########
@@ -77,17 +68,8 @@
import static org.apache.atlas.model.instance.AtlasEntity.Status.DELETED;
import static org.apache.atlas.model.instance.AtlasEntity.Status.PURGED;
import static
org.apache.atlas.model.typedef.AtlasRelationshipDef.PropagateTags.ONE_TO_TWO;
-import static
org.apache.atlas.repository.Constants.CLASSIFICATION_EDGE_NAME_PROPERTY_KEY;
-import static
org.apache.atlas.repository.Constants.CLASSIFICATION_ENTITY_STATUS;
-import static org.apache.atlas.repository.Constants.CLASSIFICATION_LABEL;
-import static
org.apache.atlas.repository.Constants.CLASSIFICATION_NAME_DELIMITER;
-import static
org.apache.atlas.repository.Constants.EDGE_PENDING_TASKS_PROPERTY_KEY;
-import static
org.apache.atlas.repository.Constants.MODIFICATION_TIMESTAMP_PROPERTY_KEY;
-import static org.apache.atlas.repository.Constants.MODIFIED_BY_KEY;
-import static
org.apache.atlas.repository.Constants.PROPAGATED_CLASSIFICATION_NAMES_KEY;
-import static
org.apache.atlas.repository.Constants.PROPAGATED_TRAIT_NAMES_PROPERTY_KEY;
-import static
org.apache.atlas.repository.Constants.RELATIONSHIPTYPE_TAG_PROPAGATION_KEY;
-import static
org.apache.atlas.repository.Constants.RELATIONSHIP_GUID_PROPERTY_KEY;
+import static org.apache.atlas.repository.Constants.*;
Review Comment:
To follow checkstyle coding guidelines, please avoid using wildcard imports
(*). Statically import only the required constants explicitly.
##########
repository/src/main/java/org/apache/atlas/services/PurgeService.java:
##########
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.services;
+
+import org.apache.atlas.ApplicationProperties;
+import org.apache.atlas.AtlasException;
+import org.apache.atlas.DeleteType;
+import org.apache.atlas.RequestContext;
+import org.apache.atlas.annotation.AtlasService;
+import org.apache.atlas.annotation.Timed;
+import org.apache.atlas.model.instance.AtlasEntityHeader;
+import org.apache.atlas.model.instance.EntityMutationResponse;
+import org.apache.atlas.model.typedef.AtlasEntityDef;
+import org.apache.atlas.pc.WorkItemBuilder;
+import org.apache.atlas.pc.WorkItemConsumer;
+import org.apache.atlas.pc.WorkItemManager;
+import org.apache.atlas.repository.graphdb.AtlasGraph;
+import org.apache.atlas.repository.graphdb.AtlasIndexQuery.Result;
+import org.apache.atlas.repository.graphdb.AtlasVertex;
+import org.apache.atlas.repository.store.graph.AtlasEntityStore;
+import org.apache.atlas.repository.store.graph.v1.DeleteHandlerV1;
+import org.apache.atlas.repository.store.graph.v2.AtlasGraphUtilsV2;
+import org.apache.atlas.service.Service;
+import org.apache.atlas.type.AtlasTypeRegistry;
+import org.apache.atlas.utils.AtlasPerfTracer;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.configuration.Configuration;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.core.annotation.Order;
+import org.springframework.stereotype.Component;
+
+import javax.inject.Inject;
+import java.util.*;
+import java.util.concurrent.BlockingQueue;
+import java.util.stream.Collectors;
+
+import static org.apache.atlas.discovery.SearchProcessor.AND_STR;
+import static
org.apache.atlas.model.instance.EntityMutations.EntityOperation.PURGE;
+import static org.apache.atlas.repository.Constants.*;
+
+@AtlasService
+@Order(9)
+@Component
+public class PurgeService implements Service {
+ private static final Logger LOG =
LoggerFactory.getLogger(PurgeService.class);
+ private static final Logger PERF_LOG =
AtlasPerfTracer.getPerfLogger("service.Purge");
+ private final AtlasGraph atlasGraph;
+ private static Configuration atlasProperties;
+ private final AtlasEntityStore entityStore;
+ private final AtlasTypeRegistry typeRegistry;
+
+ private static final String ENABLE_PROCESS_SOFT_DELETION =
"atlas.enable.process.soft.delete";
+ private static final boolean ENABLE_PROCESS_SOFT_DELETION_DEFAULT = false;
+ private static final String PURGE_ENABLED_SERVICE_TYPES =
"atlas.purge.enabled.services";
+ private static final String SOFT_DELETE_ENABLED_PROCESS_TYPES =
"atlas.soft.delete.enabled.process.types";
+ private static final String PURGE_BATCH_SIZE =
"atlas.purge.batch.size";
+ private static final int DEFAULT_PURGE_BATCH_SIZE = 1000;
// fetching limit at a time
+ private static final String PURGE_WORKER_BATCH_SIZE =
"atlas.purge.worker.batch.size";
+ private static final int DEFAULT_PURGE_WORKER_BATCH_SIZE = 100;
+ private static final String CLEANUP_WORKER_BATCH_SIZE =
"atlas.cleanup.worker.batch.size";
+ private static final int DEFAULT_CLEANUP_WORKER_BATCH_SIZE = 100;
+ private static final String PURGE_RETENTION_PERIOD =
"atlas.purge.deleted.entity.retention.days";
+ private static final int PURGE_RETENTION_PERIOD_DEFAULT = 30;//
days
+ private static final String PURGE_WORKERS_COUNT =
"atlas.purge.workers.count";
+ private static final int DEFAULT_PURGE_WORKERS_COUNT = 2;
+ private static final String CLEANUP_WORKERS_COUNT =
"atlas.cleanup.workers.count";
+ private static final int DEFAULT_CLEANUP_WORKERS_COUNT = 2;
+ private static final String PROCESS_ENTITY_CLEANER_THREAD_NAME =
"Process-Entity-Cleaner";
+ private final String indexSearchPrefix =
AtlasGraphUtilsV2.getIndexSearchPrefix();
+ private static final int DEFAULT_CLEANUP_BATCH_SIZE = 1000;
+ private static final String CLEANUP_WORKERS_NAME =
"Process-Cleanup-Worker";
+ private static final String PURGE_WORKERS_NAME =
"Entity-Purge-Worker";
+ private final static String DELETED =
"DELETED";
+ private final static String ACTIVE =
"ACTIVE";
+
+
+ static {
+ try {
+ atlasProperties = ApplicationProperties.get();
+ } catch (Exception e) {
+ LOG.info("Failed to load application properties", e);
+ }
+ }
+
+ @Inject
+ public PurgeService(AtlasGraph atlasgraph, AtlasEntityStore entityStore,
AtlasTypeRegistry typeRegistry){
+ this.atlasGraph = atlasgraph;
+ this.entityStore = entityStore;
+ this.typeRegistry = typeRegistry;
+ }
+
+ @Override
+ public void start() throws AtlasException {
+ if(!getSoftDeletionFlag()) {
+ LOG.info("==> cleanup not enabled");
+ return ;
+ }
+
+ LOG.info("==> PurgeService.start()");
+
+ launchCleanUp();
+
+ LOG.info("<== Launched the clean up thread");
+ }
+
+ @Override
+ public void stop() throws AtlasException {
+ LOG.info("==> stopping the purge service");
+ }
+
+ public void launchCleanUp() {
+ LOG.info("==> launching the new thread");
+
+ Thread thread = new Thread(
+ () -> {
+ long startTime = System.currentTimeMillis();
+ LOG.info("==> {} started",
PROCESS_ENTITY_CLEANER_THREAD_NAME);
+ softDeleteProcessEntities();
+ LOG.info("==> exiting thread {}",
PROCESS_ENTITY_CLEANER_THREAD_NAME);
+ long endTime = System.currentTimeMillis();
+ LOG.info("==> completed cleanup {} seconds !",
(endTime-startTime)/1000);
+ }
+ );
+
+ thread.setName(PROCESS_ENTITY_CLEANER_THREAD_NAME);
+ thread.start();
+ LOG.info("==> launched the thread for the clean up");
+ }
+
+ @SuppressWarnings("unchecked")
+ @Timed
+ public EntityMutationResponse purgeEntities() {
+ LOG.info("==> PurgeService.purgeEntities()");
+ // index query of specific batch size
+ AtlasPerfTracer perf = null;
+ EntityMutationResponse entityMutationResponse = new
EntityMutationResponse();
+ RequestContext requestContext = RequestContext.get();
+ requestContext.setDeleteType(DeleteType.HARD);// hard delete
+ requestContext.setPurgeRequested(true);
+
+ try {
+
+ if (AtlasPerfTracer.isPerfTraceEnabled(PERF_LOG)) {
+ perf = AtlasPerfTracer.getPerfTracer(PERF_LOG,
"PurgeService.purgeEntities");
+ }
+
+ Set<String> allEligibleTypes = getEntityTypes();
+
+ try {
+ //bring n number of entities like 1000 at point of type
Processes
+ WorkItemsQualifier wiq = createQualifier(typeRegistry,
entityStore, atlasGraph, getPurgeWorkerBatchSize(), getPurgeWorkersCount(),
true);
+
+ String indexQuery = getBulkQueryString(allEligibleTypes,
getPurgeRetentionPeriod());
+ Iterator<Result> itr = atlasGraph.indexQuery(VERTEX_INDEX,
indexQuery).vertices(0, getPurgeBatchSize());
+ LOG.info("==> fetched Deleted entities");
+
+ if (!itr.hasNext()) {
+ LOG.info("==> no Purge Entities found");
+ return entityMutationResponse;
+ }
+
+ Set<String> producedDeletionCandidates = new HashSet<>(); //
look up
+
+ while (itr.hasNext()) {
+ AtlasVertex vertex = itr.next().getVertex();
+
+ if (vertex == null) {
+ continue;
+ }
+
+ String guid = vertex.getProperty(GUID_PROPERTY_KEY,
String.class);
+
+ if (!producedDeletionCandidates.contains(guid)) {
+ Set<String> instanceVertex = new HashSet<>();
+ instanceVertex.add(guid);
+
+ Set<AtlasVertex> deletionCandidates =
entityStore.accumulateDeletionCandidates(instanceVertex);
+
+ for (AtlasVertex deletionCandidate :
deletionCandidates) {
+ String deletionCandidateGuid =
deletionCandidate.getProperty(GUID_PROPERTY_KEY, String.class);
+ if
(!producedDeletionCandidates.contains(deletionCandidateGuid)) {
+
producedDeletionCandidates.add(deletionCandidateGuid);
+ wiq.checkProduce(deletionCandidate);
+ }
+ }
+ }
+ }
+
+ wiq.shutdown();
+
+ // collecting all the results
+ Queue results = wiq.getResults();
+
+ LOG.info("==> Purged {} !", results.size());
+
+ while (!results.isEmpty()) {
+ AtlasEntityHeader entityHeader = (AtlasEntityHeader)
results.poll();
+ if (entityHeader == null) {
+ continue;
+ }
+ entityMutationResponse.addEntity(PURGE, entityHeader);
+ }
+
+ } catch (Exception ex) {
+ LOG.error("purge: failed!", ex);
+ } finally {
+ LOG.info("purge: Done!");
+ }
+ } finally {
+ AtlasPerfTracer.log(perf);
+ }
+
+ LOG.info("<== PurgeService.purgeEntities()");
+
+ return entityMutationResponse;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Timed
+ public void softDeleteProcessEntities() {
+ LOG.info("==> softDeleteProcessEntities()");
+
+ AtlasPerfTracer perf = null;
+ try {
+
+ if (AtlasPerfTracer.isPerfTraceEnabled(PERF_LOG)) {
+ perf = AtlasPerfTracer.getPerfTracer(PERF_LOG,
"PurgeService.softDeleteProcessEntities");
+ }
+
+ Set<String> validProcessTypes = getProcessTypes();
+ try {
+ //bring n number of entities like 1000 at point of type
Processes
+ WorkItemsQualifier wiq = createQualifier(typeRegistry,
entityStore, atlasGraph, getCleanupWorkerBatchSize(), getCleanUpWorkersCount(),
false);
+ int offset = 0;
+ boolean moreResults = true;
+
+ while (moreResults) {
+ String indexQuery = getBulkQueryString(validProcessTypes,
0);
+ Iterator<Result> itr = atlasGraph.indexQuery(VERTEX_INDEX,
indexQuery).vertices(offset, DEFAULT_CLEANUP_BATCH_SIZE);
+ LOG.info("==> fetched entities");
+
+ if (!itr.hasNext()) {
+ moreResults = false;
+ }
+
+ while (itr.hasNext()) {
+ AtlasVertex vertex = itr.next().getVertex();
+ if (vertex != null) {
+ wiq.checkProduce(vertex);
+ }
+ }
+
+ offset += DEFAULT_CLEANUP_BATCH_SIZE;
+ LOG.info("==> offset {}", offset);
+ }
+
+ wiq.shutdown();
+ } catch (Exception ex) {
+ LOG.error("cleanUp: failed!", ex);
+ } finally {
+ LOG.info("cleanUp: Done!");
+ }
+ }finally {
+ AtlasPerfTracer.log(perf);
+ }
+
+ LOG.info("<== softDeleteProcessEntities()");
+ }
+
+ static class EntityQualifier extends WorkItemConsumer<AtlasVertex> {
+ private final Set<String> batch = new HashSet<>();
+ private final AtlasEntityStore entityStore;
+ private final AtlasTypeRegistry typeRegistry;
+ private final AtlasGraph atlasGraph;
+ private final boolean isPurgeEnabled;
+ private int batchesProcessed;
+ private int batchSize;
+
+ public EntityQualifier(BlockingQueue<AtlasVertex> queue,
AtlasTypeRegistry typeRegistry, AtlasEntityStore entityStore, AtlasGraph
atlasGraph, boolean isPurgeEnabled, int batchSize){
+ super(queue);
+ this.typeRegistry = typeRegistry;
+ this.entityStore = entityStore;
+ this.atlasGraph = atlasGraph;
+ this.isPurgeEnabled = isPurgeEnabled;
+ this.batchesProcessed = 0;
+ this.batchSize = batchSize;
+
+ if (isPurgeEnabled) {
+ LOG.info("==> consumers are purge enabled , batch size is {}",
batchSize);
+ } else {
+ LOG.info("==> consumers are soft delete enabled , batch size
is {}", batchSize);
+ }
+ }
+
+ @Override
+ protected void processItem(AtlasVertex vertex) {
+ String guid = vertex.getProperty(GUID_PROPERTY_KEY, String.class);
+ LOG.info("==> processing the entity {}", guid);
+
+ try {
+ if (!isPurgeEnabled && !isEligible(vertex)) {
Review Comment:
In the condition if (!isPurgeEnabled && !isEligible(vertex)), it seems we
only skip processing when both are false. If the intent is to skip when either
isPurgeEnabled is false or isEligible(vertex) is false, this should use ||
instead of &&. Can you confirm the intended logic?
##########
repository/src/main/java/org/apache/atlas/repository/store/graph/v1/DeleteHandlerV1.java:
##########
@@ -57,16 +57,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Objects;
-import java.util.Set;
-import java.util.Stack;
+import java.util.*;
Review Comment:
To follow checkstyle coding guidelines, please avoid using wildcard imports
(*). Import only the required classes explicitly.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
users@infra.apache.org