[ https://issues.apache.org/jira/browse/CLOUDSTACK-9764?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15878466#comment-15878466 ]
ASF GitHub Bot commented on CLOUDSTACK-9764: -------------------------------------------- Github user rafaelweingartner commented on a diff in the pull request: https://github.com/apache/cloudstack/pull/1935#discussion_r102323551 --- Diff: server/src/com/cloud/user/DomainManagerImpl.java --- @@ -273,82 +289,133 @@ public boolean deleteDomain(long domainId, Boolean cleanup) { @Override public boolean deleteDomain(DomainVO domain, Boolean cleanup) { - // mark domain as inactive - s_logger.debug("Marking domain id=" + domain.getId() + " as " + Domain.State.Inactive + " before actually deleting it"); - domain.setState(Domain.State.Inactive); - _domainDao.update(domain.getId(), domain); - boolean rollBackState = false; - boolean hasDedicatedResources = false; + GlobalLock lock = getGlobalLock("AccountCleanup"); + if (lock == null) { + s_logger.debug("Couldn't get the global lock"); + return false; + } + + if (!lock.lock(30)) { + s_logger.debug("Couldn't lock the db"); + return false; + } try { - long ownerId = domain.getAccountId(); - if ((cleanup != null) && cleanup.booleanValue()) { - if (!cleanupDomain(domain.getId(), ownerId)) { - rollBackState = true; - CloudRuntimeException e = - new CloudRuntimeException("Failed to clean up domain resources and sub domains, delete failed on domain " + domain.getName() + " (id: " + - domain.getId() + ")."); - e.addProxyObject(domain.getUuid(), "domainId"); - throw e; - } - } else { - //don't delete the domain if there are accounts set for cleanup, or non-removed networks exist, or domain has dedicated resources - List<Long> networkIds = _networkDomainDao.listNetworkIdsByDomain(domain.getId()); - List<AccountVO> accountsForCleanup = _accountDao.findCleanupsForRemovedAccounts(domain.getId()); - List<DedicatedResourceVO> dedicatedResources = _dedicatedDao.listByDomainId(domain.getId()); - if (dedicatedResources != null && !dedicatedResources.isEmpty()) { - s_logger.error("There are dedicated resources for the domain " + domain.getId()); - hasDedicatedResources = true; - } - if (accountsForCleanup.isEmpty() && networkIds.isEmpty() && !hasDedicatedResources) { - _messageBus.publish(_name, MESSAGE_PRE_REMOVE_DOMAIN_EVENT, PublishScope.LOCAL, domain); - if (!_domainDao.remove(domain.getId())) { + // mark domain as inactive + s_logger.debug("Marking domain id=" + domain.getId() + " as " + Domain.State.Inactive + " before actually deleting it"); + domain.setState(Domain.State.Inactive); + _domainDao.update(domain.getId(), domain); + + try { + long ownerId = domain.getAccountId(); + if (BooleanUtils.toBoolean(cleanup)) { + if (!cleanupDomain(domain.getId(), ownerId)) { rollBackState = true; CloudRuntimeException e = - new CloudRuntimeException("Delete failed on domain " + domain.getName() + " (id: " + domain.getId() + - "); Please make sure all users and sub domains have been removed from the domain before deleting"); + new CloudRuntimeException("Failed to clean up domain resources and sub domains, delete failed on domain " + domain.getName() + " (id: " + + domain.getId() + ")."); e.addProxyObject(domain.getUuid(), "domainId"); throw e; } - _messageBus.publish(_name, MESSAGE_REMOVE_DOMAIN_EVENT, PublishScope.LOCAL, domain); } else { - rollBackState = true; - String msg = null; - if (!accountsForCleanup.isEmpty()) { - msg = accountsForCleanup.size() + " accounts to cleanup"; - } else if (!networkIds.isEmpty()) { - msg = networkIds.size() + " non-removed networks"; - } else if (hasDedicatedResources) { - msg = "dedicated resources."; - } + checkDomainAccountsNetworksAndResourcesBeforeRemoving(domain); + } - CloudRuntimeException e = new CloudRuntimeException("Can't delete the domain yet because it has " + msg); - e.addProxyObject(domain.getUuid(), "domainId"); - throw e; + cleanupDomainOfferings(domain.getId()); + CallContext.current().putContextParameter(Domain.class, domain.getUuid()); + return true; + } catch (Exception ex) { + s_logger.error("Exception deleting domain with id " + domain.getId(), ex); + if (ex instanceof CloudRuntimeException) + throw (CloudRuntimeException)ex; + else + return false; + } finally { + //when success is false + if (rollBackState) { + s_logger.debug("Changing domain id=" + domain.getId() + " state back to " + Domain.State.Active + + " because it can't be removed due to resources referencing to it"); + domain.setState(Domain.State.Active); + _domainDao.update(domain.getId(), domain); } } + } + finally { + lock.unlock(); + rollBackState = false; + } + } - cleanupDomainOfferings(domain.getId()); - CallContext.current().putContextParameter(Domain.class, domain.getUuid()); - return true; - } catch (Exception ex) { - s_logger.error("Exception deleting domain with id " + domain.getId(), ex); - if (ex instanceof CloudRuntimeException) - throw (CloudRuntimeException)ex; - else - return false; - } finally { - //when success is false - if (rollBackState) { - s_logger.debug("Changing domain id=" + domain.getId() + " state back to " + Domain.State.Active + - " because it can't be removed due to resources referencing to it"); - domain.setState(Domain.State.Active); - _domainDao.update(domain.getId(), domain); - } + /** + * Check domain resources before removing domain. There are 2 cases: + * <ol> + * <li>Domain doesn't have accounts for cleanup, non-removed networks, or dedicated resources</li> + * <ul><li>Delete domain</li></ul> + * <li>Domain has one of the following: accounts set for cleanup, non-removed networks, dedicated resources</li> + * <ul><li>Dont' delete domain</li><li>Fail operation</li></ul> + * </ol> + * @param domain domain to remove + * @throws CloudRuntimeException when case 2 or when domain cannot be deleted on case 1 + */ + protected void checkDomainAccountsNetworksAndResourcesBeforeRemoving(DomainVO domain) { --- End diff -- It is not just a "check/validate" method, right? If the conditions described in the Java doc are met, the domain will be removed. Reading the method name I get the feeling that it just executes the checks before removing. However, at line 370, it seems that the domain removal happens there. > Delete domain failure due to Account Cleanup task > ------------------------------------------------- > > Key: CLOUDSTACK-9764 > URL: https://issues.apache.org/jira/browse/CLOUDSTACK-9764 > Project: CloudStack > Issue Type: Bug > Security Level: Public(Anyone can view this level - this is the > default.) > Affects Versions: 4.10.0.0 > Reporter: Nicolas Vazquez > Assignee: Nicolas Vazquez > Fix For: 4.10.0.0 > > > It was noticed in production environments that {{deleteDomain}} task failed > for domains with multiple accounts and resources. Examining logs it was found > out that if Account Cleanup Task got executed after domain (and all of its > subchilds) got marked as Inactive; and before delete domain task finishes, it > produces a failure. > {{AccountCleanupTask}} gets executed every {{account.cleanup.interval}} > seconds looking for: > * Removed accounts > * Disabled accounts > * Inactive domains > As {{deleteDomain}} marks domain to delete (and its subchilds) as Inactive > before deleting them, when {{AccountCleanupTask}} is executed, it removes > marked domains. When there are resources to cleanup on domain accounts, > domain is not found throwing exception: > {{com.cloud.exception.InvalidParameterValueException: Please specify a valid > domain ID}} > h3. Example > {{account.cleanup.interval}} = 100 > {noformat} > 2017-01-26 06:07:03,621 DEBUG [cloud.api.ApiServlet] > (catalina-exec-8:ctx-50cfa3b6 ctx-92ad5b38) ===END=== 10.39.251.17 -- GET > command=deleteDomain&id=1910a3dc-6fa6-457b-ab3a-602b0cfb6686&cleanup=true&response=json&_=1485439623475 > ... > // Domain and its subchilds marked as Inactive > 2017-01-26 06:07:03,640 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Marking domain id=27 > as Inactive before actually deleting it > 2017-01-26 06:07:03,646 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Cleaning up domain > id=27 > 2017-01-26 06:07:03,670 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Cleaning up domain > id=28 > 2017-01-26 06:07:03,685 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Cleaning up domain > id=29 > ... > // AccountCleanupTask removes Inactive domain id=29, no rollback for it > 2017-01-26 06:07:44,285 INFO [cloud.user.AccountManagerImpl] > (AccountChecker-1:ctx-b8a01824) Found 0 removed accounts to cleanup > 2017-01-26 06:07:44,287 INFO [cloud.user.AccountManagerImpl] > (AccountChecker-1:ctx-b8a01824) Found 0 disabled accounts to cleanup > 2017-01-26 06:07:44,289 INFO [cloud.user.AccountManagerImpl] > (AccountChecker-1:ctx-b8a01824) Found 3 inactive domains to cleanup > 2017-01-26 06:07:44,292 DEBUG [cloud.user.AccountManagerImpl] > (AccountChecker-1:ctx-b8a01824) Removing inactive domain id=27 > 2017-01-26 06:07:44,297 DEBUG [db.Transaction.Transaction] > (AccountChecker-1:ctx-b8a01824) Rolling back the transaction: Time = 2 Name = > AccountChecker-1; called by > -TransactionLegacy.rollback:889-TransactionLegacy.removeUpTo:832-TransactionLegacy.close:656-TransactionContextInterceptor.invoke:36-ReflectiveMethodInvocation.proceed:161-ExposeInvocationInterceptor.invoke:91-ReflectiveMethodInvocation.proceed:172-JdkDynamicAopProxy.invoke:204-$Proxy63.remove:-1-DomainManagerImpl.removeDomain:248-NativeMethodAccessorImpl.invoke0:-2-NativeMethodAccessorImpl.invoke:62 > 2017-01-26 06:07:44,301 DEBUG [cloud.user.AccountManagerImpl] > (AccountChecker-1:ctx-b8a01824) Removing inactive domain id=28 > 2017-01-26 06:07:44,304 DEBUG [db.Transaction.Transaction] > (AccountChecker-1:ctx-b8a01824) Rolling back the transaction: Time = 2 Name = > AccountChecker-1; called by > -TransactionLegacy.rollback:889-TransactionLegacy.removeUpTo:832-TransactionLegacy.close:656-TransactionContextInterceptor.invoke:36-ReflectiveMethodInvocation.proceed:161-ExposeInvocationInterceptor.invoke:91-ReflectiveMethodInvocation.proceed:172-JdkDynamicAopProxy.invoke:204-$Proxy63.remove:-1-DomainManagerImpl.removeDomain:248-NativeMethodAccessorImpl.invoke0:-2-NativeMethodAccessorImpl.invoke:62 > 2017-01-26 06:07:44,307 DEBUG [cloud.user.AccountManagerImpl] > (AccountChecker-1:ctx-b8a01824) Removing inactive domain id=29 > 2017-01-26 06:07:44,319 INFO [cloud.user.AccountManagerImpl] > (AccountChecker-1:ctx-b8a01824) Found 0 disabled projects to cleanup > ... > // Failure due to domain is already removed > 2017-01-26 06:07:46,369 WARN [cloud.user.AccountManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Failed to cleanup > account Acct[6a6e63ad-d89b-4a53-b3ae-1c06ea3d1899-ac2] due to > com.cloud.exception.InvalidParameterValueException: Please specify a valid > domain ID. > at > com.cloud.resourcelimit.ResourceLimitManagerImpl.recalculateResourceCount(ResourceLimitManagerImpl.java:752) > at com.cloud.vm.UserVmManagerImpl.expunge(UserVmManagerImpl.java:2053) > ... > 2017-01-26 06:07:46,381 INFO [cloud.user.AccountManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Cleanup for account > 2580 is needed. > 2017-01-26 06:07:46,390 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Deleting networks > for domain id=29 > 2017-01-26 06:07:46,392 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Can't delete the > domain yet because it has 1accounts that need a cleanup > 2017-01-26 06:07:46,392 WARN [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Failed to cleanup > domain id=29 > 2017-01-26 06:07:46,394 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Deleting networks > for domain id=28 > 2017-01-26 06:07:46,416 WARN [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Failed to cleanup > domain id=28 > 2017-01-26 06:07:46,418 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Deleting networks > for domain id=27 > 2017-01-26 06:07:46,440 ERROR [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Exception deleting > domain with id 27 > com.cloud.utils.exception.CloudRuntimeException: Failed to clean up domain > resources and sub domains, delete failed on domain A (id: 27). > ... > 2017-01-26 06:07:46,441 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Changing domain > id=27 state back to Active because it can't be removed due to resources > referencing to it > 2017-01-26 06:07:46,459 ERROR [cloud.api.ApiAsyncJobDispatcher] > (API-Job-Executor-29:ctx-23415942 job-7165) Unexpected exception while > executing org.apache.cloudstack.api.command.admin.domain.DeleteDomainCmd > com.cloud.utils.exception.CloudRuntimeException: Failed to clean up domain > resources and sub domains, delete failed on domain A (id: 27). > at > com.cloud.user.DomainManagerImpl.deleteDomain(DomainManagerImpl.java:290) > at > com.cloud.user.DomainManagerImpl.deleteDomain(DomainManagerImpl.java:271) > ... > {noformat}} -- This message was sent by Atlassian JIRA (v6.3.15#6346)