[ https://issues.apache.org/jira/browse/CLOUDSTACK-9764?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15876318#comment-15876318 ]
ASF GitHub Bot commented on CLOUDSTACK-9764: -------------------------------------------- Github user nvazquez commented on a diff in the pull request: https://github.com/apache/cloudstack/pull/1935#discussion_r102265306 --- Diff: server/src/com/cloud/user/DomainManagerImpl.java --- @@ -273,79 +274,97 @@ public boolean deleteDomain(long domainId, Boolean cleanup) { @Override public boolean deleteDomain(DomainVO domain, Boolean cleanup) { - // mark domain as inactive - s_logger.debug("Marking domain id=" + domain.getId() + " as " + Domain.State.Inactive + " before actually deleting it"); - domain.setState(Domain.State.Inactive); - _domainDao.update(domain.getId(), domain); - boolean rollBackState = false; - boolean hasDedicatedResources = false; + GlobalLock lock = GlobalLock.getInternLock("AccountCleanup"); + if (lock == null) { + s_logger.debug("Couldn't get the global lock"); + return false; + } + + if (!lock.lock(30)) { + s_logger.debug("Couldn't lock the db"); + return false; + } try { - long ownerId = domain.getAccountId(); - if ((cleanup != null) && cleanup.booleanValue()) { - if (!cleanupDomain(domain.getId(), ownerId)) { - rollBackState = true; - CloudRuntimeException e = - new CloudRuntimeException("Failed to clean up domain resources and sub domains, delete failed on domain " + domain.getName() + " (id: " + - domain.getId() + ")."); - e.addProxyObject(domain.getUuid(), "domainId"); - throw e; - } - } else { - //don't delete the domain if there are accounts set for cleanup, or non-removed networks exist, or domain has dedicated resources - List<Long> networkIds = _networkDomainDao.listNetworkIdsByDomain(domain.getId()); - List<AccountVO> accountsForCleanup = _accountDao.findCleanupsForRemovedAccounts(domain.getId()); - List<DedicatedResourceVO> dedicatedResources = _dedicatedDao.listByDomainId(domain.getId()); - if (dedicatedResources != null && !dedicatedResources.isEmpty()) { - s_logger.error("There are dedicated resources for the domain " + 
domain.getId()); - hasDedicatedResources = true; - } - if (accountsForCleanup.isEmpty() && networkIds.isEmpty() && !hasDedicatedResources) { - _messageBus.publish(_name, MESSAGE_PRE_REMOVE_DOMAIN_EVENT, PublishScope.LOCAL, domain); - if (!_domainDao.remove(domain.getId())) { + // mark domain as inactive + s_logger.debug("Marking domain id=" + domain.getId() + " as " + Domain.State.Inactive + " before actually deleting it"); + domain.setState(Domain.State.Inactive); + _domainDao.update(domain.getId(), domain); + boolean rollBackState = false; + boolean hasDedicatedResources = false; + + try { + long ownerId = domain.getAccountId(); + if ((cleanup != null) && cleanup.booleanValue()) { --- End diff -- Done, thanks @rafaelweingartner > Delete domain failure due to Account Cleanup task > ------------------------------------------------- > > Key: CLOUDSTACK-9764 > URL: https://issues.apache.org/jira/browse/CLOUDSTACK-9764 > Project: CloudStack > Issue Type: Bug > Security Level: Public(Anyone can view this level - this is the > default.) > Affects Versions: 4.10.0.0 > Reporter: Nicolas Vazquez > Assignee: Nicolas Vazquez > Fix For: 4.10.0.0 > > > It was noticed in production environments that the {{deleteDomain}} task failed > for domains with multiple accounts and resources. Examining the logs, it was found > that if the Account Cleanup Task is executed after the domain (and all of its > subdomains) has been marked as Inactive, but before the delete domain task finishes, the > deletion fails. > {{AccountCleanupTask}} is executed every {{account.cleanup.interval}} > seconds, looking for: > * Removed accounts > * Disabled accounts > * Inactive domains > As {{deleteDomain}} marks the domain to delete (and its subdomains) as Inactive > before deleting them, {{AccountCleanupTask}} removes the > marked domains when it is executed. 
When there are resources to clean up on the domain's accounts, > the domain is then not found and an exception is thrown: > {{com.cloud.exception.InvalidParameterValueException: Please specify a valid > domain ID}} > h3. Example > {{account.cleanup.interval}} = 100 > {noformat} > 2017-01-26 06:07:03,621 DEBUG [cloud.api.ApiServlet] > (catalina-exec-8:ctx-50cfa3b6 ctx-92ad5b38) ===END=== 10.39.251.17 -- GET > command=deleteDomain&id=1910a3dc-6fa6-457b-ab3a-602b0cfb6686&cleanup=true&response=json&_=1485439623475 > ... > // Domain and its subdomains marked as Inactive > 2017-01-26 06:07:03,640 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Marking domain id=27 > as Inactive before actually deleting it > 2017-01-26 06:07:03,646 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Cleaning up domain > id=27 > 2017-01-26 06:07:03,670 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Cleaning up domain > id=28 > 2017-01-26 06:07:03,685 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Cleaning up domain > id=29 > ... 
> // AccountCleanupTask removes Inactive domain id=29, no rollback for it > 2017-01-26 06:07:44,285 INFO [cloud.user.AccountManagerImpl] > (AccountChecker-1:ctx-b8a01824) Found 0 removed accounts to cleanup > 2017-01-26 06:07:44,287 INFO [cloud.user.AccountManagerImpl] > (AccountChecker-1:ctx-b8a01824) Found 0 disabled accounts to cleanup > 2017-01-26 06:07:44,289 INFO [cloud.user.AccountManagerImpl] > (AccountChecker-1:ctx-b8a01824) Found 3 inactive domains to cleanup > 2017-01-26 06:07:44,292 DEBUG [cloud.user.AccountManagerImpl] > (AccountChecker-1:ctx-b8a01824) Removing inactive domain id=27 > 2017-01-26 06:07:44,297 DEBUG [db.Transaction.Transaction] > (AccountChecker-1:ctx-b8a01824) Rolling back the transaction: Time = 2 Name = > AccountChecker-1; called by > -TransactionLegacy.rollback:889-TransactionLegacy.removeUpTo:832-TransactionLegacy.close:656-TransactionContextInterceptor.invoke:36-ReflectiveMethodInvocation.proceed:161-ExposeInvocationInterceptor.invoke:91-ReflectiveMethodInvocation.proceed:172-JdkDynamicAopProxy.invoke:204-$Proxy63.remove:-1-DomainManagerImpl.removeDomain:248-NativeMethodAccessorImpl.invoke0:-2-NativeMethodAccessorImpl.invoke:62 > 2017-01-26 06:07:44,301 DEBUG [cloud.user.AccountManagerImpl] > (AccountChecker-1:ctx-b8a01824) Removing inactive domain id=28 > 2017-01-26 06:07:44,304 DEBUG [db.Transaction.Transaction] > (AccountChecker-1:ctx-b8a01824) Rolling back the transaction: Time = 2 Name = > AccountChecker-1; called by > -TransactionLegacy.rollback:889-TransactionLegacy.removeUpTo:832-TransactionLegacy.close:656-TransactionContextInterceptor.invoke:36-ReflectiveMethodInvocation.proceed:161-ExposeInvocationInterceptor.invoke:91-ReflectiveMethodInvocation.proceed:172-JdkDynamicAopProxy.invoke:204-$Proxy63.remove:-1-DomainManagerImpl.removeDomain:248-NativeMethodAccessorImpl.invoke0:-2-NativeMethodAccessorImpl.invoke:62 > 2017-01-26 06:07:44,307 DEBUG [cloud.user.AccountManagerImpl] > (AccountChecker-1:ctx-b8a01824) Removing 
inactive domain id=29 > 2017-01-26 06:07:44,319 INFO [cloud.user.AccountManagerImpl] > (AccountChecker-1:ctx-b8a01824) Found 0 disabled projects to cleanup > ... > // Failure due to domain is already removed > 2017-01-26 06:07:46,369 WARN [cloud.user.AccountManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Failed to cleanup > account Acct[6a6e63ad-d89b-4a53-b3ae-1c06ea3d1899-ac2] due to > com.cloud.exception.InvalidParameterValueException: Please specify a valid > domain ID. > at > com.cloud.resourcelimit.ResourceLimitManagerImpl.recalculateResourceCount(ResourceLimitManagerImpl.java:752) > at com.cloud.vm.UserVmManagerImpl.expunge(UserVmManagerImpl.java:2053) > ... > 2017-01-26 06:07:46,381 INFO [cloud.user.AccountManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Cleanup for account > 2580 is needed. > 2017-01-26 06:07:46,390 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Deleting networks > for domain id=29 > 2017-01-26 06:07:46,392 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Can't delete the > domain yet because it has 1accounts that need a cleanup > 2017-01-26 06:07:46,392 WARN [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Failed to cleanup > domain id=29 > 2017-01-26 06:07:46,394 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Deleting networks > for domain id=28 > 2017-01-26 06:07:46,416 WARN [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Failed to cleanup > domain id=28 > 2017-01-26 06:07:46,418 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Deleting networks > for domain id=27 > 2017-01-26 06:07:46,440 ERROR [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Exception deleting > domain with id 27 
> com.cloud.utils.exception.CloudRuntimeException: Failed to clean up domain > resources and sub domains, delete failed on domain A (id: 27). > ... > 2017-01-26 06:07:46,441 DEBUG [cloud.user.DomainManagerImpl] > (API-Job-Executor-29:ctx-23415942 job-7165 ctx-fe3d13d6) Changing domain > id=27 state back to Active because it can't be removed due to resources > referencing to it > 2017-01-26 06:07:46,459 ERROR [cloud.api.ApiAsyncJobDispatcher] > (API-Job-Executor-29:ctx-23415942 job-7165) Unexpected exception while > executing org.apache.cloudstack.api.command.admin.domain.DeleteDomainCmd > com.cloud.utils.exception.CloudRuntimeException: Failed to clean up domain > resources and sub domains, delete failed on domain A (id: 27). > at > com.cloud.user.DomainManagerImpl.deleteDomain(DomainManagerImpl.java:290) > at > com.cloud.user.DomainManagerImpl.deleteDomain(DomainManagerImpl.java:271) > ... > {noformat} -- This message was sent by Atlassian JIRA (v6.3.15#6346)