DaanHoogland commented on a change in pull request #3350: Get Diagnostics: Download logs and diagnostics data from SSVM, CPVM, Router
URL: https://github.com/apache/cloudstack/pull/3350#discussion_r362797808
########## File path: server/src/main/java/org/apache/cloudstack/diagnostics/DiagnosticsServiceImpl.java ########## @@ -126,10 +185,336 @@ protected String prepareShellCmd(String cmdType, String ipAddress, String option } } + private String zipFilesInSystemVm(VMInstanceVO vmInstance, List<String> optionalFilesList) { + List<String> fileList = getFileListToBeRetrieved(optionalFilesList, vmInstance); + + if (CollectionUtils.isEmpty(fileList)) { + throw new CloudRuntimeException("Failed to generate diagnostics file list for retrieval."); + } + + final Answer zipFilesAnswer = prepareDiagnosticsFilesInSystemVm(vmInstance, fileList); + + if (zipFilesAnswer == null) { + throw new CloudRuntimeException(String.format("Failed to generate diagnostics zip file in the system VM %s", vmInstance.getUuid())); + } + + if (!zipFilesAnswer.getResult()) { + throw new CloudRuntimeException(String.format("Failed to generate diagnostics zip file in VM %s due to: %s", vmInstance.getUuid(), zipFilesAnswer.getDetails())); + } + + return zipFilesAnswer.getDetails().replace("\n", ""); + } + + @Override + @ActionEvent(eventType = EventTypes.EVENT_SYSTEM_VM_DIAGNOSTICS, eventDescription = "getting diagnostics files on system vm", async = true) + public String getDiagnosticsDataCommand(GetDiagnosticsDataCmd cmd) { + final Long vmId = cmd.getId(); + final List<String> optionalFilesList = cmd.getFilesList(); + final VMInstanceVO vmInstance = getSystemVMInstance(vmId); + final DataStore store = getImageStore(vmInstance.getDataCenterId()); + + final String zipFileInSystemVm = zipFilesInSystemVm(vmInstance, optionalFilesList); + final Long vmHostId = vmInstance.getHostId(); + copyZipFileToSecondaryStorage(vmInstance, vmHostId, zipFileInSystemVm, store); + deleteDiagnosticsZipFileInsystemVm(vmInstance, zipFileInSystemVm); + + // Now we need to create the file download URL + // Find ssvm of store + final long zoneId = vmInstance.getDataCenterId(); + VMInstanceVO ssvm = 
getSecondaryStorageVmInZone(zoneId); + if (ssvm == null) { + throw new CloudRuntimeException("No SSVM found in zone with ID: " + zoneId); + } + + // Secondary Storage install path = "diagnostics_data/diagnostics_files_xxxx.tar + String installPath = DIAGNOSTICS_DIRECTORY + File.separator + zipFileInSystemVm.replace("/root", ""); + return createFileDownloadUrl(store, ssvm.getHypervisorType(), installPath); + } + + /** + * Copy retrieved diagnostics zip file from system vm to secondary storage + * For VMware use the mgmt server, and for Xen/KVM use the hyperhost of the target VM + * The strategy is to mount secondary storage on mgmt server or host and scp directly to /mnt/SecStorage/diagnostics_data + * + * @param fileToCopy zip file in system vm to be copied + * @param store secondary storage to copy zip file to + */ + private Pair<Boolean, String> copyZipFileToSecondaryStorage(VMInstanceVO vmInstance, Long vmHostId, String fileToCopy, DataStore store) { + String vmControlIp = getVMSshIp(vmInstance); + if (StringUtils.isBlank(vmControlIp)) { + return new Pair<>(false, "Unable to find system vm ssh/control IP for vm with ID: " + vmInstance.getId()); + } + Pair<Boolean, String> copyResult; + if (vmInstance.getHypervisorType() == Hypervisor.HypervisorType.VMware) { + copyResult = orchestrateCopyToSecondaryStorageVMware(store, vmControlIp, fileToCopy); + } else { + copyResult = orchestrateCopyToSecondaryStorageNonVMware(store, vmControlIp, fileToCopy, vmHostId); + } + + if (!copyResult.first()) { + throw new CloudRuntimeException(String.format("Failed to copy %s to secondary storage %s due to: %s.", fileToCopy, store.getUri(), copyResult.second())); + } + + return copyResult; + } + + private void configureNetworkElementCommand(NetworkElementCommand cmd, VMInstanceVO vmInstance) { + Map<String, String> accessDetails = networkManager.getSystemVMAccessDetails(vmInstance); + if (StringUtils.isBlank(accessDetails.get(NetworkElementCommand.ROUTER_IP))) { + throw new 
CloudRuntimeException("Unable to set system vm ControlIP for system vm with ID: " + vmInstance.getId()); + } + cmd.setAccessDetail(accessDetails); + } + + private Answer prepareDiagnosticsFilesInSystemVm(VMInstanceVO vmInstance, List<String> fileList) { + final PrepareFilesCommand cmd = new PrepareFilesCommand(fileList, DataRetrievalTimeout.value()); + configureNetworkElementCommand(cmd, vmInstance); + Answer answer = agentManager.easySend(vmInstance.getHostId(), cmd); + return answer; + } + + private Answer deleteDiagnosticsZipFileInsystemVm(VMInstanceVO vmInstance, String zipFileName) { + final DeleteFileInVrCommand cmd = new DeleteFileInVrCommand(zipFileName); + configureNetworkElementCommand(cmd, vmInstance); + final Answer fileCleanupAnswer = agentManager.easySend(vmInstance.getHostId(), cmd); + if (fileCleanupAnswer == null) { + LOGGER.error(String.format("Failed to cleanup diagnostics zip file on vm: %s", vmInstance.getUuid())); + } else { + if (!fileCleanupAnswer.getResult()) { + LOGGER.error(String.format("Zip file cleanup for vm %s has failed with: %s", vmInstance.getUuid(), fileCleanupAnswer.getDetails())); + } + } + + return fileCleanupAnswer; + } + + /** + * Generate a list of diagnostics file to be retrieved depending on the system VM type + * + * @param optionalFileList Optional list of files that user may want to retrieve, empty by default + * @param vmInstance system VM instance, either SSVM, CPVM or VR + * @return a list of files to be retrieved for system VM, either generated from defaults depending on the VM type, or specified + * by the optional list param + */ + private List<String> getFileListToBeRetrieved(List<String> optionalFileList, VMInstanceVO vmInstance) { + DiagnosticsFilesList fileListObject = DiagnosticsFilesListFactory.getDiagnosticsFilesList(optionalFileList, vmInstance); + List<String> fileList = new ArrayList<>(); + + if (fileListObject != null) { + fileList = fileListObject.generateFileList(); + } + return fileList; + } + + 
private Pair<Boolean, String> orchestrateCopyToSecondaryStorageNonVMware(final DataStore store, final String vmControlIp, String fileToCopy, Long vmHostId) { + CopyToSecondaryStorageCommand toSecondaryStorageCommand = new CopyToSecondaryStorageCommand(store.getUri(), vmControlIp, fileToCopy); + Answer copyToSecondaryAnswer = agentManager.easySend(vmHostId, toSecondaryStorageCommand); + Pair<Boolean, String> copyAnswer; + if (copyToSecondaryAnswer != null) { + copyAnswer = new Pair<>(copyToSecondaryAnswer.getResult(), copyToSecondaryAnswer.getDetails()); + } else { + copyAnswer = new Pair<>(false, "Diagnostics Zip file to secondary storage failed"); + } + return copyAnswer; + } + + private Pair<Boolean, String> orchestrateCopyToSecondaryStorageVMware(final DataStore store, final String vmSshIp, String diagnosticsFile) { + String mountPoint; + boolean success; + + Integer nfsVersion = imageStoreDetailsUtil.getNfsVersion(store.getId()); + mountPoint = mountManager.getMountPoint(store.getUri(), nfsVersion); + if (StringUtils.isNotBlank(mountPoint)) { + LOGGER.info(String.format("Copying %s from %s to secondary store %s", diagnosticsFile, vmSshIp, store.getUri())); + + // dirIn/mnt/SecStorage/uuid/diagnostics_data + String dataDirectoryInSecondaryStore = String.format("%s/%s", mountPoint, DIAGNOSTICS_DIRECTORY); + try { + File dataDirectory = new File(dataDirectoryInSecondaryStore); + boolean existsInSecondaryStore = dataDirectory.exists() || dataDirectory.mkdir(); + + if (existsInSecondaryStore) { + // scp from system VM to mounted sec storage directory + int port = 3922; + File permKey = new File("/var/cloudstack/management/.ssh/id_rsa"); + SshHelper.scpFrom(vmSshIp, port, "root", permKey, dataDirectoryInSecondaryStore, diagnosticsFile); + } + // Verify File copy to Secondary Storage + File fileInSecondaryStore = new File(dataDirectoryInSecondaryStore + diagnosticsFile.replace("/root", "")); + success = fileInSecondaryStore.exists(); + } catch (Exception e) { + String 
msg = String.format("Exception caught during scp from %s to secondary store %s: ", vmSshIp, dataDirectoryInSecondaryStore); + LOGGER.error(msg, e); + return new Pair<>(false, msg); + } finally { + // umount secondary storage + umountSecondaryStorage(mountPoint); + } + } else { + return new Pair<>(false, "Failed to mount secondary storage:" + store.getName()); + } + return new Pair<>(success, "File copied to secondary storage successfully"); + } + + // Get ssvm from the zone to use for creating entity download URL + private VMInstanceVO getSecondaryStorageVmInZone(Long zoneId) { + List<VMInstanceVO> ssvm = instanceDao.listByZoneIdAndType(zoneId, VirtualMachine.Type.SecondaryStorageVm); + return (CollectionUtils.isEmpty(ssvm)) ? null : ssvm.get(0); + } + + /** + * Iterate through all Image stores in the current running zone and select any that has less than DiskQuotaPercentageThreshold.value() disk usage + * + * @param zoneId of the current running zone + * @return a valid secondary storage with less than DiskQuotaPercentageThreshold set by global config + */ + private DataStore getImageStore(Long zoneId) { + List<DataStore> stores = storeMgr.getImageStoresByScope(new ZoneScope(zoneId)); + if (CollectionUtils.isEmpty(stores)) { + throw new CloudRuntimeException("No Secondary storage found in Zone with Id: " + zoneId); + } + DataStore imageStore = null; + for (DataStore store : stores) { + // Return image store if used percentage is less then threshold value set by global config diagnostics.data.disable.threshold + if (statsCollector.imageStoreHasEnoughCapacity(store, DiskQuotaPercentageThreshold.value())) { + imageStore = store; + break; + } + } + if (imageStore == null) { + throw new CloudRuntimeException("No suitable secondary storage found to retrieve diagnostics in Zone: " + zoneId); + } + return imageStore; + } + + // createEntityExtractUrl throws CloudRuntime exception in case of failure + private String createFileDownloadUrl(DataStore store, 
Hypervisor.HypervisorType hypervisorType, String filePath) { + // Get image store driver + ImageStoreEntity secStore = (ImageStoreEntity) store; + + //Create dummy TO with hyperType + DataTO dataTO = new DiagnosticsDataTO(hypervisorType, store.getTO()); + DataObject dataObject = new DiagnosticsDataObject(dataTO, store); + return secStore.createEntityExtractUrl(filePath, Storage.ImageFormat.ZIP, dataObject); + } + + private VMInstanceVO getSystemVMInstance(Long vmId) { + VMInstanceVO vmInstance = instanceDao.findByIdTypes(vmId, VirtualMachine.Type.ConsoleProxy, + VirtualMachine.Type.DomainRouter, VirtualMachine.Type.SecondaryStorageVm); + if (vmInstance == null) { + String msg = String.format("Unable to find vm instance with id: %s", vmId); + LOGGER.error(msg); + throw new CloudRuntimeException("Diagnostics command execution failed, " + msg); + } + + final Long hostId = vmInstance.getHostId(); + if (hostId == null) { + throw new CloudRuntimeException("Unable to find host for virtual machine instance: " + vmInstance.getInstanceName()); + } + return vmInstance; + } + + private String getVMSshIp(final VMInstanceVO vmInstance) { + Map<String, String> accessDetails = networkManager.getSystemVMAccessDetails(vmInstance); + String controlIP = accessDetails.get(NetworkElementCommand.ROUTER_IP); + if (StringUtils.isBlank(controlIP)) { + throw new CloudRuntimeException("Unable to find system vm ssh/control IP for vm with ID: " + vmInstance.getId()); + } + return controlIP; + } + + @Override + public boolean start() { + super.start(); + return true; + } + + @Override + public boolean configure(final String name, final Map<String, Object> params) throws ConfigurationException { + if (EnableGarbageCollector.value()) { + backgroundPollManager.submitTask(new GCBackgroundTask(this)); + } + return true; + } + + public static final class GCBackgroundTask extends ManagedContextRunnable implements BackgroundPollTask { + private DiagnosticsServiceImpl serviceImpl; + + public 
GCBackgroundTask(DiagnosticsServiceImpl serviceImpl) { + this.serviceImpl = serviceImpl; + } + + private static void deleteOldDiagnosticsFiles(File directory, String storeName) { + final File[] fileList = directory.listFiles(); + if (fileList != null) { + String msg = String.format("Found %s diagnostics files in store %s for garbage collection", fileList.length, storeName); + LOGGER.info(msg); + for (File file : fileList) { + if (file.isFile()) { + if (MaximumFileAgeforGarbageCollection.value() <= getTimeDifference(file)) { + boolean success = file.delete(); + LOGGER.info(file.getName() + " delete status: " + success); + } + } + } + } + } + + @Override + protected void runInContext() { + List<DataCenterVO> dcList = serviceImpl.dataCenterDao.listEnabledZones(); + for (DataCenterVO vo: dcList) { + // Get All Image Stores in current running Zone + List<DataStore> storeList = serviceImpl.storeMgr.getImageStoresByScope(new ZoneScope(vo.getId())); + for (DataStore store : storeList) { + String mountPoint = null; + try { + mountPoint = serviceImpl.mountManager.getMountPoint(store.getUri(), null); + if (StringUtils.isNotBlank(mountPoint)) { + File directory = new File(mountPoint + "/" + DIAGNOSTICS_DIRECTORY); + if (directory.isDirectory()) { + deleteOldDiagnosticsFiles(directory, store.getName()); + } + } + } finally { + // umount secondary storage + umountSecondaryStorage(mountPoint); Review comment: Do we have a guarantee that we are not calling `umountSecondaryStorage(null);`? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services