winterhazel commented on code in PR #13053:
URL: https://github.com/apache/cloudstack/pull/13053#discussion_r3492103074


##########
plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/vmsnapshot/OntapVMSnapshotStrategy.java:
##########
@@ -0,0 +1,931 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.cloudstack.storage.vmsnapshot;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+import javax.inject.Inject;
+import javax.naming.ConfigurationException;
+
+import org.apache.cloudstack.engine.subsystem.api.storage.StrategyPriority;
+import org.apache.cloudstack.engine.subsystem.api.storage.VMSnapshotOptions;
+import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailsDao;
+import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
+import org.apache.cloudstack.storage.feign.client.SnapshotFeignClient;
+import org.apache.cloudstack.storage.feign.model.CliSnapshotRestoreRequest;
+import org.apache.cloudstack.storage.feign.model.FlexVolSnapshot;
+import org.apache.cloudstack.storage.feign.model.response.JobResponse;
+import org.apache.cloudstack.storage.feign.model.response.OntapResponse;
+import org.apache.cloudstack.storage.service.StorageStrategy;
+import org.apache.cloudstack.storage.service.model.ProtocolType;
+import org.apache.cloudstack.storage.to.VolumeObjectTO;
+import org.apache.cloudstack.storage.utils.OntapStorageUtils;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import com.cloud.agent.api.CreateVMSnapshotAnswer;
+import com.cloud.agent.api.CreateVMSnapshotCommand;
+import com.cloud.agent.api.DeleteVMSnapshotAnswer;
+import com.cloud.agent.api.DeleteVMSnapshotCommand;
+import com.cloud.agent.api.FreezeThawVMAnswer;
+import com.cloud.agent.api.FreezeThawVMCommand;
+import com.cloud.agent.api.RevertToVMSnapshotAnswer;
+import com.cloud.agent.api.RevertToVMSnapshotCommand;
+import com.cloud.agent.api.VMSnapshotTO;
+import com.cloud.event.EventTypes;
+import com.cloud.exception.AgentUnavailableException;
+import com.cloud.exception.OperationTimedoutException;
+import com.cloud.hypervisor.Hypervisor;
+import com.cloud.storage.GuestOSVO;
+import com.cloud.storage.VolumeDetailVO;
+import com.cloud.storage.VolumeVO;
+import com.cloud.storage.dao.VolumeDetailsDao;
+import com.cloud.uservm.UserVm;
+import com.cloud.utils.exception.CloudRuntimeException;
+import com.cloud.utils.fsm.NoTransitionException;
+import com.cloud.vm.VirtualMachine;
+import com.cloud.vm.snapshot.VMSnapshot;
+import com.cloud.vm.snapshot.VMSnapshotDetailsVO;
+import com.cloud.vm.snapshot.VMSnapshotVO;
+import org.apache.cloudstack.storage.utils.OntapStorageConstants;
+
+/**
+ * VM Snapshot strategy for NetApp ONTAP managed storage using 
FlexVolume-level snapshots.
+ *
+ * <p>This strategy handles VM-level (instance) snapshots for VMs whose volumes
+ * reside on ONTAP managed primary storage. Instead of creating per-file clones
+ * (the old approach), it takes <b>ONTAP FlexVolume-level snapshots</b> via the
+ * ONTAP REST API ({@code POST /api/storage/volumes/{uuid}/snapshots}).</p>
+ *
+ * <h3>Key Advantage:</h3>
+ * <p>When multiple CloudStack disks (ROOT + DATA) reside on the same ONTAP
+ * FlexVolume, a single FlexVolume snapshot atomically captures all of them.
+ * This is both faster and more storage-efficient than per-file clones.</p>
+ *
+ * <h3>Flow:</h3>
+ * <ol>
+ *   <li>Group all VM volumes by their parent FlexVolume UUID</li>
+ *   <li>Freeze the VM via QEMU guest agent ({@code fsfreeze}) — if quiesce 
requested</li>
+ *   <li>For each unique FlexVolume, create one ONTAP snapshot</li>
+ *   <li>Thaw the VM</li>
+ *   <li>Record FlexVolume → snapshot UUID mappings in {@code 
vm_snapshot_details}</li>
+ * </ol>
+ *
+ * <h3>Metadata in vm_snapshot_details:</h3>
+ * <p>Each FlexVolume snapshot is stored as a detail row with:
+ * <ul>
+ *   <li>name = {@value OntapStorageConstants#ONTAP_FLEXVOL_SNAPSHOT}</li>
+ *   <li>value = {@code 
"<flexVolUuid>::<snapshotUuid>::<snapshotName>::<volumePath>::<poolId>::<protocol>"}</li>
+ * </ul>
+ * One row is persisted per CloudStack volume (not per FlexVolume) so that the
+ * revert operation can restore individual files/LUNs using the ONTAP Snapshot
+ * File Restore API ({@code POST 
/api/storage/volumes/{vol}/snapshots/{snap}/files/{path}/restore}).</p>
+ *
+ * <h3>Strategy Selection:</h3>
+ * <p>Returns {@code StrategyPriority.HIGHEST} when:</p>
+ * <ul>
+ *   <li>Hypervisor is KVM</li>
+ *   <li>Snapshot type is Disk-only (no memory)</li>
+ *   <li>All VM volumes are on ONTAP managed primary storage</li>
+ * </ul>
+ */
+public class OntapVMSnapshotStrategy extends StorageVMSnapshotStrategy {
+
+    private static final Logger logger = 
LogManager.getLogger(OntapVMSnapshotStrategy.class);
+
+    /**
+     * Separator used in the {@code vm_snapshot_details} value to delimit its fields.
+     * Per the class-level documentation the value is laid out as
+     * {@code <flexVolUuid>::<snapshotUuid>::<snapshotName>::<volumePath>::<poolId>::<protocol>}.
+     */
+    static final String DETAIL_SEPARATOR = "::";
+
+    @Inject
+    private StoragePoolDetailsDao storagePoolDetailsDao;
+
+    @Inject
+    private VolumeDetailsDao volumeDetailsDao;
+
+    /**
+     * Configures this strategy by delegating, unchanged, to the parent implementation.
+     *
+     * @param name   passed through to {@code super.configure}
+     * @param params passed through to {@code super.configure}
+     * @return whatever {@code super.configure(name, params)} returns
+     * @throws ConfigurationException only if the parent implementation raises it
+     */
+    @Override
+    public boolean configure(String name, Map<String, Object> params) throws 
ConfigurationException {
+        return super.configure(name, params);
+    }
+
+    // 
──────────────────────────────────────────────────────────────────────────
+    // Strategy Selection
+    // 
──────────────────────────────────────────────────────────────────────────
+
+    @Override
+    public StrategyPriority canHandle(VMSnapshot vmSnapshot) {
+        VMSnapshotVO vmSnapshotVO = (VMSnapshotVO) vmSnapshot;
+
+        // For existing (non-Allocated) snapshots, check if we created them
+        if (!VMSnapshot.State.Allocated.equals(vmSnapshotVO.getState())) {
+            // Check for our FlexVolume snapshot details first
+            List<VMSnapshotDetailsVO> flexVolDetails = 
vmSnapshotDetailsDao.findDetails(vmSnapshot.getId(), 
OntapStorageConstants.ONTAP_FLEXVOL_SNAPSHOT);
+            if (CollectionUtils.isNotEmpty(flexVolDetails)) {
+                // Verify the volumes are still on ONTAP storage
+                if (allVolumesOnOntapManagedStorage(vmSnapshot.getVmId())) {
+                    return StrategyPriority.HIGHEST;
+                }
+                return StrategyPriority.CANT_HANDLE;
+            }
+            // Also check legacy STORAGE_SNAPSHOT details for backward 
compatibility
+            List<VMSnapshotDetailsVO> legacyDetails = 
vmSnapshotDetailsDao.findDetails(vmSnapshot.getId(), STORAGE_SNAPSHOT);
+            if (CollectionUtils.isNotEmpty(legacyDetails) && 
allVolumesOnOntapManagedStorage(vmSnapshot.getVmId())) {
+                return StrategyPriority.HIGHEST;
+            }
+            return StrategyPriority.CANT_HANDLE;
+        }
+
+        // For new snapshots (Allocated state), check if we can handle this VM
+        // ONTAP only supports disk-only snapshots, not memory snapshots
+        if (allVolumesOnOntapManagedStorage(vmSnapshot.getVmId())) {
+            if (vmSnapshotVO.getType() == VMSnapshot.Type.DiskAndMemory) {
+                logger.debug("canHandle: Memory snapshots (DiskAndMemory) are 
not supported for VMs on ONTAP storage. VMSnapshot [{}]", vmSnapshot.getId());
+                return StrategyPriority.CANT_HANDLE;
+            }
+            return StrategyPriority.HIGHEST;
+        }
+
+        return StrategyPriority.CANT_HANDLE;
+    }
+
+    /**
+     * Decides whether this strategy can take a new snapshot of the given VM.
+     *
+     * <p>Requests with {@code snapshotMemory == true} are always declined: this strategy
+     * produces disk-only snapshots. Declining here (rather than falling back to another
+     * snapshot mechanism) avoids mixed snapshot chains and leaves it to the caller to
+     * report the unsupported request.</p>
+     *
+     * @param vmId           id of the VM to snapshot
+     * @param rootPoolId     not consulted by this implementation
+     * @param snapshotMemory whether a memory snapshot was requested
+     * @return {@code StrategyPriority.HIGHEST} for a disk-only request on a VM that passes
+     *         {@link #allVolumesOnOntapManagedStorage(long)}, otherwise {@code CANT_HANDLE}
+     */
+    @Override
+    public StrategyPriority canHandle(Long vmId, Long rootPoolId, boolean snapshotMemory) {
+        if (!snapshotMemory) {
+            return allVolumesOnOntapManagedStorage(vmId)
+                    ? StrategyPriority.HIGHEST
+                    : StrategyPriority.CANT_HANDLE;
+        }
+
+        logger.debug("canHandle: Memory snapshots (snapshotMemory=true) are not supported for VMs on ONTAP storage. VM [{}]", vmId);
+        return StrategyPriority.CANT_HANDLE;
+    }
+
+    /**
+     * Checks whether all volumes of a VM reside on ONTAP managed primary 
storage.
+     */
+    boolean allVolumesOnOntapManagedStorage(long vmId) {
+        UserVm userVm = userVmDao.findById(vmId);
+        if (userVm == null) {
+            logger.debug("allVolumesOnOntapManagedStorage: VM with id [{}] not 
found", vmId);
+            return false;
+        }
+
+        if (!Hypervisor.HypervisorType.KVM.equals(userVm.getHypervisorType())) 
{
+            logger.debug("allVolumesOnOntapManagedStorage: ONTAP VM snapshot 
strategy only supports KVM hypervisor, VM [{}] uses [{}]",
+                    vmId, userVm.getHypervisorType());
+            return false;
+        }
+
+        // ONTAP VM snapshots work for both Running and Stopped VMs.
+        // Running VMs may be frozen/thawed (if quiesce is requested).
+        // Stopped VMs don't need freeze/thaw - just take the FlexVol snapshot 
directly.
+        VirtualMachine.State vmState = userVm.getState();
+        if (!VirtualMachine.State.Running.equals(vmState) && 
!VirtualMachine.State.Stopped.equals(vmState)) {
+            logger.info("allVolumesOnOntapManagedStorage: ONTAP VM snapshot 
strategy requires VM to be Running or Stopped, VM [{}] is in state [{}], 
returning false",
+                    vmId, vmState);
+            return false;
+        }
+
+        List<VolumeVO> volumes = volumeDao.findByInstance(vmId);
+        if (volumes == null || volumes.isEmpty()) {
+            logger.debug("allVolumesOnOntapManagedStorage: No volumes found 
for VM [{}]", vmId);
+            return false;
+        }
+
+        for (VolumeVO volume : volumes) {
+            if (volume.getPoolId() == null) {
+                return false;
+            }
+            StoragePoolVO pool = storagePool.findById(volume.getPoolId());
+            if (pool == null) {
+                return false;
+            }
+            if (!pool.isManaged()) {
+                logger.debug("allVolumesOnOntapManagedStorage: Volume [{}] is 
on non-managed storage pool [{}], not ONTAP",
+                        volume.getId(), pool.getName());
+                return false;
+            }
+            if 
(!OntapStorageConstants.ONTAP_PLUGIN_NAME.equals(pool.getStorageProviderName()))
 {
+                logger.debug("allVolumesOnOntapManagedStorage: Volume [{}] is 
on managed pool [{}] with provider [{}], not ONTAP",
+                        volume.getId(), pool.getName(), 
pool.getStorageProviderName());
+                return false;
+            }
+        }
+
+        logger.debug("allVolumesOnOntapManagedStorage: All volumes of VM [{}] 
are on ONTAP managed storage, this strategy can handle", vmId);
+        return true;
+    }
+
+    // 
──────────────────────────────────────────────────────────────────────────
+    // Take VM Snapshot (FlexVolume-level)
+    // 
──────────────────────────────────────────────────────────────────────────
+
+    /**
+     * Takes a VM-level snapshot by optionally freezing the VM, creating one ONTAP
+     * FlexVolume-level snapshot per unique FlexVolume, and thawing the VM again.
+     *
+     * <p>Volumes are grouped by their parent FlexVolume UUID. For each unique FlexVolume
+     * exactly one ONTAP snapshot is requested, so a VM whose ROOT and DATA disks share a
+     * FlexVolume gets a single snapshot. One {@code vm_snapshot_details} row is then
+     * persisted per CloudStack volume.</p>
+     *
+     * <p>The VM is frozen only when the quiesce option is set (it defaults to {@code true}
+     * when no options are present) <em>and</em> the VM is Running. A frozen VM is always
+     * thawed, including on failure.</p>
+     *
+     * <p>This method does not itself inspect the snapshot type; memory snapshots are declined
+     * earlier by the {@code canHandle} overloads of this class.</p>
+     *
+     * <p>On any failure the FlexVolume snapshots created so far are rolled back, the
+     * {@code ONTAP_FLEXVOL_SNAPSHOT} detail rows of this VM snapshot are removed, and the
+     * snapshot is moved to its failed state via {@code OperationFailed}.</p>
+     *
+     * @param vmSnapshot the VM snapshot to create; it is cast to {@link VMSnapshotVO}
+     * @return the same {@code vmSnapshot} instance on success
+     * @throws CloudRuntimeException if the state transition is refused, the VM cannot be
+     *         frozen, an ONTAP snapshot cannot be created, or the agent is unavailable or
+     *         times out; the underlying exception, when there is one, is attached as cause
+     */
+    @Override
+    public VMSnapshot takeVMSnapshot(VMSnapshot vmSnapshot) {
+        Long hostId = vmSnapshotHelper.pickRunningHost(vmSnapshot.getVmId());
+        UserVm userVm = userVmDao.findById(vmSnapshot.getVmId());
+        VMSnapshotVO vmSnapshotVO = (VMSnapshotVO) vmSnapshot;
+
+        // Request the create transition FIRST so that the cleanup in the finally block can
+        // later fire OperationFailed if anything goes wrong.
+        try {
+            vmSnapshotHelper.vmSnapshotStateTransitTo(vmSnapshotVO, VMSnapshot.Event.CreateRequested);
+        } catch (NoTransitionException e) {
+            // Keep the original message but attach the cause for diagnosis.
+            throw new CloudRuntimeException(e.getMessage(), e);
+        }
+
+        FreezeThawVMAnswer freezeAnswer = null;
+        FreezeThawVMCommand thawCmd = null;
+        FreezeThawVMAnswer thawAnswer = null;
+        long startFreeze = 0;
+
+        // One entry per CloudStack volume; also drives the rollback on failure.
+        List<FlexVolSnapshotDetail> createdSnapshots = new ArrayList<>();
+
+        boolean result = false;
+        try {
+            GuestOSVO guestOS = guestOSDao.findById(userVm.getGuestOSId());
+            List<VolumeObjectTO> volumeTOs = vmSnapshotHelper.getVolumeTOList(userVm.getId());
+
+            long prevChainSize = 0;
+            long virtualSize = 0;
+
+            // Build snapshot parent chain
+            VMSnapshotTO current = null;
+            VMSnapshotVO currentSnapshot = vmSnapshotDao.findCurrentSnapshotByVmId(userVm.getId());
+            if (currentSnapshot != null) {
+                current = vmSnapshotHelper.getSnapshotWithParents(currentSnapshot);
+            }
+
+            // Respect the user's quiesce option; default to true when no options were given.
+            boolean quiescevm = true;
+            VMSnapshotOptions options = vmSnapshotVO.getOptions();
+            if (options != null) {
+                quiescevm = options.needQuiesceVM();
+            }
+
+            // Freeze/thaw only makes sense for a running VM.
+            boolean vmIsRunning = VirtualMachine.State.Running.equals(userVm.getState());
+            boolean shouldFreezeThaw = quiescevm && vmIsRunning;
+
+            if (!vmIsRunning) {
+                logger.info("takeVMSnapshot: VM [{}] is in state [{}] (not Running). Skipping freeze/thaw - " +
+                        "FlexVolume snapshot will be taken directly.", userVm.getInstanceName(), userVm.getState());
+            } else if (quiescevm) {
+                logger.info("takeVMSnapshot: Quiesce option is enabled for ONTAP VM Snapshot of VM [{}]. " +
+                        "VM file systems will be frozen/thawed for application-consistent snapshots.", userVm.getInstanceName());
+            } else {
+                logger.info("takeVMSnapshot: Quiesce option is disabled for ONTAP VM Snapshot of VM [{}]. " +
+                        "Snapshots will be crash-consistent only.", userVm.getInstanceName());
+            }
+
+            VMSnapshotTO target = new VMSnapshotTO(vmSnapshot.getId(), vmSnapshot.getName(),
+                    vmSnapshot.getType(), null, vmSnapshot.getDescription(), false, current, quiescevm);
+
+            if (current == null) {
+                vmSnapshotVO.setParent(null);
+            } else {
+                vmSnapshotVO.setParent(current.getId());
+            }
+
+            CreateVMSnapshotCommand ccmd = new CreateVMSnapshotCommand(
+                    userVm.getInstanceName(), userVm.getUuid(), target, volumeTOs, guestOS.getDisplayName());
+
+            logger.info("takeVMSnapshot: Creating ONTAP FlexVolume VM Snapshot for VM [{}] with quiesce={}", userVm.getInstanceName(), quiescevm);
+
+            // Accumulate the sizes needed for the usage event published at the end.
+            for (VolumeObjectTO volumeObjectTO : volumeTOs) {
+                virtualSize += volumeObjectTO.getSize();
+                VolumeVO volumeVO = volumeDao.findById(volumeObjectTO.getId());
+                prevChainSize += volumeVO.getVmSnapshotChainSize() == null ? 0 : volumeVO.getVmSnapshotChainSize();
+            }
+
+            // ── Group volumes by FlexVolume UUID ──
+            Map<String, FlexVolGroupInfo> flexVolGroups = groupVolumesByFlexVol(volumeTOs);
+
+            logger.info("takeVMSnapshot: VM [{}] has {} volumes across {} unique FlexVolume(s)",
+                    userVm.getInstanceName(), volumeTOs.size(), flexVolGroups.size());
+
+            // ── Step 1: Freeze the VM (only if quiescing is requested AND VM is running) ──
+            if (shouldFreezeThaw) {
+                FreezeThawVMCommand freezeCommand = new FreezeThawVMCommand(userVm.getInstanceName());
+                freezeCommand.setOption(FreezeThawVMCommand.FREEZE);
+                freezeAnswer = (FreezeThawVMAnswer) agentMgr.send(hostId, freezeCommand);
+                startFreeze = System.nanoTime();
+
+                thawCmd = new FreezeThawVMCommand(userVm.getInstanceName());
+                thawCmd.setOption(FreezeThawVMCommand.THAW);
+
+                if (freezeAnswer == null || !freezeAnswer.getResult()) {
+                    String freezeDetail = (freezeAnswer != null) ? freezeAnswer.getDetails() : "no response from agent";
+                    throw new CloudRuntimeException("Could not freeze VM [" + userVm.getInstanceName() +
+                            "] for ONTAP snapshot. Ensure qemu-guest-agent is installed and running. Details: " + freezeDetail);
+                }
+
+                logger.info("takeVMSnapshot: VM [{}] frozen successfully via QEMU guest agent", userVm.getInstanceName());
+            } else {
+                logger.info("takeVMSnapshot: Skipping VM freeze for VM [{}] (quiesce={}, vmIsRunning={})",
+                        userVm.getInstanceName(), quiescevm, vmIsRunning);
+            }
+
+            // ── Step 2: Create FlexVolume-level snapshots ──
+            try {
+                String snapshotNameBase = buildSnapshotName(vmSnapshot);
+
+                for (Map.Entry<String, FlexVolGroupInfo> entry : flexVolGroups.entrySet()) {
+                    String flexVolUuid = entry.getKey();
+                    FlexVolGroupInfo groupInfo = entry.getValue();
+                    long startSnapshot = System.nanoTime();
+
+                    // Build storage strategy from pool details to get the feign client
+                    StorageStrategy storageStrategy = OntapStorageUtils.getStrategyByStoragePoolDetails(groupInfo.poolDetails);
+                    SnapshotFeignClient snapshotClient = storageStrategy.getSnapshotFeignClient();
+                    String authHeader = storageStrategy.getAuthHeader();
+
+                    // The same snapshot name is used on every FlexVolume of this VM snapshot;
+                    // each FlexVolume still gets its own independent snapshot.
+                    FlexVolSnapshot snapshotRequest = new FlexVolSnapshot(snapshotNameBase,
+                            "CloudStack VM snapshot " + vmSnapshot.getName() + " for VM " + userVm.getInstanceName());
+
+                    logger.info("takeVMSnapshot: Creating ONTAP FlexVolume snapshot [{}] on FlexVol UUID [{}] covering {} volume(s)",
+                            snapshotNameBase, flexVolUuid, groupInfo.volumeIds.size());
+
+                    JobResponse jobResponse = snapshotClient.createSnapshot(authHeader, flexVolUuid, snapshotRequest);
+                    if (jobResponse == null || jobResponse.getJob() == null) {
+                        throw new CloudRuntimeException("Failed to initiate FlexVolume snapshot on FlexVol UUID [" + flexVolUuid + "]");
+                    }
+
+                    // Poll for job completion. Comparing against Boolean.TRUE treats a null
+                    // poll result as failure instead of failing on unboxing.
+                    Boolean jobSucceeded = storageStrategy.jobPollForSuccess(jobResponse.getJob().getUuid(), 30, 2);
+                    if (!Boolean.TRUE.equals(jobSucceeded)) {
+                        throw new CloudRuntimeException("FlexVolume snapshot job failed on FlexVol UUID [" + flexVolUuid + "]");
+                    }
+
+                    // Retrieve the created snapshot UUID by name
+                    String snapshotUuid = resolveSnapshotUuid(snapshotClient, authHeader, flexVolUuid, snapshotNameBase);
+
+                    String protocol = groupInfo.poolDetails.get(OntapStorageConstants.PROTOCOL);
+
+                    // One detail per CloudStack volume in this FlexVol group (for single-file restore during revert)
+                    for (Long volumeId : groupInfo.volumeIds) {
+                        String volumePath = resolveVolumePathOnOntap(volumeId, protocol, groupInfo.poolDetails);
+                        createdSnapshots.add(new FlexVolSnapshotDetail(
+                                flexVolUuid, snapshotUuid, snapshotNameBase, volumePath, groupInfo.poolId, protocol));
+                    }
+
+                    logger.info("takeVMSnapshot: ONTAP FlexVolume snapshot [{}] (uuid={}) on FlexVol [{}] completed in {} ms. Covers volumes: {}",
+                            snapshotNameBase, snapshotUuid, flexVolUuid,
+                            TimeUnit.MILLISECONDS.convert(System.nanoTime() - startSnapshot, TimeUnit.NANOSECONDS),
+                            groupInfo.volumeIds);
+                }
+            } finally {
+                // ── Step 3: Thaw the VM (only if it was frozen, always even on error) ──
+                if (shouldFreezeThaw && freezeAnswer != null && freezeAnswer.getResult()) {
+                    try {
+                        thawAnswer = (FreezeThawVMAnswer) agentMgr.send(hostId, thawCmd);
+                        if (thawAnswer != null && thawAnswer.getResult()) {
+                            logger.info("takeVMSnapshot: VM [{}] thawed successfully. Total freeze duration: {} ms",
+                                    userVm.getInstanceName(),
+                                    TimeUnit.MILLISECONDS.convert(System.nanoTime() - startFreeze, TimeUnit.NANOSECONDS));
+                        } else {
+                            logger.warn("takeVMSnapshot: Failed to thaw VM [{}]: {}", userVm.getInstanceName(),
+                                    (thawAnswer != null) ? thawAnswer.getDetails() : "no response");
+                        }
+                    } catch (Exception thawEx) {
+                        logger.error("takeVMSnapshot: Exception while thawing VM [{}]: {}", userVm.getInstanceName(), thawEx.getMessage(), thawEx);
+                    }
+                }
+            }
+
+            // ── Step 4: Persist FlexVolume snapshot details (one row per CloudStack volume) ──
+            for (FlexVolSnapshotDetail createdDetail : createdSnapshots) {
+                vmSnapshotDetailsDao.persist(new VMSnapshotDetailsVO(
+                        vmSnapshot.getId(), OntapStorageConstants.ONTAP_FLEXVOL_SNAPSHOT, createdDetail.toString(), true));
+            }
+
+            // ── Step 5: Finalize via parent processAnswer ──
+            CreateVMSnapshotAnswer answer = new CreateVMSnapshotAnswer(ccmd, true, "");
+            answer.setVolumeTOs(volumeTOs);
+
+            processAnswer(vmSnapshotVO, userVm, answer, null);
+            // createdSnapshots holds one entry per volume, so the number of FlexVol snapshots
+            // is the number of FlexVolume groups.
+            logger.info("takeVMSnapshot: ONTAP FlexVolume VM Snapshot [{}] created successfully for VM [{}] ({} FlexVol snapshot(s))",
+                    vmSnapshot.getName(), userVm.getInstanceName(), flexVolGroups.size());
+
+            long newChainSize = 0;
+            for (VolumeObjectTO volumeTo : answer.getVolumeTOs()) {
+                publishUsageEvent(EventTypes.EVENT_VM_SNAPSHOT_CREATE, vmSnapshot, userVm, volumeTo);
+                newChainSize += volumeTo.getSize();
+            }
+            publishUsageEvent(EventTypes.EVENT_VM_SNAPSHOT_ON_PRIMARY, vmSnapshot, userVm,
+                    newChainSize - prevChainSize, virtualSize);
+
+            result = true;
+            return vmSnapshot;
+
+        } catch (OperationTimedoutException e) {
+            logger.error("takeVMSnapshot: ONTAP VM Snapshot [{}] timed out: {}", vmSnapshot.getName(), e.getMessage());
+            throw new CloudRuntimeException("Creating Instance Snapshot: " + vmSnapshot.getName() + " timed out: " + e.getMessage(), e);
+        } catch (AgentUnavailableException e) {
+            logger.error("takeVMSnapshot: ONTAP VM Snapshot [{}] failed, agent unavailable: {}", vmSnapshot.getName(), e.getMessage());
+            throw new CloudRuntimeException("Creating Instance Snapshot: " + vmSnapshot.getName() + " failed: " + e.getMessage(), e);
+        } finally {
+            if (!result) {
+                // Roll back every FlexVolume snapshot created so far. Several details can point
+                // at the same snapshot, so deduplicate by FlexVol + snapshot UUID.
+                Map<String, Boolean> rolledBack = new HashMap<>();
+                for (FlexVolSnapshotDetail createdDetail : createdSnapshots) {
+                    String dedupeKey = createdDetail.flexVolUuid + DETAIL_SEPARATOR + createdDetail.snapshotUuid;
+                    if (!rolledBack.containsKey(dedupeKey)) {
+                        try {
+                            rollbackFlexVolSnapshot(createdDetail);
+                            rolledBack.put(dedupeKey, Boolean.TRUE);
+                        } catch (Exception rollbackEx) {
+                            logger.error("takeVMSnapshot: Failed to rollback FlexVol snapshot [{}] on FlexVol [{}]: {}",
+                                    createdDetail.snapshotUuid, createdDetail.flexVolUuid, rollbackEx.getMessage());
+                        }
+                    }
+                }
+
+                // Ensure VM is thawed if the earlier thaw attempt produced no answer
+                if (thawAnswer == null && freezeAnswer != null && freezeAnswer.getResult()) {
+                    try {
+                        logger.info("takeVMSnapshot: Thawing VM [{}] during error cleanup", userVm.getInstanceName());
+                        thawAnswer = (FreezeThawVMAnswer) agentMgr.send(hostId, thawCmd);
+                    } catch (Exception ex) {
+                        logger.error("takeVMSnapshot: Could not thaw VM during cleanup: {}", ex.getMessage());
+                    }
+                }
+
+                // Remove the detail rows written by this strategy. This is best-effort: a
+                // failure here must neither mask the original error nor prevent the state
+                // transition below.
+                try {
+                    List<VMSnapshotDetailsVO> vmSnapshotDetails = vmSnapshotDetailsDao.listDetails(vmSnapshot.getId());
+                    for (VMSnapshotDetailsVO staleDetail : vmSnapshotDetails) {
+                        if (OntapStorageConstants.ONTAP_FLEXVOL_SNAPSHOT.equals(staleDetail.getName())) {
+                            vmSnapshotDetailsDao.remove(staleDetail.getId());
+                        }
+                    }
+                } catch (RuntimeException cleanupEx) {
+                    logger.error("takeVMSnapshot: Failed to clean up ONTAP details of VM Snapshot [{}]: {}",
+                            vmSnapshot.getName(), cleanupEx.getMessage(), cleanupEx);
+                }
+
+                try {
+                    vmSnapshotHelper.vmSnapshotStateTransitTo(vmSnapshot, VMSnapshot.Event.OperationFailed);
+                } catch (NoTransitionException e1) {
+                    logger.error("takeVMSnapshot: Cannot set VM Snapshot state to OperationFailed: {}", e1.getMessage());
+                }
+            }
+        }
+    }
+
+    // 
──────────────────────────────────────────────────────────────────────────
+    // Delete VM Snapshot
+    // 
──────────────────────────────────────────────────────────────────────────
+
+    /**
+     * Deletes a VM snapshot created by this strategy.
+     *
+     * <p>The snapshot is first moved through {@code ExpungeRequested}. Storage-side snapshots
+     * are then removed: those recorded as {@code ONTAP_FLEXVOL_SNAPSHOT} details via
+     * {@code deleteFlexVolSnapshots}, and any legacy {@code STORAGE_SNAPSHOT} details via
+     * {@code deleteDiskSnapshot}. Finally the result is handed to {@code processAnswer} and
+     * the usage events are published.</p>
+     *
+     * @param vmSnapshot the VM snapshot to delete; it is cast to {@link VMSnapshotVO}
+     * @return {@code true} when the deletion completed
+     * @throws CloudRuntimeException if the state transition is refused or the deletion fails;
+     *         the underlying exception is attached as cause
+     */
+    @Override
+    public boolean deleteVMSnapshot(VMSnapshot vmSnapshot) {
+        VMSnapshotVO vmSnapshotVO = (VMSnapshotVO) vmSnapshot;
+        UserVm userVm = userVmDao.findById(vmSnapshot.getVmId());
+
+        try {
+            vmSnapshotHelper.vmSnapshotStateTransitTo(vmSnapshotVO, VMSnapshot.Event.ExpungeRequested);
+        } catch (NoTransitionException e) {
+            // Same message as before, now with the cause attached.
+            throw new CloudRuntimeException(e.getMessage(), e);
+        }
+
+        try {
+            List<VolumeObjectTO> volumeTOs = vmSnapshotHelper.getVolumeTOList(userVm.getId());
+            String vmInstanceName = userVm.getInstanceName();
+            VMSnapshotTO parent = vmSnapshotHelper.getSnapshotWithParents(vmSnapshotVO).getParent();
+
+            VMSnapshotTO vmSnapshotTO = new VMSnapshotTO(vmSnapshotVO.getId(), vmSnapshotVO.getName(), vmSnapshotVO.getType(),
+                    vmSnapshotVO.getCreated().getTime(), vmSnapshotVO.getDescription(), vmSnapshotVO.getCurrent(), parent, true);
+            GuestOSVO guestOS = guestOSDao.findById(userVm.getGuestOSId());
+            DeleteVMSnapshotCommand deleteSnapshotCommand = new DeleteVMSnapshotCommand(vmInstanceName, vmSnapshotTO,
+                    volumeTOs, guestOS.getDisplayName());
+
+            // Check for FlexVolume snapshots (new approach)
+            List<VMSnapshotDetailsVO> flexVolDetails = vmSnapshotDetailsDao.findDetails(vmSnapshot.getId(), OntapStorageConstants.ONTAP_FLEXVOL_SNAPSHOT);
+            if (CollectionUtils.isNotEmpty(flexVolDetails)) {
+                deleteFlexVolSnapshots(flexVolDetails);
+            }
+
+            // Also handle legacy STORAGE_SNAPSHOT details (backward compatibility)
+            List<VMSnapshotDetailsVO> legacyDetails = vmSnapshotDetailsDao.findDetails(vmSnapshot.getId(), STORAGE_SNAPSHOT);
+            if (CollectionUtils.isNotEmpty(legacyDetails)) {
+                deleteDiskSnapshot(vmSnapshot);
+            }
+
+            processAnswer(vmSnapshotVO, userVm, new DeleteVMSnapshotAnswer(deleteSnapshotCommand, volumeTOs), null);
+
+            // One delete event per volume, then a single OFF_PRIMARY event with the summed size.
+            long fullChainSize = 0;
+            for (VolumeObjectTO volumeTo : volumeTOs) {
+                publishUsageEvent(EventTypes.EVENT_VM_SNAPSHOT_DELETE, vmSnapshot, userVm, volumeTo);
+                fullChainSize += volumeTo.getSize();
+            }
+            publishUsageEvent(EventTypes.EVENT_VM_SNAPSHOT_OFF_PRIMARY, vmSnapshot, userVm, fullChainSize, 0L);
+            return true;
+        } catch (CloudRuntimeException err) {
+            String errMsg = String.format("Delete of ONTAP VM Snapshot [%s] of VM [%s] failed: %s",
+                    vmSnapshot.getName(), userVm.getInstanceName(), err.getMessage());
+            logger.error(errMsg, err);
+            throw new CloudRuntimeException(errMsg, err);
+        }
+    }
+
+    // 
──────────────────────────────────────────────────────────────────────────
+    // Revert VM Snapshot
+    // 
──────────────────────────────────────────────────────────────────────────
+
+    @Override
+    public boolean revertVMSnapshot(VMSnapshot vmSnapshot) {
+        VMSnapshotVO vmSnapshotVO = (VMSnapshotVO) vmSnapshot;
+        UserVm userVm = userVmDao.findById(vmSnapshot.getVmId());
+
+        try {
+            vmSnapshotHelper.vmSnapshotStateTransitTo(vmSnapshotVO, 
VMSnapshot.Event.RevertRequested);
+        } catch (NoTransitionException e) {
+            throw new CloudRuntimeException(e.getMessage());
+        }
+
+        boolean result = false;
+        try {
+            List<VolumeObjectTO> volumeTOs = 
vmSnapshotHelper.getVolumeTOList(userVm.getId());
+            String vmInstanceName = userVm.getInstanceName();
+            VMSnapshotTO parent = 
vmSnapshotHelper.getSnapshotWithParents(vmSnapshotVO).getParent();
+
+            VMSnapshotTO vmSnapshotTO = new VMSnapshotTO(vmSnapshotVO.getId(), 
vmSnapshotVO.getName(), vmSnapshotVO.getType(),
+                    vmSnapshotVO.getCreated().getTime(), 
vmSnapshotVO.getDescription(), vmSnapshotVO.getCurrent(), parent, true);
+            GuestOSVO guestOS = guestOSDao.findById(userVm.getGuestOSId());
+            RevertToVMSnapshotCommand revertToSnapshotCommand = new 
RevertToVMSnapshotCommand(vmInstanceName,
+                    userVm.getUuid(), vmSnapshotTO, volumeTOs, 
guestOS.getDisplayName());
+
+            // Check for FlexVolume snapshots (new approach)
+            List<VMSnapshotDetailsVO> flexVolDetails = 
vmSnapshotDetailsDao.findDetails(vmSnapshot.getId(), 
OntapStorageConstants.ONTAP_FLEXVOL_SNAPSHOT);
+            if (CollectionUtils.isNotEmpty(flexVolDetails)) {
+                revertFlexVolSnapshots(flexVolDetails);
+            }
+
+            // Also handle legacy STORAGE_SNAPSHOT details (backward 
compatibility)
+            List<VMSnapshotDetailsVO> legacyDetails = 
vmSnapshotDetailsDao.findDetails(vmSnapshot.getId(), STORAGE_SNAPSHOT);
+            if (CollectionUtils.isNotEmpty(legacyDetails)) {
+                revertDiskSnapshot(vmSnapshot);
+            }
+
+            RevertToVMSnapshotAnswer answer = new 
RevertToVMSnapshotAnswer(revertToSnapshotCommand, true, "");
+            answer.setVolumeTOs(volumeTOs);
+            processAnswer(vmSnapshotVO, userVm, answer, null);
+            result = true;
+        } catch (CloudRuntimeException e) {
+            logger.error("revertVMSnapshot: Revert ONTAP VM Snapshot [{}] 
failed: {}", vmSnapshot.getName(), e.getMessage(), e);
+            throw new CloudRuntimeException("Revert ONTAP VM Snapshot ["+ 
vmSnapshot.getName() +"] failed.");
+        } finally {
+            if (!result) {
+                try {
+                    vmSnapshotHelper.vmSnapshotStateTransitTo(vmSnapshot, 
VMSnapshot.Event.OperationFailed);
+                } catch (NoTransitionException e1) {
+                    logger.error("Cannot set Instance Snapshot state due to: " 
+ e1.getMessage());
+                }
+            }
+        }
+        return result;
+    }
+
+    // ──────────────────────────────────────────────────────────────────────────
+    // FlexVolume Snapshot Helpers
+    // ──────────────────────────────────────────────────────────────────────────
+
+    /**
+     * Buckets the given volumes by the FlexVolume UUID recorded in the details of the
+     * storage pool each volume is assigned to.
+     *
+     * @param volumeTOs list of volume transfer objects
+     * @return map of FlexVolume UUID → group info (pool details, pool ID, volume IDs)
+     * @throws CloudRuntimeException if a volume cannot be found, has no pool assigned,
+     *         or its pool details carry no FlexVolume UUID
+     */
+    Map<String, FlexVolGroupInfo> groupVolumesByFlexVol(List<VolumeObjectTO> volumeTOs) {
+        Map<String, FlexVolGroupInfo> byFlexVol = new HashMap<>();
+
+        for (VolumeObjectTO volumeTO : volumeTOs) {
+            VolumeVO volume = volumeDao.findById(volumeTO.getId());
+            Long poolId = (volume != null) ? volume.getPoolId() : null;
+            if (poolId == null) {
+                throw new CloudRuntimeException("Volume [" + volumeTO.getId() + "] not found or has no pool assigned");
+            }
+
+            Map<String, String> details = storagePoolDetailsDao.listDetailsKeyPairs(poolId);
+            String uuid = details.get(OntapStorageConstants.VOLUME_UUID);
+            boolean hasUuid = uuid != null && !uuid.isEmpty();
+            if (!hasUuid) {
+                throw new CloudRuntimeException("FlexVolume UUID not found in pool details for pool [" + poolId + "]");
+            }
+
+            // The first volume seen for a FlexVolume creates its group; later ones join it.
+            byFlexVol.computeIfAbsent(uuid, key -> new FlexVolGroupInfo(details, poolId))
+                    .volumeIds.add(volume.getId());
+        }
+
+        return byFlexVol;
+    }
+
+    /**
+     * Builds a deterministic, ONTAP-safe snapshot name for a VM snapshot.
+     * Format: {@code vmsnap_<vmSnapshotId>_<timestamp>}
+     */
+    String buildSnapshotName(VMSnapshot vmSnapshot) {
+        String name = "vmsnap_" + vmSnapshot.getId() + "_" + 
System.currentTimeMillis();
+        // ONTAP snapshot names: max 256 chars, must start with letter, only 
alphanumeric and underscores
+        if (name.length() > OntapStorageConstants.MAX_SNAPSHOT_NAME_LENGTH) {

Review Comment:
   > This code has been refactored, it is not available any more
   
   It is available and called by `OntapVMSnapshotStrategy#takeVMSnapshot` in 
the latest commit



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to