[ 
https://issues.apache.org/jira/browse/IGNITE-16194?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Pavel Pereslegin updated IGNITE-16194:
--------------------------------------
    Description: 
The snapshot restore operation fails if any baseline node does not contain metadata 
for the specified snapshot. The existing tests do not reproduce this problem because 
the nodes they start all share the same working folder for snapshots. 

Reproducer (uses dedicated work dir):
{code:java}
/**
 * Reproducer for IGNITE-16194: restoring a snapshot fails when a baseline node has no metadata
 * for the requested snapshot. Each node is started with a dedicated work directory so that a
 * node joining after the snapshot was taken does not see the snapshot files of the other nodes.
 */
public class IgniteSnapshotRestoreWithNewNodeTest extends AbstractSnapshotSelfTest {
    /** Name prefix of the dedicated per-node directories created under the default work directory. */
    private static final String DEDICATED_CLUSTER_PREFIX = "tmp-cluster-";

    /**
     * Creates a snapshot on a two-node cluster, destroys the cache, adds a third node to the
     * baseline and then restores the snapshot, expecting every node to hold the restored keys.
     *
     * @throws Exception If failed.
     */
    @Test
    public void testRestoreOnNewTopologyWithDedicatedSnapshotLocation() throws Exception {
        String workDir = U.defaultWorkDirectory();

        // The lambda maps every node configuration to its own "tmp-cluster-<instance name>" path.
        // NOTE(review): presumably the base class uses this path as the node work directory - confirm.
        IgniteEx ignite = startGridsWithCache(2, CACHE_KEYS_RANGE, valBuilder,
            (id, cfg) -> Paths.get(workDir,
                DEDICATED_CLUSTER_PREFIX + U.maskForFileName(cfg.getIgniteInstanceName())).toString(),
            dfltCacheCfg);

        ignite.snapshot().createSnapshot(SNAPSHOT_NAME).get(TIMEOUT);

        ignite.destroyCache(DEFAULT_CACHE_NAME);
        awaitPartitionMapExchange();

        // Start new node with an empty snapshots work directory.
        startGrid(optimize(getConfiguration(getTestIgniteInstanceName(2)).setCacheConfiguration()));
        resetBaselineTopology();

        ignite.snapshot().restoreSnapshot(SNAPSHOT_NAME, null).get(TIMEOUT);

        for (Ignite grid : G.allGrids())
            assertCacheKeys(grid.cache(DEFAULT_CACHE_NAME), CACHE_KEYS_RANGE);
    }

    /**
     * Limits the parameterized run to the single value {@code false}.
     *
     * @return Singleton list containing {@code false}.
     */
    @Parameterized.Parameters(name = "Encryption is disabled")
    public static Iterable<Boolean> disabledEncryption() {
        return Collections.singletonList(false);
    }

    /** {@inheritDoc} */
    @After
    @Override public void afterTestSnapshot() throws Exception {
        try {
            super.afterTestSnapshot();
        }
        finally {
            // Delete the dedicated directories even if the parent cleanup failed, otherwise they
            // leak into subsequent runs. A failure here replaces an exception thrown by the parent.
            try (DirectoryStream<Path> ds = Files.newDirectoryStream(Paths.get(U.defaultWorkDirectory()),
                path -> Files.isDirectory(path) &&
                    path.getFileName().toString().toLowerCase().startsWith(DEDICATED_CLUSTER_PREFIX))
            ) {
                for (Path dir : ds)
                    U.delete(dir);
            }
        }
    }
}
{code}

Log output:
{noformat}
class org.apache.ignite.compute.ComputeUserUndeclaredException: Failed to 
reduce job results due to undeclared user exception 
[task=org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotMetadataCollectorTask@4bb91e74,
 err=class 
org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotVerifyException:
 /home/user/ignite/source/work/snapshots/testSnapshot]

        at 
org.apache.ignite.internal.processors.task.GridTaskWorker.reduce(GridTaskWorker.java:1188)
        at 
org.apache.ignite.internal.processors.task.GridTaskWorker.onResponse(GridTaskWorker.java:976)
        at 
org.apache.ignite.internal.processors.task.GridTaskProcessor.processJobExecuteResponse(GridTaskProcessor.java:1155)
        at 
org.apache.ignite.internal.processors.task.GridTaskProcessor$JobMessageListener.onMessage(GridTaskProcessor.java:1390)
        at 
org.apache.ignite.internal.managers.communication.GridIoManager.invokeListener(GridIoManager.java:1907)
        at 
org.apache.ignite.internal.managers.communication.GridIoManager.processRegularMessage0(GridIoManager.java:1528)
        at 
org.apache.ignite.internal.managers.communication.GridIoManager.access$5300(GridIoManager.java:242)
        at 
org.apache.ignite.internal.managers.communication.GridIoManager$9.execute(GridIoManager.java:1421)
        at 
org.apache.ignite.internal.managers.communication.TraceRunnable.run(TraceRunnable.java:55)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
Caused by: class 
org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotVerifyException:
 /home/user/ignite/source/work/snapshots/testSnapshot
        at 
org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotMetadataCollectorTask.reduce(SnapshotMetadataCollectorTask.java:105)
        at 
org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotMetadataCollectorTask.reduce(SnapshotMetadataCollectorTask.java:39)
        at 
org.apache.ignite.internal.processors.task.GridTaskWorker$6.call(GridTaskWorker.java:1149)
        at 
org.apache.ignite.internal.util.IgniteUtils.wrapThreadLoader(IgniteUtils.java:7276)
        at 
org.apache.ignite.internal.processors.task.GridTaskWorker.reduce(GridTaskWorker.java:1147)
        ... 11 more
Caused by: class org.apache.ignite.IgniteException: 
/home/user/ignite/source/work/snapshots/testSnapshot
        at 
org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotManager.readSnapshotMetadatas(IgniteSnapshotManager.java:1253)
        at 
org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotMetadataCollectorTask$1.execute(SnapshotMetadataCollectorTask.java:59)
        at 
org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotMetadataCollectorTask$1.execute(SnapshotMetadataCollectorTask.java:53)
        at 
org.apache.ignite.internal.processors.job.GridJobWorker$2.call(GridJobWorker.java:601)
        at 
org.apache.ignite.internal.util.IgniteUtils.wrapThreadLoader(IgniteUtils.java:7276)
        at 
org.apache.ignite.internal.processors.job.GridJobWorker.execute0(GridJobWorker.java:595)
        at 
org.apache.ignite.internal.processors.job.GridJobWorker.body(GridJobWorker.java:522)
        at 
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:125)
        at 
org.apache.ignite.internal.processors.job.GridJobProcessor.processJobExecuteRequest(GridJobProcessor.java:1305)
        at 
org.apache.ignite.internal.processors.job.GridJobProcessor$JobExecutionListener.onMessage(GridJobProcessor.java:2155)
        ... 8 more
Caused by: java.nio.file.NoSuchFileException: 
/home/user/ignite/source/work/snapshots/testSnapshot
        at 
sun.nio.fs.UnixException.translateToIOException(UnixException.java:86)
        at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102)
        at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107)
        at 
sun.nio.fs.UnixFileSystemProvider.newDirectoryStream(UnixFileSystemProvider.java:427)
        at java.nio.file.Files.newDirectoryStream(Files.java:457)
        at 
org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotManager.readSnapshotMetadatas(IgniteSnapshotManager.java:1246)
        ... 17 more
{noformat}



  was:
Snapshot restore operation fails if any baseline node doesn't contain metadata 
for the specified snapshot. Current tests do not reproduce this problem because 
they share the same working folder for the snapshots. 

Reproducer (uses dedicated work dir):
{code:java}
public class IgniteSnapshotRestoreWithNewNodeTest extends 
AbstractSnapshotSelfTest {
    private static final String DEDICATED_CLUSTER_PREFIX = "tmp-cluster-";

    @Test
    public void testRestoreOnNewTopologyWithDedicatedSnapshotLocation() throws 
Exception {
        String workDir = U.defaultWorkDirectory();

        IgniteEx ignite = startGridsWithCache(2, CACHE_KEYS_RANGE, valBuilder,
            (id, cfg) -> Paths.get(workDir, DEDICATED_CLUSTER_PREFIX + 
U.maskForFileName(cfg.getIgniteInstanceName())).toString(), dfltCacheCfg);

        ignite.snapshot().createSnapshot(SNAPSHOT_NAME).get(TIMEOUT);

        ignite.destroyCache(DEFAULT_CACHE_NAME);
        awaitPartitionMapExchange();

        // Start new node with an empty snapshots work directory.
        
startGrid(optimize(getConfiguration(getTestIgniteInstanceName(2)).setCacheConfiguration()));
        resetBaselineTopology();

        ignite.snapshot().restoreSnapshot(SNAPSHOT_NAME, null).get(TIMEOUT);

        for (Ignite grid : G.allGrids())
            assertCacheKeys(grid.cache(DEFAULT_CACHE_NAME), CACHE_KEYS_RANGE);
    }

    @Parameterized.Parameters(name = "Encryption is disabled")
    public static Iterable<Boolean> disabledEncryption() {
        return Collections.singletonList(false);
    }

    /** {@inheritDoc} */
    @After
    @Override public void afterTestSnapshot() throws Exception {
        super.afterTestSnapshot();

        try (DirectoryStream<Path> ds = 
Files.newDirectoryStream(Paths.get(U.defaultWorkDirectory()),
            path -> Files.isDirectory(path) && 
path.getFileName().toString().toLowerCase().startsWith(DEDICATED_CLUSTER_PREFIX))
        ) {
            for (Path dir : ds)
                U.delete(dir);
        }
    }
}
{code}

Log output
{noformat}
class org.apache.ignite.compute.ComputeUserUndeclaredException: Failed to 
reduce job results due to undeclared user exception 
[task=org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotMetadataCollectorTask@4bb91e74,
 err=class 
org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotVerifyException:
 /home/xtern/src/java/ignite/source/work/snapshots/testSnapshot]

        at 
org.apache.ignite.internal.processors.task.GridTaskWorker.reduce(GridTaskWorker.java:1188)
        at 
org.apache.ignite.internal.processors.task.GridTaskWorker.onResponse(GridTaskWorker.java:976)
        at 
org.apache.ignite.internal.processors.task.GridTaskProcessor.processJobExecuteResponse(GridTaskProcessor.java:1155)
        at 
org.apache.ignite.internal.processors.task.GridTaskProcessor$JobMessageListener.onMessage(GridTaskProcessor.java:1390)
        at 
org.apache.ignite.internal.managers.communication.GridIoManager.invokeListener(GridIoManager.java:1907)
        at 
org.apache.ignite.internal.managers.communication.GridIoManager.processRegularMessage0(GridIoManager.java:1528)
        at 
org.apache.ignite.internal.managers.communication.GridIoManager.access$5300(GridIoManager.java:242)
        at 
org.apache.ignite.internal.managers.communication.GridIoManager$9.execute(GridIoManager.java:1421)
        at 
org.apache.ignite.internal.managers.communication.TraceRunnable.run(TraceRunnable.java:55)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
Caused by: class 
org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotVerifyException:
 /home/xtern/src/java/ignite/source/work/snapshots/testSnapshot
        at 
org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotMetadataCollectorTask.reduce(SnapshotMetadataCollectorTask.java:105)
        at 
org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotMetadataCollectorTask.reduce(SnapshotMetadataCollectorTask.java:39)
        at 
org.apache.ignite.internal.processors.task.GridTaskWorker$6.call(GridTaskWorker.java:1149)
        at 
org.apache.ignite.internal.util.IgniteUtils.wrapThreadLoader(IgniteUtils.java:7276)
        at 
org.apache.ignite.internal.processors.task.GridTaskWorker.reduce(GridTaskWorker.java:1147)
        ... 11 more
Caused by: class org.apache.ignite.IgniteException: 
/home/xtern/src/java/ignite/source/work/snapshots/testSnapshot
        at 
org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotManager.readSnapshotMetadatas(IgniteSnapshotManager.java:1253)
        at 
org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotMetadataCollectorTask$1.execute(SnapshotMetadataCollectorTask.java:59)
        at 
org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotMetadataCollectorTask$1.execute(SnapshotMetadataCollectorTask.java:53)
        at 
org.apache.ignite.internal.processors.job.GridJobWorker$2.call(GridJobWorker.java:601)
        at 
org.apache.ignite.internal.util.IgniteUtils.wrapThreadLoader(IgniteUtils.java:7276)
        at 
org.apache.ignite.internal.processors.job.GridJobWorker.execute0(GridJobWorker.java:595)
        at 
org.apache.ignite.internal.processors.job.GridJobWorker.body(GridJobWorker.java:522)
        at 
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:125)
        at 
org.apache.ignite.internal.processors.job.GridJobProcessor.processJobExecuteRequest(GridJobProcessor.java:1305)
        at 
org.apache.ignite.internal.processors.job.GridJobProcessor$JobExecutionListener.onMessage(GridJobProcessor.java:2155)
        ... 8 more
Caused by: java.nio.file.NoSuchFileException: 
/home/xtern/src/java/ignite/source/work/snapshots/testSnapshot
        at 
sun.nio.fs.UnixException.translateToIOException(UnixException.java:86)
        at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102)
        at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107)
        at 
sun.nio.fs.UnixFileSystemProvider.newDirectoryStream(UnixFileSystemProvider.java:427)
        at java.nio.file.Files.newDirectoryStream(Files.java:457)
        at 
org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotManager.readSnapshotMetadatas(IgniteSnapshotManager.java:1246)
        ... 17 more
{noformat}




> Snapshot restore operation fails if any baseline node doesn't contain 
> metadata for the specified snapshot.
> ----------------------------------------------------------------------------------------------------------
>
>                 Key: IGNITE-16194
>                 URL: https://issues.apache.org/jira/browse/IGNITE-16194
>             Project: Ignite
>          Issue Type: Bug
>    Affects Versions: 2.11, 2.12
>            Reporter: Pavel Pereslegin
>            Priority: Critical
>
> Snapshot restore operation fails if any baseline node doesn't contain 
> metadata for the specified snapshot. Current tests do not reproduce this 
> problem because they share the same working folder for the snapshots. 
> Reproducer (uses dedicated work dir):
> {code:java}
> public class IgniteSnapshotRestoreWithNewNodeTest extends 
> AbstractSnapshotSelfTest {
>     private static final String DEDICATED_CLUSTER_PREFIX = "tmp-cluster-";
>     @Test
>     public void testRestoreOnNewTopologyWithDedicatedSnapshotLocation() 
> throws Exception {
>         String workDir = U.defaultWorkDirectory();
>         IgniteEx ignite = startGridsWithCache(2, CACHE_KEYS_RANGE, valBuilder,
>             (id, cfg) -> Paths.get(workDir, DEDICATED_CLUSTER_PREFIX + 
> U.maskForFileName(cfg.getIgniteInstanceName())).toString(), dfltCacheCfg);
>         ignite.snapshot().createSnapshot(SNAPSHOT_NAME).get(TIMEOUT);
>         ignite.destroyCache(DEFAULT_CACHE_NAME);
>         awaitPartitionMapExchange();
>         // Start new node with an empty snapshots work directory.
>         
> startGrid(optimize(getConfiguration(getTestIgniteInstanceName(2)).setCacheConfiguration()));
>         resetBaselineTopology();
>         ignite.snapshot().restoreSnapshot(SNAPSHOT_NAME, null).get(TIMEOUT);
>         for (Ignite grid : G.allGrids())
>             assertCacheKeys(grid.cache(DEFAULT_CACHE_NAME), CACHE_KEYS_RANGE);
>     }
>     @Parameterized.Parameters(name = "Encryption is disabled")
>     public static Iterable<Boolean> disabledEncryption() {
>         return Collections.singletonList(false);
>     }
>     /** {@inheritDoc} */
>     @After
>     @Override public void afterTestSnapshot() throws Exception {
>         super.afterTestSnapshot();
>         try (DirectoryStream<Path> ds = 
> Files.newDirectoryStream(Paths.get(U.defaultWorkDirectory()),
>             path -> Files.isDirectory(path) && 
> path.getFileName().toString().toLowerCase().startsWith(DEDICATED_CLUSTER_PREFIX))
>         ) {
>             for (Path dir : ds)
>                 U.delete(dir);
>         }
>     }
> }
> {code}
> Log output
> {noformat}
> class org.apache.ignite.compute.ComputeUserUndeclaredException: Failed to 
> reduce job results due to undeclared user exception 
> [task=org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotMetadataCollectorTask@4bb91e74,
>  err=class 
> org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotVerifyException:
>  /home/user/ignite/source/work/snapshots/testSnapshot]
>       at 
> org.apache.ignite.internal.processors.task.GridTaskWorker.reduce(GridTaskWorker.java:1188)
>       at 
> org.apache.ignite.internal.processors.task.GridTaskWorker.onResponse(GridTaskWorker.java:976)
>       at 
> org.apache.ignite.internal.processors.task.GridTaskProcessor.processJobExecuteResponse(GridTaskProcessor.java:1155)
>       at 
> org.apache.ignite.internal.processors.task.GridTaskProcessor$JobMessageListener.onMessage(GridTaskProcessor.java:1390)
>       at 
> org.apache.ignite.internal.managers.communication.GridIoManager.invokeListener(GridIoManager.java:1907)
>       at 
> org.apache.ignite.internal.managers.communication.GridIoManager.processRegularMessage0(GridIoManager.java:1528)
>       at 
> org.apache.ignite.internal.managers.communication.GridIoManager.access$5300(GridIoManager.java:242)
>       at 
> org.apache.ignite.internal.managers.communication.GridIoManager$9.execute(GridIoManager.java:1421)
>       at 
> org.apache.ignite.internal.managers.communication.TraceRunnable.run(TraceRunnable.java:55)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>       at java.lang.Thread.run(Thread.java:748)
> Caused by: class 
> org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotVerifyException:
>  /home/user/ignite/source/work/snapshots/testSnapshot
>       at 
> org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotMetadataCollectorTask.reduce(SnapshotMetadataCollectorTask.java:105)
>       at 
> org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotMetadataCollectorTask.reduce(SnapshotMetadataCollectorTask.java:39)
>       at 
> org.apache.ignite.internal.processors.task.GridTaskWorker$6.call(GridTaskWorker.java:1149)
>       at 
> org.apache.ignite.internal.util.IgniteUtils.wrapThreadLoader(IgniteUtils.java:7276)
>       at 
> org.apache.ignite.internal.processors.task.GridTaskWorker.reduce(GridTaskWorker.java:1147)
>       ... 11 more
> Caused by: class org.apache.ignite.IgniteException: 
> /home/user/ignite/source/work/snapshots/testSnapshot
>       at 
> org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotManager.readSnapshotMetadatas(IgniteSnapshotManager.java:1253)
>       at 
> org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotMetadataCollectorTask$1.execute(SnapshotMetadataCollectorTask.java:59)
>       at 
> org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotMetadataCollectorTask$1.execute(SnapshotMetadataCollectorTask.java:53)
>       at 
> org.apache.ignite.internal.processors.job.GridJobWorker$2.call(GridJobWorker.java:601)
>       at 
> org.apache.ignite.internal.util.IgniteUtils.wrapThreadLoader(IgniteUtils.java:7276)
>       at 
> org.apache.ignite.internal.processors.job.GridJobWorker.execute0(GridJobWorker.java:595)
>       at 
> org.apache.ignite.internal.processors.job.GridJobWorker.body(GridJobWorker.java:522)
>       at 
> org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:125)
>       at 
> org.apache.ignite.internal.processors.job.GridJobProcessor.processJobExecuteRequest(GridJobProcessor.java:1305)
>       at 
> org.apache.ignite.internal.processors.job.GridJobProcessor$JobExecutionListener.onMessage(GridJobProcessor.java:2155)
>       ... 8 more
> Caused by: java.nio.file.NoSuchFileException: 
> /home/user/ignite/source/work/snapshots/testSnapshot
>       at 
> sun.nio.fs.UnixException.translateToIOException(UnixException.java:86)
>       at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102)
>       at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107)
>       at 
> sun.nio.fs.UnixFileSystemProvider.newDirectoryStream(UnixFileSystemProvider.java:427)
>       at java.nio.file.Files.newDirectoryStream(Files.java:457)
>       at 
> org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotManager.readSnapshotMetadatas(IgniteSnapshotManager.java:1246)
>       ... 17 more
> {noformat}



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

Reply via email to