This is an automated email from the ASF dual-hosted git repository.
sergey-chugunov-1985 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git
The following commit(s) were added to refs/heads/master by this push:
new ea5b175c488 IGNITE-28242 Rearrange cp read lock aquisition in
GridCacheMapEntry#unswap to ensure correct lock ordering (#12972)
ea5b175c488 is described below
commit ea5b175c488b9c31b22173654a1043717f99590d
Author: Sergey Chugunov <[email protected]>
AuthorDate: Wed May 13 12:26:26 2026 +0400
IGNITE-28242 Rearrange cp read lock aquisition in GridCacheMapEntry#unswap
to ensure correct lock ordering (#12972)
---
.../processors/cache/GridCacheMapEntry.java | 21 ++-
.../db/TxPutTxGetCheckpointerDeadlockTest.java | 171 +++++++++++++++++++++
.../datastreamer/DataStreamProcessorSelfTest.java | 12 +-
.../ignite/testsuites/IgnitePdsTestSuite7.java | 3 +
4 files changed, 199 insertions(+), 8 deletions(-)
diff --git
a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java
b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java
index b3af6e7b4d9..161a25feb9d 100644
---
a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java
+++
b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java
@@ -429,7 +429,17 @@ public abstract class GridCacheMapEntry extends
GridMetadataAwareAdapter impleme
/** {@inheritDoc} */
@Override public final CacheObject unswap(CacheDataRow row) throws
IgniteCheckedException, GridCacheEntryRemovedException {
- row = unswap(row, true);
+ assert cctx.shared().database().checkpointLockIsHeldByThread() ||
!lockedByCurrentThread() :
+ "Lock order violation, checkpoint lock must be acquired before
entry lock";
+
+ cctx.shared().database().checkpointReadLock();
+
+ try {
+ row = unswap(row, true);
+ }
+ finally {
+ cctx.shared().database().checkpointReadUnlock();
+ }
return row != null ? row.value() : null;
}
@@ -437,9 +447,7 @@ public abstract class GridCacheMapEntry extends
GridMetadataAwareAdapter impleme
/** {@inheritDoc} */
@Nullable @Override public final CacheObject unswap(boolean needVal)
throws IgniteCheckedException, GridCacheEntryRemovedException {
- CacheDataRow row = unswap(null, true);
-
- return row != null ? row.value() : null;
+ return unswap(null);
}
/**
@@ -457,7 +465,8 @@ public abstract class GridCacheMapEntry extends
GridMetadataAwareAdapter impleme
boolean deferred = false;
GridCacheVersion ver0 = null;
- cctx.shared().database().checkpointReadLock();
+ assert !checkExpire ||
cctx.shared().database().checkpointLockIsHeldByThread() :
+ "Checkpoint read lock should be acquired to perform entry
expiration";
lockEntry();
@@ -494,8 +503,6 @@ public abstract class GridCacheMapEntry extends
GridMetadataAwareAdapter impleme
}
finally {
unlockEntry();
-
- cctx.shared().database().checkpointReadUnlock();
}
if (obsolete) {
diff --git
a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/db/TxPutTxGetCheckpointerDeadlockTest.java
b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/db/TxPutTxGetCheckpointerDeadlockTest.java
new file mode 100644
index 00000000000..7aeb3159c38
--- /dev/null
+++
b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/db/TxPutTxGetCheckpointerDeadlockTest.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.processors.cache.persistence.db;
+
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.cache.CacheAtomicityMode;
+import org.apache.ignite.cluster.ClusterState;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.configuration.DataRegionConfiguration;
+import org.apache.ignite.configuration.DataStorageConfiguration;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.failure.StopNodeFailureHandler;
+import org.apache.ignite.internal.IgniteEx;
+import
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager;
+import
org.apache.ignite.internal.processors.cache.persistence.IgniteCacheDatabaseSharedManager;
+import
org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointManager;
+import
org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointTimeoutLock;
+import org.apache.ignite.testframework.GridTestUtils;
+import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
+import org.apache.ignite.transactions.Transaction;
+import org.junit.Test;
+
+import static
org.apache.ignite.transactions.TransactionConcurrency.PESSIMISTIC;
+import static
org.apache.ignite.transactions.TransactionIsolation.READ_COMMITTED;
+
+/**
+ * Test verifies there is no deadlock between GridCacheMapEntry.unswap() in tx
put operation and a checkpointer requesting cp write lock
+ * with a parallel tx get operation.
+ * <p/>
+ * Root cause of the deadlock was wrong locking order when cp readlock is
acquired under already locked GridCacheMapEntry instance
+ * by the first tx put op.
+ */
+public class TxPutTxGetCheckpointerDeadlockTest extends GridCommonAbstractTest
{
+ /** */
+ private final AtomicBoolean deadlockDetected = new AtomicBoolean(false);
+
+ /** */
+ private final AtomicBoolean testFinished = new AtomicBoolean(false);
+
+ /** {@inheritDoc} */
+ @Override protected IgniteConfiguration getConfiguration(String
igniteInstanceName) throws Exception {
+ IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
+
+ cfg.setDataStorageConfiguration(
+ new DataStorageConfiguration()
+ .setDefaultDataRegionConfiguration(
+ new DataRegionConfiguration()
+ .setPersistenceEnabled(true)
+ )
+ );
+
+ cfg.setFailureHandler(new StopNodeFailureHandler());
+
+ return cfg;
+ }
+
+ /** {@inheritDoc} */
+ @Override protected void beforeTest() throws Exception {
+ stopAllGrids();
+
+ cleanPersistenceDir();
+ }
+
+ /** {@inheritDoc} */
+ @Override protected void afterTest() throws Exception {
+ testFinished.set(true);
+
+ stopAllGrids();
+
+ cleanPersistenceDir();
+ }
+
+ /**
+ * Tests for the absence of a deadlock between transactional cache
operations and checkpointer write lock acquisition.
+ * <p>
+ * This test simulates a scenario where:
+ * <ul>
+ * <li>One thread performs continuous transactional {@code put}
operations on a cache entry.</li>
+ * <li>Another thread performs continuous transactional {@code get}
operations on the same entry, potentially
+ * triggering unswapping of the entry under lock.</li>
+ * <li>A third thread repeatedly acquires and releases the checkpoint
write lock, simulating checkpointer activity.</li>
+ * </ul>
+ * </p>
+ * <p>
+ * The primary purpose is to verify that there is no deadlock caused by
incorrect lock ordering,
+ * specifically when a transactional {@code put} holds a lock on a {@link
org.apache.ignite.internal.processors.cache.GridCacheMapEntry}
+ * while attempting to acquire a checkpoint read lock, at the same time as
the checkpointer
+ * tries to acquire checkpoint write lock.
+ * </p>
+ *
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testDeadlock() throws Exception {
+ IgniteEx ignite = startGrid(0);
+
+ ignite.cluster().state(ClusterState.ACTIVE);
+
+ IgniteCache<Object, Object> cache = ignite
+ .getOrCreateCache(new
CacheConfiguration<>("test").setAtomicityMode(CacheAtomicityMode.TRANSACTIONAL));
+
+ GridTestUtils.runAsync(
+ () -> {
+ while (!testFinished.get()) {
+ try (Transaction tx =
ignite.transactions().txStart(PESSIMISTIC, READ_COMMITTED)) {
+ cache.put(0, 1);
+ tx.commit();
+ }
+ }
+ },
+ "write-tx-runner"
+ );
+
+ GridTestUtils.runAsync(
+ () -> {
+ while (!testFinished.get()) {
+ try (Transaction tx =
ignite.transactions().txStart(PESSIMISTIC, READ_COMMITTED)) {
+ cache.get(0);
+ }
+ }
+ },
+ "read-tx-runner"
+ );
+
+ GridTestUtils.runAsync(
+ () -> {
+ IgniteCacheDatabaseSharedManager db =
ignite.context().cache().context().database();
+ CheckpointManager cpMgr =
((GridCacheDatabaseSharedManager)db).getCheckpointManager();
+ CheckpointTimeoutLock timeoutLock =
cpMgr.checkpointTimeoutLock();
+ ReentrantReadWriteLock.WriteLock cpWriteLock =
GridTestUtils.getFieldValue(timeoutLock,
+ "checkpointReadWriteLock", "checkpointLock", "writeLock");
+
+ while (!testFinished.get()) {
+ // An interruptible version of lock method is used to
allow end the deadlock at the end of the test.
+ try {
+ if (!cpWriteLock.tryLock(4, TimeUnit.SECONDS)) {
+ deadlockDetected.set(true);
+ testFinished.set(true);
+
+ return;
+ }
+ }
+ finally {
+ cpWriteLock.unlock();
+ }
+ }
+ },
+ "cp-write-lock-switching-runner"
+ );
+
+ assertFalse("Unexpected deadlock detected",
GridTestUtils.waitForCondition(deadlockDetected::get, 10_000L));
+ }
+}
diff --git
a/modules/core/src/test/java/org/apache/ignite/internal/processors/datastreamer/DataStreamProcessorSelfTest.java
b/modules/core/src/test/java/org/apache/ignite/internal/processors/datastreamer/DataStreamProcessorSelfTest.java
index ba484477194..be549b01534 100644
---
a/modules/core/src/test/java/org/apache/ignite/internal/processors/datastreamer/DataStreamProcessorSelfTest.java
+++
b/modules/core/src/test/java/org/apache/ignite/internal/processors/datastreamer/DataStreamProcessorSelfTest.java
@@ -55,6 +55,7 @@ import org.apache.ignite.internal.IgniteKernal;
import org.apache.ignite.internal.processors.cache.GridCacheAdapter;
import org.apache.ignite.internal.processors.cache.GridCacheEntryEx;
import
org.apache.ignite.internal.processors.cache.distributed.near.GridNearCacheAdapter;
+import
org.apache.ignite.internal.processors.cache.persistence.IgniteCacheDatabaseSharedManager;
import org.apache.ignite.internal.util.lang.GridAbsPredicate;
import org.apache.ignite.internal.util.typedef.G;
import org.apache.ignite.internal.util.typedef.internal.CU;
@@ -377,6 +378,8 @@ public class DataStreamProcessorSelfTest extends
GridCommonAbstractTest {
Affinity<Integer> aff = cache0.affinity();
+ IgniteCacheDatabaseSharedManager db =
grid(g).context().cache().context().database();
+
for (int key = 0; key < cnt * threads; key++) {
if (aff.isPrimary(locNode, key) || aff.isBackup(locNode,
key)) {
GridCacheEntryEx entry = cache0.entryEx(key);
@@ -394,7 +397,14 @@ public class DataStreamProcessorSelfTest extends
GridCommonAbstractTest {
entry = cache0.entryEx(key);
}
- entry.unswap();
+ db.checkpointReadLock();
+
+ try {
+ entry.unswap();
+ }
+ finally {
+ db.checkpointReadUnlock();
+ }
assertEquals(new Integer((key < 100 ? -1 : key)),
CU.value(entry.rawGet(), cache0.context(),
false));
diff --git
a/modules/core/src/test/java/org/apache/ignite/testsuites/IgnitePdsTestSuite7.java
b/modules/core/src/test/java/org/apache/ignite/testsuites/IgnitePdsTestSuite7.java
index 7cd4bb16d5b..627ad5e8fb3 100644
---
a/modules/core/src/test/java/org/apache/ignite/testsuites/IgnitePdsTestSuite7.java
+++
b/modules/core/src/test/java/org/apache/ignite/testsuites/IgnitePdsTestSuite7.java
@@ -21,6 +21,7 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.ignite.internal.processors.cache.persistence.EagerTtlTest;
+import
org.apache.ignite.internal.processors.cache.persistence.db.TxPutTxGetCheckpointerDeadlockTest;
import
org.apache.ignite.internal.processors.cache.persistence.wal.WalRotatedIdPartRecordTest;
import org.apache.ignite.testframework.GridTestUtils;
import org.apache.ignite.testframework.junits.DynamicSuite;
@@ -47,6 +48,8 @@ public class IgnitePdsTestSuite7 {
GridTestUtils.addTestIfNeeded(suite, WalRotatedIdPartRecordTest.class,
ignoredTests);
+ GridTestUtils.addTestIfNeeded(suite,
TxPutTxGetCheckpointerDeadlockTest.class, ignoredTests);
+
return suite;
}
}