Copilot commented on code in PR #12925: URL: https://github.com/apache/ignite/pull/12925#discussion_r3248105794
########## modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxSalvageMessage.java: ########## @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.processors.cache.distributed.dht; + +import org.apache.ignite.internal.Order; +import org.apache.ignite.internal.processors.cache.GridCacheMessage; +import org.apache.ignite.internal.processors.cache.version.GridCacheVersion; + +/** Salvage tx. */ +public class GridDhtTxSalvageMessage extends GridCacheMessage { + /** */ + @Order(0) + GridCacheVersion ver; + + /** Empty constructor. */ + public GridDhtTxSalvageMessage() { + // No-op. + } + + /** + * @param ver Global transaction identifier within cluster, assigned by transaction coordinator. + */ + public GridDhtTxSalvageMessage(GridCacheVersion ver) { + this.ver = ver; + } + + /** Tx version. */ + public GridCacheVersion version() { + return ver; + } + + /** {@inheritDoc} */ + @Override public boolean addDeploymentInfo() { + return addDepInfo; + } + + /** */ + @Override public short directType() { + return 119; Review Comment: This message is sent over the communication layer but it is not registered in `CoreMessagesProvider`, and the hard-coded `directType()` bypasses the factory-assigned type range used by other cache messages. Receivers will not be able to deserialize/create this message type reliably; add `GridDhtTxSalvageMessage` to the core message provider and let the normal registered direct type be used instead of returning an arbitrary constant. ########## modules/control-utility/src/test/java/org/apache/ignite/util/IdleVerifyCheckWithWriteThroughTest.java: ########## @@ -0,0 +1,414 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.util; + +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CountDownLatch; +import java.util.regex.Pattern; +import javax.cache.Cache; +import org.apache.ignite.Ignite; +import org.apache.ignite.IgniteCache; +import org.apache.ignite.cache.CacheAtomicityMode; +import org.apache.ignite.cache.CacheMode; +import org.apache.ignite.cache.CacheWriteSynchronizationMode; +import org.apache.ignite.cache.QueryEntity; +import org.apache.ignite.cache.query.SqlFieldsQuery; +import org.apache.ignite.cache.store.CacheStoreAdapter; +import org.apache.ignite.cluster.ClusterState; +import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.configuration.IgniteConfiguration; +import org.apache.ignite.events.DiscoveryEvent; +import org.apache.ignite.internal.GridTopic; +import org.apache.ignite.internal.IgniteEx; +import org.apache.ignite.internal.IgniteInternalFuture; +import org.apache.ignite.internal.TestRecordingCommunicationSpi; +import org.apache.ignite.internal.managers.communication.GridMessageListener; +import org.apache.ignite.internal.managers.discovery.DiscoCache; +import org.apache.ignite.internal.managers.eventstorage.DiscoveryEventListener; +import org.apache.ignite.internal.managers.eventstorage.HighPriorityListener; +import org.apache.ignite.internal.processors.cache.distributed.near.GridNearTxPrepareResponse; +import org.apache.ignite.internal.processors.cache.transactions.IgniteInternalTx; +import org.apache.ignite.internal.processors.cache.transactions.IgniteTxManager; +import org.apache.ignite.internal.processors.cache.version.GridCacheVersion; +import org.apache.ignite.internal.util.typedef.G; +import org.apache.ignite.internal.util.typedef.internal.U; +import org.apache.ignite.lang.IgniteBiInClosure; +import org.apache.ignite.lang.IgniteCallable; +import org.apache.ignite.resources.IgniteInstanceResource; +import org.apache.ignite.testframework.GridTestUtils; +import org.apache.ignite.transactions.Transaction; +import org.apache.ignite.transactions.TransactionConcurrency; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runners.Parameterized; + +import static org.apache.ignite.events.EventType.EVT_NODE_FAILED; +import static org.apache.ignite.events.EventType.EVT_NODE_LEFT; +import static org.apache.ignite.internal.commandline.CommandHandler.EXIT_CODE_OK; +import static org.apache.ignite.testframework.GridTestUtils.cartesianProduct; +import static org.apache.ignite.testframework.GridTestUtils.waitForCondition; +import static org.apache.ignite.transactions.TransactionConcurrency.OPTIMISTIC; +import static org.apache.ignite.transactions.TransactionConcurrency.PESSIMISTIC; +import static org.apache.ignite.transactions.TransactionIsolation.READ_COMMITTED; +import static org.hamcrest.CoreMatchers.anyOf; +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.CoreMatchers.is; + +/** */ +public class IdleVerifyCheckWithWriteThroughTest extends GridCommandHandlerClusterPerMethodAbstractTest { + /** Node kill trigger. */ + private static CountDownLatch nodeKillLatch; + + /** Node left on backup. */ + private static CountDownLatch nodeLeftRegisteredOnBackup; + + /** */ + @Parameterized.Parameter(1) + public Boolean withPersistence; + + /** */ + @Parameterized.Parameter(2) + public TransactionConcurrency conc; + + /** */ + private static final String CORRECT_VERIFY_MSG = "The check procedure has finished, no conflicts have been found."; + + /** */ + @Parameterized.Parameters(name = "cmdHnd={0}, withPersistence={1}, concMode={2}") + public static Collection<Object[]> parameters() { + return cartesianProduct( + List.of(CLI_CMD_HND), + List.of(true, false), + List.of(OPTIMISTIC, PESSIMISTIC) + ); + } + + /** {@inheritDoc} */ + @Override protected void beforeTest() throws Exception { + super.beforeTest(); + + stopAllGrids(); + + persistenceEnable(withPersistence); + + if (withPersistence) + cleanPersistenceDir(); + + nodeKillLatch = new CountDownLatch(1); + nodeLeftRegisteredOnBackup = new CountDownLatch(1); + + MapCacheStore.salvagedLatch = new CountDownLatch(1); + MapCacheStore.txCoordStoreLatch = new CountDownLatch(2); + } + + /** {@inheritDoc} */ + @Override protected void afterTest() throws Exception { + try { + for (Ignite node : G.allGrids()) { + Collection<IgniteInternalTx> txs = ((IgniteEx)node).context().cache().context().tm().activeTransactions(); + + assertTrue("Unfinished txs [node=" + node.name() + ", txs=" + txs + ']', txs.isEmpty()); + } + } + finally { + stopAllGrids(); + + super.afterTest(); + } + } + + /** {@inheritDoc} */ + @Override protected boolean persistenceEnable() { + return withPersistence; + } + + /** {@inheritDoc} */ + @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception { + return super.getConfiguration(igniteInstanceName) + .setCommunicationSpi(new TestRecordingCommunicationSpi()); + } + + /** Test scenario: + * <ul> + * <li>Start 3 nodes [node0, node1, node2].</li> + * <li>Initialize put operation into transactional cache where [node1] holds primary partition for such insertion.</li> + * <li>Kill [node1] right after tx PREPARE stage is completed (it triggers tx recovery procedure).</li> + * </ul> + * + * @see IgniteTxManager#salvageTx(IgniteInternalTx) + */ + @Test + public void testTxCoordinatorLeftClusterWithEnabledReadWriteThrough() throws Exception { + // sequential start is important here + IgniteEx nodeCoord = startGrid(0); + // near node + IgniteEx nodePrimary = startGrid(1); + // backup node + IgniteEx nodeBackup = startGrid(2); + + int firstVal = 0; + int secondVal = 1; + + nodeCoord.cluster().state(ClusterState.ACTIVE); + + CacheConfiguration<Object, Object> ccfgWithWriteThrough = createCache(DEFAULT_CACHE_NAME, true); + IgniteCache<Object, Object> cache = nodeCoord.createCache(ccfgWithWriteThrough); + + Integer primaryKey = primaryKey(nodePrimary.cache(DEFAULT_CACHE_NAME)); + + try (Transaction tx = nodeCoord.transactions().txStart()) { + cache.put(primaryKey, firstVal); + + tx.commit(); + } + + sqlVisibilityCheck(List.of(nodeCoord, nodeBackup), primaryKey, firstVal); + + nodeCoord.cluster().state(ClusterState.INACTIVE); + + GridMessageListener lsnr = new GridMessageListener() { + @Override public void onMessage(UUID nodeId, Object msg, byte plc) { + if (msg instanceof GridNearTxPrepareResponse) { + IgniteTxManager txMgr = nodeBackup.context().cache().context().tm(); + Collection<IgniteInternalTx> txs = txMgr.activeTransactions(); + + assertEquals(1, txs.size()); + IgniteInternalTx idleTx = txs.iterator().next(); + assertFalse(idleTx.local()); + + Map<GridCacheVersion, IgniteInternalTx> activeTx = GridTestUtils.getFieldValue(txMgr, "idMap"); + assertEquals(1, activeTx.size()); + + nodeKillLatch.countDown(); + + U.awaitQuiet(nodeLeftRegisteredOnBackup); + + // let`s wait until all discovery events have been processed on backup node. + doSleep(1000); + + MapCacheStore.txCoordStoreLatch.countDown(); + } + } + }; + + nodeCoord.context().io().removeMessageListener(GridTopic.TOPIC_CACHE); // Remove old cache listener. + nodeCoord.context().io().addMessageListener(GridTopic.TOPIC_CACHE, lsnr); // Register as first listener. + nodeCoord.context().cache().context().io().start0(); // Register cache listener again. + + nodeCoord.cluster().state(ClusterState.ACTIVE); + + nodeCoord.context().event().addDiscoveryEventListener(new BeforeRecoveryListener(), EVT_NODE_FAILED, EVT_NODE_LEFT); + nodeBackup.context().event().addDiscoveryEventListener(new BeforeBackupRecoveryListener(), EVT_NODE_FAILED, EVT_NODE_LEFT); + + IgniteInternalFuture<Object> stopFut = GridTestUtils.runAsync(() -> { + nodeKillLatch.await(); + nodePrimary.close(); + }); + + injectTestSystemOut(); + + try (Transaction tx = nodeCoord.transactions().txStart(conc, READ_COMMITTED)) { + cache.put(primaryKey, secondVal); + + tx.commit(); + } + catch (Throwable th) { + fail("Unexpected exception: " + th); + } + + stopFut.get(getTestTimeout()); + + awaitPartitionMapExchange(); + + assertEquals(EXIT_CODE_OK, execute("--port", connectorPort(grid(2)), "--cache", "idle_verify")); + + String out = testOut.toString(); + + // partVerHash can be different + if (withPersistence) { + Assert.assertThat(out, anyOf(is(containsString("updateCntr=[lwm=2, missed=[], hwm=2], " + + "partitionState=OWNING, size=1")), is(containsString(CORRECT_VERIFY_MSG)))); + Assert.assertThat(out, anyOf(is(containsString("updateCntr=[lwm=2, missed=[], hwm=2], " + + "partitionState=OWNING, size=1")), is(containsString(CORRECT_VERIFY_MSG)))); + } + else { + Assert.assertThat(out, anyOf(is(containsString("consistentId=gridCommandHandlerTest0, " + + "updateCntr=1, partitionState=OWNING, size=1")), is(containsString(CORRECT_VERIFY_MSG)))); + Assert.assertThat(out, anyOf(is(containsString("consistentId=gridCommandHandlerTest2, " + + "updateCntr=1, partitionState=OWNING, size=1")), is(containsString(CORRECT_VERIFY_MSG)))); + } + testOut.reset(); + + for (int nodeIdx : List.of(0, 2)) { + IgniteEx g = grid(nodeIdx); + IgniteCache<Object, Object> cacheInner = g.cache(DEFAULT_CACHE_NAME); + waitForCondition(() -> secondVal == (int)cacheInner.get(primaryKey), 1_000); Review Comment: This wait result is ignored, so the test continues even when the cache value never becomes `secondVal`. Wrap the `waitForCondition` call in an assertion (or assert the value after waiting) so this regression check fails when the update is not visible. ########## modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxFinishFuture.java: ########## @@ -473,7 +473,7 @@ private boolean finish(boolean commit, if (isNull(cctx.discovery().getAlive(n.id()))) { log.error("Unable to send message (node left topology): " + n); - fut.onNodeLeft(); + fut.onNodeLeft(n.id()); Review Comment: The salvage path is only used when the node is already absent before sending; if `sendTransactionMessage` throws `ClusterTopologyCheckedException` after this alive check, the catch block below still calls the no-argument `onNodeLeft()` and skips the salvage message. Use the node-id overload in that topology-exception path as well so the recovery behavior is consistent for the race where the node leaves between the check and the send. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
