This is an automated email from the ASF dual-hosted git repository. smiklosovic pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/cassandra.git
commit b7e2b3b9dc6246f75962eaf5facbb74cc1216be5 Merge: acec78abf4 74e0794d92 Author: Stefan Miklosovic <smikloso...@apache.org> AuthorDate: Wed Aug 20 10:54:42 2025 +0200 Merge branch 'cassandra-5.0' into trunk CHANGES.txt | 1 + .../cassandra/db/compaction/CompactionTask.java | 74 ++++++++++++++++++++++ src/java/org/apache/cassandra/index/Index.java | 22 +++++++ .../cassandra/index/sai/StorageAttachedIndex.java | 6 ++ .../org/apache/cassandra/index/sasi/SASIIndex.java | 6 ++ .../apache/cassandra/index/CustomIndexTest.java | 60 ++++++++++++++++++ 6 files changed, 169 insertions(+) diff --cc CHANGES.txt index aeef6ebc6c,86b9274c38..a524728bc4 --- a/CHANGES.txt +++ b/CHANGES.txt @@@ -294,12 -74,7 +294,13 @@@ Merged from 4.1 * Optionally skip exception logging on invalid legacy protocol magic exception (CASSANDRA-19483) * Fix SimpleClient ability to release acquired capacity (CASSANDRA-20202) * Fix WaitQueue.Signal.awaitUninterruptibly may block forever if invoking thread is interrupted (CASSANDRA-20084) + * Run audit_logging_options through sanitization and validation on startup (CASSANDRA-20208) + * Enforce CQL message size limit on multiframe messages (CASSANDRA-20052) + * Fix race condition in DecayingEstimatedHistogramReservoir during rescale (CASSANDRA-19365) Merged from 4.0: ++ * Make secondary index implementations notified about rows in fully expired SSTables in compaction (CASSANDRA-20829) + * Ensure prepared_statement INSERT timestamp precedes eviction DELETE (CASSANDRA-19703) + * Gossip doesn't converge due to race condition when updating EndpointStates multiple fields (CASSANDRA-20659) * Handle sstable metadata stats file getting a new mtime after compaction has finished (CASSANDRA-18119) * Honor MAX_PARALLEL_TRANSFERS correctly (CASSANDRA-20532) * Updating a column with a new TTL but same expiration time is non-deterministic and causes repair mismatches. 
(CASSANDRA-20561) diff --cc src/java/org/apache/cassandra/db/compaction/CompactionTask.java index eb331dda8f,cc8cdae157..1f9e8ea403 --- a/src/java/org/apache/cassandra/db/compaction/CompactionTask.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionTask.java @@@ -17,6 -17,8 +17,7 @@@ */ package org.apache.cassandra.db.compaction; -import java.time.Instant; + import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; @@@ -26,14 -28,10 +27,16 @@@ import java.util.Set import java.util.concurrent.TimeUnit; import com.google.common.base.Predicate; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; -import com.google.common.collect.Sets; import com.google.common.util.concurrent.RateLimiter; + +import org.apache.cassandra.db.compaction.unified.UnifiedCompactionTask; ++import org.apache.cassandra.db.rows.Unfiltered; ++import org.apache.cassandra.db.rows.UnfilteredRowIterator; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@@ -42,9 -40,16 +45,14 @@@ import org.apache.cassandra.config.Data import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Directories; import org.apache.cassandra.db.SystemKeyspace; + import org.apache.cassandra.db.WriteContext; import org.apache.cassandra.db.compaction.writers.CompactionAwareWriter; import org.apache.cassandra.db.compaction.writers.DefaultCompactionWriter; -import org.apache.cassandra.db.lifecycle.LifecycleTransaction; +import org.apache.cassandra.db.lifecycle.ILifecycleTransaction; + import org.apache.cassandra.db.rows.Row; -import org.apache.cassandra.db.rows.Unfiltered; -import org.apache.cassandra.db.rows.UnfilteredRowIterator; + import org.apache.cassandra.index.Index; + import 
org.apache.cassandra.index.transactions.IndexTransaction; + import org.apache.cassandra.io.sstable.ISSTableScanner; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.io.sstable.metadata.MetadataCollector; import org.apache.cassandra.io.util.File; @@@ -160,34 -134,21 +168,37 @@@ public class CompactionTask extends Abs if (DatabaseDescriptor.isSnapshotBeforeCompaction()) { - Instant creationTime = now(); - cfs.snapshotWithoutMemtable(creationTime.toEpochMilli() + "-compact-" + cfs.name, creationTime); + SnapshotOptions options = SnapshotOptions.systemSnapshot(cfs.name, SnapshotType.COMPACT, cfs.getKeyspaceTableName()).skipFlush().build(); + SnapshotManager.instance.takeSnapshot(options); } - try (CompactionController controller = getCompactionController(transaction.originals())) + try (CompactionController controller = getCompactionController(inputSSTables())) { + // Note: the controller set-up above relies on using the transaction-provided sstable list, from which + // fully-expired sstables should not be removed (so that the overlap tracker does not include them), but + // sstables excluded for scope reduction should be removed. + Set<SSTableReader> actuallyCompact = new HashSet<>(inputSSTables()); final Set<SSTableReader> fullyExpiredSSTables = controller.getFullyExpiredSSTables(); + ++ maybeNotifyIndexersAboutRowsInFullyExpiredSSTables(fullyExpiredSSTables); ++ + if (!fullyExpiredSSTables.isEmpty()) + { + logger.debug("Compaction {} dropping expired sstables: {}", transaction.opIdString(), fullyExpiredSSTables); + actuallyCompact.removeAll(fullyExpiredSSTables); + } + TimeUUID taskId = transaction.opId(); // select SSTables to compact based on available disk space. 
- if (!buildCompactionCandidatesForAvailableDiskSpace(fullyExpiredSSTables, taskId)) + final boolean hasExpirations = !fullyExpiredSSTables.isEmpty(); + if ((shouldReduceScopeForSpace() && !buildCompactionCandidatesForAvailableDiskSpace(actuallyCompact, hasExpirations, taskId)) + || hasExpirations) { // The set of sstables has changed (one or more were excluded due to limited available disk space). - // We need to recompute the overlaps between sstables. + // We need to recompute the overlaps between sstables. The iterators used in the compaction controller + // and tracker will reflect the changed set of sstables made by LifecycleTransaction.cancel(), + // so refreshing the overlaps will be based on the updated set of sstables. controller.refreshOverlaps(); } diff --cc test/unit/org/apache/cassandra/index/CustomIndexTest.java index 3c0d75b5fc,5504a8760b..3cd756b9e3 --- a/test/unit/org/apache/cassandra/index/CustomIndexTest.java +++ b/test/unit/org/apache/cassandra/index/CustomIndexTest.java @@@ -43,7 -33,8 +43,9 @@@ import com.google.common.collect.Immuta import com.google.common.collect.Maps; import com.google.common.collect.Sets; import org.junit.Assume; +import org.junit.BeforeClass; + import com.google.common.util.concurrent.Uninterruptibles; + import org.junit.Assert; import org.junit.Test; import com.datastax.driver.core.exceptions.QueryValidationException; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org