This is an automated email from the ASF dual-hosted git repository. jmckenzie pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/trunk by this push: new f4a2135 Remove strong ref loop in LogTransaction.SSTableTidier f4a2135 is described below commit f4a2135c5ba442aafd27bb7c12c85b376d5a2b87 Author: Josh McKenzie <jmcken...@apache.org> AuthorDate: Tue Dec 14 12:01:47 2021 -0500 Remove strong ref loop in LogTransaction.SSTableTidier Patch by Josh McKenzie; reviewed by Benedict Elliott Smith for CASSANDRA-17205 --- CHANGES.txt | 9 ++++--- .../cassandra/db/lifecycle/LogTransaction.java | 29 +++++++++++++++------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 5a99669..e829293 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,7 +1,8 @@ 4.1 + * Remove self-reference in SSTableTidier (CASSANDRA-17205) * Add guardrail for query page size (CASSANDRA-17189) * Allow column_index_size_in_kb to be configurable through nodetool (CASSANDRA-17121) - * Emit a metric for number of local read and write calls + * Emit a metric for number of local read and write calls * Add non-blocking mode for CDC writes (CASSANDRA-17001) * Add guardrails framework (CASSANDRA-17147) * Harden resource management on SSTable components to prevent future leaks (CASSANDRA-17174) @@ -228,7 +229,7 @@ Merged from 3.0: * Promote protocol V5 out of beta (CASSANDRA-14973) * Fix incorrect encoding for strings can be UTF8 (CASSANDRA-16429) * Fix node unable to join when RF > N in multi-DC with added warning (CASSANDRA-16296) - * Add an option to nodetool tablestats to check sstable location correctness (CASSANDRA-16344) + * Add an option to nodetool tablestats to check sstable location correctness (CASSANDRA-16344) * Unable to ALTER KEYSPACE while decommissioned/assassinated nodes are in gossip (CASSANDRA-16422) * Metrics backward compatibility restored after CASSANDRA-15066 (CASSANDRA-16083) * Reduce new reserved keywords introduced since 3.0 (CASSANDRA-16439) @@ -768,7 +769,7 @@ Merged from 3.0: * Use standard Amazon naming for datacenter and rack in Ec2Snitch (CASSANDRA-7839) * Abstract write path for pluggable storage (CASSANDRA-14118) * nodetool describecluster should be more informative (CASSANDRA-13853) - * Compaction performance improvements (CASSANDRA-14261) + * Compaction performance improvements (CASSANDRA-14261) * Refactor Pair usage to avoid boxing ints/longs (CASSANDRA-14260) * Add options to nodetool tablestats to sort and limit output (CASSANDRA-13889) * Rename internals to reflect CQL vocabulary (CASSANDRA-14354) @@ -1153,7 +1154,7 @@ Merged from 2.1: 3.11.2 * Fix ReadCommandTest (CASSANDRA-14234) * Remove trailing period from latency reports at keyspace level (CASSANDRA-14233) - * Remove dependencies on JVM internal classes from JMXServerUtils (CASSANDRA-14173) + * Remove dependencies on JVM internal classes from JMXServerUtils (CASSANDRA-14173) * Add DEFAULT, UNSET, MBEAN and MBEANS to `ReservedKeywords` (CASSANDRA-14205) * Print correct snitch info from nodetool describecluster (CASSANDRA-13528) * Enable CDC unittest (CASSANDRA-14141) diff --git a/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java b/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java index 09717fc..5a909e0 100644 --- a/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java +++ b/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java @@ -30,12 +30,12 @@ import java.util.function.Predicate; import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.Runnables; -import org.apache.cassandra.io.util.File; +import com.codahale.metrics.Counter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.cassandra.concurrent.ScheduledExecutors; -import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.Directories; import org.apache.cassandra.db.SystemKeyspace; import org.apache.cassandra.db.compaction.OperationType; @@ -46,7 +46,9 @@ import org.apache.cassandra.io.sstable.Descriptor; import org.apache.cassandra.io.sstable.SSTable; import org.apache.cassandra.io.sstable.SnapshotDeletingTask; import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.util.FileUtils; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.*; import org.apache.cassandra.utils.concurrent.Ref; @@ -349,21 +351,25 @@ class LogTransaction extends Transactional.AbstractTransactional implements Tran // must not retain a reference to the SSTableReader, else leak detection cannot kick in private final Descriptor desc; private final long sizeOnDisk; - private final Tracker tracker; private final boolean wasNew; private final Object lock; private final Ref<LogTransaction> parentRef; - private final UUID txnId; + private final Counter totalDiskSpaceUsed; public SSTableTidier(SSTableReader referent, boolean wasNew, LogTransaction parent) { this.desc = referent.descriptor; this.sizeOnDisk = referent.bytesOnDisk(); - this.tracker = parent.tracker; this.wasNew = wasNew; this.lock = parent.lock; this.parentRef = parent.selfRef.tryRef(); - this.txnId = parent.id(); + + // While the parent cfs may be dropped in the interim of us taking a reference to this and using it, at worst + // we'll be updating a metric for a now dropped ColumnFamilyStore. We do not hold a reference to the tracker or + // cfs as that would create a strong ref loop and violate our ability to do leak detection. + totalDiskSpaceUsed = parent.tracker != null && parent.tracker.cfstore != null ? + parent.tracker.cfstore.metric.totalDiskSpaceUsed : + null; if (this.parentRef == null) throw new IllegalStateException("Transaction already completed"); @@ -371,7 +377,9 @@ class LogTransaction extends Transactional.AbstractTransactional implements Tran public void run() { - if (tracker != null && !tracker.isDummy()) + // While this may be a dummy tracker w/out information in the metrics table, we attempt to delete regardless + // and allow the delete to silently fail if this is an invalid ks + cf combination at time of tidy run. + if (DatabaseDescriptor.isDaemonInitialized()) SystemKeyspace.clearSSTableReadMeter(desc.ksname, desc.cfname, desc.generation); synchronized (lock) @@ -399,8 +407,11 @@ class LogTransaction extends Transactional.AbstractTransactional implements Tran return; } - if (tracker != null && tracker.cfstore != null && !wasNew) - tracker.cfstore.metric.totalDiskSpaceUsed.dec(sizeOnDisk); + // It's possible we're the last one's holding a ref to this metric if it's already been released in the + // parent TableMetrics; we run this regardless rather than holding a ref to that CFS or Tracker and thus + // creating a strong ref loop + if (DatabaseDescriptor.isDaemonInitialized() && totalDiskSpaceUsed != null && !wasNew) + totalDiskSpaceUsed.dec(sizeOnDisk); // release the referent to the parent so that the all transaction files can be released parentRef.release(); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org