A simple diagnostic utility I use to detect these problems:
import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import org.apache.ignite.Ignite;
import org.apache.ignite.internal.GridComponent;
import org.apache.ignite.internal.IgniteKernal;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
public class IgniteWeakRefTracker {
private static final Logger LOGGER =
LogManager.getLogger(IgniteWeakRefTracker.class);
private final String clazz;
private final String testName;
private final String name;
private final WeakReference<Ignite> innerRef;
private final List<WeakReference<GridComponent>> componentRefs = new
ArrayList<>(128);
private static final LinkedList<IgniteWeakRefTracker> refs = new
LinkedList<>();
private IgniteWeakRefTracker(String testName, Ignite ignite) {
this.clazz = ignite.getClass().getCanonicalName();
this.innerRef = new WeakReference<>(ignite);
this.name = ignite.name();
this.testName = testName;
if (ignite instanceof IgniteKernal) {
IgniteKernal ik = (IgniteKernal) ignite;
List<GridComponent> components = ik.context().components();
for (GridComponent c : components) {
componentRefs.add(new WeakReference<>(c));
}
}
}
public static void register(String testName, Ignite ignite) {
refs.add(new IgniteWeakRefTracker(testName, ignite));
}
public static void trimCollectedRefs() {
List<IgniteWeakRefTracker> toRemove = new ArrayList<>();
for (IgniteWeakRefTracker ref : refs) {
if (ref.isIgniteCollected()) {
LOGGER.info("Collected ignite: ignite {} from test {}",
ref.getIgniteName(), ref.getTestName());
toRemove.add(ref);
if (ref.igniteComponentsNonCollectedCount() != 0) {
throw new IllegalStateException("Non collected
components for collected ignite.");
}
} else {
LOGGER.warn("Leaked ignite: ignite {} from test {}",
ref.getIgniteName(), ref.getTestName());
}
}
refs.removeAll(toRemove);
LOGGER.info("Leaked ignites count: {}", refs.size());
}
public static int getLeakedSize() {
return refs.size();
}
public boolean isIgniteCollected() {
return innerRef.get() == null;
}
public int igniteComponentsNonCollectedCount() {
int res = 0;
for (WeakReference<GridComponent> cr : componentRefs) {
GridComponent gridComponent = cr.get();
if (gridComponent != null) {
LOGGER.warn("Uncollected component: {}",
gridComponent.getClass().getSimpleName());
res++;
}
}
return res;
}
public String getClazz() {
return clazz;
}
public String getTestName() {
return testName;
}
public String getIgniteName() {
return name;
}
}
On Fri, Mar 20, 2020 at 11:51 PM Andrey Davydov <[email protected]>
wrote:
> I found one more leak path and understand the reason for it:
>
>
> this - value: org.apache.ignite.internal.IgniteKernal #1
> <- grid - class: org.apache.ignite.internal.GridKernalContextImpl,
> value: org.apache.ignite.internal.IgniteKernal #1
> <- ctx - class:
> org.apache.ignite.internal.processors.timeout.GridTimeoutProcessor, value:
> org.apache.ignite.internal.GridKernalContextImpl #3
> <- this$0 - class:
> org.apache.ignite.internal.processors.timeout.GridTimeoutProcessor$CancelableTask,
> value: org.apache.ignite.internal.processors.timeout.GridTimeoutProcessor #1
> <- stmtCleanupTask - class:
> org.apache.ignite.internal.processors.query.h2.ConnectionManager, value:
> org.apache.ignite.internal.processors.timeout.GridTimeoutProcessor$CancelableTask
> #11
> <- arg$1 - class:
> org.apache.ignite.internal.processors.query.h2.ConnectionManager$$Lambda$174,
> value: org.apache.ignite.internal.processors.query.h2.ConnectionManager #1
> <- recycler - class:
> org.apache.ignite.internal.processors.query.h2.ThreadLocalObjectPool,
> value:
> org.apache.ignite.internal.processors.query.h2.ConnectionManager$$Lambda$174
> #1
> <- this$0 - class:
> org.apache.ignite.internal.processors.query.h2.ThreadLocalObjectPool$Reusable,
> value: org.apache.ignite.internal.processors.query.h2.ThreadLocalObjectPool
> #1
> <- value - class: java.lang.ThreadLocal$ThreadLocalMap$Entry,
> value:
> org.apache.ignite.internal.processors.query.h2.ThreadLocalObjectPool$Reusable
> #1
> <- [411] - class:
> java.lang.ThreadLocal$ThreadLocalMap$Entry[], value:
> java.lang.ThreadLocal$ThreadLocalMap$Entry #35
> <- table - class: java.lang.ThreadLocal$ThreadLocalMap,
> value: java.lang.ThreadLocal$ThreadLocalMap$Entry[] #25
> <- threadLocals (thread object) - class: java.lang.Thread,
> value: java.lang.ThreadLocal$ThreadLocalMap #2
>
>
>
> Reason:
>
>
> org.apache.ignite.internal.processors.query.h2.ConnectionManager has some
> ThreadLocal fields, including connPool, threadConns, threadConn,
> detachedConns etc.
>
>
> ConnectionManager stores lambdas in these thread-local storages, so a reference to
> ConnectionManager leaks into the thread-local context.
>
>
> And it seems that this method is not sufficient:
>
> private void closeConnections() {
> threadConns.values().forEach(set ->
> set.keySet().forEach(U::closeQuiet));
> detachedConns.keySet().forEach(U::closeQuiet);
>
> threadConns.clear();
> detachedConns.clear();
> }
>
>
> So when Ignition.start() and Ignition.stop() are called from different threads,
> the caches are not cleared properly and the starter thread keeps a reference to
> ConnectionManager via its ThreadLocal context. As a result, one Ignite instance
> leaks every time.
>
>
> I'm sure you run "tens of thousands nodes during every suite run." But
> the majority of those runs may be without indexing, and may start and stop the
> node in the same thread.
>
>
> To reproduce the leak, start Ignite with indexing, keep a weak reference to
> it, stop it asynchronously in another thread, null out the local reference,
> then check the weak reference and inspect the heap dump.
>
>
>
> Andrey.
>
>
>
> *От: *Andrey Davydov <[email protected]>
> *Отправлено: *18 марта 2020 г. в 18:37
> *Кому: *[email protected]
> *Тема: *Ignite memory leaks in 2.8.0
>
>
>
> Hello,
>
>
>
> There are at least two ways in which a reference to IgniteKernal leaks to a
> GC root and makes it ineligible for GC.
>
>
>
> 1. The first one:
>
>
>
> this - value: org.apache.ignite.internal.IgniteKernal #1
>
> <- grid - class: org.apache.ignite.internal.GridKernalContextImpl,
> value: org.apache.ignite.internal.IgniteKernal #1
>
> <- ctx - class:
> org.apache.ignite.internal.processors.query.h2.IgniteH2Indexing, value:
> org.apache.ignite.internal.GridKernalContextImpl #2
>
> <- this$0 - class:
> org.apache.ignite.internal.processors.query.h2.IgniteH2Indexing$10, value:
> org.apache.ignite.internal.processors.query.h2.IgniteH2Indexing #2
>
> <- serializer - class: org.h2.util.JdbcUtils, value:
> org.apache.ignite.internal.processors.query.h2.IgniteH2Indexing$10 #1
>
> <- [5395] - class: java.lang.Object[], value:
> org.h2.util.JdbcUtils class JdbcUtils
>
> <- elementData - class: java.util.Vector, value:
> java.lang.Object[] #37309
>
> <- classes - class: sun.misc.Launcher$AppClassLoader, value:
> java.util.Vector #31
>
> <- contextClassLoader (thread object) - class:
> java.lang.Thread, value: sun.misc.Launcher$AppClassLoader #1
>
>
>
> org.h2.util.JdbcUtils has a static field, JavaObjectSerializer serializer, which
> references IgniteKernal via IgniteH2Indexing. This makes a closed and stopped
> IgniteKernal non-collectable by GC.
>
> If several Ignite instances run in the same JVM, JdbcUtils will always use only
> one of them, and this can cause races.
>
>
>
> 2. The second way:
>
>
>
> this - value: org.apache.ignite.internal.IgniteKernal #2
>
> <- grid - class: org.apache.ignite.internal.GridKernalContextImpl,
> value: org.apache.ignite.internal.IgniteKernal #2
>
> <- ctx - class:
> org.apache.ignite.internal.processors.cache.GridCacheContext, value:
> org.apache.ignite.internal.GridKernalContextImpl #1
>
> <- cctx - class:
> org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtCacheEntry,
> value: org.apache.ignite.internal.processors.cache.GridCacheContext #24
>
> <- parent - class:
> org.apache.ignite.internal.processors.cache.GridCacheMvccCandidate, value:
> org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtCacheEntry
> #4
>
> <- [0] - class: java.lang.Object[], value:
> org.apache.ignite.internal.processors.cache.GridCacheMvccCandidate #1
>
> <- elements - class: java.util.ArrayDeque, value:
> java.lang.Object[] #43259
>
> <- value - class: java.lang.ThreadLocal$ThreadLocalMap$Entry,
> value: java.util.ArrayDeque #816
>
> <- [119] - class:
> java.lang.ThreadLocal$ThreadLocalMap$Entry[], value:
> java.lang.ThreadLocal$ThreadLocalMap$Entry #51
>
> <- table - class: java.lang.ThreadLocal$ThreadLocalMap,
> value: java.lang.ThreadLocal$ThreadLocalMap$Entry[] #21
>
> <- threadLocals (thread object) - class: java.lang.Thread,
> value: java.lang.ThreadLocal$ThreadLocalMap #2
>
>
>
> A reference to IgniteKernal leaks into a ThreadLocal variable, so when we start/stop
> many instances of Ignite in the same JVM during testing, we get many stopped
> “zombie” Ignite instances in the ThreadLocal context of the main test thread, and this
> causes an OutOfMemoryError after some dozens of tests.
>
>
>
> Andrey.
>
>
>
>
>