[
https://issues.apache.org/jira/browse/TINKERPOP-2958?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17731756#comment-17731756
]
ASF GitHub Bot commented on TINKERPOP-2958:
-------------------------------------------
lyndonbauto commented on PR #2090:
URL: https://github.com/apache/tinkerpop/pull/2090#issuecomment-1587954864
@kenhuuu
> Hi Lyndon, is it possible for you to share what kind of testing you were
> running that showed this issue?
Hey Ken, yeah sure, this isn't pretty code but here it is:
```
package com.aerospike.firefly.server;
import com.aerospike.firefly.Server;
import org.apache.tinkerpop.gremlin.driver.Cluster;
import org.apache.tinkerpop.gremlin.driver.remote.DriverRemoteConnection;
import
org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
import org.apache.tinkerpop.gremlin.structure.Property;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.stream.Collectors;
import static org.apache.tinkerpop.gremlin.driver.Tokens.ARGS_EVAL_TIMEOUT;
import static
org.apache.tinkerpop.gremlin.process.traversal.AnonymousTraversalSource.traversal;
/**
 * Manual reproduction harness for the memory growth reported in TINKERPOP-2958.
 *
 * <p>The test boots an embedded server, loads 25000 vertices through a remote
 * traversal source, and then issues small property look-ups from a 16-thread
 * pool for {@link #TEST_DURATION} milliseconds while a timer prints the JVM heap
 * size and the number of completed queries every five seconds. The result is
 * judged by watching the printed heap size; nothing is asserted.
 */
public class TestServer {
    private static final long SECOND = 1000;
    private static final long MINUTE = 60 * SECOND;
    private static final long TEST_DURATION = 4 * MINUTE;
    private static final Timer HEAP_REPORTING_TIMER = new Timer(true);

    /**
     * Number of queries whose futures have been collected so far. Written only by
     * the thread running {@link #exposeLeak()} and read by the timer thread, hence
     * {@code volatile} (single writer, so {@code +=} is safe).
     */
    private volatile long totalExecuted = 0;

    Server server;

    /**
     * Starts the server, schedules the periodic heap report and runs the workload.
     * The report task is cancelled and the server stopped once the workload ends,
     * whether it finished normally or threw.
     *
     * @throws Exception if the workload fails or the thread is interrupted
     */
    @Test
    public void testServer() throws Exception {
        server = Server.main(new String[] {"../conf/firefly-gremlin-server-local.yaml"});
        final TimerTask heapReport = new HeapReport();
        HEAP_REPORTING_TIMER.schedule(heapReport, 0, 5000);
        try {
            exposeLeak();
        } finally {
            heapReport.cancel();
            server.stop().join();
        }
    }

    /** Prints the current total heap size and the running query count. */
    private class HeapReport extends TimerTask {
        @Override
        public void run() {
            // Request collection several times so that the printed figure is less
            // affected by garbage that is merely waiting to be collected.
            System.gc();
            System.gc();
            System.gc();
            System.out.printf("Heap size: %d MB%n",
                    Runtime.getRuntime().totalMemory() / 1024 / 1024);
            System.out.printf("Executed %d queries%n", totalExecuted);
        }
    }

    /**
     * Runs the workload against a server listening on localhost:8182.
     *
     * <p>Steps: drop all vertices, insert 25000 vertices with seven string
     * properties each, then repeatedly submit look-ups of the {@code last_seen}
     * property for nine randomly chosen vertex ids until {@link #TEST_DURATION}
     * has elapsed (measured from method entry, so loading time counts). At most
     * about 500 look-ups are kept in flight. After all look-ups have completed the
     * remote connection is closed and the method idles for 30 seconds so that the
     * heap reports can continue to be observed.
     *
     * @throws Exception if a traversal fails, no vertex ids are available, or the
     *                   thread is interrupted while sleeping
     */
    public void exposeLeak() throws Exception {
        final long startTime = System.currentTimeMillis();
        final Cluster cluster = Cluster.build().addContactPoint("localhost").port(8182).create();
        final ExecutorService executorService = Executors.newFixedThreadPool(16);
        try {
            // Alternative without an explicit Cluster:
            // DriverRemoteConnection.using("localhost", 8182, "g")
            final DriverRemoteConnection drc = DriverRemoteConnection.using(cluster, "g");
            try {
                final GraphTraversalSource g = traversal().withRemote(drc);

                // NOTE(review): the per-request evaluation timeout here is 1 (presumably
                // milliseconds) - confirm that this is intended for the initial drop.
                g.with(ARGS_EVAL_TIMEOUT, 1L).V().drop().iterate();
                for (int i = 0; i < 25000; i++) {
                    final String counter = String.format("%d", i);
                    g.addV("randomVertex").
                            property("last_seen", String.format("%d", System.currentTimeMillis())).
                            property("something", counter).
                            property("otherThing", counter).
                            property("anotherThing", counter).
                            property("yetAnotherThing", counter).
                            property("andAnotherThing", counter).
                            property("andYetAnotherThing", counter).
                            iterate();
                }

                final List<Object> ids = g.V().id().toList();
                if (ids.isEmpty()) {
                    throw new IllegalStateException("No vertex ids were returned; nothing to query");
                }

                final Random rand = new Random();
                final List<Future<List<? extends Property>>> futures = new ArrayList<>();
                while (System.currentTimeMillis() - startTime <= TEST_DURATION) {
                    // Back-pressure: wait for completed look-ups to be collected before
                    // submitting more once the backlog exceeds 500.
                    while (futures.size() > 500) {
                        Thread.sleep(100);
                        final List<Future<List<? extends Property>>> completedFutures =
                                futures.stream().filter(Future::isDone).collect(Collectors.toList());
                        completedFutures.forEach(TestServer::awaitAndReport);
                        totalExecuted += completedFutures.size();
                        futures.removeAll(completedFutures);
                    }
                    final List<Object> idz = new ArrayList<>();
                    for (int i = 0; i < 9; i++) {
                        idz.add(ids.get(rand.nextInt(ids.size())));
                    }
                    futures.add(executorService.submit(() ->
                            g.V(idz).properties("last_seen").toList()));
                }
                futures.forEach(TestServer::awaitAndReport);
            } finally {
                drc.close();
            }
            Thread.sleep(30000);
        } finally {
            // Both were created by this method, so release them here. On the normal
            // path every submitted task has already completed.
            executorService.shutdownNow();
            cluster.close();
        }
    }

    /**
     * Waits for {@code future} and prints any failure instead of propagating it,
     * so that a single failed look-up does not end the run.
     */
    private static void awaitAndReport(final Future<?> future) {
        try {
            future.get();
        } catch (InterruptedException e) {
            // Preserve the interrupt so that the caller's next blocking call sees it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```
The server code is:
```
package com.aerospike.firefly;
import org.apache.tinkerpop.gremlin.server.GremlinServer;
import org.apache.tinkerpop.gremlin.server.Settings;
import org.apache.tinkerpop.gremlin.server.util.ServerGremlinExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.concurrent.CompletableFuture;
public class Server {
private static final Logger logger =
LoggerFactory.getLogger(Server.class);
private GremlinServer gremlinServer = null;
private final String confPath;
private CompletableFuture<Void> serverStarted = null;
private CompletableFuture<Void> serverStopped = null;
public Server(final String file) {
confPath = file;
}
public static Server main(final String[] args) {
if (args.length != 1) {
System.err.println("Usage: Server <conf file>");
System.exit(1);
}
String file = args[0];
Server fireflyServer = new Server(file);
fireflyServer.start().exceptionally(t -> {
logger.error("Firefly Server was unable to start and will now
begin shutdown", t);
fireflyServer.stop().join();
return null;
}).join();
return fireflyServer;
}
public synchronized CompletableFuture<Void> start() {
if (serverStarted != null) {
return serverStarted;
}
serverStarted = new CompletableFuture<>();
try {
Settings settings = Settings.read(confPath);
gremlinServer = new GremlinServer(settings);
serverStarted = CompletableFuture.allOf(gremlinServer.start());
} catch (Exception ex) {
serverStarted.completeExceptionally(ex);
}
return serverStarted;
}
public synchronized CompletableFuture<Void> stop() {
if (gremlinServer == null) {
return CompletableFuture.completedFuture(null);
}
if (serverStopped != null) {
return serverStopped;
}
serverStopped = gremlinServer.stop();
return serverStopped;
}
}
```
I adapted it from the JanusGraphServer implementation specifically to
test this, since I didn't have any in-Java server bootstrapping code.
Basically, you just run it and watch the memory. For Aerospike Graph, after a few
minutes we see about 1 GB total if the bug is not present; if the bug is present, I
see ~1 GB per minute of growth and ~3-4 GB at the end of the test.
This is kind of obvious, but make sure the evaluationTimeout in the server
yaml is big (mine was 20 mins).
> ScheduledExecutorService for timeouts are never cancelled
> ---------------------------------------------------------
>
> Key: TINKERPOP-2958
> URL: https://issues.apache.org/jira/browse/TINKERPOP-2958
> Project: TinkerPop
> Issue Type: Bug
> Components: server
> Affects Versions: 3.6.3, 3.6.4
> Reporter: Lyndon Bauto
> Assignee: Lyndon Bauto
> Priority: Major
>
> A scheduled task was added to timeout a request in the future.
>
> This task is not cancelled when the request is completed before the timeout.
> With a long timeout, this can lead to extreme memory usage (~1 GB growth per
> minute on my laptop with my graph implementation).
--
This message was sent by Atlassian Jira
(v8.20.10#820010)