[
https://issues.apache.org/jira/browse/HADOOP-19624?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18050530#comment-18050530
]
ASF GitHub Bot commented on HADOOP-19624:
-----------------------------------------
anmolanmol1234 commented on code in PR #7852:
URL: https://github.com/apache/hadoop/pull/7852#discussion_r2671116520
##########
hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClientThrottlingAnalyzer.java:
##########
@@ -180,4 +184,103 @@ public void testManySuccessAndErrorsAndWaiting() {
sleep(10 * ANALYSIS_PERIOD);
validate(0, analyzer.getSleepDuration());
}
+
+ /**
+ * Test that timer thread is properly cleaned up when analyzer is closed.
+ * This validates the fix for HADOOP-19624.
+ */
+ @Test
+ public void testAnalyzerTimerCleanup() throws Exception {
+ int initialTimerThreads = countAbfsTimerThreads();
+
+ // Create analyzer - should create one timer thread
+ AbfsClientThrottlingAnalyzer analyzer =
+ new AbfsClientThrottlingAnalyzer("test-cleanup", abfsConfiguration);
+
+ // Verify timer thread was created
+ assertEquals(initialTimerThreads + 1, countAbfsTimerThreads(),
+ "Timer thread should be created");
+
+ // Close analyzer - should clean up timer thread
+ analyzer.close();
+
+ // Wait for cleanup to complete
+ sleep(1000);
+
+ // Verify timer thread was cleaned up
+ assertEquals(initialTimerThreads, countAbfsTimerThreads(),
+ "Timer thread should be cleaned up after close");
+ }
+
+ /**
+ * Test that close() is idempotent and can be called multiple times.
+ */
+ @Test
+ public void testAnalyzerCloseIdempotent() throws Exception {
+ AbfsClientThrottlingAnalyzer analyzer =
+ new AbfsClientThrottlingAnalyzer("test-idempotent", abfsConfiguration);
+
+ int beforeClose = countAbfsTimerThreads();
+
+ // Close multiple times - should not throw exceptions
+ analyzer.close();
+ analyzer.close();
+ analyzer.close();
+
+ sleep(500);
+
+ // Should only clean up once
+ assertTrue(countAbfsTimerThreads() < beforeClose,
+ "Multiple close() calls should be safe");
+ }
+
+ /**
+ * Test cleanup with multiple analyzers to ensure no interference.
+ */
+ @Test
+ public void testMultipleAnalyzersCleanup() throws Exception {
+ int initialTimerThreads = countAbfsTimerThreads();
+
+ // Create multiple analyzers
+ AbfsClientThrottlingAnalyzer analyzer1 =
+ new AbfsClientThrottlingAnalyzer("test-multi-1", abfsConfiguration);
+ AbfsClientThrottlingAnalyzer analyzer2 =
+ new AbfsClientThrottlingAnalyzer("test-multi-2", abfsConfiguration);
+ AbfsClientThrottlingAnalyzer analyzer3 =
+ new AbfsClientThrottlingAnalyzer("test-multi-3", abfsConfiguration);
+
+ // Should have created 3 timer threads
+ assertEquals(initialTimerThreads + 3, countAbfsTimerThreads(),
+ "Should create 3 timer threads");
+
+ // Close all analyzers
+ analyzer1.close();
+ analyzer2.close();
+ analyzer3.close();
+
+ sleep(1000);
+
+ // All timer threads should be cleaned up
+ assertEquals(initialTimerThreads, countAbfsTimerThreads(),
+ "All timer threads should be cleaned up");
+ }
+
+ /**
+ * Helper method to count ABFS timer threads.
+ */
+ private int countAbfsTimerThreads() {
+ java.lang.management.ThreadMXBean threadBean =
+ java.lang.management.ManagementFactory.getThreadMXBean();
+ long[] threadIds = threadBean.getAllThreadIds();
+
+ int count = 0;
+ for (long id : threadIds) {
+ java.lang.management.ThreadInfo info = threadBean.getThreadInfo(id);
+ if (info != null &&
+ info.getThreadName().contains("abfs-timer-client-throttling-analyzer")) {
+ count++;
+ }
+ }
+ return count;
+ }
}
Review Comment:
A newline is needed at the end of the file.
> [Bug Report] Thread leak in ABFS AbfsClientThrottlingAnalyzer
> -------------------------------------------------------------
>
> Key: HADOOP-19624
> URL: https://issues.apache.org/jira/browse/HADOOP-19624
> Project: Hadoop Common
> Issue Type: Sub-task
> Components: fs/azure
> Affects Versions: 3.5.0, 3.4.1
> Reporter: Anuj Modi
> Priority: Major
> Labels: pull-request-available
>
> Bug reported by Matt in a common-dev discussion.
> > What seems to be the issue is that the timer tasks are cleaned up but
> > the timer threads themselves are never actually cleaned up. This will
> > eventually lead to an OOM since nothing is collecting these. I was
> > able to reproduce this locally in 3.3.6 and 3.4.1 but I believe that
> > it would affect any version that relies on autothrottling for ABFS.
> >
> > I was also able to make a quick fix as well as confirm a workaround --
> > the long term fix would be to include `timer.cancel()` and
> > `timer.purge()` in a method for AbfsClientThrottlingAnalyzer.java. The
> > short term workaround is to disable autothrottling and rely on Azure
> > to throttle the connections as needed with the below configuration.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]