This is an automated email from the ASF dual-hosted git repository.
smengcl pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
new 24b9a855f8a HADOOP-19900. EC write can fail with
ArrayIndexOutOfBoundsException due to CoderUtil emptyChunk resize race (#8518)
24b9a855f8a is described below
commit 24b9a855f8afd82e40265275fdba1acf32df96de
Author: Siyao Meng <[email protected]>
AuthorDate: Wed May 27 17:33:05 2026 -0700
HADOOP-19900. EC write can fail with ArrayIndexOutOfBoundsException due to
CoderUtil emptyChunk resize race (#8518)
Generated-by: Claude Code (Opus 4.7)
(cherry picked from commit e33eb713c842cd399140b6e9e9c935228f8ad194)
---
.../hadoop/io/erasurecode/rawcoder/CoderUtil.java | 18 ++++--
.../io/erasurecode/rawcoder/TestCoderUtil.java | 70 +++++++++++++++++++++-
2 files changed, 83 insertions(+), 5 deletions(-)
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/CoderUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/CoderUtil.java
index 6072f13c15b..3fb43f0dad8 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/CoderUtil.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/CoderUtil.java
@@ -42,15 +42,25 @@ private CoderUtil() {
* @return empty chunk of zero bytes
*/
static byte[] getEmptyChunk(int leastLength) {
- if (emptyChunk.length >= leastLength) {
- return emptyChunk; // In most time
+ byte[] chunk = emptyChunk;
+ if (chunk.length >= leastLength) {
+ return chunk; // In most time
}
synchronized (CoderUtil.class) {
- emptyChunk = new byte[leastLength];
+ /*
+ * Recheck under the lock: another caller may already have grown the
+ * cache while this caller waited. A larger cached chunk is valid for a
+ * smaller request, so only allocate when the cache is still too small.
+ */
+ chunk = emptyChunk;
+ if (chunk.length < leastLength) {
+ chunk = new byte[leastLength];
+ emptyChunk = chunk;
+ }
}
- return emptyChunk;
+ return chunk;
}
/**
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestCoderUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestCoderUtil.java
index 04268a524e9..d47bb802aef 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestCoderUtil.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestCoderUtil.java
@@ -19,20 +19,42 @@
package org.apache.hadoop.io.erasurecode.rawcoder;
import org.apache.hadoop.HadoopIllegalArgumentException;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
+import java.lang.reflect.Field;
import java.nio.ByteBuffer;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
/**
* Test of the utility of raw erasure coder.
*/
public class TestCoderUtil {
+ private static final int INITIAL_EMPTY_CHUNK_LENGTH = 4096;
+ private static final int SMALL_CHUNK_SIZE = INITIAL_EMPTY_CHUNK_LENGTH + 1;
+ private static final int LARGE_CHUNK_SIZE = SMALL_CHUNK_SIZE * 2;
+
private final int numInputs = 9;
private final int chunkSize = 1024;
+ @BeforeEach
+ public void resetEmptyChunk() throws Exception {
+ Field emptyChunk = CoderUtil.class.getDeclaredField("emptyChunk");
+ emptyChunk.setAccessible(true);
+ synchronized (CoderUtil.class) {
+ emptyChunk.set(null, new byte[INITIAL_EMPTY_CHUNK_LENGTH]);
+ }
+ }
+
@Test
public void testGetEmptyChunk() {
byte[] ret = CoderUtil.getEmptyChunk(chunkSize);
@@ -58,6 +80,36 @@ public void testResetBuffer() {
}
}
+ @Test
+ public void testGetEmptyChunkDoesNotShrinkWhenCacheGrowsConcurrently()
+ throws Exception {
+ AtomicReference<Thread> workerThread = new AtomicReference<>();
+ ExecutorService executor = Executors.newSingleThreadExecutor(r -> {
+ Thread thread = new Thread(r, "get-empty-chunk-small");
+ workerThread.set(thread);
+ return thread;
+ });
+
+ try {
+ Future<byte[]> smallChunk;
+ synchronized (CoderUtil.class) {
+ smallChunk = executor.submit(() -> CoderUtil.getEmptyChunk(
+ SMALL_CHUNK_SIZE));
+ waitUntilBlocked(workerThread);
+ assertTrue(CoderUtil.getEmptyChunk(LARGE_CHUNK_SIZE).length
+ >= LARGE_CHUNK_SIZE);
+ }
+
+ assertTrue(smallChunk.get(10, TimeUnit.SECONDS).length
+ >= LARGE_CHUNK_SIZE,
+ "concurrent caller should return the larger chunk already cached");
+ assertTrue(CoderUtil.getEmptyChunk(LARGE_CHUNK_SIZE).length
+ >= LARGE_CHUNK_SIZE, "empty chunk cache should not shrink");
+ } finally {
+ executor.shutdownNow();
+ }
+ }
+
@Test
public void testGetValidIndexes() {
byte[][] inputs = new byte[numInputs][];
@@ -124,4 +176,20 @@ public void testNoValidInput() {
CoderUtil.findFirstValidInput(inputs);
});
}
-}
\ No newline at end of file
+
+ private static void waitUntilBlocked(AtomicReference<Thread> threadRef)
+ throws InterruptedException {
+ long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(10);
+ while (System.nanoTime() < deadline) {
+ Thread thread = threadRef.get();
+ if (thread != null && thread.getState() == Thread.State.BLOCKED) {
+ return;
+ }
+ Thread.sleep(10);
+ }
+
+ Thread thread = threadRef.get();
+ fail("small getEmptyChunk caller did not block on CoderUtil.class; state="
+ + (thread == null ? "not started" : thread.getState()));
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]