This is an automated email from the ASF dual-hosted git repository.

smengcl pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 24b9a855f8a HADOOP-19900. EC write can fail with ArrayIndexOutOfBoundsException due to CoderUtil emptyChunk resize race (#8518)
24b9a855f8a is described below

commit 24b9a855f8afd82e40265275fdba1acf32df96de
Author: Siyao Meng <[email protected]>
AuthorDate: Wed May 27 17:33:05 2026 -0700

    HADOOP-19900. EC write can fail with ArrayIndexOutOfBoundsException due to CoderUtil emptyChunk resize race (#8518)
    
    Generated-by: Claude Code (Opus 4.7)
    (cherry picked from commit e33eb713c842cd399140b6e9e9c935228f8ad194)
---
 .../hadoop/io/erasurecode/rawcoder/CoderUtil.java  | 18 ++++--
 .../io/erasurecode/rawcoder/TestCoderUtil.java     | 70 +++++++++++++++++++++-
 2 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/CoderUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/CoderUtil.java
index 6072f13c15b..3fb43f0dad8 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/CoderUtil.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/CoderUtil.java
@@ -42,15 +42,25 @@ private CoderUtil() {
    * @return empty chunk of zero bytes
    */
   static byte[] getEmptyChunk(int leastLength) {
-    if (emptyChunk.length >= leastLength) {
-      return emptyChunk; // In most time
+    byte[] chunk = emptyChunk;
+    if (chunk.length >= leastLength) {
+      return chunk; // In most time
     }
 
     synchronized (CoderUtil.class) {
-      emptyChunk = new byte[leastLength];
+      /*
+       * Recheck under the lock: another caller may already have grown the
+       * cache while this caller waited. A larger cached chunk is valid for a
+       * smaller request, so only allocate when the cache is still too small.
+       */
+      chunk = emptyChunk;
+      if (chunk.length < leastLength) {
+        chunk = new byte[leastLength];
+        emptyChunk = chunk;
+      }
     }
 
-    return emptyChunk;
+    return chunk;
   }
 
   /**
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestCoderUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestCoderUtil.java
index 04268a524e9..d47bb802aef 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestCoderUtil.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestCoderUtil.java
@@ -19,20 +19,42 @@
 package org.apache.hadoop.io.erasurecode.rawcoder;
 
 import org.apache.hadoop.HadoopIllegalArgumentException;
+import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
+import java.lang.reflect.Field;
 import java.nio.ByteBuffer;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
 
 /**
  * Test of the utility of raw erasure coder.
  */
 public class TestCoderUtil {
+  private static final int INITIAL_EMPTY_CHUNK_LENGTH = 4096;
+  private static final int SMALL_CHUNK_SIZE = INITIAL_EMPTY_CHUNK_LENGTH + 1;
+  private static final int LARGE_CHUNK_SIZE = SMALL_CHUNK_SIZE * 2;
+
   private final int numInputs = 9;
   private final int chunkSize = 1024;
 
+  @BeforeEach
+  public void resetEmptyChunk() throws Exception {
+    Field emptyChunk = CoderUtil.class.getDeclaredField("emptyChunk");
+    emptyChunk.setAccessible(true);
+    synchronized (CoderUtil.class) {
+      emptyChunk.set(null, new byte[INITIAL_EMPTY_CHUNK_LENGTH]);
+    }
+  }
+
   @Test
   public void testGetEmptyChunk() {
     byte[] ret = CoderUtil.getEmptyChunk(chunkSize);
@@ -58,6 +80,36 @@ public void testResetBuffer() {
     }
   }
 
+  @Test
+  public void testGetEmptyChunkDoesNotShrinkWhenCacheGrowsConcurrently()
+      throws Exception {
+    AtomicReference<Thread> workerThread = new AtomicReference<>();
+    ExecutorService executor = Executors.newSingleThreadExecutor(r -> {
+      Thread thread = new Thread(r, "get-empty-chunk-small");
+      workerThread.set(thread);
+      return thread;
+    });
+
+    try {
+      Future<byte[]> smallChunk;
+      synchronized (CoderUtil.class) {
+        smallChunk = executor.submit(() -> CoderUtil.getEmptyChunk(
+            SMALL_CHUNK_SIZE));
+        waitUntilBlocked(workerThread);
+        assertTrue(CoderUtil.getEmptyChunk(LARGE_CHUNK_SIZE).length
+            >= LARGE_CHUNK_SIZE);
+      }
+
+      assertTrue(smallChunk.get(10, TimeUnit.SECONDS).length
+          >= LARGE_CHUNK_SIZE,
+          "concurrent caller should return the larger chunk already cached");
+      assertTrue(CoderUtil.getEmptyChunk(LARGE_CHUNK_SIZE).length
+          >= LARGE_CHUNK_SIZE, "empty chunk cache should not shrink");
+    } finally {
+      executor.shutdownNow();
+    }
+  }
+
   @Test
   public void testGetValidIndexes() {
     byte[][] inputs = new byte[numInputs][];
@@ -124,4 +176,20 @@ public void testNoValidInput() {
       CoderUtil.findFirstValidInput(inputs);
     });
   }
-}
\ No newline at end of file
+
+  private static void waitUntilBlocked(AtomicReference<Thread> threadRef)
+      throws InterruptedException {
+    long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(10);
+    while (System.nanoTime() < deadline) {
+      Thread thread = threadRef.get();
+      if (thread != null && thread.getState() == Thread.State.BLOCKED) {
+        return;
+      }
+      Thread.sleep(10);
+    }
+
+    Thread thread = threadRef.get();
+    fail("small getEmptyChunk caller did not block on CoderUtil.class; state="
+        + (thread == null ? "not started" : thread.getState()));
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to