This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-2.6
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.6 by this push:
     new f11c4dd5a00 HBASE-28850 Only return from ReplicationSink.replicationEntries while all background tasks are finished (#6263) (#6271)
f11c4dd5a00 is described below

commit f11c4dd5a0053c4f4ffe11e0398d64ef594dcc37
Author: Duo Zhang <zhang...@apache.org>
AuthorDate: Thu Sep 19 23:05:06 2024 +0800

    HBASE-28850 Only return from ReplicationSink.replicationEntries while all background tasks are finished (#6263) (#6271)
    
    Signed-off-by: Andrew Purtell <apurt...@apache.org>
    (cherry picked from commit 52082bc5b80a60406bfaaa630ed5cb23027436c1)
    (cherry picked from commit 0dc334f572329be7eb2455cec3519fc820c04c25)
---
 .../replication/regionserver/ReplicationSink.java  | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSink.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSink.java
index 2fe15ab867d..961e4d85966 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSink.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSink.java
@@ -509,17 +509,33 @@ public class ReplicationSink {
       }
       futures.addAll(batchRows.stream().map(table::batchAll).collect(Collectors.toList()));
     }
-
+    // Here we will always wait until all futures are finished, even if there are failures when
+    // getting from a future in the middle. This is because this method may be called in a rpc call,
+    // so the batch operations may reference some off heap cells(through CellScanner). If we return
+    // earlier here, the rpc call may be finished and they will release the off heap cells before
+    // some of the batch operations finish, and then cause corrupt data or even crash the region
+    // server. See HBASE-28584 and HBASE-28850 for more details.
+    IOException error = null;
     for (Future<?> future : futures) {
       try {
         FutureUtils.get(future);
       } catch (RetriesExhaustedException e) {
+        IOException ioe;
         if (e.getCause() instanceof TableNotFoundException) {
-          throw new TableNotFoundException("'" + tableName + "'");
+          ioe = new TableNotFoundException("'" + tableName + "'");
+        } else {
+          ioe = e;
+        }
+        if (error == null) {
+          error = ioe;
+        } else {
+          error.addSuppressed(ioe);
         }
-        throw e;
       }
     }
+    if (error != null) {
+      throw error;
+    }
   }
 
   /**

Reply via email to