(hbase-thirdparty) branch master updated: HBASE-29942 [hbase-thirdparty] Bump protobuf java to 4.34.0 (#156)

zhangduo Thu, 05 Mar 2026 19:37:33 -0800

This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase-thirdparty.git



The following commit(s) were added to refs/heads/master by this push:
     new 4832fa6  HBASE-29942 [hbase-thirdparty] Bump protobuf java to 4.34.0 
(#156)
4832fa6 is described below

commit 4832fa673c257a442198d95cb82b7ed97f601597
Author: Duo Zhang <[email protected]>
AuthorDate: Fri Mar 6 11:35:49 2026 +0800

    HBASE-29942 [hbase-thirdparty] Bump protobuf java to 4.34.0 (#156)
    
    Signed-off-by: Nihal Jain <[email protected]>
---
 .../src/main/patches/HBASE-15789.patch             | 293 ++++++---------------
 .../src/main/patches/HBASE-17087.patch             |   4 +-
 .../src/main/patches/HBASE-17239.patch             |   5 +-
 pom.xml                                            |   2 +-
 4 files changed, 90 insertions(+), 214 deletions(-)

diff --git a/hbase-shaded-protobuf/src/main/patches/HBASE-15789.patch 
b/hbase-shaded-protobuf/src/main/patches/HBASE-15789.patch
index c02bbb8..e28bedb 100644
--- a/hbase-shaded-protobuf/src/main/patches/HBASE-15789.patch
+++ b/hbase-shaded-protobuf/src/main/patches/HBASE-15789.patch
@@ -13,10 +13,10 @@ index e7d9eec8e..e17fefd39 100644
      byte[] buffer = getBuffer();
 diff --git a/src/main/java/com/google/protobuf/ByteInput.java 
b/src/main/java/com/google/protobuf/ByteInput.java
 new file mode 100644
-index 000000000..37c3824a3
+index 000000000..190aac07b
 --- /dev/null
 +++ b/src/main/java/com/google/protobuf/ByteInput.java
-@@ -0,0 +1,81 @@
+@@ -0,0 +1,86 @@
 +// Protocol Buffers - Google's data interchange format
 +// Copyright 2008 Google Inc.  All rights reserved.
 +// https://developers.google.com/protocol-buffers/
@@ -94,16 +94,21 @@ index 000000000..37c3824a3
 +  public abstract int read(int offset, ByteBuffer out);
 +
 +  /**
++   * Get a long value from the given offset
++   */
++  public abstract long getLong(int offset);
++
++  /**
 +   * @return Total number of bytes in this ByteInput.
 +   */
 +  public abstract int size();
 +}
 diff --git a/src/main/java/com/google/protobuf/ByteInputByteString.java 
b/src/main/java/com/google/protobuf/ByteInputByteString.java
 new file mode 100644
-index 000000000..320977290
+index 000000000..3493393c8
 --- /dev/null
 +++ b/src/main/java/com/google/protobuf/ByteInputByteString.java
-@@ -0,0 +1,254 @@
+@@ -0,0 +1,235 @@
 +// Protocol Buffers - Google's data interchange format
 +// Copyright 2008 Google Inc.  All rights reserved.
 +// https://developers.google.com/protocol-buffers/
@@ -265,30 +270,11 @@ index 000000000..320977290
 +  }
 +
 +  @Override
-+  protected int partialIsValidUtf8(int state, int offset, int length) {
-+    int off = getAbsoluteOffset(offset);
-+    return Utf8.partialIsValidUtf8(state, buffer, off, off + length);
-+  }
-+
-+  @Override
-+  public boolean equals(Object other) {
-+    if (other == this) {
-+      return true;
-+    }
-+    if (!(other instanceof ByteString)) {
-+      return false;
-+    }
-+    ByteString otherString = ((ByteString) other);
-+    if (size() != otherString.size()) {
-+      return false;
-+    }
-+    if (size() == 0) {
-+      return true;
-+    }
++  public boolean equalsInternal(ByteString other) {
 +    if (other instanceof RopeByteString) {
-+      return other.equals(this);
++      return other.equalsInternal(this);
 +    }
-+    return Arrays.equals(this.toByteArray(), otherString.toByteArray());
++    return Arrays.equals(this.toByteArray(), other.toByteArray());
 +  }
 +
 +  @Override
@@ -359,11 +345,11 @@ index 000000000..320977290
 +  }
 +}
 diff --git a/src/main/java/com/google/protobuf/ByteString.java 
b/src/main/java/com/google/protobuf/ByteString.java
-index 558d5a6ab..28795acc2 100644
+index 900f98850..6789cafdf 100644
 --- a/src/main/java/com/google/protobuf/ByteString.java
 +++ b/src/main/java/com/google/protobuf/ByteString.java
-@@ -429,6 +429,13 @@ public abstract class ByteString implements 
Iterable<Byte>, Serializable {
-     return new NioByteString(buffer);
+@@ -457,6 +457,13 @@ public abstract class ByteString implements 
Iterable<Byte>, Serializable {
+     }
    }
 
 +  /**
@@ -376,7 +362,7 @@ index 558d5a6ab..28795acc2 100644
    /**
     * Wraps the given bytes into a {@code ByteString}. Intended for internal 
usage within the library
     * to force a classload of ByteString before LiteralByteString.
-@@ -961,8 +968,6 @@ public abstract class ByteString implements 
Iterable<Byte>, Serializable {
+@@ -1042,8 +1049,6 @@ public abstract class ByteString implements 
Iterable<Byte>, Serializable {
       * @return true for equality of substrings, else false.
       */
      abstract boolean equalsRange(ByteString other, int offset, int length);
@@ -386,10 +372,10 @@ index 558d5a6ab..28795acc2 100644
 
    /**
 diff --git a/src/main/java/com/google/protobuf/CodedInputStream.java 
b/src/main/java/com/google/protobuf/CodedInputStream.java
-index fbdabf225..cd6f80779 100644
+index 0d68d1797..1cfd5765d 100644
 --- a/src/main/java/com/google/protobuf/CodedInputStream.java
 +++ b/src/main/java/com/google/protobuf/CodedInputStream.java
-@@ -191,6 +191,15 @@ public abstract class CodedInputStream {
+@@ -200,6 +200,15 @@ public abstract class CodedInputStream {
      }
    }
 
@@ -405,7 +391,7 @@ index fbdabf225..cd6f80779 100644
    /** Disable construction/inheritance outside of this class. */
    private CodedInputStream() {}
 
-@@ -3943,4 +3952,652 @@ public abstract class CodedInputStream {
+@@ -2605,4 +2614,652 @@ public abstract class CodedInputStream {
        }
      }
    }
@@ -1059,219 +1045,110 @@ index fbdabf225..cd6f80779 100644
 +  }
  }
 diff --git a/src/main/java/com/google/protobuf/Utf8.java 
b/src/main/java/com/google/protobuf/Utf8.java
-index d52006754..92ed1f1f7 100644
+index 5abe89dc2..60a4de572 100644
 --- a/src/main/java/com/google/protobuf/Utf8.java
 +++ b/src/main/java/com/google/protobuf/Utf8.java
-@@ -196,6 +196,16 @@ final class Utf8 {
+@@ -122,6 +122,10 @@ final class Utf8 {
      }
    }
 
-+  private static int incompleteStateFor(ByteInput bytes, int index, int 
limit) {
-+    int byte1 = bytes.read(index - 1);
-+    switch (limit - index) {
-+      case 0: return incompleteStateFor(byte1);
-+      case 1: return incompleteStateFor(byte1, bytes.read(index));
-+      case 2: return incompleteStateFor(byte1, bytes.read(index), 
bytes.read(index + 1));
-+      default: throw new AssertionError();
-+    }
++  static boolean isValidUtf8(ByteInput buffer, int offset, int limit) {
++    return processor.isValidUtf8ByteInput(buffer, offset, limit);
 +  }
 +
    // These UTF-8 handling methods are copied from Guava's Utf8 class with a 
modification to throw
    // a protocol buffer local exception. This exception is then caught in 
CodedOutputStream so it can
    // fallback to more lenient behavior.
-@@ -318,6 +328,24 @@ final class Utf8 {
-     return processor.decodeUtf8(bytes, index, size);
+@@ -266,6 +270,16 @@ final class Utf8 {
+     return i - index;
    }
 
-+  /**
-+   * Determines if the given {@link ByteInput} is a valid UTF-8 string.
-+   *
-+   * @param buffer the buffer to check.
-+   */
-+  static boolean isValidUtf8(ByteInput buffer, int index, int limit) {
-+    return processor.isValidUtf8(buffer, index, limit);
-+  }
-+
-+  /**
-+   * Determines if the given {@link ByteInput} is a partially valid UTF-8 
string.
-+   *
-+   * @param buffer the buffer to check.
-+   */
-+  static int partialIsValidUtf8(int state, ByteInput buffer, int index, int 
limit) {
-+    return processor.partialIsValidUtf8(state, buffer, index, limit);
++  private static int estimateConsecutiveAscii(ByteInput buffer, int index, 
int limit) {
++    int i = index;
++    final int lim = limit - 7;
++    // This simple loop stops when we encounter a byte >= 0x80 (i.e. 
non-ASCII).
++    // To speed things up further, we're reading longs instead of bytes so we 
use a mask to
++    // determine if any byte in the current long is non-ASCII.
++    for (; i < lim && (buffer.getLong(i) & ASCII_MASK_LONG) == 0; i += 8) {}
++    return i - index;
 +  }
 +
-   /**
-    * Encodes the given characters to the target {@link ByteBuffer} using 
UTF-8 encoding.
-    *
-@@ -694,6 +722,169 @@ final class Utf8 {
-       return new String(resultArr, 0, resultPos);
+   /** A processor of UTF-8 strings, providing methods for checking validity 
and encoding. */
+   abstract static class Processor {
+     /**
+@@ -357,6 +371,75 @@ final class Utf8 {
+       }
      }
 
-+    public boolean isValidUtf8(ByteInput buffer, int index, int limit) {
-+      return partialIsValidUtf8(COMPLETE, buffer, index, limit) == COMPLETE;
-+    }
-+
-+    int partialIsValidUtf8(int state, ByteInput bytes, int index, int limit) {
-+      if (state != COMPLETE) {
-+        // The previous decoding operation was incomplete (or malformed).
-+        // We look for a well-formed sequence consisting of bytes from
-+        // the previous decoding operation (stored in state) together
-+        // with bytes from the array slice.
-+        //
-+        // We expect such "straddler characters" to be rare.
-+
-+        if (index >= limit) {  // No bytes? No progress.
-+          return state;
-+        }
-+        int byte1 = (byte) state;
-+        // byte1 is never ASCII.
-+        if (byte1 < (byte) 0xE0) {
-+          // two-byte form
-+
-+          // Simultaneously checks for illegal trailing-byte in
-+          // leading position and overlong 2-byte form.
-+          if (byte1 < (byte) 0xC2
-+              // byte2 trailing-byte test
-+              || bytes.read(index++) > (byte) 0xBF) {
-+            return MALFORMED;
-+          }
-+        } else if (byte1 < (byte) 0xF0) {
-+          // three-byte form
-+
-+          // Get byte2 from saved state or array
-+          int byte2 = (byte) ~(state >> 8);
-+          if (byte2 == 0) {
-+            byte2 = bytes.read(index++);
-+            if (index >= limit) {
-+              return incompleteStateFor(byte1, byte2);
-+            }
-+          }
-+          if (byte2 > (byte) 0xBF
-+              // overlong? 5 most significant bits must not all be zero
-+              || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
-+              // illegal surrogate codepoint?
-+              || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
-+              // byte3 trailing-byte test
-+              || bytes.read(index++) > (byte) 0xBF) {
-+            return MALFORMED;
-+          }
-+        } else {
-+          // four-byte form
-+
-+          // Get byte2 and byte3 from saved state or array
-+          int byte2 = (byte) ~(state >> 8);
-+          int byte3 = 0;
-+          if (byte2 == 0) {
-+            byte2 = bytes.read(index++);
-+            if (index >= limit) {
-+              return incompleteStateFor(byte1, byte2);
-+            }
-+          } else {
-+            byte3 = (byte) (state >> 16);
-+          }
-+          if (byte3 == 0) {
-+            byte3 = bytes.read(index++);
-+            if (index >= limit) {
-+              return incompleteStateFor(byte1, byte2, byte3);
-+            }
-+          }
-+
-+          // If we were called with state == MALFORMED, then byte1 is 0xFF,
-+          // which never occurs in well-formed UTF-8, and so we will return
-+          // MALFORMED again below.
-+
-+          if (byte2 > (byte) 0xBF
-+              // Check that 1 <= plane <= 16.  Tricky optimized form of:
-+              // if (byte1 > (byte) 0xF4 ||
-+              //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
-+              //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
-+              || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
-+              // byte3 trailing-byte test
-+              || byte3 > (byte) 0xBF
-+              // byte4 trailing-byte test
-+              || bytes.read(index++) > (byte) 0xBF) {
-+            return MALFORMED;
-+          }
-+        }
-+      }
-+
-+      return partialIsValidUtf8(bytes, index, limit);
-+    }
-+
-+    private static int partialIsValidUtf8(ByteInput bytes, int index, int 
limit) {
-+      // Optimize for 100% ASCII (Hotspot loves small simple top-level loops 
like this).
-+      // This simple loop stops when we encounter a byte >= 0x80 (i.e. 
non-ASCII).
-+      while (index < limit && bytes.read(index) >= 0) {
-+        index++;
-+      }
-+
-+      return (index >= limit) ? COMPLETE : partialIsValidUtf8NonAscii(bytes, 
index, limit);
-+    }
-+
-+    private static int partialIsValidUtf8NonAscii(ByteInput bytes, int index, 
int limit) {
-+      for (;;) {
-+        int byte1, byte2;
++    protected boolean isValidUtf8ByteInput(ByteInput buffer, int index, int 
limit) {
++      index += estimateConsecutiveAscii(buffer, index, limit);
 +
++      for (; ; ) {
 +        // Optimize for interior runs of ASCII bytes.
++        int byte1;
 +        do {
 +          if (index >= limit) {
-+            return COMPLETE;
++            return true;
 +          }
-+        } while ((byte1 = bytes.read(index++)) >= 0);
++        } while ((byte1 = buffer.read(index++)) >= 0);
 +
++        // If we're here byte1 is not ASCII. Only need to handle 2-4 byte 
forms.
 +        if (byte1 < (byte) 0xE0) {
-+          // two-byte form
-+
++          // Two-byte form (110xxxxx 10xxxxxx)
 +          if (index >= limit) {
 +            // Incomplete sequence
-+            return byte1;
++            return false;
 +          }
 +
 +          // Simultaneously checks for illegal trailing-byte in
 +          // leading position and overlong 2-byte form.
-+          if (byte1 < (byte) 0xC2
-+              || bytes.read(index++) > (byte) 0xBF) {
-+            return MALFORMED;
++          if (byte1 < (byte) 0xC2 || buffer.read(index) > (byte) 0xBF) {
++            return false;
 +          }
++          index++;
 +        } else if (byte1 < (byte) 0xF0) {
-+          // three-byte form
-+
-+          if (index >= limit - 1) { // incomplete sequence
-+            return incompleteStateFor(bytes, index, limit);
++          // Three-byte form (1110xxxx 10xxxxxx 10xxxxxx)
++          if (index >= limit - 1) {
++            // Incomplete sequence
++            return false;
 +          }
-+          if ((byte2 = bytes.read(index++)) > (byte) 0xBF
-+              // overlong? 5 most significant bits must not all be zero
-+              || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
-+              // check for illegal surrogate codepoints
-+              || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
-+              // byte3 trailing-byte test
-+              || bytes.read(index++) > (byte) 0xBF) {
-+            return MALFORMED;
++
++          byte byte2 = buffer.read(index++);
++          if (byte2 > (byte) 0xBF
++                  // overlong? 5 most significant bits must not all be zero
++                  || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
++                  // check for illegal surrogate codepoints
++                  || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
++                  // byte3 trailing-byte test
++                  || buffer.read(index) > (byte) 0xBF) {
++            return false;
 +          }
++          index++;
 +        } else {
-+          // four-byte form
-+
-+          if (index >= limit - 2) {  // incomplete sequence
-+            return incompleteStateFor(bytes, index, limit);
 +          // Four-byte form (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
++          if (index >= limit - 2) {
++            // Incomplete sequence
++            return false;
 +          }
-+          if ((byte2 = bytes.read(index++)) > (byte) 0xBF
-+              // Check that 1 <= plane <= 16.  Tricky optimized form of:
-+              // if (byte1 > (byte) 0xF4 ||
-+              //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
-+              //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
-+              || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
-+              // byte3 trailing-byte test
-+              || bytes.read(index++) > (byte) 0xBF
-+              // byte4 trailing-byte test
-+              || bytes.read(index++) > (byte) 0xBF) {
-+            return MALFORMED;
++
++          // TODO: Consider using getInt() to improve performance.
++          int byte2 = buffer.read(index++);
++          if (byte2 > (byte) 0xBF
++                  // Check that 1 <= plane <= 16.  Tricky optimized form of:
++                  // if (byte1 > (byte) 0xF4 ||
++                  //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
++                  //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
++                  || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
++                  // byte3 trailing-byte test
++                  || buffer.read(index++) > (byte) 0xBF
++                  // byte4 trailing-byte test
++                  || buffer.read(index++) > (byte) 0xBF) {
++            return false;
 +          }
 +        }
 +      }
 +    }
 +
      /**
-      * Encodes an input character sequence ({@code in}) to UTF-8 in the 
target array ({@code out}).
-      * For a string, this method is similar to
-
+      * Decodes the given byte array slice into a {@link String}.
+      *
diff --git a/hbase-shaded-protobuf/src/main/patches/HBASE-17087.patch 
b/hbase-shaded-protobuf/src/main/patches/HBASE-17087.patch
index 297d2cf..fb5032c 100644
--- a/hbase-shaded-protobuf/src/main/patches/HBASE-17087.patch
+++ b/hbase-shaded-protobuf/src/main/patches/HBASE-17087.patch
@@ -1,8 +1,8 @@
 diff --git a/src/main/java/com/google/protobuf/ByteInputByteString.java 
b/src/main/java/com/google/protobuf/ByteInputByteString.java
-index 320977290..2e5776eea 100644
+index 3493393c8..136b90059 100644
 --- a/src/main/java/com/google/protobuf/ByteInputByteString.java
 +++ b/src/main/java/com/google/protobuf/ByteInputByteString.java
-@@ -249,6 +249,8 @@ final class ByteInputByteString extends 
ByteString.LeafByteString {
+@@ -230,6 +230,8 @@ final class ByteInputByteString extends 
ByteString.LeafByteString {
    public CodedInputStream newCodedInput() {
      // We trust CodedInputStream not to modify the bytes, or to give anyone
      // else access to them.
diff --git a/hbase-shaded-protobuf/src/main/patches/HBASE-17239.patch 
b/hbase-shaded-protobuf/src/main/patches/HBASE-17239.patch
index 19bb9eb..1c521fc 100644
--- a/hbase-shaded-protobuf/src/main/patches/HBASE-17239.patch
+++ b/hbase-shaded-protobuf/src/main/patches/HBASE-17239.patch
@@ -1,8 +1,8 @@
 diff --git a/src/main/java/com/google/protobuf/CodedInputStream.java 
b/src/main/java/com/google/protobuf/CodedInputStream.java
-index cd6f80779..3c6d8e878 100644
+index 1cfd5765d..3308cebdd 100644
 --- a/src/main/java/com/google/protobuf/CodedInputStream.java
 +++ b/src/main/java/com/google/protobuf/CodedInputStream.java
-@@ -192,11 +192,7 @@ public abstract class CodedInputStream {
+@@ -201,11 +201,7 @@ public abstract class CodedInputStream {
    }
 
    /** Create a new CodedInputStream wrapping the given {@link ByteInput}. */
@@ -37,4 +37,3 @@ index 15c1da969..54d2f975a 100644
    /**
     * Writes the given {@link ByteString} to the provided {@link ByteOutput}. 
Calling this method may
     * result in multiple operations on the target {@link ByteOutput} (i.e. for 
roped {@link
-
diff --git a/pom.xml b/pom.xml
index 3fc8d11..f8d10e8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -136,7 +136,7 @@
     <maven.min.version>3.3.3</maven.min.version>
     <os.maven.version>1.7.1</os.maven.version>
     <rename.offset>org.apache.hbase.thirdparty</rename.offset>
-    <protobuf.version>4.31.1</protobuf.version>
+    <protobuf.version>4.34.0</protobuf.version>
     <netty.version>4.1.131.Final</netty.version>
     <netty.tcnative.version>2.0.75.Final</netty.tcnative.version>
     <guava.version>33.4.8-jre</guava.version>

(hbase-thirdparty) branch master updated: HBASE-29942 [hbase-thirdparty] Bump protobuf java to 4.34.0 (#156)

Reply via email to