This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git
The following commit(s) were added to refs/heads/main by this push:
new 0bb4fefc fix(java): Fix empty string processing in MetaStringBytes
(#2212)
0bb4fefc is described below
commit 0bb4fefc9ea5560e0c47e7792c34cdd94b8201b1
Author: LouShaokun <[email protected]>
AuthorDate: Fri May 9 18:12:49 2025 +0800
fix(java): Fix empty string processing in MetaStringBytes (#2212)
## What does this PR do?
This PR fixes issue #2096 by improving the handling of empty strings in
MetaStringBytes. The primary changes are:
1. Explicitly defining, via `Encoding.forEmptyStr()`, that empty strings
always use the UTF-8 meta string encoding
2. Adding a dedicated constant for empty MetaStringBytes: `public static
final MetaStringBytes EMPTY = MetaStringBytes.of(MetaString.EMPTY)`
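3. Adding a zero-length check to both `readSmallMetaStringBytes` overloads
in MetaStringResolver: after the encoding byte is read, they return
`MetaStringBytes.EMPTY` instead of reading payload bytes from the buffer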
These changes ensure that empty strings are handled consistently
throughout the codebase and prevent potential errors when processing
empty strings during serialization and deserialization.
## Related issues
- #2096
## Does this PR introduce any user-facing change?
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
## Additional Notes
Since this PR involves changes across multiple components related to
string handling, I'd appreciate a thorough review to ensure there are no
unintended side effects. If there's a better approach to solving the
empty string issue, I'm open to suggestions.
Also, please note that the current main branch has an issue (#2211)
affecting CrossLanguageTest, which this PR will also encounter. It might
be beneficial to address #2211 first or at least be aware of it when
reviewing this PR.
---
.../main/java/org/apache/fury/meta/MetaString.java | 7 ++++++
.../org/apache/fury/meta/MetaStringEncoder.java | 29 +++++++++-------------
.../org/apache/fury/resolver/MetaStringBytes.java | 1 +
.../apache/fury/resolver/MetaStringResolver.java | 12 +++++++--
4 files changed, 30 insertions(+), 19 deletions(-)
diff --git a/java/fury-core/src/main/java/org/apache/fury/meta/MetaString.java
b/java/fury-core/src/main/java/org/apache/fury/meta/MetaString.java
index 3c2be21b..a85426f9 100644
--- a/java/fury-core/src/main/java/org/apache/fury/meta/MetaString.java
+++ b/java/fury-core/src/main/java/org/apache/fury/meta/MetaString.java
@@ -55,8 +55,15 @@ public class MetaString {
}
throw new IllegalArgumentException("Encoding flag not recognized: " +
value);
}
+
+ public static Encoding forEmptyStr() {
+ return UTF_8;
+ }
}
+ public static final MetaString EMPTY =
+ new MetaString("", Encoding.forEmptyStr(), '\0', '\0', new byte[0]);
+
private final String string;
private final Encoding encoding;
private final char specialChar1;
diff --git
a/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
b/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
index 396721f6..c163f5a7 100644
--- a/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
+++ b/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
@@ -20,8 +20,6 @@
package org.apache.fury.meta;
import java.nio.charset.StandardCharsets;
-import java.util.HashSet;
-import org.apache.fury.collection.Collections;
import org.apache.fury.meta.MetaString.Encoding;
import org.apache.fury.util.Preconditions;
import org.apache.fury.util.StringUtils;
@@ -55,7 +53,7 @@ public class MetaStringEncoder {
public MetaString encode(String input, Encoding[] encodings) {
if (input.isEmpty()) {
- return new MetaString(input, Encoding.UTF_8, specialChar1, specialChar2,
new byte[0]);
+ return MetaString.EMPTY;
}
if (!StringUtils.isLatin(input.toCharArray())) {
return new MetaString(
@@ -83,7 +81,7 @@ public class MetaStringEncoder {
throw new IllegalArgumentException("Non-ASCII characters in meta string
are not allowed");
}
if (input.isEmpty()) {
- return new MetaString(input, Encoding.UTF_8, specialChar1, specialChar2,
new byte[0]);
+ return MetaString.EMPTY;
}
byte[] bytes;
switch (encoding) {
@@ -107,42 +105,39 @@ public class MetaStringEncoder {
}
}
- public Encoding computeEncoding(String input) {
- return computeEncoding(input, Encoding.values());
- }
-
public Encoding computeEncoding(String input, Encoding[] encodings) {
- HashSet<Encoding> encodingSet = Collections.ofHashSet(encodings);
if (input.isEmpty()) {
- if (encodingSet.contains(Encoding.LOWER_SPECIAL)) {
- return Encoding.LOWER_SPECIAL;
- }
+ return Encoding.forEmptyStr();
+ }
+ boolean[] encodingFlags = new boolean[Encoding.values().length];
+ for (Encoding encoding : encodings) {
+ encodingFlags[encoding.ordinal()] = true;
}
char[] chars = input.toCharArray();
StringStatistics statistics = computeStatistics(chars);
if (statistics.canLowerSpecialEncoded) {
- if (encodingSet.contains(Encoding.LOWER_SPECIAL)) {
+ if (encodingFlags[Encoding.LOWER_SPECIAL.ordinal()]) {
return Encoding.LOWER_SPECIAL;
}
}
if (statistics.canLowerUpperDigitSpecialEncoded) {
if (statistics.digitCount != 0) {
- if (encodingSet.contains(Encoding.LOWER_UPPER_DIGIT_SPECIAL)) {
+ if (encodingFlags[Encoding.LOWER_UPPER_DIGIT_SPECIAL.ordinal()]) {
return Encoding.LOWER_UPPER_DIGIT_SPECIAL;
}
}
int upperCount = statistics.upperCount;
if (upperCount == 1 && Character.isUpperCase(chars[0])) {
- if (encodingSet.contains(Encoding.FIRST_TO_LOWER_SPECIAL)) {
+ if (encodingFlags[Encoding.FIRST_TO_LOWER_SPECIAL.ordinal()]) {
return Encoding.FIRST_TO_LOWER_SPECIAL;
}
}
if ((chars.length + upperCount) * 5 < (chars.length * 6)) {
- if (encodingSet.contains(Encoding.ALL_TO_LOWER_SPECIAL)) {
+ if (encodingFlags[Encoding.ALL_TO_LOWER_SPECIAL.ordinal()]) {
return Encoding.ALL_TO_LOWER_SPECIAL;
}
}
- if (encodingSet.contains(Encoding.LOWER_UPPER_DIGIT_SPECIAL)) {
+ if (encodingFlags[Encoding.LOWER_UPPER_DIGIT_SPECIAL.ordinal()]) {
return Encoding.LOWER_UPPER_DIGIT_SPECIAL;
}
}
diff --git
a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java
b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java
index 94458c94..a83a480b 100644
--- a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java
+++ b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java
@@ -30,6 +30,7 @@ import org.apache.fury.util.MurmurHash3;
@Internal
public final class MetaStringBytes {
static final short DEFAULT_DYNAMIC_WRITE_STRING_ID = -1;
+ public static final MetaStringBytes EMPTY =
MetaStringBytes.of(MetaString.EMPTY);
private static final int HEADER_MASK = 0xff;
final byte[] bytes;
diff --git
a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
index 1a6a3ee4..1742043b 100644
---
a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
+++
b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
@@ -222,8 +222,12 @@ public final class MetaStringResolver {
}
private MetaStringBytes readSmallMetaStringBytes(MemoryBuffer buffer, int
len) {
- long v1, v2 = 0;
byte encoding = buffer.readByte();
+ if (len == 0) {
+ assert encoding == MetaString.Encoding.UTF_8.getValue();
+ return MetaStringBytes.EMPTY;
+ }
+ long v1, v2 = 0;
if (len <= 8) {
v1 = buffer.readBytesAsInt64(len);
} else {
@@ -239,8 +243,12 @@ public final class MetaStringResolver {
private MetaStringBytes readSmallMetaStringBytes(
MemoryBuffer buffer, MetaStringBytes cache, int len) {
- long v1, v2 = 0;
byte encoding = buffer.readByte();
+ if (len == 0) {
+ assert encoding == MetaString.Encoding.UTF_8.getValue();
+ return MetaStringBytes.EMPTY;
+ }
+ long v1, v2 = 0;
if (len <= 8) {
v1 = buffer.readBytesAsInt64(len);
} else {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]