>From Ali Alsuliman <[email protected]>:
Ali Alsuliman has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20769?usp=email )
Change subject: [NO ISSUE][RT] Truncate warning message to UTF-8 limit
......................................................................
[NO ISSUE][RT] Truncate warning message to UTF-8 limit
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
When serializing the warnings, truncate the message to
the modified UTF-8 limit of 65535 bytes, since
DataOutput.writeUTF() cannot write a longer encoded string.
Ext-ref: MB-69956
Change-Id: Ic18a027bc88da8813c6f58c40f6eef236fdb1d14
---
M
hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/Warning.java
M
hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/JavaSerializationUtils.java
2 files changed, 39 insertions(+), 1 deletion(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/69/20769/1
diff --git
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/Warning.java
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/Warning.java
index 5f01559..8705f36 100644
---
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/Warning.java
+++
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/Warning.java
@@ -26,6 +26,7 @@
import org.apache.commons.lang3.SerializationUtils;
import org.apache.hyracks.api.util.ErrorMessageUtil;
+import org.apache.hyracks.api.util.JavaSerializationUtils;
public class Warning implements Serializable {
@@ -85,7 +86,7 @@
public void writeFields(DataOutput output) throws IOException {
output.writeUTF(component);
output.writeInt(code);
- output.writeUTF(message);
+ JavaSerializationUtils.writeTruncatedUTF(output, message);
SourceLocation.writeFields(srcLocation, output);
writeParams(output, params);
}
diff --git
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/JavaSerializationUtils.java
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/JavaSerializationUtils.java
index 8e24204..90a82db 100644
---
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/JavaSerializationUtils.java
+++
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/JavaSerializationUtils.java
@@ -20,6 +20,7 @@
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
+import java.io.DataOutput;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
@@ -34,10 +35,46 @@
public class JavaSerializationUtils {
private static IJavaSerializationProvider serProvider =
DefaultJavaSerializationProvider.INSTANCE;
+ private static final int MAX_UTF_BYTES = 65535;
+ private static final int SAFE_CHAR_LIMIT = MAX_UTF_BYTES / 3;
private JavaSerializationUtils() {
}
+ /**
+ * This is to be used when small strings are the usual case. For strings
whose modified UTF-8 byte count is larger
+ * than 65535, it truncates the string to a prefix whose encoding is at most
65535 bytes.
+ */
+ public static void writeTruncatedUTF(DataOutput out, String s) throws
IOException {
+ if (s.length() <= SAFE_CHAR_LIMIT) {
+ out.writeUTF(s);
+ return;
+ }
+ truncateToUTFLimit(out, s);
+ }
+
+ private static void truncateToUTFLimit(DataOutput out, String s) throws
IOException {
+ int modifiedUtf8Len = 0;
+ int i = 0;
+ while (i < s.length()) {
+ int c = s.charAt(i);
+ int bytes;
+ if (c >= 0x0001 && c <= 0x007F) {
+ bytes = 1;
+ } else if (c <= 0x07FF) {
+ bytes = 2;
+ } else {
+ bytes = 3;
+ }
+ modifiedUtf8Len += bytes;
+ if (modifiedUtf8Len > MAX_UTF_BYTES) {
+ break;
+ }
+ i++;
+ }
+ out.writeUTF(i == s.length() ? s : s.substring(0, i));
+ }
+
public static byte[] serialize(Serializable jobSpec) throws IOException {
if (jobSpec instanceof byte[]) {
return (byte[]) jobSpec;
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20769?usp=email
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings?usp=email
Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: trinity
Gerrit-Change-Id: Ic18a027bc88da8813c6f58c40f6eef236fdb1d14
Gerrit-Change-Number: 20769
Gerrit-PatchSet: 1
Gerrit-Owner: Ali Alsuliman <[email protected]>