From Ali Alsuliman <[email protected]>: Ali Alsuliman has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20769?usp=email )
Change subject: [NO ISSUE][RT] Truncate warning message to writeUTF limit ...................................................................... [NO ISSUE][RT] Truncate warning message to writeUTF limit - user model changes: no - storage format changes: no - interface changes: no Details: When serializing the warnings, truncate the message to the UTF-8 limit of 65535 since DataOutput.writeUTF() has a hard limit. Ext-ref: MB-69956 Change-Id: Ic18a027bc88da8813c6f58c40f6eef236fdb1d14 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20769 Tested-by: Jenkins <[email protected]> Reviewed-by: Ali Alsuliman <[email protected]> Reviewed-by: Ian Maxon <[email protected]> --- M hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/Warning.java M hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/JavaSerializationUtils.java 2 files changed, 39 insertions(+), 1 deletion(-) Approvals: Ali Alsuliman: Looks good to me, but someone else must approve Ian Maxon: Looks good to me, approved Jenkins: Verified diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/Warning.java b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/Warning.java index 5f01559..8705f36 100644 --- a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/Warning.java +++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/Warning.java @@ -26,6 +26,7 @@ import org.apache.commons.lang3.SerializationUtils; import org.apache.hyracks.api.util.ErrorMessageUtil; +import org.apache.hyracks.api.util.JavaSerializationUtils; public class Warning implements Serializable { @@ -85,7 +86,7 @@ public void writeFields(DataOutput output) throws IOException { output.writeUTF(component); output.writeInt(code); - output.writeUTF(message); + JavaSerializationUtils.writeTruncatedUTF(output, message); 
SourceLocation.writeFields(srcLocation, output); writeParams(output, params); } diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/JavaSerializationUtils.java b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/JavaSerializationUtils.java index 8e24204..90a82db 100644 --- a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/JavaSerializationUtils.java +++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/JavaSerializationUtils.java @@ -20,6 +20,7 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.DataOutput; import java.io.IOException; import java.io.InputStream; import java.io.ObjectInputStream; @@ -34,10 +35,46 @@ public class JavaSerializationUtils { private static IJavaSerializationProvider serProvider = DefaultJavaSerializationProvider.INSTANCE; + private static final int MAX_UTF_BYTES = 65535; + private static final int SAFE_CHAR_LIMIT = MAX_UTF_BYTES / 3; private JavaSerializationUtils() { } + /** + * This is to be used when small strings are the usual case. For strings whose modified UTF-8 bytes count is larger + * than 65535, then it truncates the string to a one that is less than 65535. + */ + public static void writeTruncatedUTF(DataOutput out, String s) throws IOException { + if (s.length() <= SAFE_CHAR_LIMIT) { + out.writeUTF(s); + return; + } + truncateToUTFLimit(out, s); + } + + private static void truncateToUTFLimit(DataOutput out, String s) throws IOException { + int modifiedUtf8Len = 0; + int i = 0; + while (i < s.length()) { + int c = s.charAt(i); + int bytes; + if (c >= 0x0001 && c <= 0x007F) { + bytes = 1; + } else if (c <= 0x07FF) { + bytes = 2; + } else { + bytes = 3; + } + modifiedUtf8Len += bytes; + if (modifiedUtf8Len > MAX_UTF_BYTES) { + break; + } + i++; + } + out.writeUTF(i == s.length() ? 
s : s.substring(0, i)); + } + public static byte[] serialize(Serializable jobSpec) throws IOException { if (jobSpec instanceof byte[]) { return (byte[]) jobSpec; -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20769?usp=email To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings?usp=email Gerrit-MessageType: merged Gerrit-Project: asterixdb Gerrit-Branch: trinity Gerrit-Change-Id: Ic18a027bc88da8813c6f58c40f6eef236fdb1d14 Gerrit-Change-Number: 20769 Gerrit-PatchSet: 3 Gerrit-Owner: Ali Alsuliman <[email protected]> Gerrit-Reviewer: Ali Alsuliman <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Ian Maxon <[email protected]> Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Michael Blow <[email protected]> Gerrit-Reviewer: Murtadha Hubail <[email protected]> Gerrit-Reviewer: Peeyush Gupta <[email protected]>
