This is an automated email from the ASF dual-hosted git repository.
sruehl pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/plc4x.git
The following commit(s) were added to refs/heads/develop by this push:
new 93341ca76c Fix reading UTF-8 strings (from OPC UA nodes) (#832)
93341ca76c is described below
commit 93341ca76c503705c737ecc86675db39ac1db26e
Author: Marc Aurel Fritz <[email protected]>
AuthorDate: Mon Mar 20 14:43:10 2023 +0100
Fix reading UTF-8 strings (from OPC UA nodes) (#832)
* plc4j/spi: Fix readString() on non-ascii encodings
The previous logic could result in a `StringIndexOutOfBoundsException`
upon calling `substring()`.
The reason is that the calculated `realLength` counts bytes, whereas the
length of the created string counts characters. In UTF encodings,
multiple bytes can encode a single character. This causes `realLength`
to exceed the length of the decoded string and thus the `substring()`
call to fail.
Fix this by applying `realLength` to the byte array instead of the
decoded string. This is done by using the
`String(byte[], int, int, Charset)` constructor.
Note that in the UTF-16 cases `realLength` needs to be incremented
by 2 per iteration, because two bytes are stored in the array each time.
This commit contains a regression test that fails without these
adaptations.
Signed-off-by: Marc Aurel Fritz <[email protected]>
* plc4j/spi: Always convert encoding to upper case at readString()
Previously, the inner switch statement in the UTF-16 cases
did not convert the encoding to upper case. This prevented
the matching from working properly.
Convert the encoding once, directly at the beginning of the
method, to prevent similar bugs in the future.
Signed-off-by: Marc Aurel Fritz <[email protected]>
* plc4j/spi: Merge ASCII and UTF-8 cases on readString()
Avoid duplicating the same logic in both cases.
Signed-off-by: Marc Aurel Fritz <[email protected]>
---------
Signed-off-by: Marc Aurel Fritz <[email protected]>
---
.../java/spi/generation/ReadBufferByteBased.java | 37 ++++++++--------------
.../plc4x/java/spi/generation/ReadBufferTest.java | 13 ++++++++
2 files changed, 27 insertions(+), 23 deletions(-)
diff --git
a/plc4j/spi/src/main/java/org/apache/plc4x/java/spi/generation/ReadBufferByteBased.java
b/plc4j/spi/src/main/java/org/apache/plc4x/java/spi/generation/ReadBufferByteBased.java
index 2904705ceb..011d905061 100644
---
a/plc4j/spi/src/main/java/org/apache/plc4x/java/spi/generation/ReadBufferByteBased.java
+++
b/plc4j/spi/src/main/java/org/apache/plc4x/java/spi/generation/ReadBufferByteBased.java
@@ -473,26 +473,9 @@ public class ReadBufferByteBased implements ReadBuffer,
BufferCommons {
public String readString(String logicalName, int bitLength,
WithReaderArgs... readerArgs) throws ParseException {
String encoding = extractEncoding(readerArgs).orElse("UTF-8");
encoding = encoding.replaceAll("[^a-zA-Z0-9]", "");
- switch (encoding.toUpperCase()) {
- case "ASCII": {
- byte[] strBytes = new byte[bitLength / 8];
- int realLength = 0;
- boolean finishedReading = false;
- for (int i = 0; (i < (bitLength / 8)) && hasMore(8); i++) {
- try {
- byte b = readByte(logicalName);
- if (!disable0Termination() && (b == 0x00)) {
- finishedReading = true;
- } else if (!finishedReading) {
- strBytes[i] = b;
- realLength++;
- }
- } catch (Exception e) {
- throw new PlcRuntimeException(e);
- }
- }
- return new String(strBytes,
StandardCharsets.US_ASCII).substring(0, realLength);
- }
+ encoding = encoding.toUpperCase();
+ switch (encoding) {
+ case "ASCII":
case "UTF8": {
byte[] strBytes = new byte[bitLength / 8];
int realLength = 0;
@@ -510,7 +493,15 @@ public class ReadBufferByteBased implements ReadBuffer,
BufferCommons {
throw new PlcRuntimeException(e);
}
}
- return new String(strBytes,
StandardCharsets.UTF_8).substring(0, realLength);
+ Charset charset;
+ switch (encoding) {
+ case "UTF8":
+ charset = StandardCharsets.UTF_8;
+ break;
+ default:
+ charset = StandardCharsets.US_ASCII;
+ }
+ return new String(strBytes, 0, realLength, charset);
}
case "UTF16":
case "UTF16LE":
@@ -527,7 +518,7 @@ public class ReadBufferByteBased implements ReadBuffer,
BufferCommons {
} else if (!finishedReading) {
strBytes[(i * 2)] = b1;
strBytes[(i * 2) + 1] = b2;
- realLength++;
+ realLength += 2;
}
} catch (Exception e) {
throw new PlcRuntimeException(e);
@@ -544,7 +535,7 @@ public class ReadBufferByteBased implements ReadBuffer,
BufferCommons {
default:
charset = StandardCharsets.UTF_16;
}
- return new String(strBytes, charset).substring(0, realLength);
+ return new String(strBytes, 0, realLength, charset);
}
default:
throw new ParseException("Unsupported encoding: " + encoding);
diff --git
a/plc4j/spi/src/test/java/org/apache/plc4x/java/spi/generation/ReadBufferTest.java
b/plc4j/spi/src/test/java/org/apache/plc4x/java/spi/generation/ReadBufferTest.java
index 798f2ee220..841e67cee4 100644
---
a/plc4j/spi/src/test/java/org/apache/plc4x/java/spi/generation/ReadBufferTest.java
+++
b/plc4j/spi/src/test/java/org/apache/plc4x/java/spi/generation/ReadBufferTest.java
@@ -38,4 +38,17 @@ class ReadBufferTest {
assertEquals(value, answer);
}
+
+ /**
+ * Test which makes sure that UTF8 encoding with multi-byte characters
works
+ */
+ @Test
+ void readStringUtf8() throws ParseException {
+ String value = "molybdän";
+ final var serialized = value.getBytes(StandardCharsets.UTF_8);
+ final ReadBuffer buffer = new ReadBufferByteBased(serialized);
+ String answer = buffer.readString("", serialized.length * 8,
WithOption.WithEncoding(StandardCharsets.UTF_8.name()));
+
+ assertEquals(value, answer);
+ }
}
\ No newline at end of file