AlexanderSaydakov commented on code in PR #447:
URL: https://github.com/apache/datasketches-java/pull/447#discussion_r1287728653
##########
src/main/java/org/apache/datasketches/common/ArrayOfStringsSerDe.java:
##########
@@ -31,45 +34,101 @@
* this method is 2 times more compact, but it takes more time to encode and decode
* by a factor of 1.5 to 2.
*
+ * <p>The serialization
+ *
* @author Alexander Saydakov
*/
public class ArrayOfStringsSerDe extends ArrayOfItemsSerDe<String> {
+ @Override
+ public byte[] serializeToByteArray(final String item) {
+ Objects.requireNonNull(item, "Item must not be null");
+ if (item.isEmpty()) { return new byte[] { 0, 0, 0, 0 }; }
+ final byte[] utf8ByteArr = item.getBytes(StandardCharsets.UTF_8);
+ final int numBytes = utf8ByteArr.length;
+ final byte[] out = new byte[numBytes + Integer.BYTES];
+ copyBytes(utf8ByteArr, 0, out, 4, numBytes);
+ putIntLE(out, 0, numBytes);
+ return out;
+ }
+
@Override
public byte[] serializeToByteArray(final String[] items) {
- int length = 0;
- final byte[][] itemsBytes = new byte[items.length][];
- for (int i = 0; i < items.length; i++) {
- itemsBytes[i] = items[i].getBytes(StandardCharsets.UTF_8);
- length += itemsBytes[i].length + Integer.BYTES;
+ Objects.requireNonNull(items, "Items must not be null");
+ if (items.length == 0) { return new byte[0]; }
+ int totalBytes = 0;
+ final int numItems = items.length;
+ final byte[][] serialized2DArray = new byte[numItems][];
+ for (int i = 0; i < numItems; i++) {
+ serialized2DArray[i] = items[i].getBytes(StandardCharsets.UTF_8);
+ totalBytes += serialized2DArray[i].length + Integer.BYTES;
}
- final byte[] bytes = new byte[length];
- final WritableMemory mem = WritableMemory.writableWrap(bytes);
- long offsetBytes = 0;
- for (int i = 0; i < items.length; i++) {
- mem.putInt(offsetBytes, itemsBytes[i].length);
- offsetBytes += Integer.BYTES;
- mem.putByteArray(offsetBytes, itemsBytes[i], 0, itemsBytes[i].length);
- offsetBytes += itemsBytes[i].length;
+ final byte[] bytesOut = new byte[totalBytes];
+ int offset = 0;
+ for (int i = 0; i < numItems; i++) {
+ final int utf8len = serialized2DArray[i].length;
+ putIntLE(bytesOut, offset, utf8len);
+ offset += Integer.BYTES;
+ copyBytes(serialized2DArray[i], 0, bytesOut, offset, utf8len);
+ offset += utf8len;
}
- return bytes;
+ return bytesOut;
}
@Override
+ @Deprecated
public String[] deserializeFromMemory(final Memory mem, final int numItems) {
+ return deserializeFromMemory(mem, 0, numItems);
+ }
+
+ @Override
+ public String[] deserializeFromMemory(final Memory mem, final long
offsetBytes, final int numItems) {
+ Objects.requireNonNull(mem, "Memory must not be null");
+ if (numItems <= 0) { return new String[0]; }
final String[] array = new String[numItems];
- long offsetBytes = 0;
+ long offset = offsetBytes;
for (int i = 0; i < numItems; i++) {
- Util.checkBounds(offsetBytes, Integer.BYTES, mem.getCapacity());
- final int strLength = mem.getInt(offsetBytes);
- offsetBytes += Integer.BYTES;
- final byte[] bytes = new byte[strLength];
- Util.checkBounds(offsetBytes, strLength, mem.getCapacity());
- mem.getByteArray(offsetBytes, bytes, 0, strLength);
- offsetBytes += strLength;
- array[i] = new String(bytes, StandardCharsets.UTF_8);
+ Util.checkBounds(offset, Integer.BYTES, mem.getCapacity());
+ final int strLength = mem.getInt(offset);
+ offset += Integer.BYTES;
+ final byte[] utf8Bytes = new byte[strLength];
+ Util.checkBounds(offset, strLength, mem.getCapacity());
+ mem.getByteArray(offset, utf8Bytes, 0, strLength);
+ offset += strLength;
+ array[i] = new String(utf8Bytes, StandardCharsets.UTF_8);
}
return array;
}
+ @Override
+ public int sizeOf(final String item) {
+ Objects.requireNonNull(item, "Item must not be null");
+ if (item.isEmpty()) { return Integer.BYTES; }
+ return item.getBytes(StandardCharsets.UTF_8).length + Integer.BYTES;
+ }
+
+ @Override
+ public int sizeOf(final Memory mem, final long offsetBytes, final int
numItems) {
Review Comment:
What is the purpose of this? It seems like a dry-run deserialization.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]