nssalian commented on code in PR #3481:
URL: https://github.com/apache/parquet-java/pull/3481#discussion_r3088190077
##########
parquet-variant/src/main/java/org/apache/parquet/variant/Variant.java:
##########
@@ -67,6 +87,26 @@ public Variant(ByteBuffer value, ByteBuffer metadata) {
"Unsupported variant metadata version: %d",
metadata.get(metadata.position()) & VariantUtil.VERSION_MASK));
}
+
+ // Pre-compute dictionary size for lazy metadata cache allocation.
+ int pos = this.metadata.position();
+ int metaOffsetSize = ((this.metadata.get(pos) >> 6) & 0x3) + 1;
+ if (this.metadata.remaining() > 1) {
+ this.dictSize = VariantUtil.readUnsigned(this.metadata, pos + 1,
metaOffsetSize);
+ } else {
+ this.dictSize = 0;
Review Comment:
cautiously added here in case there might be empty metadata. I doubt there's
a perf concern here.
##########
parquet-variant/src/main/java/org/apache/parquet/variant/Variant.java:
##########
@@ -324,23 +333,56 @@ public int numArrayElements() {
* @throws IllegalArgumentException if `getType()` does not return
`Type.ARRAY`
*/
public Variant getElementAtIndex(int index) {
- VariantUtil.ArrayInfo info = VariantUtil.getArrayInfo(value);
+ VariantUtil.ArrayInfo info = arrayInfo();
if (index < 0 || index >= info.numElements) {
return null;
}
- return getElementAtIndex(
- index,
- value,
- metadata,
- info.offsetSize,
- value.position() + info.offsetStartOffset,
- value.position() + info.dataStartOffset);
+ int offsetStart = value.position() + info.offsetStartOffset;
+ int dataStart = value.position() + info.dataStartOffset;
+ int offset = VariantUtil.readUnsigned(value, offsetStart + info.offsetSize
* index, info.offsetSize);
+ return childVariant(VariantUtil.slice(value, dataStart + offset));
+ }
+
+ /**
+ * Creates a child Variant that shares this instance's metadata cache.
+ */
+ private Variant childVariant(ByteBuffer childValue) {
+ return new Variant(childValue, metadata, metadataCache, dictSize);
}
- private static Variant getElementAtIndex(
- int index, ByteBuffer value, ByteBuffer metadata, int offsetSize, int
offsetStart, int dataStart) {
- // offsetStart and dataStart are absolute positions in the `value` buffer.
- int offset = VariantUtil.readUnsigned(value, offsetStart + offsetSize *
index, offsetSize);
- return new Variant(VariantUtil.slice(value, dataStart + offset), metadata);
+ /**
+ * Returns the metadata dictionary string for the given ID, caching the
result.
+ */
+ String getMetadataKeyCached(int id) {
+ if (metadataCache == null) {
Review Comment:
Will do
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]