Copilot commented on code in PR #329:
URL: https://github.com/apache/arrow-dotnet/pull/329#discussion_r3141367050
##########
src/Apache.Arrow.Scalars/Variant/VariantValueWriter.cs:
##########
@@ -314,160 +411,232 @@ public void WriteDecimal16(SqlDecimal value)
lo = (long)uLo;
}
- ms.WriteByte(scale);
- WriteInt64LE(ms, lo);
- WriteInt64LE(ms, hi);
+ buf.Append(scale);
+ buf.WriteInt64LE(lo);
+ buf.WriteInt64LE(hi);
}
/// <summary>Writes a string. Uses the short-string encoding when the
UTF-8 byte length is ≤ 63.</summary>
public void WriteString(string value)
{
if (value == null) throw new ArgumentNullException(nameof(value));
- MemoryStream ms = BeforeWriteValue();
+ ref Buffer<byte> buf = ref BeforeWriteValue();
int byteCount = Encoding.UTF8.GetByteCount(value);
if (byteCount <= 63)
{
-
ms.WriteByte(VariantEncodingHelper.MakeShortStringHeader(byteCount));
+
buf.Append(VariantEncodingHelper.MakeShortStringHeader(byteCount));
}
else
{
-
ms.WriteByte(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.String));
- WriteInt32LE(ms, byteCount);
+
buf.Append(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.String));
+ buf.WriteInt32LE(byteCount);
}
- // Encode UTF-8 directly into the MemoryStream's buffer.
- int dataPos = (int)ms.Position;
- int needed = dataPos + byteCount;
- if (needed > ms.Length)
- {
- ms.SetLength(needed);
- }
- Encoding.UTF8.GetBytes(value, 0, value.Length, ms.GetBuffer(),
dataPos);
- ms.Position = needed;
+ // Encode UTF-8 directly into the buffer.
+ Span<byte> dest = buf.GetSpan(byteCount);
+#if NET8_0_OR_GREATER
+ Encoding.UTF8.GetBytes(value, dest);
+#else
+ Encoding.UTF8.GetBytes(value, 0, value.Length, buf.RawBuffer,
buf.Length);
+#endif
+ buf.Advance(byteCount);
}
/// <summary>Writes a binary blob.</summary>
- public void WriteBinary(byte[] data)
+ public void WriteBinary(ReadOnlySpan<byte> data)
{
- if (data == null) throw new ArgumentNullException(nameof(data));
- MemoryStream ms = BeforeWriteValue();
-
ms.WriteByte(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.Binary));
- WriteInt32LE(ms, data.Length);
- ms.Write(data, 0, data.Length);
+ ref Buffer<byte> buf = ref BeforeWriteValue();
+
buf.Append(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.Binary));
Review Comment:
`WriteBinary` changed from `byte[]` to `ReadOnlySpan<byte>`, which also
changes null semantics: passing a null `byte[]` now becomes a default/empty
span and will encode a zero-length binary instead of throwing
`ArgumentNullException` as before. Since this is a public API, consider
restoring the original signature as an overload (and keep the null check) while
optionally adding the span overload for allocation-free callers, or clearly
documenting/handling the intended null behavior.
##########
src/Apache.Arrow.Scalars/Variant/VariantValueWriter.cs:
##########
@@ -112,7 +168,7 @@ public void WriteFieldName(string name)
throw new InvalidOperationException("A value must be written
for the previous field before writing the next field name.");
}
int fieldId = _idRemap[_metadata.GetId(name)];
- AppendInt(ref objFrame.FieldIds, ref objFrame.FieldIdCount,
fieldId);
+ objFrame.FieldIds.Append(fieldId);
objFrame.PendingValue = true;
Review Comment:
`WriteFieldName` doesn’t guard against `_disposed`. After `Dispose()`,
internal buffers may have been released while `_frame` is still non-null, so
calling this can throw `NullReferenceException`/corrupt state instead of the
documented `ObjectDisposedException`. Add an `_disposed` check (or a shared
`ThrowIfDisposed()` helper) at the start of this method.
##########
src/Apache.Arrow.Scalars/Variant/VariantValueWriter.cs:
##########
@@ -129,20 +185,47 @@ public void EndObject()
}
_frame = _frameStack.Pop();
- MemoryStream output = _frame != null ? _frame.Buffer : _root;
- WriteObjectBody(output, objFrame);
- ArrayPool<int>.Shared.Return(objFrame.FieldIds);
- ArrayPool<int>.Shared.Return(objFrame.ValueStarts);
- ReturnStream(objFrame.Buffer);
+ // Once objFrame is popped it's no longer visible to Dispose, so
+ // WriteObjectBody must not leave its buffers unreleased on throw.
+ try
Review Comment:
`EndObject` doesn’t check `_disposed`. If a caller disposes the writer with
an open frame and then mistakenly calls `EndObject()`, buffers have already
been released and this can throw `NullReferenceException` (e.g., when appending
to `_root`) rather than `ObjectDisposedException` as documented. Add an
`_disposed` guard at the start of `EndObject`.
##########
src/Apache.Arrow.Scalars/Variant/VariantValueWriter.cs:
##########
@@ -314,160 +411,232 @@ public void WriteDecimal16(SqlDecimal value)
lo = (long)uLo;
}
- ms.WriteByte(scale);
- WriteInt64LE(ms, lo);
- WriteInt64LE(ms, hi);
+ buf.Append(scale);
+ buf.WriteInt64LE(lo);
+ buf.WriteInt64LE(hi);
}
/// <summary>Writes a string. Uses the short-string encoding when the
UTF-8 byte length is ≤ 63.</summary>
public void WriteString(string value)
{
if (value == null) throw new ArgumentNullException(nameof(value));
- MemoryStream ms = BeforeWriteValue();
+ ref Buffer<byte> buf = ref BeforeWriteValue();
int byteCount = Encoding.UTF8.GetByteCount(value);
if (byteCount <= 63)
{
-
ms.WriteByte(VariantEncodingHelper.MakeShortStringHeader(byteCount));
+
buf.Append(VariantEncodingHelper.MakeShortStringHeader(byteCount));
}
else
{
-
ms.WriteByte(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.String));
- WriteInt32LE(ms, byteCount);
+
buf.Append(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.String));
+ buf.WriteInt32LE(byteCount);
}
- // Encode UTF-8 directly into the MemoryStream's buffer.
- int dataPos = (int)ms.Position;
- int needed = dataPos + byteCount;
- if (needed > ms.Length)
- {
- ms.SetLength(needed);
- }
- Encoding.UTF8.GetBytes(value, 0, value.Length, ms.GetBuffer(),
dataPos);
- ms.Position = needed;
+ // Encode UTF-8 directly into the buffer.
+ Span<byte> dest = buf.GetSpan(byteCount);
+#if NET8_0_OR_GREATER
+ Encoding.UTF8.GetBytes(value, dest);
+#else
+ Encoding.UTF8.GetBytes(value, 0, value.Length, buf.RawBuffer,
buf.Length);
+#endif
+ buf.Advance(byteCount);
}
/// <summary>Writes a binary blob.</summary>
- public void WriteBinary(byte[] data)
+ public void WriteBinary(ReadOnlySpan<byte> data)
{
- if (data == null) throw new ArgumentNullException(nameof(data));
- MemoryStream ms = BeforeWriteValue();
-
ms.WriteByte(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.Binary));
- WriteInt32LE(ms, data.Length);
- ms.Write(data, 0, data.Length);
+ ref Buffer<byte> buf = ref BeforeWriteValue();
+
buf.Append(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.Binary));
+ buf.WriteInt32LE(data.Length);
+ buf.Append(data);
}
/// <summary>Writes a UUID.</summary>
public void WriteUuid(Guid value)
{
- MemoryStream ms = BeforeWriteValue();
-
ms.WriteByte(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.Uuid));
+ ref Buffer<byte> buf = ref BeforeWriteValue();
+
buf.Append(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.Uuid));
+ Span<byte> raw = stackalloc byte[16];
#if NET8_0_OR_GREATER
- Span<byte> buf = stackalloc byte[16];
- value.TryWriteBytes(buf, bigEndian: true, out int _);
- ms.Write(buf);
+ value.TryWriteBytes(raw, bigEndian: true, out _);
#else
- byte[] native = value.ToByteArray();
// Convert from .NET mixed-endian to big-endian (RFC 4122).
- _scratch[0] = native[3]; _scratch[1] = native[2]; _scratch[2] =
native[1]; _scratch[3] = native[0];
- _scratch[4] = native[5]; _scratch[5] = native[4];
- _scratch[6] = native[7]; _scratch[7] = native[6];
- Buffer.BlockCopy(native, 8, _scratch, 8, 8);
- ms.Write(_scratch, 0, 16);
+ byte[] native = value.ToByteArray();
+ raw[0] = native[3]; raw[1] = native[2]; raw[2] = native[1]; raw[3]
= native[0];
+ raw[4] = native[5]; raw[5] = native[4];
+ raw[6] = native[7]; raw[7] = native[6];
+ native.AsSpan(8, 8).CopyTo(raw.Slice(8));
#endif
+ buf.Append(raw);
}
/// <summary>Writes a date as days since the Unix epoch.</summary>
public void WriteDateDays(int days)
{
- MemoryStream ms = BeforeWriteValue();
-
ms.WriteByte(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.Date));
- WriteInt32LE(ms, days);
+ ref Buffer<byte> buf = ref BeforeWriteValue();
+
buf.Append(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.Date));
+ buf.WriteInt32LE(days);
}
/// <summary>Writes a timestamp (tz-adjusted microseconds since the
Unix epoch).</summary>
public void WriteTimestampMicros(long micros)
{
- MemoryStream ms = BeforeWriteValue();
-
ms.WriteByte(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.Timestamp));
- WriteInt64LE(ms, micros);
+ ref Buffer<byte> buf = ref BeforeWriteValue();
+
buf.Append(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.Timestamp));
+ buf.WriteInt64LE(micros);
}
/// <summary>Writes a timestamp-without-timezone (microseconds since
the Unix epoch).</summary>
public void WriteTimestampNtzMicros(long micros)
{
- MemoryStream ms = BeforeWriteValue();
-
ms.WriteByte(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.TimestampNtz));
- WriteInt64LE(ms, micros);
+ ref Buffer<byte> buf = ref BeforeWriteValue();
+
buf.Append(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.TimestampNtz));
+ buf.WriteInt64LE(micros);
}
/// <summary>Writes a time-without-timezone value (microseconds since
midnight).</summary>
public void WriteTimeNtzMicros(long micros)
{
- MemoryStream ms = BeforeWriteValue();
-
ms.WriteByte(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.TimeNtz));
- WriteInt64LE(ms, micros);
+ ref Buffer<byte> buf = ref BeforeWriteValue();
+
buf.Append(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.TimeNtz));
+ buf.WriteInt64LE(micros);
}
/// <summary>Writes a timestamp with timezone (nanoseconds since the
Unix epoch).</summary>
public void WriteTimestampTzNanos(long nanos)
{
- MemoryStream ms = BeforeWriteValue();
-
ms.WriteByte(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.TimestampTzNanos));
- WriteInt64LE(ms, nanos);
+ ref Buffer<byte> buf = ref BeforeWriteValue();
+
buf.Append(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.TimestampTzNanos));
+ buf.WriteInt64LE(nanos);
}
/// <summary>Writes a timestamp without timezone (nanoseconds since
the Unix epoch).</summary>
public void WriteTimestampNtzNanos(long nanos)
{
- MemoryStream ms = BeforeWriteValue();
-
ms.WriteByte(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.TimestampNtzNanos));
- WriteInt64LE(ms, nanos);
+ ref Buffer<byte> buf = ref BeforeWriteValue();
+
buf.Append(VariantEncodingHelper.MakePrimitiveHeader(VariantPrimitiveType.TimestampNtzNanos));
+ buf.WriteInt64LE(nanos);
+ }
+
+ // ---------------------------------------------------------------
+ // Transcode from a VariantReader
+ // ---------------------------------------------------------------
+
+ /// <summary>
+ /// Copies the variant value pointed to by <paramref name="source"/>
into this
+ /// writer. Useful when copying between metadata dictionaries: field
IDs in the
+ /// source are re-looked-up against this writer's <see
cref="VariantMetadataBuilder"/>
+ /// on the fly, via <see cref="WriteFieldName"/>.
+ /// </summary>
+ /// <remarks>
+ /// All field names referenced anywhere in <paramref name="source"/>
must already
+ /// exist in the metadata builder used to construct this writer. Use
+ /// <see
cref="VariantMetadataBuilder.CollectFieldNames(VariantReader)"/> during
+ /// the metadata-collection phase of a two-pass encode to accumulate
them.
+ /// </remarks>
+ public void CopyValue(VariantReader source)
+ {
+ switch (source.BasicType)
+ {
+ case VariantBasicType.Primitive:
+ CopyPrimitive(source);
Review Comment:
`CopyValue` introduces new public behavior (transcoding from
`VariantReader`, including field-name re-lookup against this writer’s
metadata), but there are no unit tests covering it. Please add tests that
exercise primitives, nested objects/arrays, and the metadata-remap scenario
(copy between different metadata dictionaries) to prevent subtle encoding
regressions.
##########
src/Apache.Arrow.Scalars/Variant/VariantValueWriter.cs:
##########
@@ -155,10 +238,24 @@ public void EndArray()
}
_frame = _frameStack.Pop();
- MemoryStream output = _frame != null ? _frame.Buffer : _root;
- WriteArrayBody(output, arrFrame);
- ArrayPool<int>.Shared.Return(arrFrame.ValueStarts);
- ReturnStream(arrFrame.Buffer);
+ // Popped frame is no longer visible to Dispose; the finally makes
+ // sure its buffers are released even if WriteArrayBody throws.
+ try
Review Comment:
`EndArray` doesn’t guard against `_disposed`. After `Dispose()`,
`_root`/frame buffers are released, so a post-dispose `EndArray()` call can
fail with `NullReferenceException` instead of the documented
`ObjectDisposedException`. Add an `_disposed` check at the start of `EndArray`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]