eerhardt commented on a change in pull request #7032:
URL: https://github.com/apache/arrow/pull/7032#discussion_r418391298
##########
File path: csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs
##########
@@ -99,55 +105,75 @@ public abstract class PrimitiveArrayBuilder<T, TArray,
TBuilder> : IArrowArrayBu
{
protected TBuilder Instance => this as TBuilder;
protected ArrowBuffer.Builder<T> ValueBuffer { get; }
+ protected BooleanArray.Builder ValidityBuffer { get; }
public int Length => ValueBuffer.Length;
-
- // TODO: Implement support for null values (null bitmaps)
+ protected int NullCount { get; set; }
internal PrimitiveArrayBuilder()
{
ValueBuffer = new ArrowBuffer.Builder<T>();
+ ValidityBuffer = new BooleanArray.Builder();
}
public TBuilder Resize(int length)
{
ValueBuffer.Resize(length);
+ ValidityBuffer.Resize(length + 1);
return Instance;
}
public TBuilder Reserve(int capacity)
{
ValueBuffer.Reserve(capacity);
+ ValidityBuffer.Reserve(capacity + 1);
return Instance;
}
public TBuilder Append(T value)
{
ValueBuffer.Append(value);
+ ValidityBuffer.Append(true);
return Instance;
}
public TBuilder Append(ReadOnlySpan<T> span)
{
+ var len = ValueBuffer.Length;
ValueBuffer.Append(span);
+ ValidityBuffer.AppendRange(Enumerable.Repeat(true,
ValueBuffer.Length - len));
return Instance;
}
public TBuilder AppendRange(IEnumerable<T> values)
{
+ var len = ValueBuffer.Length;
ValueBuffer.AppendRange(values);
+ ValidityBuffer.AppendRange(Enumerable.Repeat(true,
ValueBuffer.Length - len));
+ return Instance;
+ }
+
+ public TBuilder AppendNull()
+ {
+ ValidityBuffer.Append(false);
+ NullCount++;
+ // Need this until this is refactored to use
Review comment:
Thanks for explaining. I think we can remove this comment.
PrimitiveArrayBuilder won't use an offset builder because it doesn't have an
offset buffer. PrimitiveArrays have 2 buffers:
- Values
- Validity
See
https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout
Note that every value (including `null`s) takes up space in the
Values buffer.
```
* Length: 5, Null count: 1
* Validity bitmap buffer:
|Byte 0 (validity bitmap) | Bytes 1-63 |
|-------------------------|-----------------------|
| 00011101 | 0 (padding) |
* Value Buffer:
|Bytes 0-3 | Bytes 4-7 | Bytes 8-11 | Bytes 12-15 | Bytes 16-19 | Bytes 20-63 |
|------------|-------------|-------------|-------------|-------------|-------------|
| 1 | unspecified | 2 | 4 | 8 | unspecified |
```
Notice that `Bytes 4-7` are `unspecified`: it doesn't matter what values
are stored here, because the logical value at this position is `null`. So
what we are currently doing (writing `default` at this position) will remain
correct going forward.
`BuilderBase` above is really `BinaryArray.BuilderBase`. It isn't the base
of all builders - it is just the base of "binary array" builders. These are
covered in the next section of the spec:
https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-layout
Note that they have 3 buffers:
- Values
- Offsets
- Validity
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]