Copilot commented on code in PR #152:
URL: https://github.com/apache/arrow-swift/pull/152#discussion_r3006805894
##########
Sources/Arrow/ArrowReader.swift:
##########
@@ -195,9 +195,12 @@ public class ArrowReader { // swiftlint:disable:this
type_body_length
let arrowNullBuffer = makeBuffer(nullBuffer, fileData:
loadInfo.fileData,
length: nullLength, messageOffset:
loadInfo.messageOffset)
let arrowOffsetBuffer = makeBuffer(offsetBuffer, fileData:
loadInfo.fileData,
- length: UInt(node.length),
messageOffset: loadInfo.messageOffset)
+ length: UInt(node.length + 1),
messageOffset: loadInfo.messageOffset)
+ let lastOffset = arrowOffsetBuffer.rawPointer
+ .advanced(by: Int(node.length) * MemoryLayout<Int32>.stride)
+ .load(as: Int32.self)
let arrowValueBuffer = makeBuffer(valueBuffer, fileData:
loadInfo.fileData,
- length: UInt(node.length),
messageOffset: loadInfo.messageOffset)
+ length: UInt(lastOffset),
messageOffset: loadInfo.messageOffset)
Review Comment:
`loadVariableData` derives the values buffer logical length from the last
offset without validating it. If the IPC input is malformed, `lastOffset` can
be negative or larger than the underlying `valueBuffer.length`, and the
subsequent `UInt(lastOffset)` conversion / later value reads can crash or read
out of bounds. Add guards to ensure the offset buffer is large enough to read
the last offset, `lastOffset >= 0`, and `lastOffset <= valueBuffer.length`
(bytes) before creating the value buffer (otherwise return
`.failure(.invalid(...))`).
##########
Sources/Arrow/ArrowData.swift:
##########
@@ -26,9 +26,17 @@ public class ArrowData {
public let stride: Int
convenience init(_ arrowType: ArrowType, buffers: [ArrowBuffer],
nullCount: UInt) throws {
+ let arrayLength: UInt
+ switch arrowType.info {
+ case .variableInfo:
+ arrayLength = max(buffers[1].length, 1) - 1
Review Comment:
The variable-width `ArrowData` convenience init silently maps an offsets
buffer length of 0 to an array length of 0 via `max(buffers[1].length, 1) - 1`.
That avoids underflow but can also mask invalid input (variable-width arrays
should have at least 1 offset). Consider validating `buffers.count >= 2` and
`buffers[1].length >= 1` for `.variableInfo` and throwing an `.invalid(...)`
error when the invariant is violated.
```suggestion
guard buffers.count >= 2 else {
throw ArrowError.invalid("Variable-width ArrowData requires
at least two buffers (null bitmap and offsets).")
}
let offsetsLength = buffers[1].length
guard offsetsLength >= 1 else {
throw ArrowError.invalid("Variable-width ArrowData requires
a non-empty offsets buffer.")
}
arrayLength = offsetsLength - 1
```
##########
Sources/Arrow/ArrowBufferBuilder.swift:
##########
@@ -205,80 +205,79 @@ public class VariableBufferBuilder<T>:
ValuesBufferBuilder<T>, ArrowBufferBuilde
public required init() throws {
let values = ArrowBuffer.createBuffer(0, size: UInt(binaryStride))
let nulls = ArrowBuffer.createBuffer(0, size: UInt(binaryStride))
- self.offsets = ArrowBuffer.createBuffer(0, size:
UInt(MemoryLayout<Int32>.stride))
+ self.offsets = ArrowBuffer.createBuffer(1, size:
UInt(MemoryLayout<Int32>.stride))
+ self.offsets.rawPointer.storeBytes(of: Int32(0), as: Int32.self)
super.init(values: values, nulls: nulls, stride: binaryStride)
}
public func append(_ newValue: ItemType?) {
let index = UInt(self.length)
- self.length += 1
let offsetIndex = MemoryLayout<Int32>.stride * Int(index)
- if self.length >= self.offsets.length {
- self.resize(UInt( self.offsets.length + 1))
- }
- var binData: Data
- var isNull = false
- if let val = newValue {
- binData = getBytesFor(val)!
- } else {
- var nullVal = 0
- isNull = true
- binData = Data(bytes: &nullVal, count: MemoryLayout<UInt32>.size)
- }
+ let nextLength = index + 1
+ self.resize(nextLength)
- var currentIndex: Int32 = 0
- var currentOffset: Int32 = Int32(binData.count)
- if index > 0 {
- currentIndex = self.offsets.rawPointer.advanced(by:
offsetIndex).load(as: Int32.self)
- currentOffset += currentIndex
- if currentOffset > self.values.length {
- self.value_resize(UInt(currentOffset))
- }
- }
+ let currentOffset = self.offsets.rawPointer.advanced(by:
offsetIndex).load(as: Int32.self)
+ var nextOffset = currentOffset
- if isNull {
+ if let val = newValue {
+ let binData = getBytesFor(val)!
+ nextOffset += Int32(binData.count)
Review Comment:
`VariableBufferBuilder.append` uses `Int32` offsets and does `nextOffset +=
Int32(binData.count)`. If a single value (or the running total) exceeds
`Int32.max`, the `Int32(...)` conversion will trap or the addition can
overflow, causing a crash. Consider explicitly checking that `binData.count`
and the computed `nextOffset` stay within `Int32.max` (and fail fast with a
clear error/precondition), or gate this builder to types that are guaranteed to
fit Int32 offsets.
```suggestion
// Ensure per-value size fits into Int32.
precondition(
binData.count <= Int(Int32.max),
"VariableBufferBuilder.append: element size exceeds
Int32.max"
)
// Compute the next offset in a wider type to detect overflow.
let proposedOffset = Int64(currentOffset) + Int64(binData.count)
precondition(
proposedOffset <= Int64(Int32.max),
"VariableBufferBuilder.append: total buffer size exceeds
Int32.max"
)
nextOffset = Int32(proposedOffset)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]