Copilot commented on code in PR #152:
URL: https://github.com/apache/arrow-swift/pull/152#discussion_r3006805894


##########
Sources/Arrow/ArrowReader.swift:
##########
@@ -195,9 +195,12 @@ public class ArrowReader { // swiftlint:disable:this 
type_body_length
         let arrowNullBuffer = makeBuffer(nullBuffer, fileData: 
loadInfo.fileData,
                                          length: nullLength, messageOffset: 
loadInfo.messageOffset)
         let arrowOffsetBuffer = makeBuffer(offsetBuffer, fileData: 
loadInfo.fileData,
-                                           length: UInt(node.length), 
messageOffset: loadInfo.messageOffset)
+                                           length: UInt(node.length + 1), 
messageOffset: loadInfo.messageOffset)
+        let lastOffset = arrowOffsetBuffer.rawPointer
+            .advanced(by: Int(node.length) * MemoryLayout<Int32>.stride)
+            .load(as: Int32.self)
         let arrowValueBuffer = makeBuffer(valueBuffer, fileData: 
loadInfo.fileData,
-                                          length: UInt(node.length), 
messageOffset: loadInfo.messageOffset)
+                                          length: UInt(lastOffset), 
messageOffset: loadInfo.messageOffset)

Review Comment:
   `loadVariableData` derives the values buffer logical length from the last 
offset without validating it. If the IPC input is malformed, `lastOffset` can 
be negative or larger than the underlying `valueBuffer.length`, and the 
subsequent `UInt(lastOffset)` conversion / later value reads can crash or read 
out of bounds. Add guards to ensure the offset buffer is large enough to read 
the last offset, `lastOffset >= 0`, and `lastOffset <= valueBuffer.length` 
(bytes) before creating the value buffer (otherwise return 
`.failure(.invalid(...))`).



##########
Sources/Arrow/ArrowData.swift:
##########
@@ -26,9 +26,17 @@ public class ArrowData {
     public let stride: Int
 
     convenience init(_ arrowType: ArrowType, buffers: [ArrowBuffer], 
nullCount: UInt) throws {
+        let arrayLength: UInt
+        switch arrowType.info {
+        case .variableInfo:
+            arrayLength = max(buffers[1].length, 1) - 1

Review Comment:
   The variable-width `ArrowData` convenience init silently maps an offsets 
buffer length of 0 to an array length of 0 via `max(buffers[1].length, 1) - 1`. 
That avoids underflow but can also mask invalid input (variable-width arrays 
should have at least 1 offset). Consider validating `buffers.count >= 2` and 
`buffers[1].length >= 1` for `.variableInfo` and throwing an `.invalid(...)` 
error when the invariant is violated.
   ```suggestion
               guard buffers.count >= 2 else {
                   throw ArrowError.invalid("Variable-width ArrowData requires 
at least two buffers (null bitmap and offsets).")
               }
               let offsetsLength = buffers[1].length
               guard offsetsLength >= 1 else {
                   throw ArrowError.invalid("Variable-width ArrowData requires 
a non-empty offsets buffer.")
               }
               arrayLength = offsetsLength - 1
   ```



##########
Sources/Arrow/ArrowBufferBuilder.swift:
##########
@@ -205,80 +205,79 @@ public class VariableBufferBuilder<T>: 
ValuesBufferBuilder<T>, ArrowBufferBuilde
     public required init() throws {
         let values = ArrowBuffer.createBuffer(0, size: UInt(binaryStride))
         let nulls = ArrowBuffer.createBuffer(0, size: UInt(binaryStride))
-        self.offsets = ArrowBuffer.createBuffer(0, size: 
UInt(MemoryLayout<Int32>.stride))
+        self.offsets = ArrowBuffer.createBuffer(1, size: 
UInt(MemoryLayout<Int32>.stride))
+        self.offsets.rawPointer.storeBytes(of: Int32(0), as: Int32.self)
         super.init(values: values, nulls: nulls, stride: binaryStride)
     }
 
     public func append(_ newValue: ItemType?) {
         let index = UInt(self.length)
-        self.length += 1
         let offsetIndex = MemoryLayout<Int32>.stride * Int(index)
-        if self.length >= self.offsets.length {
-            self.resize(UInt( self.offsets.length + 1))
-        }
-        var binData: Data
-        var isNull = false
-        if let val = newValue {
-            binData = getBytesFor(val)!
-        } else {
-            var nullVal = 0
-            isNull = true
-            binData = Data(bytes: &nullVal, count: MemoryLayout<UInt32>.size)
-        }
+        let nextLength = index + 1
+        self.resize(nextLength)
 
-        var currentIndex: Int32 = 0
-        var currentOffset: Int32 = Int32(binData.count)
-        if index > 0 {
-            currentIndex = self.offsets.rawPointer.advanced(by: 
offsetIndex).load(as: Int32.self)
-            currentOffset += currentIndex
-            if currentOffset > self.values.length {
-                self.value_resize(UInt(currentOffset))
-            }
-        }
+        let currentOffset = self.offsets.rawPointer.advanced(by: 
offsetIndex).load(as: Int32.self)
+        var nextOffset = currentOffset
 
-        if isNull {
+        if let val = newValue {
+            let binData = getBytesFor(val)!
+            nextOffset += Int32(binData.count)

Review Comment:
   `VariableBufferBuilder.append` uses `Int32` offsets and does `nextOffset += 
Int32(binData.count)`. If a single value (or the running total) exceeds 
`Int32.max`, the `Int32(...)` conversion will trap or the addition can 
overflow, causing a crash. Consider explicitly checking that `binData.count` 
and the computed `nextOffset` stay within `Int32.max` (and fail fast with a 
clear error/precondition), or gate this builder to types that are guaranteed to 
fit Int32 offsets.
   ```suggestion
   
               // Ensure per-value size fits into Int32.
               precondition(
                   binData.count <= Int(Int32.max),
                   "VariableBufferBuilder.append: element size exceeds 
Int32.max"
               )
   
               // Compute the next offset in a wider type to detect overflow.
               let proposedOffset = Int64(currentOffset) + Int64(binData.count)
               precondition(
                   proposedOffset <= Int64(Int32.max),
                   "VariableBufferBuilder.append: total buffer size exceeds 
Int32.max"
               )
               nextOffset = Int32(proposedOffset)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to