This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-dotnet.git
The following commit(s) were added to refs/heads/main by this push:
new 0194c4d feat: Bounds checking for Flatbuf should be enabled in the default build (#47)
0194c4d is described below
commit 0194c4d0abf87b41e71d77438227e75195ed72dd
Author: Curt Hagenlocher <[email protected]>
AuthorDate: Sun Sep 14 19:14:52 2025 -0700
feat: Bounds checking for Flatbuf should be enabled in the default build (#47)
## What's Changed
Enables bounds checking for Flatbuf as input can't generally be trusted.
Adds a test for a malformed column name length.
Makes some fixes required to be able to benchmark the change and
demonstrate that there's no significant regression.
Closes #48.
---
src/Apache.Arrow/Apache.Arrow.csproj | 2 +-
src/Apache.Arrow/Flatbuf/FlatBuffers/ByteBuffer.cs | 1 +
.../ArrowWriterBenchmark.cs | 4 +--
test/Apache.Arrow.Tests/ArrowStreamReaderTests.cs | 36 +++++++++++++++++++++-
test/Apache.Arrow.Tests/TestData.cs | 13 +++++---
5 files changed, 48 insertions(+), 8 deletions(-)
diff --git a/src/Apache.Arrow/Apache.Arrow.csproj b/src/Apache.Arrow/Apache.Arrow.csproj
index 301894f..0d21e75 100644
--- a/src/Apache.Arrow/Apache.Arrow.csproj
+++ b/src/Apache.Arrow/Apache.Arrow.csproj
@@ -2,7 +2,7 @@
<PropertyGroup>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
- <DefineConstants>$(DefineConstants);UNSAFE_BYTEBUFFER;BYTEBUFFER_NO_BOUNDS_CHECK;ENABLE_SPAN_T</DefineConstants>
+ <DefineConstants>$(DefineConstants);UNSAFE_BYTEBUFFER;ENABLE_SPAN_T</DefineConstants>
<Description>Apache Arrow is a cross-language development platform for
in-memory data. It specifies a standardized language-independent columnar
memory format for flat and hierarchical data, organized for efficient analytic
operations on modern hardware.</Description>
</PropertyGroup>
diff --git a/src/Apache.Arrow/Flatbuf/FlatBuffers/ByteBuffer.cs b/src/Apache.Arrow/Flatbuf/FlatBuffers/ByteBuffer.cs
index c3b3a17..0e83a50 100644
--- a/src/Apache.Arrow/Flatbuf/FlatBuffers/ByteBuffer.cs
+++ b/src/Apache.Arrow/Flatbuf/FlatBuffers/ByteBuffer.cs
@@ -693,6 +693,7 @@ namespace Google.FlatBuffers
#if ENABLE_SPAN_T && UNSAFE_BYTEBUFFER
public unsafe string GetStringUTF8(int startPos, int len)
{
+ AssertOffsetAndLength(startPos, len);
fixed (byte* buffer = &MemoryMarshal.GetReference(_buffer.ReadOnlySpan.Slice(startPos)))
{
return Encoding.UTF8.GetString(buffer, len);
diff --git a/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs b/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs
index 36e8921..0e8780b 100644
--- a/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs
+++ b/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs
@@ -25,10 +25,10 @@ namespace Apache.Arrow.Benchmarks
[MemoryDiagnoser]
public class ArrowWriterBenchmark
{
- [Params(10_000, 1_000_000)]
+ [Params(10_000, 300_000)]
public int BatchLength { get; set; }
- //Max column set count is 15 before reaching 2gb limit of memory stream
+ //Max column set count is 14 before reaching 2gb limit of memory stream
[Params(10, 14)]
public int ColumnSetCount { get; set; }
diff --git a/test/Apache.Arrow.Tests/ArrowStreamReaderTests.cs b/test/Apache.Arrow.Tests/ArrowStreamReaderTests.cs
index 15dc425..cd8bb62 100644
--- a/test/Apache.Arrow.Tests/ArrowStreamReaderTests.cs
+++ b/test/Apache.Arrow.Tests/ArrowStreamReaderTests.cs
@@ -20,6 +20,7 @@ using System.Threading;
using System.Threading.Tasks;
using Apache.Arrow.Ipc;
using Apache.Arrow.Memory;
+using Apache.Arrow.Types;
using Xunit;
namespace Apache.Arrow.Tests
@@ -280,6 +281,39 @@ namespace Apache.Arrow.Tests
}
#endif
}
+
+ [Fact]
+ public unsafe void MalformedColumnNameLength()
+ {
+ const int FieldNameLengthOffset = 108;
+ const int FakeFieldNameLength = 165535;
+
+ byte[] buffer;
+ using (var stream = new MemoryStream())
+ {
+ Schema schema = new(
+ [new Field("index", Int32Type.Default, nullable: false)],
+ metadata: []);
+ using (var writer = new ArrowStreamWriter(stream, schema, leaveOpen: true))
+ {
+ writer.WriteStart();
+ writer.WriteEnd();
+ }
+ buffer = stream.ToArray();
+ }
+
+ Span<int> length = buffer.AsSpan().Slice(FieldNameLengthOffset, sizeof(int)).CastTo<int>();
+ Assert.Equal(5, length[0]);
+ length[0] = FakeFieldNameLength;
+
+ Assert.Throws<ArgumentOutOfRangeException>(() =>
+ {
+ using (var stream = new MemoryStream(buffer))
+ using (var reader = new ArrowStreamReader(stream))
+ {
+ reader.ReadNextRecordBatch();
+ }
+ });
+ }
}
}
-
diff --git a/test/Apache.Arrow.Tests/TestData.cs b/test/Apache.Arrow.Tests/TestData.cs
index 3eede0a..3991bcd 100644
--- a/test/Apache.Arrow.Tests/TestData.cs
+++ b/test/Apache.Arrow.Tests/TestData.cs
@@ -15,6 +15,7 @@
using System;
using System.Collections.Generic;
+using System.Data.SqlTypes;
using System.Linq;
using Apache.Arrow.Arrays;
using Apache.Arrow.Scalars;
@@ -201,7 +202,8 @@ namespace Apache.Arrow.Tests
for (var i = 0; i < Length; i++)
{
- builder.Append((decimal)i / Length);
+ SqlDecimal value = SqlDecimal.ConvertToPrecScale((decimal)i / Length, type.Precision, type.Scale);
+ builder.Append((decimal)value);
}
Array = builder.Build();
@@ -213,7 +215,8 @@ namespace Apache.Arrow.Tests
for (var i = 0; i < Length; i++)
{
- builder.Append((decimal)i / Length);
+ SqlDecimal value = SqlDecimal.ConvertToPrecScale((decimal)i / Length, type.Precision, type.Scale);
+ builder.Append((decimal)value);
}
Array = builder.Build();
@@ -225,7 +228,8 @@ namespace Apache.Arrow.Tests
for (var i = 0; i < Length; i++)
{
- builder.Append((decimal)i / Length);
+ SqlDecimal value = SqlDecimal.ConvertToPrecScale((decimal)i / Length, type.Precision, type.Scale);
+ builder.Append((decimal)value);
}
Array = builder.Build();
@@ -237,7 +241,8 @@ namespace Apache.Arrow.Tests
for (var i = 0; i < Length; i++)
{
- builder.Append((decimal)i / Length);
+ SqlDecimal value = SqlDecimal.ConvertToPrecScale((decimal)i / Length, type.Precision, type.Scale);
+ builder.Append((decimal)value);
}
Array = builder.Build();