domoritz commented on code in PR #439:
URL: https://github.com/apache/arrow-js/pull/439#discussion_r3356520491


##########
test/unit/ipc/writer/json-offset-views-tests.ts:
##########
@@ -0,0 +1,128 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// The bug: bigNumsToStrings() in src/visitor/jsonvectorassembler.ts used to

Review Comment:
   I don't think we need this much detail in the unit test comment especially 
on old code. 



##########
test/unit/ipc/writer/json-offset-views-tests.ts:
##########
@@ -0,0 +1,128 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// The bug: bigNumsToStrings() in src/visitor/jsonvectorassembler.ts used to
+// do `new Uint32Array(values.buffer)` and ignore values.byteOffset/byteLength.
+// When the underlying typed array was a subarray view with byteOffset > 0
+// (which happens whenever Data was constructed against a shared/padded buffer
+// or after slicing through tableFromIPC), the helper would read the wrong
+// 64-bit words and emit incorrect JSON for OFFSET/DATA fields.
+//
+// For each affected type, we take a freshly-generated Vector (byteOffset === 
0),
+// rewrap every backing typed array in a subarray view at byteOffset > 0, and
+// assert the JSONVectorAssembler produces the same JSON in both cases.
+//
+// These are focused unit tests of bigNumsToStrings' buffer-windowing contract.
+// They pinpoint exactly which type / which callsite breaks if a regression
+// lands, and they don't depend on tableFromIPC continuing to produce offset
+// views. End-to-end coverage of the same flow on a broader type matrix lives
+// in json-writer-tests.ts, which catches the same regression at the
+// user-facing API level.
+
+import {
+    Data, DataType, Field, RecordBatch, Schema, Struct,
+} from 'apache-arrow';
+import { JSONVectorAssembler } from 'apache-arrow/visitor/jsonvectorassembler';
+import * as generate from '../../../generate-test-data.js';
+
+/**
+ * Return a copy of `arr` whose data lives at a nonzero byteOffset inside a
+ * larger underlying buffer. Equivalent to the state of a typed array that
+ * came out of `tableFromIPC` — same logical contents, shifted memory layout.
+ */
+function padBuffer<T extends ArrayBufferView>(arr: T | undefined): T | 
undefined {
+    if (!arr) return arr;
+    const Ctor = arr.constructor as new (n: number) => any;
+    const padding = 4;
+    const padded = new Ctor((arr as any).length + padding * 2);
+    padded.set(arr, padding);
+    return padded.subarray(padding, padding + (arr as any).length) as T;
+}
+
+/**
+ * Clone a Data so every backing typed array sits at byteOffset > 0.
+ * Recurses into children so nested types stay consistent.
+ */
+function withOffsetViews<T extends DataType>(d: Data<T>): Data<T> {
+    const newBuffers = Array.from(d.buffers as unknown as 
Array<ArrayBufferView | undefined>, padBuffer);
+    const newChildren = d.children.map((c) => withOffsetViews(c));
+    return d.clone(d.type, d.offset, d.length, d.nullCount, newBuffers as any, 
newChildren);
+}
+
+/**
+ * Wrap a Data in a single-field RecordBatch so we can hand it to assemble().
+ */
+function batchOf<T extends DataType>(type: T, data: Data<T>): RecordBatch<any> 
{
+    const field = new Field('x', type, true);
+    const schema = new Schema([field]);
+    const structType = new Struct([field]);
+    const root = data.clone(structType as any, 0, data.length, 0, [undefined, 
undefined, undefined, undefined] as any, [data]);
+    return new RecordBatch(schema, root as Data<Struct>);
+}
+
+// Every affected callsite of bigNumsToStrings, one entry per type. Adding

Review Comment:
   Can we test bigNumsToStrings directly as well? 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to