This is an automated email from the ASF dual-hosted git repository.
bhulette pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 0a672bc ARROW-2226, ARROW-2233: [JS] Dictionary bugfixes
0a672bc is described below
commit 0a672bcbfd05eb126cd252e5abb0fbef95c051a2
Author: Paul Taylor <[email protected]>
AuthorDate: Wed Feb 28 14:23:56 2018 -0500
ARROW-2226, ARROW-2233: [JS] Dictionary bugfixes
Resolves https://issues.apache.org/jira/browse/ARROW-2226
Author: Paul Taylor <[email protected]>
Author: Brian Hulette <[email protected]>
Closes #1671 from trxcllnt/js-fix-dictionary-data and squashes the
following commits:
ccecf55 <Paul Taylor> Merge pull request #5 from
TheNeuralBit/dictionary-vector-tests
3fb9a26 <Brian Hulette> Fix bug in DictionaryVector with nullable indices
2888657 <Brian Hulette> Add dictionary vector unit tests
b0a0c08 <Paul Taylor> use indicies.offset in DictionaryData constructor
---
js/src/data.ts | 3 +--
js/src/vector.ts | 3 +++
js/test/unit/vector-tests.ts | 61 +++++++++++++++++++++++++++++++++++++++-----
3 files changed, 59 insertions(+), 8 deletions(-)
diff --git a/js/src/data.ts b/js/src/data.ts
index cdd9f29..3bfb320 100644
--- a/js/src/data.ts
+++ b/js/src/data.ts
@@ -152,10 +152,9 @@ export class DictionaryData<T extends DataType> extends
BaseData<Dictionary<T>>
public get indices() { return this._indices; }
public get dictionary() { return this._dictionary; }
constructor(type: Dictionary<T>, dictionary: Vector<T>, indices:
Data<Int<any>>) {
- super(type, indices.length, (indices as any)._nullCount);
+ super(type, indices.length, indices.offset, (indices as
any)._nullCount);
this._indices = indices;
this._dictionary = dictionary;
- this.length = this._indices.length;
}
public get nullCount() { return this._indices.nullCount; }
public get nullBitmap() { return this._indices.nullBitmap; }
diff --git a/js/src/vector.ts b/js/src/vector.ts
index f36c691..6c2bbbb 100644
--- a/js/src/vector.ts
+++ b/js/src/vector.ts
@@ -399,6 +399,9 @@ export class DictionaryVector<T extends DataType =
DataType> extends Vector<Dict
public readonly dictionary: Vector<T>;
constructor(data: Data<Dictionary<T>>, view: View<Dictionary<T>> = new
DictionaryView<T>(data.dictionary, new IntVector(data.indices))) {
super(data as Data<any>, view);
+ if (view instanceof ValidityView) {
+ view = (view as any).view;
+ }
if (data instanceof DictionaryData && view instanceof DictionaryView) {
this.indices = view.indices;
this.dictionary = data.dictionary;
diff --git a/js/test/unit/vector-tests.ts b/js/test/unit/vector-tests.ts
index e2be229..3eb3fbe 100644
--- a/js/test/unit/vector-tests.ts
+++ b/js/test/unit/vector-tests.ts
@@ -17,14 +17,15 @@
import { TextEncoder } from 'text-encoding-utf-8';
import Arrow from '../Arrow';
-import { type, TypedArray, TypedArrayConstructor } from '../../src/Arrow';
+import { type, TypedArray, TypedArrayConstructor, Vector } from
'../../src/Arrow';
+import { packBools } from '../../src/util/bit'
const utf8Encoder = new TextEncoder('utf-8');
-const { BoolData, FlatData, FlatListData } = Arrow.data;
-const { IntVector, FloatVector, BoolVector, Utf8Vector } = Arrow.vector;
+const { BoolData, FlatData, FlatListData, DictionaryData } = Arrow.data;
+const { IntVector, FloatVector, BoolVector, Utf8Vector, DictionaryVector } =
Arrow.vector;
const {
- Utf8, Bool,
+ Dictionary, Utf8, Bool,
Float16, Float32, Float64,
Int8, Int16, Int32, Int64,
Uint8, Uint16, Uint32, Uint64,
@@ -310,6 +311,54 @@ describe(`Utf8Vector`, () => {
let offset = 0;
const offsets = Uint32Array.of(0, ...values.map((d) => { offset +=
d.length; return offset; }));
const vector = new Utf8Vector(new FlatListData(new Utf8(), n, null,
offsets, utf8Encoder.encode(values.join(''))));
+ basicVectorTests(vector, values, ['abc', '123']);
+ describe(`sliced`, () => {
+ basicVectorTests(vector.slice(1,3), values.slice(1,3), ['foo', 'abc']);
+ });
+});
+
+describe(`DictionaryVector`, () => {
+ const dictionary = ['foo', 'bar', 'baz'];
+ const extras = ['abc', '123']; // values to search for that should NOT be
found
+ let offset = 0;
+ const offsets = Uint32Array.of(0, ...dictionary.map((d) => { offset +=
d.length; return offset; }));
+ const dictionary_vec = new Utf8Vector(new FlatListData(new Utf8(),
dictionary.length, null, offsets, utf8Encoder.encode(dictionary.join(''))));
+
+ const indices = Array.from({length: 50}, () => Math.random() * 3 | 0);
+
+ describe(`index with nullCount == 0`, () => {
+ const indices_data = new FlatData(new Int32(), indices.length, new
Uint8Array(0), indices);
+
+ const values = Array.from(indices).map((d) => dictionary[d]);
+ const vector = new DictionaryVector(new DictionaryData(new
Dictionary(dictionary_vec.type, indices_data.type), dictionary_vec,
indices_data));
+
+ basicVectorTests(vector, values, extras);
+
+ describe(`sliced`, () => {
+ basicVectorTests(vector.slice(10, 20), values.slice(10,20),
extras);
+ })
+ });
+
+ describe(`index with nullCount > 0`, () => {
+ const validity = Array.from({length: indices.length}, () =>
Math.random() > 0.2 ? true : false);
+ const indices_data = new FlatData(new Int32(), indices.length,
packBools(validity), indices, 0, validity.reduce((acc, d) => acc + (d ? 0 : 1),
0));
+ const values = Array.from(indices).map((d, i) => validity[i] ?
dictionary[d] : null);
+ const vector = new DictionaryVector(new DictionaryData(new
Dictionary(dictionary_vec.type, indices_data.type), dictionary_vec,
indices_data));
+
+ basicVectorTests(vector, values, ['abc', '123']);
+ describe(`sliced`, () => {
+ basicVectorTests(vector.slice(10, 20), values.slice(10,20),
extras);
+ });
+ });
+});
+
+// Creates some basic tests for the given vector.
+// Verifies that:
+// - `get` and the native iterator return the same data as `values`
+// - `indexOf` returns the same indices as `values`
+function basicVectorTests(vector: Vector, values: any[], extras: any[]) {
+ const n = values.length;
+
test(`gets expected values`, () => {
let i = -1;
while (++i < n) {
@@ -325,14 +374,14 @@ describe(`Utf8Vector`, () => {
}
});
test(`indexOf returns expected values`, () => {
- let testValues = values.concat(['abc', '12345']);
+ let testValues = values.concat(extras);
for (const value of testValues) {
const expected = values.indexOf(value);
expect(vector.indexOf(value)).toEqual(expected);
}
});
-});
+}
function toMap<T>(entries: Record<string, T>, keys: string[]) {
return keys.reduce((map, key) => {
--
To stop receiving notification emails like this one, please contact
[email protected].