This is an automated email from the ASF dual-hosted git repository.

bhulette pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 0a672bc  ARROW-2226, ARROW-2233: [JS] Dictionary bugfixes
0a672bc is described below

commit 0a672bcbfd05eb126cd252e5abb0fbef95c051a2
Author: Paul Taylor <[email protected]>
AuthorDate: Wed Feb 28 14:23:56 2018 -0500

    ARROW-2226, ARROW-2233: [JS] Dictionary bugfixes
    
    Resolves https://issues.apache.org/jira/browse/ARROW-2226
    
    Author: Paul Taylor <[email protected]>
    Author: Brian Hulette <[email protected]>
    
    Closes #1671 from trxcllnt/js-fix-dictionary-data and squashes the 
following commits:
    
    ccecf55 <Paul Taylor> Merge pull request #5 from 
TheNeuralBit/dictionary-vector-tests
    3fb9a26 <Brian Hulette> Fix bug in DictionaryVector with nullable indices
    2888657 <Brian Hulette> Add dictionary vector unit tests
    b0a0c08 <Paul Taylor> use indicies.offset in DictionaryData constructor
---
 js/src/data.ts               |  3 +--
 js/src/vector.ts             |  3 +++
 js/test/unit/vector-tests.ts | 61 +++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/js/src/data.ts b/js/src/data.ts
index cdd9f29..3bfb320 100644
--- a/js/src/data.ts
+++ b/js/src/data.ts
@@ -152,10 +152,9 @@ export class DictionaryData<T extends DataType> extends 
BaseData<Dictionary<T>>
     public get indices() { return this._indices; }
     public get dictionary() { return this._dictionary; }
     constructor(type: Dictionary<T>, dictionary: Vector<T>, indices: 
Data<Int<any>>) {
-        super(type, indices.length, (indices as any)._nullCount);
+        super(type, indices.length, indices.offset, (indices as 
any)._nullCount);
         this._indices = indices;
         this._dictionary = dictionary;
-        this.length = this._indices.length;
     }
     public get nullCount() { return this._indices.nullCount; }
     public get nullBitmap() { return this._indices.nullBitmap; }
diff --git a/js/src/vector.ts b/js/src/vector.ts
index f36c691..6c2bbbb 100644
--- a/js/src/vector.ts
+++ b/js/src/vector.ts
@@ -399,6 +399,9 @@ export class DictionaryVector<T extends DataType = 
DataType> extends Vector<Dict
     public readonly dictionary: Vector<T>;
     constructor(data: Data<Dictionary<T>>, view: View<Dictionary<T>> = new 
DictionaryView<T>(data.dictionary, new IntVector(data.indices))) {
         super(data as Data<any>, view);
+        if (view instanceof ValidityView) {
+            view = (view as any).view;
+        }
         if (data instanceof DictionaryData && view instanceof DictionaryView) {
             this.indices = view.indices;
             this.dictionary = data.dictionary;
diff --git a/js/test/unit/vector-tests.ts b/js/test/unit/vector-tests.ts
index e2be229..3eb3fbe 100644
--- a/js/test/unit/vector-tests.ts
+++ b/js/test/unit/vector-tests.ts
@@ -17,14 +17,15 @@
 
 import { TextEncoder } from 'text-encoding-utf-8';
 import Arrow from '../Arrow';
-import { type, TypedArray, TypedArrayConstructor } from '../../src/Arrow';
+import { type, TypedArray, TypedArrayConstructor, Vector } from 
'../../src/Arrow';
+import { packBools } from '../../src/util/bit'
 
 const utf8Encoder = new TextEncoder('utf-8');
 
-const { BoolData, FlatData, FlatListData } = Arrow.data;
-const { IntVector, FloatVector, BoolVector, Utf8Vector } = Arrow.vector;
+const { BoolData, FlatData, FlatListData, DictionaryData } = Arrow.data;
+const { IntVector, FloatVector, BoolVector, Utf8Vector, DictionaryVector } = 
Arrow.vector;
 const {
-    Utf8, Bool,
+    Dictionary, Utf8, Bool,
     Float16, Float32, Float64,
     Int8, Int16, Int32, Int64,
     Uint8, Uint16, Uint32, Uint64,
@@ -310,6 +311,54 @@ describe(`Utf8Vector`, () => {
     let offset = 0;
     const offsets = Uint32Array.of(0, ...values.map((d) => { offset += 
d.length; return offset; }));
     const vector = new Utf8Vector(new FlatListData(new Utf8(), n, null, 
offsets, utf8Encoder.encode(values.join(''))));
+    basicVectorTests(vector, values, ['abc', '123']);
+    describe(`sliced`, () => {
+        basicVectorTests(vector.slice(1,3), values.slice(1,3), ['foo', 'abc']);
+    });
+});
+
+describe(`DictionaryVector`, () => {
+    const dictionary = ['foo', 'bar', 'baz'];
+    const extras = ['abc', '123']; // values to search for that should NOT be 
found
+    let offset = 0;
+    const offsets = Uint32Array.of(0, ...dictionary.map((d) => { offset += 
d.length; return offset; }));
+    const dictionary_vec = new Utf8Vector(new FlatListData(new Utf8(), 
dictionary.length, null, offsets, utf8Encoder.encode(dictionary.join(''))));
+
+    const indices = Array.from({length: 50}, () => Math.random() * 3 | 0);
+
+    describe(`index with nullCount == 0`, () => {
+        const indices_data = new FlatData(new Int32(), indices.length, new 
Uint8Array(0), indices);
+
+        const values = Array.from(indices).map((d) => dictionary[d]);
+        const vector = new DictionaryVector(new DictionaryData(new 
Dictionary(dictionary_vec.type, indices_data.type), dictionary_vec, 
indices_data));
+
+        basicVectorTests(vector, values, extras);
+
+        describe(`sliced`, () => {
+            basicVectorTests(vector.slice(10, 20), values.slice(10,20), 
extras);
+        })
+    });
+
+    describe(`index with nullCount > 0`, () => {
+        const validity = Array.from({length: indices.length}, () => 
Math.random() > 0.2 ? true : false);
+        const indices_data = new FlatData(new Int32(), indices.length, 
packBools(validity), indices, 0, validity.reduce((acc, d) => acc + (d ? 0 : 1), 
0));
+        const values = Array.from(indices).map((d, i) => validity[i] ? 
dictionary[d] : null);
+        const vector = new DictionaryVector(new DictionaryData(new 
Dictionary(dictionary_vec.type, indices_data.type), dictionary_vec, 
indices_data));
+
+        basicVectorTests(vector, values, ['abc', '123']);
+        describe(`sliced`, () => {
+            basicVectorTests(vector.slice(10, 20), values.slice(10,20), 
extras);
+        });
+    });
+});
+
+// Creates some basic tests for the given vector.
+// Verifies that:
+// - `get` and the native iterator return the same data as `values`
+// - `indexOf` returns the same indices as `values`
+function basicVectorTests(vector: Vector, values: any[], extras: any[]) {
+    const n = values.length;
+
     test(`gets expected values`, () => {
         let i = -1;
         while (++i < n) {
@@ -325,14 +374,14 @@ describe(`Utf8Vector`, () => {
         }
     });
     test(`indexOf returns expected values`, () => {
-        let testValues = values.concat(['abc', '12345']);
+        let testValues = values.concat(extras);
 
         for (const value of testValues) {
             const expected = values.indexOf(value);
             expect(vector.indexOf(value)).toEqual(expected);
         }
     });
-});
+}
 
 function toMap<T>(entries: Record<string, T>, keys: string[]) {
     return keys.reduce((map, key) => {

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to