ARROW-940: [JS] Generate multiple artifacts Running `npm run build` now produces three sets of artifacts: * `lib/`: CommonJS modules with typescript declarations * `lib-esm/`: ES6 modules with typescript declarations * `_bundles/`: minified and un-minified bundles with source maps for use in the browser
This PR also adds `.npmignore` and `bower.json` to get ready for packaging releases for both npm and bower Author: Brian Hulette <brian.hule...@ccri.com> Closes #663 from TheNeuralBit/multiple-artifacts and squashes the following commits: a056cd9 [Brian Hulette] update README 7779797 [Brian Hulette] add typescript dev dependency 895c95b [Brian Hulette] update npm main file 71aefb9 [Brian Hulette] Add bower config, add repo to npm config 0d47146 [Brian Hulette] updated read_file example b01bd75 [Brian Hulette] JS lib now creates multiple artifacts: ES5/6 with .d.ts files, and bundles Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/2d6453b2 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/2d6453b2 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/2d6453b2 Branch: refs/heads/master Commit: 2d6453b25318b81af967f0cfdddacf183a60098c Parents: 22c738c Author: Brian Hulette <brian.hule...@ccri.com> Authored: Tue May 9 18:31:28 2017 -0400 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Tue May 9 18:31:28 2017 -0400 ---------------------------------------------------------------------- js/.gitignore | 7 +- js/.npmignore | 7 + js/README.md | 13 +- js/bin/arrow2csv.js | 13 +- js/bin/arrow_schema.js | 2 +- js/bower.json | 17 ++ js/examples/read_file.html | 12 +- js/flatbuffers.sh | 14 +- js/lib/Arrow_generated.d.ts | 5 - js/lib/arrow.ts | 493 -------------------------------- js/lib/bitarray.ts | 42 --- js/lib/types.ts | 589 --------------------------------------- js/package.json | 10 +- js/spec/arrow.js | 2 +- js/src/Arrow_generated.d.ts | 5 + js/src/arrow.ts | 493 ++++++++++++++++++++++++++++++++ js/src/bitarray.ts | 42 +++ js/src/types.ts | 589 +++++++++++++++++++++++++++++++++++++++ js/tsconfig.json | 14 +- js/webpack.config.js | 38 ++- 20 files changed, 1236 insertions(+), 1171 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/.gitignore ---------------------------------------------------------------------- diff --git a/js/.gitignore b/js/.gitignore index f67c1cc..ea5514f 100644 --- a/js/.gitignore +++ b/js/.gitignore @@ -1,6 +1,7 @@ -lib/*_generated.js -dist +src/Arrow_generated.js +lib +lib-esm +_bundles node_modules -typings .idea *.iml http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/.npmignore ---------------------------------------------------------------------- diff --git a/js/.npmignore b/js/.npmignore new file mode 100644 index 0000000..333aeec --- /dev/null +++ b/js/.npmignore @@ -0,0 +1,7 @@ +.gitignore +.npmignore +src/ +spec/ +tsconfig.json +webpack.config.js +flatbuffers.sh http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/README.md ---------------------------------------------------------------------- diff --git a/js/README.md b/js/README.md index cdabf54..167bafc 100644 --- a/js/README.md +++ b/js/README.md @@ -18,9 +18,8 @@ From this directory, run: ``` bash $ npm install # pull dependencies -$ tsc # build typescript -$ webpack # bundle for the browser -$ npm test # run unit tests +$ npm run build # build typescript (run tsc and webpack) +$ npm run test # run the unit tests (node.js only) ``` ### Usage @@ -30,17 +29,17 @@ The library is designed to be used with node.js or in the browser, this reposito Import the arrow module: ``` js -var arrow = require("arrow.js"); +var arrow = require("arrow"); ``` See [bin/arrow_schema.js](bin/arrow_schema.js) and [bin/arrow2csv.js](bin/arrow2csv.js) for usage examples. 
#### Browser -Include `dist/arrow-bundle.js` in a `<script />` tag: +Include `_bundles/arrow.js` in a `<script />` tag: ``` html -<script src="arrow-bundle.js"/> +<script src="_bundles/arrow.js"/> ``` -See [examples/read_file.html](examples/read_file.html) for a usage example - or try it out now at [theneuralbit.github.io/arrow](http://theneuralbit.github.io/arrow) +See [examples/read_file.html](examples/read_file.html) for a usage example. ### API ##### `arrow.getReader(buffer)` http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/bin/arrow2csv.js ---------------------------------------------------------------------- diff --git a/js/bin/arrow2csv.js b/js/bin/arrow2csv.js index 8122e95..c1801f7 100755 --- a/js/bin/arrow2csv.js +++ b/js/bin/arrow2csv.js @@ -19,7 +19,7 @@ var fs = require('fs') var process = require('process'); -var arrow = require('../dist/arrow.js'); +var arrow = require('../lib/arrow.js'); var program = require('commander'); function list (val) { @@ -39,10 +39,15 @@ if (!program.schema) { var buf = fs.readFileSync(process.argv[process.argv.length - 1]); var reader = arrow.getReader(buf); -reader.loadNextBatch(); +var nrecords -for (var i = 0; i < reader.getVector(program.schema[0]).length; i += 1|0) { +nrecords = reader.loadNextBatch(); +while (nrecords > 0) { + for (var i = 0; i < nrecords; i += 1|0) { console.log(program.schema.map(function (field) { - return '' + reader.getVector(field).get(i); + return '' + reader.getVector(field).get(i); }).join(',')); + } + nrecords = reader.loadNextBatch(); + if (nrecords > 0) console.log('---'); } http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/bin/arrow_schema.js ---------------------------------------------------------------------- diff --git a/js/bin/arrow_schema.js b/js/bin/arrow_schema.js index 44dabb4..4917628 100755 --- a/js/bin/arrow_schema.js +++ b/js/bin/arrow_schema.js @@ -19,7 +19,7 @@ var fs = require('fs'); var process = require('process'); -var arrow = 
require('../dist/arrow.js'); +var arrow = require('../lib/arrow.js'); var buf = fs.readFileSync(process.argv[process.argv.length - 1]); var reader = arrow.getReader(buf); http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/bower.json ---------------------------------------------------------------------- diff --git a/js/bower.json b/js/bower.json new file mode 100644 index 0000000..c2099f8 --- /dev/null +++ b/js/bower.json @@ -0,0 +1,17 @@ +{ + "name": "arrow", + "description": "", + "main": "_bundles/arrow.js", + "authors": [], + "license": "Apache-2.0", + "homepage": "http://arrow.apache.org", + "ignore": [ + ".gitignore", + ".npmignore", + "src/", + "spec/", + "tsconfig.json", + "webpack.config.js", + "flatbuffers.sh" + ] +} http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/examples/read_file.html ---------------------------------------------------------------------- diff --git a/js/examples/read_file.html b/js/examples/read_file.html index 933b142..5a650a0 100644 --- a/js/examples/read_file.html +++ b/js/examples/read_file.html @@ -39,12 +39,10 @@ function addCell (tr, type, name) { tr.appendChild(td); } reader.onload = function (evt) { - var buf = new Uint8Array(evt.target.result); - var schema = arrow.loadSchemaFromStream(buf); - var vectors = arrow.loadVectorsFromStream(buf); - var length = vectors[schema[0].name].length; + var reader = new arrow.getReader(new Uint8Array(evt.target.result)); + var schema = reader.getSchema(); + var length = reader.loadNextBatch(); console.log(JSON.stringify(schema, null, '\t')); -console.log(JSON.stringify(vectors, null, '\t')); var thead = document.getElementById("thead"); var tbody = document.getElementById("tbody"); @@ -58,7 +56,7 @@ console.log(JSON.stringify(vectors, null, '\t')); for (var i = 0; i < length; i += 1|0) { var tr = document.createElement("tr"); - schema.forEach(function (d) { addCell(tr, "td", vectors[d.name].get(i)); }); + schema.forEach(function (d) { addCell(tr, "td", 
reader.getVector(d.name).get(i)); }); tbody.appendChild(tr); } } @@ -76,6 +74,6 @@ function handleFiles(files) { <tbody id="tbody"> </tbody> </table> - <script type="text/javascript" src="../dist/arrow-bundle.js"></script> + <script type="text/javascript" src="../_bundles/arrow.js"></script> </body> </html> http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/flatbuffers.sh ---------------------------------------------------------------------- diff --git a/js/flatbuffers.sh b/js/flatbuffers.sh index 99d2815..55967f8 100755 --- a/js/flatbuffers.sh +++ b/js/flatbuffers.sh @@ -13,7 +13,13 @@ # limitations under the License. See accompanying LICENSE file. echo "Compiling flatbuffer schemas..." -#flatc -o lib --js ../format/Message.fbs ../format/File.fbs -flatc -o lib --js ../format/*.fbs -rm -f lib/Arrow_generated.js -cat lib/*_generated.js > lib/Arrow_generated.js +mkdir -p lib lib-esm +DIR=`mktemp -d` +flatc -o $DIR --js ../format/*.fbs +cat $DIR/*_generated.js > src/Arrow_generated.js + +# Duplicate in the tsc-generated outputs - we can't make tsc pull in .js files +# and still prooduce declaration files +cat $DIR/*_generated.js > lib/Arrow_generated.js +cat $DIR/*_generated.js > lib-esm/Arrow_generated.js +rm -rf $DIR http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/lib/Arrow_generated.d.ts ---------------------------------------------------------------------- diff --git a/js/lib/Arrow_generated.d.ts b/js/lib/Arrow_generated.d.ts deleted file mode 100644 index 1f5b454..0000000 --- a/js/lib/Arrow_generated.d.ts +++ /dev/null @@ -1,5 +0,0 @@ -export var org: { - apache: { - arrow: any - } -} http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/lib/arrow.ts ---------------------------------------------------------------------- diff --git a/js/lib/arrow.ts b/js/lib/arrow.ts deleted file mode 100644 index 74def4d..0000000 --- a/js/lib/arrow.ts +++ /dev/null @@ -1,493 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one 
-// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import { flatbuffers } from 'flatbuffers'; -import { org } from './Arrow_generated'; -import { vectorFromField, Vector } from './types'; - -import ByteBuffer = flatbuffers.ByteBuffer; -var Footer = org.apache.arrow.flatbuf.Footer; -var Message = org.apache.arrow.flatbuf.Message; -var MessageHeader = org.apache.arrow.flatbuf.MessageHeader; -var RecordBatch = org.apache.arrow.flatbuf.RecordBatch; -var DictionaryBatch = org.apache.arrow.flatbuf.DictionaryBatch; -var Schema = org.apache.arrow.flatbuf.Schema; -var Type = org.apache.arrow.flatbuf.Type; -var VectorType = org.apache.arrow.flatbuf.VectorType; - -export class ArrowReader { - - private bb; - private schema: any = []; - private vectors: Vector[]; - private vectorMap: any = {}; - private dictionaries: any = {}; - private batches: any = []; - private batchIndex: number = 0; - - constructor(bb, schema, vectors: Vector[], batches, dictionaries) { - this.bb = bb; - this.schema = schema; - this.vectors = vectors; - for (var i = 0; i < vectors.length; i += 1|0) { - this.vectorMap[vectors[i].name] = vectors[i] - } - this.batches = batches; - this.dictionaries = dictionaries; - } - - loadNextBatch() { - if (this.batchIndex < this.batches.length) { - var batch = 
this.batches[this.batchIndex]; - this.batchIndex += 1; - loadVectors(this.bb, this.vectors, batch); - return batch.length; - } else { - return 0; - } - } - - getSchema() { - return this.schema; - } - - getVectors() { - return this.vectors; - } - - getVector(name) { - return this.vectorMap[name]; - } - - getBatchCount() { - return this.batches.length; - } - - // the index of the next batch to be loaded - getBatchIndex() { - return this.batchIndex; - } - - // set the index of the next batch to be loaded - setBatchIndex(i: number) { - this.batchIndex = i; - } -} - -export function getSchema(buf) { return getReader(buf).getSchema(); } - -export function getReader(buf) : ArrowReader { - if (_checkMagic(buf, 0)) { - return getFileReader(buf); - } else { - return getStreamReader(buf); - } -} - -export function getStreamReader(buf) : ArrowReader { - var bb = new ByteBuffer(buf); - - var schema = _loadSchema(bb), - field, - vectors: Vector[] = [], - i,j, - iLen,jLen, - batch, - recordBatches = [], - dictionaryBatches = [], - dictionaries = {}; - - for (i = 0, iLen = schema.fieldsLength(); i < iLen; i += 1|0) { - field = schema.fields(i); - _createDictionaryVectors(field, dictionaries); - vectors.push(vectorFromField(field, dictionaries)); - } - - while (bb.position() < bb.capacity()) { - batch = _loadBatch(bb); - if (batch == null) { - break; - } else if (batch.type == MessageHeader.DictionaryBatch) { - dictionaryBatches.push(batch); - } else if (batch.type == MessageHeader.RecordBatch) { - recordBatches.push(batch) - } else { - console.error("Expected batch type" + MessageHeader.RecordBatch + " or " + - MessageHeader.DictionaryBatch + " but got " + batch.type); - } - } - - // load dictionary vectors - for (i = 0; i < dictionaryBatches.length; i += 1|0) { - batch = dictionaryBatches[i]; - loadVectors(bb, [dictionaries[batch.id]], batch); - } - - return new ArrowReader(bb, parseSchema(schema), vectors, recordBatches, dictionaries); -} - -export function getFileReader (buf) : 
ArrowReader { - var bb = new ByteBuffer(buf); - - var footer = _loadFooter(bb); - - var schema = footer.schema(); - var i, len, field, - vectors: Vector[] = [], - block, - batch, - recordBatchBlocks = [], - dictionaryBatchBlocks = [], - dictionaries = {}; - - for (i = 0, len = schema.fieldsLength(); i < len; i += 1|0) { - field = schema.fields(i); - _createDictionaryVectors(field, dictionaries); - vectors.push(vectorFromField(field, dictionaries)); - } - - for (i = 0; i < footer.dictionariesLength(); i += 1|0) { - block = footer.dictionaries(i); - dictionaryBatchBlocks.push({ - offset: block.offset().low, - metaDataLength: block.metaDataLength(), - bodyLength: block.bodyLength().low, - }) - } - - for (i = 0; i < footer.recordBatchesLength(); i += 1|0) { - block = footer.recordBatches(i); - recordBatchBlocks.push({ - offset: block.offset().low, - metaDataLength: block.metaDataLength(), - bodyLength: block.bodyLength().low, - }) - } - - var dictionaryBatches = dictionaryBatchBlocks.map(function (block) { - bb.setPosition(block.offset); - // TODO: Make sure this is a dictionary batch - return _loadBatch(bb); - }); - - var recordBatches = recordBatchBlocks.map(function (block) { - bb.setPosition(block.offset); - // TODO: Make sure this is a record batch - return _loadBatch(bb); - }); - - // load dictionary vectors - for (i = 0; i < dictionaryBatches.length; i += 1|0) { - batch = dictionaryBatches[i]; - loadVectors(bb, [dictionaries[batch.id]], batch); - } - - return new ArrowReader(bb, parseSchema(schema), vectors, recordBatches, dictionaries); -} - -function _loadFooter(bb) { - var fileLength: number = bb.bytes_.length; - - if (fileLength < MAGIC.length*2 + 4) { - console.error("file too small " + fileLength); - return; - } - - if (!_checkMagic(bb.bytes_, 0)) { - console.error("missing magic bytes at beginning of file") - return; - } - - if (!_checkMagic(bb.bytes_, fileLength - MAGIC.length)) { - console.error("missing magic bytes at end of file") - return; - } - - 
var footerLengthOffset: number = fileLength - MAGIC.length - 4; - bb.setPosition(footerLengthOffset); - var footerLength: number = Int32FromByteBuffer(bb, footerLengthOffset) - - if (footerLength <= 0 || footerLength + MAGIC.length*2 + 4 > fileLength) { - console.log("Invalid footer length: " + footerLength) - } - - var footerOffset: number = footerLengthOffset - footerLength; - bb.setPosition(footerOffset); - var footer = Footer.getRootAsFooter(bb); - - return footer; -} - -function _loadSchema(bb) { - var message =_loadMessage(bb); - if (message.headerType() != MessageHeader.Schema) { - console.error("Expected header type " + MessageHeader.Schema + " but got " + message.headerType()); - return; - } - return message.header(new Schema()); -} - -function _loadBatch(bb) { - var message = _loadMessage(bb); - if (message == null) { - return; - } else if (message.headerType() == MessageHeader.RecordBatch) { - var batch = { header: message.header(new RecordBatch()), length: message.bodyLength().low } - return _loadRecordBatch(bb, batch); - } else if (message.headerType() == MessageHeader.DictionaryBatch) { - var batch = { header: message.header(new DictionaryBatch()), length: message.bodyLength().low } - return _loadDictionaryBatch(bb, batch); - } else { - console.error("Expected header type " + MessageHeader.RecordBatch + " or " + MessageHeader.DictionaryBatch + - " but got " + message.headerType()); - return; - } -} - -function _loadRecordBatch(bb, batch) { - var data = batch.header; - var i, nodes_ = [], nodesLength = data.nodesLength(); - var buffer, buffers_ = [], buffersLength = data.buffersLength(); - - for (i = 0; i < nodesLength; i += 1) { - nodes_.push(data.nodes(i)); - } - for (i = 0; i < buffersLength; i += 1) { - buffer = data.buffers(i); - buffers_.push({ offset: bb.position() + buffer.offset().low, length: buffer.length().low }); - } - // position the buffer after the body to read the next message - bb.setPosition(bb.position() + batch.length); - - return 
{ nodes: nodes_, buffers: buffers_, length: data.length().low, type: MessageHeader.RecordBatch }; -} - -function _loadDictionaryBatch(bb, batch) { - var id_ = batch.header.id().toFloat64().toString(), data = batch.header.data(); - var i, nodes_ = [], nodesLength = data.nodesLength(); - var buffer, buffers_ = [], buffersLength = data.buffersLength(); - - for (i = 0; i < nodesLength; i += 1) { - nodes_.push(data.nodes(i)); - } - for (i = 0; i < buffersLength; i += 1) { - buffer = data.buffers(i); - buffers_.push({ offset: bb.position() + buffer.offset().low, length: buffer.length().low }); - } - // position the buffer after the body to read the next message - bb.setPosition(bb.position() + batch.length); - - return { id: id_, nodes: nodes_, buffers: buffers_, length: data.length().low, type: MessageHeader.DictionaryBatch }; -} - -function _loadMessage(bb) { - var messageLength: number = Int32FromByteBuffer(bb, bb.position()); - if (messageLength == 0) { - return; - } - bb.setPosition(bb.position() + 4); - var message = Message.getRootAsMessage(bb); - // position the buffer at the end of the message so it's ready to read further - bb.setPosition(bb.position() + messageLength); - - return message; -} - -function _createDictionaryVectors(field, dictionaries) { - var encoding = field.dictionary(); - if (encoding != null) { - var id = encoding.id().toFloat64().toString(); - if (dictionaries[id] == null) { - // create a field for the dictionary - var dictionaryField = _createDictionaryField(id, field); - dictionaries[id] = vectorFromField(dictionaryField, null); - } - } - - // recursively examine child fields - for (var i = 0, len = field.childrenLength(); i < len; i += 1|0) { - _createDictionaryVectors(field.children(i), dictionaries); - } -} - -function _createDictionaryField(id, field) { - var builder = new flatbuffers.Builder(); - var nameOffset = builder.createString("dict-" + id); - - var typeType = field.typeType(); - var typeOffset; - if (typeType === Type.Int) { - 
var type = field.type(new org.apache.arrow.flatbuf.Int()); - org.apache.arrow.flatbuf.Int.startInt(builder); - org.apache.arrow.flatbuf.Int.addBitWidth(builder, type.bitWidth()); - org.apache.arrow.flatbuf.Int.addIsSigned(builder, type.isSigned()); - typeOffset = org.apache.arrow.flatbuf.Int.endInt(builder); - } else if (typeType === Type.FloatingPoint) { - var type = field.type(new org.apache.arrow.flatbuf.FloatingPoint()); - org.apache.arrow.flatbuf.FloatingPoint.startFloatingPoint(builder); - org.apache.arrow.flatbuf.FloatingPoint.addPrecision(builder, type.precision()); - typeOffset = org.apache.arrow.flatbuf.FloatingPoint.endFloatingPoint(builder); - } else if (typeType === Type.Utf8) { - org.apache.arrow.flatbuf.Utf8.startUtf8(builder); - typeOffset = org.apache.arrow.flatbuf.Utf8.endUtf8(builder); - } else if (typeType === Type.Date) { - var type = field.type(new org.apache.arrow.flatbuf.Date()); - org.apache.arrow.flatbuf.Date.startDate(builder); - org.apache.arrow.flatbuf.Date.addUnit(builder, type.unit()); - typeOffset = org.apache.arrow.flatbuf.Date.endDate(builder); - } else { - throw "Unimplemented dictionary type " + typeType; - } - if (field.childrenLength() > 0) { - throw "Dictionary encoded fields can't have children" - } - var childrenOffset = org.apache.arrow.flatbuf.Field.createChildrenVector(builder, []); - - var layout, layoutOffsets = []; - for (var i = 0, len = field.layoutLength(); i < len; i += 1|0) { - layout = field.layout(i); - org.apache.arrow.flatbuf.VectorLayout.startVectorLayout(builder); - org.apache.arrow.flatbuf.VectorLayout.addBitWidth(builder, layout.bitWidth()); - org.apache.arrow.flatbuf.VectorLayout.addType(builder, layout.type()); - layoutOffsets.push(org.apache.arrow.flatbuf.VectorLayout.endVectorLayout(builder)); - } - var layoutOffset = org.apache.arrow.flatbuf.Field.createLayoutVector(builder, layoutOffsets); - - org.apache.arrow.flatbuf.Field.startField(builder); - org.apache.arrow.flatbuf.Field.addName(builder, 
nameOffset); - org.apache.arrow.flatbuf.Field.addNullable(builder, field.nullable()); - org.apache.arrow.flatbuf.Field.addTypeType(builder, typeType); - org.apache.arrow.flatbuf.Field.addType(builder, typeOffset); - org.apache.arrow.flatbuf.Field.addChildren(builder, childrenOffset); - org.apache.arrow.flatbuf.Field.addLayout(builder, layoutOffset); - var offset = org.apache.arrow.flatbuf.Field.endField(builder); - builder.finish(offset); - - return org.apache.arrow.flatbuf.Field.getRootAsField(builder.bb); -} - -function Int32FromByteBuffer(bb, offset) { - return ((bb.bytes_[offset + 3] & 255) << 24) | - ((bb.bytes_[offset + 2] & 255) << 16) | - ((bb.bytes_[offset + 1] & 255) << 8) | - ((bb.bytes_[offset] & 255)); -} - -var MAGIC_STR = "ARROW1"; -var MAGIC = new Uint8Array(MAGIC_STR.length); -for (var i = 0; i < MAGIC_STR.length; i += 1|0) { - MAGIC[i] = MAGIC_STR.charCodeAt(i); -} - -function _checkMagic(buf, index) { - for (var i = 0; i < MAGIC.length; i += 1|0) { - if (MAGIC[i] != buf[index + i]) { - return false; - } - } - return true; -} - -var TYPEMAP = {} -TYPEMAP[Type.NONE] = "NONE"; -TYPEMAP[Type.Null] = "Null"; -TYPEMAP[Type.Int] = "Int"; -TYPEMAP[Type.FloatingPoint] = "FloatingPoint"; -TYPEMAP[Type.Binary] = "Binary"; -TYPEMAP[Type.Utf8] = "Utf8"; -TYPEMAP[Type.Bool] = "Bool"; -TYPEMAP[Type.Decimal] = "Decimal"; -TYPEMAP[Type.Date] = "Date"; -TYPEMAP[Type.Time] = "Time"; -TYPEMAP[Type.Timestamp] = "Timestamp"; -TYPEMAP[Type.Interval] = "Interval"; -TYPEMAP[Type.List] = "List"; -TYPEMAP[Type.FixedSizeList] = "FixedSizeList"; -TYPEMAP[Type.Struct_] = "Struct"; -TYPEMAP[Type.Union] = "Union"; - -var VECTORTYPEMAP = {}; -VECTORTYPEMAP[VectorType.OFFSET] = 'OFFSET'; -VECTORTYPEMAP[VectorType.DATA] = 'DATA'; -VECTORTYPEMAP[VectorType.VALIDITY] = 'VALIDITY'; -VECTORTYPEMAP[VectorType.TYPE] = 'TYPE'; - -function parseField(field) { - var children = []; - for (var i = 0; i < field.childrenLength(); i += 1|0) { - children.push(parseField(field.children(i))); - } 
- - var layouts = []; - for (var i = 0; i < field.layoutLength(); i += 1|0) { - layouts.push(VECTORTYPEMAP[field.layout(i).type()]); - } - - return { - name: field.name(), - nullable: field.nullable(), - type: TYPEMAP[field.typeType()], - children: children, - layout: layouts - }; -} - -function parseSchema(schema) { - var result = []; - var this_result, type; - for (var i = 0, len = schema.fieldsLength(); i < len; i += 1|0) { - result.push(parseField(schema.fields(i))); - } - return result; -} - -function loadVectors(bb, vectors: Vector[], recordBatch) { - var indices = { bufferIndex: 0, nodeIndex: 0 }, i; - for (i = 0; i < vectors.length; i += 1) { - loadVector(bb, vectors[i], recordBatch, indices); - } -} - -/** - * Loads a vector with data from a batch - * recordBatch: { nodes: org.apache.arrow.flatbuf.FieldNode[], buffers: { offset: number, length: number }[] } - */ -function loadVector(bb, vector: Vector, recordBatch, indices) { - var node = recordBatch.nodes[indices.nodeIndex], ownBuffersLength, ownBuffers = [], i; - indices.nodeIndex += 1; - - // dictionary vectors are always ints, so will have a data vector plus optional null vector - if (vector.field.dictionary() == null) { - ownBuffersLength = vector.field.layoutLength(); - } else if (vector.field.nullable()) { - ownBuffersLength = 2; - } else { - ownBuffersLength = 1; - } - - for (i = 0; i < ownBuffersLength; i += 1) { - ownBuffers.push(recordBatch.buffers[indices.bufferIndex + i]); - } - indices.bufferIndex += ownBuffersLength; - - vector.loadData(bb, node, ownBuffers); - - var children = vector.getChildVectors(); - for (i = 0; i < children.length; i++) { - loadVector(bb, children[i], recordBatch, indices); - } -} http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/lib/bitarray.ts ---------------------------------------------------------------------- diff --git a/js/lib/bitarray.ts b/js/lib/bitarray.ts deleted file mode 100644 index fc3c091..0000000 --- a/js/lib/bitarray.ts +++ /dev/null @@ 
-1,42 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -export class BitArray { - private view: Uint8Array; - - constructor(buffer: ArrayBuffer, offset: number, length: number) { - this.view = new Uint8Array(buffer, offset || 0, Math.ceil(length / 8)); - } - - get(i) { - var index = (i >> 3) | 0; // | 0 converts to an int. Math.floor works too. - var bit = i % 8; // i % 8 is just as fast as i & 7 - return (this.view[index] & (1 << bit)) !== 0; - } - - set(i) { - var index = (i >> 3) | 0; - var bit = i % 8; - this.view[index] |= 1 << bit; - } - - unset(i) { - var index = (i >> 3) | 0; - var bit = i % 8; - this.view[index] &= ~(1 << bit); - } -} http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/lib/types.ts ---------------------------------------------------------------------- diff --git a/js/lib/types.ts b/js/lib/types.ts deleted file mode 100644 index d656c6a..0000000 --- a/js/lib/types.ts +++ /dev/null @@ -1,589 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import { BitArray } from './bitarray'; -import { TextDecoder } from 'text-encoding'; -import { org } from './Arrow_generated'; - -var Type = org.apache.arrow.flatbuf.Type; - -interface ArrayView { - slice(start: number, end: number) : ArrayView - toString() : string -} - -export abstract class Vector { - field: any; - name: string; - length: number; - null_count: number; - - constructor(field) { - this.field = field; - this.name = field.name(); - } - - /* Access datum at index i */ - abstract get(i); - /* Return array representing data in the range [start, end) */ - abstract slice(start: number, end: number); - /* Return array of child vectors, for container types */ - abstract getChildVectors(); - - /** - * Use recordBatch fieldNodes and Buffers to construct this Vector - * bb: flatbuffers.ByteBuffer - * node: org.apache.arrow.flatbuf.FieldNode - * buffers: { offset: number, length: number }[] - */ - public loadData(bb, node, buffers) { - this.length = node.length().low; - this.null_count = node.nullCount().low; - this.loadBuffers(bb, node, buffers); - } - - protected abstract loadBuffers(bb, node, buffers); - - /** - * Helper function for loading a VALIDITY buffer (for Nullable types) - * bb: flatbuffers.ByteBuffer - * buffer: org.apache.arrow.flatbuf.Buffer - */ - static loadValidityBuffer(bb, buffer) : BitArray { - var arrayBuffer = bb.bytes_.buffer; - var offset = 
bb.bytes_.byteOffset + buffer.offset; - return new BitArray(arrayBuffer, offset, buffer.length * 8); - } - - /** - * Helper function for loading an OFFSET buffer - * buffer: org.apache.arrow.flatbuf.Buffer - */ - static loadOffsetBuffer(bb, buffer) : Int32Array { - var arrayBuffer = bb.bytes_.buffer; - var offset = bb.bytes_.byteOffset + buffer.offset; - var length = buffer.length / Int32Array.BYTES_PER_ELEMENT; - return new Int32Array(arrayBuffer, offset, length); - } - -} - -class SimpleVector<T extends ArrayView> extends Vector { - protected dataView: T; - private TypedArray: { new(buffer: any, offset: number, length: number): T, BYTES_PER_ELEMENT: number }; - - constructor (field, TypedArray: { new(buffer: any, offset: number, length: number): T, BYTES_PER_ELEMENT: number }) { - super(field); - this.TypedArray = TypedArray; - } - - getChildVectors() { - return []; - } - - get(i) { - return this.dataView[i]; - } - - loadBuffers(bb, node, buffers) { - this.loadDataBuffer(bb, buffers[0]); - } - - /** - * buffer: org.apache.arrow.flatbuf.Buffer - */ - protected loadDataBuffer(bb, buffer) { - var arrayBuffer = bb.bytes_.buffer; - var offset = bb.bytes_.byteOffset + buffer.offset; - var length = buffer.length / this.TypedArray.BYTES_PER_ELEMENT; - this.dataView = new this.TypedArray(arrayBuffer, offset, length); - } - - getDataView() { - return this.dataView; - } - - toString() { - return this.dataView.toString(); - } - - slice(start, end) { - return this.dataView.slice(start, end); - } -} - -class NullableSimpleVector<T extends ArrayView> extends SimpleVector<T> { - - protected validityView: BitArray; - - get(i: number) { - if (this.validityView.get(i)) { - return this.dataView[i]; - } else { - return null; - } - } - - loadBuffers(bb, node, buffers) { - this.validityView = Vector.loadValidityBuffer(bb, buffers[0]); - this.loadDataBuffer(bb, buffers[1]); - } - - getValidityVector() { - return this.validityView; - } -} - -class Uint8Vector extends 
SimpleVector<Uint8Array> { constructor(field) { super(field, Uint8Array); }; } -class Uint16Vector extends SimpleVector<Uint16Array> { constructor(field) { super(field, Uint16Array); }; } -class Uint32Vector extends SimpleVector<Uint32Array> { constructor(field) { super(field, Uint32Array); }; } -class Int8Vector extends SimpleVector<Uint8Array> { constructor(field) { super(field, Uint8Array); }; } -class Int16Vector extends SimpleVector<Uint16Array> { constructor(field) { super(field, Uint16Array); }; } -class Int32Vector extends SimpleVector<Uint32Array> { constructor(field) { super(field, Uint32Array); }; } -class Float32Vector extends SimpleVector<Float32Array> { constructor(field) { super(field, Float32Array); }; } -class Float64Vector extends SimpleVector<Float64Array> { constructor(field) { super(field, Float64Array); }; } - -class NullableUint8Vector extends NullableSimpleVector<Uint8Array> { constructor(field) { super(field, Uint8Array); }; } -class NullableUint16Vector extends NullableSimpleVector<Uint16Array> { constructor(field) { super(field, Uint16Array); }; } -class NullableUint32Vector extends NullableSimpleVector<Uint32Array> { constructor(field) { super(field, Uint32Array); }; } -class NullableInt8Vector extends NullableSimpleVector<Uint8Array> { constructor(field) { super(field, Uint8Array); }; } -class NullableInt16Vector extends NullableSimpleVector<Uint16Array> { constructor(field) { super(field, Uint16Array); }; } -class NullableInt32Vector extends NullableSimpleVector<Uint32Array> { constructor(field) { super(field, Uint32Array); }; } -class NullableFloat32Vector extends NullableSimpleVector<Float32Array> { constructor(field) { super(field, Float32Array); }; } -class NullableFloat64Vector extends NullableSimpleVector<Float64Array> { constructor(field) { super(field, Float64Array); }; } - -class Uint64Vector extends SimpleVector<Uint32Array> { - constructor(field) { - super(field, Uint32Array); - } - - get(i: number) { - return { low: 
this.dataView[i * 2], high: this.dataView[(i * 2) + 1] }; - } -} - -class NullableUint64Vector extends NullableSimpleVector<Uint32Array> { - constructor(field) { - super(field, Uint32Array); - } - - get(i: number) { - if (this.validityView.get(i)) { - return { low: this.dataView[i * 2], high: this.dataView[(i * 2) + 1] }; - } else { - return null; - } - } -} - -class Int64Vector extends NullableSimpleVector<Uint32Array> { - constructor(field) { - super(field, Uint32Array); - } - - get(i: number) { - return { low: this.dataView[i * 2], high: this.dataView[(i * 2) + 1] }; - } -} - -class NullableInt64Vector extends NullableSimpleVector<Uint32Array> { - constructor(field) { - super(field, Uint32Array); - } - - get(i: number) { - if (this.validityView.get(i)) { - return { low: this.dataView[i * 2], high: this.dataView[(i * 2) + 1] }; - } else { - return null; - } - } -} - -class DateVector extends SimpleVector<Uint32Array> { - constructor(field) { - super(field, Uint32Array); - } - - get (i) { - return new Date(super.get(2*i+1)*Math.pow(2,32) + super.get(2*i)); - } -} - -class NullableDateVector extends DateVector { - private validityView: BitArray; - - loadBuffers(bb, node, buffers) { - this.validityView = Vector.loadValidityBuffer(bb, buffers[0]); - this.loadDataBuffer(bb, buffers[1]); - } - - get (i) { - if (this.validityView.get(i)) { - return super.get(i); - } else { - return null; - } - } - - getValidityVector() { - return this.validityView; - } -} - -class Utf8Vector extends SimpleVector<Uint8Array> { - protected offsetView: Int32Array; - static decoder: TextDecoder = new TextDecoder('utf8'); - - constructor(field) { - super(field, Uint8Array); - } - - loadBuffers(bb, node, buffers) { - this.offsetView = Vector.loadOffsetBuffer(bb, buffers[0]); - this.loadDataBuffer(bb, buffers[1]); - } - - get(i) { - return Utf8Vector.decoder.decode(this.dataView.slice(this.offsetView[i], this.offsetView[i + 1])); - } - - slice(start: number, end: number) { - var result: 
string[] = []; - for (var i: number = start; i < end; i += 1|0) { - result.push(this.get(i)); - } - return result; - } - - getOffsetView() { - return this.offsetView; - } -} - -class NullableUtf8Vector extends Utf8Vector { - private validityView: BitArray; - - loadBuffers(bb, node, buffers) { - this.validityView = Vector.loadValidityBuffer(bb, buffers[0]); - this.offsetView = Vector.loadOffsetBuffer(bb, buffers[1]); - this.loadDataBuffer(bb, buffers[2]); - } - - get(i) { - if (this.validityView.get(i)) { - return super.get(i); - } else { - return null; - } - } - - getValidityVector() { - return this.validityView; - } -} - -// Nested Types -class ListVector extends Uint32Vector { - private dataVector: Vector; - - constructor(field, dataVector: Vector) { - super(field); - this.dataVector = dataVector; - } - - getChildVectors() { - return [this.dataVector]; - } - - loadBuffers(bb, node, buffers) { - super.loadBuffers(bb, node, buffers); - this.length -= 1; - } - - get(i) { - var offset = super.get(i) - if (offset === null) { - return null; - } - var next_offset = super.get(i + 1) - return this.dataVector.slice(offset, next_offset) - } - - toString() { - return "length: " + (this.length); - } - - slice(start: number, end: number) { - var result = []; - for (var i = start; i < end; i += 1|0) { - result.push(this.get(i)); - } - return result; - } -} - -class NullableListVector extends ListVector { - private validityView: BitArray; - - loadBuffers(bb, node, buffers) { - this.validityView = Vector.loadValidityBuffer(bb, buffers[0]); - this.loadDataBuffer(bb, buffers[1]); - this.length -= 1; - } - - get(i) { - if (this.validityView.get(i)) { - return super.get(i); - } else { - return null; - } - } - - getValidityVector() { - return this.validityView; - } -} - -class FixedSizeListVector extends Vector { - private size: number - private dataVector: Vector; - - constructor(field, size: number, dataVector: Vector) { - super(field); - this.size = size; - this.dataVector = 
dataVector; - } - - getChildVectors() { - return [this.dataVector]; - } - - loadBuffers(bb, node, buffers) { - // no buffers to load - } - - get(i: number) { - return this.dataVector.slice(i * this.size, (i + 1) * this.size); - } - - slice(start : number, end : number) { - var result = []; - for (var i = start; i < end; i += 1|0) { - result.push(this.get(i)); - } - return result; - } - - getListSize() { - return this.size; - } -} - -class NullableFixedSizeListVector extends FixedSizeListVector { - private validityView: BitArray; - - loadBuffers(bb, node, buffers) { - this.validityView = Vector.loadValidityBuffer(bb, buffers[0]); - } - - get(i: number) { - if (this.validityView.get(i)) { - return super.get(i); - } else { - return null; - } - } - - getValidityVector() { - return this.validityView; - } -} - -class StructVector extends Vector { - private validityView: BitArray; - private vectors: Vector[]; - - constructor(field, vectors: Vector[]) { - super(field); - this.vectors = vectors; - } - - getChildVectors() { - return this.vectors; - } - - loadBuffers(bb, node, buffers) { - this.validityView = Vector.loadValidityBuffer(bb, buffers[0]); - } - - get(i : number) { - if (this.validityView.get(i)) { - return this.vectors.map((v: Vector) => v.get(i)); - } else { - return null; - } - } - - slice(start : number, end : number) { - var result = []; - for (var i = start; i < end; i += 1|0) { - result.push(this.get(i)); - } - return result; - } - - getValidityVector() { - return this.validityView; - } -} - -class DictionaryVector extends Vector { - - private indices: Vector; - private dictionary: Vector; - - constructor (field, indices: Vector, dictionary: Vector) { - super(field); - this.indices = indices; - this.dictionary = dictionary; - } - - get(i) { - var encoded = this.indices.get(i); - if (encoded == null) { - return null; - } else { - return this.dictionary.get(encoded); - } - } - - /** Get the dictionary encoded value */ - public getEncoded(i) { - return 
this.indices.get(i); - } - - slice(start, end) { - return this.indices.slice(start, end); // TODO decode - } - - getChildVectors() { - return this.indices.getChildVectors(); - } - - loadBuffers(bb, node, buffers) { - this.indices.loadData(bb, node, buffers); - } - - /** Get the index (encoded) vector */ - public getIndexVector() { - return this.indices; - } - - /** Get the dictionary vector */ - public getDictionaryVector() { - return this.dictionary; - } - - toString() { - return this.indices.toString(); - } -} - -export function vectorFromField(field, dictionaries) : Vector { - var dictionary = field.dictionary(), nullable = field.nullable(); - if (dictionary == null) { - var typeType = field.typeType(); - if (typeType === Type.List) { - var dataVector = vectorFromField(field.children(0), dictionaries); - return nullable ? new NullableListVector(field, dataVector) : new ListVector(field, dataVector); - } else if (typeType === Type.FixedSizeList) { - var dataVector = vectorFromField(field.children(0), dictionaries); - var size = field.type(new org.apache.arrow.flatbuf.FixedSizeList()).listSize(); - if (nullable) { - return new NullableFixedSizeListVector(field, size, dataVector); - } else { - return new FixedSizeListVector(field, size, dataVector); - } - } else if (typeType === Type.Struct_) { - var vectors : Vector[] = []; - for (var i : number = 0; i < field.childrenLength(); i += 1|0) { - vectors.push(vectorFromField(field.children(i), dictionaries)); - } - return new StructVector(field, vectors); - } else { - if (typeType === Type.Int) { - var type = field.type(new org.apache.arrow.flatbuf.Int()); - return _createIntVector(field, type.bitWidth(), type.isSigned(), nullable) - } else if (typeType === Type.FloatingPoint) { - var precision = field.type(new org.apache.arrow.flatbuf.FloatingPoint()).precision(); - if (precision == org.apache.arrow.flatbuf.Precision.SINGLE) { - return nullable ? 
new NullableFloat32Vector(field) : new Float32Vector(field); - } else if (precision == org.apache.arrow.flatbuf.Precision.DOUBLE) { - return nullable ? new NullableFloat64Vector(field) : new Float64Vector(field); - } else { - throw "Unimplemented FloatingPoint precision " + precision; - } - } else if (typeType === Type.Utf8) { - return nullable ? new NullableUtf8Vector(field) : new Utf8Vector(field); - } else if (typeType === Type.Date) { - return nullable ? new NullableDateVector(field) : new DateVector(field); - } else { - throw "Unimplemented type " + typeType; - } - } - } else { - // determine arrow type - default is signed 32 bit int - var type = dictionary.indexType(), bitWidth = 32, signed = true; - if (type != null) { - bitWidth = type.bitWidth(); - signed = type.isSigned(); - } - var indices = _createIntVector(field, bitWidth, signed, nullable); - return new DictionaryVector(field, indices, dictionaries[dictionary.id().toFloat64().toString()]); - } -} - -function _createIntVector(field, bitWidth, signed, nullable) { - if (bitWidth == 64) { - if (signed) { - return nullable ? new NullableInt64Vector(field) : new Int64Vector(field); - } else { - return nullable ? new NullableUint64Vector(field) : new Uint64Vector(field); - } - } else if (bitWidth == 32) { - if (signed) { - return nullable ? new NullableInt32Vector(field) : new Int32Vector(field); - } else { - return nullable ? new NullableUint32Vector(field) : new Uint32Vector(field); - } - } else if (bitWidth == 16) { - if (signed) { - return nullable ? new NullableInt16Vector(field) : new Int16Vector(field); - } else { - return nullable ? new NullableUint16Vector(field) : new Uint16Vector(field); - } - } else if (bitWidth == 8) { - if (signed) { - return nullable ? new NullableInt8Vector(field) : new Int8Vector(field); - } else { - return nullable ? 
new NullableUint8Vector(field) : new Uint8Vector(field); - } - } else { - throw "Unimplemented Int bit width " + bitWidth; - } -} http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/package.json ---------------------------------------------------------------------- diff --git a/js/package.json b/js/package.json index 8687f50..1739e38 100644 --- a/js/package.json +++ b/js/package.json @@ -2,17 +2,21 @@ "name": "arrow", "version": "0.0.0", "description": "", - "main": "dist/arrow.js", + "main": "lib/arrow.js", "scripts": { - "postinstall": "./flatbuffers.sh", - "build": "./flatbuffers.sh && tsc && webpack", + "build": "./flatbuffers.sh && tsc && tsc -m es6 --outDir lib-esm && webpack", + "clean": "rm -rf lib lib-esm _bundles", "test": "./node_modules/mocha/bin/mocha ./spec/arrow.js" }, "author": "", + "repository": "https://github.com/apache/arrow/", "license": "Apache-2.0", "devDependencies": { + "awesome-typescript-loader": "^3.1.3", "chai": "^3.5.0", "mocha": "^3.3.0", + "typescript": "^2.3.2", + "uglifyjs-webpack-plugin": "^0.4.3", "webpack": "^2.3.3" }, "dependencies": { http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/spec/arrow.js ---------------------------------------------------------------------- diff --git a/js/spec/arrow.js b/js/spec/arrow.js index 61a6f81..52c586b 100644 --- a/js/spec/arrow.js +++ b/js/spec/arrow.js @@ -19,7 +19,7 @@ var fs = require('fs'); var chai = require('chai'); var assert = chai.assert; var path= require('path'); -var arrow = require('../dist/arrow.js'); +var arrow = require('../lib/arrow.js'); test_files = [ { http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/src/Arrow_generated.d.ts ---------------------------------------------------------------------- diff --git a/js/src/Arrow_generated.d.ts b/js/src/Arrow_generated.d.ts new file mode 100644 index 0000000..1f5b454 --- /dev/null +++ b/js/src/Arrow_generated.d.ts @@ -0,0 +1,5 @@ +export var org: { + apache: { + arrow: any + } +} 
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/src/arrow.ts ---------------------------------------------------------------------- diff --git a/js/src/arrow.ts b/js/src/arrow.ts new file mode 100644 index 0000000..74def4d --- /dev/null +++ b/js/src/arrow.ts @@ -0,0 +1,493 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import { flatbuffers } from 'flatbuffers'; +import { org } from './Arrow_generated'; +import { vectorFromField, Vector } from './types'; + +import ByteBuffer = flatbuffers.ByteBuffer; +var Footer = org.apache.arrow.flatbuf.Footer; +var Message = org.apache.arrow.flatbuf.Message; +var MessageHeader = org.apache.arrow.flatbuf.MessageHeader; +var RecordBatch = org.apache.arrow.flatbuf.RecordBatch; +var DictionaryBatch = org.apache.arrow.flatbuf.DictionaryBatch; +var Schema = org.apache.arrow.flatbuf.Schema; +var Type = org.apache.arrow.flatbuf.Type; +var VectorType = org.apache.arrow.flatbuf.VectorType; + +export class ArrowReader { + + private bb; + private schema: any = []; + private vectors: Vector[]; + private vectorMap: any = {}; + private dictionaries: any = {}; + private batches: any = []; + private batchIndex: number = 0; + + constructor(bb, schema, vectors: Vector[], batches, dictionaries) { + this.bb = bb; + this.schema = schema; + this.vectors = vectors; + for (var i = 0; i < vectors.length; i += 1|0) { + this.vectorMap[vectors[i].name] = vectors[i] + } + this.batches = batches; + this.dictionaries = dictionaries; + } + + loadNextBatch() { + if (this.batchIndex < this.batches.length) { + var batch = this.batches[this.batchIndex]; + this.batchIndex += 1; + loadVectors(this.bb, this.vectors, batch); + return batch.length; + } else { + return 0; + } + } + + getSchema() { + return this.schema; + } + + getVectors() { + return this.vectors; + } + + getVector(name) { + return this.vectorMap[name]; + } + + getBatchCount() { + return this.batches.length; + } + + // the index of the next batch to be loaded + getBatchIndex() { + return this.batchIndex; + } + + // set the index of the next batch to be loaded + setBatchIndex(i: number) { + this.batchIndex = i; + } +} + +export function getSchema(buf) { return getReader(buf).getSchema(); } + +export function getReader(buf) : ArrowReader { + if (_checkMagic(buf, 0)) { + return getFileReader(buf); + } else { + return 
getStreamReader(buf); + } +} + +export function getStreamReader(buf) : ArrowReader { + var bb = new ByteBuffer(buf); + + var schema = _loadSchema(bb), + field, + vectors: Vector[] = [], + i,j, + iLen,jLen, + batch, + recordBatches = [], + dictionaryBatches = [], + dictionaries = {}; + + for (i = 0, iLen = schema.fieldsLength(); i < iLen; i += 1|0) { + field = schema.fields(i); + _createDictionaryVectors(field, dictionaries); + vectors.push(vectorFromField(field, dictionaries)); + } + + while (bb.position() < bb.capacity()) { + batch = _loadBatch(bb); + if (batch == null) { + break; + } else if (batch.type == MessageHeader.DictionaryBatch) { + dictionaryBatches.push(batch); + } else if (batch.type == MessageHeader.RecordBatch) { + recordBatches.push(batch) + } else { + console.error("Expected batch type" + MessageHeader.RecordBatch + " or " + + MessageHeader.DictionaryBatch + " but got " + batch.type); + } + } + + // load dictionary vectors + for (i = 0; i < dictionaryBatches.length; i += 1|0) { + batch = dictionaryBatches[i]; + loadVectors(bb, [dictionaries[batch.id]], batch); + } + + return new ArrowReader(bb, parseSchema(schema), vectors, recordBatches, dictionaries); +} + +export function getFileReader (buf) : ArrowReader { + var bb = new ByteBuffer(buf); + + var footer = _loadFooter(bb); + + var schema = footer.schema(); + var i, len, field, + vectors: Vector[] = [], + block, + batch, + recordBatchBlocks = [], + dictionaryBatchBlocks = [], + dictionaries = {}; + + for (i = 0, len = schema.fieldsLength(); i < len; i += 1|0) { + field = schema.fields(i); + _createDictionaryVectors(field, dictionaries); + vectors.push(vectorFromField(field, dictionaries)); + } + + for (i = 0; i < footer.dictionariesLength(); i += 1|0) { + block = footer.dictionaries(i); + dictionaryBatchBlocks.push({ + offset: block.offset().low, + metaDataLength: block.metaDataLength(), + bodyLength: block.bodyLength().low, + }) + } + + for (i = 0; i < footer.recordBatchesLength(); i += 1|0) { + 
block = footer.recordBatches(i); + recordBatchBlocks.push({ + offset: block.offset().low, + metaDataLength: block.metaDataLength(), + bodyLength: block.bodyLength().low, + }) + } + + var dictionaryBatches = dictionaryBatchBlocks.map(function (block) { + bb.setPosition(block.offset); + // TODO: Make sure this is a dictionary batch + return _loadBatch(bb); + }); + + var recordBatches = recordBatchBlocks.map(function (block) { + bb.setPosition(block.offset); + // TODO: Make sure this is a record batch + return _loadBatch(bb); + }); + + // load dictionary vectors + for (i = 0; i < dictionaryBatches.length; i += 1|0) { + batch = dictionaryBatches[i]; + loadVectors(bb, [dictionaries[batch.id]], batch); + } + + return new ArrowReader(bb, parseSchema(schema), vectors, recordBatches, dictionaries); +} + +function _loadFooter(bb) { + var fileLength: number = bb.bytes_.length; + + if (fileLength < MAGIC.length*2 + 4) { + console.error("file too small " + fileLength); + return; + } + + if (!_checkMagic(bb.bytes_, 0)) { + console.error("missing magic bytes at beginning of file") + return; + } + + if (!_checkMagic(bb.bytes_, fileLength - MAGIC.length)) { + console.error("missing magic bytes at end of file") + return; + } + + var footerLengthOffset: number = fileLength - MAGIC.length - 4; + bb.setPosition(footerLengthOffset); + var footerLength: number = Int32FromByteBuffer(bb, footerLengthOffset) + + if (footerLength <= 0 || footerLength + MAGIC.length*2 + 4 > fileLength) { + console.log("Invalid footer length: " + footerLength) + } + + var footerOffset: number = footerLengthOffset - footerLength; + bb.setPosition(footerOffset); + var footer = Footer.getRootAsFooter(bb); + + return footer; +} + +function _loadSchema(bb) { + var message =_loadMessage(bb); + if (message.headerType() != MessageHeader.Schema) { + console.error("Expected header type " + MessageHeader.Schema + " but got " + message.headerType()); + return; + } + return message.header(new Schema()); +} + +function 
_loadBatch(bb) { + var message = _loadMessage(bb); + if (message == null) { + return; + } else if (message.headerType() == MessageHeader.RecordBatch) { + var batch = { header: message.header(new RecordBatch()), length: message.bodyLength().low } + return _loadRecordBatch(bb, batch); + } else if (message.headerType() == MessageHeader.DictionaryBatch) { + var batch = { header: message.header(new DictionaryBatch()), length: message.bodyLength().low } + return _loadDictionaryBatch(bb, batch); + } else { + console.error("Expected header type " + MessageHeader.RecordBatch + " or " + MessageHeader.DictionaryBatch + + " but got " + message.headerType()); + return; + } +} + +function _loadRecordBatch(bb, batch) { + var data = batch.header; + var i, nodes_ = [], nodesLength = data.nodesLength(); + var buffer, buffers_ = [], buffersLength = data.buffersLength(); + + for (i = 0; i < nodesLength; i += 1) { + nodes_.push(data.nodes(i)); + } + for (i = 0; i < buffersLength; i += 1) { + buffer = data.buffers(i); + buffers_.push({ offset: bb.position() + buffer.offset().low, length: buffer.length().low }); + } + // position the buffer after the body to read the next message + bb.setPosition(bb.position() + batch.length); + + return { nodes: nodes_, buffers: buffers_, length: data.length().low, type: MessageHeader.RecordBatch }; +} + +function _loadDictionaryBatch(bb, batch) { + var id_ = batch.header.id().toFloat64().toString(), data = batch.header.data(); + var i, nodes_ = [], nodesLength = data.nodesLength(); + var buffer, buffers_ = [], buffersLength = data.buffersLength(); + + for (i = 0; i < nodesLength; i += 1) { + nodes_.push(data.nodes(i)); + } + for (i = 0; i < buffersLength; i += 1) { + buffer = data.buffers(i); + buffers_.push({ offset: bb.position() + buffer.offset().low, length: buffer.length().low }); + } + // position the buffer after the body to read the next message + bb.setPosition(bb.position() + batch.length); + + return { id: id_, nodes: nodes_, buffers: 
buffers_, length: data.length().low, type: MessageHeader.DictionaryBatch }; +} + +function _loadMessage(bb) { + var messageLength: number = Int32FromByteBuffer(bb, bb.position()); + if (messageLength == 0) { + return; + } + bb.setPosition(bb.position() + 4); + var message = Message.getRootAsMessage(bb); + // position the buffer at the end of the message so it's ready to read further + bb.setPosition(bb.position() + messageLength); + + return message; +} + +function _createDictionaryVectors(field, dictionaries) { + var encoding = field.dictionary(); + if (encoding != null) { + var id = encoding.id().toFloat64().toString(); + if (dictionaries[id] == null) { + // create a field for the dictionary + var dictionaryField = _createDictionaryField(id, field); + dictionaries[id] = vectorFromField(dictionaryField, null); + } + } + + // recursively examine child fields + for (var i = 0, len = field.childrenLength(); i < len; i += 1|0) { + _createDictionaryVectors(field.children(i), dictionaries); + } +} + +function _createDictionaryField(id, field) { + var builder = new flatbuffers.Builder(); + var nameOffset = builder.createString("dict-" + id); + + var typeType = field.typeType(); + var typeOffset; + if (typeType === Type.Int) { + var type = field.type(new org.apache.arrow.flatbuf.Int()); + org.apache.arrow.flatbuf.Int.startInt(builder); + org.apache.arrow.flatbuf.Int.addBitWidth(builder, type.bitWidth()); + org.apache.arrow.flatbuf.Int.addIsSigned(builder, type.isSigned()); + typeOffset = org.apache.arrow.flatbuf.Int.endInt(builder); + } else if (typeType === Type.FloatingPoint) { + var type = field.type(new org.apache.arrow.flatbuf.FloatingPoint()); + org.apache.arrow.flatbuf.FloatingPoint.startFloatingPoint(builder); + org.apache.arrow.flatbuf.FloatingPoint.addPrecision(builder, type.precision()); + typeOffset = org.apache.arrow.flatbuf.FloatingPoint.endFloatingPoint(builder); + } else if (typeType === Type.Utf8) { + org.apache.arrow.flatbuf.Utf8.startUtf8(builder); + 
typeOffset = org.apache.arrow.flatbuf.Utf8.endUtf8(builder); + } else if (typeType === Type.Date) { + var type = field.type(new org.apache.arrow.flatbuf.Date()); + org.apache.arrow.flatbuf.Date.startDate(builder); + org.apache.arrow.flatbuf.Date.addUnit(builder, type.unit()); + typeOffset = org.apache.arrow.flatbuf.Date.endDate(builder); + } else { + throw "Unimplemented dictionary type " + typeType; + } + if (field.childrenLength() > 0) { + throw "Dictionary encoded fields can't have children" + } + var childrenOffset = org.apache.arrow.flatbuf.Field.createChildrenVector(builder, []); + + var layout, layoutOffsets = []; + for (var i = 0, len = field.layoutLength(); i < len; i += 1|0) { + layout = field.layout(i); + org.apache.arrow.flatbuf.VectorLayout.startVectorLayout(builder); + org.apache.arrow.flatbuf.VectorLayout.addBitWidth(builder, layout.bitWidth()); + org.apache.arrow.flatbuf.VectorLayout.addType(builder, layout.type()); + layoutOffsets.push(org.apache.arrow.flatbuf.VectorLayout.endVectorLayout(builder)); + } + var layoutOffset = org.apache.arrow.flatbuf.Field.createLayoutVector(builder, layoutOffsets); + + org.apache.arrow.flatbuf.Field.startField(builder); + org.apache.arrow.flatbuf.Field.addName(builder, nameOffset); + org.apache.arrow.flatbuf.Field.addNullable(builder, field.nullable()); + org.apache.arrow.flatbuf.Field.addTypeType(builder, typeType); + org.apache.arrow.flatbuf.Field.addType(builder, typeOffset); + org.apache.arrow.flatbuf.Field.addChildren(builder, childrenOffset); + org.apache.arrow.flatbuf.Field.addLayout(builder, layoutOffset); + var offset = org.apache.arrow.flatbuf.Field.endField(builder); + builder.finish(offset); + + return org.apache.arrow.flatbuf.Field.getRootAsField(builder.bb); +} + +function Int32FromByteBuffer(bb, offset) { + return ((bb.bytes_[offset + 3] & 255) << 24) | + ((bb.bytes_[offset + 2] & 255) << 16) | + ((bb.bytes_[offset + 1] & 255) << 8) | + ((bb.bytes_[offset] & 255)); +} + +var MAGIC_STR = "ARROW1"; +var 
MAGIC = new Uint8Array(MAGIC_STR.length); +for (var i = 0; i < MAGIC_STR.length; i += 1|0) { + MAGIC[i] = MAGIC_STR.charCodeAt(i); +} + +function _checkMagic(buf, index) { + for (var i = 0; i < MAGIC.length; i += 1|0) { + if (MAGIC[i] != buf[index + i]) { + return false; + } + } + return true; +} + +var TYPEMAP = {} +TYPEMAP[Type.NONE] = "NONE"; +TYPEMAP[Type.Null] = "Null"; +TYPEMAP[Type.Int] = "Int"; +TYPEMAP[Type.FloatingPoint] = "FloatingPoint"; +TYPEMAP[Type.Binary] = "Binary"; +TYPEMAP[Type.Utf8] = "Utf8"; +TYPEMAP[Type.Bool] = "Bool"; +TYPEMAP[Type.Decimal] = "Decimal"; +TYPEMAP[Type.Date] = "Date"; +TYPEMAP[Type.Time] = "Time"; +TYPEMAP[Type.Timestamp] = "Timestamp"; +TYPEMAP[Type.Interval] = "Interval"; +TYPEMAP[Type.List] = "List"; +TYPEMAP[Type.FixedSizeList] = "FixedSizeList"; +TYPEMAP[Type.Struct_] = "Struct"; +TYPEMAP[Type.Union] = "Union"; + +var VECTORTYPEMAP = {}; +VECTORTYPEMAP[VectorType.OFFSET] = 'OFFSET'; +VECTORTYPEMAP[VectorType.DATA] = 'DATA'; +VECTORTYPEMAP[VectorType.VALIDITY] = 'VALIDITY'; +VECTORTYPEMAP[VectorType.TYPE] = 'TYPE'; + +function parseField(field) { + var children = []; + for (var i = 0; i < field.childrenLength(); i += 1|0) { + children.push(parseField(field.children(i))); + } + + var layouts = []; + for (var i = 0; i < field.layoutLength(); i += 1|0) { + layouts.push(VECTORTYPEMAP[field.layout(i).type()]); + } + + return { + name: field.name(), + nullable: field.nullable(), + type: TYPEMAP[field.typeType()], + children: children, + layout: layouts + }; +} + +function parseSchema(schema) { + var result = []; + var this_result, type; + for (var i = 0, len = schema.fieldsLength(); i < len; i += 1|0) { + result.push(parseField(schema.fields(i))); + } + return result; +} + +function loadVectors(bb, vectors: Vector[], recordBatch) { + var indices = { bufferIndex: 0, nodeIndex: 0 }, i; + for (i = 0; i < vectors.length; i += 1) { + loadVector(bb, vectors[i], recordBatch, indices); + } +} + +/** + * Loads a vector with data from a 
batch + * recordBatch: { nodes: org.apache.arrow.flatbuf.FieldNode[], buffers: { offset: number, length: number }[] } + */ +function loadVector(bb, vector: Vector, recordBatch, indices) { + var node = recordBatch.nodes[indices.nodeIndex], ownBuffersLength, ownBuffers = [], i; + indices.nodeIndex += 1; + + // dictionary vectors are always ints, so will have a data vector plus optional null vector + if (vector.field.dictionary() == null) { + ownBuffersLength = vector.field.layoutLength(); + } else if (vector.field.nullable()) { + ownBuffersLength = 2; + } else { + ownBuffersLength = 1; + } + + for (i = 0; i < ownBuffersLength; i += 1) { + ownBuffers.push(recordBatch.buffers[indices.bufferIndex + i]); + } + indices.bufferIndex += ownBuffersLength; + + vector.loadData(bb, node, ownBuffers); + + var children = vector.getChildVectors(); + for (i = 0; i < children.length; i++) { + loadVector(bb, children[i], recordBatch, indices); + } +} http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/src/bitarray.ts ---------------------------------------------------------------------- diff --git a/js/src/bitarray.ts b/js/src/bitarray.ts new file mode 100644 index 0000000..fc3c091 --- /dev/null +++ b/js/src/bitarray.ts @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +export class BitArray { + private view: Uint8Array; + + constructor(buffer: ArrayBuffer, offset: number, length: number) { + this.view = new Uint8Array(buffer, offset || 0, Math.ceil(length / 8)); + } + + get(i) { + var index = (i >> 3) | 0; // | 0 converts to an int. Math.floor works too. + var bit = i % 8; // i % 8 is just as fast as i & 7 + return (this.view[index] & (1 << bit)) !== 0; + } + + set(i) { + var index = (i >> 3) | 0; + var bit = i % 8; + this.view[index] |= 1 << bit; + } + + unset(i) { + var index = (i >> 3) | 0; + var bit = i % 8; + this.view[index] &= ~(1 << bit); + } +} http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/src/types.ts ---------------------------------------------------------------------- diff --git a/js/src/types.ts b/js/src/types.ts new file mode 100644 index 0000000..d656c6a --- /dev/null +++ b/js/src/types.ts @@ -0,0 +1,589 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
import { BitArray } from './bitarray';
import { TextDecoder } from 'text-encoding';
import { org } from './Arrow_generated';

var Type = org.apache.arrow.flatbuf.Type;

/**
 * Minimal structural contract required of the typed-array views that back a
 * vector's data buffer.
 */
interface ArrayView {
    slice(start: number, end: number) : ArrayView
    toString() : string
}

/**
 * Base class for every vector in this module. A vector is created from a
 * schema field and later populated from a record batch via loadData().
 */
export abstract class Vector {
    field: any;
    name: string;
    length: number;
    null_count: number;

    constructor(field) {
        this.field = field;
        this.name = field.name();
    }

    /* Access datum at index i */
    abstract get(i);
    /* Return array representing data in the range [start, end) */
    abstract slice(start: number, end: number);
    /* Return array of child vectors, for container types */
    abstract getChildVectors();

    /**
     * Use recordBatch fieldNodes and Buffers to construct this Vector
     * bb: flatbuffers.ByteBuffer
     * node: org.apache.arrow.flatbuf.FieldNode
     * buffers: { offset: number, length: number }[]
     */
    public loadData(bb, node, buffers) {
        // Only the `.low` component of the reported length / null count is kept.
        this.length = node.length().low;
        this.null_count = node.nullCount().low;
        this.loadBuffers(bb, node, buffers);
    }

    /* Subclass hook: build this vector's views from its own buffers */
    protected abstract loadBuffers(bb, node, buffers);

    /**
     * Helper function for loading a VALIDITY buffer (for Nullable types)
     * bb: flatbuffers.ByteBuffer
     * buffer: org.apache.arrow.flatbuf.Buffer
     * Returns a BitArray over the same underlying ArrayBuffer, sized at
     * 8 bits per byte of the buffer.
     */
    static loadValidityBuffer(bb, buffer) : BitArray {
        const arrayBuffer = bb.bytes_.buffer;
        const offset = bb.bytes_.byteOffset + buffer.offset;
        return new BitArray(arrayBuffer, offset, buffer.length * 8);
    }

    /**
     * Helper function for loading an OFFSET buffer
     * buffer: org.apache.arrow.flatbuf.Buffer
     * Returns an Int32Array view (no copy) over the buffer's bytes.
     */
    static loadOffsetBuffer(bb, buffer) : Int32Array {
        const arrayBuffer = bb.bytes_.buffer;
        const offset = bb.bytes_.byteOffset + buffer.offset;
        const length = buffer.length / Int32Array.BYTES_PER_ELEMENT;
        return new Int32Array(arrayBuffer, offset, length);
    }

}

/**
 * Vector whose values live in a single data buffer viewed through the typed
 * array constructor supplied at construction time.
 */
class SimpleVector<T extends ArrayView> extends Vector {
    protected dataView: T;
    private TypedArray: { new(buffer: any, offset: number, length: number): T, BYTES_PER_ELEMENT: number };

    constructor (field, TypedArray: { new(buffer: any, offset: number, length: number): T, BYTES_PER_ELEMENT: number }) {
        super(field);
        this.TypedArray = TypedArray;
    }

    /* Simple vectors have no children */
    getChildVectors() {
        return [];
    }

    /* Raw element i of the data view */
    get(i) {
        return this.dataView[i];
    }

    /* The only buffer of a simple vector is its data buffer */
    loadBuffers(bb, node, buffers) {
        this.loadDataBuffer(bb, buffers[0]);
    }

    /**
     * Create the typed-array view over the data buffer (no copy).
     * buffer: org.apache.arrow.flatbuf.Buffer
     */
    protected loadDataBuffer(bb, buffer) {
        const arrayBuffer = bb.bytes_.buffer;
        const offset = bb.bytes_.byteOffset + buffer.offset;
        const length = buffer.length / this.TypedArray.BYTES_PER_ELEMENT;
        this.dataView = new this.TypedArray(arrayBuffer, offset, length);
    }

    getDataView() {
        return this.dataView;
    }

    toString() {
        return this.dataView.toString();
    }

    slice(start, end) {
        return this.dataView.slice(start, end);
    }
}

/**
 * SimpleVector preceded by a validity bitmap: buffers[0] is the validity
 * buffer and buffers[1] the data buffer.
 */
class NullableSimpleVector<T extends ArrayView> extends SimpleVector<T> {

    protected validityView: BitArray;

    /* Value at i, or null when the validity bit for i is not set */
    get(i: number) {
        if (this.validityView.get(i)) {
            return this.dataView[i];
        } else {
            return null;
        }
    }

    loadBuffers(bb, node, buffers) {
        this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
        this.loadDataBuffer(bb, buffers[1]);
    }

    getValidityVector() {
        return this.validityView;
    }
}

// Fixed-width numeric vectors. Signed integer vectors are backed by signed
// typed arrays so that negative values are read back as negative numbers.
class Uint8Vector   extends SimpleVector<Uint8Array>   { constructor(field) { super(field, Uint8Array);   }; }
class Uint16Vector  extends SimpleVector<Uint16Array>  { constructor(field) { super(field, Uint16Array);  }; }
class Uint32Vector  extends SimpleVector<Uint32Array>  { constructor(field) { super(field, Uint32Array);  }; }
class Int8Vector    extends SimpleVector<Int8Array>    { constructor(field) { super(field, Int8Array);    }; }
class Int16Vector   extends SimpleVector<Int16Array>   { constructor(field) { super(field, Int16Array);   }; }
class Int32Vector   extends SimpleVector<Int32Array>   { constructor(field) { super(field, Int32Array);   }; }
class Float32Vector extends SimpleVector<Float32Array> { constructor(field) { super(field, Float32Array); }; }
class Float64Vector extends SimpleVector<Float64Array> { constructor(field) { super(field, Float64Array); }; }

class NullableUint8Vector   extends NullableSimpleVector<Uint8Array>   { constructor(field) { super(field, Uint8Array);   }; }
class NullableUint16Vector  extends NullableSimpleVector<Uint16Array>  { constructor(field) { super(field, Uint16Array);  }; }
class NullableUint32Vector  extends NullableSimpleVector<Uint32Array>  { constructor(field) { super(field, Uint32Array);  }; }
class NullableInt8Vector    extends NullableSimpleVector<Int8Array>    { constructor(field) { super(field, Int8Array);    }; }
class NullableInt16Vector   extends NullableSimpleVector<Int16Array>   { constructor(field) { super(field, Int16Array);   }; }
class NullableInt32Vector   extends NullableSimpleVector<Int32Array>   { constructor(field) { super(field, Int32Array);   }; }
class NullableFloat32Vector extends NullableSimpleVector<Float32Array> { constructor(field) { super(field, Float32Array); }; }
class NullableFloat64Vector extends NullableSimpleVector<Float64Array> { constructor(field) { super(field, Float64Array); }; }

/**
 * 64-bit values are exposed as a pair of 32-bit words, { low, high }, read
 * from consecutive Uint32 slots 2*i and 2*i + 1.
 */
class Uint64Vector extends SimpleVector<Uint32Array>  {
    constructor(field) {
        super(field, Uint32Array);
    }

    get(i: number) {
        return { low: this.dataView[i * 2], high: this.dataView[(i * 2) + 1] };
    }
}

/* Nullable counterpart of Uint64Vector: null when the validity bit is unset */
class NullableUint64Vector extends NullableSimpleVector<Uint32Array>  {
    constructor(field) {
        super(field, Uint32Array);
    }

    get(i: number) {
        if (this.validityView.get(i)) {
            return { low: this.dataView[i * 2], high: this.dataView[(i * 2) + 1] };
        } else {
            return null;
        }
    }
}

/**
 * Signed 64-bit values, exposed as { low, high } 32-bit words (both read as
 * unsigned words from the Uint32 view).
 *
 * NOTE(review): this non-nullable vector extends NullableSimpleVector, so it
 * expects a validity buffer at buffers[0] although get() never consults it,
 * unlike Uint64Vector which extends SimpleVector. Looks like a copy/paste
 * slip; confirm against the buffer layout the caller passes before changing.
 */
class Int64Vector extends NullableSimpleVector<Uint32Array>  {
    constructor(field) {
        super(field, Uint32Array);
    }

    get(i: number) {
        return { low: this.dataView[i * 2], high: this.dataView[(i * 2) + 1] };
    }
}

/* Nullable counterpart of Int64Vector: null when the validity bit is unset */
class NullableInt64Vector extends NullableSimpleVector<Uint32Array>  {
    constructor(field) {
        super(field, Uint32Array);
    }

    get(i: number) {
        if (this.validityView.get(i)) {
            return { low: this.dataView[i * 2], high: this.dataView[(i * 2) + 1] };
        } else {
            return null;
        }
    }
}

/**
 * Dates are stored as 64-bit integers (two 32-bit words per value) and are
 * handed to the Date constructor as a single number.
 */
class DateVector extends SimpleVector<Uint32Array> {
    constructor(field) {
        super(field, Uint32Array);
    }

    get (i) {
        const low = super.get(2 * i);
        // `| 0` reinterprets the upper word as a signed 32-bit integer so that
        // negative 64-bit values (dates before the epoch) decode correctly
        // instead of overflowing into an Invalid Date.
        const high = super.get(2 * i + 1) | 0;
        return new Date(high * Math.pow(2, 32) + low);
    }
}

/* DateVector with a validity bitmap at buffers[0] and data at buffers[1] */
class NullableDateVector extends DateVector {
    private validityView: BitArray;

    loadBuffers(bb, node, buffers) {
        this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
        this.loadDataBuffer(bb, buffers[1]);
    }

    get (i) {
        if (this.validityView.get(i)) {
            return super.get(i);
        } else {
            return null;
        }
    }

    getValidityVector() {
        return this.validityView;
    }
}

/**
 * Variable-length UTF-8 strings: buffers[0] holds Int32 offsets into the
 * byte data held in buffers[1]; value i spans [offset[i], offset[i + 1]).
 */
class Utf8Vector extends SimpleVector<Uint8Array> {
    protected offsetView: Int32Array;
    static decoder: TextDecoder = new TextDecoder('utf8');

    constructor(field) {
        super(field, Uint8Array);
    }

    loadBuffers(bb, node, buffers) {
        this.offsetView = Vector.loadOffsetBuffer(bb, buffers[0]);
        this.loadDataBuffer(bb, buffers[1]);
    }

    get(i) {
        // subarray() creates a view over the same bytes; no copy is needed
        // just to decode them.
        const bytes = this.dataView.subarray(this.offsetView[i], this.offsetView[i + 1]);
        return Utf8Vector.decoder.decode(bytes);
    }

    /* Decoded strings for indices in [start, end) */
    slice(start: number, end: number) {
        const result: string[] = [];
        for (let i: number = start; i < end; i++) {
            result.push(this.get(i));
        }
        return result;
    }

    getOffsetView() {
        return this.offsetView;
    }
}

/* Utf8Vector with buffers ordered: validity, offsets, data */
class NullableUtf8Vector extends Utf8Vector {
    private validityView: BitArray;

    loadBuffers(bb, node, buffers) {
        this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
        this.offsetView = Vector.loadOffsetBuffer(bb, buffers[1]);
        this.loadDataBuffer(bb, buffers[2]);
    }

    get(i) {
        if (this.validityView.get(i)) {
            return super.get(i);
        } else {
            return null;
        }
    }

    getValidityVector() {
        return this.validityView;
    }
}
// Nested Types

/**
 * Variable-length list. The vector's own data view (inherited from
 * Uint32Vector) holds offsets into the child data vector; list i is the
 * child slice [offset[i], offset[i + 1]).
 */
class ListVector extends Uint32Vector {
    private dataVector: Vector;

    constructor(field, dataVector: Vector) {
        super(field);
        this.dataVector = dataVector;
    }

    getChildVectors() {
        return [this.dataVector];
    }

    loadBuffers(bb, node, buffers) {
        super.loadBuffers(bb, node, buffers);
        // NOTE(review): loadData() has already set length from the field node;
        // decrementing it here presumes that value counted offsets rather than
        // lists. Kept as-is; confirm against the writer's FieldNode semantics.
        this.length -= 1;
    }

    /* Child values of list i, or null when i has no pair of offsets */
    get(i) {
        const offset = super.get(i);
        const next_offset = super.get(i + 1);
        // Reads past the end of the offsets view yield undefined; report that
        // as null rather than slicing the child with undefined bounds.
        if (offset == null || next_offset == null) {
            return null;
        }
        return this.dataVector.slice(offset, next_offset);
    }

    toString() {
        return "length: " + (this.length);
    }

    /* Lists for indices in [start, end) */
    slice(start: number, end: number) {
        const result = [];
        for (let i = start; i < end; i++) {
            result.push(this.get(i));
        }
        return result;
    }
}

/* ListVector with a validity bitmap at buffers[0] and offsets at buffers[1] */
class NullableListVector extends ListVector {
    private validityView: BitArray;

    loadBuffers(bb, node, buffers) {
        this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
        this.loadDataBuffer(bb, buffers[1]);
        // NOTE(review): same length adjustment as ListVector.loadBuffers.
        this.length -= 1;
    }

    get(i) {
        if (this.validityView.get(i)) {
            return super.get(i);
        } else {
            return null;
        }
    }

    getValidityVector() {
        return this.validityView;
    }
}

/**
 * List whose entries all contain `size` child values; entry i is the child
 * slice [i * size, (i + 1) * size). It owns no buffers of its own.
 */
class FixedSizeListVector extends Vector {
    private size: number
    private dataVector: Vector;

    constructor(field, size: number, dataVector: Vector) {
        super(field);
        this.size = size;
        this.dataVector = dataVector;
    }

    getChildVectors() {
        return [this.dataVector];
    }

    loadBuffers(bb, node, buffers) {
        // no buffers to load
    }

    get(i: number) {
        return this.dataVector.slice(i * this.size, (i + 1) * this.size);
    }

    /* Entries for indices in [start, end) */
    slice(start : number, end : number) {
        const result = [];
        for (let i = start; i < end; i++) {
            result.push(this.get(i));
        }
        return result;
    }

    getListSize() {
        return this.size;
    }
}

/* FixedSizeListVector with a validity bitmap at buffers[0] */
class NullableFixedSizeListVector extends FixedSizeListVector {
    private validityView: BitArray;

    loadBuffers(bb, node, buffers) {
        this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
    }

    get(i: number) {
        if (this.validityView.get(i)) {
            return super.get(i);
        } else {
            return null;
        }
    }

    getValidityVector() {
        return this.validityView;
    }
}

/**
 * Struct of child vectors. Entry i is an array with the i-th value of each
 * child (in child order), or null when the validity bit for i is unset.
 * A validity buffer is always read from buffers[0].
 */
class StructVector extends Vector {
    private validityView: BitArray;
    private vectors: Vector[];

    constructor(field, vectors: Vector[]) {
        super(field);
        this.vectors = vectors;
    }

    getChildVectors() {
        return this.vectors;
    }

    loadBuffers(bb, node, buffers) {
        this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
    }

    get(i : number) {
        if (this.validityView.get(i)) {
            return this.vectors.map((v: Vector) => v.get(i));
        } else {
            return null;
        }
    }

    /* Entries for indices in [start, end) */
    slice(start : number, end : number) {
        const result = [];
        for (let i = start; i < end; i++) {
            result.push(this.get(i));
        }
        return result;
    }

    getValidityVector() {
        return this.validityView;
    }
}

/**
 * Dictionary-encoded vector: `indices` holds the encoded values and
 * `dictionary` the vector they index into.
 */
class DictionaryVector extends Vector {

    private indices: Vector;
    private dictionary: Vector;

    constructor (field, indices: Vector, dictionary: Vector) {
        super(field);
        this.indices = indices;
        this.dictionary = dictionary;
    }

    /* Decoded value at i; null/undefined indices decode to null */
    get(i) {
        const encoded = this.indices.get(i);
        if (encoded == null) {
            return null;
        } else {
            return this.dictionary.get(encoded);
        }
    }

    /** Get the dictionary encoded value */
    public getEncoded(i) {
        return this.indices.get(i);
    }

    /* Note: returns the *encoded* indices, unlike get() */
    slice(start, end) {
        return this.indices.slice(start, end); // TODO decode
    }

    getChildVectors() {
        return this.indices.getChildVectors();
    }

    /* All buffers belong to the index vector, which also records its own length */
    loadBuffers(bb, node, buffers) {
        this.indices.loadData(bb, node, buffers);
    }

    /** Get the index (encoded) vector */
    public getIndexVector() {
        return this.indices;
    }

    /** Get the dictionary vector */
    public getDictionaryVector() {
        return this.dictionary;
    }

    toString() {
        return this.indices.toString();
    }
}

/**
 * Build the (still empty) Vector matching a schema field, recursing into
 * child fields for List, FixedSizeList and Struct types.
 *
 * field: schema field; its dictionary(), nullable(), typeType(), type(),
 *        children() and childrenLength() accessors are used
 * dictionaries: dictionary vectors keyed by the stringified dictionary id
 *
 * Throws an Error for types or floating point precisions not handled here.
 */
export function vectorFromField(field, dictionaries) : Vector {
    const dictionary = field.dictionary();
    const nullable = field.nullable();

    if (dictionary != null) {
        // determine arrow type - default is signed 32 bit int
        const indexType = dictionary.indexType();
        let bitWidth = 32;
        let signed = true;
        if (indexType != null) {
            bitWidth = indexType.bitWidth();
            signed = indexType.isSigned();
        }
        const indices = _createIntVector(field, bitWidth, signed, nullable);
        return new DictionaryVector(field, indices, dictionaries[dictionary.id().toFloat64().toString()]);
    }

    const typeType = field.typeType();

    if (typeType === Type.List) {
        const dataVector = vectorFromField(field.children(0), dictionaries);
        return nullable ? new NullableListVector(field, dataVector) : new ListVector(field, dataVector);
    }

    if (typeType === Type.FixedSizeList) {
        const dataVector = vectorFromField(field.children(0), dictionaries);
        const size = field.type(new org.apache.arrow.flatbuf.FixedSizeList()).listSize();
        if (nullable) {
            return new NullableFixedSizeListVector(field, size, dataVector);
        } else {
            return new FixedSizeListVector(field, size, dataVector);
        }
    }

    if (typeType === Type.Struct_) {
        const vectors : Vector[] = [];
        for (let i : number = 0; i < field.childrenLength(); i++) {
            vectors.push(vectorFromField(field.children(i), dictionaries));
        }
        return new StructVector(field, vectors);
    }

    if (typeType === Type.Int) {
        const intType = field.type(new org.apache.arrow.flatbuf.Int());
        return _createIntVector(field, intType.bitWidth(), intType.isSigned(), nullable);
    }

    if (typeType === Type.FloatingPoint) {
        const precision = field.type(new org.apache.arrow.flatbuf.FloatingPoint()).precision();
        if (precision === org.apache.arrow.flatbuf.Precision.SINGLE) {
            return nullable ? new NullableFloat32Vector(field) : new Float32Vector(field);
        } else if (precision === org.apache.arrow.flatbuf.Precision.DOUBLE) {
            return nullable ? new NullableFloat64Vector(field) : new Float64Vector(field);
        } else {
            throw new Error("Unimplemented FloatingPoint precision " + precision);
        }
    }

    if (typeType === Type.Utf8) {
        return nullable ? new NullableUtf8Vector(field) : new Utf8Vector(field);
    }

    if (typeType === Type.Date) {
        return nullable ? new NullableDateVector(field) : new DateVector(field);
    }

    throw new Error("Unimplemented type " + typeType);
}

/**
 * Pick the integer vector class for the given bit width (8, 16, 32 or 64),
 * signedness and nullability. Throws an Error for any other bit width.
 */
function _createIntVector(field, bitWidth, signed, nullable) {
    if (bitWidth === 64) {
        if (signed) {
            return nullable ? new NullableInt64Vector(field) : new Int64Vector(field);
        } else {
            return nullable ? new NullableUint64Vector(field) : new Uint64Vector(field);
        }
    } else if (bitWidth === 32) {
        if (signed) {
            return nullable ? new NullableInt32Vector(field) : new Int32Vector(field);
        } else {
            return nullable ? new NullableUint32Vector(field) : new Uint32Vector(field);
        }
    } else if (bitWidth === 16) {
        if (signed) {
            return nullable ? new NullableInt16Vector(field) : new Int16Vector(field);
        } else {
            return nullable ? new NullableUint16Vector(field) : new Uint16Vector(field);
        }
    } else if (bitWidth === 8) {
        if (signed) {
            return nullable ? new NullableInt8Vector(field) : new Int8Vector(field);
        } else {
            return nullable ? new NullableUint8Vector(field) : new Uint8Vector(field);
        }
    } else {
        throw new Error("Unimplemented Int bit width " + bitWidth);
    }
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/tsconfig.json ---------------------------------------------------------------------- diff --git a/js/tsconfig.json b/js/tsconfig.json index 89c31ef..f2ad0e8 100644 --- a/js/tsconfig.json +++ b/js/tsconfig.json @@ -1,14 +1,14 @@ { "compilerOptions": { - "outDir": "./dist/", - "allowJs": true, - "target": "es5", "module": "commonjs", - "moduleResolution": "node" + "target": "es5", + "lib": ["es2015", "dom"], + "outDir": "lib", + "moduleResolution": "node", + "sourceMap": true, + "declaration": true }, "include": [ - "typings/index.d.ts", - "lib/*.js", - "lib/*.ts" + "src/*.ts" ] }