Djjanks commented on code in PR #14:
URL: https://github.com/apache/arrow-js/pull/14#discussion_r2345306175
##########
src/ipc/reader.ts:
##########
@@ -354,12 +358,31 @@ abstract class RecordBatchReaderImpl<T extends TypeMap =
any> implements RecordB
return this;
}
- protected _loadRecordBatch(header: metadata.RecordBatch, body: any) {
- const children = this._loadVectors(header, body, this.schema.fields);
+ protected _loadRecordBatch(header: metadata.RecordBatch, body:
Uint8Array): RecordBatch<T> {
+ let children: Data<any>[];
+ if (header.compression != null) {
+ const codec = compressionRegistry.get(header.compression.type);
+ if (codec?.decode && typeof codec.decode === 'function') {
+ const { decommpressedBody, buffers } =
this._decompressBuffers(header, body, codec);
+ children = this._loadCompressedVectors(header,
decommpressedBody, this.schema.fields);
+ header = new metadata.RecordBatch(
+ header.length,
+ header.nodes,
+ buffers,
+ null
+ );
+ } else {
+ throw new Error('Record batch is compressed but codec not
found');
+ }
+ } else {
+ children = this._loadVectors(header, body, this.schema.fields);
+ }
+
const data = makeData({ type: new Struct(this.schema.fields), length:
header.length, children });
return new RecordBatch(this.schema, data);
}
- protected _loadDictionaryBatch(header: metadata.DictionaryBatch, body:
any) {
+
+ protected _loadDictionaryBatch(header: metadata.DictionaryBatch, body:
Uint8Array) {
Review Comment:
I’m using this in a web application. Here’s how you can try it out in your
own project:
### 1. Clone the fork and switch to the feature branch:
``` bash
git clone https://github.com/Djjanks/arrow-js.git
cd arrow-js
git checkout feature/arrow-compression
```
### 2. Build the package (Linux or WSL recommended):
```
yarn install
yarn build
```
### 3. Link the library locally (assuming you use npm):
```
cd targets/apache-arrow/
npm link
```
### 4. Link it in your project:
```
npm uninstall apache-arrow
npm link apache-arrow
```
### 5. Register the codec you want to use:
For example, with LZ4 (see tests or the PR for more examples):
``` ts
// CompressionType supplies the registry key used below.
import { Codec, compressionRegistry, CompressionType } from 'apache-arrow';
import * as lz4 from 'lz4js';

// Adapter exposing lz4js through the Codec encode/decode interface.
const lz4Codec: Codec = {
    encode(data: Uint8Array): Uint8Array { return lz4.compress(data); },
    decode(data: Uint8Array): Uint8Array { return lz4.decompress(data); }
};

// Register the codec under the LZ4_FRAME compression type so the reader can look it up.
compressionRegistry.set(CompressionType.LZ4_FRAME, lz4Codec);
```
It’s not the most convenient setup, but it’s enough to experiment with
compression support right now.
For a cleaner workflow, it’s better to wait until the PR is merged into the
main repo.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]