[ 
https://issues.apache.org/jira/browse/ARROW-1693?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16250733#comment-16250733
 ] 

ASF GitHub Bot commented on ARROW-1693:
---------------------------------------

trxcllnt commented on a change in pull request #1294: ARROW-1693: [JS] Fix reading C++ dictionary-encoded vectors
URL: https://github.com/apache/arrow/pull/1294#discussion_r150726479
 
 

 ##########
 File path: js/gulp/test-task.js
 ##########
 @@ -42,3 +54,78 @@ const testTask = ((cache, execArgv, testOptions) => 
memoizeTask(cache, function
 
 module.exports = testTask;
 module.exports.testTask = testTask;
+module.exports.cleanTestData = cleanTestData;
+module.exports.createTestData = createTestData;
+
+async function cleanTestData() {
+    return await del([
+        `${path.resolve('./test/arrows/cpp')}/**`,
+        `${path.resolve('./test/arrows/java')}/**`,
+    ]);
+}
+
+async function createTestData() {
+    const base = path.resolve('./test/arrows');
+    await mkdirp(path.join(base, 'cpp/file'));
+    await mkdirp(path.join(base, 'java/file'));
+    await mkdirp(path.join(base, 'cpp/stream'));
+    await mkdirp(path.join(base, 'java/stream'));
+    const errors = [];
+    const names = await glob(path.join(base, 'json/*.json'));
+    for (let jsonPath of names) {
+        const name = path.parse(path.basename(jsonPath)).name;
+        const arrowCppFilePath = path.join(base, 'cpp/file', `${name}.arrow`);
+        const arrowJavaFilePath = path.join(base, 'java/file', 
`${name}.arrow`);
+        const arrowCppStreamPath = path.join(base, 'cpp/stream', 
`${name}.arrow`);
+        const arrowJavaStreamPath = path.join(base, 'java/stream', 
`${name}.arrow`);
+        try {
+            await generateCPPFile(jsonPath, arrowCppFilePath);
+            await generateCPPStream(arrowCppFilePath, arrowCppStreamPath);
+        } catch (e) { errors.push(e.message); }
+        try {
+            await generateJavaFile(jsonPath, arrowJavaFilePath);
+            await generateJavaStream(arrowJavaFilePath, arrowJavaStreamPath);
+        } catch (e) { errors.push(e.message); }
+    }
+    if (errors.length) {
+        console.error(errors.join(`\n`));
+        process.exit(1);
+    }
+}
+
+async function generateCPPFile(jsonPath, filePath) {
+    await rimraf(filePath);
+    return await exec(
+        `../cpp/build/release/json-integration-test ${
+        `--integration --mode=JSON_TO_ARROW`} ${
+        `--json=${path.resolve(jsonPath)} --arrow=${filePath}`}`,
+        { maxBuffer: Math.pow(2, 53) - 1 }
+    );
+}
+
+async function generateCPPStream(filePath, streamPath) {
+    await rimraf(streamPath);
+    return await exec(
+        `../cpp/build/release/file-to-stream ${filePath} > ${streamPath}`,
 
 Review comment:
   I included this in my [response below](https://github.com/apache/arrow/pull/1294#discussion_r150721453)

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


> [JS] Error reading dictionary-encoded integration test files
> ------------------------------------------------------------
>
>                 Key: ARROW-1693
>                 URL: https://issues.apache.org/jira/browse/ARROW-1693
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: JavaScript
>            Reporter: Brian Hulette
>            Assignee: Brian Hulette
>              Labels: pull-request-available
>             Fix For: 0.8.0
>
>         Attachments: dictionary-cpp.arrow, dictionary-java.arrow, dictionary.json
>
>
> The JS implementation crashes when reading the dictionary test case from the integration tests.
> To replicate, first generate the test files with java and cpp impls:
> {code}
> $ cd ${ARROW_HOME}/integration/
> $ python -c 'from integration_test import generate_dictionary_case; 
> generate_dictionary_case().write("dictionary.json")'
> $ ../cpp/debug/debug/json-integration-test --integration 
> --json=dictionary.json --arrow=dictionary-cpp.arrow --mode=JSON_TO_ARROW
> $ java -cp 
> ../java/tools/target/arrow-tools-0.8.0-SNAPSHOT-jar-with-dependencies.jar 
> org.apache.arrow.tools.Integration -c JSON_TO_ARROW -a dictionary-java.arrow 
> -j dictionary.json
> {code}
> Attempt to read the files with the JS impl:
> {code}
> $ cd ${ARROW_HOME}/js/
> $ ./bin/arrow2csv.js -s dict1_0 -f ../integration/dictionary-{java,cpp}.arrow
> {code}
> Both files result in an error for me on [a8f51858|https://github.com/apache/arrow/commit/a8f518588fda471b2e3cc8e0f0064e7c4bb99899]:
> {{TypeError: Cannot read property 'buffer' of undefined}}



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to