[GitHub] [tinkerpop] tkolanko commented on a change in pull request #1539: TINKERPOP-2679 update javascript driver to support stream processing

2022-03-03 Thread GitBox


tkolanko commented on a change in pull request #1539:
URL: https://github.com/apache/tinkerpop/pull/1539#discussion_r818659117



##
File path: 
gremlin-javascript/src/main/javascript/gremlin-javascript/lib/driver/client.js
##
@@ -46,13 +45,13 @@ class Client {
*/
   constructor(url, options = {}) {
 this._options = options;
-if (this._options.processor === 'session') {
+if (this._options.processor === "session") {

Review comment:
   Yeah, it's most likely my editor. I'll get these fixed up
   
   Would you be willing to accept a seperate pr that enabled eslint with airbnb 
config + prettier and just accept it's formatting?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tinkerpop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [tinkerpop] tkolanko commented on a change in pull request #1539: TINKERPOP-2679 update javascript driver to support stream processing

2022-03-03 Thread GitBox


tkolanko commented on a change in pull request #1539:
URL: https://github.com/apache/tinkerpop/pull/1539#discussion_r818658387



##
File path: 
gremlin-javascript/src/main/javascript/gremlin-javascript/lib/driver/client.js
##
@@ -73,41 +72,118 @@ class Client {
 return this._connection.isOpen;
   }
 
+  /**
+   * Configuration specific to the current request.
+   * @typedef {Object} RequestOptions
+   * @property {String} requestId - User specified request identifier which 
must be a UUID.
+   * @property {Number} batchSize - Indicates whether the Power component is 
present.
+   * @property {String} userAgent - The size in which the result of a request 
is to be "batched" back to the client
+   * @property {Number} evaluationTimeout - The timeout for the evaluation of 
the request.
+   */
+
   /**
* Send a request to the Gremlin Server, can send a script or bytecode steps.
* @param {Bytecode|string} message The bytecode or script to send
* @param {Object} [bindings] The script bindings, if any.
-   * @param {Object} [requestOptions] Configuration specific to the current 
request.
-   * @param {String} [requestOptions.requestId] User specified request 
identifier which must be a UUID.
-   * @param {Number} [requestOptions.batchSize] The size in which the result 
of a request is to be "batched" back to the client
-   * @param {String} [requestOptions.userAgent] A custom string that specifies 
to the server where the request came from.
-   * @param {Number} [requestOptions.evaluationTimeout] The timeout for the 
evaluation of the request.
+   * @param {RequestOptions} [requestOptions] Configuration specific to the 
current request.
* @returns {Promise}
*/
   submit(message, bindings, requestOptions) {
-const requestIdOverride = requestOptions && requestOptions.requestId
-if (requestIdOverride) delete requestOptions['requestId'];
+const requestIdOverride = requestOptions && requestOptions.requestId;
+if (requestIdOverride) delete requestOptions["requestId"];
+
+const args = Object.assign(
+  {
+gremlin: message,
+aliases: { g: this._options.traversalSource || "g" },
+  },
+  requestOptions
+);
+
+if (this._options.session && this._options.processor === "session") {
+  args["session"] = this._options.session;
+}
+
+if (message instanceof Bytecode) {
+  if (this._options.session && this._options.processor === "session") {
+return this._connection.submit(
+  "session",
+  "bytecode",
+  args,
+  requestIdOverride
+);
+  } else {
+return this._connection.submit(
+  "traversal",
+  "bytecode",
+  args,
+  requestIdOverride
+);
+  }
+} else if (typeof message === "string") {
+  args["bindings"] = bindings;
+  args["language"] = "gremlin-groovy";
+  args["accept"] = this._connection.mimeType;
+  return this._connection.submit(
+this._options.processor || "",
+"eval",
+args,
+requestIdOverride
+  );
+} else {
+  throw new TypeError("message must be of type Bytecode or string");
+}
+  }
+
+  /**
+   * Send a request to the Gremlin Server and receive a stream for the 
results, can send a script or bytecode steps.
+   * @param {Bytecode|string} message The bytecode or script to send
+   * @param {Object} [bindings] The script bindings, if any.
+   * @param {RequestOptions} [requestOptions] Configuration specific to the 
current request.
+   * @returns {ReadableStream}
+   */
+  stream(message, bindings, requestOptions) {
+const requestIdOverride = requestOptions && requestOptions.requestId;
+if (requestIdOverride) delete requestOptions["requestId"];
 
-const args = Object.assign({
-  gremlin: message,
-  aliases: { 'g': this._options.traversalSource || 'g' }
-}, requestOptions)
+const args = Object.assign(
+  {
+gremlin: message,
+aliases: { g: this._options.traversalSource || "g" },
+  },
+  requestOptions
+);
 
-if (this._options.session && this._options.processor === 'session') {
-  args['session'] = this._options.session;
+if (this._options.session && this._options.processor === "session") {
+  args["session"] = this._options.session;
 }
 
 if (message instanceof Bytecode) {
-  if (this._options.session && this._options.processor === 'session') {
-return this._connection.submit('session', 'bytecode', args, 
requestIdOverride);
+  if (this._options.session && this._options.processor === "session") {
+return this._connection.stream(
+  "session",
+  "bytecode",
+  args,
+  requestIdOverride
+);
   } else {
-return this._connection.submit('traversal', 'bytecode', args, 
requestIdOverride);
+return this._connection.stream(
+  "traversal",
+  "bytecode",
+   

[GitHub] [tinkerpop] tkolanko commented on a change in pull request #1539: TINKERPOP-2679 update javascript driver to support stream processing

2022-02-22 Thread GitBox


tkolanko commented on a change in pull request #1539:
URL: https://github.com/apache/tinkerpop/pull/1539#discussion_r812367486



##
File path: docs/src/reference/gremlin-variants.asciidoc
##
@@ -1721,6 +1721,32 @@ IMPORTANT: The preferred method for setting a 
per-request timeout for scripts is
 with bytecode may try `g.with(EVALUATION_TIMEOUT, 500)` within a script. 
Scripts with multiple traversals and multiple
 timeouts will be interpreted as a sum of all timeouts identified in the script 
for that request.
 
+
+ Processing results as they are returned from the Gremlin server
+
+
+The Gremlin JavaScript driver maintains a WebSocket connection to the Gremlin 
server and receives messages according to the `batchSize` parameter on the per 
request settings or the `resultIterationBatchSize` value configured for the 
Gremlin server. When submitting scripts the default behavior is to wait for the 
entire result set to be returned from a query before allowing any processing on 
the result set. 
+
+The following examples assume that you have 100 vertices in your graph.
+
+[source,javascript]
+
+const result = await client.submit("g.V()");
+console.log(result.toArray()); // 100 - all the vertices in your graph
+
+
+When working with larger result sets it may be beneficial for memory 
management to process each chunk of data as it is returned from the gremlin 
server. The Gremlin JavaScript driver can accept an optional callback to run on 
each chunk of data returned.
+
+[source,javascript]
+
+
+await client.submit("g.V()", {}, { batchSize: 25 }, (data) => {

Review comment:
   Some of the existing tests don't like the changes I made to reduce 
duplicate code the original submit and the new stream method. I'll back out of 
those and leave the duplicated code for now; it's not that much. If we want to 
have this fixed up for 3.5.3 I can take a crack at it this weekend.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tinkerpop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [tinkerpop] tkolanko commented on a change in pull request #1539: TINKERPOP-2679 update javascript driver to support stream processing

2022-02-22 Thread GitBox


tkolanko commented on a change in pull request #1539:
URL: https://github.com/apache/tinkerpop/pull/1539#discussion_r812367486



##
File path: docs/src/reference/gremlin-variants.asciidoc
##
@@ -1721,6 +1721,32 @@ IMPORTANT: The preferred method for setting a 
per-request timeout for scripts is
 with bytecode may try `g.with(EVALUATION_TIMEOUT, 500)` within a script. 
Scripts with multiple traversals and multiple
 timeouts will be interpreted as a sum of all timeouts identified in the script 
for that request.
 
+
+ Processing results as they are returned from the Gremlin server
+
+
+The Gremlin JavaScript driver maintains a WebSocket connection to the Gremlin 
server and receives messages according to the `batchSize` parameter on the per 
request settings or the `resultIterationBatchSize` value configured for the 
Gremlin server. When submitting scripts the default behavior is to wait for the 
entire result set to be returned from a query before allowing any processing on 
the result set. 
+
+The following examples assume that you have 100 vertices in your graph.
+
+[source,javascript]
+
+const result = await client.submit("g.V()");
+console.log(result.toArray()); // 100 - all the vertices in your graph
+
+
+When working with larger result sets it may be beneficial for memory 
management to process each chunk of data as it is returned from the gremlin 
server. The Gremlin JavaScript driver can accept an optional callback to run on 
each chunk of data returned.
+
+[source,javascript]
+
+
+await client.submit("g.V()", {}, { batchSize: 25 }, (data) => {

Review comment:
   Some of the existing tests don't like the changes I made to reduce 
duplicate code from the original submit and the new stream method. I'll back 
out of those and leave the duplicated code for now; it's not that much. If we 
want to have this fixed up for 3.5.3 I can take a crack at it this weekend.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tinkerpop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [tinkerpop] tkolanko commented on a change in pull request #1539: TINKERPOP-2679 update javascript driver to support stream processing

2022-01-24 Thread GitBox


tkolanko commented on a change in pull request #1539:
URL: https://github.com/apache/tinkerpop/pull/1539#discussion_r790838160



##
File path: docs/src/reference/gremlin-variants.asciidoc
##
@@ -1721,6 +1721,32 @@ IMPORTANT: The preferred method for setting a 
per-request timeout for scripts is
 with bytecode may try `g.with(EVALUATION_TIMEOUT, 500)` within a script. 
Scripts with multiple traversals and multiple
 timeouts will be interpreted as a sum of all timeouts identified in the script 
for that request.
 
+
+ Processing results as they are returned from the Gremlin server
+
+
+The Gremlin JavaScript driver maintains a WebSocket connection to the Gremlin 
server and receives messages according to the `batchSize` parameter on the per 
request settings or the `resultIterationBatchSize` value configured for the 
Gremlin server. When submitting scripts the default behavior is to wait for the 
entire result set to be returned from a query before allowing any processing on 
the result set. 
+
+The following examples assume that you have 100 vertices in your graph.
+
+[source,javascript]
+
+const result = await client.submit("g.V()");
+console.log(result.toArray()); // 100 - all the vertices in your graph
+
+
+When working with larger result sets it may be beneficial for memory 
management to process each chunk of data as it is returned from the gremlin 
server. The Gremlin JavaScript driver can accept an optional callback to run on 
each chunk of data returned.
+
+[source,javascript]
+
+
+await client.submit("g.V()", {}, { batchSize: 25 }, (data) => {

Review comment:
   @jorgebay I implemented a new `client.stream` which returns a readable 
stream. Can you take a look and let me know what you think.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tinkerpop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [tinkerpop] tkolanko commented on a change in pull request #1539: TINKERPOP-2679 update javascript driver to support stream processing

2022-01-19 Thread GitBox


tkolanko commented on a change in pull request #1539:
URL: https://github.com/apache/tinkerpop/pull/1539#discussion_r787801589



##
File path: docs/src/reference/gremlin-variants.asciidoc
##
@@ -1721,6 +1721,32 @@ IMPORTANT: The preferred method for setting a 
per-request timeout for scripts is
 with bytecode may try `g.with(EVALUATION_TIMEOUT, 500)` within a script. 
Scripts with multiple traversals and multiple
 timeouts will be interpreted as a sum of all timeouts identified in the script 
for that request.
 
+
+ Processing results as they are returned from the Gremlin server
+
+
+The Gremlin JavaScript driver maintains a WebSocket connection to the Gremlin 
server and receives messages according to the `batchSize` parameter on the per 
request settings or the `resultIterationBatchSize` value configured for the 
Gremlin server. When submitting scripts the default behavior is to wait for the 
entire result set to be returned from a query before allowing any processing on 
the result set. 
+
+The following examples assume that you have 100 vertices in your graph.
+
+[source,javascript]
+
+const result = await client.submit("g.V()");
+console.log(result.toArray()); // 100 - all the vertices in your graph
+
+
+When working with larger result sets it may be beneficial for memory 
management to process each chunk of data as it is returned from the gremlin 
server. The Gremlin JavaScript driver can accept an optional callback to run on 
each chunk of data returned.
+
+[source,javascript]
+
+
+await client.submit("g.V()", {}, { batchSize: 25 }, (data) => {

Review comment:
   ah ok, let me take a look at how complex it would be to add an async 
iterator to websocket messages. Through a quick google search there are 
[some](https://github.com/alanshaw/it-ws) 
[libraries](https://github.com/alanshaw/it-ws) but they don't seem to be 
maintained




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tinkerpop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [tinkerpop] tkolanko commented on a change in pull request #1539: TINKERPOP-2679 update javascript driver to support stream processing

2022-01-19 Thread GitBox


tkolanko commented on a change in pull request #1539:
URL: https://github.com/apache/tinkerpop/pull/1539#discussion_r78779



##
File path: docs/src/reference/gremlin-variants.asciidoc
##
@@ -1721,6 +1721,32 @@ IMPORTANT: The preferred method for setting a 
per-request timeout for scripts is
 with bytecode may try `g.with(EVALUATION_TIMEOUT, 500)` within a script. 
Scripts with multiple traversals and multiple
 timeouts will be interpreted as a sum of all timeouts identified in the script 
for that request.
 
+
+ Processing results as they are returned from the Gremlin server
+
+
+The Gremlin JavaScript driver maintains a WebSocket connection to the Gremlin 
server and receives messages according to the `batchSize` parameter on the per 
request settings or the `resultIterationBatchSize` value configured for the 
Gremlin server. When submitting scripts the default behavior is to wait for the 
entire result set to be returned from a query before allowing any processing on 
the result set. 
+
+The following examples assume that you have 100 vertices in your graph.
+
+[source,javascript]
+
+const result = await client.submit("g.V()");
+console.log(result.toArray()); // 100 - all the vertices in your graph
+
+
+When working with larger result sets it may be beneficial for memory 
management to process each chunk of data as it is returned from the gremlin 
server. The Gremlin JavaScript driver can accept an optional callback to run on 
each chunk of data returned.
+
+[source,javascript]
+
+
+await client.submit("g.V()", {}, { batchSize: 25 }, (data) => {

Review comment:
   Our use case might be different, we don't operate on traverals but 
through submitting scripts. We have a front end user interface where users can 
enter queries with auto complete, intellisense etc etc along with some options. 
The frontend POSTS the query and options to our backend which submits the 
script, parses the result set and returns a result to the frontend.
   
   
   I went with this approach so we could so something like
   ```js
   import {parseResultSet} from './parser';
   
   ...code to handle parsing the POST request and extracting what we need...
   const output = {...} // object that holds our parsed data
   try {
 await client.submit(query, bindings, options, (data) => 
parseResultSet(data, output))
   } catch(e) {
 ...additional error handling/clean up...
throw new Error(e);
   }
   return res.json(output))
   ```
   
   This seemed like an easier opt in path than trying to handle async iteration 
on websocket messages
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tinkerpop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [tinkerpop] tkolanko commented on a change in pull request #1539: TINKERPOP-2679 update javascript driver to support stream processing

2022-01-19 Thread GitBox


tkolanko commented on a change in pull request #1539:
URL: https://github.com/apache/tinkerpop/pull/1539#discussion_r787791909



##
File path: 
gremlin-javascript/src/main/javascript/gremlin-javascript/lib/driver/connection.js
##
@@ -290,13 +296,31 @@ class Connection extends EventEmitter {
 }
 switch (response.status.code) {
   case responseStatusCode.noContent:
+if (this._onDataMessageHandlers[response.requestId]) {
+  this._onDataMessageHandlers[response.requestId](
+new ResultSet(utils.emptyArray, response.status.attributes)
+  );
+}
 this._clearHandler(response.requestId);
 return handler.callback(null, new ResultSet(utils.emptyArray, 
response.status.attributes));
   case responseStatusCode.partialContent:
-handler.result = handler.result || [];
-handler.result.push.apply(handler.result, response.result.data);
+if (this._onDataMessageHandlers[response.requestId]) {
+  this._onDataMessageHandlers[response.requestId](
+new ResultSet(response.result.data, response.status.attributes)

Review comment:
   That would mean different parsing would have to take place between 
partial content and final parsing/if the total results were less than the batch 
size.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tinkerpop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org